1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2020 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
66
67 struct omp_region
68 {
69 /* The enclosing region. */
70 struct omp_region *outer;
71
72 /* First child region. */
73 struct omp_region *inner;
74
75 /* Next peer region. */
76 struct omp_region *next;
77
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
80
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
83
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
86
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
91
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
94
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
97
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
100
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
103
104 /* Copy of fd.lastprivate_conditional != 0. */
105 bool has_lastprivate_conditional;
106
107 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
108 a depend clause. */
109 gomp_ordered *ord_stmt;
110 };
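/* As an illustration, a nest such as

     #pragma omp parallel
       #pragma omp for
       for (i = 0; i < n; i++)
         body;

   gives a GIMPLE_OMP_PARALLEL region whose INNER field points to a
   GIMPLE_OMP_FOR region; the inner region's OUTER points back at the
   parallel, and the parallel's OUTER is NULL since it is a toplevel
   region.  Each region's ENTRY block ends in the directive itself and
   its EXIT block ends in the matching GIMPLE_OMP_RETURN.  */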
111
112 static struct omp_region *root_omp_region;
113 static bool omp_any_child_fn_dumped;
114
115 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
116 bool = false);
117 static gphi *find_phi_with_arg_on_edge (tree, edge);
118 static void expand_omp (struct omp_region *region);
119
120 /* Return true if REGION is a combined parallel+workshare region. */
121
122 static inline bool
123 is_combined_parallel (struct omp_region *region)
124 {
125 return region->is_combined_parallel;
126 }
127
128 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
129 is the immediate dominator of PAR_ENTRY_BB, return true if there
130 are no data dependencies that would prevent expanding the parallel
131 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
132
133 When expanding a combined parallel+workshare region, the call to
134 the child function may need additional arguments in the case of
135 GIMPLE_OMP_FOR regions. In some cases, these arguments are
136 computed out of variables passed in from the parent to the child
137 via 'struct .omp_data_s'. For instance:
138
139 #pragma omp parallel for schedule (guided, i * 4)
140 for (j ...)
141
142 Is lowered into:
143
144 # BLOCK 2 (PAR_ENTRY_BB)
145 .omp_data_o.i = i;
146 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
147
148 # BLOCK 3 (WS_ENTRY_BB)
149 .omp_data_i = &.omp_data_o;
150 D.1667 = .omp_data_i->i;
151 D.1598 = D.1667 * 4;
152 #pragma omp for schedule (guided, D.1598)
153
154 When we outline the parallel region, the call to the child function
155 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
156 that value is computed *after* the call site. So, in principle we
157 cannot do the transformation.
158
159 To see whether the code in WS_ENTRY_BB blocks the combined
160 parallel+workshare call, we collect all the variables used in the
161 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
162 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
163 call.
164
165 FIXME. If we had the SSA form built at this point, we could merely
166 hoist the code in block 3 into block 2 and be done with it. But at
167 this point we don't have dataflow information and though we could
168 hack something up here, it is really not worth the aggravation. */
169
170 static bool
171 workshare_safe_to_combine_p (basic_block ws_entry_bb)
172 {
173 struct omp_for_data fd;
174 gimple *ws_stmt = last_stmt (ws_entry_bb);
175
176 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
177 return true;
178
179 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
180 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
181 return false;
182
183 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
184
185 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
186 return false;
187 if (fd.iter_type != long_integer_type_node)
188 return false;
189
190 /* FIXME. We give up too easily here. If any of these arguments
191 are not constants, they will likely involve variables that have
192 been mapped into fields of .omp_data_s for sharing with the child
193 function. With appropriate data flow, it would be possible to
194 see through this. */
195 if (!is_gimple_min_invariant (fd.loop.n1)
196 || !is_gimple_min_invariant (fd.loop.n2)
197 || !is_gimple_min_invariant (fd.loop.step)
198 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
199 return false;
200
201 return true;
202 }
203
204 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
205 presence (SIMD_SCHEDULE). */
206
207 static tree
208 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
209 {
210 if (!simd_schedule || integer_zerop (chunk_size))
211 return chunk_size;
212
213 poly_uint64 vf = omp_max_vf ();
214 if (known_eq (vf, 1U))
215 return chunk_size;
216
217 tree type = TREE_TYPE (chunk_size);
218 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
219 build_int_cst (type, vf - 1));
220 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
221 build_int_cst (type, -vf));
222 }
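/* For example, assuming omp_max_vf () returns a constant vectorization
   factor of 8, a schedule (simd:dynamic, 10) chunk size of 10 is
   rounded up by the code above to (10 + 7) & -8 == 16, so each chunk
   handed out by the runtime covers a whole number of vector
   iterations.  */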
223
224 /* Collect additional arguments needed to emit a combined
225 parallel+workshare call. WS_STMT is the workshare directive being
226 expanded. */
227
228 static vec<tree, va_gc> *
229 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
230 {
231 tree t;
232 location_t loc = gimple_location (ws_stmt);
233 vec<tree, va_gc> *ws_args;
234
235 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
236 {
237 struct omp_for_data fd;
238 tree n1, n2;
239
240 omp_extract_for_data (for_stmt, &fd, NULL);
241 n1 = fd.loop.n1;
242 n2 = fd.loop.n2;
243
244 if (gimple_omp_for_combined_into_p (for_stmt))
245 {
246 tree innerc
247 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n1 = OMP_CLAUSE_DECL (innerc);
251 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
252 OMP_CLAUSE__LOOPTEMP_);
253 gcc_assert (innerc);
254 n2 = OMP_CLAUSE_DECL (innerc);
255 }
256
257 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
258
259 t = fold_convert_loc (loc, long_integer_type_node, n1);
260 ws_args->quick_push (t);
261
262 t = fold_convert_loc (loc, long_integer_type_node, n2);
263 ws_args->quick_push (t);
264
265 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
266 ws_args->quick_push (t);
267
268 if (fd.chunk_size)
269 {
270 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
271 t = omp_adjust_chunk_size (t, fd.simd_schedule);
272 ws_args->quick_push (t);
273 }
274
275 return ws_args;
276 }
277 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
278 {
279 /* Number of sections is equal to the number of edges from the
280 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
281 the exit of the sections region. */
282 basic_block bb = single_succ (gimple_bb (ws_stmt));
283 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
284 vec_alloc (ws_args, 1);
285 ws_args->quick_push (t);
286 return ws_args;
287 }
288
289 gcc_unreachable ();
290 }
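/* For instance, for a combined

     #pragma omp parallel for schedule (dynamic, 16)

   the vector built above holds the loop bounds N1 and N2, the step and
   the chunk size 16, each converted to long; expand_parallel_call later
   splices these between the num_threads value and the flags argument of
   the combined GOMP_parallel_loop_* library call.  */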
291
292 /* Discover whether REGION is a combined parallel+workshare region. */
293
294 static void
295 determine_parallel_type (struct omp_region *region)
296 {
297 basic_block par_entry_bb, par_exit_bb;
298 basic_block ws_entry_bb, ws_exit_bb;
299
300 if (region == NULL || region->inner == NULL
301 || region->exit == NULL || region->inner->exit == NULL
302 || region->inner->cont == NULL)
303 return;
304
305 /* We only support parallel+for and parallel+sections. */
306 if (region->type != GIMPLE_OMP_PARALLEL
307 || (region->inner->type != GIMPLE_OMP_FOR
308 && region->inner->type != GIMPLE_OMP_SECTIONS))
309 return;
310
311 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
312 WS_EXIT_BB -> PAR_EXIT_BB. */
313 par_entry_bb = region->entry;
314 par_exit_bb = region->exit;
315 ws_entry_bb = region->inner->entry;
316 ws_exit_bb = region->inner->exit;
317
318 /* Give up if there are task reductions on the parallel; while they are implementable,
319 adding another big set of APIs or slowing down the normal paths is
320 not acceptable. */
321 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
322 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
323 return;
324
325 if (single_succ (par_entry_bb) == ws_entry_bb
326 && single_succ (ws_exit_bb) == par_exit_bb
327 && workshare_safe_to_combine_p (ws_entry_bb)
328 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
329 || (last_and_only_stmt (ws_entry_bb)
330 && last_and_only_stmt (par_exit_bb))))
331 {
332 gimple *par_stmt = last_stmt (par_entry_bb);
333 gimple *ws_stmt = last_stmt (ws_entry_bb);
334
335 if (region->inner->type == GIMPLE_OMP_FOR)
336 {
337 /* If this is a combined parallel loop, we need to determine
338 whether or not to use the combined library calls. There
339 are two cases where we do not apply the transformation:
340 static loops and any kind of ordered loop. In the first
341 case, we already open code the loop so there is no need
342 to do anything else. In the latter case, the combined
343 parallel loop call would still need extra synchronization
344 to implement ordered semantics, so there would not be any
345 gain in using the combined call. */
346 tree clauses = gimple_omp_for_clauses (ws_stmt);
347 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
348 if (c == NULL
349 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
350 == OMP_CLAUSE_SCHEDULE_STATIC)
351 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
352 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
353 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
354 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
355 return;
356 }
357 else if (region->inner->type == GIMPLE_OMP_SECTIONS
358 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
359 OMP_CLAUSE__REDUCTEMP_)
360 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
361 OMP_CLAUSE__CONDTEMP_)))
362 return;
363
364 region->is_combined_parallel = true;
365 region->inner->is_combined_parallel = true;
366 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
367 }
368 }
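/* By way of example, under the rules above

     #pragma omp parallel for schedule (dynamic)

   is marked as a combined region and later expanded into a single
   GOMP_parallel_loop_* library call, whereas a schedule (static) or an
   ordered clause keeps the parallel and the worksharing loop separate:
   the static loop is open coded anyway, and an ordered loop would need
   extra synchronization that cancels any benefit of the combined
   call.  */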
369
370 /* Debugging dumps for parallel regions. */
371 void dump_omp_region (FILE *, struct omp_region *, int);
372 void debug_omp_region (struct omp_region *);
373 void debug_all_omp_regions (void);
374
375 /* Dump the parallel region tree rooted at REGION. */
376
377 void
378 dump_omp_region (FILE *file, struct omp_region *region, int indent)
379 {
380 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
381 gimple_code_name[region->type]);
382
383 if (region->inner)
384 dump_omp_region (file, region->inner, indent + 4);
385
386 if (region->cont)
387 {
388 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
389 region->cont->index);
390 }
391
392 if (region->exit)
393 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
394 region->exit->index);
395 else
396 fprintf (file, "%*s[no exit marker]\n", indent, "");
397
398 if (region->next)
399 dump_omp_region (file, region->next, indent);
400 }
401
402 DEBUG_FUNCTION void
403 debug_omp_region (struct omp_region *region)
404 {
405 dump_omp_region (stderr, region, 0);
406 }
407
408 DEBUG_FUNCTION void
409 debug_all_omp_regions (void)
410 {
411 dump_omp_region (stderr, root_omp_region, 0);
412 }
413
414 /* Create a new parallel region starting at STMT inside region PARENT. */
415
416 static struct omp_region *
417 new_omp_region (basic_block bb, enum gimple_code type,
418 struct omp_region *parent)
419 {
420 struct omp_region *region = XCNEW (struct omp_region);
421
422 region->outer = parent;
423 region->entry = bb;
424 region->type = type;
425
426 if (parent)
427 {
428 /* This is a nested region. Add it to the list of inner
429 regions in PARENT. */
430 region->next = parent->inner;
431 parent->inner = region;
432 }
433 else
434 {
435 /* This is a toplevel region. Add it to the list of toplevel
436 regions in ROOT_OMP_REGION. */
437 region->next = root_omp_region;
438 root_omp_region = region;
439 }
440
441 return region;
442 }
443
444 /* Release the memory associated with the region tree rooted at REGION. */
445
446 static void
447 free_omp_region_1 (struct omp_region *region)
448 {
449 struct omp_region *i, *n;
450
451 for (i = region->inner; i ; i = n)
452 {
453 n = i->next;
454 free_omp_region_1 (i);
455 }
456
457 free (region);
458 }
459
460 /* Release the memory for the entire omp region tree. */
461
462 void
463 omp_free_regions (void)
464 {
465 struct omp_region *r, *n;
466 for (r = root_omp_region; r ; r = n)
467 {
468 n = r->next;
469 free_omp_region_1 (r);
470 }
471 root_omp_region = NULL;
472 }
473
474 /* A convenience function to build an empty GIMPLE_COND with just the
475 condition. */
476
477 static gcond *
478 gimple_build_cond_empty (tree cond)
479 {
480 enum tree_code pred_code;
481 tree lhs, rhs;
482
483 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
484 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
485 }
486
487 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
488 Add CHILD_FNDECL to decl chain of the supercontext of the block
489 ENTRY_BLOCK - this is the block which originally contained the
490 code from which CHILD_FNDECL was created.
491
492 Together, these actions ensure that the debug info for the outlined
493 function will be emitted with the correct lexical scope. */
494
495 static void
496 adjust_context_and_scope (struct omp_region *region, tree entry_block,
497 tree child_fndecl)
498 {
499 tree parent_fndecl = NULL_TREE;
500 gimple *entry_stmt;
501 /* OMP expansion expands inner regions before outer ones, so if
502 we e.g. have an explicit task region nested in a parallel region, when
503 expanding the task region current_function_decl will be the original
504 source function, but we actually want to use as context the child
505 function of the parallel. */
506 for (region = region->outer;
507 region && parent_fndecl == NULL_TREE; region = region->outer)
508 switch (region->type)
509 {
510 case GIMPLE_OMP_PARALLEL:
511 case GIMPLE_OMP_TASK:
512 case GIMPLE_OMP_TEAMS:
513 entry_stmt = last_stmt (region->entry);
514 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
515 break;
516 case GIMPLE_OMP_TARGET:
517 entry_stmt = last_stmt (region->entry);
518 parent_fndecl
519 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
520 break;
521 default:
522 break;
523 }
524
525 if (parent_fndecl == NULL_TREE)
526 parent_fndecl = current_function_decl;
527 DECL_CONTEXT (child_fndecl) = parent_fndecl;
528
529 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
530 {
531 tree b = BLOCK_SUPERCONTEXT (entry_block);
532 if (TREE_CODE (b) == BLOCK)
533 {
534 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
535 BLOCK_VARS (b) = child_fndecl;
536 }
537 }
538 }
539
540 /* Build the function calls to GOMP_parallel etc. to actually
541 generate the parallel operation. REGION is the parallel region
542 being expanded. BB is the block where the code is to be inserted.
543 WS_ARGS will be set if this is a call to a combined parallel+workshare
544 construct; it contains the list of additional arguments needed by
545 the workshare construct. */
546
547 static void
548 expand_parallel_call (struct omp_region *region, basic_block bb,
549 gomp_parallel *entry_stmt,
550 vec<tree, va_gc> *ws_args)
551 {
552 tree t, t1, t2, val, cond, c, clauses, flags;
553 gimple_stmt_iterator gsi;
554 gimple *stmt;
555 enum built_in_function start_ix;
556 int start_ix2;
557 location_t clause_loc;
558 vec<tree, va_gc> *args;
559
560 clauses = gimple_omp_parallel_clauses (entry_stmt);
561
562 /* Determine what flavor of GOMP_parallel we will be
563 emitting. */
564 start_ix = BUILT_IN_GOMP_PARALLEL;
565 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
566 if (rtmp)
567 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
568 else if (is_combined_parallel (region))
569 {
570 switch (region->inner->type)
571 {
572 case GIMPLE_OMP_FOR:
573 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
574 switch (region->inner->sched_kind)
575 {
576 case OMP_CLAUSE_SCHEDULE_RUNTIME:
577 /* For lastprivate(conditional:), our implementation
578 requires monotonic behavior. */
579 if (region->inner->has_lastprivate_conditional != 0)
580 start_ix2 = 3;
581 else if ((region->inner->sched_modifiers
582 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
583 start_ix2 = 6;
584 else if ((region->inner->sched_modifiers
585 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
586 start_ix2 = 7;
587 else
588 start_ix2 = 3;
589 break;
590 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
591 case OMP_CLAUSE_SCHEDULE_GUIDED:
592 if ((region->inner->sched_modifiers
593 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
594 && !region->inner->has_lastprivate_conditional)
595 {
596 start_ix2 = 3 + region->inner->sched_kind;
597 break;
598 }
599 /* FALLTHRU */
600 default:
601 start_ix2 = region->inner->sched_kind;
602 break;
603 }
604 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
605 start_ix = (enum built_in_function) start_ix2;
606 break;
607 case GIMPLE_OMP_SECTIONS:
608 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
609 break;
610 default:
611 gcc_unreachable ();
612 }
613 }
614
615 /* By default, the value of NUM_THREADS is zero (selected at run time)
616 and there is no conditional. */
617 cond = NULL_TREE;
618 val = build_int_cst (unsigned_type_node, 0);
619 flags = build_int_cst (unsigned_type_node, 0);
620
621 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
622 if (c)
623 cond = OMP_CLAUSE_IF_EXPR (c);
624
625 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
626 if (c)
627 {
628 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
629 clause_loc = OMP_CLAUSE_LOCATION (c);
630 }
631 else
632 clause_loc = gimple_location (entry_stmt);
633
634 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
635 if (c)
636 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
637
638 /* Ensure 'val' is of the correct type. */
639 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
640
641 /* If we found the clause 'if (cond)', build either
642 (cond != 0) or (cond ? val : 1u). */
643 if (cond)
644 {
645 cond = gimple_boolify (cond);
646
647 if (integer_zerop (val))
648 val = fold_build2_loc (clause_loc,
649 EQ_EXPR, unsigned_type_node, cond,
650 build_int_cst (TREE_TYPE (cond), 0));
651 else
652 {
653 basic_block cond_bb, then_bb, else_bb;
654 edge e, e_then, e_else;
655 tree tmp_then, tmp_else, tmp_join, tmp_var;
656
657 tmp_var = create_tmp_var (TREE_TYPE (val));
658 if (gimple_in_ssa_p (cfun))
659 {
660 tmp_then = make_ssa_name (tmp_var);
661 tmp_else = make_ssa_name (tmp_var);
662 tmp_join = make_ssa_name (tmp_var);
663 }
664 else
665 {
666 tmp_then = tmp_var;
667 tmp_else = tmp_var;
668 tmp_join = tmp_var;
669 }
670
671 e = split_block_after_labels (bb);
672 cond_bb = e->src;
673 bb = e->dest;
674 remove_edge (e);
675
676 then_bb = create_empty_bb (cond_bb);
677 else_bb = create_empty_bb (then_bb);
678 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
679 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
680
681 stmt = gimple_build_cond_empty (cond);
682 gsi = gsi_start_bb (cond_bb);
683 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
684
685 gsi = gsi_start_bb (then_bb);
686 expand_omp_build_assign (&gsi, tmp_then, val, true);
687
688 gsi = gsi_start_bb (else_bb);
689 expand_omp_build_assign (&gsi, tmp_else,
690 build_int_cst (unsigned_type_node, 1),
691 true);
692
693 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
694 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
695 add_bb_to_loop (then_bb, cond_bb->loop_father);
696 add_bb_to_loop (else_bb, cond_bb->loop_father);
697 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
698 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
699
700 if (gimple_in_ssa_p (cfun))
701 {
702 gphi *phi = create_phi_node (tmp_join, bb);
703 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
704 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
705 }
706
707 val = tmp_join;
708 }
709
710 gsi = gsi_start_bb (bb);
711 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
712 false, GSI_CONTINUE_LINKING);
713 }
714
715 gsi = gsi_last_nondebug_bb (bb);
716 t = gimple_omp_parallel_data_arg (entry_stmt);
717 if (t == NULL)
718 t1 = null_pointer_node;
719 else
720 t1 = build_fold_addr_expr (t);
721 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
722 t2 = build_fold_addr_expr (child_fndecl);
723
724 vec_alloc (args, 4 + vec_safe_length (ws_args));
725 args->quick_push (t2);
726 args->quick_push (t1);
727 args->quick_push (val);
728 if (ws_args)
729 args->splice (*ws_args);
730 args->quick_push (flags);
731
732 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
733 builtin_decl_explicit (start_ix), args);
734
735 if (rtmp)
736 {
737 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
738 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
739 fold_convert (type,
740 fold_convert (pointer_sized_int_node, t)));
741 }
742 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
743 false, GSI_CONTINUE_LINKING);
744 }
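/* Schematically, for a plain

     #pragma omp parallel num_threads (4)

   the code built above amounts to a call

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o, 4, 0);

   where foo._omp_fn.0 stands for the outlined child function,
   &.omp_data_o for the shared-data block (a null pointer when nothing
   is shared), 4 is the num_threads value and 0 the flags word derived
   from proc_bind.  The symbol names here are only illustrative.  */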
745
746 /* Build the function call to GOMP_task to actually
747 generate the task operation. BB is the block where the code is to be inserted. */
748
749 static void
750 expand_task_call (struct omp_region *region, basic_block bb,
751 gomp_task *entry_stmt)
752 {
753 tree t1, t2, t3;
754 gimple_stmt_iterator gsi;
755 location_t loc = gimple_location (entry_stmt);
756
757 tree clauses = gimple_omp_task_clauses (entry_stmt);
758
759 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
760 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
761 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
762 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
763 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
764 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
765
766 unsigned int iflags
767 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
768 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
769 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
770
771 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
772 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
773 tree num_tasks = NULL_TREE;
774 bool ull = false;
775 if (taskloop_p)
776 {
777 gimple *g = last_stmt (region->outer->entry);
778 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
779 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
780 struct omp_for_data fd;
781 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
782 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
783 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
784 OMP_CLAUSE__LOOPTEMP_);
785 startvar = OMP_CLAUSE_DECL (startvar);
786 endvar = OMP_CLAUSE_DECL (endvar);
787 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
788 if (fd.loop.cond_code == LT_EXPR)
789 iflags |= GOMP_TASK_FLAG_UP;
790 tree tclauses = gimple_omp_for_clauses (g);
791 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
792 if (num_tasks)
793 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
794 else
795 {
796 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
797 if (num_tasks)
798 {
799 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
800 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
801 }
802 else
803 num_tasks = integer_zero_node;
804 }
805 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
806 if (ifc == NULL_TREE)
807 iflags |= GOMP_TASK_FLAG_IF;
808 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
809 iflags |= GOMP_TASK_FLAG_NOGROUP;
810 ull = fd.iter_type == long_long_unsigned_type_node;
811 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
812 iflags |= GOMP_TASK_FLAG_REDUCTION;
813 }
814 else if (priority)
815 iflags |= GOMP_TASK_FLAG_PRIORITY;
816
817 tree flags = build_int_cst (unsigned_type_node, iflags);
818
819 tree cond = boolean_true_node;
820 if (ifc)
821 {
822 if (taskloop_p)
823 {
824 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
825 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
826 build_int_cst (unsigned_type_node,
827 GOMP_TASK_FLAG_IF),
828 build_int_cst (unsigned_type_node, 0));
829 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
830 flags, t);
831 }
832 else
833 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
834 }
835
836 if (finalc)
837 {
838 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
839 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
840 build_int_cst (unsigned_type_node,
841 GOMP_TASK_FLAG_FINAL),
842 build_int_cst (unsigned_type_node, 0));
843 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
844 }
845 if (depend)
846 depend = OMP_CLAUSE_DECL (depend);
847 else
848 depend = build_int_cst (ptr_type_node, 0);
849 if (priority)
850 priority = fold_convert (integer_type_node,
851 OMP_CLAUSE_PRIORITY_EXPR (priority));
852 else
853 priority = integer_zero_node;
854
855 gsi = gsi_last_nondebug_bb (bb);
856 tree t = gimple_omp_task_data_arg (entry_stmt);
857 if (t == NULL)
858 t2 = null_pointer_node;
859 else
860 t2 = build_fold_addr_expr_loc (loc, t);
861 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
862 t = gimple_omp_task_copy_fn (entry_stmt);
863 if (t == NULL)
864 t3 = null_pointer_node;
865 else
866 t3 = build_fold_addr_expr_loc (loc, t);
867
868 if (taskloop_p)
869 t = build_call_expr (ull
870 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
871 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
872 11, t1, t2, t3,
873 gimple_omp_task_arg_size (entry_stmt),
874 gimple_omp_task_arg_align (entry_stmt), flags,
875 num_tasks, priority, startvar, endvar, step);
876 else
877 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
878 9, t1, t2, t3,
879 gimple_omp_task_arg_size (entry_stmt),
880 gimple_omp_task_arg_align (entry_stmt), cond, flags,
881 depend, priority);
882
883 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
884 false, GSI_CONTINUE_LINKING);
885 }
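/* As a concrete illustration of the flag handling above, for

     #pragma omp task untied mergeable

   IFLAGS becomes GOMP_TASK_FLAG_UNTIED | GOMP_TASK_FLAG_MERGEABLE and,
   with no if clause, COND stays boolean_true_node, so the statement
   emitted is the nine-argument GOMP_task call rather than one of the
   GOMP_taskloop variants.  */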
886
887 /* Build the function call to GOMP_taskwait_depend to actually
888 generate the taskwait operation. BB is the block where the code is
889 to be inserted. */
890
891 static void
892 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
893 {
894 tree clauses = gimple_omp_task_clauses (entry_stmt);
895 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
896 if (depend == NULL_TREE)
897 return;
898
899 depend = OMP_CLAUSE_DECL (depend);
900
901 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
902 tree t
903 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
904 1, depend);
905
906 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
907 false, GSI_CONTINUE_LINKING);
908 }
909
910 /* Build the function call to GOMP_teams_reg to actually
911 generate the host teams operation. ENTRY_STMT is the teams statement
912 being expanded. BB is the block where the code is to be inserted. */
913
914 static void
915 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
916 {
917 tree clauses = gimple_omp_teams_clauses (entry_stmt);
918 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
919 if (num_teams == NULL_TREE)
920 num_teams = build_int_cst (unsigned_type_node, 0);
921 else
922 {
923 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
924 num_teams = fold_convert (unsigned_type_node, num_teams);
925 }
926 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
927 if (thread_limit == NULL_TREE)
928 thread_limit = build_int_cst (unsigned_type_node, 0);
929 else
930 {
931 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
932 thread_limit = fold_convert (unsigned_type_node, thread_limit);
933 }
934
935 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
936 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
937 if (t == NULL)
938 t1 = null_pointer_node;
939 else
940 t1 = build_fold_addr_expr (t);
941 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
942 tree t2 = build_fold_addr_expr (child_fndecl);
943
944 vec<tree, va_gc> *args;
945 vec_alloc (args, 5);
946 args->quick_push (t2);
947 args->quick_push (t1);
948 args->quick_push (num_teams);
949 args->quick_push (thread_limit);
950 /* For future extensibility. */
951 args->quick_push (build_zero_cst (unsigned_type_node));
952
953 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
954 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
955 args);
956
957 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
958 false, GSI_CONTINUE_LINKING);
959 }
960
961 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
962
963 static tree
964 vec2chain (vec<tree, va_gc> *v)
965 {
966 tree chain = NULL_TREE, t;
967 unsigned ix;
968
969 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
970 {
971 DECL_CHAIN (t) = chain;
972 chain = t;
973 }
974
975 return chain;
976 }
977
978 /* Remove barriers in REGION->EXIT's block. Note that this is only
979 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
980 is an implicit barrier, any barrier that a workshare inside the
981 GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
982 can now be removed. */
983
984 static void
985 remove_exit_barrier (struct omp_region *region)
986 {
987 gimple_stmt_iterator gsi;
988 basic_block exit_bb;
989 edge_iterator ei;
990 edge e;
991 gimple *stmt;
992 int any_addressable_vars = -1;
993
994 exit_bb = region->exit;
995
996 /* If the parallel region doesn't return, we don't have REGION->EXIT
997 block at all. */
998 if (! exit_bb)
999 return;
1000
1001 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1002 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1003 statements that can appear in between are extremely limited -- no
1004 memory operations at all. Here, we allow nothing at all, so the
1005 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1006 gsi = gsi_last_nondebug_bb (exit_bb);
1007 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1008 gsi_prev_nondebug (&gsi);
1009 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1010 return;
1011
1012 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1013 {
1014 gsi = gsi_last_nondebug_bb (e->src);
1015 if (gsi_end_p (gsi))
1016 continue;
1017 stmt = gsi_stmt (gsi);
1018 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1019 && !gimple_omp_return_nowait_p (stmt))
1020 {
1021 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1022 in many cases. If there could be tasks queued, the barrier
1023 might be needed to let the tasks run before some local
1024 variable of the parallel that the task uses as shared
1025 runs out of scope. The task can be spawned either
1026 from within the current function (this would be easy to check)
1027 or from some function it calls that gets passed the address
1028 of such a variable. */
1029 if (any_addressable_vars < 0)
1030 {
1031 gomp_parallel *parallel_stmt
1032 = as_a <gomp_parallel *> (last_stmt (region->entry));
1033 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1034 tree local_decls, block, decl;
1035 unsigned ix;
1036
1037 any_addressable_vars = 0;
1038 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1039 if (TREE_ADDRESSABLE (decl))
1040 {
1041 any_addressable_vars = 1;
1042 break;
1043 }
1044 for (block = gimple_block (stmt);
1045 !any_addressable_vars
1046 && block
1047 && TREE_CODE (block) == BLOCK;
1048 block = BLOCK_SUPERCONTEXT (block))
1049 {
1050 for (local_decls = BLOCK_VARS (block);
1051 local_decls;
1052 local_decls = DECL_CHAIN (local_decls))
1053 if (TREE_ADDRESSABLE (local_decls))
1054 {
1055 any_addressable_vars = 1;
1056 break;
1057 }
1058 if (block == gimple_block (parallel_stmt))
1059 break;
1060 }
1061 }
1062 if (!any_addressable_vars)
1063 gimple_omp_return_set_nowait (stmt);
1064 }
1065 }
1066 }
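/* A sketch of the situation the addressable-variable check guards
   against:

     #pragma omp parallel
     {
       int x = 0;
       #pragma omp task shared (x)
         foo (&x);
       #pragma omp for
       for (...) ...
     }

   Here x is addressable and may still be referenced by the queued task
   when threads reach the worksharing loop's closing barrier, so that
   barrier is kept (it lets the task finish while x is still in scope).
   Without any such variable the barrier is marked nowait, since the
   implicit barrier at the end of the parallel already suffices.  */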
1067
1068 static void
1069 remove_exit_barriers (struct omp_region *region)
1070 {
1071 if (region->type == GIMPLE_OMP_PARALLEL)
1072 remove_exit_barrier (region);
1073
1074 if (region->inner)
1075 {
1076 region = region->inner;
1077 remove_exit_barriers (region);
1078 while (region->next)
1079 {
1080 region = region->next;
1081 remove_exit_barriers (region);
1082 }
1083 }
1084 }
1085
1086 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1087 calls. These can't be declared as const functions, but
1088 within one parallel body they are constant, so they can be
1089 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1090 which are declared const. Similarly for task body, except
1091 that in untied task omp_get_thread_num () can change at any task
1092 scheduling point. */
1093
1094 static void
1095 optimize_omp_library_calls (gimple *entry_stmt)
1096 {
1097 basic_block bb;
1098 gimple_stmt_iterator gsi;
1099 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1100 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1101 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1102 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1103 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1104 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1105 OMP_CLAUSE_UNTIED) != NULL);
1106
1107 FOR_EACH_BB_FN (bb, cfun)
1108 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1109 {
1110 gimple *call = gsi_stmt (gsi);
1111 tree decl;
1112
1113 if (is_gimple_call (call)
1114 && (decl = gimple_call_fndecl (call))
1115 && DECL_EXTERNAL (decl)
1116 && TREE_PUBLIC (decl)
1117 && DECL_INITIAL (decl) == NULL)
1118 {
1119 tree built_in;
1120
1121 if (DECL_NAME (decl) == thr_num_id)
1122 {
1123 /* In #pragma omp task untied omp_get_thread_num () can change
1124 during the execution of the task region. */
1125 if (untied_task)
1126 continue;
1127 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1128 }
1129 else if (DECL_NAME (decl) == num_thr_id)
1130 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1131 else
1132 continue;
1133
1134 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1135 || gimple_call_num_args (call) != 0)
1136 continue;
1137
1138 if (flag_exceptions && !TREE_NOTHROW (decl))
1139 continue;
1140
1141 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1142 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1143 TREE_TYPE (TREE_TYPE (built_in))))
1144 continue;
1145
1146 gimple_call_set_fndecl (call, built_in);
1147 }
1148 }
1149 }
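/* E.g. inside an outlined parallel body a call to the library routine
   omp_get_num_threads () is redirected above to the const
   __builtin_omp_get_num_threads (), so the optimizers may CSE repeated
   calls within that body; omp_get_thread_num () is handled the same
   way except inside untied tasks.  */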
1150
1151 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1152 regimplified. */
1153
1154 static tree
1155 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1156 {
1157 tree t = *tp;
1158
1159 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1160 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1161 return t;
1162
1163 if (TREE_CODE (t) == ADDR_EXPR)
1164 recompute_tree_invariant_for_addr_expr (t);
1165
1166 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1167 return NULL_TREE;
1168 }
1169
1170 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1171
1172 static void
1173 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1174 bool after)
1175 {
1176 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1177 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1178 !after, after ? GSI_CONTINUE_LINKING
1179 : GSI_SAME_STMT);
1180 gimple *stmt = gimple_build_assign (to, from);
1181 if (after)
1182 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1183 else
1184 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1185 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1186 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1187 {
1188 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1189 gimple_regimplify_operands (stmt, &gsi);
1190 }
1191 }
1192
1193 /* Expand the OpenMP parallel or task directive starting at REGION. */
1194
1195 static void
1196 expand_omp_taskreg (struct omp_region *region)
1197 {
1198 basic_block entry_bb, exit_bb, new_bb;
1199 struct function *child_cfun;
1200 tree child_fn, block, t;
1201 gimple_stmt_iterator gsi;
1202 gimple *entry_stmt, *stmt;
1203 edge e;
1204 vec<tree, va_gc> *ws_args;
1205
1206 entry_stmt = last_stmt (region->entry);
1207 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1208 && gimple_omp_task_taskwait_p (entry_stmt))
1209 {
1210 new_bb = region->entry;
1211 gsi = gsi_last_nondebug_bb (region->entry);
1212 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1213 gsi_remove (&gsi, true);
1214 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1215 return;
1216 }
1217
1218 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1219 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1220
1221 entry_bb = region->entry;
1222 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1223 exit_bb = region->cont;
1224 else
1225 exit_bb = region->exit;
1226
1227 if (is_combined_parallel (region))
1228 ws_args = region->ws_args;
1229 else
1230 ws_args = NULL;
1231
1232 if (child_cfun->cfg)
1233 {
1234 /* Due to inlining, it may happen that we have already outlined
1235 the region, in which case all we need to do is make the
1236 sub-graph unreachable and emit the parallel call. */
1237 edge entry_succ_e, exit_succ_e;
1238
1239 entry_succ_e = single_succ_edge (entry_bb);
1240
1241 gsi = gsi_last_nondebug_bb (entry_bb);
1242 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1243 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1244 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1245 gsi_remove (&gsi, true);
1246
1247 new_bb = entry_bb;
1248 if (exit_bb)
1249 {
1250 exit_succ_e = single_succ_edge (exit_bb);
1251 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1252 }
1253 remove_edge_and_dominated_blocks (entry_succ_e);
1254 }
1255 else
1256 {
1257 unsigned srcidx, dstidx, num;
1258
1259 /* If the parallel region needs data sent from the parent
1260 function, then the very first statement (except possible
1261 tree profile counter updates) of the parallel body
1262 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1263 &.OMP_DATA_O is passed as an argument to the child function,
1264 we need to replace it with the argument as seen by the child
1265 function.
1266
1267 In most cases, this will end up being the identity assignment
1268 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1269 a function call that has been inlined, the original PARM_DECL
1270 .OMP_DATA_I may have been converted into a different local
1271 variable, in which case we need to keep the assignment. */
1272 if (gimple_omp_taskreg_data_arg (entry_stmt))
1273 {
1274 basic_block entry_succ_bb
1275 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1276 : FALLTHRU_EDGE (entry_bb)->dest;
1277 tree arg;
1278 gimple *parcopy_stmt = NULL;
1279
1280 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1281 {
1282 gimple *stmt;
1283
1284 gcc_assert (!gsi_end_p (gsi));
1285 stmt = gsi_stmt (gsi);
1286 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1287 continue;
1288
1289 if (gimple_num_ops (stmt) == 2)
1290 {
1291 tree arg = gimple_assign_rhs1 (stmt);
1292
1293 /* We're ignoring the subcode because we're
1294 effectively doing a STRIP_NOPS. */
1295
1296 if (TREE_CODE (arg) == ADDR_EXPR
1297 && (TREE_OPERAND (arg, 0)
1298 == gimple_omp_taskreg_data_arg (entry_stmt)))
1299 {
1300 parcopy_stmt = stmt;
1301 break;
1302 }
1303 }
1304 }
1305
1306 gcc_assert (parcopy_stmt != NULL);
1307 arg = DECL_ARGUMENTS (child_fn);
1308
1309 if (!gimple_in_ssa_p (cfun))
1310 {
1311 if (gimple_assign_lhs (parcopy_stmt) == arg)
1312 gsi_remove (&gsi, true);
1313 else
1314 {
1315 /* ?? Is setting the subcode really necessary ?? */
1316 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1317 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1318 }
1319 }
1320 else
1321 {
1322 tree lhs = gimple_assign_lhs (parcopy_stmt);
1323 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1324 /* We'd like to set the rhs to the default def in the child_fn,
1325 but it's too early to create ssa names in the child_fn.
1326 Instead, we set the rhs to the parm. In
1327 move_sese_region_to_fn, we introduce a default def for the
1328 parm, map the parm to its default def, and once we encounter
1329 this stmt, replace the parm with the default def. */
1330 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1331 update_stmt (parcopy_stmt);
1332 }
1333 }
1334
1335 /* Declare local variables needed in CHILD_CFUN. */
1336 block = DECL_INITIAL (child_fn);
1337 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1338 /* The gimplifier could record temporaries in parallel/task block
1339 rather than in containing function's local_decls chain,
1340 which would mean cgraph missed finalizing them. Do it now. */
1341 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1342 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1343 varpool_node::finalize_decl (t);
1344 DECL_SAVED_TREE (child_fn) = NULL;
1345 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1346 gimple_set_body (child_fn, NULL);
1347 TREE_USED (block) = 1;
1348
1349 /* Reset DECL_CONTEXT on function arguments. */
1350 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1351 DECL_CONTEXT (t) = child_fn;
1352
1353 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1354 so that it can be moved to the child function. */
1355 gsi = gsi_last_nondebug_bb (entry_bb);
1356 stmt = gsi_stmt (gsi);
1357 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1358 || gimple_code (stmt) == GIMPLE_OMP_TASK
1359 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1360 e = split_block (entry_bb, stmt);
1361 gsi_remove (&gsi, true);
1362 entry_bb = e->dest;
1363 edge e2 = NULL;
1364 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1365 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1366 else
1367 {
1368 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1369 gcc_assert (e2->dest == region->exit);
1370 remove_edge (BRANCH_EDGE (entry_bb));
1371 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1372 gsi = gsi_last_nondebug_bb (region->exit);
1373 gcc_assert (!gsi_end_p (gsi)
1374 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1375 gsi_remove (&gsi, true);
1376 }
1377
1378 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1379 if (exit_bb)
1380 {
1381 gsi = gsi_last_nondebug_bb (exit_bb);
1382 gcc_assert (!gsi_end_p (gsi)
1383 && (gimple_code (gsi_stmt (gsi))
1384 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1385 stmt = gimple_build_return (NULL);
1386 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1387 gsi_remove (&gsi, true);
1388 }
1389
1390 /* Move the parallel region into CHILD_CFUN. */
1391
1392 if (gimple_in_ssa_p (cfun))
1393 {
1394 init_tree_ssa (child_cfun);
1395 init_ssa_operands (child_cfun);
1396 child_cfun->gimple_df->in_ssa_p = true;
1397 block = NULL_TREE;
1398 }
1399 else
1400 block = gimple_block (entry_stmt);
1401
1402 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1403 if (exit_bb)
1404 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1405 if (e2)
1406 {
1407 basic_block dest_bb = e2->dest;
1408 if (!exit_bb)
1409 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1410 remove_edge (e2);
1411 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1412 }
1413 /* When the OMP expansion process cannot guarantee an up-to-date
1414 loop tree, arrange for the child function to fix up loops. */
1415 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1416 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1417
1418 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1419 num = vec_safe_length (child_cfun->local_decls);
1420 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1421 {
1422 t = (*child_cfun->local_decls)[srcidx];
1423 if (DECL_CONTEXT (t) == cfun->decl)
1424 continue;
1425 if (srcidx != dstidx)
1426 (*child_cfun->local_decls)[dstidx] = t;
1427 dstidx++;
1428 }
1429 if (dstidx != num)
1430 vec_safe_truncate (child_cfun->local_decls, dstidx);
1431
1432 /* Inform the callgraph about the new function. */
1433 child_cfun->curr_properties = cfun->curr_properties;
1434 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1435 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1436 cgraph_node *node = cgraph_node::get_create (child_fn);
1437 node->parallelized_function = 1;
1438 cgraph_node::add_new_function (child_fn, true);
1439
1440 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1441 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1442
1443 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1444 fixed in a following pass. */
1445 push_cfun (child_cfun);
1446 if (need_asm)
1447 assign_assembler_name_if_needed (child_fn);
1448
1449 if (optimize)
1450 optimize_omp_library_calls (entry_stmt);
1451 update_max_bb_count ();
1452 cgraph_edge::rebuild_edges ();
1453
1454 /* Some EH regions might become dead, see PR34608. If
1455 pass_cleanup_cfg isn't the first pass to happen with the
1456 new child, these dead EH edges might cause problems.
1457 Clean them up now. */
1458 if (flag_exceptions)
1459 {
1460 basic_block bb;
1461 bool changed = false;
1462
1463 FOR_EACH_BB_FN (bb, cfun)
1464 changed |= gimple_purge_dead_eh_edges (bb);
1465 if (changed)
1466 cleanup_tree_cfg ();
1467 }
1468 if (gimple_in_ssa_p (cfun))
1469 update_ssa (TODO_update_ssa);
1470 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1471 verify_loop_structure ();
1472 pop_cfun ();
1473
1474 if (dump_file && !gimple_in_ssa_p (cfun))
1475 {
1476 omp_any_child_fn_dumped = true;
1477 dump_function_header (dump_file, child_fn, dump_flags);
1478 dump_function_to_file (child_fn, dump_file, dump_flags);
1479 }
1480 }
1481
1482 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1483
1484 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1485 expand_parallel_call (region, new_bb,
1486 as_a <gomp_parallel *> (entry_stmt), ws_args);
1487 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1488 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1489 else
1490 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1491 if (gimple_in_ssa_p (cfun))
1492 update_ssa (TODO_update_ssa_only_virtuals);
1493 }
1494
1495 /* Information about members of an OpenACC collapsed loop nest. */
1496
1497 struct oacc_collapse
1498 {
1499 tree base; /* Base value. */
1500 tree iters; /* Number of steps. */
1501 tree step; /* Step size. */
1502 tree tile; /* Tile increment (if tiled). */
1503 tree outer; /* Tile iterator var. */
1504 };
1505
1506 /* Helper for expand_oacc_for. Determine collapsed loop information.
1507 Fill in COUNTS array. Emit any initialization code before GSI.
1508 Return the calculated outer loop bound of BOUND_TYPE. */
1509
1510 static tree
1511 expand_oacc_collapse_init (const struct omp_for_data *fd,
1512 gimple_stmt_iterator *gsi,
1513 oacc_collapse *counts, tree bound_type,
1514 location_t loc)
1515 {
1516 tree tiling = fd->tiling;
1517 tree total = build_int_cst (bound_type, 1);
1518 int ix;
1519
1520 gcc_assert (integer_onep (fd->loop.step));
1521 gcc_assert (integer_zerop (fd->loop.n1));
1522
1523 /* When tiling, the first operand of the tile clause applies to the
1524 innermost loop, and we work outwards from there. Seems
1525 backwards, but whatever. */
1526 for (ix = fd->collapse; ix--;)
1527 {
1528 const omp_for_data_loop *loop = &fd->loops[ix];
1529
1530 tree iter_type = TREE_TYPE (loop->v);
1531 tree diff_type = iter_type;
1532 tree plus_type = iter_type;
1533
1534 gcc_assert (loop->cond_code == fd->loop.cond_code);
1535
1536 if (POINTER_TYPE_P (iter_type))
1537 plus_type = sizetype;
1538 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1539 diff_type = signed_type_for (diff_type);
1540 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1541 diff_type = integer_type_node;
1542
1543 if (tiling)
1544 {
1545 tree num = build_int_cst (integer_type_node, fd->collapse);
1546 tree loop_no = build_int_cst (integer_type_node, ix);
1547 tree tile = TREE_VALUE (tiling);
1548 gcall *call
1549 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1550 /* gwv-outer=*/integer_zero_node,
1551 /* gwv-inner=*/integer_zero_node);
1552
1553 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1554 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1555 gimple_call_set_lhs (call, counts[ix].tile);
1556 gimple_set_location (call, loc);
1557 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1558
1559 tiling = TREE_CHAIN (tiling);
1560 }
1561 else
1562 {
1563 counts[ix].tile = NULL;
1564 counts[ix].outer = loop->v;
1565 }
1566
1567 tree b = loop->n1;
1568 tree e = loop->n2;
1569 tree s = loop->step;
1570 bool up = loop->cond_code == LT_EXPR;
1571 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1572 bool negating;
1573 tree expr;
1574
1575 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1576 true, GSI_SAME_STMT);
1577 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1578 true, GSI_SAME_STMT);
1579
1580 /* Convert the step, avoiding possible unsigned->signed overflow. */
1581 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1582 if (negating)
1583 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1584 s = fold_convert (diff_type, s);
1585 if (negating)
1586 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1587 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1588 true, GSI_SAME_STMT);
1589
1590 /* Determine the range, avoiding possible unsigned->signed overflow. */
1591 negating = !up && TYPE_UNSIGNED (iter_type);
1592 expr = fold_build2 (MINUS_EXPR, plus_type,
1593 fold_convert (plus_type, negating ? b : e),
1594 fold_convert (plus_type, negating ? e : b));
1595 expr = fold_convert (diff_type, expr);
1596 if (negating)
1597 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1598 tree range = force_gimple_operand_gsi
1599 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1600
1601 /* Determine number of iterations. */
1602 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1603 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1604 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1605
1606 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1607 true, GSI_SAME_STMT);
1608
1609 counts[ix].base = b;
1610 counts[ix].iters = iters;
1611 counts[ix].step = s;
1612
1613 total = fold_build2 (MULT_EXPR, bound_type, total,
1614 fold_convert (bound_type, iters));
1615 }
1616
1617 return total;
1618 }
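/* Worked example of the iteration count computed above: for a member
   loop  for (i = 0; i < 10; i += 3)  we have range = 10, dir = 1 and
   s = 3, so iters = (10 - 1 + 3) / 3 == 4, matching the iterations
   i = 0, 3, 6 and 9.  */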
1619
1620 /* Emit initializers for collapsed loop members. INNER is true if
1621 this is for the element loop of a TILE. IVAR is the outer
1622 loop iteration variable, from which collapsed loop iteration values
1623 are calculated. The COUNTS array has been initialized by
1624 expand_oacc_collapse_init. */
1625
1626 static void
1627 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1628 gimple_stmt_iterator *gsi,
1629 const oacc_collapse *counts, tree ivar)
1630 {
1631 tree ivar_type = TREE_TYPE (ivar);
1632
1633 /* The most rapidly changing iteration variable is the innermost
1634 one. */
1635 for (int ix = fd->collapse; ix--;)
1636 {
1637 const omp_for_data_loop *loop = &fd->loops[ix];
1638 const oacc_collapse *collapse = &counts[ix];
1639 tree v = inner ? loop->v : collapse->outer;
1640 tree iter_type = TREE_TYPE (v);
1641 tree diff_type = TREE_TYPE (collapse->step);
1642 tree plus_type = iter_type;
1643 enum tree_code plus_code = PLUS_EXPR;
1644 tree expr;
1645
1646 if (POINTER_TYPE_P (iter_type))
1647 {
1648 plus_code = POINTER_PLUS_EXPR;
1649 plus_type = sizetype;
1650 }
1651
1652 expr = ivar;
1653 if (ix)
1654 {
1655 tree mod = fold_convert (ivar_type, collapse->iters);
1656 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1657 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1658 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1659 true, GSI_SAME_STMT);
1660 }
1661
1662 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1663 collapse->step);
1664 expr = fold_build2 (plus_code, iter_type,
1665 inner ? collapse->outer : collapse->base,
1666 fold_convert (plus_type, expr));
1667 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1668 true, GSI_SAME_STMT);
1669 gassign *ass = gimple_build_assign (v, expr);
1670 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1671 }
1672 }
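/* For instance, with two collapsed loops of 3 and 4 iterations the
   combined variable IVAR runs from 0 to 11; the division/modulus above
   recovers the innermost index as ivar % 4 and the outer index as
   ivar / 4, and each is then scaled by its step and added to the loop
   base (or to the tile iterator when INNER is true).  */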
1673
1674 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1675 of the combined collapse > 1 loop constructs, generate code like:
1676 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1677 if (cond3 is <)
1678 adj = STEP3 - 1;
1679 else
1680 adj = STEP3 + 1;
1681 count3 = (adj + N32 - N31) / STEP3;
1682 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1683 if (cond2 is <)
1684 adj = STEP2 - 1;
1685 else
1686 adj = STEP2 + 1;
1687 count2 = (adj + N22 - N21) / STEP2;
1688 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1689 if (cond1 is <)
1690 adj = STEP1 - 1;
1691 else
1692 adj = STEP1 + 1;
1693 count1 = (adj + N12 - N11) / STEP1;
1694 count = count1 * count2 * count3;
1695 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1696 count = 0;
1697 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1698 of the combined loop constructs, just initialize COUNTS array
1699 from the _looptemp_ clauses. For loop nests with non-rectangular
1700 loops, do this only for the rectangular loops. Then pick
1701 the loops which reference outer vars in their bound expressions
1702 and the loops which they refer to and for this sub-nest compute
1703 number of iterations. For triangular loops use Faulhaber's formula,
1704 otherwise as a fallback, compute by iterating the loops.
1705 If e.g. the sub-nest is
1706 for (I = N11; I COND1 N12; I += STEP1)
1707 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1708 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1709 do:
1710 COUNT = 0;
1711 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1712 for (tmpj = M21 * tmpi + N21;
1713 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1714 {
1715 int tmpk1 = M31 * tmpj + N31;
1716 int tmpk2 = M32 * tmpj + N32;
1717 if (tmpk1 COND3 tmpk2)
1718 {
1719 if (COND3 is <)
1720 adj = STEP3 - 1;
1721 else
1722 adj = STEP3 + 1;
1723 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1724 }
1725 }
1726 and finally multiply the counts of the rectangular loops not
1727 in the sub-nest with COUNT. Also, as counts[fd->last_nonrect]
1728 store number of iterations of the loops from fd->first_nonrect
1729 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1730 by the counts of rectangular loops not referenced in any non-rectangular
1731 loops sandwiched in between those. */
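/* For illustration only (a hedged, made-up example, not part of the
   generated code): take the hypothetical triangular sub-nest
     for (I = 0; I < 4; I++)
       for (J = 0; J < I; J++)
   The outer loop runs outer_niters = 4 times, the inner loop runs
   first_inner_iterations = 0 times on the first outer iteration, and
   each outer step adds factor = 1 inner iteration.  The closed form
   used below,
     count = outer_niters * first_inner_iterations
             + factor * outer_niters * (outer_niters - 1) / 2,
   gives 4 * 0 + 1 * (4 * 3 / 2) = 6, matching the 0 + 1 + 2 + 3
   iterations the nest actually executes.  */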
1732
1733 /* NOTE: It *could* be better to moosh all of the BBs together,
1734 creating one larger BB with all the computation and the unexpected
1735 jump at the end. I.e.
1736
1737 bool zero3, zero2, zero1, zero;
1738
1739 zero3 = N32 c3 N31;
1740 count3 = (N32 - N31) /[cl] STEP3;
1741 zero2 = N22 c2 N21;
1742 count2 = (N22 - N21) /[cl] STEP2;
1743 zero1 = N12 c1 N11;
1744 count1 = (N12 - N11) /[cl] STEP1;
1745 zero = zero3 || zero2 || zero1;
1746 count = count1 * count2 * count3;
1747 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1748
1749 After all, we expect zero to be false, and thus we expect to have to
1750 evaluate all of the comparison expressions, so short-circuiting
1751 oughtn't be a win. Since the condition isn't protecting a
1752 denominator, we're not concerned about divide-by-zero, so we can
1753 fully evaluate count even if a numerator turned out to be wrong.
1754
1755 It seems like putting this all together would create much better
1756 scheduling opportunities, and less pressure on the chip's branch
1757 predictor. */
1758
1759 static void
1760 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1761 basic_block &entry_bb, tree *counts,
1762 basic_block &zero_iter1_bb, int &first_zero_iter1,
1763 basic_block &zero_iter2_bb, int &first_zero_iter2,
1764 basic_block &l2_dom_bb)
1765 {
1766 tree t, type = TREE_TYPE (fd->loop.v);
1767 edge e, ne;
1768 int i;
1769
1770 /* Collapsed loops need work for expansion into SSA form. */
1771 gcc_assert (!gimple_in_ssa_p (cfun));
1772
1773 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1774 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1775 {
1776 gcc_assert (fd->ordered == 0);
1777 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1778 isn't supposed to be handled, as the inner loop doesn't
1779 use it. */
1780 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1781 OMP_CLAUSE__LOOPTEMP_);
1782 gcc_assert (innerc);
1783 for (i = 0; i < fd->collapse; i++)
1784 {
1785 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1786 OMP_CLAUSE__LOOPTEMP_);
1787 gcc_assert (innerc);
1788 if (i)
1789 counts[i] = OMP_CLAUSE_DECL (innerc);
1790 else
1791 counts[0] = NULL_TREE;
1792 }
1793 return;
1794 }
1795
1796 for (i = fd->collapse; i < fd->ordered; i++)
1797 {
1798 tree itype = TREE_TYPE (fd->loops[i].v);
1799 counts[i] = NULL_TREE;
1800 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1801 fold_convert (itype, fd->loops[i].n1),
1802 fold_convert (itype, fd->loops[i].n2));
1803 if (t && integer_zerop (t))
1804 {
1805 for (i = fd->collapse; i < fd->ordered; i++)
1806 counts[i] = build_int_cst (type, 0);
1807 break;
1808 }
1809 }
1810 bool rect_count_seen = false;
1811 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1812 {
1813 tree itype = TREE_TYPE (fd->loops[i].v);
1814
1815 if (i >= fd->collapse && counts[i])
1816 continue;
1817 if (fd->non_rect)
1818 {
1819 /* Skip loops that use outer iterators in their expressions
1820 during this phase. */
1821 if (fd->loops[i].m1 || fd->loops[i].m2)
1822 {
1823 counts[i] = build_zero_cst (type);
1824 continue;
1825 }
1826 }
1827 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1828 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1829 fold_convert (itype, fd->loops[i].n1),
1830 fold_convert (itype, fd->loops[i].n2)))
1831 == NULL_TREE || !integer_onep (t)))
1832 {
1833 gcond *cond_stmt;
1834 tree n1, n2;
1835 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1836 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1837 true, GSI_SAME_STMT);
1838 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1839 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1840 true, GSI_SAME_STMT);
1841 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1842 NULL_TREE, NULL_TREE);
1843 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1844 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1845 expand_omp_regimplify_p, NULL, NULL)
1846 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1847 expand_omp_regimplify_p, NULL, NULL))
1848 {
1849 *gsi = gsi_for_stmt (cond_stmt);
1850 gimple_regimplify_operands (cond_stmt, gsi);
1851 }
1852 e = split_block (entry_bb, cond_stmt);
1853 basic_block &zero_iter_bb
1854 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1855 int &first_zero_iter
1856 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1857 if (zero_iter_bb == NULL)
1858 {
1859 gassign *assign_stmt;
1860 first_zero_iter = i;
1861 zero_iter_bb = create_empty_bb (entry_bb);
1862 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1863 *gsi = gsi_after_labels (zero_iter_bb);
1864 if (i < fd->collapse)
1865 assign_stmt = gimple_build_assign (fd->loop.n2,
1866 build_zero_cst (type));
1867 else
1868 {
1869 counts[i] = create_tmp_reg (type, ".count");
1870 assign_stmt
1871 = gimple_build_assign (counts[i], build_zero_cst (type));
1872 }
1873 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1874 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1875 entry_bb);
1876 }
1877 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1878 ne->probability = profile_probability::very_unlikely ();
1879 e->flags = EDGE_TRUE_VALUE;
1880 e->probability = ne->probability.invert ();
1881 if (l2_dom_bb == NULL)
1882 l2_dom_bb = entry_bb;
1883 entry_bb = e->dest;
1884 *gsi = gsi_last_nondebug_bb (entry_bb);
1885 }
1886
1887 if (POINTER_TYPE_P (itype))
1888 itype = signed_type_for (itype);
1889 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1890 ? -1 : 1));
1891 t = fold_build2 (PLUS_EXPR, itype,
1892 fold_convert (itype, fd->loops[i].step), t);
1893 t = fold_build2 (PLUS_EXPR, itype, t,
1894 fold_convert (itype, fd->loops[i].n2));
1895 t = fold_build2 (MINUS_EXPR, itype, t,
1896 fold_convert (itype, fd->loops[i].n1));
1897 /* ?? We could probably use CEIL_DIV_EXPR instead of
1898 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1899 generate the same code in the end, because generically we
1900 don't know that the values involved must be negative for
1901 GT?? */
1902 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1903 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1904 fold_build1 (NEGATE_EXPR, itype, t),
1905 fold_build1 (NEGATE_EXPR, itype,
1906 fold_convert (itype,
1907 fd->loops[i].step)));
1908 else
1909 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1910 fold_convert (itype, fd->loops[i].step));
1911 t = fold_convert (type, t);
1912 if (TREE_CODE (t) == INTEGER_CST)
1913 counts[i] = t;
1914 else
1915 {
1916 if (i < fd->collapse || i != first_zero_iter2)
1917 counts[i] = create_tmp_reg (type, ".count");
1918 expand_omp_build_assign (gsi, counts[i], t);
1919 }
1920 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1921 {
1922 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1923 continue;
1924 if (!rect_count_seen)
1925 {
1926 t = counts[i];
1927 rect_count_seen = true;
1928 }
1929 else
1930 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1931 expand_omp_build_assign (gsi, fd->loop.n2, t);
1932 }
1933 }
1934 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1935 {
1936 gcc_assert (fd->last_nonrect != -1);
1937
1938 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1939 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1940 build_zero_cst (type));
1941 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1942 if (fd->loops[i].m1
1943 || fd->loops[i].m2
1944 || fd->loops[i].non_rect_referenced)
1945 break;
1946 if (i == fd->last_nonrect
1947 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1948 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1949 {
1950 int o = fd->first_nonrect;
1951 tree itype = TREE_TYPE (fd->loops[o].v);
1952 tree n1o = create_tmp_reg (itype, ".n1o");
1953 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1954 expand_omp_build_assign (gsi, n1o, t);
1955 tree n2o = create_tmp_reg (itype, ".n2o");
1956 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1957 expand_omp_build_assign (gsi, n2o, t);
1958 if (fd->loops[i].m1 && fd->loops[i].m2)
1959 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1960 unshare_expr (fd->loops[i].m1));
1961 else if (fd->loops[i].m1)
1962 t = fold_unary (NEGATE_EXPR, itype,
1963 unshare_expr (fd->loops[i].m1));
1964 else
1965 t = unshare_expr (fd->loops[i].m2);
1966 tree m2minusm1
1967 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1968 true, GSI_SAME_STMT);
1969
1970 gimple_stmt_iterator gsi2 = *gsi;
1971 gsi_prev (&gsi2);
1972 e = split_block (entry_bb, gsi_stmt (gsi2));
1973 e = split_block (e->dest, (gimple *) NULL);
1974 basic_block bb1 = e->src;
1975 entry_bb = e->dest;
1976 *gsi = gsi_after_labels (entry_bb);
1977
1978 gsi2 = gsi_after_labels (bb1);
1979 tree ostep = fold_convert (itype, fd->loops[o].step);
1980 t = build_int_cst (itype, (fd->loops[o].cond_code
1981 == LT_EXPR ? -1 : 1));
1982 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
1983 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
1984 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
1985 if (TYPE_UNSIGNED (itype)
1986 && fd->loops[o].cond_code == GT_EXPR)
1987 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1988 fold_build1 (NEGATE_EXPR, itype, t),
1989 fold_build1 (NEGATE_EXPR, itype, ostep));
1990 else
1991 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
1992 tree outer_niters
1993 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
1994 true, GSI_SAME_STMT);
1995 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
1996 build_one_cst (itype));
1997 t = fold_build2 (MULT_EXPR, itype, t, ostep);
1998 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
1999 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2000 true, GSI_SAME_STMT);
2001 tree n1, n2, n1e, n2e;
2002 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2003 if (fd->loops[i].m1)
2004 {
2005 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2006 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2007 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2008 }
2009 else
2010 n1 = t;
2011 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2012 true, GSI_SAME_STMT);
2013 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2014 if (fd->loops[i].m2)
2015 {
2016 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2017 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2018 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2019 }
2020 else
2021 n2 = t;
2022 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2023 true, GSI_SAME_STMT);
2024 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2025 if (fd->loops[i].m1)
2026 {
2027 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2028 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2029 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2030 }
2031 else
2032 n1e = t;
2033 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2034 true, GSI_SAME_STMT);
2035 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2036 if (fd->loops[i].m2)
2037 {
2038 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2039 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2040 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2041 }
2042 else
2043 n2e = t;
2044 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2045 true, GSI_SAME_STMT);
2046 gcond *cond_stmt
2047 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2048 NULL_TREE, NULL_TREE);
2049 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2050 e = split_block (bb1, cond_stmt);
2051 e->flags = EDGE_TRUE_VALUE;
2052 e->probability = profile_probability::likely ().guessed ();
2053 basic_block bb2 = e->dest;
2054 gsi2 = gsi_after_labels (bb2);
2055
2056 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2057 NULL_TREE, NULL_TREE);
2058 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2059 e = split_block (bb2, cond_stmt);
2060 e->flags = EDGE_TRUE_VALUE;
2061 e->probability = profile_probability::likely ().guessed ();
2062 gsi2 = gsi_after_labels (e->dest);
2063
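/* A sketch of what the next few statements compute, in terms of the
   names used below: first_inner_iterations is the trip count of the
   inner loop on the very first outer iteration (outer IV equal to
   n1o), and factor is how much that trip count changes with each
   outer step, roughly ((m2 - m1) * ostep) / step.  They feed the
   closed-form total further down:
     outer_niters * first_inner_iterations
     + factor * outer_niters * (outer_niters - 1) / 2.  */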
2064 tree step = fold_convert (itype, fd->loops[i].step);
2065 t = build_int_cst (itype, (fd->loops[i].cond_code
2066 == LT_EXPR ? -1 : 1));
2067 t = fold_build2 (PLUS_EXPR, itype, step, t);
2068 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2069 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2070 if (TYPE_UNSIGNED (itype)
2071 && fd->loops[i].cond_code == GT_EXPR)
2072 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2073 fold_build1 (NEGATE_EXPR, itype, t),
2074 fold_build1 (NEGATE_EXPR, itype, step));
2075 else
2076 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2077 tree first_inner_iterations
2078 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2079 true, GSI_SAME_STMT);
2080 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2081 if (TYPE_UNSIGNED (itype)
2082 && fd->loops[i].cond_code == GT_EXPR)
2083 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2084 fold_build1 (NEGATE_EXPR, itype, t),
2085 fold_build1 (NEGATE_EXPR, itype, step));
2086 else
2087 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2088 tree factor
2089 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2090 true, GSI_SAME_STMT);
2091 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2092 build_one_cst (itype));
2093 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2094 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2095 t = fold_build2 (MULT_EXPR, itype, factor, t);
2096 t = fold_build2 (PLUS_EXPR, itype,
2097 fold_build2 (MULT_EXPR, itype, outer_niters,
2098 first_inner_iterations), t);
2099 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2100 fold_convert (type, t));
2101
2102 basic_block bb3 = create_empty_bb (bb1);
2103 add_bb_to_loop (bb3, bb1->loop_father);
2104
2105 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2106 e->probability = profile_probability::unlikely ().guessed ();
2107
2108 gsi2 = gsi_after_labels (bb3);
2109 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2110 NULL_TREE, NULL_TREE);
2111 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2112 e = split_block (bb3, cond_stmt);
2113 e->flags = EDGE_TRUE_VALUE;
2114 e->probability = profile_probability::likely ().guessed ();
2115 basic_block bb4 = e->dest;
2116
2117 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2118 ne->probability = e->probability.invert ();
2119
2120 basic_block bb5 = create_empty_bb (bb2);
2121 add_bb_to_loop (bb5, bb2->loop_father);
2122
2123 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2124 ne->probability = profile_probability::unlikely ().guessed ();
2125
2126 for (int j = 0; j < 2; j++)
2127 {
2128 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2129 t = fold_build2 (MINUS_EXPR, itype,
2130 unshare_expr (fd->loops[i].n1),
2131 unshare_expr (fd->loops[i].n2));
2132 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2133 tree tem
2134 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2135 true, GSI_SAME_STMT);
2136 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2137 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2138 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2139 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2140 true, GSI_SAME_STMT);
2141 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2142 if (fd->loops[i].m1)
2143 {
2144 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2145 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2146 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2147 }
2148 else
2149 n1 = t;
2150 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2151 true, GSI_SAME_STMT);
2152 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2153 if (fd->loops[i].m2)
2154 {
2155 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2156 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2157 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2158 }
2159 else
2160 n2 = t;
2161 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2162 true, GSI_SAME_STMT);
2163 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2164
2165 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2166 NULL_TREE, NULL_TREE);
2167 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2168 e = split_block (gsi_bb (gsi2), cond_stmt);
2169 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2170 e->probability = profile_probability::unlikely ().guessed ();
2171 ne = make_edge (e->src, bb1,
2172 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2173 ne->probability = e->probability.invert ();
2174 gsi2 = gsi_after_labels (e->dest);
2175
2176 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2177 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2178
2179 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2180 }
2181
2182 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2183 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2184 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2185
2186 if (fd->first_nonrect + 1 == fd->last_nonrect)
2187 {
2188 fd->first_inner_iterations = first_inner_iterations;
2189 fd->factor = factor;
2190 fd->adjn1 = n1o;
2191 }
2192 }
2193 else
2194 {
2195 /* Fallback implementation. Evaluate the loops with m1/m2
2196 non-NULL as well as their outer loops at runtime using temporaries
2197 instead of the original iteration variables, and in the
2198 body just bump the counter. */
2199 gimple_stmt_iterator gsi2 = *gsi;
2200 gsi_prev (&gsi2);
2201 e = split_block (entry_bb, gsi_stmt (gsi2));
2202 e = split_block (e->dest, (gimple *) NULL);
2203 basic_block cur_bb = e->src;
2204 basic_block next_bb = e->dest;
2205 entry_bb = e->dest;
2206 *gsi = gsi_after_labels (entry_bb);
2207
2208 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2209 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2210
2211 for (i = 0; i <= fd->last_nonrect; i++)
2212 {
2213 if (fd->loops[i].m1 == NULL_TREE
2214 && fd->loops[i].m2 == NULL_TREE
2215 && !fd->loops[i].non_rect_referenced)
2216 continue;
2217
2218 tree itype = TREE_TYPE (fd->loops[i].v);
2219
2220 gsi2 = gsi_after_labels (cur_bb);
2221 tree n1, n2;
2222 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2223 if (fd->loops[i].m1)
2224 {
2225 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2226 n1 = fold_build2 (MULT_EXPR, itype,
2227 vs[i - fd->loops[i].outer], n1);
2228 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2229 }
2230 else
2231 n1 = t;
2232 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2233 true, GSI_SAME_STMT);
2234 if (i < fd->last_nonrect)
2235 {
2236 vs[i] = create_tmp_reg (itype, ".it");
2237 expand_omp_build_assign (&gsi2, vs[i], n1);
2238 }
2239 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2240 if (fd->loops[i].m2)
2241 {
2242 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2243 n2 = fold_build2 (MULT_EXPR, itype,
2244 vs[i - fd->loops[i].outer], n2);
2245 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2246 }
2247 else
2248 n2 = t;
2249 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2250 true, GSI_SAME_STMT);
2251 if (i == fd->last_nonrect)
2252 {
2253 gcond *cond_stmt
2254 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2255 NULL_TREE, NULL_TREE);
2256 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2257 e = split_block (cur_bb, cond_stmt);
2258 e->flags = EDGE_TRUE_VALUE;
2259 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2260 e->probability = profile_probability::likely ().guessed ();
2261 ne->probability = e->probability.invert ();
2262 gsi2 = gsi_after_labels (e->dest);
2263
2264 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2265 ? -1 : 1));
2266 t = fold_build2 (PLUS_EXPR, itype,
2267 fold_convert (itype, fd->loops[i].step), t);
2268 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2269 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2270 tree step = fold_convert (itype, fd->loops[i].step);
2271 if (TYPE_UNSIGNED (itype)
2272 && fd->loops[i].cond_code == GT_EXPR)
2273 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2274 fold_build1 (NEGATE_EXPR, itype, t),
2275 fold_build1 (NEGATE_EXPR, itype, step));
2276 else
2277 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2278 t = fold_convert (type, t);
2279 t = fold_build2 (PLUS_EXPR, type,
2280 counts[fd->last_nonrect], t);
2281 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2282 true, GSI_SAME_STMT);
2283 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2284 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2285 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2286 break;
2287 }
2288 e = split_block (cur_bb, last_stmt (cur_bb));
2289
2290 basic_block new_cur_bb = create_empty_bb (cur_bb);
2291 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2292
2293 gsi2 = gsi_after_labels (e->dest);
2294 tree step = fold_convert (itype,
2295 unshare_expr (fd->loops[i].step));
2296 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2297 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2298 true, GSI_SAME_STMT);
2299 expand_omp_build_assign (&gsi2, vs[i], t);
2300
2301 ne = split_block (e->dest, last_stmt (e->dest));
2302 gsi2 = gsi_after_labels (ne->dest);
2303
2304 gcond *cond_stmt
2305 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2306 NULL_TREE, NULL_TREE);
2307 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2308 edge e3, e4;
2309 if (next_bb == entry_bb)
2310 {
2311 e3 = find_edge (ne->dest, next_bb);
2312 e3->flags = EDGE_FALSE_VALUE;
2313 }
2314 else
2315 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2316 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2317 e4->probability = profile_probability::likely ().guessed ();
2318 e3->probability = e4->probability.invert ();
2319 basic_block esrc = e->src;
2320 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2321 cur_bb = new_cur_bb;
2322 basic_block latch_bb = next_bb;
2323 next_bb = e->dest;
2324 remove_edge (e);
2325 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2326 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2327 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2328 }
2329 }
2330 t = NULL_TREE;
2331 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2332 if (!fd->loops[i].non_rect_referenced
2333 && fd->loops[i].m1 == NULL_TREE
2334 && fd->loops[i].m2 == NULL_TREE)
2335 {
2336 if (t == NULL_TREE)
2337 t = counts[i];
2338 else
2339 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2340 }
2341 if (t)
2342 {
2343 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2344 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2345 }
2346 if (!rect_count_seen)
2347 t = counts[fd->last_nonrect];
2348 else
2349 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2350 counts[fd->last_nonrect]);
2351 expand_omp_build_assign (gsi, fd->loop.n2, t);
2352 }
2353 else if (fd->non_rect)
2354 {
2355 tree t = fd->loop.n2;
2356 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2357 int non_rect_referenced = 0, non_rect = 0;
2358 for (i = 0; i < fd->collapse; i++)
2359 {
2360 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2361 && !integer_zerop (counts[i]))
2362 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2363 if (fd->loops[i].non_rect_referenced)
2364 non_rect_referenced++;
2365 if (fd->loops[i].m1 || fd->loops[i].m2)
2366 non_rect++;
2367 }
2368 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2369 counts[fd->last_nonrect] = t;
2370 }
2371 }
2372
2373 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2374 T = V;
2375 V3 = N31 + (T % count3) * STEP3;
2376 T = T / count3;
2377 V2 = N21 + (T % count2) * STEP2;
2378 T = T / count2;
2379 V1 = N11 + T * STEP1;
2380 if this loop doesn't have an inner loop construct combined with it.
2381 If it does have an inner loop construct combined with it and the
2382 iteration count isn't known constant, store values from counts array
2383 into its _looptemp_ temporaries instead.
2384 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2385 inclusive), use the count of all those loops together, and either
2386 find quadratic etc. equation roots, or as a fallback, do:
2387 COUNT = 0;
2388 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2389 for (tmpj = M21 * tmpi + N21;
2390 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2391 {
2392 int tmpk1 = M31 * tmpj + N31;
2393 int tmpk2 = M32 * tmpj + N32;
2394 if (tmpk1 COND3 tmpk2)
2395 {
2396 if (COND3 is <)
2397 adj = STEP3 - 1;
2398 else
2399 adj = STEP3 + 1;
2400 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2401 if (COUNT + temp > T)
2402 {
2403 V1 = tmpi;
2404 V2 = tmpj;
2405 V3 = tmpk1 + (T - COUNT) * STEP3;
2406 goto done;
2407 }
2408 else
2409 COUNT += temp;
2410 }
2411 }
2412 done:;
2413 but for optional innermost or outermost rectangular loops that aren't
2414 referenced by other loop expressions keep doing the division/modulo. */
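/* A small worked example of the division/modulo decomposition above,
   with made-up numbers purely for illustration: with count3 = 5,
   count2 = 3 and logical iteration number V = 13,
     T = 13;  13 % 5 = 3, so V3 = N31 + 3 * STEP3;  T = 13 / 5 = 2;
              2 % 3 = 2,  so V2 = N21 + 2 * STEP2;  T = 2 / 3 = 0;
              V1 = N11 + 0 * STEP1;
   which indeed is logical iteration 0 * (3 * 5) + 2 * 5 + 3 = 13 of
   the collapsed nest.  */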
2415
2416 static void
2417 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2418 tree *counts, tree *nonrect_bounds,
2419 gimple *inner_stmt, tree startvar)
2420 {
2421 int i;
2422 if (gimple_omp_for_combined_p (fd->for_stmt))
2423 {
2424 /* If fd->loop.n2 is constant, then no propagation of the counts
2425 is needed, they are constant. */
2426 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2427 return;
2428
2429 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2430 ? gimple_omp_taskreg_clauses (inner_stmt)
2431 : gimple_omp_for_clauses (inner_stmt);
2432 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2433 isn't supposed to be handled, as the inner loop doesn't
2434 use it. */
2435 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2436 gcc_assert (innerc);
2437 for (i = 0; i < fd->collapse; i++)
2438 {
2439 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2440 OMP_CLAUSE__LOOPTEMP_);
2441 gcc_assert (innerc);
2442 if (i)
2443 {
2444 tree tem = OMP_CLAUSE_DECL (innerc);
2445 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
2446 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2447 false, GSI_CONTINUE_LINKING);
2448 gassign *stmt = gimple_build_assign (tem, t);
2449 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2450 }
2451 }
2452 return;
2453 }
2454
2455 tree type = TREE_TYPE (fd->loop.v);
2456 tree tem = create_tmp_reg (type, ".tem");
2457 gassign *stmt = gimple_build_assign (tem, startvar);
2458 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2459
2460 for (i = fd->collapse - 1; i >= 0; i--)
2461 {
2462 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2463 itype = vtype;
2464 if (POINTER_TYPE_P (vtype))
2465 itype = signed_type_for (vtype);
2466 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2467 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2468 else
2469 t = tem;
2470 if (i == fd->last_nonrect)
2471 {
2472 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2473 false, GSI_CONTINUE_LINKING);
2474 tree stopval = t;
2475 tree idx = create_tmp_reg (type, ".count");
2476 expand_omp_build_assign (gsi, idx,
2477 build_zero_cst (type), true);
2478 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2479 if (fd->first_nonrect + 1 == fd->last_nonrect
2480 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2481 || (fd->first_inner_iterations
2482 /* For now. Later add clauses to propagate the
2483 values. */
2484 && !gimple_omp_for_combined_into_p (fd->for_stmt)))
2485 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2486 != CODE_FOR_nothing))
2487 {
2488 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2489 tree itype = TREE_TYPE (fd->loops[i].v);
2490 tree first_inner_iterations = fd->first_inner_iterations;
2491 tree factor = fd->factor;
2492 gcond *cond_stmt
2493 = gimple_build_cond (NE_EXPR, factor,
2494 build_zero_cst (TREE_TYPE (factor)),
2495 NULL_TREE, NULL_TREE);
2496 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2497 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2498 basic_block bb0 = e->src;
2499 e->flags = EDGE_TRUE_VALUE;
2500 e->probability = profile_probability::likely ();
2501 bb_triang_dom = bb0;
2502 *gsi = gsi_after_labels (e->dest);
2503 tree slltype = long_long_integer_type_node;
2504 tree ulltype = long_long_unsigned_type_node;
2505 tree stopvalull = fold_convert (ulltype, stopval);
2506 stopvalull
2507 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2508 false, GSI_CONTINUE_LINKING);
2509 first_inner_iterations
2510 = fold_convert (slltype, first_inner_iterations);
2511 first_inner_iterations
2512 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2513 NULL_TREE, false,
2514 GSI_CONTINUE_LINKING);
2515 factor = fold_convert (slltype, factor);
2516 factor
2517 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2518 false, GSI_CONTINUE_LINKING);
2519 tree first_inner_iterationsd
2520 = fold_build1 (FLOAT_EXPR, double_type_node,
2521 first_inner_iterations);
2522 first_inner_iterationsd
2523 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2524 NULL_TREE, false,
2525 GSI_CONTINUE_LINKING);
2526 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2527 factor);
2528 factord = force_gimple_operand_gsi (gsi, factord, true,
2529 NULL_TREE, false,
2530 GSI_CONTINUE_LINKING);
2531 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2532 stopvalull);
2533 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2534 NULL_TREE, false,
2535 GSI_CONTINUE_LINKING);
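/* Sketch of the math below, using the shorthands T = stopval,
   f = factor and fi = first_inner_iterations (illustrative only):
   the first c outer iterations cover
     D(c) = c * fi + f * c * (c - 1) / 2
   logical iterations, so the outer iteration containing T is the
   largest c with D(c) <= T.  Solving D(c) = T for c gives
     c = (sqrt ((fi - f/2) * (fi - f/2) + 2 * f * T) - (fi - f/2)) / f
   which is what the sqrt computation below evaluates, with
   t3 = fi - f/2.  The D(c) <= T < D(c + 1) comparisons afterwards
   guard against floating point imprecision and divert to the
   fallback path if they fail.  */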
2536 /* Temporarily disable flag_rounding_math; the values will be
2537 decimal numbers divided by 2, and worst-case imprecision due
2538 to too-large values ought to be caught later by the checks
2539 for the fallback. */
2540 int save_flag_rounding_math = flag_rounding_math;
2541 flag_rounding_math = 0;
2542 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2543 build_real (double_type_node, dconst2));
2544 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2545 first_inner_iterationsd, t);
2546 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2547 GSI_CONTINUE_LINKING);
2548 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2549 build_real (double_type_node, dconst2));
2550 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2551 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2552 fold_build2 (MULT_EXPR, double_type_node,
2553 t3, t3));
2554 flag_rounding_math = save_flag_rounding_math;
2555 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2556 GSI_CONTINUE_LINKING);
2557 if (flag_exceptions
2558 && cfun->can_throw_non_call_exceptions
2559 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2560 {
2561 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2562 build_zero_cst (double_type_node));
2563 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2564 false, GSI_CONTINUE_LINKING);
2565 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2566 boolean_false_node,
2567 NULL_TREE, NULL_TREE);
2568 }
2569 else
2570 cond_stmt
2571 = gimple_build_cond (LT_EXPR, t,
2572 build_zero_cst (double_type_node),
2573 NULL_TREE, NULL_TREE);
2574 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2575 e = split_block (gsi_bb (*gsi), cond_stmt);
2576 basic_block bb1 = e->src;
2577 e->flags = EDGE_FALSE_VALUE;
2578 e->probability = profile_probability::very_likely ();
2579 *gsi = gsi_after_labels (e->dest);
2580 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2581 tree sqrtr = create_tmp_var (double_type_node);
2582 gimple_call_set_lhs (call, sqrtr);
2583 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2584 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2585 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2586 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2587 tree c = create_tmp_var (ulltype);
2588 tree d = create_tmp_var (ulltype);
2589 expand_omp_build_assign (gsi, c, t, true);
2590 t = fold_build2 (MINUS_EXPR, ulltype, c,
2591 build_one_cst (ulltype));
2592 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2593 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2594 t = fold_build2 (MULT_EXPR, ulltype,
2595 fold_convert (ulltype, fd->factor), t);
2596 tree t2
2597 = fold_build2 (MULT_EXPR, ulltype, c,
2598 fold_convert (ulltype,
2599 fd->first_inner_iterations));
2600 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2601 expand_omp_build_assign (gsi, d, t, true);
2602 t = fold_build2 (MULT_EXPR, ulltype,
2603 fold_convert (ulltype, fd->factor), c);
2604 t = fold_build2 (PLUS_EXPR, ulltype,
2605 t, fold_convert (ulltype,
2606 fd->first_inner_iterations));
2607 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2608 GSI_CONTINUE_LINKING);
2609 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2610 NULL_TREE, NULL_TREE);
2611 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2612 e = split_block (gsi_bb (*gsi), cond_stmt);
2613 basic_block bb2 = e->src;
2614 e->flags = EDGE_TRUE_VALUE;
2615 e->probability = profile_probability::very_likely ();
2616 *gsi = gsi_after_labels (e->dest);
2617 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2618 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2619 GSI_CONTINUE_LINKING);
2620 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2621 NULL_TREE, NULL_TREE);
2622 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2623 e = split_block (gsi_bb (*gsi), cond_stmt);
2624 basic_block bb3 = e->src;
2625 e->flags = EDGE_FALSE_VALUE;
2626 e->probability = profile_probability::very_likely ();
2627 *gsi = gsi_after_labels (e->dest);
2628 t = fold_convert (itype, c);
2629 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2630 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2631 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2632 GSI_CONTINUE_LINKING);
2633 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2634 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2635 t2 = fold_convert (itype, t2);
2636 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2637 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2638 if (fd->loops[i].m1)
2639 {
2640 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2641 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2642 }
2643 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2644 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2645 bb_triang = e->src;
2646 *gsi = gsi_after_labels (e->dest);
2647 remove_edge (e);
2648 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2649 e->probability = profile_probability::very_unlikely ();
2650 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2651 e->probability = profile_probability::very_unlikely ();
2652 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2653 e->probability = profile_probability::very_unlikely ();
2654
2655 basic_block bb4 = create_empty_bb (bb0);
2656 add_bb_to_loop (bb4, bb0->loop_father);
2657 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2658 e->probability = profile_probability::unlikely ();
2659 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2660 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2661 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2662 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2663 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2664 counts[i], counts[i - 1]);
2665 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2666 GSI_CONTINUE_LINKING);
2667 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2668 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2669 t = fold_convert (itype, t);
2670 t2 = fold_convert (itype, t2);
2671 t = fold_build2 (MULT_EXPR, itype, t,
2672 fold_convert (itype, fd->loops[i].step));
2673 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2674 t2 = fold_build2 (MULT_EXPR, itype, t2,
2675 fold_convert (itype, fd->loops[i - 1].step));
2676 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2677 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2678 false, GSI_CONTINUE_LINKING);
2679 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2680 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2681 if (fd->loops[i].m1)
2682 {
2683 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2684 fd->loops[i - 1].v);
2685 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2686 }
2687 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2688 false, GSI_CONTINUE_LINKING);
2689 stmt = gimple_build_assign (fd->loops[i].v, t);
2690 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2691 }
2692 /* Fallback implementation. Evaluate the loops between
2693 fd->first_nonrect and fd->last_nonrect (inclusive) at
2694 runtime using temporaries instead of the original iteration
2695 variables; in the body just bump the counter and compare
2696 with the desired value. */
2697 gimple_stmt_iterator gsi2 = *gsi;
2698 basic_block entry_bb = gsi_bb (gsi2);
2699 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2700 e = split_block (e->dest, (gimple *) NULL);
2701 basic_block dom_bb = NULL;
2702 basic_block cur_bb = e->src;
2703 basic_block next_bb = e->dest;
2704 entry_bb = e->dest;
2705 *gsi = gsi_after_labels (entry_bb);
2706
2707 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2708 tree n1 = NULL_TREE, n2 = NULL_TREE;
2709 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2710
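/* Descriptive note on the loop below: members of the sub-nest that
   are themselves rectangular and not referenced by any
   non-rectangular bound (rect_p) are iterated as plain
   0 .. counts[j] - 1 counters; their real iteration variable values
   are only reconstructed afterwards as n1 + counter * step.  */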
2711 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2712 {
2713 tree itype = TREE_TYPE (fd->loops[j].v);
2714 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2715 && fd->loops[j].m2 == NULL_TREE
2716 && !fd->loops[j].non_rect_referenced);
2717 gsi2 = gsi_after_labels (cur_bb);
2718 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2719 if (fd->loops[j].m1)
2720 {
2721 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2722 n1 = fold_build2 (MULT_EXPR, itype,
2723 vs[j - fd->loops[j].outer], n1);
2724 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2725 }
2726 else if (rect_p)
2727 n1 = build_zero_cst (type);
2728 else
2729 n1 = t;
2730 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2731 true, GSI_SAME_STMT);
2732 if (j < fd->last_nonrect)
2733 {
2734 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2735 expand_omp_build_assign (&gsi2, vs[j], n1);
2736 }
2737 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2738 if (fd->loops[j].m2)
2739 {
2740 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2741 n2 = fold_build2 (MULT_EXPR, itype,
2742 vs[j - fd->loops[j].outer], n2);
2743 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2744 }
2745 else if (rect_p)
2746 n2 = counts[j];
2747 else
2748 n2 = t;
2749 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2750 true, GSI_SAME_STMT);
2751 if (j == fd->last_nonrect)
2752 {
2753 gcond *cond_stmt
2754 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2755 NULL_TREE, NULL_TREE);
2756 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2757 e = split_block (cur_bb, cond_stmt);
2758 e->flags = EDGE_TRUE_VALUE;
2759 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2760 e->probability = profile_probability::likely ().guessed ();
2761 ne->probability = e->probability.invert ();
2762 gsi2 = gsi_after_labels (e->dest);
2763
2764 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2765 ? -1 : 1));
2766 t = fold_build2 (PLUS_EXPR, itype,
2767 fold_convert (itype, fd->loops[j].step), t);
2768 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2769 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2770 tree step = fold_convert (itype, fd->loops[j].step);
2771 if (TYPE_UNSIGNED (itype)
2772 && fd->loops[j].cond_code == GT_EXPR)
2773 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2774 fold_build1 (NEGATE_EXPR, itype, t),
2775 fold_build1 (NEGATE_EXPR, itype, step));
2776 else
2777 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2778 t = fold_convert (type, t);
2779 t = fold_build2 (PLUS_EXPR, type, idx, t);
2780 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2781 true, GSI_SAME_STMT);
2782 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2783 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2784 cond_stmt
2785 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2786 NULL_TREE);
2787 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2788 e = split_block (gsi_bb (gsi2), cond_stmt);
2789 e->flags = EDGE_TRUE_VALUE;
2790 e->probability = profile_probability::likely ().guessed ();
2791 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2792 ne->probability = e->probability.invert ();
2793 gsi2 = gsi_after_labels (e->dest);
2794 expand_omp_build_assign (&gsi2, idx, t);
2795 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2796 break;
2797 }
2798 e = split_block (cur_bb, last_stmt (cur_bb));
2799
2800 basic_block new_cur_bb = create_empty_bb (cur_bb);
2801 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2802
2803 gsi2 = gsi_after_labels (e->dest);
2804 if (rect_p)
2805 t = fold_build2 (PLUS_EXPR, type, vs[j],
2806 build_one_cst (type));
2807 else
2808 {
2809 tree step
2810 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2811 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2812 }
2813 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2814 true, GSI_SAME_STMT);
2815 expand_omp_build_assign (&gsi2, vs[j], t);
2816
2817 edge ne = split_block (e->dest, last_stmt (e->dest));
2818 gsi2 = gsi_after_labels (ne->dest);
2819
2820 gcond *cond_stmt;
2821 if (next_bb == entry_bb)
2822 /* No need to actually check the outermost condition. */
2823 cond_stmt
2824 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2825 boolean_true_node,
2826 NULL_TREE, NULL_TREE);
2827 else
2828 cond_stmt
2829 = gimple_build_cond (rect_p ? LT_EXPR
2830 : fd->loops[j].cond_code,
2831 vs[j], n2, NULL_TREE, NULL_TREE);
2832 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2833 edge e3, e4;
2834 if (next_bb == entry_bb)
2835 {
2836 e3 = find_edge (ne->dest, next_bb);
2837 e3->flags = EDGE_FALSE_VALUE;
2838 dom_bb = ne->dest;
2839 }
2840 else
2841 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2842 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2843 e4->probability = profile_probability::likely ().guessed ();
2844 e3->probability = e4->probability.invert ();
2845 basic_block esrc = e->src;
2846 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2847 cur_bb = new_cur_bb;
2848 basic_block latch_bb = next_bb;
2849 next_bb = e->dest;
2850 remove_edge (e);
2851 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2852 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2853 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2854 }
2855 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2856 {
2857 tree itype = TREE_TYPE (fd->loops[j].v);
2858 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2859 && fd->loops[j].m2 == NULL_TREE
2860 && !fd->loops[j].non_rect_referenced);
2861 if (j == fd->last_nonrect)
2862 {
2863 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2864 t = fold_convert (itype, t);
2865 tree t2
2866 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2867 t = fold_build2 (MULT_EXPR, itype, t, t2);
2868 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2869 }
2870 else if (rect_p)
2871 {
2872 t = fold_convert (itype, vs[j]);
2873 t = fold_build2 (MULT_EXPR, itype, t,
2874 fold_convert (itype, fd->loops[j].step));
2875 if (POINTER_TYPE_P (vtype))
2876 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2877 else
2878 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2879 }
2880 else
2881 t = vs[j];
2882 t = force_gimple_operand_gsi (gsi, t, false,
2883 NULL_TREE, true,
2884 GSI_SAME_STMT);
2885 stmt = gimple_build_assign (fd->loops[j].v, t);
2886 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2887 }
2888 if (gsi_end_p (*gsi))
2889 *gsi = gsi_last_bb (gsi_bb (*gsi));
2890 else
2891 gsi_prev (gsi);
2892 if (bb_triang)
2893 {
2894 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2895 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2896 *gsi = gsi_after_labels (e->dest);
2897 if (!gsi_end_p (*gsi))
2898 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2899 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2900 }
2901 }
2902 else
2903 {
2904 t = fold_convert (itype, t);
2905 t = fold_build2 (MULT_EXPR, itype, t,
2906 fold_convert (itype, fd->loops[i].step));
2907 if (POINTER_TYPE_P (vtype))
2908 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2909 else
2910 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2911 t = force_gimple_operand_gsi (gsi, t,
2912 DECL_P (fd->loops[i].v)
2913 && TREE_ADDRESSABLE (fd->loops[i].v),
2914 NULL_TREE, false,
2915 GSI_CONTINUE_LINKING);
2916 stmt = gimple_build_assign (fd->loops[i].v, t);
2917 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2918 }
2919 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2920 {
2921 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2922 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2923 false, GSI_CONTINUE_LINKING);
2924 stmt = gimple_build_assign (tem, t);
2925 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2926 }
2927 if (i == fd->last_nonrect)
2928 i = fd->first_nonrect;
2929 }
2930 if (fd->non_rect)
2931 for (i = 0; i <= fd->last_nonrect; i++)
2932 if (fd->loops[i].m2)
2933 {
2934 tree itype = TREE_TYPE (fd->loops[i].v);
2935
2936 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2937 t = fold_build2 (MULT_EXPR, itype,
2938 fd->loops[i - fd->loops[i].outer].v, t);
2939 t = fold_build2 (PLUS_EXPR, itype, t,
2940 fold_convert (itype,
2941 unshare_expr (fd->loops[i].n2)));
2942 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2943 t = force_gimple_operand_gsi (gsi, t, false,
2944 NULL_TREE, false,
2945 GSI_CONTINUE_LINKING);
2946 stmt = gimple_build_assign (nonrect_bounds[i], t);
2947 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2948 }
2949 }
2950
2951 /* Helper function for expand_omp_for_*. Generate code like:
2952 L10:
2953 V3 += STEP3;
2954 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2955 L11:
2956 V3 = N31;
2957 V2 += STEP2;
2958 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2959 L12:
2960 V2 = N21;
2961 V1 += STEP1;
2962 goto BODY_BB;
2963 For non-rectangular loops, use temporaries stored in nonrect_bounds
2964 for the upper bounds if M?2 multiplier is present. Given e.g.
2965 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2966 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2967 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2968 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
2969 do:
2970 L10:
2971 V4 += STEP4;
2972 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
2973 L11:
2974 V4 = N41 + M41 * V2; // This can be left out if the loop
2975 // refers to the immediate parent loop
2976 V3 += STEP3;
2977 if (V3 cond3 N32) goto BODY_BB; else goto L12;
2978 L12:
2979 V3 = N31;
2980 V2 += STEP2;
2981 if (V2 cond2 N22) goto L120; else goto L13;
2982 L120:
2983 V4 = N41 + M41 * V2;
2984 NONRECT_BOUND4 = N42 + M42 * V2;
2985 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
2986 L13:
2987 V2 = N21;
2988 V1 += STEP1;
2989 goto L120; */
2990
2991 static basic_block
2992 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
2993 basic_block cont_bb, basic_block body_bb)
2994 {
2995 basic_block last_bb, bb, collapse_bb = NULL;
2996 int i;
2997 gimple_stmt_iterator gsi;
2998 edge e;
2999 tree t;
3000 gimple *stmt;
3001
3002 last_bb = cont_bb;
3003 for (i = fd->collapse - 1; i >= 0; i--)
3004 {
3005 tree vtype = TREE_TYPE (fd->loops[i].v);
3006
3007 bb = create_empty_bb (last_bb);
3008 add_bb_to_loop (bb, last_bb->loop_father);
3009 gsi = gsi_start_bb (bb);
3010
3011 if (i < fd->collapse - 1)
3012 {
3013 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3014 e->probability
3015 = profile_probability::guessed_always ().apply_scale (1, 8);
3016
3017 struct omp_for_data_loop *l = &fd->loops[i + 1];
3018 if (l->m1 == NULL_TREE || l->outer != 1)
3019 {
3020 t = l->n1;
3021 if (l->m1)
3022 {
3023 tree t2
3024 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3025 fd->loops[i + 1 - l->outer].v, l->m1);
3026 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3027 }
3028 t = force_gimple_operand_gsi (&gsi, t,
3029 DECL_P (l->v)
3030 && TREE_ADDRESSABLE (l->v),
3031 NULL_TREE, false,
3032 GSI_CONTINUE_LINKING);
3033 stmt = gimple_build_assign (l->v, t);
3034 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3035 }
3036 }
3037 else
3038 collapse_bb = bb;
3039
3040 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3041
3042 if (POINTER_TYPE_P (vtype))
3043 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3044 else
3045 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3046 t = force_gimple_operand_gsi (&gsi, t,
3047 DECL_P (fd->loops[i].v)
3048 && TREE_ADDRESSABLE (fd->loops[i].v),
3049 NULL_TREE, false, GSI_CONTINUE_LINKING);
3050 stmt = gimple_build_assign (fd->loops[i].v, t);
3051 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3052
3053 if (fd->loops[i].non_rect_referenced)
3054 {
3055 basic_block update_bb = NULL, prev_bb = NULL;
3056 for (int j = i + 1; j <= fd->last_nonrect; j++)
3057 if (j - fd->loops[j].outer == i)
3058 {
3059 tree n1, n2;
3060 struct omp_for_data_loop *l = &fd->loops[j];
3061 basic_block this_bb = create_empty_bb (last_bb);
3062 add_bb_to_loop (this_bb, last_bb->loop_father);
3063 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3064 if (prev_bb)
3065 {
3066 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3067 e->probability
3068 = profile_probability::guessed_always ().apply_scale (7,
3069 8);
3070 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3071 }
3072 if (l->m1)
3073 {
3074 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3075 fd->loops[i].v);
3076 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3077 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3078 false,
3079 GSI_CONTINUE_LINKING);
3080 stmt = gimple_build_assign (l->v, n1);
3081 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3082 n1 = l->v;
3083 }
3084 else
3085 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3086 NULL_TREE, false,
3087 GSI_CONTINUE_LINKING);
3088 if (l->m2)
3089 {
3090 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3091 fd->loops[i].v);
3092 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3093 t, unshare_expr (l->n2));
3094 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3095 false,
3096 GSI_CONTINUE_LINKING);
3097 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3098 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3099 n2 = nonrect_bounds[j];
3100 }
3101 else
3102 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3103 true, NULL_TREE, false,
3104 GSI_CONTINUE_LINKING);
3105 gcond *cond_stmt
3106 = gimple_build_cond (l->cond_code, n1, n2,
3107 NULL_TREE, NULL_TREE);
3108 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3109 if (update_bb == NULL)
3110 update_bb = this_bb;
3111 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3112 e->probability
3113 = profile_probability::guessed_always ().apply_scale (1, 8);
3114 if (prev_bb == NULL)
3115 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3116 prev_bb = this_bb;
3117 }
3118 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3119 e->probability
3120 = profile_probability::guessed_always ().apply_scale (7, 8);
3121 body_bb = update_bb;
3122 }
3123
3124 if (i > 0)
3125 {
3126 if (fd->loops[i].m2)
3127 t = nonrect_bounds[i];
3128 else
3129 t = unshare_expr (fd->loops[i].n2);
3130 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3131 false, GSI_CONTINUE_LINKING);
3132 tree v = fd->loops[i].v;
3133 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3134 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3135 false, GSI_CONTINUE_LINKING);
3136 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3137 stmt = gimple_build_cond_empty (t);
3138 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3139 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3140 expand_omp_regimplify_p, NULL, NULL)
3141 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3142 expand_omp_regimplify_p, NULL, NULL))
3143 gimple_regimplify_operands (stmt, &gsi);
3144 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3145 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3146 }
3147 else
3148 make_edge (bb, body_bb, EDGE_FALLTHRU);
3149 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3150 last_bb = bb;
3151 }
3152
3153 return collapse_bb;
3154 }
3155
3156 /* Expand #pragma omp ordered depend(source). */
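/* Illustrative reading (a sketch, not a precise statement of the
   emitted GIMPLE): this becomes a call of the form
     GOMP_doacross_post (&<counts-array>);
   (GOMP_doacross_ull_post for the unsigned long long iterator type),
   where the array (the caller-provided counts[fd->ordered]) holds the
   current logical iteration vector, announcing that this iteration
   has reached the ordered region so depend(sink:...) waiters may
   proceed.  */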
3157
3158 static void
3159 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3160 tree *counts, location_t loc)
3161 {
3162 enum built_in_function source_ix
3163 = fd->iter_type == long_integer_type_node
3164 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3165 gimple *g
3166 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3167 build_fold_addr_expr (counts[fd->ordered]));
3168 gimple_set_location (g, loc);
3169 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3170 }
3171
3172 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
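/* For instance (an illustrative sketch with made-up loop bounds),
   given
     #pragma omp for ordered(2)
     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         {
           #pragma omp ordered depend (sink: i - 1, j + 1)
           ...
         }
   the clause is expanded into, roughly,
     if (i - 1 >= 0 && j + 1 < M)
       GOMP_doacross_wait (i - 1, j + 1);
   i.e. a wait on the iteration the offsets point at, guarded so it is
   skipped when that iteration does not exist in the iteration
   space.  */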
3173
3174 static void
3175 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3176 tree *counts, tree c, location_t loc)
3177 {
3178 auto_vec<tree, 10> args;
3179 enum built_in_function sink_ix
3180 = fd->iter_type == long_integer_type_node
3181 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3182 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3183 int i;
3184 gimple_stmt_iterator gsi2 = *gsi;
3185 bool warned_step = false;
3186
3187 for (i = 0; i < fd->ordered; i++)
3188 {
3189 tree step = NULL_TREE;
3190 off = TREE_PURPOSE (deps);
3191 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3192 {
3193 step = TREE_OPERAND (off, 1);
3194 off = TREE_OPERAND (off, 0);
3195 }
3196 if (!integer_zerop (off))
3197 {
3198 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3199 || fd->loops[i].cond_code == GT_EXPR);
3200 bool forward = fd->loops[i].cond_code == LT_EXPR;
3201 if (step)
3202 {
3203 /* Non-simple Fortran DO loops. If step is variable,
3204 we don't know even the direction at compile time, so we
3205 can't warn. */
3206 if (TREE_CODE (step) != INTEGER_CST)
3207 break;
3208 forward = tree_int_cst_sgn (step) != -1;
3209 }
3210 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3211 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3212 "waiting for lexically later iteration");
3213 break;
3214 }
3215 deps = TREE_CHAIN (deps);
3216 }
3217 /* If all offsets corresponding to the collapsed loops are zero,
3218 this depend clause can be ignored. FIXME: but there is still a
3219 flush needed. We need to emit one __sync_synchronize () for it
3220 though (perhaps conditionally)? Solve this together with the
3221 conservative dependence folding optimization.
3222 if (i >= fd->collapse)
3223 return; */
3224
3225 deps = OMP_CLAUSE_DECL (c);
3226 gsi_prev (&gsi2);
3227 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3228 edge e2 = split_block_after_labels (e1->dest);
3229
3230 gsi2 = gsi_after_labels (e1->dest);
3231 *gsi = gsi_last_bb (e1->src);
3232 for (i = 0; i < fd->ordered; i++)
3233 {
3234 tree itype = TREE_TYPE (fd->loops[i].v);
3235 tree step = NULL_TREE;
3236 tree orig_off = NULL_TREE;
3237 if (POINTER_TYPE_P (itype))
3238 itype = sizetype;
3239 if (i)
3240 deps = TREE_CHAIN (deps);
3241 off = TREE_PURPOSE (deps);
3242 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3243 {
3244 step = TREE_OPERAND (off, 1);
3245 off = TREE_OPERAND (off, 0);
3246 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3247 && integer_onep (fd->loops[i].step)
3248 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3249 }
3250 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3251 if (step)
3252 {
3253 off = fold_convert_loc (loc, itype, off);
3254 orig_off = off;
3255 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3256 }
3257
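/* Build (accumulated into the local cond) a check that the iteration
   the sink offset points at actually exists, i.e. that the offset
   applied to this dimension's iteration variable stays within the
   loop's bounds; offsets that can never name an iteration of the
   space (not a multiple of the step) are diagnosed below and force
   the condition to false.  */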
3258 if (integer_zerop (off))
3259 t = boolean_true_node;
3260 else
3261 {
3262 tree a;
3263 tree co = fold_convert_loc (loc, itype, off);
3264 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3265 {
3266 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3267 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3268 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3269 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3270 co);
3271 }
3272 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3273 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3274 fd->loops[i].v, co);
3275 else
3276 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3277 fd->loops[i].v, co);
3278 if (step)
3279 {
3280 tree t1, t2;
3281 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3282 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3283 fd->loops[i].n1);
3284 else
3285 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3286 fd->loops[i].n2);
3287 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3288 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3289 fd->loops[i].n2);
3290 else
3291 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3292 fd->loops[i].n1);
3293 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3294 step, build_int_cst (TREE_TYPE (step), 0));
3295 if (TREE_CODE (step) != INTEGER_CST)
3296 {
3297 t1 = unshare_expr (t1);
3298 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3299 false, GSI_CONTINUE_LINKING);
3300 t2 = unshare_expr (t2);
3301 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3302 false, GSI_CONTINUE_LINKING);
3303 }
3304 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3305 t, t2, t1);
3306 }
3307 else if (fd->loops[i].cond_code == LT_EXPR)
3308 {
3309 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3310 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3311 fd->loops[i].n1);
3312 else
3313 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3314 fd->loops[i].n2);
3315 }
3316 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3317 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3318 fd->loops[i].n2);
3319 else
3320 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3321 fd->loops[i].n1);
3322 }
3323 if (cond)
3324 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3325 else
3326 cond = t;
3327
3328 off = fold_convert_loc (loc, itype, off);
3329
3330 if (step
3331 || (fd->loops[i].cond_code == LT_EXPR
3332 ? !integer_onep (fd->loops[i].step)
3333 : !integer_minus_onep (fd->loops[i].step)))
3334 {
3335 if (step == NULL_TREE
3336 && TYPE_UNSIGNED (itype)
3337 && fd->loops[i].cond_code == GT_EXPR)
3338 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3339 fold_build1_loc (loc, NEGATE_EXPR, itype,
3340 s));
3341 else
3342 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3343 orig_off ? orig_off : off, s);
3344 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3345 build_int_cst (itype, 0));
3346 if (integer_zerop (t) && !warned_step)
3347 {
3348 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3349 "refers to iteration never in the iteration "
3350 "space");
3351 warned_step = true;
3352 }
3353 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3354 cond, t);
3355 }
3356
3357 if (i <= fd->collapse - 1 && fd->collapse > 1)
3358 t = fd->loop.v;
3359 else if (counts[i])
3360 t = counts[i];
3361 else
3362 {
3363 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3364 fd->loops[i].v, fd->loops[i].n1);
3365 t = fold_convert_loc (loc, fd->iter_type, t);
3366 }
3367 if (step)
3368 /* We have divided off by step already earlier. */;
3369 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3370 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3371 fold_build1_loc (loc, NEGATE_EXPR, itype,
3372 s));
3373 else
3374 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3375 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3376 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3377 off = fold_convert_loc (loc, fd->iter_type, off);
3378 if (i <= fd->collapse - 1 && fd->collapse > 1)
3379 {
3380 if (i)
3381 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3382 off);
3383 if (i < fd->collapse - 1)
3384 {
3385 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3386 counts[i]);
3387 continue;
3388 }
3389 }
3390 off = unshare_expr (off);
3391 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3392 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3393 true, GSI_SAME_STMT);
3394 args.safe_push (t);
3395 }
3396 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3397 gimple_set_location (g, loc);
3398 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3399
3400 cond = unshare_expr (cond);
3401 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3402 GSI_CONTINUE_LINKING);
3403 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3404 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3405 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3406 e1->probability = e3->probability.invert ();
3407 e1->flags = EDGE_TRUE_VALUE;
3408 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3409
3410 *gsi = gsi_after_labels (e2->dest);
3411 }
3412
3413 /* Expand all #pragma omp ordered depend(source) and
3414 #pragma omp ordered depend(sink:...) constructs in the current
3415 #pragma omp for ordered(n) region. */
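/* Roughly (a sketch of the intent, not a literal transcript of the
   emitted GIMPLE): depend(source) is expanded by expand_omp_ordered_source
   into a post of the current iteration counters kept in the .orditera
   array (GOMP_doacross_post / GOMP_doacross_ull_post), while each
   depend(sink: ...) clause is handled by expand_omp_ordered_sink above,
   i.e. a conditional GOMP_doacross_wait call.  */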
3416
3417 static void
3418 expand_omp_ordered_source_sink (struct omp_region *region,
3419 struct omp_for_data *fd, tree *counts,
3420 basic_block cont_bb)
3421 {
3422 struct omp_region *inner;
3423 int i;
3424 for (i = fd->collapse - 1; i < fd->ordered; i++)
3425 if (i == fd->collapse - 1 && fd->collapse > 1)
3426 counts[i] = NULL_TREE;
3427 else if (i >= fd->collapse && !cont_bb)
3428 counts[i] = build_zero_cst (fd->iter_type);
3429 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3430 && integer_onep (fd->loops[i].step))
3431 counts[i] = NULL_TREE;
3432 else
3433 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3434 tree atype
3435 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3436 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3437 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3438
3439 for (inner = region->inner; inner; inner = inner->next)
3440 if (inner->type == GIMPLE_OMP_ORDERED)
3441 {
3442 gomp_ordered *ord_stmt = inner->ord_stmt;
3443 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3444 location_t loc = gimple_location (ord_stmt);
3445 tree c;
3446 for (c = gimple_omp_ordered_clauses (ord_stmt);
3447 c; c = OMP_CLAUSE_CHAIN (c))
3448 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3449 break;
3450 if (c)
3451 expand_omp_ordered_source (&gsi, fd, counts, loc);
3452 for (c = gimple_omp_ordered_clauses (ord_stmt);
3453 c; c = OMP_CLAUSE_CHAIN (c))
3454 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3455 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3456 gsi_remove (&gsi, true);
3457 }
3458 }
3459
3460 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3461 collapsed. */
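/* Illustration (hypothetical source): with
     #pragma omp for ordered(2)
     for (i = 0; i < n; i++)
       for (j = 0; j < m; j++)
         body;
   only the i loop is workshared, so a sequential j loop has to be rebuilt
   here around BODY_BB, keeping counts[] and the .orditera array up to
   date so the doacross waits and posts see the full iteration vector.  */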
3462
3463 static basic_block
3464 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3465 basic_block cont_bb, basic_block body_bb,
3466 bool ordered_lastprivate)
3467 {
3468 if (fd->ordered == fd->collapse)
3469 return cont_bb;
3470
3471 if (!cont_bb)
3472 {
3473 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3474 for (int i = fd->collapse; i < fd->ordered; i++)
3475 {
3476 tree type = TREE_TYPE (fd->loops[i].v);
3477 tree n1 = fold_convert (type, fd->loops[i].n1);
3478 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3479 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3480 size_int (i - fd->collapse + 1),
3481 NULL_TREE, NULL_TREE);
3482 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3483 }
3484 return NULL;
3485 }
3486
3487 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3488 {
3489 tree t, type = TREE_TYPE (fd->loops[i].v);
3490 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3491 expand_omp_build_assign (&gsi, fd->loops[i].v,
3492 fold_convert (type, fd->loops[i].n1));
3493 if (counts[i])
3494 expand_omp_build_assign (&gsi, counts[i],
3495 build_zero_cst (fd->iter_type));
3496 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3497 size_int (i - fd->collapse + 1),
3498 NULL_TREE, NULL_TREE);
3499 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3500 if (!gsi_end_p (gsi))
3501 gsi_prev (&gsi);
3502 else
3503 gsi = gsi_last_bb (body_bb);
3504 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3505 basic_block new_body = e1->dest;
3506 if (body_bb == cont_bb)
3507 cont_bb = new_body;
3508 edge e2 = NULL;
3509 basic_block new_header;
3510 if (EDGE_COUNT (cont_bb->preds) > 0)
3511 {
3512 gsi = gsi_last_bb (cont_bb);
3513 if (POINTER_TYPE_P (type))
3514 t = fold_build_pointer_plus (fd->loops[i].v,
3515 fold_convert (sizetype,
3516 fd->loops[i].step));
3517 else
3518 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3519 fold_convert (type, fd->loops[i].step));
3520 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3521 if (counts[i])
3522 {
3523 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3524 build_int_cst (fd->iter_type, 1));
3525 expand_omp_build_assign (&gsi, counts[i], t);
3526 t = counts[i];
3527 }
3528 else
3529 {
3530 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3531 fd->loops[i].v, fd->loops[i].n1);
3532 t = fold_convert (fd->iter_type, t);
3533 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3534 true, GSI_SAME_STMT);
3535 }
3536 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3537 size_int (i - fd->collapse + 1),
3538 NULL_TREE, NULL_TREE);
3539 expand_omp_build_assign (&gsi, aref, t);
3540 gsi_prev (&gsi);
3541 e2 = split_block (cont_bb, gsi_stmt (gsi));
3542 new_header = e2->dest;
3543 }
3544 else
3545 new_header = cont_bb;
3546 gsi = gsi_after_labels (new_header);
3547 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3548 true, GSI_SAME_STMT);
3549 tree n2
3550 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3551 true, NULL_TREE, true, GSI_SAME_STMT);
3552 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3553 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3554 edge e3 = split_block (new_header, gsi_stmt (gsi));
3555 cont_bb = e3->dest;
3556 remove_edge (e1);
3557 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3558 e3->flags = EDGE_FALSE_VALUE;
3559 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3560 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3561 e1->probability = e3->probability.invert ();
3562
3563 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3564 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3565
3566 if (e2)
3567 {
3568 class loop *loop = alloc_loop ();
3569 loop->header = new_header;
3570 loop->latch = e2->src;
3571 add_loop (loop, body_bb->loop_father);
3572 }
3573 }
3574
3575 /* If there are any lastprivate clauses and it is possible some loops
3576 might have zero iterations, ensure all the decls are initialized,
3577 otherwise we could crash evaluating C++ class iterators with lastprivate
3578 clauses. */
3579 bool need_inits = false;
3580 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3581 if (need_inits)
3582 {
3583 tree type = TREE_TYPE (fd->loops[i].v);
3584 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3585 expand_omp_build_assign (&gsi, fd->loops[i].v,
3586 fold_convert (type, fd->loops[i].n1));
3587 }
3588 else
3589 {
3590 tree type = TREE_TYPE (fd->loops[i].v);
3591 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3592 boolean_type_node,
3593 fold_convert (type, fd->loops[i].n1),
3594 fold_convert (type, fd->loops[i].n2));
3595 if (!integer_onep (this_cond))
3596 need_inits = true;
3597 }
3598
3599 return cont_bb;
3600 }
3601
3602 /* A subroutine of expand_omp_for. Generate code for a parallel
3603 loop with any schedule. Given parameters:
3604
3605 for (V = N1; V cond N2; V += STEP) BODY;
3606
3607 where COND is "<" or ">", we generate pseudocode
3608
3609 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3610 if (more) goto L0; else goto L3;
3611 L0:
3612 V = istart0;
3613 iend = iend0;
3614 L1:
3615 BODY;
3616 V += STEP;
3617 if (V cond iend) goto L1; else goto L2;
3618 L2:
3619 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3620 L3:
3621
3622 If this is a combined omp parallel loop, instead of the call to
3623 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3624 If this is gimple_omp_for_combined_p loop, then instead of assigning
3625 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3626 inner GIMPLE_OMP_FOR and V += STEP; and
3627 if (V cond iend) goto L1; else goto L2; are removed.
3628
3629 For collapsed loops, given parameters:
3630 collapse(3)
3631 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3632 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3633 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3634 BODY;
3635
3636 we generate pseudocode
3637
3638 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3639 if (cond3 is <)
3640 adj = STEP3 - 1;
3641 else
3642 adj = STEP3 + 1;
3643 count3 = (adj + N32 - N31) / STEP3;
3644 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3645 if (cond2 is <)
3646 adj = STEP2 - 1;
3647 else
3648 adj = STEP2 + 1;
3649 count2 = (adj + N22 - N21) / STEP2;
3650 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3651 if (cond1 is <)
3652 adj = STEP1 - 1;
3653 else
3654 adj = STEP1 + 1;
3655 count1 = (adj + N12 - N11) / STEP1;
3656 count = count1 * count2 * count3;
3657 goto Z1;
3658 Z0:
3659 count = 0;
3660 Z1:
3661 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3662 if (more) goto L0; else goto L3;
3663 L0:
3664 V = istart0;
3665 T = V;
3666 V3 = N31 + (T % count3) * STEP3;
3667 T = T / count3;
3668 V2 = N21 + (T % count2) * STEP2;
3669 T = T / count2;
3670 V1 = N11 + T * STEP1;
3671 iend = iend0;
3672 L1:
3673 BODY;
3674 V += 1;
3675 if (V < iend) goto L10; else goto L2;
3676 L10:
3677 V3 += STEP3;
3678 if (V3 cond3 N32) goto L1; else goto L11;
3679 L11:
3680 V3 = N31;
3681 V2 += STEP2;
3682 if (V2 cond2 N22) goto L1; else goto L12;
3683 L12:
3684 V2 = N21;
3685 V1 += STEP1;
3686 goto L1;
3687 L2:
3688 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3689 L3:
3690
3691 */
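/* As one concrete (illustrative) instantiation of the pseudocode above:
   for
     #pragma omp for schedule(dynamic, 4)
   with "long" iterators the caller passes START_FN/NEXT_FN as
   BUILT_IN_GOMP_LOOP_DYNAMIC_START and BUILT_IN_GOMP_LOOP_DYNAMIC_NEXT,
   so the emitted calls are
     GOMP_loop_dynamic_start (N1, N2, STEP, 4, &istart0, &iend0)
     GOMP_loop_dynamic_next (&istart0, &iend0)
   with the GOMP_loop_ull_* variants used instead when fd->iter_type is
   unsigned long long (those take an extra boolean argument, see below).  */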
3692
3693 static void
3694 expand_omp_for_generic (struct omp_region *region,
3695 struct omp_for_data *fd,
3696 enum built_in_function start_fn,
3697 enum built_in_function next_fn,
3698 tree sched_arg,
3699 gimple *inner_stmt)
3700 {
3701 tree type, istart0, iend0, iend;
3702 tree t, vmain, vback, bias = NULL_TREE;
3703 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3704 basic_block l2_bb = NULL, l3_bb = NULL;
3705 gimple_stmt_iterator gsi;
3706 gassign *assign_stmt;
3707 bool in_combined_parallel = is_combined_parallel (region);
3708 bool broken_loop = region->cont == NULL;
3709 edge e, ne;
3710 tree *counts = NULL;
3711 int i;
3712 bool ordered_lastprivate = false;
3713
3714 gcc_assert (!broken_loop || !in_combined_parallel);
3715 gcc_assert (fd->iter_type == long_integer_type_node
3716 || !in_combined_parallel);
3717
3718 entry_bb = region->entry;
3719 cont_bb = region->cont;
3720 collapse_bb = NULL;
3721 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3722 gcc_assert (broken_loop
3723 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3724 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3725 l1_bb = single_succ (l0_bb);
3726 if (!broken_loop)
3727 {
3728 l2_bb = create_empty_bb (cont_bb);
3729 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3730 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3731 == l1_bb));
3732 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3733 }
3734 else
3735 l2_bb = NULL;
3736 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3737 exit_bb = region->exit;
3738
3739 gsi = gsi_last_nondebug_bb (entry_bb);
3740
3741 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3742 if (fd->ordered
3743 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3744 OMP_CLAUSE_LASTPRIVATE))
3745 ordered_lastprivate = true;
3746 tree reductions = NULL_TREE;
3747 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3748 tree memv = NULL_TREE;
3749 if (fd->lastprivate_conditional)
3750 {
3751 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3752 OMP_CLAUSE__CONDTEMP_);
3753 if (fd->have_pointer_condtemp)
3754 condtemp = OMP_CLAUSE_DECL (c);
3755 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3756 cond_var = OMP_CLAUSE_DECL (c);
3757 }
3758 if (sched_arg)
3759 {
3760 if (fd->have_reductemp)
3761 {
3762 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3763 OMP_CLAUSE__REDUCTEMP_);
3764 reductions = OMP_CLAUSE_DECL (c);
3765 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3766 gimple *g = SSA_NAME_DEF_STMT (reductions);
3767 reductions = gimple_assign_rhs1 (g);
3768 OMP_CLAUSE_DECL (c) = reductions;
3769 entry_bb = gimple_bb (g);
3770 edge e = split_block (entry_bb, g);
3771 if (region->entry == entry_bb)
3772 region->entry = e->dest;
3773 gsi = gsi_last_bb (entry_bb);
3774 }
3775 else
3776 reductions = null_pointer_node;
3777 if (fd->have_pointer_condtemp)
3778 {
3779 tree type = TREE_TYPE (condtemp);
3780 memv = create_tmp_var (type);
3781 TREE_ADDRESSABLE (memv) = 1;
3782 unsigned HOST_WIDE_INT sz
3783 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3784 sz *= fd->lastprivate_conditional;
3785 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3786 false);
3787 mem = build_fold_addr_expr (memv);
3788 }
3789 else
3790 mem = null_pointer_node;
3791 }
3792 if (fd->collapse > 1 || fd->ordered)
3793 {
3794 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3795 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3796
3797 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3798 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3799 zero_iter1_bb, first_zero_iter1,
3800 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3801
3802 if (zero_iter1_bb)
3803 {
3804 /* Some counts[i] vars might be uninitialized if
3805 some loop has zero iterations. But the body shouldn't
3806 be executed in that case, so just avoid uninit warnings. */
3807 for (i = first_zero_iter1;
3808 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3809 if (SSA_VAR_P (counts[i]))
3810 TREE_NO_WARNING (counts[i]) = 1;
3811 gsi_prev (&gsi);
3812 e = split_block (entry_bb, gsi_stmt (gsi));
3813 entry_bb = e->dest;
3814 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3815 gsi = gsi_last_nondebug_bb (entry_bb);
3816 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3817 get_immediate_dominator (CDI_DOMINATORS,
3818 zero_iter1_bb));
3819 }
3820 if (zero_iter2_bb)
3821 {
3822 /* Some counts[i] vars might be uninitialized if
3823 some loop has zero iterations. But the body shouldn't
3824 be executed in that case, so just avoid uninit warnings. */
3825 for (i = first_zero_iter2; i < fd->ordered; i++)
3826 if (SSA_VAR_P (counts[i]))
3827 TREE_NO_WARNING (counts[i]) = 1;
3828 if (zero_iter1_bb)
3829 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3830 else
3831 {
3832 gsi_prev (&gsi);
3833 e = split_block (entry_bb, gsi_stmt (gsi));
3834 entry_bb = e->dest;
3835 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3836 gsi = gsi_last_nondebug_bb (entry_bb);
3837 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3838 get_immediate_dominator
3839 (CDI_DOMINATORS, zero_iter2_bb));
3840 }
3841 }
3842 if (fd->collapse == 1)
3843 {
3844 counts[0] = fd->loop.n2;
3845 fd->loop = fd->loops[0];
3846 }
3847 }
3848
3849 type = TREE_TYPE (fd->loop.v);
3850 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3851 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3852 TREE_ADDRESSABLE (istart0) = 1;
3853 TREE_ADDRESSABLE (iend0) = 1;
3854
3855 /* See if we need to bias by LLONG_MIN. */
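/* (Rationale, inferred from the uses of BIAS below: the runtime works
   with an unsigned long long iteration range, so if the signed range
   could straddle zero it is shifted by TYPE_MIN_VALUE before the
   GOMP_loop_ull_* calls, keeping the unsigned comparisons in libgomp
   monotonic; the bias is subtracted again when istart0/iend0 are mapped
   back to the user iteration variable.)  */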
3856 if (fd->iter_type == long_long_unsigned_type_node
3857 && TREE_CODE (type) == INTEGER_TYPE
3858 && !TYPE_UNSIGNED (type)
3859 && fd->ordered == 0)
3860 {
3861 tree n1, n2;
3862
3863 if (fd->loop.cond_code == LT_EXPR)
3864 {
3865 n1 = fd->loop.n1;
3866 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3867 }
3868 else
3869 {
3870 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3871 n2 = fd->loop.n1;
3872 }
3873 if (TREE_CODE (n1) != INTEGER_CST
3874 || TREE_CODE (n2) != INTEGER_CST
3875 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3876 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3877 }
3878
3879 gimple_stmt_iterator gsif = gsi;
3880 gsi_prev (&gsif);
3881
3882 tree arr = NULL_TREE;
3883 if (in_combined_parallel)
3884 {
3885 gcc_assert (fd->ordered == 0);
3886 /* In a combined parallel loop, emit a call to
3887 GOMP_loop_foo_next. */
3888 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3889 build_fold_addr_expr (istart0),
3890 build_fold_addr_expr (iend0));
3891 }
3892 else
3893 {
3894 tree t0, t1, t2, t3, t4;
3895 /* If this is not a combined parallel loop, emit a call to
3896 GOMP_loop_foo_start in ENTRY_BB. */
3897 t4 = build_fold_addr_expr (iend0);
3898 t3 = build_fold_addr_expr (istart0);
3899 if (fd->ordered)
3900 {
3901 t0 = build_int_cst (unsigned_type_node,
3902 fd->ordered - fd->collapse + 1);
3903 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3904 fd->ordered
3905 - fd->collapse + 1),
3906 ".omp_counts");
3907 DECL_NAMELESS (arr) = 1;
3908 TREE_ADDRESSABLE (arr) = 1;
3909 TREE_STATIC (arr) = 1;
3910 vec<constructor_elt, va_gc> *v;
3911 vec_alloc (v, fd->ordered - fd->collapse + 1);
3912 int idx;
3913
3914 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3915 {
3916 tree c;
3917 if (idx == 0 && fd->collapse > 1)
3918 c = fd->loop.n2;
3919 else
3920 c = counts[idx + fd->collapse - 1];
3921 tree purpose = size_int (idx);
3922 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3923 if (TREE_CODE (c) != INTEGER_CST)
3924 TREE_STATIC (arr) = 0;
3925 }
3926
3927 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3928 if (!TREE_STATIC (arr))
3929 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3930 void_type_node, arr),
3931 true, NULL_TREE, true, GSI_SAME_STMT);
3932 t1 = build_fold_addr_expr (arr);
3933 t2 = NULL_TREE;
3934 }
3935 else
3936 {
3937 t2 = fold_convert (fd->iter_type, fd->loop.step);
3938 t1 = fd->loop.n2;
3939 t0 = fd->loop.n1;
3940 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3941 {
3942 tree innerc
3943 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3944 OMP_CLAUSE__LOOPTEMP_);
3945 gcc_assert (innerc);
3946 t0 = OMP_CLAUSE_DECL (innerc);
3947 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3948 OMP_CLAUSE__LOOPTEMP_);
3949 gcc_assert (innerc);
3950 t1 = OMP_CLAUSE_DECL (innerc);
3951 }
3952 if (POINTER_TYPE_P (TREE_TYPE (t0))
3953 && TYPE_PRECISION (TREE_TYPE (t0))
3954 != TYPE_PRECISION (fd->iter_type))
3955 {
3956 /* Avoid casting pointers to integer of a different size. */
3957 tree itype = signed_type_for (type);
3958 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3959 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3960 }
3961 else
3962 {
3963 t1 = fold_convert (fd->iter_type, t1);
3964 t0 = fold_convert (fd->iter_type, t0);
3965 }
3966 if (bias)
3967 {
3968 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
3969 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
3970 }
3971 }
3972 if (fd->iter_type == long_integer_type_node || fd->ordered)
3973 {
3974 if (fd->chunk_size)
3975 {
3976 t = fold_convert (fd->iter_type, fd->chunk_size);
3977 t = omp_adjust_chunk_size (t, fd->simd_schedule);
3978 if (sched_arg)
3979 {
3980 if (fd->ordered)
3981 t = build_call_expr (builtin_decl_explicit (start_fn),
3982 8, t0, t1, sched_arg, t, t3, t4,
3983 reductions, mem);
3984 else
3985 t = build_call_expr (builtin_decl_explicit (start_fn),
3986 9, t0, t1, t2, sched_arg, t, t3, t4,
3987 reductions, mem);
3988 }
3989 else if (fd->ordered)
3990 t = build_call_expr (builtin_decl_explicit (start_fn),
3991 5, t0, t1, t, t3, t4);
3992 else
3993 t = build_call_expr (builtin_decl_explicit (start_fn),
3994 6, t0, t1, t2, t, t3, t4);
3995 }
3996 else if (fd->ordered)
3997 t = build_call_expr (builtin_decl_explicit (start_fn),
3998 4, t0, t1, t3, t4);
3999 else
4000 t = build_call_expr (builtin_decl_explicit (start_fn),
4001 5, t0, t1, t2, t3, t4);
4002 }
4003 else
4004 {
4005 tree t5;
4006 tree c_bool_type;
4007 tree bfn_decl;
4008
4009 /* The GOMP_loop_ull_*start functions have an additional boolean
4010 argument, true for < loops and false for > loops.
4011 In Fortran, the C bool type can be different from
4012 boolean_type_node. */
4013 bfn_decl = builtin_decl_explicit (start_fn);
4014 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4015 t5 = build_int_cst (c_bool_type,
4016 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4017 if (fd->chunk_size)
4018 {
4019 tree bfn_decl = builtin_decl_explicit (start_fn);
4020 t = fold_convert (fd->iter_type, fd->chunk_size);
4021 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4022 if (sched_arg)
4023 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4024 t, t3, t4, reductions, mem);
4025 else
4026 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4027 }
4028 else
4029 t = build_call_expr (builtin_decl_explicit (start_fn),
4030 6, t5, t0, t1, t2, t3, t4);
4031 }
4032 }
4033 if (TREE_TYPE (t) != boolean_type_node)
4034 t = fold_build2 (NE_EXPR, boolean_type_node,
4035 t, build_int_cst (TREE_TYPE (t), 0));
4036 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4037 true, GSI_SAME_STMT);
4038 if (arr && !TREE_STATIC (arr))
4039 {
4040 tree clobber = build_clobber (TREE_TYPE (arr));
4041 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4042 GSI_SAME_STMT);
4043 }
4044 if (fd->have_pointer_condtemp)
4045 expand_omp_build_assign (&gsi, condtemp, memv, false);
4046 if (fd->have_reductemp)
4047 {
4048 gimple *g = gsi_stmt (gsi);
4049 gsi_remove (&gsi, true);
4050 release_ssa_name (gimple_assign_lhs (g));
4051
4052 entry_bb = region->entry;
4053 gsi = gsi_last_nondebug_bb (entry_bb);
4054
4055 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4056 }
4057 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4058
4059 /* Remove the GIMPLE_OMP_FOR statement. */
4060 gsi_remove (&gsi, true);
4061
4062 if (gsi_end_p (gsif))
4063 gsif = gsi_after_labels (gsi_bb (gsif));
4064 gsi_next (&gsif);
4065
4066 /* Iteration setup for sequential loop goes in L0_BB. */
4067 tree startvar = fd->loop.v;
4068 tree endvar = NULL_TREE;
4069
4070 if (gimple_omp_for_combined_p (fd->for_stmt))
4071 {
4072 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4073 && gimple_omp_for_kind (inner_stmt)
4074 == GF_OMP_FOR_KIND_SIMD);
4075 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4076 OMP_CLAUSE__LOOPTEMP_);
4077 gcc_assert (innerc);
4078 startvar = OMP_CLAUSE_DECL (innerc);
4079 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4080 OMP_CLAUSE__LOOPTEMP_);
4081 gcc_assert (innerc);
4082 endvar = OMP_CLAUSE_DECL (innerc);
4083 }
4084
4085 gsi = gsi_start_bb (l0_bb);
4086 t = istart0;
4087 if (fd->ordered && fd->collapse == 1)
4088 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4089 fold_convert (fd->iter_type, fd->loop.step));
4090 else if (bias)
4091 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4092 if (fd->ordered && fd->collapse == 1)
4093 {
4094 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4095 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4096 fd->loop.n1, fold_convert (sizetype, t));
4097 else
4098 {
4099 t = fold_convert (TREE_TYPE (startvar), t);
4100 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4101 fd->loop.n1, t);
4102 }
4103 }
4104 else
4105 {
4106 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4107 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4108 t = fold_convert (TREE_TYPE (startvar), t);
4109 }
4110 t = force_gimple_operand_gsi (&gsi, t,
4111 DECL_P (startvar)
4112 && TREE_ADDRESSABLE (startvar),
4113 NULL_TREE, false, GSI_CONTINUE_LINKING);
4114 assign_stmt = gimple_build_assign (startvar, t);
4115 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4116 if (cond_var)
4117 {
4118 tree itype = TREE_TYPE (cond_var);
4119 /* For lastprivate(conditional:) itervar, we need some iteration
4120 counter that starts at a non-zero unsigned value and increases.
4121 Prefer as few IVs as possible, so if we can use startvar
4122 itself, use that, or startvar + constant (those would be
4123 incremented with step), and as last resort use the s0 + 1
4124 incremented by 1. */
4125 if ((fd->ordered && fd->collapse == 1)
4126 || bias
4127 || POINTER_TYPE_P (type)
4128 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4129 || fd->loop.cond_code != LT_EXPR)
4130 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4131 build_int_cst (itype, 1));
4132 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4133 t = fold_convert (itype, t);
4134 else
4135 {
4136 tree c = fold_convert (itype, fd->loop.n1);
4137 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4138 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4139 }
4140 t = force_gimple_operand_gsi (&gsi, t, false,
4141 NULL_TREE, false, GSI_CONTINUE_LINKING);
4142 assign_stmt = gimple_build_assign (cond_var, t);
4143 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4144 }
4145
4146 t = iend0;
4147 if (fd->ordered && fd->collapse == 1)
4148 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4149 fold_convert (fd->iter_type, fd->loop.step));
4150 else if (bias)
4151 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4152 if (fd->ordered && fd->collapse == 1)
4153 {
4154 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4155 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4156 fd->loop.n1, fold_convert (sizetype, t));
4157 else
4158 {
4159 t = fold_convert (TREE_TYPE (startvar), t);
4160 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4161 fd->loop.n1, t);
4162 }
4163 }
4164 else
4165 {
4166 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4167 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4168 t = fold_convert (TREE_TYPE (startvar), t);
4169 }
4170 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4171 false, GSI_CONTINUE_LINKING);
4172 if (endvar)
4173 {
4174 assign_stmt = gimple_build_assign (endvar, iend);
4175 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4176 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4177 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4178 else
4179 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4180 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4181 }
4182 /* Handle linear clause adjustments. */
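/* E.g. (hypothetical clause) for linear(p:2), at the start of each
   thread's chunk P is reset to its pre-worksharing value plus the
   logical iteration number of the chunk's first iteration,
   (startvar - N1) / STEP, multiplied by the linear step 2; the body's
   own increments then continue from there.  */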
4183 tree itercnt = NULL_TREE;
4184 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4185 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4186 c; c = OMP_CLAUSE_CHAIN (c))
4187 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4188 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4189 {
4190 tree d = OMP_CLAUSE_DECL (c);
4191 bool is_ref = omp_is_reference (d);
4192 tree t = d, a, dest;
4193 if (is_ref)
4194 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4195 tree type = TREE_TYPE (t);
4196 if (POINTER_TYPE_P (type))
4197 type = sizetype;
4198 dest = unshare_expr (t);
4199 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4200 expand_omp_build_assign (&gsif, v, t);
4201 if (itercnt == NULL_TREE)
4202 {
4203 itercnt = startvar;
4204 tree n1 = fd->loop.n1;
4205 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4206 {
4207 itercnt
4208 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4209 itercnt);
4210 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4211 }
4212 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4213 itercnt, n1);
4214 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4215 itercnt, fd->loop.step);
4216 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4217 NULL_TREE, false,
4218 GSI_CONTINUE_LINKING);
4219 }
4220 a = fold_build2 (MULT_EXPR, type,
4221 fold_convert (type, itercnt),
4222 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4223 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4224 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4225 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4226 false, GSI_CONTINUE_LINKING);
4227 assign_stmt = gimple_build_assign (dest, t);
4228 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4229 }
4230 if (fd->collapse > 1)
4231 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4232
4233 if (fd->ordered)
4234 {
4235 /* Until now, counts array contained number of iterations or
4236 variable containing it for ith loop. From now on, we need
4237 those counts only for collapsed loops, and only for the 2nd
4238 till the last collapsed one. Move those one element earlier,
4239 we'll use counts[fd->collapse - 1] for the first source/sink
4240 iteration counter and so on and counts[fd->ordered]
4241 as the array holding the current counter values for
4242 depend(source). */
4243 if (fd->collapse > 1)
4244 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4245 if (broken_loop)
4246 {
4247 int i;
4248 for (i = fd->collapse; i < fd->ordered; i++)
4249 {
4250 tree type = TREE_TYPE (fd->loops[i].v);
4251 tree this_cond
4252 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4253 fold_convert (type, fd->loops[i].n1),
4254 fold_convert (type, fd->loops[i].n2));
4255 if (!integer_onep (this_cond))
4256 break;
4257 }
4258 if (i < fd->ordered)
4259 {
4260 cont_bb
4261 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4262 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4263 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4264 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4265 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4266 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4267 make_edge (cont_bb, l1_bb, 0);
4268 l2_bb = create_empty_bb (cont_bb);
4269 broken_loop = false;
4270 }
4271 }
4272 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4273 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4274 ordered_lastprivate);
4275 if (counts[fd->collapse - 1])
4276 {
4277 gcc_assert (fd->collapse == 1);
4278 gsi = gsi_last_bb (l0_bb);
4279 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4280 istart0, true);
4281 gsi = gsi_last_bb (cont_bb);
4282 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
4283 build_int_cst (fd->iter_type, 1));
4284 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4285 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4286 size_zero_node, NULL_TREE, NULL_TREE);
4287 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4288 t = counts[fd->collapse - 1];
4289 }
4290 else if (fd->collapse > 1)
4291 t = fd->loop.v;
4292 else
4293 {
4294 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4295 fd->loops[0].v, fd->loops[0].n1);
4296 t = fold_convert (fd->iter_type, t);
4297 }
4298 gsi = gsi_last_bb (l0_bb);
4299 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4300 size_zero_node, NULL_TREE, NULL_TREE);
4301 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4302 false, GSI_CONTINUE_LINKING);
4303 expand_omp_build_assign (&gsi, aref, t, true);
4304 }
4305
4306 if (!broken_loop)
4307 {
4308 /* Code to control the increment and predicate for the sequential
4309 loop goes in the CONT_BB. */
4310 gsi = gsi_last_nondebug_bb (cont_bb);
4311 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4312 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4313 vmain = gimple_omp_continue_control_use (cont_stmt);
4314 vback = gimple_omp_continue_control_def (cont_stmt);
4315
4316 if (cond_var)
4317 {
4318 tree itype = TREE_TYPE (cond_var);
4319 tree t2;
4320 if ((fd->ordered && fd->collapse == 1)
4321 || bias
4322 || POINTER_TYPE_P (type)
4323 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4324 || fd->loop.cond_code != LT_EXPR)
4325 t2 = build_int_cst (itype, 1);
4326 else
4327 t2 = fold_convert (itype, fd->loop.step);
4328 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4329 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4330 NULL_TREE, true, GSI_SAME_STMT);
4331 assign_stmt = gimple_build_assign (cond_var, t2);
4332 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4333 }
4334
4335 if (!gimple_omp_for_combined_p (fd->for_stmt))
4336 {
4337 if (POINTER_TYPE_P (type))
4338 t = fold_build_pointer_plus (vmain, fd->loop.step);
4339 else
4340 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4341 t = force_gimple_operand_gsi (&gsi, t,
4342 DECL_P (vback)
4343 && TREE_ADDRESSABLE (vback),
4344 NULL_TREE, true, GSI_SAME_STMT);
4345 assign_stmt = gimple_build_assign (vback, t);
4346 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4347
4348 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4349 {
4350 tree tem;
4351 if (fd->collapse > 1)
4352 tem = fd->loop.v;
4353 else
4354 {
4355 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4356 fd->loops[0].v, fd->loops[0].n1);
4357 tem = fold_convert (fd->iter_type, tem);
4358 }
4359 tree aref = build4 (ARRAY_REF, fd->iter_type,
4360 counts[fd->ordered], size_zero_node,
4361 NULL_TREE, NULL_TREE);
4362 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4363 true, GSI_SAME_STMT);
4364 expand_omp_build_assign (&gsi, aref, tem);
4365 }
4366
4367 t = build2 (fd->loop.cond_code, boolean_type_node,
4368 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4369 iend);
4370 gcond *cond_stmt = gimple_build_cond_empty (t);
4371 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4372 }
4373
4374 /* Remove GIMPLE_OMP_CONTINUE. */
4375 gsi_remove (&gsi, true);
4376
4377 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4378 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4379
4380 /* Emit code to get the next parallel iteration in L2_BB. */
4381 gsi = gsi_start_bb (l2_bb);
4382
4383 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4384 build_fold_addr_expr (istart0),
4385 build_fold_addr_expr (iend0));
4386 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4387 false, GSI_CONTINUE_LINKING);
4388 if (TREE_TYPE (t) != boolean_type_node)
4389 t = fold_build2 (NE_EXPR, boolean_type_node,
4390 t, build_int_cst (TREE_TYPE (t), 0));
4391 gcond *cond_stmt = gimple_build_cond_empty (t);
4392 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4393 }
4394
4395 /* Add the loop cleanup function. */
4396 gsi = gsi_last_nondebug_bb (exit_bb);
4397 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4398 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4399 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4400 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4401 else
4402 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4403 gcall *call_stmt = gimple_build_call (t, 0);
4404 if (fd->ordered)
4405 {
4406 tree arr = counts[fd->ordered];
4407 tree clobber = build_clobber (TREE_TYPE (arr));
4408 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4409 GSI_SAME_STMT);
4410 }
4411 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4412 {
4413 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4414 if (fd->have_reductemp)
4415 {
4416 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4417 gimple_call_lhs (call_stmt));
4418 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4419 }
4420 }
4421 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4422 gsi_remove (&gsi, true);
4423
4424 /* Connect the new blocks. */
4425 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4426 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4427
4428 if (!broken_loop)
4429 {
4430 gimple_seq phis;
4431
4432 e = find_edge (cont_bb, l3_bb);
4433 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4434
4435 phis = phi_nodes (l3_bb);
4436 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4437 {
4438 gimple *phi = gsi_stmt (gsi);
4439 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4440 PHI_ARG_DEF_FROM_EDGE (phi, e));
4441 }
4442 remove_edge (e);
4443
4444 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4445 e = find_edge (cont_bb, l1_bb);
4446 if (e == NULL)
4447 {
4448 e = BRANCH_EDGE (cont_bb);
4449 gcc_assert (single_succ (e->dest) == l1_bb);
4450 }
4451 if (gimple_omp_for_combined_p (fd->for_stmt))
4452 {
4453 remove_edge (e);
4454 e = NULL;
4455 }
4456 else if (fd->collapse > 1)
4457 {
4458 remove_edge (e);
4459 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4460 }
4461 else
4462 e->flags = EDGE_TRUE_VALUE;
4463 if (e)
4464 {
4465 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4466 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4467 }
4468 else
4469 {
4470 e = find_edge (cont_bb, l2_bb);
4471 e->flags = EDGE_FALLTHRU;
4472 }
4473 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4474
4475 if (gimple_in_ssa_p (cfun))
4476 {
4477 /* Add phis to the outer loop that connect to the phis in the inner,
4478 original loop, and move the loop entry value of the inner phi to
4479 the loop entry value of the outer phi. */
4480 gphi_iterator psi;
4481 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4482 {
4483 location_t locus;
4484 gphi *nphi;
4485 gphi *exit_phi = psi.phi ();
4486
4487 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4488 continue;
4489
4490 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4491 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4492
4493 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4494 edge latch_to_l1 = find_edge (latch, l1_bb);
4495 gphi *inner_phi
4496 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4497
4498 tree t = gimple_phi_result (exit_phi);
4499 tree new_res = copy_ssa_name (t, NULL);
4500 nphi = create_phi_node (new_res, l0_bb);
4501
4502 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4503 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4504 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4505 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4506 add_phi_arg (nphi, t, entry_to_l0, locus);
4507
4508 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4509 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4510
4511 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4512 }
4513 }
4514
4515 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4516 recompute_dominator (CDI_DOMINATORS, l2_bb));
4517 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4518 recompute_dominator (CDI_DOMINATORS, l3_bb));
4519 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4520 recompute_dominator (CDI_DOMINATORS, l0_bb));
4521 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4522 recompute_dominator (CDI_DOMINATORS, l1_bb));
4523
4524 /* We enter expand_omp_for_generic with a loop. This original loop may
4525 have its own loop struct, or it may be part of an outer loop struct
4526 (which may be the fake loop). */
4527 class loop *outer_loop = entry_bb->loop_father;
4528 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4529
4530 add_bb_to_loop (l2_bb, outer_loop);
4531
4532 /* We've added a new loop around the original loop. Allocate the
4533 corresponding loop struct. */
4534 class loop *new_loop = alloc_loop ();
4535 new_loop->header = l0_bb;
4536 new_loop->latch = l2_bb;
4537 add_loop (new_loop, outer_loop);
4538
4539 /* Allocate a loop structure for the original loop unless we already
4540 had one. */
4541 if (!orig_loop_has_loop_struct
4542 && !gimple_omp_for_combined_p (fd->for_stmt))
4543 {
4544 class loop *orig_loop = alloc_loop ();
4545 orig_loop->header = l1_bb;
4546 /* The loop may have multiple latches. */
4547 add_loop (orig_loop, new_loop);
4548 }
4549 }
4550 }
4551
4552 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4553 compute the needed allocation size. If !ALLOC, handle the team
4554 allocations; if ALLOC, the per-thread allocation. SZ is the initial needed size for
4555 other purposes, ALLOC_ALIGN guaranteed alignment of allocation in bytes,
4556 CNT number of elements of each array, for !ALLOC this is
4557 omp_get_num_threads (), for ALLOC number of iterations handled by the
4558 current thread. If PTR is non-NULL, it is the start of the allocation
4559 and this routine shall assign to OMP_CLAUSE_DECL (c) of those _scantemp_
4560 clauses pointers to the corresponding arrays. */
4561
4562 static tree
4563 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4564 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4565 gimple_stmt_iterator *gsi, bool alloc)
4566 {
4567 tree eltsz = NULL_TREE;
4568 unsigned HOST_WIDE_INT preval = 0;
4569 if (ptr && sz)
4570 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4571 ptr, size_int (sz));
4572 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4573 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4574 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4575 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4576 {
4577 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4578 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4579 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4580 {
4581 unsigned HOST_WIDE_INT szl
4582 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4583 szl = least_bit_hwi (szl);
4584 if (szl)
4585 al = MIN (al, szl);
4586 }
4587 if (ptr == NULL_TREE)
4588 {
4589 if (eltsz == NULL_TREE)
4590 eltsz = TYPE_SIZE_UNIT (pointee_type);
4591 else
4592 eltsz = size_binop (PLUS_EXPR, eltsz,
4593 TYPE_SIZE_UNIT (pointee_type));
4594 }
4595 if (preval == 0 && al <= alloc_align)
4596 {
4597 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4598 sz += diff;
4599 if (diff && ptr)
4600 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4601 ptr, size_int (diff));
4602 }
4603 else if (al > preval)
4604 {
4605 if (ptr)
4606 {
4607 ptr = fold_convert (pointer_sized_int_node, ptr);
4608 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4609 build_int_cst (pointer_sized_int_node,
4610 al - 1));
4611 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4612 build_int_cst (pointer_sized_int_node,
4613 -(HOST_WIDE_INT) al));
4614 ptr = fold_convert (ptr_type_node, ptr);
4615 }
4616 else
4617 sz += al - 1;
4618 }
4619 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4620 preval = al;
4621 else
4622 preval = 1;
4623 if (ptr)
4624 {
4625 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4626 ptr = OMP_CLAUSE_DECL (c);
4627 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4628 size_binop (MULT_EXPR, cnt,
4629 TYPE_SIZE_UNIT (pointee_type)));
4630 }
4631 }
4632
4633 if (ptr == NULL_TREE)
4634 {
4635 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4636 if (sz)
4637 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4638 return eltsz;
4639 }
4640 else
4641 return ptr;
4642 }
4643
4644 /* A subroutine of expand_omp_for. Generate code for a parallel
4645 loop with static schedule and no specified chunk size. Given
4646 parameters:
4647
4648 for (V = N1; V cond N2; V += STEP) BODY;
4649
4650 where COND is "<" or ">", we generate pseudocode
4651
4652 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4653 if (cond is <)
4654 adj = STEP - 1;
4655 else
4656 adj = STEP + 1;
4657 if ((__typeof (V)) -1 > 0 && cond is >)
4658 n = -(adj + N2 - N1) / -STEP;
4659 else
4660 n = (adj + N2 - N1) / STEP;
4661 q = n / nthreads;
4662 tt = n % nthreads;
4663 if (threadid < tt) goto L3; else goto L4;
4664 L3:
4665 tt = 0;
4666 q = q + 1;
4667 L4:
4668 s0 = q * threadid + tt;
4669 e0 = s0 + q;
4670 V = s0 * STEP + N1;
4671 if (s0 >= e0) goto L2; else goto L0;
4672 L0:
4673 e = e0 * STEP + N1;
4674 L1:
4675 BODY;
4676 V += STEP;
4677 if (V cond e) goto L1;
4678 L2:
4679 */
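/* Worked example (illustrative numbers only): with n = 10 iterations and
   nthreads = 4, q = 2 and tt = 2, so threads 0 and 1 each take q + 1 = 3
   iterations (s0/e0 of 0/3 and 3/6) while threads 2 and 3 take 2 each
   (6/8 and 8/10); the remainder tt is thus spread over the first tt
   threads.  */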
4680
4681 static void
4682 expand_omp_for_static_nochunk (struct omp_region *region,
4683 struct omp_for_data *fd,
4684 gimple *inner_stmt)
4685 {
4686 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4687 tree type, itype, vmain, vback;
4688 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4689 basic_block body_bb, cont_bb, collapse_bb = NULL;
4690 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4691 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4692 gimple_stmt_iterator gsi, gsip;
4693 edge ep;
4694 bool broken_loop = region->cont == NULL;
4695 tree *counts = NULL;
4696 tree n1, n2, step;
4697 tree reductions = NULL_TREE;
4698 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4699
4700 itype = type = TREE_TYPE (fd->loop.v);
4701 if (POINTER_TYPE_P (type))
4702 itype = signed_type_for (type);
4703
4704 entry_bb = region->entry;
4705 cont_bb = region->cont;
4706 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4707 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4708 gcc_assert (broken_loop
4709 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4710 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4711 body_bb = single_succ (seq_start_bb);
4712 if (!broken_loop)
4713 {
4714 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4715 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4716 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4717 }
4718 exit_bb = region->exit;
4719
4720 /* Iteration space partitioning goes in ENTRY_BB. */
4721 gsi = gsi_last_nondebug_bb (entry_bb);
4722 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4723 gsip = gsi;
4724 gsi_prev (&gsip);
4725
4726 if (fd->collapse > 1)
4727 {
4728 int first_zero_iter = -1, dummy = -1;
4729 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4730
4731 counts = XALLOCAVEC (tree, fd->collapse);
4732 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4733 fin_bb, first_zero_iter,
4734 dummy_bb, dummy, l2_dom_bb);
4735 t = NULL_TREE;
4736 }
4737 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4738 t = integer_one_node;
4739 else
4740 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4741 fold_convert (type, fd->loop.n1),
4742 fold_convert (type, fd->loop.n2));
4743 if (fd->collapse == 1
4744 && TYPE_UNSIGNED (type)
4745 && (t == NULL_TREE || !integer_onep (t)))
4746 {
4747 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4748 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4749 true, GSI_SAME_STMT);
4750 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4751 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4752 true, GSI_SAME_STMT);
4753 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4754 NULL_TREE, NULL_TREE);
4755 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4756 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4757 expand_omp_regimplify_p, NULL, NULL)
4758 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4759 expand_omp_regimplify_p, NULL, NULL))
4760 {
4761 gsi = gsi_for_stmt (cond_stmt);
4762 gimple_regimplify_operands (cond_stmt, &gsi);
4763 }
4764 ep = split_block (entry_bb, cond_stmt);
4765 ep->flags = EDGE_TRUE_VALUE;
4766 entry_bb = ep->dest;
4767 ep->probability = profile_probability::very_likely ();
4768 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4769 ep->probability = profile_probability::very_unlikely ();
4770 if (gimple_in_ssa_p (cfun))
4771 {
4772 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4773 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4774 !gsi_end_p (gpi); gsi_next (&gpi))
4775 {
4776 gphi *phi = gpi.phi ();
4777 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4778 ep, UNKNOWN_LOCATION);
4779 }
4780 }
4781 gsi = gsi_last_bb (entry_bb);
4782 }
4783
4784 if (fd->lastprivate_conditional)
4785 {
4786 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4787 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4788 if (fd->have_pointer_condtemp)
4789 condtemp = OMP_CLAUSE_DECL (c);
4790 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4791 cond_var = OMP_CLAUSE_DECL (c);
4792 }
4793 if (fd->have_reductemp
4794 /* For scan, we don't want to reinitialize condtemp before the
4795 second loop. */
4796 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4797 || fd->have_nonctrl_scantemp)
4798 {
4799 tree t1 = build_int_cst (long_integer_type_node, 0);
4800 tree t2 = build_int_cst (long_integer_type_node, 1);
4801 tree t3 = build_int_cstu (long_integer_type_node,
4802 (HOST_WIDE_INT_1U << 31) + 1);
4803 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4804 gimple_stmt_iterator gsi2 = gsi_none ();
4805 gimple *g = NULL;
4806 tree mem = null_pointer_node, memv = NULL_TREE;
4807 unsigned HOST_WIDE_INT condtemp_sz = 0;
4808 unsigned HOST_WIDE_INT alloc_align = 0;
4809 if (fd->have_reductemp)
4810 {
4811 gcc_assert (!fd->have_nonctrl_scantemp);
4812 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4813 reductions = OMP_CLAUSE_DECL (c);
4814 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4815 g = SSA_NAME_DEF_STMT (reductions);
4816 reductions = gimple_assign_rhs1 (g);
4817 OMP_CLAUSE_DECL (c) = reductions;
4818 gsi2 = gsi_for_stmt (g);
4819 }
4820 else
4821 {
4822 if (gsi_end_p (gsip))
4823 gsi2 = gsi_after_labels (region->entry);
4824 else
4825 gsi2 = gsip;
4826 reductions = null_pointer_node;
4827 }
4828 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4829 {
4830 tree type;
4831 if (fd->have_pointer_condtemp)
4832 type = TREE_TYPE (condtemp);
4833 else
4834 type = ptr_type_node;
4835 memv = create_tmp_var (type);
4836 TREE_ADDRESSABLE (memv) = 1;
4837 unsigned HOST_WIDE_INT sz = 0;
4838 tree size = NULL_TREE;
4839 if (fd->have_pointer_condtemp)
4840 {
4841 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4842 sz *= fd->lastprivate_conditional;
4843 condtemp_sz = sz;
4844 }
4845 if (fd->have_nonctrl_scantemp)
4846 {
4847 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4848 gimple *g = gimple_build_call (nthreads, 0);
4849 nthreads = create_tmp_var (integer_type_node);
4850 gimple_call_set_lhs (g, nthreads);
4851 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4852 nthreads = fold_convert (sizetype, nthreads);
4853 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4854 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4855 alloc_align, nthreads, NULL,
4856 false);
4857 size = fold_convert (type, size);
4858 }
4859 else
4860 size = build_int_cst (type, sz);
4861 expand_omp_build_assign (&gsi2, memv, size, false);
4862 mem = build_fold_addr_expr (memv);
4863 }
4864 tree t
4865 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4866 9, t1, t2, t2, t3, t1, null_pointer_node,
4867 null_pointer_node, reductions, mem);
4868 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4869 true, GSI_SAME_STMT);
4870 if (fd->have_pointer_condtemp)
4871 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4872 if (fd->have_nonctrl_scantemp)
4873 {
4874 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4875 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4876 alloc_align, nthreads, &gsi2, false);
4877 }
4878 if (fd->have_reductemp)
4879 {
4880 gsi_remove (&gsi2, true);
4881 release_ssa_name (gimple_assign_lhs (g));
4882 }
4883 }
4884 switch (gimple_omp_for_kind (fd->for_stmt))
4885 {
4886 case GF_OMP_FOR_KIND_FOR:
4887 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4888 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4889 break;
4890 case GF_OMP_FOR_KIND_DISTRIBUTE:
4891 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4892 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4893 break;
4894 default:
4895 gcc_unreachable ();
4896 }
4897 nthreads = build_call_expr (nthreads, 0);
4898 nthreads = fold_convert (itype, nthreads);
4899 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4900 true, GSI_SAME_STMT);
4901 threadid = build_call_expr (threadid, 0);
4902 threadid = fold_convert (itype, threadid);
4903 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4904 true, GSI_SAME_STMT);
4905
4906 n1 = fd->loop.n1;
4907 n2 = fd->loop.n2;
4908 step = fd->loop.step;
4909 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4910 {
4911 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4912 OMP_CLAUSE__LOOPTEMP_);
4913 gcc_assert (innerc);
4914 n1 = OMP_CLAUSE_DECL (innerc);
4915 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4916 OMP_CLAUSE__LOOPTEMP_);
4917 gcc_assert (innerc);
4918 n2 = OMP_CLAUSE_DECL (innerc);
4919 }
4920 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4921 true, NULL_TREE, true, GSI_SAME_STMT);
4922 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4923 true, NULL_TREE, true, GSI_SAME_STMT);
4924 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4925 true, NULL_TREE, true, GSI_SAME_STMT);
4926
4927 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4928 t = fold_build2 (PLUS_EXPR, itype, step, t);
4929 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4930 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4931 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4932 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4933 fold_build1 (NEGATE_EXPR, itype, t),
4934 fold_build1 (NEGATE_EXPR, itype, step));
4935 else
4936 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4937 t = fold_convert (itype, t);
4938 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
4939
4940 q = create_tmp_reg (itype, "q");
4941 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
4942 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
4943 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
4944
4945 tt = create_tmp_reg (itype, "tt");
4946 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
4947 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
4948 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
4949
4950 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
4951 gcond *cond_stmt = gimple_build_cond_empty (t);
4952 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4953
4954 second_bb = split_block (entry_bb, cond_stmt)->dest;
4955 gsi = gsi_last_nondebug_bb (second_bb);
4956 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4957
4958 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
4959 GSI_SAME_STMT);
4960 gassign *assign_stmt
4961 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
4962 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4963
4964 third_bb = split_block (second_bb, assign_stmt)->dest;
4965 gsi = gsi_last_nondebug_bb (third_bb);
4966 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4967
4968 if (fd->have_nonctrl_scantemp)
4969 {
4970 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4971 tree controlp = NULL_TREE, controlb = NULL_TREE;
4972 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4973 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4974 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
4975 {
4976 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
4977 controlb = OMP_CLAUSE_DECL (c);
4978 else
4979 controlp = OMP_CLAUSE_DECL (c);
4980 if (controlb && controlp)
4981 break;
4982 }
4983 gcc_assert (controlp && controlb);
4984 tree cnt = create_tmp_var (sizetype);
4985 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
4986 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4987 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
4988 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
4989 alloc_align, cnt, NULL, true);
4990 tree size = create_tmp_var (sizetype);
4991 expand_omp_build_assign (&gsi, size, sz, false);
4992 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
4993 size, size_int (16384));
4994 expand_omp_build_assign (&gsi, controlb, cmp);
4995 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
4996 NULL_TREE, NULL_TREE);
4997 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4998 fourth_bb = split_block (third_bb, g)->dest;
4999 gsi = gsi_last_nondebug_bb (fourth_bb);
5000 /* FIXME: Once we have allocators, this should use the allocator. */
5001 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5002 gimple_call_set_lhs (g, controlp);
5003 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5004 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5005 &gsi, true);
5006 gsi_prev (&gsi);
5007 g = gsi_stmt (gsi);
5008 fifth_bb = split_block (fourth_bb, g)->dest;
5009 gsi = gsi_last_nondebug_bb (fifth_bb);
5010
5011 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5012 gimple_call_set_lhs (g, controlp);
5013 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5014 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5015 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5016 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5017 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5018 {
5019 tree tmp = create_tmp_var (sizetype);
5020 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5021 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5022 TYPE_SIZE_UNIT (pointee_type));
5023 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5024 g = gimple_build_call (alloca_decl, 2, tmp,
5025 size_int (TYPE_ALIGN (pointee_type)));
5026 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5027 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5028 }
5029
5030 sixth_bb = split_block (fifth_bb, g)->dest;
5031 gsi = gsi_last_nondebug_bb (sixth_bb);
5032 }
5033
5034 t = build2 (MULT_EXPR, itype, q, threadid);
5035 t = build2 (PLUS_EXPR, itype, t, tt);
5036 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5037
5038 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5039 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5040
5041 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5042 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5043
5044 /* Remove the GIMPLE_OMP_FOR statement. */
5045 gsi_remove (&gsi, true);
5046
5047 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5048 gsi = gsi_start_bb (seq_start_bb);
5049
5050 tree startvar = fd->loop.v;
5051 tree endvar = NULL_TREE;
5052
5053 if (gimple_omp_for_combined_p (fd->for_stmt))
5054 {
5055 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5056 ? gimple_omp_parallel_clauses (inner_stmt)
5057 : gimple_omp_for_clauses (inner_stmt);
5058 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5059 gcc_assert (innerc);
5060 startvar = OMP_CLAUSE_DECL (innerc);
5061 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5062 OMP_CLAUSE__LOOPTEMP_);
5063 gcc_assert (innerc);
5064 endvar = OMP_CLAUSE_DECL (innerc);
5065 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5066 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5067 {
5068 int i;
5069 for (i = 1; i < fd->collapse; i++)
5070 {
5071 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5072 OMP_CLAUSE__LOOPTEMP_);
5073 gcc_assert (innerc);
5074 }
5075 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5076 OMP_CLAUSE__LOOPTEMP_);
5077 if (innerc)
5078 {
5079 /* If needed (distribute parallel for with lastprivate),
5080 propagate down the total number of iterations. */
5081 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5082 fd->loop.n2);
5083 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5084 GSI_CONTINUE_LINKING);
5085 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5086 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5087 }
5088 }
5089 }
5090 t = fold_convert (itype, s0);
5091 t = fold_build2 (MULT_EXPR, itype, t, step);
5092 if (POINTER_TYPE_P (type))
5093 {
5094 t = fold_build_pointer_plus (n1, t);
5095 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5096 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5097 t = fold_convert (signed_type_for (type), t);
5098 }
5099 else
5100 t = fold_build2 (PLUS_EXPR, type, t, n1);
5101 t = fold_convert (TREE_TYPE (startvar), t);
5102 t = force_gimple_operand_gsi (&gsi, t,
5103 DECL_P (startvar)
5104 && TREE_ADDRESSABLE (startvar),
5105 NULL_TREE, false, GSI_CONTINUE_LINKING);
5106 assign_stmt = gimple_build_assign (startvar, t);
5107 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5108 if (cond_var)
5109 {
5110 tree itype = TREE_TYPE (cond_var);
5111 /* For lastprivate(conditional:) itervar, we need some iteration
5112 counter that starts at unsigned non-zero and increases.
5113 Prefer as few IVs as possible, so if we can use startvar
5114 itself, use that, or startvar + constant (which would be
5115 incremented with step), and as a last resort use s0 + 1,
5116 incremented by 1. */
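/* For illustration (hypothetical loops, not from the source): with
   for (i = 1; i < n; i++) the counter can be i itself; with
   for (i = -3; i < n; i++) it can be i + 4, which starts at 1 and is
   incremented with the step; with a pointer iterator, a non-constant
   lower bound or a non-LT condition, the separate counter s0 + 1
   incremented by 1 is used instead. */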
5117 if (POINTER_TYPE_P (type)
5118 || TREE_CODE (n1) != INTEGER_CST
5119 || fd->loop.cond_code != LT_EXPR)
5120 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5121 build_int_cst (itype, 1));
5122 else if (tree_int_cst_sgn (n1) == 1)
5123 t = fold_convert (itype, t);
5124 else
5125 {
5126 tree c = fold_convert (itype, n1);
5127 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5128 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5129 }
5130 t = force_gimple_operand_gsi (&gsi, t, false,
5131 NULL_TREE, false, GSI_CONTINUE_LINKING);
5132 assign_stmt = gimple_build_assign (cond_var, t);
5133 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5134 }
5135
5136 t = fold_convert (itype, e0);
5137 t = fold_build2 (MULT_EXPR, itype, t, step);
5138 if (POINTER_TYPE_P (type))
5139 {
5140 t = fold_build_pointer_plus (n1, t);
5141 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5142 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5143 t = fold_convert (signed_type_for (type), t);
5144 }
5145 else
5146 t = fold_build2 (PLUS_EXPR, type, t, n1);
5147 t = fold_convert (TREE_TYPE (startvar), t);
5148 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5149 false, GSI_CONTINUE_LINKING);
5150 if (endvar)
5151 {
5152 assign_stmt = gimple_build_assign (endvar, e);
5153 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5154 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5155 assign_stmt = gimple_build_assign (fd->loop.v, e);
5156 else
5157 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5158 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5159 }
5160 /* Handle linear clause adjustments. */
5161 tree itercnt = NULL_TREE;
5162 tree *nonrect_bounds = NULL;
5163 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5164 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5165 c; c = OMP_CLAUSE_CHAIN (c))
5166 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5167 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5168 {
5169 tree d = OMP_CLAUSE_DECL (c);
5170 bool is_ref = omp_is_reference (d);
5171 tree t = d, a, dest;
5172 if (is_ref)
5173 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5174 if (itercnt == NULL_TREE)
5175 {
5176 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5177 {
5178 itercnt = fold_build2 (MINUS_EXPR, itype,
5179 fold_convert (itype, n1),
5180 fold_convert (itype, fd->loop.n1));
5181 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5182 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5183 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5184 NULL_TREE, false,
5185 GSI_CONTINUE_LINKING);
5186 }
5187 else
5188 itercnt = s0;
5189 }
5190 tree type = TREE_TYPE (t);
5191 if (POINTER_TYPE_P (type))
5192 type = sizetype;
5193 a = fold_build2 (MULT_EXPR, type,
5194 fold_convert (type, itercnt),
5195 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5196 dest = unshare_expr (t);
5197 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5198 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5199 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5200 false, GSI_CONTINUE_LINKING);
5201 assign_stmt = gimple_build_assign (dest, t);
5202 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5203 }
5204 if (fd->collapse > 1)
5205 {
5206 if (fd->non_rect)
5207 {
5208 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5209 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5210 }
5211 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5212 startvar);
5213 }
5214
5215 if (!broken_loop)
5216 {
5217 /* The code controlling the sequential loop replaces the
5218 GIMPLE_OMP_CONTINUE. */
5219 gsi = gsi_last_nondebug_bb (cont_bb);
5220 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5221 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5222 vmain = gimple_omp_continue_control_use (cont_stmt);
5223 vback = gimple_omp_continue_control_def (cont_stmt);
5224
5225 if (cond_var)
5226 {
5227 tree itype = TREE_TYPE (cond_var);
5228 tree t2;
5229 if (POINTER_TYPE_P (type)
5230 || TREE_CODE (n1) != INTEGER_CST
5231 || fd->loop.cond_code != LT_EXPR)
5232 t2 = build_int_cst (itype, 1);
5233 else
5234 t2 = fold_convert (itype, step);
5235 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5236 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5237 NULL_TREE, true, GSI_SAME_STMT);
5238 assign_stmt = gimple_build_assign (cond_var, t2);
5239 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5240 }
5241
5242 if (!gimple_omp_for_combined_p (fd->for_stmt))
5243 {
5244 if (POINTER_TYPE_P (type))
5245 t = fold_build_pointer_plus (vmain, step);
5246 else
5247 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5248 t = force_gimple_operand_gsi (&gsi, t,
5249 DECL_P (vback)
5250 && TREE_ADDRESSABLE (vback),
5251 NULL_TREE, true, GSI_SAME_STMT);
5252 assign_stmt = gimple_build_assign (vback, t);
5253 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5254
5255 t = build2 (fd->loop.cond_code, boolean_type_node,
5256 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5257 ? t : vback, e);
5258 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5259 }
5260
5261 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5262 gsi_remove (&gsi, true);
5263
5264 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5265 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5266 cont_bb, body_bb);
5267 }
5268
5269 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5270 gsi = gsi_last_nondebug_bb (exit_bb);
5271 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5272 {
5273 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5274 if (fd->have_reductemp
5275 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5276 && !fd->have_nonctrl_scantemp))
5277 {
5278 tree fn;
5279 if (t)
5280 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5281 else
5282 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5283 gcall *g = gimple_build_call (fn, 0);
5284 if (t)
5285 {
5286 gimple_call_set_lhs (g, t);
5287 if (fd->have_reductemp)
5288 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5289 NOP_EXPR, t),
5290 GSI_SAME_STMT);
5291 }
5292 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5293 }
5294 else
5295 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5296 }
5297 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5298 && !fd->have_nonctrl_scantemp)
5299 {
5300 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5301 gcall *g = gimple_build_call (fn, 0);
5302 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5303 }
5304 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5305 {
5306 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5307 tree controlp = NULL_TREE, controlb = NULL_TREE;
5308 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5309 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5310 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5311 {
5312 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5313 controlb = OMP_CLAUSE_DECL (c);
5314 else
5315 controlp = OMP_CLAUSE_DECL (c);
5316 if (controlb && controlp)
5317 break;
5318 }
5319 gcc_assert (controlp && controlb);
5320 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5321 NULL_TREE, NULL_TREE);
5322 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5323 exit1_bb = split_block (exit_bb, g)->dest;
5324 gsi = gsi_after_labels (exit1_bb);
5325 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5326 controlp);
5327 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5328 exit2_bb = split_block (exit1_bb, g)->dest;
5329 gsi = gsi_after_labels (exit2_bb);
5330 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5331 controlp);
5332 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5333 exit3_bb = split_block (exit2_bb, g)->dest;
5334 gsi = gsi_after_labels (exit3_bb);
5335 }
5336 gsi_remove (&gsi, true);
5337
5338 /* Connect all the blocks. */
5339 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5340 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5341 ep = find_edge (entry_bb, second_bb);
5342 ep->flags = EDGE_TRUE_VALUE;
5343 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5344 if (fourth_bb)
5345 {
5346 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5347 ep->probability
5348 = profile_probability::guessed_always ().apply_scale (1, 2);
5349 ep = find_edge (third_bb, fourth_bb);
5350 ep->flags = EDGE_TRUE_VALUE;
5351 ep->probability
5352 = profile_probability::guessed_always ().apply_scale (1, 2);
5353 ep = find_edge (fourth_bb, fifth_bb);
5354 redirect_edge_and_branch (ep, sixth_bb);
5355 }
5356 else
5357 sixth_bb = third_bb;
5358 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5359 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5360 if (exit1_bb)
5361 {
5362 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5363 ep->probability
5364 = profile_probability::guessed_always ().apply_scale (1, 2);
5365 ep = find_edge (exit_bb, exit1_bb);
5366 ep->flags = EDGE_TRUE_VALUE;
5367 ep->probability
5368 = profile_probability::guessed_always ().apply_scale (1, 2);
5369 ep = find_edge (exit1_bb, exit2_bb);
5370 redirect_edge_and_branch (ep, exit3_bb);
5371 }
5372
5373 if (!broken_loop)
5374 {
5375 ep = find_edge (cont_bb, body_bb);
5376 if (ep == NULL)
5377 {
5378 ep = BRANCH_EDGE (cont_bb);
5379 gcc_assert (single_succ (ep->dest) == body_bb);
5380 }
5381 if (gimple_omp_for_combined_p (fd->for_stmt))
5382 {
5383 remove_edge (ep);
5384 ep = NULL;
5385 }
5386 else if (fd->collapse > 1)
5387 {
5388 remove_edge (ep);
5389 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5390 }
5391 else
5392 ep->flags = EDGE_TRUE_VALUE;
5393 find_edge (cont_bb, fin_bb)->flags
5394 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5395 }
5396
5397 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5398 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5399 if (fourth_bb)
5400 {
5401 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5402 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5403 }
5404 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5405
5406 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5407 recompute_dominator (CDI_DOMINATORS, body_bb));
5408 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5409 recompute_dominator (CDI_DOMINATORS, fin_bb));
5410 if (exit1_bb)
5411 {
5412 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5413 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5414 }
5415
5416 class loop *loop = body_bb->loop_father;
5417 if (loop != entry_bb->loop_father)
5418 {
5419 gcc_assert (broken_loop || loop->header == body_bb);
5420 gcc_assert (broken_loop
5421 || loop->latch == region->cont
5422 || single_pred (loop->latch) == region->cont);
5423 return;
5424 }
5425
5426 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5427 {
5428 loop = alloc_loop ();
5429 loop->header = body_bb;
5430 if (collapse_bb == NULL)
5431 loop->latch = cont_bb;
5432 add_loop (loop, body_bb->loop_father);
5433 }
5434 }
5435
5436 /* Return phi in E->DEST with ARG on edge E. */
5437
5438 static gphi *
5439 find_phi_with_arg_on_edge (tree arg, edge e)
5440 {
5441 basic_block bb = e->dest;
5442
5443 for (gphi_iterator gpi = gsi_start_phis (bb);
5444 !gsi_end_p (gpi);
5445 gsi_next (&gpi))
5446 {
5447 gphi *phi = gpi.phi ();
5448 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5449 return phi;
5450 }
5451
5452 return NULL;
5453 }
5454
5455 /* A subroutine of expand_omp_for. Generate code for a parallel
5456 loop with static schedule and a specified chunk size. Given
5457 parameters:
5458
5459 for (V = N1; V cond N2; V += STEP) BODY;
5460
5461 where COND is "<" or ">", we generate pseudocode
5462
5463 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5464 if (cond is <)
5465 adj = STEP - 1;
5466 else
5467 adj = STEP + 1;
5468 if ((__typeof (V)) -1 > 0 && cond is >)
5469 n = -(adj + N2 - N1) / -STEP;
5470 else
5471 n = (adj + N2 - N1) / STEP;
5472 trip = 0;
5473 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5474 here so that V is defined
5475 if the loop is not entered
5476 L0:
5477 s0 = (trip * nthreads + threadid) * CHUNK;
5478 e0 = min (s0 + CHUNK, n);
5479 if (s0 < n) goto L1; else goto L4;
5480 L1:
5481 V = s0 * STEP + N1;
5482 e = e0 * STEP + N1;
5483 L2:
5484 BODY;
5485 V += STEP;
5486 if (V cond e) goto L2; else goto L3;
5487 L3:
5488 trip += 1;
5489 goto L0;
5490 L4:
5491 */
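/* For illustration (hypothetical numbers): with nthreads == 4, CHUNK == 2
   and n == 10, the chunks are handed out round-robin, so thread 0 executes
   the [s0, e0) ranges [0, 2) and [8, 10), thread 1 executes [2, 4),
   thread 2 [4, 6) and thread 3 [6, 8); once a thread computes s0 >= n it
   branches to L4 and is done. */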
5492
5493 static void
5494 expand_omp_for_static_chunk (struct omp_region *region,
5495 struct omp_for_data *fd, gimple *inner_stmt)
5496 {
5497 tree n, s0, e0, e, t;
5498 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5499 tree type, itype, vmain, vback, vextra;
5500 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5501 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5502 gimple_stmt_iterator gsi, gsip;
5503 edge se;
5504 bool broken_loop = region->cont == NULL;
5505 tree *counts = NULL;
5506 tree n1, n2, step;
5507 tree reductions = NULL_TREE;
5508 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5509
5510 itype = type = TREE_TYPE (fd->loop.v);
5511 if (POINTER_TYPE_P (type))
5512 itype = signed_type_for (type);
5513
5514 entry_bb = region->entry;
5515 se = split_block (entry_bb, last_stmt (entry_bb));
5516 entry_bb = se->src;
5517 iter_part_bb = se->dest;
5518 cont_bb = region->cont;
5519 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5520 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5521 gcc_assert (broken_loop
5522 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5523 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5524 body_bb = single_succ (seq_start_bb);
5525 if (!broken_loop)
5526 {
5527 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5528 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5529 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5530 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5531 }
5532 exit_bb = region->exit;
5533
5534 /* Trip and adjustment setup goes in ENTRY_BB. */
5535 gsi = gsi_last_nondebug_bb (entry_bb);
5536 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5537 gsip = gsi;
5538 gsi_prev (&gsip);
5539
5540 if (fd->collapse > 1)
5541 {
5542 int first_zero_iter = -1, dummy = -1;
5543 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5544
5545 counts = XALLOCAVEC (tree, fd->collapse);
5546 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5547 fin_bb, first_zero_iter,
5548 dummy_bb, dummy, l2_dom_bb);
5549 t = NULL_TREE;
5550 }
5551 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5552 t = integer_one_node;
5553 else
5554 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5555 fold_convert (type, fd->loop.n1),
5556 fold_convert (type, fd->loop.n2));
5557 if (fd->collapse == 1
5558 && TYPE_UNSIGNED (type)
5559 && (t == NULL_TREE || !integer_onep (t)))
5560 {
5561 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5562 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5563 true, GSI_SAME_STMT);
5564 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5565 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5566 true, GSI_SAME_STMT);
5567 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5568 NULL_TREE, NULL_TREE);
5569 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5570 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5571 expand_omp_regimplify_p, NULL, NULL)
5572 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5573 expand_omp_regimplify_p, NULL, NULL))
5574 {
5575 gsi = gsi_for_stmt (cond_stmt);
5576 gimple_regimplify_operands (cond_stmt, &gsi);
5577 }
5578 se = split_block (entry_bb, cond_stmt);
5579 se->flags = EDGE_TRUE_VALUE;
5580 entry_bb = se->dest;
5581 se->probability = profile_probability::very_likely ();
5582 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5583 se->probability = profile_probability::very_unlikely ();
5584 if (gimple_in_ssa_p (cfun))
5585 {
5586 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5587 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5588 !gsi_end_p (gpi); gsi_next (&gpi))
5589 {
5590 gphi *phi = gpi.phi ();
5591 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5592 se, UNKNOWN_LOCATION);
5593 }
5594 }
5595 gsi = gsi_last_bb (entry_bb);
5596 }
5597
5598 if (fd->lastprivate_conditional)
5599 {
5600 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5601 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5602 if (fd->have_pointer_condtemp)
5603 condtemp = OMP_CLAUSE_DECL (c);
5604 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5605 cond_var = OMP_CLAUSE_DECL (c);
5606 }
5607 if (fd->have_reductemp || fd->have_pointer_condtemp)
5608 {
5609 tree t1 = build_int_cst (long_integer_type_node, 0);
5610 tree t2 = build_int_cst (long_integer_type_node, 1);
5611 tree t3 = build_int_cstu (long_integer_type_node,
5612 (HOST_WIDE_INT_1U << 31) + 1);
5613 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5614 gimple_stmt_iterator gsi2 = gsi_none ();
5615 gimple *g = NULL;
5616 tree mem = null_pointer_node, memv = NULL_TREE;
5617 if (fd->have_reductemp)
5618 {
5619 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5620 reductions = OMP_CLAUSE_DECL (c);
5621 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5622 g = SSA_NAME_DEF_STMT (reductions);
5623 reductions = gimple_assign_rhs1 (g);
5624 OMP_CLAUSE_DECL (c) = reductions;
5625 gsi2 = gsi_for_stmt (g);
5626 }
5627 else
5628 {
5629 if (gsi_end_p (gsip))
5630 gsi2 = gsi_after_labels (region->entry);
5631 else
5632 gsi2 = gsip;
5633 reductions = null_pointer_node;
5634 }
5635 if (fd->have_pointer_condtemp)
5636 {
5637 tree type = TREE_TYPE (condtemp);
5638 memv = create_tmp_var (type);
5639 TREE_ADDRESSABLE (memv) = 1;
5640 unsigned HOST_WIDE_INT sz
5641 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5642 sz *= fd->lastprivate_conditional;
5643 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5644 false);
5645 mem = build_fold_addr_expr (memv);
5646 }
5647 tree t
5648 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5649 9, t1, t2, t2, t3, t1, null_pointer_node,
5650 null_pointer_node, reductions, mem);
5651 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5652 true, GSI_SAME_STMT);
5653 if (fd->have_pointer_condtemp)
5654 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5655 if (fd->have_reductemp)
5656 {
5657 gsi_remove (&gsi2, true);
5658 release_ssa_name (gimple_assign_lhs (g));
5659 }
5660 }
5661 switch (gimple_omp_for_kind (fd->for_stmt))
5662 {
5663 case GF_OMP_FOR_KIND_FOR:
5664 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5665 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5666 break;
5667 case GF_OMP_FOR_KIND_DISTRIBUTE:
5668 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5669 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5670 break;
5671 default:
5672 gcc_unreachable ();
5673 }
5674 nthreads = build_call_expr (nthreads, 0);
5675 nthreads = fold_convert (itype, nthreads);
5676 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5677 true, GSI_SAME_STMT);
5678 threadid = build_call_expr (threadid, 0);
5679 threadid = fold_convert (itype, threadid);
5680 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5681 true, GSI_SAME_STMT);
5682
5683 n1 = fd->loop.n1;
5684 n2 = fd->loop.n2;
5685 step = fd->loop.step;
5686 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5687 {
5688 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5689 OMP_CLAUSE__LOOPTEMP_);
5690 gcc_assert (innerc);
5691 n1 = OMP_CLAUSE_DECL (innerc);
5692 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5693 OMP_CLAUSE__LOOPTEMP_);
5694 gcc_assert (innerc);
5695 n2 = OMP_CLAUSE_DECL (innerc);
5696 }
5697 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5698 true, NULL_TREE, true, GSI_SAME_STMT);
5699 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5700 true, NULL_TREE, true, GSI_SAME_STMT);
5701 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5702 true, NULL_TREE, true, GSI_SAME_STMT);
5703 tree chunk_size = fold_convert (itype, fd->chunk_size);
5704 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5705 chunk_size
5706 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5707 GSI_SAME_STMT);
5708
5709 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5710 t = fold_build2 (PLUS_EXPR, itype, step, t);
5711 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5712 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5713 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5714 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5715 fold_build1 (NEGATE_EXPR, itype, t),
5716 fold_build1 (NEGATE_EXPR, itype, step));
5717 else
5718 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5719 t = fold_convert (itype, t);
5720 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5721 true, GSI_SAME_STMT);
5722
5723 trip_var = create_tmp_reg (itype, ".trip");
5724 if (gimple_in_ssa_p (cfun))
5725 {
5726 trip_init = make_ssa_name (trip_var);
5727 trip_main = make_ssa_name (trip_var);
5728 trip_back = make_ssa_name (trip_var);
5729 }
5730 else
5731 {
5732 trip_init = trip_var;
5733 trip_main = trip_var;
5734 trip_back = trip_var;
5735 }
5736
5737 gassign *assign_stmt
5738 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5739 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5740
5741 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5742 t = fold_build2 (MULT_EXPR, itype, t, step);
5743 if (POINTER_TYPE_P (type))
5744 t = fold_build_pointer_plus (n1, t);
5745 else
5746 t = fold_build2 (PLUS_EXPR, type, t, n1);
5747 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5748 true, GSI_SAME_STMT);
5749
5750 /* Remove the GIMPLE_OMP_FOR. */
5751 gsi_remove (&gsi, true);
5752
5753 gimple_stmt_iterator gsif = gsi;
5754
5755 /* Iteration space partitioning goes in ITER_PART_BB. */
5756 gsi = gsi_last_bb (iter_part_bb);
5757
5758 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5759 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5760 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5761 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5762 false, GSI_CONTINUE_LINKING);
5763
5764 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5765 t = fold_build2 (MIN_EXPR, itype, t, n);
5766 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5767 false, GSI_CONTINUE_LINKING);
5768
5769 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5770 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5771
5772 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5773 gsi = gsi_start_bb (seq_start_bb);
5774
5775 tree startvar = fd->loop.v;
5776 tree endvar = NULL_TREE;
5777
5778 if (gimple_omp_for_combined_p (fd->for_stmt))
5779 {
5780 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5781 ? gimple_omp_parallel_clauses (inner_stmt)
5782 : gimple_omp_for_clauses (inner_stmt);
5783 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5784 gcc_assert (innerc);
5785 startvar = OMP_CLAUSE_DECL (innerc);
5786 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5787 OMP_CLAUSE__LOOPTEMP_);
5788 gcc_assert (innerc);
5789 endvar = OMP_CLAUSE_DECL (innerc);
5790 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5791 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5792 {
5793 int i;
5794 for (i = 1; i < fd->collapse; i++)
5795 {
5796 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5797 OMP_CLAUSE__LOOPTEMP_);
5798 gcc_assert (innerc);
5799 }
5800 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5801 OMP_CLAUSE__LOOPTEMP_);
5802 if (innerc)
5803 {
5804 /* If needed (distribute parallel for with lastprivate),
5805 propagate down the total number of iterations. */
5806 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5807 fd->loop.n2);
5808 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5809 GSI_CONTINUE_LINKING);
5810 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5811 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5812 }
5813 }
5814 }
5815
5816 t = fold_convert (itype, s0);
5817 t = fold_build2 (MULT_EXPR, itype, t, step);
5818 if (POINTER_TYPE_P (type))
5819 {
5820 t = fold_build_pointer_plus (n1, t);
5821 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5822 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5823 t = fold_convert (signed_type_for (type), t);
5824 }
5825 else
5826 t = fold_build2 (PLUS_EXPR, type, t, n1);
5827 t = fold_convert (TREE_TYPE (startvar), t);
5828 t = force_gimple_operand_gsi (&gsi, t,
5829 DECL_P (startvar)
5830 && TREE_ADDRESSABLE (startvar),
5831 NULL_TREE, false, GSI_CONTINUE_LINKING);
5832 assign_stmt = gimple_build_assign (startvar, t);
5833 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5834 if (cond_var)
5835 {
5836 tree itype = TREE_TYPE (cond_var);
5837 /* For lastprivate(conditional:) itervar, we need some iteration
5838 counter that starts at unsigned non-zero and increases.
5839 Prefer as few IVs as possible, so if we can use startvar
5840 itself, use that, or startvar + constant (which would be
5841 incremented with step), and as a last resort use s0 + 1,
5842 incremented by 1. */
5843 if (POINTER_TYPE_P (type)
5844 || TREE_CODE (n1) != INTEGER_CST
5845 || fd->loop.cond_code != LT_EXPR)
5846 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5847 build_int_cst (itype, 1));
5848 else if (tree_int_cst_sgn (n1) == 1)
5849 t = fold_convert (itype, t);
5850 else
5851 {
5852 tree c = fold_convert (itype, n1);
5853 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5854 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5855 }
5856 t = force_gimple_operand_gsi (&gsi, t, false,
5857 NULL_TREE, false, GSI_CONTINUE_LINKING);
5858 assign_stmt = gimple_build_assign (cond_var, t);
5859 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5860 }
5861
5862 t = fold_convert (itype, e0);
5863 t = fold_build2 (MULT_EXPR, itype, t, step);
5864 if (POINTER_TYPE_P (type))
5865 {
5866 t = fold_build_pointer_plus (n1, t);
5867 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5868 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5869 t = fold_convert (signed_type_for (type), t);
5870 }
5871 else
5872 t = fold_build2 (PLUS_EXPR, type, t, n1);
5873 t = fold_convert (TREE_TYPE (startvar), t);
5874 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5875 false, GSI_CONTINUE_LINKING);
5876 if (endvar)
5877 {
5878 assign_stmt = gimple_build_assign (endvar, e);
5879 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5880 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5881 assign_stmt = gimple_build_assign (fd->loop.v, e);
5882 else
5883 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5884 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5885 }
5886 /* Handle linear clause adjustments. */
5887 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5888 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5889 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5890 c; c = OMP_CLAUSE_CHAIN (c))
5891 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5892 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5893 {
5894 tree d = OMP_CLAUSE_DECL (c);
5895 bool is_ref = omp_is_reference (d);
5896 tree t = d, a, dest;
5897 if (is_ref)
5898 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5899 tree type = TREE_TYPE (t);
5900 if (POINTER_TYPE_P (type))
5901 type = sizetype;
5902 dest = unshare_expr (t);
5903 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5904 expand_omp_build_assign (&gsif, v, t);
5905 if (itercnt == NULL_TREE)
5906 {
5907 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5908 {
5909 itercntbias
5910 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5911 fold_convert (itype, fd->loop.n1));
5912 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5913 itercntbias, step);
5914 itercntbias
5915 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5916 NULL_TREE, true,
5917 GSI_SAME_STMT);
5918 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5919 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5920 NULL_TREE, false,
5921 GSI_CONTINUE_LINKING);
5922 }
5923 else
5924 itercnt = s0;
5925 }
5926 a = fold_build2 (MULT_EXPR, type,
5927 fold_convert (type, itercnt),
5928 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5929 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5930 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5931 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5932 false, GSI_CONTINUE_LINKING);
5933 assign_stmt = gimple_build_assign (dest, t);
5934 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5935 }
5936 if (fd->collapse > 1)
5937 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5938
5939 if (!broken_loop)
5940 {
5941 /* The code controlling the sequential loop goes in CONT_BB,
5942 replacing the GIMPLE_OMP_CONTINUE. */
5943 gsi = gsi_last_nondebug_bb (cont_bb);
5944 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5945 vmain = gimple_omp_continue_control_use (cont_stmt);
5946 vback = gimple_omp_continue_control_def (cont_stmt);
5947
5948 if (cond_var)
5949 {
5950 tree itype = TREE_TYPE (cond_var);
5951 tree t2;
5952 if (POINTER_TYPE_P (type)
5953 || TREE_CODE (n1) != INTEGER_CST
5954 || fd->loop.cond_code != LT_EXPR)
5955 t2 = build_int_cst (itype, 1);
5956 else
5957 t2 = fold_convert (itype, step);
5958 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5959 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5960 NULL_TREE, true, GSI_SAME_STMT);
5961 assign_stmt = gimple_build_assign (cond_var, t2);
5962 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5963 }
5964
5965 if (!gimple_omp_for_combined_p (fd->for_stmt))
5966 {
5967 if (POINTER_TYPE_P (type))
5968 t = fold_build_pointer_plus (vmain, step);
5969 else
5970 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5971 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
5972 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5973 true, GSI_SAME_STMT);
5974 assign_stmt = gimple_build_assign (vback, t);
5975 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5976
5977 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
5978 t = build2 (EQ_EXPR, boolean_type_node,
5979 build_int_cst (itype, 0),
5980 build_int_cst (itype, 1));
5981 else
5982 t = build2 (fd->loop.cond_code, boolean_type_node,
5983 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5984 ? t : vback, e);
5985 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5986 }
5987
5988 /* Remove GIMPLE_OMP_CONTINUE. */
5989 gsi_remove (&gsi, true);
5990
5991 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5992 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
5993
5994 /* Trip update code goes into TRIP_UPDATE_BB. */
5995 gsi = gsi_start_bb (trip_update_bb);
5996
5997 t = build_int_cst (itype, 1);
5998 t = build2 (PLUS_EXPR, itype, trip_main, t);
5999 assign_stmt = gimple_build_assign (trip_back, t);
6000 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6001 }
6002
6003 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6004 gsi = gsi_last_nondebug_bb (exit_bb);
6005 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6006 {
6007 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6008 if (fd->have_reductemp || fd->have_pointer_condtemp)
6009 {
6010 tree fn;
6011 if (t)
6012 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6013 else
6014 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6015 gcall *g = gimple_build_call (fn, 0);
6016 if (t)
6017 {
6018 gimple_call_set_lhs (g, t);
6019 if (fd->have_reductemp)
6020 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6021 NOP_EXPR, t),
6022 GSI_SAME_STMT);
6023 }
6024 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6025 }
6026 else
6027 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6028 }
6029 else if (fd->have_pointer_condtemp)
6030 {
6031 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6032 gcall *g = gimple_build_call (fn, 0);
6033 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6034 }
6035 gsi_remove (&gsi, true);
6036
6037 /* Connect the new blocks. */
6038 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6039 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6040
6041 if (!broken_loop)
6042 {
6043 se = find_edge (cont_bb, body_bb);
6044 if (se == NULL)
6045 {
6046 se = BRANCH_EDGE (cont_bb);
6047 gcc_assert (single_succ (se->dest) == body_bb);
6048 }
6049 if (gimple_omp_for_combined_p (fd->for_stmt))
6050 {
6051 remove_edge (se);
6052 se = NULL;
6053 }
6054 else if (fd->collapse > 1)
6055 {
6056 remove_edge (se);
6057 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6058 }
6059 else
6060 se->flags = EDGE_TRUE_VALUE;
6061 find_edge (cont_bb, trip_update_bb)->flags
6062 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6063
6064 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6065 iter_part_bb);
6066 }
6067
6068 if (gimple_in_ssa_p (cfun))
6069 {
6070 gphi_iterator psi;
6071 gphi *phi;
6072 edge re, ene;
6073 edge_var_map *vm;
6074 size_t i;
6075
6076 gcc_assert (fd->collapse == 1 && !broken_loop);
6077
6078 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6079 remove arguments of the phi nodes in fin_bb. We need to create
6080 appropriate phi nodes in iter_part_bb instead. */
6081 se = find_edge (iter_part_bb, fin_bb);
6082 re = single_succ_edge (trip_update_bb);
6083 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6084 ene = single_succ_edge (entry_bb);
6085
6086 psi = gsi_start_phis (fin_bb);
6087 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6088 gsi_next (&psi), ++i)
6089 {
6090 gphi *nphi;
6091 location_t locus;
6092
6093 phi = psi.phi ();
6094 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6095 redirect_edge_var_map_def (vm), 0))
6096 continue;
6097
6098 t = gimple_phi_result (phi);
6099 gcc_assert (t == redirect_edge_var_map_result (vm));
6100
6101 if (!single_pred_p (fin_bb))
6102 t = copy_ssa_name (t, phi);
6103
6104 nphi = create_phi_node (t, iter_part_bb);
6105
6106 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6107 locus = gimple_phi_arg_location_from_edge (phi, se);
6108
6109 /* A special case -- fd->loop.v is not yet computed in
6110 iter_part_bb, so we need to use vextra instead. */
6111 if (t == fd->loop.v)
6112 t = vextra;
6113 add_phi_arg (nphi, t, ene, locus);
6114 locus = redirect_edge_var_map_location (vm);
6115 tree back_arg = redirect_edge_var_map_def (vm);
6116 add_phi_arg (nphi, back_arg, re, locus);
6117 edge ce = find_edge (cont_bb, body_bb);
6118 if (ce == NULL)
6119 {
6120 ce = BRANCH_EDGE (cont_bb);
6121 gcc_assert (single_succ (ce->dest) == body_bb);
6122 ce = single_succ_edge (ce->dest);
6123 }
6124 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6125 gcc_assert (inner_loop_phi != NULL);
6126 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6127 find_edge (seq_start_bb, body_bb), locus);
6128
6129 if (!single_pred_p (fin_bb))
6130 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6131 }
6132 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6133 redirect_edge_var_map_clear (re);
6134 if (single_pred_p (fin_bb))
6135 while (1)
6136 {
6137 psi = gsi_start_phis (fin_bb);
6138 if (gsi_end_p (psi))
6139 break;
6140 remove_phi_node (&psi, false);
6141 }
6142
6143 /* Make phi node for trip. */
6144 phi = create_phi_node (trip_main, iter_part_bb);
6145 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6146 UNKNOWN_LOCATION);
6147 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6148 UNKNOWN_LOCATION);
6149 }
6150
6151 if (!broken_loop)
6152 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6153 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6154 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6155 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6156 recompute_dominator (CDI_DOMINATORS, fin_bb));
6157 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6158 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6159 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6160 recompute_dominator (CDI_DOMINATORS, body_bb));
6161
6162 if (!broken_loop)
6163 {
6164 class loop *loop = body_bb->loop_father;
6165 class loop *trip_loop = alloc_loop ();
6166 trip_loop->header = iter_part_bb;
6167 trip_loop->latch = trip_update_bb;
6168 add_loop (trip_loop, iter_part_bb->loop_father);
6169
6170 if (loop != entry_bb->loop_father)
6171 {
6172 gcc_assert (loop->header == body_bb);
6173 gcc_assert (loop->latch == region->cont
6174 || single_pred (loop->latch) == region->cont);
6175 trip_loop->inner = loop;
6176 return;
6177 }
6178
6179 if (!gimple_omp_for_combined_p (fd->for_stmt))
6180 {
6181 loop = alloc_loop ();
6182 loop->header = body_bb;
6183 if (collapse_bb == NULL)
6184 loop->latch = cont_bb;
6185 add_loop (loop, trip_loop);
6186 }
6187 }
6188 }
6189
6190 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6191 loop. Given parameters:
6192
6193 for (V = N1; V cond N2; V += STEP) BODY;
6194
6195 where COND is "<" or ">", we generate pseudocode
6196
6197 V = N1;
6198 goto L1;
6199 L0:
6200 BODY;
6201 V += STEP;
6202 L1:
6203 if (V cond N2) goto L0; else goto L2;
6204 L2:
6205
6206 For collapsed loops, emit the outer loops as scalar
6207 and only try to vectorize the innermost loop. */
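/* For illustration (hypothetical user code): a loop such as
     #pragma omp simd safelen(8)
     for (i = 0; i < n; i++) a[i] = b[i] + c[i];
   is expected to be emitted with the scalar skeleton above; the
   safelen/simdlen/if clauses are translated into hints for the
   vectorizer rather than into different control flow. */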
6208
6209 static void
6210 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6211 {
6212 tree type, t;
6213 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6214 gimple_stmt_iterator gsi;
6215 gimple *stmt;
6216 gcond *cond_stmt;
6217 bool broken_loop = region->cont == NULL;
6218 edge e, ne;
6219 tree *counts = NULL;
6220 int i;
6221 int safelen_int = INT_MAX;
6222 bool dont_vectorize = false;
6223 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6224 OMP_CLAUSE_SAFELEN);
6225 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6226 OMP_CLAUSE__SIMDUID_);
6227 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6228 OMP_CLAUSE_IF);
6229 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6230 OMP_CLAUSE_SIMDLEN);
6231 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6232 OMP_CLAUSE__CONDTEMP_);
6233 tree n1, n2;
6234 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6235
6236 if (safelen)
6237 {
6238 poly_uint64 val;
6239 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6240 if (!poly_int_tree_p (safelen, &val))
6241 safelen_int = 0;
6242 else
6243 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6244 if (safelen_int == 1)
6245 safelen_int = 0;
6246 }
6247 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6248 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6249 {
6250 safelen_int = 0;
6251 dont_vectorize = true;
6252 }
6253 type = TREE_TYPE (fd->loop.v);
6254 entry_bb = region->entry;
6255 cont_bb = region->cont;
6256 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6257 gcc_assert (broken_loop
6258 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6259 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6260 if (!broken_loop)
6261 {
6262 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6263 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6264 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6265 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6266 }
6267 else
6268 {
6269 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6270 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6271 l2_bb = single_succ (l1_bb);
6272 }
6273 exit_bb = region->exit;
6274 l2_dom_bb = NULL;
6275
6276 gsi = gsi_last_nondebug_bb (entry_bb);
6277
6278 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6279 /* Not needed in SSA form right now. */
6280 gcc_assert (!gimple_in_ssa_p (cfun));
6281 if (fd->collapse > 1
6282 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6283 || broken_loop))
6284 {
6285 int first_zero_iter = -1, dummy = -1;
6286 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6287
6288 counts = XALLOCAVEC (tree, fd->collapse);
6289 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6290 zero_iter_bb, first_zero_iter,
6291 dummy_bb, dummy, l2_dom_bb);
6292 }
6293 if (l2_dom_bb == NULL)
6294 l2_dom_bb = l1_bb;
6295
6296 n1 = fd->loop.n1;
6297 n2 = fd->loop.n2;
6298 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6299 {
6300 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6301 OMP_CLAUSE__LOOPTEMP_);
6302 gcc_assert (innerc);
6303 n1 = OMP_CLAUSE_DECL (innerc);
6304 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6305 OMP_CLAUSE__LOOPTEMP_);
6306 gcc_assert (innerc);
6307 n2 = OMP_CLAUSE_DECL (innerc);
6308 }
6309 tree step = fd->loop.step;
6310
6311 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6312 OMP_CLAUSE__SIMT_);
6313 if (is_simt)
6314 {
6315 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6316 is_simt = safelen_int > 1;
6317 }
6318 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6319 if (is_simt)
6320 {
6321 simt_lane = create_tmp_var (unsigned_type_node);
6322 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6323 gimple_call_set_lhs (g, simt_lane);
6324 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6325 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6326 fold_convert (TREE_TYPE (step), simt_lane));
6327 n1 = fold_convert (type, n1);
6328 if (POINTER_TYPE_P (type))
6329 n1 = fold_build_pointer_plus (n1, offset);
6330 else
6331 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6332
6333 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6334 if (fd->collapse > 1)
6335 simt_maxlane = build_one_cst (unsigned_type_node);
6336 else if (safelen_int < omp_max_simt_vf ())
6337 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6338 tree vf
6339 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6340 unsigned_type_node, 0);
6341 if (simt_maxlane)
6342 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6343 vf = fold_convert (TREE_TYPE (step), vf);
6344 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6345 }
6346
6347 tree n2var = NULL_TREE;
6348 tree n2v = NULL_TREE;
6349 tree *nonrect_bounds = NULL;
6350 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6351 if (fd->collapse > 1)
6352 {
6353 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6354 {
6355 if (fd->non_rect)
6356 {
6357 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6358 memset (nonrect_bounds, 0,
6359 sizeof (tree) * (fd->last_nonrect + 1));
6360 }
6361 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6362 gcc_assert (entry_bb == gsi_bb (gsi));
6363 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6364 gsi_prev (&gsi);
6365 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6366 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6367 NULL, n1);
6368 gsi = gsi_for_stmt (fd->for_stmt);
6369 }
6370 if (broken_loop)
6371 ;
6372 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6373 {
6374 /* Compute in n2var the limit for the first innermost loop,
6375 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6376 where cnt is how many iterations the loop would have if
6377 all further iterations were assigned to the current task. */
6378 n2var = create_tmp_var (type);
6379 i = fd->collapse - 1;
6380 tree itype = TREE_TYPE (fd->loops[i].v);
6381 if (POINTER_TYPE_P (itype))
6382 itype = signed_type_for (itype);
6383 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6384 ? -1 : 1));
6385 t = fold_build2 (PLUS_EXPR, itype,
6386 fold_convert (itype, fd->loops[i].step), t);
6387 t = fold_build2 (PLUS_EXPR, itype, t,
6388 fold_convert (itype, fd->loops[i].n2));
6389 if (fd->loops[i].m2)
6390 {
6391 tree t2 = fold_convert (itype,
6392 fd->loops[i - fd->loops[i].outer].v);
6393 tree t3 = fold_convert (itype, fd->loops[i].m2);
6394 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6395 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6396 }
6397 t = fold_build2 (MINUS_EXPR, itype, t,
6398 fold_convert (itype, fd->loops[i].v));
6399 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6400 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6401 fold_build1 (NEGATE_EXPR, itype, t),
6402 fold_build1 (NEGATE_EXPR, itype,
6403 fold_convert (itype,
6404 fd->loops[i].step)));
6405 else
6406 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6407 fold_convert (itype, fd->loops[i].step));
6408 t = fold_convert (type, t);
6409 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6410 min_arg1 = create_tmp_var (type);
6411 expand_omp_build_assign (&gsi, min_arg1, t2);
6412 min_arg2 = create_tmp_var (type);
6413 expand_omp_build_assign (&gsi, min_arg2, t);
6414 }
6415 else
6416 {
6417 if (TREE_CODE (n2) == INTEGER_CST)
6418 {
6419 /* Indicate for lastprivate handling that at least one iteration
6420 has been performed, without wasting runtime. */
6421 if (integer_nonzerop (n2))
6422 expand_omp_build_assign (&gsi, fd->loop.v,
6423 fold_convert (type, n2));
6424 else
6425 /* Indicate that no iteration has been performed. */
6426 expand_omp_build_assign (&gsi, fd->loop.v,
6427 build_one_cst (type));
6428 }
6429 else
6430 {
6431 expand_omp_build_assign (&gsi, fd->loop.v,
6432 build_zero_cst (type));
6433 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6434 }
6435 for (i = 0; i < fd->collapse; i++)
6436 {
6437 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6438 if (fd->loops[i].m1)
6439 {
6440 tree t2
6441 = fold_convert (TREE_TYPE (t),
6442 fd->loops[i - fd->loops[i].outer].v);
6443 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6444 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6445 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6446 }
6447 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6448 /* For normal non-combined collapsed loops just initialize
6449 the outermost iterator in the entry_bb. */
6450 if (!broken_loop)
6451 break;
6452 }
6453 }
6454 }
6455 else
6456 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6457 tree altv = NULL_TREE, altn2 = NULL_TREE;
6458 if (fd->collapse == 1
6459 && !broken_loop
6460 && TREE_CODE (fd->loops[0].step) != INTEGER_CST)
6461 {
6462 /* The vectorizer currently punts on loops with non-constant steps
6463 for the main IV (it cannot compute the number of iterations and gives up
6464 because of that). Since for OpenMP loops it is always possible to
6465 compute the number of iterations upfront, use an alternate IV
6466 as the loop iterator:
6467 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6468 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
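/* For illustration (hypothetical values): with n1 == 0, n2 == n and a
   run-time step k, altn2 becomes n > 0 ? (n + k - 1) / k : 0, and the
   vectorizable loop runs altv from 0 to altn2 while i is advanced by k
   on each iteration. */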
6469 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6470 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6471 tree itype = TREE_TYPE (fd->loop.v);
6472 if (POINTER_TYPE_P (itype))
6473 itype = signed_type_for (itype);
6474 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6475 t = fold_build2 (PLUS_EXPR, itype,
6476 fold_convert (itype, fd->loop.step), t);
6477 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6478 t = fold_build2 (MINUS_EXPR, itype, t,
6479 fold_convert (itype, fd->loop.v));
6480 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6481 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6482 fold_build1 (NEGATE_EXPR, itype, t),
6483 fold_build1 (NEGATE_EXPR, itype,
6484 fold_convert (itype, fd->loop.step)));
6485 else
6486 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6487 fold_convert (itype, fd->loop.step));
6488 t = fold_convert (TREE_TYPE (altv), t);
6489 altn2 = create_tmp_var (TREE_TYPE (altv));
6490 expand_omp_build_assign (&gsi, altn2, t);
6491 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6492 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6493 true, GSI_SAME_STMT);
6494 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6495 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6496 build_zero_cst (TREE_TYPE (altv)));
6497 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6498 }
6499 else if (fd->collapse > 1
6500 && !broken_loop
6501 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6502 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6503 {
6504 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6505 altn2 = create_tmp_var (TREE_TYPE (altv));
6506 }
6507 if (cond_var)
6508 {
6509 if (POINTER_TYPE_P (type)
6510 || TREE_CODE (n1) != INTEGER_CST
6511 || fd->loop.cond_code != LT_EXPR
6512 || tree_int_cst_sgn (n1) != 1)
6513 expand_omp_build_assign (&gsi, cond_var,
6514 build_one_cst (TREE_TYPE (cond_var)));
6515 else
6516 expand_omp_build_assign (&gsi, cond_var,
6517 fold_convert (TREE_TYPE (cond_var), n1));
6518 }
6519
6520 /* Remove the GIMPLE_OMP_FOR statement. */
6521 gsi_remove (&gsi, true);
6522
6523 if (!broken_loop)
6524 {
6525 /* Code to control the increment goes in the CONT_BB. */
6526 gsi = gsi_last_nondebug_bb (cont_bb);
6527 stmt = gsi_stmt (gsi);
6528 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6529
6530 if (fd->collapse == 1
6531 || gimple_omp_for_combined_into_p (fd->for_stmt))
6532 {
6533 if (POINTER_TYPE_P (type))
6534 t = fold_build_pointer_plus (fd->loop.v, step);
6535 else
6536 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6537 expand_omp_build_assign (&gsi, fd->loop.v, t);
6538 }
6539 else if (TREE_CODE (n2) != INTEGER_CST)
6540 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6541 if (altv)
6542 {
6543 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6544 build_one_cst (TREE_TYPE (altv)));
6545 expand_omp_build_assign (&gsi, altv, t);
6546 }
6547
6548 if (fd->collapse > 1)
6549 {
6550 i = fd->collapse - 1;
6551 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6552 {
6553 t = fold_convert (sizetype, fd->loops[i].step);
6554 t = fold_build_pointer_plus (fd->loops[i].v, t);
6555 }
6556 else
6557 {
6558 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6559 fd->loops[i].step);
6560 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6561 fd->loops[i].v, t);
6562 }
6563 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6564 }
6565 if (cond_var)
6566 {
6567 if (POINTER_TYPE_P (type)
6568 || TREE_CODE (n1) != INTEGER_CST
6569 || fd->loop.cond_code != LT_EXPR
6570 || tree_int_cst_sgn (n1) != 1)
6571 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6572 build_one_cst (TREE_TYPE (cond_var)));
6573 else
6574 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6575 fold_convert (TREE_TYPE (cond_var), step));
6576 expand_omp_build_assign (&gsi, cond_var, t);
6577 }
6578
6579 /* Remove GIMPLE_OMP_CONTINUE. */
6580 gsi_remove (&gsi, true);
6581 }
6582
6583 /* Emit the condition in L1_BB. */
6584 gsi = gsi_start_bb (l1_bb);
6585
6586 if (altv)
6587 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6588 else if (fd->collapse > 1
6589 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6590 && !broken_loop)
6591 {
6592 i = fd->collapse - 1;
6593 tree itype = TREE_TYPE (fd->loops[i].v);
6594 if (fd->loops[i].m2)
6595 t = n2v = create_tmp_var (itype);
6596 else
6597 t = fold_convert (itype, fd->loops[i].n2);
6598 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6599 false, GSI_CONTINUE_LINKING);
6600 tree v = fd->loops[i].v;
6601 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6602 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6603 false, GSI_CONTINUE_LINKING);
6604 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6605 }
6606 else
6607 {
6608 if (fd->collapse > 1 && !broken_loop)
6609 t = n2var;
6610 else
6611 t = fold_convert (type, n2);
6612 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6613 false, GSI_CONTINUE_LINKING);
6614 tree v = fd->loop.v;
6615 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6616 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6617 false, GSI_CONTINUE_LINKING);
6618 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6619 }
6620 cond_stmt = gimple_build_cond_empty (t);
6621 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6622 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6623 NULL, NULL)
6624 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6625 NULL, NULL))
6626 {
6627 gsi = gsi_for_stmt (cond_stmt);
6628 gimple_regimplify_operands (cond_stmt, &gsi);
6629 }
6630
6631 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6632 if (is_simt)
6633 {
6634 gsi = gsi_start_bb (l2_bb);
6635 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
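/* At this point STEP is assumed to hold the original step already scaled
   by the SIMT vector factor (set up earlier when entering the loop), so
   ORIG_STEP - STEP computed here equals -ORIG_STEP * (SIMT_VF - 1), which
   the assignment below adds to V.  */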
6636 if (POINTER_TYPE_P (type))
6637 t = fold_build_pointer_plus (fd->loop.v, step);
6638 else
6639 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6640 expand_omp_build_assign (&gsi, fd->loop.v, t);
6641 }
6642
6643 /* Remove GIMPLE_OMP_RETURN. */
6644 gsi = gsi_last_nondebug_bb (exit_bb);
6645 gsi_remove (&gsi, true);
6646
6647 /* Connect the new blocks. */
6648 remove_edge (FALLTHRU_EDGE (entry_bb));
6649
6650 if (!broken_loop)
6651 {
6652 remove_edge (BRANCH_EDGE (entry_bb));
6653 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6654
6655 e = BRANCH_EDGE (l1_bb);
6656 ne = FALLTHRU_EDGE (l1_bb);
6657 e->flags = EDGE_TRUE_VALUE;
6658 }
6659 else
6660 {
6661 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6662
6663 ne = single_succ_edge (l1_bb);
6664 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6665
6666 }
6667 ne->flags = EDGE_FALSE_VALUE;
6668 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6669 ne->probability = e->probability.invert ();
6670
6671 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6672 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6673
6674 if (simt_maxlane)
6675 {
6676 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6677 NULL_TREE, NULL_TREE);
6678 gsi = gsi_last_bb (entry_bb);
6679 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6680 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6681 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6682 FALLTHRU_EDGE (entry_bb)->probability
6683 = profile_probability::guessed_always ().apply_scale (7, 8);
6684 BRANCH_EDGE (entry_bb)->probability
6685 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6686 l2_dom_bb = entry_bb;
6687 }
6688 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6689
6690 if (!broken_loop && fd->collapse > 1)
6691 {
6692 basic_block last_bb = l1_bb;
6693 basic_block init_bb = NULL;
6694 for (i = fd->collapse - 2; i >= 0; i--)
6695 {
6696 tree nextn2v = NULL_TREE;
6697 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6698 e = EDGE_SUCC (last_bb, 0);
6699 else
6700 e = EDGE_SUCC (last_bb, 1);
6701 basic_block bb = split_edge (e);
6702 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6703 {
6704 t = fold_convert (sizetype, fd->loops[i].step);
6705 t = fold_build_pointer_plus (fd->loops[i].v, t);
6706 }
6707 else
6708 {
6709 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6710 fd->loops[i].step);
6711 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6712 fd->loops[i].v, t);
6713 }
6714 gsi = gsi_after_labels (bb);
6715 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6716
6717 bb = split_block (bb, last_stmt (bb))->dest;
6718 gsi = gsi_start_bb (bb);
6719 tree itype = TREE_TYPE (fd->loops[i].v);
6720 if (fd->loops[i].m2)
6721 t = nextn2v = create_tmp_var (itype);
6722 else
6723 t = fold_convert (itype, fd->loops[i].n2);
6724 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6725 false, GSI_CONTINUE_LINKING);
6726 tree v = fd->loops[i].v;
6727 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6728 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6729 false, GSI_CONTINUE_LINKING);
6730 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6731 cond_stmt = gimple_build_cond_empty (t);
6732 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6733 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6734 expand_omp_regimplify_p, NULL, NULL)
6735 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6736 expand_omp_regimplify_p, NULL, NULL))
6737 {
6738 gsi = gsi_for_stmt (cond_stmt);
6739 gimple_regimplify_operands (cond_stmt, &gsi);
6740 }
6741 ne = single_succ_edge (bb);
6742 ne->flags = EDGE_FALSE_VALUE;
6743
6744 init_bb = create_empty_bb (bb);
6745 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6746 add_bb_to_loop (init_bb, bb->loop_father);
6747 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6748 e->probability
6749 = profile_probability::guessed_always ().apply_scale (7, 8);
6750 ne->probability = e->probability.invert ();
6751
6752 gsi = gsi_after_labels (init_bb);
6753 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6754 fd->loops[i + 1].n1);
6755 if (fd->loops[i + 1].m1)
6756 {
6757 tree t2 = fold_convert (TREE_TYPE (t),
6758 fd->loops[i + 1
6759 - fd->loops[i + 1].outer].v);
6760 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6761 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6762 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6763 }
6764 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6765 if (fd->loops[i + 1].m2)
6766 {
6767 if (i + 2 == fd->collapse && (n2var || altv))
6768 {
6769 gcc_assert (n2v == NULL_TREE);
6770 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6771 }
6772 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6773 fd->loops[i + 1].n2);
6774 tree t2 = fold_convert (TREE_TYPE (t),
6775 fd->loops[i + 1
6776 - fd->loops[i + 1].outer].v);
6777 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6778 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6779 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6780 expand_omp_build_assign (&gsi, n2v, t);
6781 }
6782 if (i + 2 == fd->collapse && n2var)
6783 {
6784 /* For composite simd, n2 is the first iteration the current
6785 task should not handle, so we effectively want to use
6786 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6787 as the vectorized loop. Except the vectorizer will not
6788 vectorize that, so instead compute N2VAR as
6789 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6790 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6791 as the loop to vectorize. */
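/* A sketch with illustrative numbers only: if this task's chunk of the
   logical iteration space ends at N2 = 100 and the innermost loop has
   COUNTS3 = 8 iterations, then entering the innermost loop with V = 96
   gives N2VAR = 96 + MIN (4, 8) = 100 (a partial, 4-iteration pass),
   while V = 80 gives N2VAR = 80 + MIN (20, 8) = 88 (a full pass).  */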
6792 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6793 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6794 {
6795 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6796 == LT_EXPR ? -1 : 1));
6797 t = fold_build2 (PLUS_EXPR, itype,
6798 fold_convert (itype,
6799 fd->loops[i + 1].step), t);
6800 if (fd->loops[i + 1].m2)
6801 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6802 else
6803 t = fold_build2 (PLUS_EXPR, itype, t,
6804 fold_convert (itype,
6805 fd->loops[i + 1].n2));
6806 t = fold_build2 (MINUS_EXPR, itype, t,
6807 fold_convert (itype, fd->loops[i + 1].v));
6808 tree step = fold_convert (itype, fd->loops[i + 1].step);
6809 if (TYPE_UNSIGNED (itype)
6810 && fd->loops[i + 1].cond_code == GT_EXPR)
6811 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6812 fold_build1 (NEGATE_EXPR, itype, t),
6813 fold_build1 (NEGATE_EXPR, itype, step));
6814 else
6815 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6816 t = fold_convert (type, t);
6817 }
6818 else
6819 t = counts[i + 1];
6820 expand_omp_build_assign (&gsi, min_arg1, t2);
6821 expand_omp_build_assign (&gsi, min_arg2, t);
6822 e = split_block (init_bb, last_stmt (init_bb));
6823 gsi = gsi_after_labels (e->dest);
6824 init_bb = e->dest;
6825 remove_edge (FALLTHRU_EDGE (entry_bb));
6826 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6827 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6828 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6829 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6830 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6831 expand_omp_build_assign (&gsi, n2var, t);
6832 }
6833 if (i + 2 == fd->collapse && altv)
6834 {
6835 /* The vectorizer currently punts on loops with non-constant
6836 steps for the main IV (can't compute number of iterations
6837 and gives up because of that). Since for OpenMP loops the
6838 number of iterations can always be computed upfront,
6839 use an alternate IV as the loop iterator. */
6840 expand_omp_build_assign (&gsi, altv,
6841 build_zero_cst (TREE_TYPE (altv)));
6842 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6843 if (POINTER_TYPE_P (itype))
6844 itype = signed_type_for (itype);
6845 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6846 ? -1 : 1));
6847 t = fold_build2 (PLUS_EXPR, itype,
6848 fold_convert (itype, fd->loops[i + 1].step), t);
6849 t = fold_build2 (PLUS_EXPR, itype, t,
6850 fold_convert (itype,
6851 fd->loops[i + 1].m2
6852 ? n2v : fd->loops[i + 1].n2));
6853 t = fold_build2 (MINUS_EXPR, itype, t,
6854 fold_convert (itype, fd->loops[i + 1].v));
6855 tree step = fold_convert (itype, fd->loops[i + 1].step);
6856 if (TYPE_UNSIGNED (itype)
6857 && fd->loops[i + 1].cond_code == GT_EXPR)
6858 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6859 fold_build1 (NEGATE_EXPR, itype, t),
6860 fold_build1 (NEGATE_EXPR, itype, step));
6861 else
6862 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6863 t = fold_convert (TREE_TYPE (altv), t);
6864 expand_omp_build_assign (&gsi, altn2, t);
6865 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6866 fd->loops[i + 1].m2
6867 ? n2v : fd->loops[i + 1].n2);
6868 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6869 true, GSI_SAME_STMT);
6870 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6871 fd->loops[i + 1].v, t2);
6872 gassign *g
6873 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6874 build_zero_cst (TREE_TYPE (altv)));
6875 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6876 }
6877 n2v = nextn2v;
6878
6879 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6880 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6881 {
6882 e = find_edge (entry_bb, last_bb);
6883 redirect_edge_succ (e, bb);
6884 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6885 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6886 }
6887
6888 last_bb = bb;
6889 }
6890 }
6891 if (!broken_loop)
6892 {
6893 class loop *loop = alloc_loop ();
6894 loop->header = l1_bb;
6895 loop->latch = cont_bb;
6896 add_loop (loop, l1_bb->loop_father);
6897 loop->safelen = safelen_int;
6898 if (simduid)
6899 {
6900 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6901 cfun->has_simduid_loops = true;
6902 }
6903 /* Unless -fno-tree-loop-vectorize was given explicitly, hint that we
6904 want to vectorize the loop. */
6905 if ((flag_tree_loop_vectorize
6906 || !global_options_set.x_flag_tree_loop_vectorize)
6907 && flag_tree_loop_optimize
6908 && loop->safelen > 1)
6909 {
6910 loop->force_vectorize = true;
6911 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6912 {
6913 unsigned HOST_WIDE_INT v
6914 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6915 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6916 loop->simdlen = v;
6917 }
6918 cfun->has_force_vectorize_loops = true;
6919 }
6920 else if (dont_vectorize)
6921 loop->dont_vectorize = true;
6922 }
6923 else if (simduid)
6924 cfun->has_simduid_loops = true;
6925 }
6926
6927 /* The taskloop construct is represented after gimplification as
6928 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
6929 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6930 which should just compute all the needed loop temporaries
6931 for GIMPLE_OMP_TASK. */
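/* Roughly, the GIMPLE at this point nests as (a sketch, not exact IL):
     GIMPLE_OMP_FOR (taskloop, outer)
       GIMPLE_OMP_TASK
         GIMPLE_OMP_FOR (taskloop, inner)
           BODY
   This function expands only the outer GIMPLE_OMP_FOR; it emits no loop
   of its own, just the start/end (and optionally iteration count)
   assignments into the _looptemp_ variables that the task will hand to
   GOMP_taskloop{,_ull}.  */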
6932
6933 static void
6934 expand_omp_taskloop_for_outer (struct omp_region *region,
6935 struct omp_for_data *fd,
6936 gimple *inner_stmt)
6937 {
6938 tree type, bias = NULL_TREE;
6939 basic_block entry_bb, cont_bb, exit_bb;
6940 gimple_stmt_iterator gsi;
6941 gassign *assign_stmt;
6942 tree *counts = NULL;
6943 int i;
6944
6945 gcc_assert (inner_stmt);
6946 gcc_assert (region->cont);
6947 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
6948 && gimple_omp_task_taskloop_p (inner_stmt));
6949 type = TREE_TYPE (fd->loop.v);
6950
6951 /* See if we need to bias by LLONG_MIN. */
6952 if (fd->iter_type == long_long_unsigned_type_node
6953 && TREE_CODE (type) == INTEGER_TYPE
6954 && !TYPE_UNSIGNED (type))
6955 {
6956 tree n1, n2;
6957
6958 if (fd->loop.cond_code == LT_EXPR)
6959 {
6960 n1 = fd->loop.n1;
6961 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
6962 }
6963 else
6964 {
6965 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
6966 n2 = fd->loop.n1;
6967 }
6968 if (TREE_CODE (n1) != INTEGER_CST
6969 || TREE_CODE (n2) != INTEGER_CST
6970 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
6971 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
6972 }
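/* An illustrative sketch of why the bias works, using 8-bit values only
   to keep the numbers small: adding the type minimum -128 in wrapping
   unsigned arithmetic maps -5 to 123 and 10 to 138, so the original
   signed ordering survives when the runtime compares the biased values
   as unsigned.  */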
6973
6974 entry_bb = region->entry;
6975 cont_bb = region->cont;
6976 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6977 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6978 exit_bb = region->exit;
6979
6980 gsi = gsi_last_nondebug_bb (entry_bb);
6981 gimple *for_stmt = gsi_stmt (gsi);
6982 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
6983 if (fd->collapse > 1)
6984 {
6985 int first_zero_iter = -1, dummy = -1;
6986 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
6987
6988 counts = XALLOCAVEC (tree, fd->collapse);
6989 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6990 zero_iter_bb, first_zero_iter,
6991 dummy_bb, dummy, l2_dom_bb);
6992
6993 if (zero_iter_bb)
6994 {
6995 /* Some counts[i] vars might be uninitialized if
6996 some loop has zero iterations. But the body shouldn't
6997 be executed in that case, so just avoid uninit warnings. */
6998 for (i = first_zero_iter; i < fd->collapse; i++)
6999 if (SSA_VAR_P (counts[i]))
7000 TREE_NO_WARNING (counts[i]) = 1;
7001 gsi_prev (&gsi);
7002 edge e = split_block (entry_bb, gsi_stmt (gsi));
7003 entry_bb = e->dest;
7004 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7005 gsi = gsi_last_bb (entry_bb);
7006 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7007 get_immediate_dominator (CDI_DOMINATORS,
7008 zero_iter_bb));
7009 }
7010 }
7011
7012 tree t0, t1;
7013 t1 = fd->loop.n2;
7014 t0 = fd->loop.n1;
7015 if (POINTER_TYPE_P (TREE_TYPE (t0))
7016 && TYPE_PRECISION (TREE_TYPE (t0))
7017 != TYPE_PRECISION (fd->iter_type))
7018 {
7019 /* Avoid casting pointers to an integer type of a different size. */
7020 tree itype = signed_type_for (type);
7021 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7022 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7023 }
7024 else
7025 {
7026 t1 = fold_convert (fd->iter_type, t1);
7027 t0 = fold_convert (fd->iter_type, t0);
7028 }
7029 if (bias)
7030 {
7031 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7032 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7033 }
7034
7035 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7036 OMP_CLAUSE__LOOPTEMP_);
7037 gcc_assert (innerc);
7038 tree startvar = OMP_CLAUSE_DECL (innerc);
7039 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7040 gcc_assert (innerc);
7041 tree endvar = OMP_CLAUSE_DECL (innerc);
7042 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7043 {
7044 gcc_assert (innerc);
7045 for (i = 1; i < fd->collapse; i++)
7046 {
7047 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7048 OMP_CLAUSE__LOOPTEMP_);
7049 gcc_assert (innerc);
7050 }
7051 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7052 OMP_CLAUSE__LOOPTEMP_);
7053 if (innerc)
7054 {
7055 /* If needed (the inner taskloop has a lastprivate clause), propagate
7056 down the total number of iterations. */
7057 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7058 NULL_TREE, false,
7059 GSI_CONTINUE_LINKING);
7060 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7061 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7062 }
7063 }
7064
7065 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7066 GSI_CONTINUE_LINKING);
7067 assign_stmt = gimple_build_assign (startvar, t0);
7068 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7069
7070 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7071 GSI_CONTINUE_LINKING);
7072 assign_stmt = gimple_build_assign (endvar, t1);
7073 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7074 if (fd->collapse > 1)
7075 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7076
7077 /* Remove the GIMPLE_OMP_FOR statement. */
7078 gsi = gsi_for_stmt (for_stmt);
7079 gsi_remove (&gsi, true);
7080
7081 gsi = gsi_last_nondebug_bb (cont_bb);
7082 gsi_remove (&gsi, true);
7083
7084 gsi = gsi_last_nondebug_bb (exit_bb);
7085 gsi_remove (&gsi, true);
7086
7087 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7088 remove_edge (BRANCH_EDGE (entry_bb));
7089 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7090 remove_edge (BRANCH_EDGE (cont_bb));
7091 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7092 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7093 recompute_dominator (CDI_DOMINATORS, region->entry));
7094 }
7095
7096 /* The taskloop construct is represented after gimplification as
7097 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
7098 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7099 The GOMP_taskloop{,_ull} function arranges for each task to be given just
7100 a single range of iterations. */
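/* A sketch of what each task ends up executing (names illustrative; the
   actual per-task bounds arrive through the two _LOOPTEMP_ clauses read
   below):
     for (V = task_start; V cond task_end; V += STEP)
       BODY;  */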
7101
7102 static void
7103 expand_omp_taskloop_for_inner (struct omp_region *region,
7104 struct omp_for_data *fd,
7105 gimple *inner_stmt)
7106 {
7107 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7108 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7109 basic_block fin_bb;
7110 gimple_stmt_iterator gsi;
7111 edge ep;
7112 bool broken_loop = region->cont == NULL;
7113 tree *counts = NULL;
7114 tree n1, n2, step;
7115
7116 itype = type = TREE_TYPE (fd->loop.v);
7117 if (POINTER_TYPE_P (type))
7118 itype = signed_type_for (type);
7119
7120 /* See if we need to bias by LLONG_MIN. */
7121 if (fd->iter_type == long_long_unsigned_type_node
7122 && TREE_CODE (type) == INTEGER_TYPE
7123 && !TYPE_UNSIGNED (type))
7124 {
7125 tree n1, n2;
7126
7127 if (fd->loop.cond_code == LT_EXPR)
7128 {
7129 n1 = fd->loop.n1;
7130 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7131 }
7132 else
7133 {
7134 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7135 n2 = fd->loop.n1;
7136 }
7137 if (TREE_CODE (n1) != INTEGER_CST
7138 || TREE_CODE (n2) != INTEGER_CST
7139 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7140 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7141 }
7142
7143 entry_bb = region->entry;
7144 cont_bb = region->cont;
7145 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7146 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7147 gcc_assert (broken_loop
7148 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7149 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7150 if (!broken_loop)
7151 {
7152 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7153 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7154 }
7155 exit_bb = region->exit;
7156
7157 /* Iteration space partitioning goes in ENTRY_BB. */
7158 gsi = gsi_last_nondebug_bb (entry_bb);
7159 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7160
7161 if (fd->collapse > 1)
7162 {
7163 int first_zero_iter = -1, dummy = -1;
7164 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7165
7166 counts = XALLOCAVEC (tree, fd->collapse);
7167 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7168 fin_bb, first_zero_iter,
7169 dummy_bb, dummy, l2_dom_bb);
7170 t = NULL_TREE;
7171 }
7172 else
7173 t = integer_one_node;
7174
7175 step = fd->loop.step;
7176 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7177 OMP_CLAUSE__LOOPTEMP_);
7178 gcc_assert (innerc);
7179 n1 = OMP_CLAUSE_DECL (innerc);
7180 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7181 gcc_assert (innerc);
7182 n2 = OMP_CLAUSE_DECL (innerc);
7183 if (bias)
7184 {
7185 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7186 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7187 }
7188 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7189 true, NULL_TREE, true, GSI_SAME_STMT);
7190 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7191 true, NULL_TREE, true, GSI_SAME_STMT);
7192 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7193 true, NULL_TREE, true, GSI_SAME_STMT);
7194
7195 tree startvar = fd->loop.v;
7196 tree endvar = NULL_TREE;
7197
7198 if (gimple_omp_for_combined_p (fd->for_stmt))
7199 {
7200 tree clauses = gimple_omp_for_clauses (inner_stmt);
7201 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7202 gcc_assert (innerc);
7203 startvar = OMP_CLAUSE_DECL (innerc);
7204 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7205 OMP_CLAUSE__LOOPTEMP_);
7206 gcc_assert (innerc);
7207 endvar = OMP_CLAUSE_DECL (innerc);
7208 }
7209 t = fold_convert (TREE_TYPE (startvar), n1);
7210 t = force_gimple_operand_gsi (&gsi, t,
7211 DECL_P (startvar)
7212 && TREE_ADDRESSABLE (startvar),
7213 NULL_TREE, false, GSI_CONTINUE_LINKING);
7214 gimple *assign_stmt = gimple_build_assign (startvar, t);
7215 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7216
7217 t = fold_convert (TREE_TYPE (startvar), n2);
7218 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7219 false, GSI_CONTINUE_LINKING);
7220 if (endvar)
7221 {
7222 assign_stmt = gimple_build_assign (endvar, e);
7223 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7224 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7225 assign_stmt = gimple_build_assign (fd->loop.v, e);
7226 else
7227 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7228 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7229 }
7230
7231 tree *nonrect_bounds = NULL;
7232 if (fd->collapse > 1)
7233 {
7234 if (fd->non_rect)
7235 {
7236 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7237 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7238 }
7239 gcc_assert (gsi_bb (gsi) == entry_bb);
7240 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7241 startvar);
7242 entry_bb = gsi_bb (gsi);
7243 }
7244
7245 if (!broken_loop)
7246 {
7247 /* The code controlling the sequential loop replaces the
7248 GIMPLE_OMP_CONTINUE. */
7249 gsi = gsi_last_nondebug_bb (cont_bb);
7250 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7251 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7252 vmain = gimple_omp_continue_control_use (cont_stmt);
7253 vback = gimple_omp_continue_control_def (cont_stmt);
7254
7255 if (!gimple_omp_for_combined_p (fd->for_stmt))
7256 {
7257 if (POINTER_TYPE_P (type))
7258 t = fold_build_pointer_plus (vmain, step);
7259 else
7260 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7261 t = force_gimple_operand_gsi (&gsi, t,
7262 DECL_P (vback)
7263 && TREE_ADDRESSABLE (vback),
7264 NULL_TREE, true, GSI_SAME_STMT);
7265 assign_stmt = gimple_build_assign (vback, t);
7266 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7267
7268 t = build2 (fd->loop.cond_code, boolean_type_node,
7269 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7270 ? t : vback, e);
7271 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7272 }
7273
7274 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7275 gsi_remove (&gsi, true);
7276
7277 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7278 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7279 cont_bb, body_bb);
7280 }
7281
7282 /* Remove the GIMPLE_OMP_FOR statement. */
7283 gsi = gsi_for_stmt (fd->for_stmt);
7284 gsi_remove (&gsi, true);
7285
7286 /* Remove the GIMPLE_OMP_RETURN statement. */
7287 gsi = gsi_last_nondebug_bb (exit_bb);
7288 gsi_remove (&gsi, true);
7289
7290 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7291 if (!broken_loop)
7292 remove_edge (BRANCH_EDGE (entry_bb));
7293 else
7294 {
7295 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7296 region->outer->cont = NULL;
7297 }
7298
7299 /* Connect all the blocks. */
7300 if (!broken_loop)
7301 {
7302 ep = find_edge (cont_bb, body_bb);
7303 if (gimple_omp_for_combined_p (fd->for_stmt))
7304 {
7305 remove_edge (ep);
7306 ep = NULL;
7307 }
7308 else if (fd->collapse > 1)
7309 {
7310 remove_edge (ep);
7311 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7312 }
7313 else
7314 ep->flags = EDGE_TRUE_VALUE;
7315 find_edge (cont_bb, fin_bb)->flags
7316 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7317 }
7318
7319 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7320 recompute_dominator (CDI_DOMINATORS, body_bb));
7321 if (!broken_loop)
7322 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7323 recompute_dominator (CDI_DOMINATORS, fin_bb));
7324
7325 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7326 {
7327 class loop *loop = alloc_loop ();
7328 loop->header = body_bb;
7329 if (collapse_bb == NULL)
7330 loop->latch = cont_bb;
7331 add_loop (loop, body_bb->loop_father);
7332 }
7333 }
7334
7335 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7336 partitioned loop. The lowering here is abstracted, in that the
7337 loop parameters are passed through internal functions, which are
7338 further lowered by oacc_device_lower, once we get to the target
7339 compiler. The loop is of the form:
7340
7341 for (V = B; V LTGT E; V += S) {BODY}
7342
7343 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
7344 (constant 0 for no chunking), and we will have a GWV partitioning
7345 mask, specifying dimensions over which the loop is to be
7346 partitioned (see note below). We generate code that looks like
7347 (this ignores tiling):
7348
7349 <entry_bb> [incoming FALL->body, BRANCH->exit]
7350 typedef signedintify (typeof (V)) T; // underlying signed integral type
7351 T range = E - B;
7352 T chunk_no = 0;
7353 T DIR = LTGT == '<' ? +1 : -1;
7354 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7355 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7356
7357 <head_bb> [created by splitting end of entry_bb]
7358 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7359 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7360 if (!(offset LTGT bound)) goto bottom_bb;
7361
7362 <body_bb> [incoming]
7363 V = B + offset;
7364 {BODY}
7365
7366 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7367 offset += step;
7368 if (offset LTGT bound) goto body_bb; [*]
7369
7370 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7371 chunk_no++;
7372 if (chunk_no < chunk_max) goto head_bb;
7373
7374 <exit_bb> [incoming]
7375 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7376
7377 [*] Needed if V live at end of loop. */
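/* As an illustration only: for
     #pragma acc loop gang
     for (v = 0; v < n; v++)
   we have B = 0, E = n, S = 1, LTGT = '<' and therefore DIR = +1 and
   range = n; the GOACC_LOOP_OFFSET/BOUND calls then carve out this
   gang's [offset, bound) slice of the range, and V is rebuilt in the
   body as B + offset.  */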
7378
7379 static void
7380 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7381 {
7382 tree v = fd->loop.v;
7383 enum tree_code cond_code = fd->loop.cond_code;
7384 enum tree_code plus_code = PLUS_EXPR;
7385
7386 tree chunk_size = integer_minus_one_node;
7387 tree gwv = integer_zero_node;
7388 tree iter_type = TREE_TYPE (v);
7389 tree diff_type = iter_type;
7390 tree plus_type = iter_type;
7391 struct oacc_collapse *counts = NULL;
7392
7393 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7394 == GF_OMP_FOR_KIND_OACC_LOOP);
7395 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7396 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7397
7398 if (POINTER_TYPE_P (iter_type))
7399 {
7400 plus_code = POINTER_PLUS_EXPR;
7401 plus_type = sizetype;
7402 }
7403 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7404 diff_type = signed_type_for (diff_type);
7405 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7406 diff_type = integer_type_node;
7407
7408 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7409 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7410 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7411 basic_block bottom_bb = NULL;
7412
7413 /* entry_bb has two successors; the branch edge is to the exit
7414 block, fallthrough edge to body. */
7415 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7416 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7417
7418 /* If cont_bb is non-NULL, it has 2 successors. The branch successor is
7419 body_bb, or a block whose only successor is body_bb. Its
7420 fallthrough successor is the final block (same as the branch
7421 successor of the entry_bb). */
7422 if (cont_bb)
7423 {
7424 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7425 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7426
7427 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7428 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7429 }
7430 else
7431 gcc_assert (!gimple_in_ssa_p (cfun));
7432
7433 /* The exit block only has entry_bb and cont_bb as predecessors. */
7434 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7435
7436 tree chunk_no;
7437 tree chunk_max = NULL_TREE;
7438 tree bound, offset;
7439 tree step = create_tmp_var (diff_type, ".step");
7440 bool up = cond_code == LT_EXPR;
7441 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7442 bool chunking = !gimple_in_ssa_p (cfun);
7443 bool negating;
7444
7445 /* Tiling vars. */
7446 tree tile_size = NULL_TREE;
7447 tree element_s = NULL_TREE;
7448 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7449 basic_block elem_body_bb = NULL;
7450 basic_block elem_cont_bb = NULL;
7451
7452 /* SSA instances. */
7453 tree offset_incr = NULL_TREE;
7454 tree offset_init = NULL_TREE;
7455
7456 gimple_stmt_iterator gsi;
7457 gassign *ass;
7458 gcall *call;
7459 gimple *stmt;
7460 tree expr;
7461 location_t loc;
7462 edge split, be, fte;
7463
7464 /* Split the end of entry_bb to create head_bb. */
7465 split = split_block (entry_bb, last_stmt (entry_bb));
7466 basic_block head_bb = split->dest;
7467 entry_bb = split->src;
7468
7469 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7470 gsi = gsi_last_nondebug_bb (entry_bb);
7471 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7472 loc = gimple_location (for_stmt);
7473
7474 if (gimple_in_ssa_p (cfun))
7475 {
7476 offset_init = gimple_omp_for_index (for_stmt, 0);
7477 gcc_assert (integer_zerop (fd->loop.n1));
7478 /* The SSA parallelizer does gang parallelism. */
7479 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7480 }
7481
7482 if (fd->collapse > 1 || fd->tiling)
7483 {
7484 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7485 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7486 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
7487 TREE_TYPE (fd->loop.n2), loc);
7488
7489 if (SSA_VAR_P (fd->loop.n2))
7490 {
7491 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7492 true, GSI_SAME_STMT);
7493 ass = gimple_build_assign (fd->loop.n2, total);
7494 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7495 }
7496 }
7497
7498 tree b = fd->loop.n1;
7499 tree e = fd->loop.n2;
7500 tree s = fd->loop.step;
7501
7502 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7503 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7504
7505 /* Convert the step, avoiding possible unsigned->signed overflow. */
7506 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7507 if (negating)
7508 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7509 s = fold_convert (diff_type, s);
7510 if (negating)
7511 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7512 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7513
7514 if (!chunking)
7515 chunk_size = integer_zero_node;
7516 expr = fold_convert (diff_type, chunk_size);
7517 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7518 NULL_TREE, true, GSI_SAME_STMT);
7519
7520 if (fd->tiling)
7521 {
7522 /* Determine the tile size and element step,
7523 modify the outer loop step size. */
7524 tile_size = create_tmp_var (diff_type, ".tile_size");
7525 expr = build_int_cst (diff_type, 1);
7526 for (int ix = 0; ix < fd->collapse; ix++)
7527 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7528 expr = force_gimple_operand_gsi (&gsi, expr, true,
7529 NULL_TREE, true, GSI_SAME_STMT);
7530 ass = gimple_build_assign (tile_size, expr);
7531 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7532
7533 element_s = create_tmp_var (diff_type, ".element_s");
7534 ass = gimple_build_assign (element_s, s);
7535 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7536
7537 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7538 s = force_gimple_operand_gsi (&gsi, expr, true,
7539 NULL_TREE, true, GSI_SAME_STMT);
7540 }
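/* Illustrative numbers: for collapse(2) with tile(4,8) the product above
   gives tile_size = 32, element_s keeps the original elementwise step,
   and the outer step s is scaled so each outer iteration covers one
   whole 32-element tile.  */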
7541
7542 /* Determine the range, avoiding possible unsigned->signed overflow. */
7543 negating = !up && TYPE_UNSIGNED (iter_type);
7544 expr = fold_build2 (MINUS_EXPR, plus_type,
7545 fold_convert (plus_type, negating ? b : e),
7546 fold_convert (plus_type, negating ? e : b));
7547 expr = fold_convert (diff_type, expr);
7548 if (negating)
7549 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7550 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7551 NULL_TREE, true, GSI_SAME_STMT);
7552
7553 chunk_no = build_int_cst (diff_type, 0);
7554 if (chunking)
7555 {
7556 gcc_assert (!gimple_in_ssa_p (cfun));
7557
7558 expr = chunk_no;
7559 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7560 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7561
7562 ass = gimple_build_assign (chunk_no, expr);
7563 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7564
7565 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7566 build_int_cst (integer_type_node,
7567 IFN_GOACC_LOOP_CHUNKS),
7568 dir, range, s, chunk_size, gwv);
7569 gimple_call_set_lhs (call, chunk_max);
7570 gimple_set_location (call, loc);
7571 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7572 }
7573 else
7574 chunk_size = chunk_no;
7575
7576 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7577 build_int_cst (integer_type_node,
7578 IFN_GOACC_LOOP_STEP),
7579 dir, range, s, chunk_size, gwv);
7580 gimple_call_set_lhs (call, step);
7581 gimple_set_location (call, loc);
7582 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7583
7584 /* Remove the GIMPLE_OMP_FOR. */
7585 gsi_remove (&gsi, true);
7586
7587 /* Fixup edges from head_bb. */
7588 be = BRANCH_EDGE (head_bb);
7589 fte = FALLTHRU_EDGE (head_bb);
7590 be->flags |= EDGE_FALSE_VALUE;
7591 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7592
7593 basic_block body_bb = fte->dest;
7594
7595 if (gimple_in_ssa_p (cfun))
7596 {
7597 gsi = gsi_last_nondebug_bb (cont_bb);
7598 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7599
7600 offset = gimple_omp_continue_control_use (cont_stmt);
7601 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7602 }
7603 else
7604 {
7605 offset = create_tmp_var (diff_type, ".offset");
7606 offset_init = offset_incr = offset;
7607 }
7608 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7609
7610 /* Loop offset & bound go into head_bb. */
7611 gsi = gsi_start_bb (head_bb);
7612
7613 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7614 build_int_cst (integer_type_node,
7615 IFN_GOACC_LOOP_OFFSET),
7616 dir, range, s,
7617 chunk_size, gwv, chunk_no);
7618 gimple_call_set_lhs (call, offset_init);
7619 gimple_set_location (call, loc);
7620 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7621
7622 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7623 build_int_cst (integer_type_node,
7624 IFN_GOACC_LOOP_BOUND),
7625 dir, range, s,
7626 chunk_size, gwv, offset_init);
7627 gimple_call_set_lhs (call, bound);
7628 gimple_set_location (call, loc);
7629 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7630
7631 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7632 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7633 GSI_CONTINUE_LINKING);
7634
7635 /* V assignment goes into body_bb. */
7636 if (!gimple_in_ssa_p (cfun))
7637 {
7638 gsi = gsi_start_bb (body_bb);
7639
7640 expr = build2 (plus_code, iter_type, b,
7641 fold_convert (plus_type, offset));
7642 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7643 true, GSI_SAME_STMT);
7644 ass = gimple_build_assign (v, expr);
7645 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7646
7647 if (fd->collapse > 1 || fd->tiling)
7648 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
7649
7650 if (fd->tiling)
7651 {
7652 /* Determine the range of the element loop -- usually simply
7653 the tile_size, but could be smaller if the final
7654 iteration of the outer loop is a partial tile. */
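/* E.g. (made-up numbers, unit element step, one partition covering the
   whole range): with 10 elements and a tile of 4 the outer offsets are
   0, 4 and 8; at offset 8 the MIN below clamps e_range to the 2
   remaining elements.  */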
7655 tree e_range = create_tmp_var (diff_type, ".e_range");
7656
7657 expr = build2 (MIN_EXPR, diff_type,
7658 build2 (MINUS_EXPR, diff_type, bound, offset),
7659 build2 (MULT_EXPR, diff_type, tile_size,
7660 element_s));
7661 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7662 true, GSI_SAME_STMT);
7663 ass = gimple_build_assign (e_range, expr);
7664 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7665
7666 /* Determine bound, offset & step of inner loop. */
7667 e_bound = create_tmp_var (diff_type, ".e_bound");
7668 e_offset = create_tmp_var (diff_type, ".e_offset");
7669 e_step = create_tmp_var (diff_type, ".e_step");
7670
7671 /* Mark these as element loops. */
7672 tree t, e_gwv = integer_minus_one_node;
7673 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7674
7675 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7676 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7677 element_s, chunk, e_gwv, chunk);
7678 gimple_call_set_lhs (call, e_offset);
7679 gimple_set_location (call, loc);
7680 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7681
7682 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7683 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7684 element_s, chunk, e_gwv, e_offset);
7685 gimple_call_set_lhs (call, e_bound);
7686 gimple_set_location (call, loc);
7687 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7688
7689 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7690 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7691 element_s, chunk, e_gwv);
7692 gimple_call_set_lhs (call, e_step);
7693 gimple_set_location (call, loc);
7694 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7695
7696 /* Add test and split block. */
7697 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7698 stmt = gimple_build_cond_empty (expr);
7699 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7700 split = split_block (body_bb, stmt);
7701 elem_body_bb = split->dest;
7702 if (cont_bb == body_bb)
7703 cont_bb = elem_body_bb;
7704 body_bb = split->src;
7705
7706 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7707
7708 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7709 if (cont_bb == NULL)
7710 {
7711 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7712 e->probability = profile_probability::even ();
7713 split->probability = profile_probability::even ();
7714 }
7715
7716 /* Initialize the user's loop vars. */
7717 gsi = gsi_start_bb (elem_body_bb);
7718 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
7719 }
7720 }
7721
7722 /* Loop increment goes into cont_bb. If this is not a loop, we
7723 will have spawned threads as if it was, and each one will
7724 execute one iteration. The specification is not explicit about
7725 whether such constructs are ill-formed or not, and they can
7726 occur, especially when noreturn routines are involved. */
7727 if (cont_bb)
7728 {
7729 gsi = gsi_last_nondebug_bb (cont_bb);
7730 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7731 loc = gimple_location (cont_stmt);
7732
7733 if (fd->tiling)
7734 {
7735 /* Insert element loop increment and test. */
7736 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7737 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7738 true, GSI_SAME_STMT);
7739 ass = gimple_build_assign (e_offset, expr);
7740 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7741 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7742
7743 stmt = gimple_build_cond_empty (expr);
7744 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7745 split = split_block (cont_bb, stmt);
7746 elem_cont_bb = split->src;
7747 cont_bb = split->dest;
7748
7749 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7750 split->probability = profile_probability::unlikely ().guessed ();
7751 edge latch_edge
7752 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7753 latch_edge->probability = profile_probability::likely ().guessed ();
7754
7755 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7756 skip_edge->probability = profile_probability::unlikely ().guessed ();
7757 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7758 loop_entry_edge->probability
7759 = profile_probability::likely ().guessed ();
7760
7761 gsi = gsi_for_stmt (cont_stmt);
7762 }
7763
7764 /* Increment offset. */
7765 if (gimple_in_ssa_p (cfun))
7766 expr = build2 (plus_code, iter_type, offset,
7767 fold_convert (plus_type, step));
7768 else
7769 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7770 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7771 true, GSI_SAME_STMT);
7772 ass = gimple_build_assign (offset_incr, expr);
7773 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7774 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7775 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7776
7777 /* Remove the GIMPLE_OMP_CONTINUE. */
7778 gsi_remove (&gsi, true);
7779
7780 /* Fixup edges from cont_bb. */
7781 be = BRANCH_EDGE (cont_bb);
7782 fte = FALLTHRU_EDGE (cont_bb);
7783 be->flags |= EDGE_TRUE_VALUE;
7784 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7785
7786 if (chunking)
7787 {
7788 /* Split the beginning of exit_bb to make bottom_bb. We
7789 need to insert a nop at the start, because splitting is
7790 after a stmt, not before. */
7791 gsi = gsi_start_bb (exit_bb);
7792 stmt = gimple_build_nop ();
7793 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7794 split = split_block (exit_bb, stmt);
7795 bottom_bb = split->src;
7796 exit_bb = split->dest;
7797 gsi = gsi_last_bb (bottom_bb);
7798
7799 /* Chunk increment and test goes into bottom_bb. */
7800 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7801 build_int_cst (diff_type, 1));
7802 ass = gimple_build_assign (chunk_no, expr);
7803 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7804
7805 /* Chunk test at end of bottom_bb. */
7806 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7807 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7808 GSI_CONTINUE_LINKING);
7809
7810 /* Fixup edges from bottom_bb. */
7811 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7812 split->probability = profile_probability::unlikely ().guessed ();
7813 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7814 latch_edge->probability = profile_probability::likely ().guessed ();
7815 }
7816 }
7817
7818 gsi = gsi_last_nondebug_bb (exit_bb);
7819 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7820 loc = gimple_location (gsi_stmt (gsi));
7821
7822 if (!gimple_in_ssa_p (cfun))
7823 {
7824 /* Insert the final value of V, in case it is live. This is the
7825 value for the only thread that survives past the join. */
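/* With illustrative numbers B = 0, E = 10, S = 3 (so range = 10 and
   dir = +1), the expression built below is ((10 - 1 + 3) / 3) * 3 = 12
   added to B, i.e. V = 12, the value the equivalent serial loop would
   leave in V.  */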
7826 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7827 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7828 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7829 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7830 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7831 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7832 true, GSI_SAME_STMT);
7833 ass = gimple_build_assign (v, expr);
7834 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7835 }
7836
7837 /* Remove the OMP_RETURN. */
7838 gsi_remove (&gsi, true);
7839
7840 if (cont_bb)
7841 {
7842 /* We now have one, two or three nested loops. Update the loop
7843 structures. */
7844 class loop *parent = entry_bb->loop_father;
7845 class loop *body = body_bb->loop_father;
7846
7847 if (chunking)
7848 {
7849 class loop *chunk_loop = alloc_loop ();
7850 chunk_loop->header = head_bb;
7851 chunk_loop->latch = bottom_bb;
7852 add_loop (chunk_loop, parent);
7853 parent = chunk_loop;
7854 }
7855 else if (parent != body)
7856 {
7857 gcc_assert (body->header == body_bb);
7858 gcc_assert (body->latch == cont_bb
7859 || single_pred (body->latch) == cont_bb);
7860 parent = NULL;
7861 }
7862
7863 if (parent)
7864 {
7865 class loop *body_loop = alloc_loop ();
7866 body_loop->header = body_bb;
7867 body_loop->latch = cont_bb;
7868 add_loop (body_loop, parent);
7869
7870 if (fd->tiling)
7871 {
7872 /* Insert tiling's element loop. */
7873 class loop *inner_loop = alloc_loop ();
7874 inner_loop->header = elem_body_bb;
7875 inner_loop->latch = elem_cont_bb;
7876 add_loop (inner_loop, body_loop);
7877 }
7878 }
7879 }
7880 }
7881
7882 /* Expand the OMP loop defined by REGION. */
7883
7884 static void
7885 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7886 {
7887 struct omp_for_data fd;
7888 struct omp_for_data_loop *loops;
7889
7890 loops = XALLOCAVEC (struct omp_for_data_loop,
7891 gimple_omp_for_collapse (last_stmt (region->entry)));
7892 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7893 &fd, loops);
7894 region->sched_kind = fd.sched_kind;
7895 region->sched_modifiers = fd.sched_modifiers;
7896 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7897 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7898 {
7899 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7900 if ((loops[i].m1 || loops[i].m2)
7901 && (loops[i].m1 == NULL_TREE
7902 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7903 && (loops[i].m2 == NULL_TREE
7904 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7905 && TREE_CODE (loops[i].step) == INTEGER_CST
7906 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7907 {
7908 tree t;
7909 tree itype = TREE_TYPE (loops[i].v);
7910 if (loops[i].m1 && loops[i].m2)
7911 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7912 else if (loops[i].m1)
7913 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7914 else
7915 t = loops[i].m2;
7916 t = fold_build2 (MULT_EXPR, itype, t,
7917 fold_convert (itype,
7918 loops[i - loops[i].outer].step));
7919 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7920 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7921 fold_build1 (NEGATE_EXPR, itype, t),
7922 fold_build1 (NEGATE_EXPR, itype,
7923 fold_convert (itype,
7924 loops[i].step)));
7925 else
7926 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7927 fold_convert (itype, loops[i].step));
7928 if (integer_nonzerop (t))
7929 error_at (gimple_location (fd.for_stmt),
7930 "invalid OpenMP non-rectangular loop step; "
7931 "%<(%E - %E) * %E%> is not a multiple of loop %d "
7932 "step %qE",
7933 loops[i].m2 ? loops[i].m2 : integer_zero_node,
7934 loops[i].m1 ? loops[i].m1 : integer_zero_node,
7935 loops[i - loops[i].outer].step, i + 1,
7936 loops[i].step);
7937 }
7938 }
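/* An illustrative (hypothetical) source triggering the diagnostic above:
     #pragma omp for collapse(2)
     for (i = 0; i < n; i += 2)
       for (j = 0; j < i * 4; j += 3)
   Here m2 = 4, m1 is absent, the outer step is 2 and the inner step 3;
   (4 - 0) * 2 = 8 is not a multiple of 3, so the error is reported.
   The check only fires when all the involved values are compile-time
   constants.  */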
7939
7940 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
7941 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7942 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7943 if (region->cont)
7944 {
7945 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
7946 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7947 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7948 }
7949 else
7950 /* If there isn't a continue then this is a degenerate case where
7951 the introduction of abnormal edges during lowering will prevent
7952 original loops from being detected. Fix that up. */
7953 loops_state_set (LOOPS_NEED_FIXUP);
7954
7955 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
7956 expand_omp_simd (region, &fd);
7957 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
7958 {
7959 gcc_assert (!inner_stmt && !fd.non_rect);
7960 expand_oacc_for (region, &fd);
7961 }
7962 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
7963 {
7964 if (gimple_omp_for_combined_into_p (fd.for_stmt))
7965 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
7966 else
7967 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
7968 }
7969 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
7970 && !fd.have_ordered)
7971 {
7972 if (fd.chunk_size == NULL)
7973 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
7974 else
7975 expand_omp_for_static_chunk (region, &fd, inner_stmt);
7976 }
7977 else
7978 {
7979 int fn_index, start_ix, next_ix;
7980 unsigned HOST_WIDE_INT sched = 0;
7981 tree sched_arg = NULL_TREE;
7982
7983 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
7984 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
7985 if (fd.chunk_size == NULL
7986 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
7987 fd.chunk_size = integer_zero_node;
7988 switch (fd.sched_kind)
7989 {
7990 case OMP_CLAUSE_SCHEDULE_RUNTIME:
7991 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
7992 && fd.lastprivate_conditional == 0)
7993 {
7994 gcc_assert (!fd.have_ordered);
7995 fn_index = 6;
7996 sched = 4;
7997 }
7998 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
7999 && !fd.have_ordered
8000 && fd.lastprivate_conditional == 0)
8001 fn_index = 7;
8002 else
8003 {
8004 fn_index = 3;
8005 sched = (HOST_WIDE_INT_1U << 31);
8006 }
8007 break;
8008 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8009 case OMP_CLAUSE_SCHEDULE_GUIDED:
8010 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8011 && !fd.have_ordered
8012 && fd.lastprivate_conditional == 0)
8013 {
8014 fn_index = 3 + fd.sched_kind;
8015 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8016 break;
8017 }
8018 fn_index = fd.sched_kind;
8019 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8020 sched += (HOST_WIDE_INT_1U << 31);
8021 break;
8022 case OMP_CLAUSE_SCHEDULE_STATIC:
8023 gcc_assert (fd.have_ordered);
8024 fn_index = 0;
8025 sched = (HOST_WIDE_INT_1U << 31) + 1;
8026 break;
8027 default:
8028 gcc_unreachable ();
8029 }
8030 if (!fd.ordered)
8031 fn_index += fd.have_ordered * 8;
8032 if (fd.ordered)
8033 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8034 else
8035 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8036 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8037 if (fd.have_reductemp || fd.have_pointer_condtemp)
8038 {
8039 if (fd.ordered)
8040 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8041 else if (fd.have_ordered)
8042 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8043 else
8044 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8045 sched_arg = build_int_cstu (long_integer_type_node, sched);
8046 if (!fd.chunk_size)
8047 fd.chunk_size = integer_zero_node;
8048 }
8049 if (fd.iter_type == long_long_unsigned_type_node)
8050 {
8051 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8052 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8053 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8054 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8055 }
8056 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8057 (enum built_in_function) next_ix, sched_arg,
8058 inner_stmt);
8059 }
8060
8061 if (gimple_in_ssa_p (cfun))
8062 update_ssa (TODO_update_ssa_only_virtuals);
8063 }
8064
8065 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8066
8067 v = GOMP_sections_start (n);
8068 L0:
8069 switch (v)
8070 {
8071 case 0:
8072 goto L2;
8073 case 1:
8074 section 1;
8075 goto L1;
8076 case 2:
8077 ...
8078 case n:
8079 ...
8080 default:
8081 abort ();
8082 }
8083 L1:
8084 v = GOMP_sections_next ();
8085 goto L0;
8086 L2:
8087 reduction;
8088
8089 If this is a combined parallel sections region, replace the call to
8090 GOMP_sections_start with a call to GOMP_sections_next. */
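/* For instance (illustrative only), for a sections region with two
   sections the runtime call below becomes GOMP_sections_start (2) (or
   GOMP_sections_next ()), and the switch gets cases 0 (done), 1 and 2
   plus the aborting default.  */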
8091
8092 static void
8093 expand_omp_sections (struct omp_region *region)
8094 {
8095 tree t, u, vin = NULL, vmain, vnext, l2;
8096 unsigned len;
8097 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8098 gimple_stmt_iterator si, switch_si;
8099 gomp_sections *sections_stmt;
8100 gimple *stmt;
8101 gomp_continue *cont;
8102 edge_iterator ei;
8103 edge e;
8104 struct omp_region *inner;
8105 unsigned i, casei;
8106 bool exit_reachable = region->cont != NULL;
8107
8108 gcc_assert (region->exit != NULL);
8109 entry_bb = region->entry;
8110 l0_bb = single_succ (entry_bb);
8111 l1_bb = region->cont;
8112 l2_bb = region->exit;
8113 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8114 l2 = gimple_block_label (l2_bb);
8115 else
8116 {
8117 /* This can happen if there are reductions. */
8118 len = EDGE_COUNT (l0_bb->succs);
8119 gcc_assert (len > 0);
8120 e = EDGE_SUCC (l0_bb, len - 1);
8121 si = gsi_last_nondebug_bb (e->dest);
8122 l2 = NULL_TREE;
8123 if (gsi_end_p (si)
8124 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8125 l2 = gimple_block_label (e->dest);
8126 else
8127 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8128 {
8129 si = gsi_last_nondebug_bb (e->dest);
8130 if (gsi_end_p (si)
8131 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8132 {
8133 l2 = gimple_block_label (e->dest);
8134 break;
8135 }
8136 }
8137 }
8138 if (exit_reachable)
8139 default_bb = create_empty_bb (l1_bb->prev_bb);
8140 else
8141 default_bb = create_empty_bb (l0_bb);
8142
8143 /* We will build a switch() with enough cases for all the
8144 GIMPLE_OMP_SECTION regions, a '0' case reached when there is no more
8145 work, and a default case to abort if something goes wrong. */
8146 len = EDGE_COUNT (l0_bb->succs);
8147
8148 /* Use vec::quick_push on label_vec throughout, since we know the size
8149 in advance. */
8150 auto_vec<tree> label_vec (len);
8151
8152 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8153 GIMPLE_OMP_SECTIONS statement. */
8154 si = gsi_last_nondebug_bb (entry_bb);
8155 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8156 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8157 vin = gimple_omp_sections_control (sections_stmt);
8158 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8159 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8160 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8161 tree cond_var = NULL_TREE;
8162 if (reductmp || condtmp)
8163 {
8164 tree reductions = null_pointer_node, mem = null_pointer_node;
8165 tree memv = NULL_TREE, condtemp = NULL_TREE;
8166 gimple_stmt_iterator gsi = gsi_none ();
8167 gimple *g = NULL;
8168 if (reductmp)
8169 {
8170 reductions = OMP_CLAUSE_DECL (reductmp);
8171 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8172 g = SSA_NAME_DEF_STMT (reductions);
8173 reductions = gimple_assign_rhs1 (g);
8174 OMP_CLAUSE_DECL (reductmp) = reductions;
8175 gsi = gsi_for_stmt (g);
8176 }
8177 else
8178 gsi = si;
8179 if (condtmp)
8180 {
8181 condtemp = OMP_CLAUSE_DECL (condtmp);
8182 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8183 OMP_CLAUSE__CONDTEMP_);
8184 cond_var = OMP_CLAUSE_DECL (c);
8185 tree type = TREE_TYPE (condtemp);
8186 memv = create_tmp_var (type);
8187 TREE_ADDRESSABLE (memv) = 1;
8188 unsigned cnt = 0;
8189 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8190 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8191 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8192 ++cnt;
8193 unsigned HOST_WIDE_INT sz
8194 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8195 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8196 false);
8197 mem = build_fold_addr_expr (memv);
8198 }
8199 t = build_int_cst (unsigned_type_node, len - 1);
8200 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8201 stmt = gimple_build_call (u, 3, t, reductions, mem);
8202 gimple_call_set_lhs (stmt, vin);
8203 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8204 if (condtmp)
8205 {
8206 expand_omp_build_assign (&gsi, condtemp, memv, false);
8207 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8208 vin, build_one_cst (TREE_TYPE (cond_var)));
8209 expand_omp_build_assign (&gsi, cond_var, t, false);
8210 }
8211 if (reductmp)
8212 {
8213 gsi_remove (&gsi, true);
8214 release_ssa_name (gimple_assign_lhs (g));
8215 }
8216 }
8217 else if (!is_combined_parallel (region))
8218 {
8219 /* If we are not inside a combined parallel+sections region,
8220 call GOMP_sections_start. */
8221 t = build_int_cst (unsigned_type_node, len - 1);
8222 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8223 stmt = gimple_build_call (u, 1, t);
8224 }
8225 else
8226 {
8227 /* Otherwise, call GOMP_sections_next. */
8228 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8229 stmt = gimple_build_call (u, 0);
8230 }
8231 if (!reductmp && !condtmp)
8232 {
8233 gimple_call_set_lhs (stmt, vin);
8234 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8235 }
8236 gsi_remove (&si, true);
8237
8238 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8239 L0_BB. */
8240 switch_si = gsi_last_nondebug_bb (l0_bb);
8241 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8242 if (exit_reachable)
8243 {
8244 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8245 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8246 vmain = gimple_omp_continue_control_use (cont);
8247 vnext = gimple_omp_continue_control_def (cont);
8248 }
8249 else
8250 {
8251 vmain = vin;
8252 vnext = NULL_TREE;
8253 }
8254
8255 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8256 label_vec.quick_push (t);
8257 i = 1;
8258
8259 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8260 for (inner = region->inner, casei = 1;
8261 inner;
8262 inner = inner->next, i++, casei++)
8263 {
8264 basic_block s_entry_bb, s_exit_bb;
8265
8266 /* Skip optional reduction region. */
8267 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8268 {
8269 --i;
8270 --casei;
8271 continue;
8272 }
8273
8274 s_entry_bb = inner->entry;
8275 s_exit_bb = inner->exit;
8276
8277 t = gimple_block_label (s_entry_bb);
8278 u = build_int_cst (unsigned_type_node, casei);
8279 u = build_case_label (u, NULL, t);
8280 label_vec.quick_push (u);
8281
8282 si = gsi_last_nondebug_bb (s_entry_bb);
8283 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8284 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8285 gsi_remove (&si, true);
8286 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8287
8288 if (s_exit_bb == NULL)
8289 continue;
8290
8291 si = gsi_last_nondebug_bb (s_exit_bb);
8292 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8293 gsi_remove (&si, true);
8294
8295 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8296 }
8297
8298 /* Error handling code goes in DEFAULT_BB. */
8299 t = gimple_block_label (default_bb);
8300 u = build_case_label (NULL, NULL, t);
8301 make_edge (l0_bb, default_bb, 0);
8302 add_bb_to_loop (default_bb, current_loops->tree_root);
8303
8304 stmt = gimple_build_switch (vmain, u, label_vec);
8305 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8306 gsi_remove (&switch_si, true);
8307
8308 si = gsi_start_bb (default_bb);
8309 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8310 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8311
8312 if (exit_reachable)
8313 {
8314 tree bfn_decl;
8315
8316 /* Code to get the next section goes in L1_BB. */
8317 si = gsi_last_nondebug_bb (l1_bb);
8318 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8319
8320 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8321 stmt = gimple_build_call (bfn_decl, 0);
8322 gimple_call_set_lhs (stmt, vnext);
8323 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8324 if (cond_var)
8325 {
8326 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8327 vnext, build_one_cst (TREE_TYPE (cond_var)));
8328 expand_omp_build_assign (&si, cond_var, t, false);
8329 }
8330 gsi_remove (&si, true);
8331
8332 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8333 }
8334
8335 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8336 si = gsi_last_nondebug_bb (l2_bb);
8337 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8338 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8339 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8340 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8341 else
8342 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8343 stmt = gimple_build_call (t, 0);
8344 if (gimple_omp_return_lhs (gsi_stmt (si)))
8345 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8346 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8347 gsi_remove (&si, true);
8348
8349 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8350 }
8351
8352 /* Expand code for an OpenMP single directive. We've already expanded
8353 much of the code; here we simply place the GOMP_barrier call. */
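/* A rough sketch of the result, assuming the construct is not nowait:
   the GIMPLE_OMP_SINGLE marker in the entry block is simply removed and
   the GIMPLE_OMP_RETURN in the exit block is replaced by the barrier
   built by omp_build_barrier, i.e. effectively

     GOMP_barrier ();

   (or its cancellable variant when the return carries a result).  With
   nowait, no barrier call is emitted at all.  */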
8354
8355 static void
8356 expand_omp_single (struct omp_region *region)
8357 {
8358 basic_block entry_bb, exit_bb;
8359 gimple_stmt_iterator si;
8360
8361 entry_bb = region->entry;
8362 exit_bb = region->exit;
8363
8364 si = gsi_last_nondebug_bb (entry_bb);
8365 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
8366 gsi_remove (&si, true);
8367 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8368
8369 si = gsi_last_nondebug_bb (exit_bb);
8370 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8371 {
8372 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8373 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8374 }
8375 gsi_remove (&si, true);
8376 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8377 }
8378
8379 /* Generic expansion for OpenMP synchronization directives: master,
8380 taskgroup, ordered, critical and teams. All we need to do here is
8381 remove the entry and exit markers for REGION. */
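/* Roughly: the region body stays as is, the GIMPLE_OMP_* entry marker
   and the GIMPLE_OMP_RETURN exit marker are deleted, and the
   surrounding edges become fallthru.  The actual synchronization calls
   (e.g. GOMP_critical_start / GOMP_critical_end) were already emitted
   earlier, during OMP lowering, so nothing needs to be generated
   here.  */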
8382
8383 static void
8384 expand_omp_synch (struct omp_region *region)
8385 {
8386 basic_block entry_bb, exit_bb;
8387 gimple_stmt_iterator si;
8388
8389 entry_bb = region->entry;
8390 exit_bb = region->exit;
8391
8392 si = gsi_last_nondebug_bb (entry_bb);
8393 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8394 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8395 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8396 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8397 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8398 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8399 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8400 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8401 {
8402 expand_omp_taskreg (region);
8403 return;
8404 }
8405 gsi_remove (&si, true);
8406 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8407
8408 if (exit_bb)
8409 {
8410 si = gsi_last_nondebug_bb (exit_bb);
8411 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8412 gsi_remove (&si, true);
8413 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8414 }
8415 }
8416
8417 /* Translate enum omp_memory_order to enum memmodel. The two enums
8418 use different numbering so that OMP_MEMORY_ORDER_UNSPECIFIED
8419 can be 0. */
8420
8421 static enum memmodel
8422 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8423 {
8424 switch (mo)
8425 {
8426 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8427 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8428 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
8429 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
8430 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8431 default: gcc_unreachable ();
8432 }
8433 }
8434
8435 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8436 operation as a normal volatile load. */
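/* As a sketch, for a 4-byte type (INDEX == 2) the pair of region
   markers roughly becomes (assuming the __atomic_load_4 builtin is
   available)

     loaded_val = __atomic_load_4 (addr, <memory order>);

   with a VIEW_CONVERT_EXPR wrapped around the call when the builtin's
   return type differs from the type of LOADED_VAL.  */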
8437
8438 static bool
8439 expand_omp_atomic_load (basic_block load_bb, tree addr,
8440 tree loaded_val, int index)
8441 {
8442 enum built_in_function tmpbase;
8443 gimple_stmt_iterator gsi;
8444 basic_block store_bb;
8445 location_t loc;
8446 gimple *stmt;
8447 tree decl, call, type, itype;
8448
8449 gsi = gsi_last_nondebug_bb (load_bb);
8450 stmt = gsi_stmt (gsi);
8451 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8452 loc = gimple_location (stmt);
8453
8454 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8455 is smaller than word size, then expand_atomic_load assumes that the load
8456 is atomic. We could avoid the builtin entirely in this case. */
8457
8458 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8459 decl = builtin_decl_explicit (tmpbase);
8460 if (decl == NULL_TREE)
8461 return false;
8462
8463 type = TREE_TYPE (loaded_val);
8464 itype = TREE_TYPE (TREE_TYPE (decl));
8465
8466 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8467 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8468 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8469 if (!useless_type_conversion_p (type, itype))
8470 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8471 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8472
8473 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8474 gsi_remove (&gsi, true);
8475
8476 store_bb = single_succ (load_bb);
8477 gsi = gsi_last_nondebug_bb (store_bb);
8478 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8479 gsi_remove (&gsi, true);
8480
8481 if (gimple_in_ssa_p (cfun))
8482 update_ssa (TODO_update_ssa_no_phi);
8483
8484 return true;
8485 }
8486
8487 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8488 operation as a normal volatile store. */
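/* As a sketch, for a 4-byte type (INDEX == 2) this roughly becomes

     __atomic_store_4 (addr, stored_val, <memory order>);

   or, when the old value is needed (an exchange rather than a plain
   store),

     loaded_val = __atomic_exchange_4 (addr, stored_val, <memory order>);

   again with VIEW_CONVERT_EXPRs inserted where the builtin's argument
   or return type differs from the user types.  */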
8489
8490 static bool
8491 expand_omp_atomic_store (basic_block load_bb, tree addr,
8492 tree loaded_val, tree stored_val, int index)
8493 {
8494 enum built_in_function tmpbase;
8495 gimple_stmt_iterator gsi;
8496 basic_block store_bb = single_succ (load_bb);
8497 location_t loc;
8498 gimple *stmt;
8499 tree decl, call, type, itype;
8500 machine_mode imode;
8501 bool exchange;
8502
8503 gsi = gsi_last_nondebug_bb (load_bb);
8504 stmt = gsi_stmt (gsi);
8505 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8506
8507 /* If the load value is needed, then this isn't a store but an exchange. */
8508 exchange = gimple_omp_atomic_need_value_p (stmt);
8509
8510 gsi = gsi_last_nondebug_bb (store_bb);
8511 stmt = gsi_stmt (gsi);
8512 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8513 loc = gimple_location (stmt);
8514
8515 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8516 is smaller than word size, then expand_atomic_store assumes that the store
8517 is atomic. We could avoid the builtin entirely in this case. */
8518
8519 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8520 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8521 decl = builtin_decl_explicit (tmpbase);
8522 if (decl == NULL_TREE)
8523 return false;
8524
8525 type = TREE_TYPE (stored_val);
8526
8527 /* Dig out the type of the function's second argument. */
8528 itype = TREE_TYPE (decl);
8529 itype = TYPE_ARG_TYPES (itype);
8530 itype = TREE_CHAIN (itype);
8531 itype = TREE_VALUE (itype);
8532 imode = TYPE_MODE (itype);
8533
8534 if (exchange && !can_atomic_exchange_p (imode, true))
8535 return false;
8536
8537 if (!useless_type_conversion_p (itype, type))
8538 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8539 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8540 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8541 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8542 if (exchange)
8543 {
8544 if (!useless_type_conversion_p (type, itype))
8545 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8546 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8547 }
8548
8549 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8550 gsi_remove (&gsi, true);
8551
8552 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8553 gsi = gsi_last_nondebug_bb (load_bb);
8554 gsi_remove (&gsi, true);
8555
8556 if (gimple_in_ssa_p (cfun))
8557 update_ssa (TODO_update_ssa_no_phi);
8558
8559 return true;
8560 }
8561
8562 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8563 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8564 size of the data type, and thus usable to find the index of the builtin
8565 decl. Returns false if the expression is not of the proper form. */
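/* As a sketch, something like

     #pragma omp atomic
     x += y;

   on a 4-byte int (INDEX == 2) is expected to collapse into a single

     __atomic_fetch_add_4 (&x, y, <memory order>);

   call; the other supported operators map to the corresponding
   fetch-sub/and/or/xor builtins, and the *_FETCH_* forms are used when
   the new value of the location is needed.  */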
8566
8567 static bool
8568 expand_omp_atomic_fetch_op (basic_block load_bb,
8569 tree addr, tree loaded_val,
8570 tree stored_val, int index)
8571 {
8572 enum built_in_function oldbase, newbase, tmpbase;
8573 tree decl, itype, call;
8574 tree lhs, rhs;
8575 basic_block store_bb = single_succ (load_bb);
8576 gimple_stmt_iterator gsi;
8577 gimple *stmt;
8578 location_t loc;
8579 enum tree_code code;
8580 bool need_old, need_new;
8581 machine_mode imode;
8582
8583 /* We expect to find the following sequences:
8584
8585 load_bb:
8586 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8587
8588 store_bb:
8589 val = tmp OP something; (or: something OP tmp)
8590 GIMPLE_OMP_STORE (val)
8591
8592 ???FIXME: Allow a more flexible sequence.
8593 Perhaps use data flow to pick the statements.
8594
8595 */
8596
8597 gsi = gsi_after_labels (store_bb);
8598 stmt = gsi_stmt (gsi);
8599 if (is_gimple_debug (stmt))
8600 {
8601 gsi_next_nondebug (&gsi);
8602 if (gsi_end_p (gsi))
8603 return false;
8604 stmt = gsi_stmt (gsi);
8605 }
8606 loc = gimple_location (stmt);
8607 if (!is_gimple_assign (stmt))
8608 return false;
8609 gsi_next_nondebug (&gsi);
8610 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8611 return false;
8612 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8613 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8614 enum omp_memory_order omo
8615 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8616 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8617 gcc_checking_assert (!need_old || !need_new);
8618
8619 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8620 return false;
8621
8622 /* Check for one of the supported fetch-op operations. */
8623 code = gimple_assign_rhs_code (stmt);
8624 switch (code)
8625 {
8626 case PLUS_EXPR:
8627 case POINTER_PLUS_EXPR:
8628 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8629 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8630 break;
8631 case MINUS_EXPR:
8632 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8633 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8634 break;
8635 case BIT_AND_EXPR:
8636 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8637 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8638 break;
8639 case BIT_IOR_EXPR:
8640 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8641 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8642 break;
8643 case BIT_XOR_EXPR:
8644 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8645 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8646 break;
8647 default:
8648 return false;
8649 }
8650
8651 /* Make sure the expression is of the proper form. */
8652 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8653 rhs = gimple_assign_rhs2 (stmt);
8654 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8655 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8656 rhs = gimple_assign_rhs1 (stmt);
8657 else
8658 return false;
8659
8660 tmpbase = ((enum built_in_function)
8661 ((need_new ? newbase : oldbase) + index + 1));
8662 decl = builtin_decl_explicit (tmpbase);
8663 if (decl == NULL_TREE)
8664 return false;
8665 itype = TREE_TYPE (TREE_TYPE (decl));
8666 imode = TYPE_MODE (itype);
8667
8668 /* We could test all of the various optabs involved, but the fact of the
8669 matter is that (with the exception of i486 vs i586 and xadd) all targets
8670 that support any atomic operation optab also implement compare-and-swap.
8671 Let optabs.c take care of expanding any compare-and-swap loop. */
8672 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8673 return false;
8674
8675 gsi = gsi_last_nondebug_bb (load_bb);
8676 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8677
8678 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8679 It only requires that the operation happen atomically. Thus we can
8680 use the RELAXED memory model. */
8681 call = build_call_expr_loc (loc, decl, 3, addr,
8682 fold_convert_loc (loc, itype, rhs),
8683 build_int_cst (NULL, mo));
8684
8685 if (need_old || need_new)
8686 {
8687 lhs = need_old ? loaded_val : stored_val;
8688 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8689 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8690 }
8691 else
8692 call = fold_convert_loc (loc, void_type_node, call);
8693 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8694 gsi_remove (&gsi, true);
8695
8696 gsi = gsi_last_nondebug_bb (store_bb);
8697 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8698 gsi_remove (&gsi, true);
8699 gsi = gsi_last_nondebug_bb (store_bb);
8700 stmt = gsi_stmt (gsi);
8701 gsi_remove (&gsi, true);
8702
8703 if (gimple_in_ssa_p (cfun))
8704 {
8705 release_defs (stmt);
8706 update_ssa (TODO_update_ssa_no_phi);
8707 }
8708
8709 return true;
8710 }
8711
8712 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8713
8714 oldval = *addr;
8715 repeat:
8716 newval = rhs; // with oldval replacing *addr in rhs
8717 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
8718 if (oldval != newval)
8719 goto repeat;
8720
8721 INDEX is log2 of the size of the data type, and thus usable to find the
8722 index of the builtin decl. */
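/* For a non-integral type such as float (still of a supported size),
   the loop sketched above is carried out on an integer view of the
   value: ADDR is converted to a pointer to the matching integer type
   and LOADED_VAL / STORED_VAL are accessed through VIEW_CONVERT_EXPRs,
   so both the compare-and-swap and the retry comparison operate on the
   integer representation (see the NaN / -0.0 note below).  */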
8723
8724 static bool
8725 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8726 tree addr, tree loaded_val, tree stored_val,
8727 int index)
8728 {
8729 tree loadedi, storedi, initial, new_storedi, old_vali;
8730 tree type, itype, cmpxchg, iaddr, atype;
8731 gimple_stmt_iterator si;
8732 basic_block loop_header = single_succ (load_bb);
8733 gimple *phi, *stmt;
8734 edge e;
8735 enum built_in_function fncode;
8736
8737 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8738 order to use the RELAXED memory model effectively. */
8739 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8740 + index + 1);
8741 cmpxchg = builtin_decl_explicit (fncode);
8742 if (cmpxchg == NULL_TREE)
8743 return false;
8744 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8745 atype = type;
8746 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8747
8748 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8749 || !can_atomic_load_p (TYPE_MODE (itype)))
8750 return false;
8751
8752 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8753 si = gsi_last_nondebug_bb (load_bb);
8754 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8755
8756 /* For floating-point values, we'll need to view-convert them to integers
8757 so that we can perform the atomic compare and swap. Simplify the
8758 following code by always setting up the "i"ntegral variables. */
8759 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8760 {
8761 tree iaddr_val;
8762
8763 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8764 true));
8765 atype = itype;
8766 iaddr_val
8767 = force_gimple_operand_gsi (&si,
8768 fold_convert (TREE_TYPE (iaddr), addr),
8769 false, NULL_TREE, true, GSI_SAME_STMT);
8770 stmt = gimple_build_assign (iaddr, iaddr_val);
8771 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8772 loadedi = create_tmp_var (itype);
8773 if (gimple_in_ssa_p (cfun))
8774 loadedi = make_ssa_name (loadedi);
8775 }
8776 else
8777 {
8778 iaddr = addr;
8779 loadedi = loaded_val;
8780 }
8781
8782 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8783 tree loaddecl = builtin_decl_explicit (fncode);
8784 if (loaddecl)
8785 initial
8786 = fold_convert (atype,
8787 build_call_expr (loaddecl, 2, iaddr,
8788 build_int_cst (NULL_TREE,
8789 MEMMODEL_RELAXED)));
8790 else
8791 {
8792 tree off
8793 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8794 true), 0);
8795 initial = build2 (MEM_REF, atype, iaddr, off);
8796 }
8797
8798 initial
8799 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8800 GSI_SAME_STMT);
8801
8802 /* Move the value to the LOADEDI temporary. */
8803 if (gimple_in_ssa_p (cfun))
8804 {
8805 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8806 phi = create_phi_node (loadedi, loop_header);
8807 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8808 initial);
8809 }
8810 else
8811 gsi_insert_before (&si,
8812 gimple_build_assign (loadedi, initial),
8813 GSI_SAME_STMT);
8814 if (loadedi != loaded_val)
8815 {
8816 gimple_stmt_iterator gsi2;
8817 tree x;
8818
8819 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8820 gsi2 = gsi_start_bb (loop_header);
8821 if (gimple_in_ssa_p (cfun))
8822 {
8823 gassign *stmt;
8824 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8825 true, GSI_SAME_STMT);
8826 stmt = gimple_build_assign (loaded_val, x);
8827 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8828 }
8829 else
8830 {
8831 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8832 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8833 true, GSI_SAME_STMT);
8834 }
8835 }
8836 gsi_remove (&si, true);
8837
8838 si = gsi_last_nondebug_bb (store_bb);
8839 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8840
8841 if (iaddr == addr)
8842 storedi = stored_val;
8843 else
8844 storedi
8845 = force_gimple_operand_gsi (&si,
8846 build1 (VIEW_CONVERT_EXPR, itype,
8847 stored_val), true, NULL_TREE, true,
8848 GSI_SAME_STMT);
8849
8850 /* Build the compare&swap statement. */
8851 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8852 new_storedi = force_gimple_operand_gsi (&si,
8853 fold_convert (TREE_TYPE (loadedi),
8854 new_storedi),
8855 true, NULL_TREE,
8856 true, GSI_SAME_STMT);
8857
8858 if (gimple_in_ssa_p (cfun))
8859 old_vali = loadedi;
8860 else
8861 {
8862 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8863 stmt = gimple_build_assign (old_vali, loadedi);
8864 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8865
8866 stmt = gimple_build_assign (loadedi, new_storedi);
8867 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8868 }
8869
8870 /* Note that we always perform the comparison as an integer, even for
8871 floating point. This allows the atomic operation to properly
8872 succeed even with NaNs and -0.0. */
8873 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8874 stmt = gimple_build_cond_empty (ne);
8875 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8876
8877 /* Update cfg. */
8878 e = single_succ_edge (store_bb);
8879 e->flags &= ~EDGE_FALLTHRU;
8880 e->flags |= EDGE_FALSE_VALUE;
8881 /* Expect no looping. */
8882 e->probability = profile_probability::guessed_always ();
8883
8884 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8885 e->probability = profile_probability::guessed_never ();
8886
8887 /* Copy the new value to loadedi (we already did that before the condition
8888 if we are not in SSA). */
8889 if (gimple_in_ssa_p (cfun))
8890 {
8891 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8892 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8893 }
8894
8895 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8896 gsi_remove (&si, true);
8897
8898 class loop *loop = alloc_loop ();
8899 loop->header = loop_header;
8900 loop->latch = store_bb;
8901 add_loop (loop, loop_header->loop_father);
8902
8903 if (gimple_in_ssa_p (cfun))
8904 update_ssa (TODO_update_ssa_no_phi);
8905
8906 return true;
8907 }
8908
8909 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8910
8911 GOMP_atomic_start ();
8912 *addr = rhs;
8913 GOMP_atomic_end ();
8914
8915 The result is not globally atomic, but works so long as all parallel
8916 references are within #pragma omp atomic directives. According to
8917 responses received from omp@openmp.org, this appears to be within
8918 spec, which makes sense since that's how several other compilers
8919 handle this situation as well.
8920 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8921 expanding. STORED_VAL is the operand of the matching
8922 GIMPLE_OMP_ATOMIC_STORE.
8923
8924 We replace
8925 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8926 loaded_val = *addr;
8927
8928 and replace
8929 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8930 *addr = stored_val;
8931 */
8932
8933 static bool
8934 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
8935 tree addr, tree loaded_val, tree stored_val)
8936 {
8937 gimple_stmt_iterator si;
8938 gassign *stmt;
8939 tree t;
8940
8941 si = gsi_last_nondebug_bb (load_bb);
8942 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8943
8944 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
8945 t = build_call_expr (t, 0);
8946 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8947
8948 tree mem = build_simple_mem_ref (addr);
8949 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
8950 TREE_OPERAND (mem, 1)
8951 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
8952 true),
8953 TREE_OPERAND (mem, 1));
8954 stmt = gimple_build_assign (loaded_val, mem);
8955 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8956 gsi_remove (&si, true);
8957
8958 si = gsi_last_nondebug_bb (store_bb);
8959 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8960
8961 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
8962 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8963
8964 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
8965 t = build_call_expr (t, 0);
8966 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8967 gsi_remove (&si, true);
8968
8969 if (gimple_in_ssa_p (cfun))
8970 update_ssa (TODO_update_ssa_no_phi);
8971 return true;
8972 }
8973
8974 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
8975 using expand_omp_atomic_fetch_op. If that fails, we try to
8976 call expand_omp_atomic_pipeline, and if it fails too, the
8977 ultimate fallback is wrapping the operation in a mutex
8978 (expand_omp_atomic_mutex). REGION is the atomic region built
8979 by build_omp_regions_1(). */
8980
8981 static void
8982 expand_omp_atomic (struct omp_region *region)
8983 {
8984 basic_block load_bb = region->entry, store_bb = region->exit;
8985 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
8986 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
8987 tree loaded_val = gimple_omp_atomic_load_lhs (load);
8988 tree addr = gimple_omp_atomic_load_rhs (load);
8989 tree stored_val = gimple_omp_atomic_store_val (store);
8990 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8991 HOST_WIDE_INT index;
8992
8993 /* Make sure the type is one of the supported sizes. */
8994 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
8995 index = exact_log2 (index);
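  /* For example, a 4-byte int yields index 2; the supported sizes of 1,
     2, 4, 8 and 16 bytes map to indices 0 through 4 and select the
     corresponding __atomic_*_{1,2,4,8,16} builtin variants.  */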
8996 if (index >= 0 && index <= 4)
8997 {
8998 unsigned int align = TYPE_ALIGN_UNIT (type);
8999
9000 /* __sync builtins require strict data alignment. */
9001 if (exact_log2 (align) >= index)
9002 {
9003 /* Atomic load. */
9004 scalar_mode smode;
9005 if (loaded_val == stored_val
9006 && (is_int_mode (TYPE_MODE (type), &smode)
9007 || is_float_mode (TYPE_MODE (type), &smode))
9008 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9009 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9010 return;
9011
9012 /* Atomic store. */
9013 if ((is_int_mode (TYPE_MODE (type), &smode)
9014 || is_float_mode (TYPE_MODE (type), &smode))
9015 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9016 && store_bb == single_succ (load_bb)
9017 && first_stmt (store_bb) == store
9018 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9019 stored_val, index))
9020 return;
9021
9022 /* When possible, use specialized atomic update functions. */
9023 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9024 && store_bb == single_succ (load_bb)
9025 && expand_omp_atomic_fetch_op (load_bb, addr,
9026 loaded_val, stored_val, index))
9027 return;
9028
9029 /* If we don't have specialized __sync builtins, try and implement
9030 as a compare and swap loop. */
9031 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9032 loaded_val, stored_val, index))
9033 return;
9034 }
9035 }
9036
9037 /* The ultimate fallback is wrapping the operation in a mutex. */
9038 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9039 }
9040
9041 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9042 at REGION_EXIT. */
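/* A sketch of the policy implemented below: the loops are marked only
   when the region contains exactly one outermost loop and no loop in
   that nest has a sibling; in that case every loop of the nest gets
   in_oacc_kernels_region set, otherwise nothing is marked (and the
   region is not parallelized).  */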
9043
9044 static void
9045 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9046 basic_block region_exit)
9047 {
9048 class loop *outer = region_entry->loop_father;
9049 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9050
9051 /* Don't parallelize the kernels region if it contains more than one outer
9052 loop. */
9053 unsigned int nr_outer_loops = 0;
9054 class loop *single_outer = NULL;
9055 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9056 {
9057 gcc_assert (loop_outer (loop) == outer);
9058
9059 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9060 continue;
9061
9062 if (region_exit != NULL
9063 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9064 continue;
9065
9066 nr_outer_loops++;
9067 single_outer = loop;
9068 }
9069 if (nr_outer_loops != 1)
9070 return;
9071
9072 for (class loop *loop = single_outer->inner;
9073 loop != NULL;
9074 loop = loop->inner)
9075 if (loop->next)
9076 return;
9077
9078 /* Mark the loops in the region. */
9079 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9080 loop->in_oacc_kernels_region = true;
9081 }
9082
9083 /* Build target argument identifier from the DEVICE identifier, value
9084 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9085
9086 static tree
9087 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
9088 {
9089 tree t = build_int_cst (integer_type_node, device);
9090 if (subsequent_param)
9091 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9092 build_int_cst (integer_type_node,
9093 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9094 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9095 build_int_cst (integer_type_node, id));
9096 return t;
9097 }
9098
9099 /* Like above but return it in a type that can be directly stored as an element
9100 of the argument array. */
9101
9102 static tree
9103 get_target_argument_identifier (int device, bool subsequent_param, int id)
9104 {
9105 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
9106 return fold_convert (ptr_type_node, t);
9107 }
9108
9109 /* Return a target argument consisting of DEVICE identifier, value identifier
9110 ID, and the actual VALUE. */
9111
9112 static tree
9113 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9114 tree value)
9115 {
9116 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9117 fold_convert (integer_type_node, value),
9118 build_int_cst (unsigned_type_node,
9119 GOMP_TARGET_ARG_VALUE_SHIFT));
9120 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9121 get_target_argument_identifier_1 (device, false, id));
9122 t = fold_convert (ptr_type_node, t);
9123 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9124 }
9125
9126 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9127 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
9128 otherwise push an identifier (with DEVICE and ID) and the VALUE as two
9129 separate arguments. */
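/* A symbolic sketch of the two encodings (constants from
   gomp-constants.h):

     small VALUE:  one pointer-sized element holding
		     (VALUE << GOMP_TARGET_ARG_VALUE_SHIFT) | DEVICE | ID
     other VALUE:  two elements,
		     DEVICE | GOMP_TARGET_ARG_SUBSEQUENT_PARAM | ID
		   followed by VALUE itself.  */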
9130
9131 static void
9132 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9133 int id, tree value, vec <tree> *args)
9134 {
9135 if (tree_fits_shwi_p (value)
9136 && tree_to_shwi (value) > -(1 << 15)
9137 && tree_to_shwi (value) < (1 << 15))
9138 args->quick_push (get_target_argument_value (gsi, device, id, value));
9139 else
9140 {
9141 args->quick_push (get_target_argument_identifier (device, true, id));
9142 value = fold_convert (ptr_type_node, value);
9143 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9144 GSI_SAME_STMT);
9145 args->quick_push (value);
9146 }
9147 }
9148
9149 /* Create an array of arguments that is then passed to GOMP_target. */
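/* With the arguments currently produced below, the array is roughly

     { <num_teams entry (or two entries)>,
       <thread_limit entry (or two entries)>,
       NULL }

   where each entry is encoded as described above for
   push_target_argument_according_to_value.  */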
9150
9151 static tree
9152 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9153 {
9154 auto_vec <tree, 6> args;
9155 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9156 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9157 if (c)
9158 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9159 else
9160 t = integer_minus_one_node;
9161 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9162 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9163
9164 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9165 if (c)
9166 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9167 else
9168 t = integer_minus_one_node;
9169 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9170 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9171 &args);
9172
9173 /* Produce more, perhaps device-specific, arguments here. */
9174
9175 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9176 args.length () + 1),
9177 ".omp_target_args");
9178 for (unsigned i = 0; i < args.length (); i++)
9179 {
9180 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9181 build_int_cst (integer_type_node, i),
9182 NULL_TREE, NULL_TREE);
9183 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9184 GSI_SAME_STMT);
9185 }
9186 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9187 build_int_cst (integer_type_node, args.length ()),
9188 NULL_TREE, NULL_TREE);
9189 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9190 GSI_SAME_STMT);
9191 TREE_ADDRESSABLE (argarray) = 1;
9192 return build_fold_addr_expr (argarray);
9193 }
9194
9195 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9196
9197 static void
9198 expand_omp_target (struct omp_region *region)
9199 {
9200 basic_block entry_bb, exit_bb, new_bb;
9201 struct function *child_cfun;
9202 tree child_fn, block, t;
9203 gimple_stmt_iterator gsi;
9204 gomp_target *entry_stmt;
9205 gimple *stmt;
9206 edge e;
9207 bool offloaded, data_region;
9208 int target_kind;
9209
9210 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9211 target_kind = gimple_omp_target_kind (entry_stmt);
9212 new_bb = region->entry;
9213
9214 offloaded = is_gimple_omp_offloaded (entry_stmt);
9215 switch (target_kind)
9216 {
9217 case GF_OMP_TARGET_KIND_REGION:
9218 case GF_OMP_TARGET_KIND_UPDATE:
9219 case GF_OMP_TARGET_KIND_ENTER_DATA:
9220 case GF_OMP_TARGET_KIND_EXIT_DATA:
9221 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9222 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9223 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9224 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9225 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9226 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9227 data_region = false;
9228 break;
9229 case GF_OMP_TARGET_KIND_DATA:
9230 case GF_OMP_TARGET_KIND_OACC_DATA:
9231 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9232 data_region = true;
9233 break;
9234 default:
9235 gcc_unreachable ();
9236 }
9237
9238 child_fn = NULL_TREE;
9239 child_cfun = NULL;
9240 if (offloaded)
9241 {
9242 child_fn = gimple_omp_target_child_fn (entry_stmt);
9243 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9244 }
9245
9246 /* Supported by expand_omp_taskreg, but not here. */
9247 if (child_cfun != NULL)
9248 gcc_checking_assert (!child_cfun->cfg);
9249 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9250
9251 entry_bb = region->entry;
9252 exit_bb = region->exit;
9253
9254 switch (target_kind)
9255 {
9256 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9257 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9258
9259 /* Further down, all OpenACC compute constructs will be mapped to
9260 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
9261 is an "oacc kernels" attribute set for OpenACC kernels. */
9262 DECL_ATTRIBUTES (child_fn)
9263 = tree_cons (get_identifier ("oacc kernels"),
9264 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9265 break;
9266 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9267 /* Further down, all OpenACC compute constructs will be mapped to
9268 BUILT_IN_GOACC_PARALLEL, and to distinguish between them, there
9269 is an "oacc serial" attribute set for OpenACC serial. */
9270 DECL_ATTRIBUTES (child_fn)
9271 = tree_cons (get_identifier ("oacc serial"),
9272 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9273 break;
9274 default:
9275 break;
9276 }
9277
9278 if (offloaded)
9279 {
9280 unsigned srcidx, dstidx, num;
9281
9282 /* If the offloading region needs data sent from the parent
9283 function, then the very first statement (except possible
9284 tree profile counter updates) of the offloading body
9285 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9286 &.OMP_DATA_O is passed as an argument to the child function,
9287 we need to replace it with the argument as seen by the child
9288 function.
9289
9290 In most cases, this will end up being the identity assignment
9291 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9292 a function call that has been inlined, the original PARM_DECL
9293 .OMP_DATA_I may have been converted into a different local
9294 variable, in which case we need to keep the assignment. */
9295 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9296 if (data_arg)
9297 {
9298 basic_block entry_succ_bb = single_succ (entry_bb);
9299 gimple_stmt_iterator gsi;
9300 tree arg;
9301 gimple *tgtcopy_stmt = NULL;
9302 tree sender = TREE_VEC_ELT (data_arg, 0);
9303
9304 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9305 {
9306 gcc_assert (!gsi_end_p (gsi));
9307 stmt = gsi_stmt (gsi);
9308 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9309 continue;
9310
9311 if (gimple_num_ops (stmt) == 2)
9312 {
9313 tree arg = gimple_assign_rhs1 (stmt);
9314
9315 /* We're ignoring the subcode because we're
9316 effectively doing a STRIP_NOPS. */
9317
9318 if (TREE_CODE (arg) == ADDR_EXPR
9319 && TREE_OPERAND (arg, 0) == sender)
9320 {
9321 tgtcopy_stmt = stmt;
9322 break;
9323 }
9324 }
9325 }
9326
9327 gcc_assert (tgtcopy_stmt != NULL);
9328 arg = DECL_ARGUMENTS (child_fn);
9329
9330 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9331 gsi_remove (&gsi, true);
9332 }
9333
9334 /* Declare local variables needed in CHILD_CFUN. */
9335 block = DECL_INITIAL (child_fn);
9336 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9337 /* The gimplifier could record temporaries in the offloading block
9338 rather than in the containing function's local_decls chain,
9339 which would mean cgraph missed finalizing them. Do it now. */
9340 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9341 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9342 varpool_node::finalize_decl (t);
9343 DECL_SAVED_TREE (child_fn) = NULL;
9344 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9345 gimple_set_body (child_fn, NULL);
9346 TREE_USED (block) = 1;
9347
9348 /* Reset DECL_CONTEXT on function arguments. */
9349 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9350 DECL_CONTEXT (t) = child_fn;
9351
9352 /* Split ENTRY_BB at GIMPLE_*,
9353 so that it can be moved to the child function. */
9354 gsi = gsi_last_nondebug_bb (entry_bb);
9355 stmt = gsi_stmt (gsi);
9356 gcc_assert (stmt
9357 && gimple_code (stmt) == gimple_code (entry_stmt));
9358 e = split_block (entry_bb, stmt);
9359 gsi_remove (&gsi, true);
9360 entry_bb = e->dest;
9361 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9362
9363 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9364 if (exit_bb)
9365 {
9366 gsi = gsi_last_nondebug_bb (exit_bb);
9367 gcc_assert (!gsi_end_p (gsi)
9368 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9369 stmt = gimple_build_return (NULL);
9370 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9371 gsi_remove (&gsi, true);
9372 }
9373
9374 /* Move the offloading region into CHILD_CFUN. */
9375
9376 block = gimple_block (entry_stmt);
9377
9378 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9379 if (exit_bb)
9380 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9381 /* When the OMP expansion process cannot guarantee an up-to-date
9382 loop tree, arrange for the child function to fix up loops. */
9383 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9384 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9385
9386 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9387 num = vec_safe_length (child_cfun->local_decls);
9388 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9389 {
9390 t = (*child_cfun->local_decls)[srcidx];
9391 if (DECL_CONTEXT (t) == cfun->decl)
9392 continue;
9393 if (srcidx != dstidx)
9394 (*child_cfun->local_decls)[dstidx] = t;
9395 dstidx++;
9396 }
9397 if (dstidx != num)
9398 vec_safe_truncate (child_cfun->local_decls, dstidx);
9399
9400 /* Inform the callgraph about the new function. */
9401 child_cfun->curr_properties = cfun->curr_properties;
9402 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9403 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9404 cgraph_node *node = cgraph_node::get_create (child_fn);
9405 node->parallelized_function = 1;
9406 cgraph_node::add_new_function (child_fn, true);
9407
9408 /* Add the new function to the offload table. */
9409 if (ENABLE_OFFLOADING)
9410 {
9411 if (in_lto_p)
9412 DECL_PRESERVE_P (child_fn) = 1;
9413 vec_safe_push (offload_funcs, child_fn);
9414 }
9415
9416 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9417 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9418
9419 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9420 fixed in a following pass. */
9421 push_cfun (child_cfun);
9422 if (need_asm)
9423 assign_assembler_name_if_needed (child_fn);
9424 cgraph_edge::rebuild_edges ();
9425
9426 /* Some EH regions might become dead, see PR34608. If
9427 pass_cleanup_cfg isn't the first pass to happen with the
9428 new child, these dead EH edges might cause problems.
9429 Clean them up now. */
9430 if (flag_exceptions)
9431 {
9432 basic_block bb;
9433 bool changed = false;
9434
9435 FOR_EACH_BB_FN (bb, cfun)
9436 changed |= gimple_purge_dead_eh_edges (bb);
9437 if (changed)
9438 cleanup_tree_cfg ();
9439 }
9440 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9441 verify_loop_structure ();
9442 pop_cfun ();
9443
9444 if (dump_file && !gimple_in_ssa_p (cfun))
9445 {
9446 omp_any_child_fn_dumped = true;
9447 dump_function_header (dump_file, child_fn, dump_flags);
9448 dump_function_to_file (child_fn, dump_file, dump_flags);
9449 }
9450
9451 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9452 }
9453
9454 /* Emit a library call to launch the offloading region, or do data
9455 transfers. */
9456 tree t1, t2, t3, t4, depend, c, clauses;
9457 enum built_in_function start_ix;
9458 unsigned int flags_i = 0;
9459
9460 switch (gimple_omp_target_kind (entry_stmt))
9461 {
9462 case GF_OMP_TARGET_KIND_REGION:
9463 start_ix = BUILT_IN_GOMP_TARGET;
9464 break;
9465 case GF_OMP_TARGET_KIND_DATA:
9466 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9467 break;
9468 case GF_OMP_TARGET_KIND_UPDATE:
9469 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9470 break;
9471 case GF_OMP_TARGET_KIND_ENTER_DATA:
9472 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9473 break;
9474 case GF_OMP_TARGET_KIND_EXIT_DATA:
9475 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9476 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9477 break;
9478 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9479 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9480 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9481 start_ix = BUILT_IN_GOACC_PARALLEL;
9482 break;
9483 case GF_OMP_TARGET_KIND_OACC_DATA:
9484 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9485 start_ix = BUILT_IN_GOACC_DATA_START;
9486 break;
9487 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9488 start_ix = BUILT_IN_GOACC_UPDATE;
9489 break;
9490 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9491 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
9492 break;
9493 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9494 start_ix = BUILT_IN_GOACC_DECLARE;
9495 break;
9496 default:
9497 gcc_unreachable ();
9498 }
9499
9500 clauses = gimple_omp_target_clauses (entry_stmt);
9501
9502 tree device = NULL_TREE;
9503 location_t device_loc = UNKNOWN_LOCATION;
9504 tree goacc_flags = NULL_TREE;
9505 if (is_gimple_omp_oacc (entry_stmt))
9506 {
9507 /* By default, no GOACC_FLAGs are set. */
9508 goacc_flags = integer_zero_node;
9509 }
9510 else
9511 {
9512 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9513 if (c)
9514 {
9515 device = OMP_CLAUSE_DEVICE_ID (c);
9516 device_loc = OMP_CLAUSE_LOCATION (c);
9517 }
9518 else
9519 {
9520 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
9521 library choose). */
9522 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9523 device_loc = gimple_location (entry_stmt);
9524 }
9525
9526 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9527 if (c)
9528 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9529 }
9530
9531 /* By default, there is no conditional. */
9532 tree cond = NULL_TREE;
9533 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9534 if (c)
9535 cond = OMP_CLAUSE_IF_EXPR (c);
9536 /* If we found the clause 'if (cond)', build:
9537 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
9538 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9539 if (cond)
9540 {
9541 tree *tp;
9542 if (is_gimple_omp_oacc (entry_stmt))
9543 tp = &goacc_flags;
9544 else
9545 {
9546 /* Ensure 'device' is of the correct type. */
9547 device = fold_convert_loc (device_loc, integer_type_node, device);
9548
9549 tp = &device;
9550 }
9551
9552 cond = gimple_boolify (cond);
9553
9554 basic_block cond_bb, then_bb, else_bb;
9555 edge e;
9556 tree tmp_var;
9557
9558 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9559 if (offloaded)
9560 e = split_block_after_labels (new_bb);
9561 else
9562 {
9563 gsi = gsi_last_nondebug_bb (new_bb);
9564 gsi_prev (&gsi);
9565 e = split_block (new_bb, gsi_stmt (gsi));
9566 }
9567 cond_bb = e->src;
9568 new_bb = e->dest;
9569 remove_edge (e);
9570
9571 then_bb = create_empty_bb (cond_bb);
9572 else_bb = create_empty_bb (then_bb);
9573 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9574 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9575
9576 stmt = gimple_build_cond_empty (cond);
9577 gsi = gsi_last_bb (cond_bb);
9578 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9579
9580 gsi = gsi_start_bb (then_bb);
9581 stmt = gimple_build_assign (tmp_var, *tp);
9582 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9583
9584 gsi = gsi_start_bb (else_bb);
9585 if (is_gimple_omp_oacc (entry_stmt))
9586 stmt = gimple_build_assign (tmp_var,
9587 BIT_IOR_EXPR,
9588 *tp,
9589 build_int_cst (integer_type_node,
9590 GOACC_FLAG_HOST_FALLBACK));
9591 else
9592 stmt = gimple_build_assign (tmp_var,
9593 build_int_cst (integer_type_node,
9594 GOMP_DEVICE_HOST_FALLBACK));
9595 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9596
9597 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
9598 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
9599 add_bb_to_loop (then_bb, cond_bb->loop_father);
9600 add_bb_to_loop (else_bb, cond_bb->loop_father);
9601 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
9602 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
9603
9604 *tp = tmp_var;
9605
9606 gsi = gsi_last_nondebug_bb (new_bb);
9607 }
9608 else
9609 {
9610 gsi = gsi_last_nondebug_bb (new_bb);
9611
9612 if (device != NULL_TREE)
9613 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
9614 true, GSI_SAME_STMT);
9615 }
9616
9617 t = gimple_omp_target_data_arg (entry_stmt);
9618 if (t == NULL)
9619 {
9620 t1 = size_zero_node;
9621 t2 = build_zero_cst (ptr_type_node);
9622 t3 = t2;
9623 t4 = t2;
9624 }
9625 else
9626 {
9627 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
9628 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
9629 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
9630 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
9631 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
9632 }
9633
9634 gimple *g;
9635 bool tagging = false;
9636 /* The maximum number of arguments used by any start_ix, not counting varargs. */
9637 auto_vec<tree, 11> args;
9638 if (is_gimple_omp_oacc (entry_stmt))
9639 {
9640 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
9641 TREE_TYPE (goacc_flags), goacc_flags);
9642 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
9643 NULL_TREE, true,
9644 GSI_SAME_STMT);
9645 args.quick_push (goacc_flags_m);
9646 }
9647 else
9648 args.quick_push (device);
9649 if (offloaded)
9650 args.quick_push (build_fold_addr_expr (child_fn));
9651 args.quick_push (t1);
9652 args.quick_push (t2);
9653 args.quick_push (t3);
9654 args.quick_push (t4);
9655 switch (start_ix)
9656 {
9657 case BUILT_IN_GOACC_DATA_START:
9658 case BUILT_IN_GOACC_DECLARE:
9659 case BUILT_IN_GOMP_TARGET_DATA:
9660 break;
9661 case BUILT_IN_GOMP_TARGET:
9662 case BUILT_IN_GOMP_TARGET_UPDATE:
9663 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
9664 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
9665 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
9666 if (c)
9667 depend = OMP_CLAUSE_DECL (c);
9668 else
9669 depend = build_int_cst (ptr_type_node, 0);
9670 args.quick_push (depend);
9671 if (start_ix == BUILT_IN_GOMP_TARGET)
9672 args.quick_push (get_target_arguments (&gsi, entry_stmt));
9673 break;
9674 case BUILT_IN_GOACC_PARALLEL:
9675 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
9676 {
9677 tree dims = NULL_TREE;
9678 unsigned int ix;
9679
9680 /* For serial constructs we set all dimensions to 1. */
9681 for (ix = GOMP_DIM_MAX; ix--;)
9682 dims = tree_cons (NULL_TREE, integer_one_node, dims);
9683 oacc_replace_fn_attrib (child_fn, dims);
9684 }
9685 else
9686 oacc_set_fn_attrib (child_fn, clauses, &args);
9687 tagging = true;
9688 /* FALLTHRU */
9689 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
9690 case BUILT_IN_GOACC_UPDATE:
9691 {
9692 tree t_async = NULL_TREE;
9693
9694 /* If present, use the value specified by the respective
9695 clause, making sure that it is of the correct type.
9696 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
9697 if (c)
9698 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9699 integer_type_node,
9700 OMP_CLAUSE_ASYNC_EXPR (c));
9701 else if (!tagging)
9702 /* Default values for t_async. */
9703 t_async = fold_convert_loc (gimple_location (entry_stmt),
9704 integer_type_node,
9705 build_int_cst (integer_type_node,
9706 GOMP_ASYNC_SYNC));
9707 if (tagging && t_async)
9708 {
9709 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
9710
9711 if (TREE_CODE (t_async) == INTEGER_CST)
9712 {
9713 /* See if we can pack the async arg into the tag's
9714 operand. */
9715 i_async = TREE_INT_CST_LOW (t_async);
9716 if (i_async < GOMP_LAUNCH_OP_MAX)
9717 t_async = NULL_TREE;
9718 else
9719 i_async = GOMP_LAUNCH_OP_MAX;
9720 }
9721 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
9722 i_async));
9723 }
9724 if (t_async)
9725 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
9726 NULL_TREE, true,
9727 GSI_SAME_STMT));
9728
9729 /* Save the argument index, and ... */
9730 unsigned t_wait_idx = args.length ();
9731 unsigned num_waits = 0;
9732 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
9733 if (!tagging || c)
9734 /* ... push a placeholder. */
9735 args.safe_push (integer_zero_node);
9736
9737 for (; c; c = OMP_CLAUSE_CHAIN (c))
9738 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
9739 {
9740 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9741 integer_type_node,
9742 OMP_CLAUSE_WAIT_EXPR (c));
9743 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
9744 GSI_SAME_STMT);
9745 args.safe_push (arg);
9746 num_waits++;
9747 }
9748
9749 if (!tagging || num_waits)
9750 {
9751 tree len;
9752
9753 /* Now that we know the number, update the placeholder. */
9754 if (tagging)
9755 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
9756 else
9757 len = build_int_cst (integer_type_node, num_waits);
9758 len = fold_convert_loc (gimple_location (entry_stmt),
9759 unsigned_type_node, len);
9760 args[t_wait_idx] = len;
9761 }
9762 }
9763 break;
9764 default:
9765 gcc_unreachable ();
9766 }
9767 if (tagging)
9768 /* Push terminal marker - zero. */
9769 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
9770
9771 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
9772 gimple_set_location (g, gimple_location (entry_stmt));
9773 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9774 if (!offloaded)
9775 {
9776 g = gsi_stmt (gsi);
9777 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
9778 gsi_remove (&gsi, true);
9779 }
9780 if (data_region && region->exit)
9781 {
9782 gsi = gsi_last_nondebug_bb (region->exit);
9783 g = gsi_stmt (gsi);
9784 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
9785 gsi_remove (&gsi, true);
9786 }
9787 }
9788
9789 /* Expand the parallel region tree rooted at REGION. Expansion
9790 proceeds in depth-first order. Innermost regions are expanded
9791 first. This way, parallel regions that require a new function to
9792 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
9793 internal dependencies in their body. */
9794
9795 static void
9796 expand_omp (struct omp_region *region)
9797 {
9798 omp_any_child_fn_dumped = false;
9799 while (region)
9800 {
9801 location_t saved_location;
9802 gimple *inner_stmt = NULL;
9803
9804 /* First, determine whether this is a combined parallel+workshare
9805 region. */
9806 if (region->type == GIMPLE_OMP_PARALLEL)
9807 determine_parallel_type (region);
9808
9809 if (region->type == GIMPLE_OMP_FOR
9810 && gimple_omp_for_combined_p (last_stmt (region->entry)))
9811 inner_stmt = last_stmt (region->inner->entry);
9812
9813 if (region->inner)
9814 expand_omp (region->inner);
9815
9816 saved_location = input_location;
9817 if (gimple_has_location (last_stmt (region->entry)))
9818 input_location = gimple_location (last_stmt (region->entry));
9819
9820 switch (region->type)
9821 {
9822 case GIMPLE_OMP_PARALLEL:
9823 case GIMPLE_OMP_TASK:
9824 expand_omp_taskreg (region);
9825 break;
9826
9827 case GIMPLE_OMP_FOR:
9828 expand_omp_for (region, inner_stmt);
9829 break;
9830
9831 case GIMPLE_OMP_SECTIONS:
9832 expand_omp_sections (region);
9833 break;
9834
9835 case GIMPLE_OMP_SECTION:
9836 /* Individual omp sections are handled together with their
9837 parent GIMPLE_OMP_SECTIONS region. */
9838 break;
9839
9840 case GIMPLE_OMP_SINGLE:
9841 expand_omp_single (region);
9842 break;
9843
9844 case GIMPLE_OMP_ORDERED:
9845 {
9846 gomp_ordered *ord_stmt
9847 = as_a <gomp_ordered *> (last_stmt (region->entry));
9848 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
9849 OMP_CLAUSE_DEPEND))
9850 {
9851 /* We'll expand these when expanding the corresponding
9852 worksharing region with an ordered(n) clause. */
9853 gcc_assert (region->outer
9854 && region->outer->type == GIMPLE_OMP_FOR);
9855 region->ord_stmt = ord_stmt;
9856 break;
9857 }
9858 }
9859 /* FALLTHRU */
9860 case GIMPLE_OMP_MASTER:
9861 case GIMPLE_OMP_TASKGROUP:
9862 case GIMPLE_OMP_CRITICAL:
9863 case GIMPLE_OMP_TEAMS:
9864 expand_omp_synch (region);
9865 break;
9866
9867 case GIMPLE_OMP_ATOMIC_LOAD:
9868 expand_omp_atomic (region);
9869 break;
9870
9871 case GIMPLE_OMP_TARGET:
9872 expand_omp_target (region);
9873 break;
9874
9875 default:
9876 gcc_unreachable ();
9877 }
9878
9879 input_location = saved_location;
9880 region = region->next;
9881 }
9882 if (omp_any_child_fn_dumped)
9883 {
9884 if (dump_file)
9885 dump_function_header (dump_file, current_function_decl, dump_flags);
9886 omp_any_child_fn_dumped = false;
9887 }
9888 }
9889
9890 /* Helper for build_omp_regions. Scan the dominator tree starting at
9891 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
9892 true, the function ends once a single tree is built (otherwise, a
9893 whole forest of OMP constructs may be built). */
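/* As a rough illustration of the resulting structure: for

#pragma omp parallel
{
#pragma omp for
for (...)
...
}

the walk creates a GIMPLE_OMP_PARALLEL region whose inner region is a
GIMPLE_OMP_FOR region; each matching GIMPLE_OMP_RETURN block becomes the
corresponding region's exit block, and further directives at the same
nesting level would be chained as peer (next) regions. */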
9894
9895 static void
9896 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
9897 bool single_tree)
9898 {
9899 gimple_stmt_iterator gsi;
9900 gimple *stmt;
9901 basic_block son;
9902
9903 gsi = gsi_last_nondebug_bb (bb);
9904 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
9905 {
9906 struct omp_region *region;
9907 enum gimple_code code;
9908
9909 stmt = gsi_stmt (gsi);
9910 code = gimple_code (stmt);
9911 if (code == GIMPLE_OMP_RETURN)
9912 {
9913 /* STMT is the return point out of region PARENT. Mark it
9914 as the exit point and make PARENT the immediately
9915 enclosing region. */
9916 gcc_assert (parent);
9917 region = parent;
9918 region->exit = bb;
9919 parent = parent->outer;
9920 }
9921 else if (code == GIMPLE_OMP_ATOMIC_STORE)
9922 {
9923 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
9924 GIMPLE_OMP_RETURN, but matches with
9925 GIMPLE_OMP_ATOMIC_LOAD. */
9926 gcc_assert (parent);
9927 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
9928 region = parent;
9929 region->exit = bb;
9930 parent = parent->outer;
9931 }
9932 else if (code == GIMPLE_OMP_CONTINUE)
9933 {
9934 gcc_assert (parent);
9935 parent->cont = bb;
9936 }
9937 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
9938 {
9939 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
9940 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
9941 }
9942 else
9943 {
9944 region = new_omp_region (bb, code, parent);
9945 /* Otherwise... */
9946 if (code == GIMPLE_OMP_TARGET)
9947 {
9948 switch (gimple_omp_target_kind (stmt))
9949 {
9950 case GF_OMP_TARGET_KIND_REGION:
9951 case GF_OMP_TARGET_KIND_DATA:
9952 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9953 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9954 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9955 case GF_OMP_TARGET_KIND_OACC_DATA:
9956 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9957 break;
9958 case GF_OMP_TARGET_KIND_UPDATE:
9959 case GF_OMP_TARGET_KIND_ENTER_DATA:
9960 case GF_OMP_TARGET_KIND_EXIT_DATA:
9961 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9962 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9963 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9964 /* ..., other than for those stand-alone directives... */
9965 region = NULL;
9966 break;
9967 default:
9968 gcc_unreachable ();
9969 }
9970 }
9971 else if (code == GIMPLE_OMP_ORDERED
9972 && omp_find_clause (gimple_omp_ordered_clauses
9973 (as_a <gomp_ordered *> (stmt)),
9974 OMP_CLAUSE_DEPEND))
9975 /* #pragma omp ordered depend is also just a stand-alone
9976 directive. */
9977 region = NULL;
9978 else if (code == GIMPLE_OMP_TASK
9979 && gimple_omp_task_taskwait_p (stmt))
9980 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
9981 region = NULL;
9982 /* ..., this directive becomes the parent for a new region. */
9983 if (region)
9984 parent = region;
9985 }
9986 }
9987
9988 if (single_tree && !parent)
9989 return;
9990
9991 for (son = first_dom_son (CDI_DOMINATORS, bb);
9992 son;
9993 son = next_dom_son (CDI_DOMINATORS, son))
9994 build_omp_regions_1 (son, parent, single_tree);
9995 }
9996
9997 /* Builds the tree of OMP regions rooted at ROOT, storing it in
9998 root_omp_region. */
9999
10000 static void
10001 build_omp_regions_root (basic_block root)
10002 {
10003 gcc_assert (root_omp_region == NULL);
10004 build_omp_regions_1 (root, NULL, true);
10005 gcc_assert (root_omp_region != NULL);
10006 }
10007
10008 /* Expands the OMP construct (and its subconstructs) starting at basic block HEAD. */
10009
10010 void
10011 omp_expand_local (basic_block head)
10012 {
10013 build_omp_regions_root (head);
10014 if (dump_file && (dump_flags & TDF_DETAILS))
10015 {
10016 fprintf (dump_file, "\nOMP region tree\n\n");
10017 dump_omp_region (dump_file, root_omp_region, 0);
10018 fprintf (dump_file, "\n");
10019 }
10020
10021 remove_exit_barriers (root_omp_region);
10022 expand_omp (root_omp_region);
10023
10024 omp_free_regions ();
10025 }
10026
10027 /* Scan the CFG and build a tree of OMP regions, storing its root in
10028 root_omp_region. */
10029
10030 static void
10031 build_omp_regions (void)
10032 {
10033 gcc_assert (root_omp_region == NULL);
10034 calculate_dominance_info (CDI_DOMINATORS);
10035 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10036 }
10037
10038 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10039
10040 static unsigned int
10041 execute_expand_omp (void)
10042 {
10043 build_omp_regions ();
10044
10045 if (!root_omp_region)
10046 return 0;
10047
10048 if (dump_file)
10049 {
10050 fprintf (dump_file, "\nOMP region tree\n\n");
10051 dump_omp_region (dump_file, root_omp_region, 0);
10052 fprintf (dump_file, "\n");
10053 }
10054
10055 remove_exit_barriers (root_omp_region);
10056
10057 expand_omp (root_omp_region);
10058
10059 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10060 verify_loop_structure ();
10061 cleanup_tree_cfg ();
10062
10063 omp_free_regions ();
10064
10065 return 0;
10066 }
10067
10068 /* OMP expansion -- the default pass, run before creation of SSA form. */
10069
10070 namespace {
10071
10072 const pass_data pass_data_expand_omp =
10073 {
10074 GIMPLE_PASS, /* type */
10075 "ompexp", /* name */
10076 OPTGROUP_OMP, /* optinfo_flags */
10077 TV_NONE, /* tv_id */
10078 PROP_gimple_any, /* properties_required */
10079 PROP_gimple_eomp, /* properties_provided */
10080 0, /* properties_destroyed */
10081 0, /* todo_flags_start */
10082 0, /* todo_flags_finish */
10083 };
10084
10085 class pass_expand_omp : public gimple_opt_pass
10086 {
10087 public:
10088 pass_expand_omp (gcc::context *ctxt)
10089 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10090 {}
10091
10092 /* opt_pass methods: */
10093 virtual unsigned int execute (function *)
10094 {
10095 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10096 || flag_openmp_simd != 0)
10097 && !seen_error ());
10098
10099 /* This pass always runs, to provide PROP_gimple_eomp.
10100 But often, there is nothing to do. */
10101 if (!gate)
10102 return 0;
10103
10104 return execute_expand_omp ();
10105 }
10106
10107 }; // class pass_expand_omp
10108
10109 } // anon namespace
10110
10111 gimple_opt_pass *
10112 make_pass_expand_omp (gcc::context *ctxt)
10113 {
10114 return new pass_expand_omp (ctxt);
10115 }
10116
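/* Variant of the OMP expansion pass for functions that only reach OMP
expansion once they are already in SSA form; gated on PROP_gimple_eomp
not having been provided yet (see the gate method below). */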
10117 namespace {
10118
10119 const pass_data pass_data_expand_omp_ssa =
10120 {
10121 GIMPLE_PASS, /* type */
10122 "ompexpssa", /* name */
10123 OPTGROUP_OMP, /* optinfo_flags */
10124 TV_NONE, /* tv_id */
10125 PROP_cfg | PROP_ssa, /* properties_required */
10126 PROP_gimple_eomp, /* properties_provided */
10127 0, /* properties_destroyed */
10128 0, /* todo_flags_start */
10129 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10130 };
10131
10132 class pass_expand_omp_ssa : public gimple_opt_pass
10133 {
10134 public:
10135 pass_expand_omp_ssa (gcc::context *ctxt)
10136 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10137 {}
10138
10139 /* opt_pass methods: */
10140 virtual bool gate (function *fun)
10141 {
10142 return !(fun->curr_properties & PROP_gimple_eomp);
10143 }
10144 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10145 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10146
10147 }; // class pass_expand_omp_ssa
10148
10149 } // anon namespace
10150
10151 gimple_opt_pass *
10152 make_pass_expand_omp_ssa (gcc::context *ctxt)
10153 {
10154 return new pass_expand_omp_ssa (ctxt);
10155 }
10156
10157 /* Called from tree-cfg.c::make_edges to create CFG edges for all relevant
10158 GIMPLE_* codes; returns true if a fallthru edge to the next block is needed. */
10159
10160 bool
10161 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10162 int *region_idx)
10163 {
10164 gimple *last = last_stmt (bb);
10165 enum gimple_code code = gimple_code (last);
10166 struct omp_region *cur_region = *region;
10167 bool fallthru = false;
10168
10169 switch (code)
10170 {
10171 case GIMPLE_OMP_PARALLEL:
10172 case GIMPLE_OMP_FOR:
10173 case GIMPLE_OMP_SINGLE:
10174 case GIMPLE_OMP_TEAMS:
10175 case GIMPLE_OMP_MASTER:
10176 case GIMPLE_OMP_TASKGROUP:
10177 case GIMPLE_OMP_CRITICAL:
10178 case GIMPLE_OMP_SECTION:
10179 cur_region = new_omp_region (bb, code, cur_region);
10180 fallthru = true;
10181 break;
10182
10183 case GIMPLE_OMP_TASK:
10184 cur_region = new_omp_region (bb, code, cur_region);
10185 fallthru = true;
10186 if (gimple_omp_task_taskwait_p (last))
10187 cur_region = cur_region->outer;
10188 break;
10189
10190 case GIMPLE_OMP_ORDERED:
10191 cur_region = new_omp_region (bb, code, cur_region);
10192 fallthru = true;
10193 if (omp_find_clause (gimple_omp_ordered_clauses
10194 (as_a <gomp_ordered *> (last)),
10195 OMP_CLAUSE_DEPEND))
10196 cur_region = cur_region->outer;
10197 break;
10198
10199 case GIMPLE_OMP_TARGET:
10200 cur_region = new_omp_region (bb, code, cur_region);
10201 fallthru = true;
10202 switch (gimple_omp_target_kind (last))
10203 {
10204 case GF_OMP_TARGET_KIND_REGION:
10205 case GF_OMP_TARGET_KIND_DATA:
10206 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10207 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10208 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10209 case GF_OMP_TARGET_KIND_OACC_DATA:
10210 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10211 break;
10212 case GF_OMP_TARGET_KIND_UPDATE:
10213 case GF_OMP_TARGET_KIND_ENTER_DATA:
10214 case GF_OMP_TARGET_KIND_EXIT_DATA:
10215 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10216 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
10217 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10218 cur_region = cur_region->outer;
10219 break;
10220 default:
10221 gcc_unreachable ();
10222 }
10223 break;
10224
10225 case GIMPLE_OMP_SECTIONS:
10226 cur_region = new_omp_region (bb, code, cur_region);
10227 fallthru = true;
10228 break;
10229
10230 case GIMPLE_OMP_SECTIONS_SWITCH:
10231 fallthru = false;
10232 break;
10233
10234 case GIMPLE_OMP_ATOMIC_LOAD:
10235 case GIMPLE_OMP_ATOMIC_STORE:
10236 fallthru = true;
10237 break;
10238
10239 case GIMPLE_OMP_RETURN:
10240 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10241 somewhere other than the next block. That edge is
10242 created later. */
10243 cur_region->exit = bb;
10244 if (cur_region->type == GIMPLE_OMP_TASK)
10245 /* Add an edge corresponding to not scheduling the task
10246 immediately. */
10247 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10248 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10249 cur_region = cur_region->outer;
10250 break;
10251
10252 case GIMPLE_OMP_CONTINUE:
10253 cur_region->cont = bb;
10254 switch (cur_region->type)
10255 {
10256 case GIMPLE_OMP_FOR:
10257 /* Mark the successor edges of GIMPLE_OMP_FOR and
10258 GIMPLE_OMP_CONTINUE as abnormal to prevent
10259 splitting them. */
10260 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10261 /* Make the loopback edge. */
10262 make_edge (bb, single_succ (cur_region->entry),
10263 EDGE_ABNORMAL);
10264
10265 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10266 corresponds to the case that the body of the loop
10267 is not executed at all. */
10268 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10269 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
10270 fallthru = false;
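/* Illustrative summary of the edges created for this case
(ENTRY holds the GIMPLE_OMP_FOR, BB the GIMPLE_OMP_CONTINUE):

ENTRY -> body (existing successor, marked abnormal)
BB -> body (abnormal loopback)
ENTRY -> BB->next_bb (abnormal; zero-iteration case)
BB -> BB->next_bb (fallthru | abnormal)

hence no additional fallthru edge is requested here. */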
10271 break;
10272
10273 case GIMPLE_OMP_SECTIONS:
10274 /* Wire up the edges into and out of the nested sections. */
10275 {
10276 basic_block switch_bb = single_succ (cur_region->entry);
10277
10278 struct omp_region *i;
10279 for (i = cur_region->inner; i ; i = i->next)
10280 {
10281 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10282 make_edge (switch_bb, i->entry, 0);
10283 make_edge (i->exit, bb, EDGE_FALLTHRU);
10284 }
10285
10286 /* Make the loopback edge to the block with
10287 GIMPLE_OMP_SECTIONS_SWITCH. */
10288 make_edge (bb, switch_bb, 0);
10289
10290 /* Make the edge from the switch to exit. */
10291 make_edge (switch_bb, bb->next_bb, 0);
10292 fallthru = false;
10293 }
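/* Similarly, as a sketch for the sections case above: edges go from the
GIMPLE_OMP_SECTIONS_SWITCH block to each section's entry, from each
section's exit back to BB, from BB back to the switch block, and from
the switch block to BB->next_bb; again no extra fallthru edge is
requested. */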
10294 break;
10295
10296 case GIMPLE_OMP_TASK:
10297 fallthru = true;
10298 break;
10299
10300 default:
10301 gcc_unreachable ();
10302 }
10303 break;
10304
10305 default:
10306 gcc_unreachable ();
10307 }
10308
10309 if (*region != cur_region)
10310 {
10311 *region = cur_region;
10312 if (cur_region)
10313 *region_idx = cur_region->entry->index;
10314 else
10315 *region_idx = 0;
10316 }
10317
10318 return fallthru;
10319 }