gcc/omp-expand.c  (thirdparty/gcc.git)
OpenACC: Fix integer-type issue with collapse/tile [PR97880]
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2020 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "alloc-pool.h"
56 #include "symbol-summary.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "stringpool.h"
60 #include "attribs.h"
61 #include "tree-eh.h"
62
63 /* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
66
67 struct omp_region
68 {
69 /* The enclosing region. */
70 struct omp_region *outer;
71
72 /* First child region. */
73 struct omp_region *inner;
74
75 /* Next peer region. */
76 struct omp_region *next;
77
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
80
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
83
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
86
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
91
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
94
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
97
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
100
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
103
104 /* Copy of fd.lastprivate_conditional != 0. */
105 bool has_lastprivate_conditional;
106
107 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
108 a depend clause. */
109 gomp_ordered *ord_stmt;
110 };
111
112 static struct omp_region *root_omp_region;
113 static bool omp_any_child_fn_dumped;
114
115 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
116 bool = false);
117 static gphi *find_phi_with_arg_on_edge (tree, edge);
118 static void expand_omp (struct omp_region *region);
119
120 /* Return true if REGION is a combined parallel+workshare region. */
121
122 static inline bool
123 is_combined_parallel (struct omp_region *region)
124 {
125 return region->is_combined_parallel;
126 }
127
128 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
129 is the immediate dominator of PAR_ENTRY_BB, return true if there
130 are no data dependencies that would prevent expanding the parallel
131 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
132
133 When expanding a combined parallel+workshare region, the call to
134 the child function may need additional arguments in the case of
135 GIMPLE_OMP_FOR regions. In some cases, these arguments are
136 computed out of variables passed in from the parent to the child
137 via 'struct .omp_data_s'. For instance:
138
139 #pragma omp parallel for schedule (guided, i * 4)
140 for (j ...)
141
142 Is lowered into:
143
144 # BLOCK 2 (PAR_ENTRY_BB)
145 .omp_data_o.i = i;
146 	   #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
147
148 # BLOCK 3 (WS_ENTRY_BB)
149 .omp_data_i = &.omp_data_o;
150 D.1667 = .omp_data_i->i;
151 D.1598 = D.1667 * 4;
152 #pragma omp for schedule (guided, D.1598)
153
154 When we outline the parallel region, the call to the child function
155 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
156 that value is computed *after* the call site. So, in principle we
157 cannot do the transformation.
158
159 To see whether the code in WS_ENTRY_BB blocks the combined
160 parallel+workshare call, we collect all the variables used in the
161    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
162 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
163 call.
164
165 FIXME. If we had the SSA form built at this point, we could merely
166 hoist the code in block 3 into block 2 and be done with it. But at
167 this point we don't have dataflow information and though we could
168 hack something up here, it is really not worth the aggravation. */
169
170 static bool
171 workshare_safe_to_combine_p (basic_block ws_entry_bb)
172 {
173 struct omp_for_data fd;
174 gimple *ws_stmt = last_stmt (ws_entry_bb);
175
176 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
177 return true;
178
179 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
180 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
181 return false;
182
183 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
184
185 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
186 return false;
187 if (fd.iter_type != long_integer_type_node)
188 return false;
189
190 /* FIXME. We give up too easily here. If any of these arguments
191 are not constants, they will likely involve variables that have
192 been mapped into fields of .omp_data_s for sharing with the child
193 function. With appropriate data flow, it would be possible to
194 see through this. */
195 if (!is_gimple_min_invariant (fd.loop.n1)
196 || !is_gimple_min_invariant (fd.loop.n2)
197 || !is_gimple_min_invariant (fd.loop.step)
198 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
199 return false;
200
201 return true;
202 }
203
204 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
205 presence (SIMD_SCHEDULE). */
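/* (Illustrative arithmetic only: with a vectorization factor of 4, a chunk
   size of 6 is rounded up below to (6 + 3) & -4 == 8, i.e. to the next
   multiple of the factor, so that each chunk covers whole SIMD vectors.)  */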
206
207 static tree
208 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
209 {
210 if (!simd_schedule || integer_zerop (chunk_size))
211 return chunk_size;
212
213 poly_uint64 vf = omp_max_vf ();
214 if (known_eq (vf, 1U))
215 return chunk_size;
216
217 tree type = TREE_TYPE (chunk_size);
218 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
219 build_int_cst (type, vf - 1));
220 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
221 build_int_cst (type, -vf));
222 }
223
224 /* Collect additional arguments needed to emit a combined
225 parallel+workshare call. WS_STMT is the workshare directive being
226 expanded. */
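/* (For orientation: for a combined parallel loop the arguments gathered
   below are N1, N2 and STEP converted to long, plus the adjusted chunk
   size if one was given; for combined parallel sections it is just the
   number of sections.)  */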
227
228 static vec<tree, va_gc> *
229 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
230 {
231 tree t;
232 location_t loc = gimple_location (ws_stmt);
233 vec<tree, va_gc> *ws_args;
234
235 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
236 {
237 struct omp_for_data fd;
238 tree n1, n2;
239
240 omp_extract_for_data (for_stmt, &fd, NULL);
241 n1 = fd.loop.n1;
242 n2 = fd.loop.n2;
243
244 if (gimple_omp_for_combined_into_p (for_stmt))
245 {
246 tree innerc
247 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n1 = OMP_CLAUSE_DECL (innerc);
251 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
252 OMP_CLAUSE__LOOPTEMP_);
253 gcc_assert (innerc);
254 n2 = OMP_CLAUSE_DECL (innerc);
255 }
256
257 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
258
259 t = fold_convert_loc (loc, long_integer_type_node, n1);
260 ws_args->quick_push (t);
261
262 t = fold_convert_loc (loc, long_integer_type_node, n2);
263 ws_args->quick_push (t);
264
265 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
266 ws_args->quick_push (t);
267
268 if (fd.chunk_size)
269 {
270 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
271 t = omp_adjust_chunk_size (t, fd.simd_schedule);
272 ws_args->quick_push (t);
273 }
274
275 return ws_args;
276 }
277 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
278 {
279 /* Number of sections is equal to the number of edges from the
280 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
281 the exit of the sections region. */
282 basic_block bb = single_succ (gimple_bb (ws_stmt));
283 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
284 vec_alloc (ws_args, 1);
285 ws_args->quick_push (t);
286 return ws_args;
287 }
288
289 gcc_unreachable ();
290 }
291
292 /* Discover whether REGION is a combined parallel+workshare region. */
293
294 static void
295 determine_parallel_type (struct omp_region *region)
296 {
297 basic_block par_entry_bb, par_exit_bb;
298 basic_block ws_entry_bb, ws_exit_bb;
299
300 if (region == NULL || region->inner == NULL
301 || region->exit == NULL || region->inner->exit == NULL
302 || region->inner->cont == NULL)
303 return;
304
305 /* We only support parallel+for and parallel+sections. */
306 if (region->type != GIMPLE_OMP_PARALLEL
307 || (region->inner->type != GIMPLE_OMP_FOR
308 && region->inner->type != GIMPLE_OMP_SECTIONS))
309 return;
310
311 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
312 WS_EXIT_BB -> PAR_EXIT_BB. */
313 par_entry_bb = region->entry;
314 par_exit_bb = region->exit;
315 ws_entry_bb = region->inner->entry;
316 ws_exit_bb = region->inner->exit;
317
318   /* Give up for task reductions on the parallel; while they are
319      implementable, adding another big set of APIs or slowing down the
320      normal paths is not acceptable.  */
321 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
322 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
323 return;
324
325 if (single_succ (par_entry_bb) == ws_entry_bb
326 && single_succ (ws_exit_bb) == par_exit_bb
327 && workshare_safe_to_combine_p (ws_entry_bb)
328 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
329 || (last_and_only_stmt (ws_entry_bb)
330 && last_and_only_stmt (par_exit_bb))))
331 {
332 gimple *par_stmt = last_stmt (par_entry_bb);
333 gimple *ws_stmt = last_stmt (ws_entry_bb);
334
335 if (region->inner->type == GIMPLE_OMP_FOR)
336 {
337 /* If this is a combined parallel loop, we need to determine
338 whether or not to use the combined library calls. There
339 are two cases where we do not apply the transformation:
340 static loops and any kind of ordered loop. In the first
341 case, we already open code the loop so there is no need
342 to do anything else. In the latter case, the combined
343 parallel loop call would still need extra synchronization
344 to implement ordered semantics, so there would not be any
345 gain in using the combined call. */
346 tree clauses = gimple_omp_for_clauses (ws_stmt);
347 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
348 if (c == NULL
349 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
350 == OMP_CLAUSE_SCHEDULE_STATIC)
351 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
352 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
353 || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
354 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
355 return;
356 }
357 else if (region->inner->type == GIMPLE_OMP_SECTIONS
358 && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
359 OMP_CLAUSE__REDUCTEMP_)
360 || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
361 OMP_CLAUSE__CONDTEMP_)))
362 return;
363
364 region->is_combined_parallel = true;
365 region->inner->is_combined_parallel = true;
366 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
367 }
368 }
369
370 /* Debugging dumps for parallel regions. */
371 void dump_omp_region (FILE *, struct omp_region *, int);
372 void debug_omp_region (struct omp_region *);
373 void debug_all_omp_regions (void);
374
375 /* Dump the parallel region tree rooted at REGION. */
376
377 void
378 dump_omp_region (FILE *file, struct omp_region *region, int indent)
379 {
380 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
381 gimple_code_name[region->type]);
382
383 if (region->inner)
384 dump_omp_region (file, region->inner, indent + 4);
385
386 if (region->cont)
387 {
388 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
389 region->cont->index);
390 }
391
392 if (region->exit)
393 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
394 region->exit->index);
395 else
396 fprintf (file, "%*s[no exit marker]\n", indent, "");
397
398 if (region->next)
399 dump_omp_region (file, region->next, indent);
400 }
401
402 DEBUG_FUNCTION void
403 debug_omp_region (struct omp_region *region)
404 {
405 dump_omp_region (stderr, region, 0);
406 }
407
408 DEBUG_FUNCTION void
409 debug_all_omp_regions (void)
410 {
411 dump_omp_region (stderr, root_omp_region, 0);
412 }
413
414 /* Create a new parallel region starting at STMT inside region PARENT. */
415
416 static struct omp_region *
417 new_omp_region (basic_block bb, enum gimple_code type,
418 struct omp_region *parent)
419 {
420 struct omp_region *region = XCNEW (struct omp_region);
421
422 region->outer = parent;
423 region->entry = bb;
424 region->type = type;
425
426 if (parent)
427 {
428 /* This is a nested region. Add it to the list of inner
429 regions in PARENT. */
430 region->next = parent->inner;
431 parent->inner = region;
432 }
433 else
434 {
435 /* This is a toplevel region. Add it to the list of toplevel
436 regions in ROOT_OMP_REGION. */
437 region->next = root_omp_region;
438 root_omp_region = region;
439 }
440
441 return region;
442 }
443
444 /* Release the memory associated with the region tree rooted at REGION. */
445
446 static void
447 free_omp_region_1 (struct omp_region *region)
448 {
449 struct omp_region *i, *n;
450
451 for (i = region->inner; i ; i = n)
452 {
453 n = i->next;
454 free_omp_region_1 (i);
455 }
456
457 free (region);
458 }
459
460 /* Release the memory for the entire omp region tree. */
461
462 void
463 omp_free_regions (void)
464 {
465 struct omp_region *r, *n;
466 for (r = root_omp_region; r ; r = n)
467 {
468 n = r->next;
469 free_omp_region_1 (r);
470 }
471 root_omp_region = NULL;
472 }
473
474 /* A convenience function to build an empty GIMPLE_COND with just the
475 condition. */
476
477 static gcond *
478 gimple_build_cond_empty (tree cond)
479 {
480 enum tree_code pred_code;
481 tree lhs, rhs;
482
483 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
484 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
485 }
486
487 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
488 Add CHILD_FNDECL to decl chain of the supercontext of the block
489 ENTRY_BLOCK - this is the block which originally contained the
490 code from which CHILD_FNDECL was created.
491
492 Together, these actions ensure that the debug info for the outlined
493 function will be emitted with the correct lexical scope. */
494
495 static void
496 adjust_context_and_scope (struct omp_region *region, tree entry_block,
497 tree child_fndecl)
498 {
499 tree parent_fndecl = NULL_TREE;
500 gimple *entry_stmt;
501 /* OMP expansion expands inner regions before outer ones, so if
502    we e.g. have an explicit task region nested in a parallel region, when
503 expanding the task region current_function_decl will be the original
504 source function, but we actually want to use as context the child
505 function of the parallel. */
506 for (region = region->outer;
507 region && parent_fndecl == NULL_TREE; region = region->outer)
508 switch (region->type)
509 {
510 case GIMPLE_OMP_PARALLEL:
511 case GIMPLE_OMP_TASK:
512 case GIMPLE_OMP_TEAMS:
513 entry_stmt = last_stmt (region->entry);
514 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
515 break;
516 case GIMPLE_OMP_TARGET:
517 entry_stmt = last_stmt (region->entry);
518 parent_fndecl
519 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
520 break;
521 default:
522 break;
523 }
524
525 if (parent_fndecl == NULL_TREE)
526 parent_fndecl = current_function_decl;
527 DECL_CONTEXT (child_fndecl) = parent_fndecl;
528
529 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
530 {
531 tree b = BLOCK_SUPERCONTEXT (entry_block);
532 if (TREE_CODE (b) == BLOCK)
533 {
534 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
535 BLOCK_VARS (b) = child_fndecl;
536 }
537 }
538 }
539
540 /* Build the function calls to GOMP_parallel etc to actually
541 generate the parallel operation. REGION is the parallel region
542 being expanded. BB is the block where to insert the code. WS_ARGS
543 will be set if this is a call to a combined parallel+workshare
544    construct; it contains the list of additional arguments needed by
545 the workshare construct. */
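/* (Sketch, for orientation only: in the plain case this emits
   GOMP_parallel (child_fn, &data, num_threads, flags); for combined
   parallel+workshare regions a loop or sections flavor of the builtin is
   selected instead and WS_ARGS is spliced in between NUM_THREADS and
   FLAGS.)  */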
546
547 static void
548 expand_parallel_call (struct omp_region *region, basic_block bb,
549 gomp_parallel *entry_stmt,
550 vec<tree, va_gc> *ws_args)
551 {
552 tree t, t1, t2, val, cond, c, clauses, flags;
553 gimple_stmt_iterator gsi;
554 gimple *stmt;
555 enum built_in_function start_ix;
556 int start_ix2;
557 location_t clause_loc;
558 vec<tree, va_gc> *args;
559
560 clauses = gimple_omp_parallel_clauses (entry_stmt);
561
562 /* Determine what flavor of GOMP_parallel we will be
563 emitting. */
564 start_ix = BUILT_IN_GOMP_PARALLEL;
565 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
566 if (rtmp)
567 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
568 else if (is_combined_parallel (region))
569 {
570 switch (region->inner->type)
571 {
572 case GIMPLE_OMP_FOR:
573 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
574 switch (region->inner->sched_kind)
575 {
576 case OMP_CLAUSE_SCHEDULE_RUNTIME:
577 /* For lastprivate(conditional:), our implementation
578 requires monotonic behavior. */
579 if (region->inner->has_lastprivate_conditional != 0)
580 start_ix2 = 3;
581 else if ((region->inner->sched_modifiers
582 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
583 start_ix2 = 6;
584 else if ((region->inner->sched_modifiers
585 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
586 start_ix2 = 7;
587 else
588 start_ix2 = 3;
589 break;
590 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
591 case OMP_CLAUSE_SCHEDULE_GUIDED:
592 if ((region->inner->sched_modifiers
593 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
594 && !region->inner->has_lastprivate_conditional)
595 {
596 start_ix2 = 3 + region->inner->sched_kind;
597 break;
598 }
599 /* FALLTHRU */
600 default:
601 start_ix2 = region->inner->sched_kind;
602 break;
603 }
604 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
605 start_ix = (enum built_in_function) start_ix2;
606 break;
607 case GIMPLE_OMP_SECTIONS:
608 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
609 break;
610 default:
611 gcc_unreachable ();
612 }
613 }
614
615 /* By default, the value of NUM_THREADS is zero (selected at run time)
616 and there is no conditional. */
617 cond = NULL_TREE;
618 val = build_int_cst (unsigned_type_node, 0);
619 flags = build_int_cst (unsigned_type_node, 0);
620
621 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
622 if (c)
623 cond = OMP_CLAUSE_IF_EXPR (c);
624
625 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
626 if (c)
627 {
628 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
629 clause_loc = OMP_CLAUSE_LOCATION (c);
630 }
631 else
632 clause_loc = gimple_location (entry_stmt);
633
634 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
635 if (c)
636 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
637
638 /* Ensure 'val' is of the correct type. */
639 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
640
641 /* If we found the clause 'if (cond)', build either
642 (cond != 0) or (cond ? val : 1u). */
643 if (cond)
644 {
645 cond = gimple_boolify (cond);
646
647 if (integer_zerop (val))
648 val = fold_build2_loc (clause_loc,
649 EQ_EXPR, unsigned_type_node, cond,
650 build_int_cst (TREE_TYPE (cond), 0));
651 else
652 {
653 basic_block cond_bb, then_bb, else_bb;
654 edge e, e_then, e_else;
655 tree tmp_then, tmp_else, tmp_join, tmp_var;
656
657 tmp_var = create_tmp_var (TREE_TYPE (val));
658 if (gimple_in_ssa_p (cfun))
659 {
660 tmp_then = make_ssa_name (tmp_var);
661 tmp_else = make_ssa_name (tmp_var);
662 tmp_join = make_ssa_name (tmp_var);
663 }
664 else
665 {
666 tmp_then = tmp_var;
667 tmp_else = tmp_var;
668 tmp_join = tmp_var;
669 }
670
671 e = split_block_after_labels (bb);
672 cond_bb = e->src;
673 bb = e->dest;
674 remove_edge (e);
675
676 then_bb = create_empty_bb (cond_bb);
677 else_bb = create_empty_bb (then_bb);
678 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
679 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
680
681 stmt = gimple_build_cond_empty (cond);
682 gsi = gsi_start_bb (cond_bb);
683 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
684
685 gsi = gsi_start_bb (then_bb);
686 expand_omp_build_assign (&gsi, tmp_then, val, true);
687
688 gsi = gsi_start_bb (else_bb);
689 expand_omp_build_assign (&gsi, tmp_else,
690 build_int_cst (unsigned_type_node, 1),
691 true);
692
693 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
694 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
695 add_bb_to_loop (then_bb, cond_bb->loop_father);
696 add_bb_to_loop (else_bb, cond_bb->loop_father);
697 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
698 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
699
700 if (gimple_in_ssa_p (cfun))
701 {
702 gphi *phi = create_phi_node (tmp_join, bb);
703 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
704 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
705 }
706
707 val = tmp_join;
708 }
709
710 gsi = gsi_start_bb (bb);
711 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
712 false, GSI_CONTINUE_LINKING);
713 }
714
715 gsi = gsi_last_nondebug_bb (bb);
716 t = gimple_omp_parallel_data_arg (entry_stmt);
717 if (t == NULL)
718 t1 = null_pointer_node;
719 else
720 t1 = build_fold_addr_expr (t);
721 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
722 t2 = build_fold_addr_expr (child_fndecl);
723
724 vec_alloc (args, 4 + vec_safe_length (ws_args));
725 args->quick_push (t2);
726 args->quick_push (t1);
727 args->quick_push (val);
728 if (ws_args)
729 args->splice (*ws_args);
730 args->quick_push (flags);
731
732 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
733 builtin_decl_explicit (start_ix), args);
734
735 if (rtmp)
736 {
737 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
738 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
739 fold_convert (type,
740 fold_convert (pointer_sized_int_node, t)));
741 }
742 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
743 false, GSI_CONTINUE_LINKING);
744 }
745
746 /* Build the function call to GOMP_task to actually
747 generate the task operation. BB is the block where to insert the code. */
748
749 static void
750 expand_task_call (struct omp_region *region, basic_block bb,
751 gomp_task *entry_stmt)
752 {
753 tree t1, t2, t3;
754 gimple_stmt_iterator gsi;
755 location_t loc = gimple_location (entry_stmt);
756
757 tree clauses = gimple_omp_task_clauses (entry_stmt);
758
759 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
760 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
761 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
762 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
763 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
764 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
765
766 unsigned int iflags
767 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
768 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
769 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
770
771 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
772 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
773 tree num_tasks = NULL_TREE;
774 bool ull = false;
775 if (taskloop_p)
776 {
777 gimple *g = last_stmt (region->outer->entry);
778 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
779 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
780 struct omp_for_data fd;
781 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
782 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
783 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
784 OMP_CLAUSE__LOOPTEMP_);
785 startvar = OMP_CLAUSE_DECL (startvar);
786 endvar = OMP_CLAUSE_DECL (endvar);
787 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
788 if (fd.loop.cond_code == LT_EXPR)
789 iflags |= GOMP_TASK_FLAG_UP;
790 tree tclauses = gimple_omp_for_clauses (g);
791 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
792 if (num_tasks)
793 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
794 else
795 {
796 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
797 if (num_tasks)
798 {
799 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
800 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
801 }
802 else
803 num_tasks = integer_zero_node;
804 }
805 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
806 if (ifc == NULL_TREE)
807 iflags |= GOMP_TASK_FLAG_IF;
808 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
809 iflags |= GOMP_TASK_FLAG_NOGROUP;
810 ull = fd.iter_type == long_long_unsigned_type_node;
811 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
812 iflags |= GOMP_TASK_FLAG_REDUCTION;
813 }
814 else if (priority)
815 iflags |= GOMP_TASK_FLAG_PRIORITY;
816
817 tree flags = build_int_cst (unsigned_type_node, iflags);
818
819 tree cond = boolean_true_node;
820 if (ifc)
821 {
822 if (taskloop_p)
823 {
824 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
825 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
826 build_int_cst (unsigned_type_node,
827 GOMP_TASK_FLAG_IF),
828 build_int_cst (unsigned_type_node, 0));
829 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
830 flags, t);
831 }
832 else
833 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
834 }
835
836 if (finalc)
837 {
838 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
839 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
840 build_int_cst (unsigned_type_node,
841 GOMP_TASK_FLAG_FINAL),
842 build_int_cst (unsigned_type_node, 0));
843 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
844 }
845 if (depend)
846 depend = OMP_CLAUSE_DECL (depend);
847 else
848 depend = build_int_cst (ptr_type_node, 0);
849 if (priority)
850 priority = fold_convert (integer_type_node,
851 OMP_CLAUSE_PRIORITY_EXPR (priority));
852 else
853 priority = integer_zero_node;
854
855 gsi = gsi_last_nondebug_bb (bb);
856 tree t = gimple_omp_task_data_arg (entry_stmt);
857 if (t == NULL)
858 t2 = null_pointer_node;
859 else
860 t2 = build_fold_addr_expr_loc (loc, t);
861 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
862 t = gimple_omp_task_copy_fn (entry_stmt);
863 if (t == NULL)
864 t3 = null_pointer_node;
865 else
866 t3 = build_fold_addr_expr_loc (loc, t);
867
868 if (taskloop_p)
869 t = build_call_expr (ull
870 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
871 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
872 11, t1, t2, t3,
873 gimple_omp_task_arg_size (entry_stmt),
874 gimple_omp_task_arg_align (entry_stmt), flags,
875 num_tasks, priority, startvar, endvar, step);
876 else
877 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
878 9, t1, t2, t3,
879 gimple_omp_task_arg_size (entry_stmt),
880 gimple_omp_task_arg_align (entry_stmt), cond, flags,
881 depend, priority);
882
883 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
884 false, GSI_CONTINUE_LINKING);
885 }
886
887 /* Build the function call to GOMP_taskwait_depend to actually
888 generate the taskwait operation. BB is the block where to insert the
889 code. */
890
891 static void
892 expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
893 {
894 tree clauses = gimple_omp_task_clauses (entry_stmt);
895 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
896 if (depend == NULL_TREE)
897 return;
898
899 depend = OMP_CLAUSE_DECL (depend);
900
901 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
902 tree t
903 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
904 1, depend);
905
906 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
907 false, GSI_CONTINUE_LINKING);
908 }
909
910 /* Build the function call to GOMP_teams_reg to actually
911    generate the host teams operation.  BB is the block where to insert
912    the code.  */
913
914 static void
915 expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
916 {
917 tree clauses = gimple_omp_teams_clauses (entry_stmt);
918 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
919 if (num_teams == NULL_TREE)
920 num_teams = build_int_cst (unsigned_type_node, 0);
921 else
922 {
923 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
924 num_teams = fold_convert (unsigned_type_node, num_teams);
925 }
926 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
927 if (thread_limit == NULL_TREE)
928 thread_limit = build_int_cst (unsigned_type_node, 0);
929 else
930 {
931 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
932 thread_limit = fold_convert (unsigned_type_node, thread_limit);
933 }
934
935 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
936 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
937 if (t == NULL)
938 t1 = null_pointer_node;
939 else
940 t1 = build_fold_addr_expr (t);
941 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
942 tree t2 = build_fold_addr_expr (child_fndecl);
943
944 vec<tree, va_gc> *args;
945 vec_alloc (args, 5);
946 args->quick_push (t2);
947 args->quick_push (t1);
948 args->quick_push (num_teams);
949 args->quick_push (thread_limit);
950 /* For future extensibility. */
951 args->quick_push (build_zero_cst (unsigned_type_node));
952
953 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
954 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
955 args);
956
957 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
958 false, GSI_CONTINUE_LINKING);
959 }
960
961 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
962
963 static tree
964 vec2chain (vec<tree, va_gc> *v)
965 {
966 tree chain = NULL_TREE, t;
967 unsigned ix;
968
969 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
970 {
971 DECL_CHAIN (t) = chain;
972 chain = t;
973 }
974
975 return chain;
976 }
977
978 /* Remove barriers in REGION->EXIT's block. Note that this is only
979 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
980    is an implicit barrier, any barrier that a workshare inside the
981    GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
982    can now be removed.  */
983
984 static void
985 remove_exit_barrier (struct omp_region *region)
986 {
987 gimple_stmt_iterator gsi;
988 basic_block exit_bb;
989 edge_iterator ei;
990 edge e;
991 gimple *stmt;
992 int any_addressable_vars = -1;
993
994 exit_bb = region->exit;
995
996 /* If the parallel region doesn't return, we don't have REGION->EXIT
997 block at all. */
998 if (! exit_bb)
999 return;
1000
1001 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1002 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1003 statements that can appear in between are extremely limited -- no
1004 memory operations at all. Here, we allow nothing at all, so the
1005 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1006 gsi = gsi_last_nondebug_bb (exit_bb);
1007 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1008 gsi_prev_nondebug (&gsi);
1009 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1010 return;
1011
1012 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1013 {
1014 gsi = gsi_last_nondebug_bb (e->src);
1015 if (gsi_end_p (gsi))
1016 continue;
1017 stmt = gsi_stmt (gsi);
1018 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1019 && !gimple_omp_return_nowait_p (stmt))
1020 {
1021 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1022 in many cases. If there could be tasks queued, the barrier
1023 might be needed to let the tasks run before some local
1024 variable of the parallel that the task uses as shared
1025 runs out of scope. The task can be spawned either
1026 	     from within the current function (this would be easy to check)
1027 or from some function it calls and gets passed an address
1028 of such a variable. */
1029 if (any_addressable_vars < 0)
1030 {
1031 gomp_parallel *parallel_stmt
1032 = as_a <gomp_parallel *> (last_stmt (region->entry));
1033 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1034 tree local_decls, block, decl;
1035 unsigned ix;
1036
1037 any_addressable_vars = 0;
1038 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1039 if (TREE_ADDRESSABLE (decl))
1040 {
1041 any_addressable_vars = 1;
1042 break;
1043 }
1044 for (block = gimple_block (stmt);
1045 !any_addressable_vars
1046 && block
1047 && TREE_CODE (block) == BLOCK;
1048 block = BLOCK_SUPERCONTEXT (block))
1049 {
1050 for (local_decls = BLOCK_VARS (block);
1051 local_decls;
1052 local_decls = DECL_CHAIN (local_decls))
1053 if (TREE_ADDRESSABLE (local_decls))
1054 {
1055 any_addressable_vars = 1;
1056 break;
1057 }
1058 if (block == gimple_block (parallel_stmt))
1059 break;
1060 }
1061 }
1062 if (!any_addressable_vars)
1063 gimple_omp_return_set_nowait (stmt);
1064 }
1065 }
1066 }
1067
1068 static void
1069 remove_exit_barriers (struct omp_region *region)
1070 {
1071 if (region->type == GIMPLE_OMP_PARALLEL)
1072 remove_exit_barrier (region);
1073
1074 if (region->inner)
1075 {
1076 region = region->inner;
1077 remove_exit_barriers (region);
1078 while (region->next)
1079 {
1080 region = region->next;
1081 remove_exit_barriers (region);
1082 }
1083 }
1084 }
1085
1086 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
1087 calls. These can't be declared as const functions, but
1088 within one parallel body they are constant, so they can be
1089 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1090    which are declared const.  Similarly for a task body, except
1091    that in an untied task omp_get_thread_num () can change at any task
1092 scheduling point. */
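/* (E.g. a call to the external omp_get_num_threads () inside a parallel
   body is redirected to __builtin_omp_get_num_threads (), which is
   declared const and can therefore be optimized by later passes.)  */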
1093
1094 static void
1095 optimize_omp_library_calls (gimple *entry_stmt)
1096 {
1097 basic_block bb;
1098 gimple_stmt_iterator gsi;
1099 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1100 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1101 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1102 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1103 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1104 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1105 OMP_CLAUSE_UNTIED) != NULL);
1106
1107 FOR_EACH_BB_FN (bb, cfun)
1108 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1109 {
1110 gimple *call = gsi_stmt (gsi);
1111 tree decl;
1112
1113 if (is_gimple_call (call)
1114 && (decl = gimple_call_fndecl (call))
1115 && DECL_EXTERNAL (decl)
1116 && TREE_PUBLIC (decl)
1117 && DECL_INITIAL (decl) == NULL)
1118 {
1119 tree built_in;
1120
1121 if (DECL_NAME (decl) == thr_num_id)
1122 {
1123 /* In #pragma omp task untied omp_get_thread_num () can change
1124 during the execution of the task region. */
1125 if (untied_task)
1126 continue;
1127 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1128 }
1129 else if (DECL_NAME (decl) == num_thr_id)
1130 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1131 else
1132 continue;
1133
1134 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1135 || gimple_call_num_args (call) != 0)
1136 continue;
1137
1138 if (flag_exceptions && !TREE_NOTHROW (decl))
1139 continue;
1140
1141 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1142 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1143 TREE_TYPE (TREE_TYPE (built_in))))
1144 continue;
1145
1146 gimple_call_set_fndecl (call, built_in);
1147 }
1148 }
1149 }
1150
1151 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1152 regimplified. */
1153
1154 static tree
1155 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1156 {
1157 tree t = *tp;
1158
1159 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1160 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1161 return t;
1162
1163 if (TREE_CODE (t) == ADDR_EXPR)
1164 recompute_tree_invariant_for_addr_expr (t);
1165
1166 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1167 return NULL_TREE;
1168 }
1169
1170 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1171
1172 static void
1173 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1174 bool after)
1175 {
1176 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1177 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1178 !after, after ? GSI_CONTINUE_LINKING
1179 : GSI_SAME_STMT);
1180 gimple *stmt = gimple_build_assign (to, from);
1181 if (after)
1182 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1183 else
1184 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1185 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1186 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1187 {
1188 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1189 gimple_regimplify_operands (stmt, &gsi);
1190 }
1191 }
1192
1193 /* Expand the OpenMP parallel or task directive starting at REGION. */
1194
1195 static void
1196 expand_omp_taskreg (struct omp_region *region)
1197 {
1198 basic_block entry_bb, exit_bb, new_bb;
1199 struct function *child_cfun;
1200 tree child_fn, block, t;
1201 gimple_stmt_iterator gsi;
1202 gimple *entry_stmt, *stmt;
1203 edge e;
1204 vec<tree, va_gc> *ws_args;
1205
1206 entry_stmt = last_stmt (region->entry);
1207 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1208 && gimple_omp_task_taskwait_p (entry_stmt))
1209 {
1210 new_bb = region->entry;
1211 gsi = gsi_last_nondebug_bb (region->entry);
1212 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1213 gsi_remove (&gsi, true);
1214 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1215 return;
1216 }
1217
1218 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1219 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1220
1221 entry_bb = region->entry;
1222 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1223 exit_bb = region->cont;
1224 else
1225 exit_bb = region->exit;
1226
1227 if (is_combined_parallel (region))
1228 ws_args = region->ws_args;
1229 else
1230 ws_args = NULL;
1231
1232 if (child_cfun->cfg)
1233 {
1234 /* Due to inlining, it may happen that we have already outlined
1235 the region, in which case all we need to do is make the
1236 sub-graph unreachable and emit the parallel call. */
1237 edge entry_succ_e, exit_succ_e;
1238
1239 entry_succ_e = single_succ_edge (entry_bb);
1240
1241 gsi = gsi_last_nondebug_bb (entry_bb);
1242 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1243 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1244 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1245 gsi_remove (&gsi, true);
1246
1247 new_bb = entry_bb;
1248 if (exit_bb)
1249 {
1250 exit_succ_e = single_succ_edge (exit_bb);
1251 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1252 }
1253 remove_edge_and_dominated_blocks (entry_succ_e);
1254 }
1255 else
1256 {
1257 unsigned srcidx, dstidx, num;
1258
1259 /* If the parallel region needs data sent from the parent
1260 function, then the very first statement (except possible
1261 tree profile counter updates) of the parallel body
1262 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1263 &.OMP_DATA_O is passed as an argument to the child function,
1264 we need to replace it with the argument as seen by the child
1265 function.
1266
1267 In most cases, this will end up being the identity assignment
1268 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1269 a function call that has been inlined, the original PARM_DECL
1270 .OMP_DATA_I may have been converted into a different local
1271 	 variable, in which case we need to keep the assignment.  */
1272 if (gimple_omp_taskreg_data_arg (entry_stmt))
1273 {
1274 basic_block entry_succ_bb
1275 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1276 : FALLTHRU_EDGE (entry_bb)->dest;
1277 tree arg;
1278 gimple *parcopy_stmt = NULL;
1279
1280 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1281 {
1282 gimple *stmt;
1283
1284 gcc_assert (!gsi_end_p (gsi));
1285 stmt = gsi_stmt (gsi);
1286 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1287 continue;
1288
1289 if (gimple_num_ops (stmt) == 2)
1290 {
1291 tree arg = gimple_assign_rhs1 (stmt);
1292
1293 		  /* We ignore the subcode because we're
1294 effectively doing a STRIP_NOPS. */
1295
1296 if (TREE_CODE (arg) == ADDR_EXPR
1297 && (TREE_OPERAND (arg, 0)
1298 == gimple_omp_taskreg_data_arg (entry_stmt)))
1299 {
1300 parcopy_stmt = stmt;
1301 break;
1302 }
1303 }
1304 }
1305
1306 gcc_assert (parcopy_stmt != NULL);
1307 arg = DECL_ARGUMENTS (child_fn);
1308
1309 if (!gimple_in_ssa_p (cfun))
1310 {
1311 if (gimple_assign_lhs (parcopy_stmt) == arg)
1312 gsi_remove (&gsi, true);
1313 else
1314 {
1315 /* ?? Is setting the subcode really necessary ?? */
1316 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1317 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1318 }
1319 }
1320 else
1321 {
1322 tree lhs = gimple_assign_lhs (parcopy_stmt);
1323 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1324 /* We'd like to set the rhs to the default def in the child_fn,
1325 but it's too early to create ssa names in the child_fn.
1326 Instead, we set the rhs to the parm. In
1327 move_sese_region_to_fn, we introduce a default def for the
1328 	     parm, map the parm to its default def, and once we encounter
1329 this stmt, replace the parm with the default def. */
1330 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1331 update_stmt (parcopy_stmt);
1332 }
1333 }
1334
1335 /* Declare local variables needed in CHILD_CFUN. */
1336 block = DECL_INITIAL (child_fn);
1337 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1338 /* The gimplifier could record temporaries in parallel/task block
1339 	 rather than in the containing function's local_decls chain,
1340 which would mean cgraph missed finalizing them. Do it now. */
1341 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1342 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1343 varpool_node::finalize_decl (t);
1344 DECL_SAVED_TREE (child_fn) = NULL;
1345 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1346 gimple_set_body (child_fn, NULL);
1347 TREE_USED (block) = 1;
1348
1349 /* Reset DECL_CONTEXT on function arguments. */
1350 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1351 DECL_CONTEXT (t) = child_fn;
1352
1353 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1354 so that it can be moved to the child function. */
1355 gsi = gsi_last_nondebug_bb (entry_bb);
1356 stmt = gsi_stmt (gsi);
1357 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1358 || gimple_code (stmt) == GIMPLE_OMP_TASK
1359 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1360 e = split_block (entry_bb, stmt);
1361 gsi_remove (&gsi, true);
1362 entry_bb = e->dest;
1363 edge e2 = NULL;
1364 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1365 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1366 else
1367 {
1368 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1369 gcc_assert (e2->dest == region->exit);
1370 remove_edge (BRANCH_EDGE (entry_bb));
1371 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1372 gsi = gsi_last_nondebug_bb (region->exit);
1373 gcc_assert (!gsi_end_p (gsi)
1374 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1375 gsi_remove (&gsi, true);
1376 }
1377
1378 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1379 if (exit_bb)
1380 {
1381 gsi = gsi_last_nondebug_bb (exit_bb);
1382 gcc_assert (!gsi_end_p (gsi)
1383 && (gimple_code (gsi_stmt (gsi))
1384 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1385 stmt = gimple_build_return (NULL);
1386 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1387 gsi_remove (&gsi, true);
1388 }
1389
1390 /* Move the parallel region into CHILD_CFUN. */
1391
1392 if (gimple_in_ssa_p (cfun))
1393 {
1394 init_tree_ssa (child_cfun);
1395 init_ssa_operands (child_cfun);
1396 child_cfun->gimple_df->in_ssa_p = true;
1397 block = NULL_TREE;
1398 }
1399 else
1400 block = gimple_block (entry_stmt);
1401
1402 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1403 if (exit_bb)
1404 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1405 if (e2)
1406 {
1407 basic_block dest_bb = e2->dest;
1408 if (!exit_bb)
1409 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1410 remove_edge (e2);
1411 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1412 }
1413 /* When the OMP expansion process cannot guarantee an up-to-date
1414 	 loop tree, arrange for the child function to fix up loops.  */
1415 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1416 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1417
1418 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1419 num = vec_safe_length (child_cfun->local_decls);
1420 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1421 {
1422 t = (*child_cfun->local_decls)[srcidx];
1423 if (DECL_CONTEXT (t) == cfun->decl)
1424 continue;
1425 if (srcidx != dstidx)
1426 (*child_cfun->local_decls)[dstidx] = t;
1427 dstidx++;
1428 }
1429 if (dstidx != num)
1430 vec_safe_truncate (child_cfun->local_decls, dstidx);
1431
1432 /* Inform the callgraph about the new function. */
1433 child_cfun->curr_properties = cfun->curr_properties;
1434 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1435 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1436 cgraph_node *node = cgraph_node::get_create (child_fn);
1437 node->parallelized_function = 1;
1438 cgraph_node::add_new_function (child_fn, true);
1439
1440 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1441 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1442
1443 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1444 fixed in a following pass. */
1445 push_cfun (child_cfun);
1446 if (need_asm)
1447 assign_assembler_name_if_needed (child_fn);
1448
1449 if (optimize)
1450 optimize_omp_library_calls (entry_stmt);
1451 update_max_bb_count ();
1452 cgraph_edge::rebuild_edges ();
1453
1454 /* Some EH regions might become dead, see PR34608. If
1455 pass_cleanup_cfg isn't the first pass to happen with the
1456 new child, these dead EH edges might cause problems.
1457 Clean them up now. */
1458 if (flag_exceptions)
1459 {
1460 basic_block bb;
1461 bool changed = false;
1462
1463 FOR_EACH_BB_FN (bb, cfun)
1464 changed |= gimple_purge_dead_eh_edges (bb);
1465 if (changed)
1466 cleanup_tree_cfg ();
1467 }
1468 if (gimple_in_ssa_p (cfun))
1469 update_ssa (TODO_update_ssa);
1470 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1471 verify_loop_structure ();
1472 pop_cfun ();
1473
1474 if (dump_file && !gimple_in_ssa_p (cfun))
1475 {
1476 omp_any_child_fn_dumped = true;
1477 dump_function_header (dump_file, child_fn, dump_flags);
1478 dump_function_to_file (child_fn, dump_file, dump_flags);
1479 }
1480 }
1481
1482 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1483
1484 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1485 expand_parallel_call (region, new_bb,
1486 as_a <gomp_parallel *> (entry_stmt), ws_args);
1487 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1488 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1489 else
1490 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1491 if (gimple_in_ssa_p (cfun))
1492 update_ssa (TODO_update_ssa_only_virtuals);
1493 }
1494
1495 /* Information about members of an OpenACC collapsed loop nest. */
1496
1497 struct oacc_collapse
1498 {
1499 tree base; /* Base value. */
1500 tree iters; /* Number of steps. */
1501 tree step; /* Step size. */
1502 tree tile; /* Tile increment (if tiled). */
1503 tree outer; /* Tile iterator var. */
1504 };
1505
1506 /* Helper for expand_oacc_for. Determine collapsed loop information.
1507 Fill in COUNTS array. Emit any initialization code before GSI.
1508 Return the calculated outer loop bound of BOUND_TYPE. */
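/* (The returned bound is the product of the per-loop iteration counts;
   e.g. collapsing two loops of 4 and 5 iterations gives an outer bound
   of 20.)  */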
1509
1510 static tree
1511 expand_oacc_collapse_init (const struct omp_for_data *fd,
1512 gimple_stmt_iterator *gsi,
1513 oacc_collapse *counts, tree diff_type,
1514 tree bound_type, location_t loc)
1515 {
1516 tree tiling = fd->tiling;
1517 tree total = build_int_cst (bound_type, 1);
1518 int ix;
1519
1520 gcc_assert (integer_onep (fd->loop.step));
1521 gcc_assert (integer_zerop (fd->loop.n1));
1522
1523 /* When tiling, the first operand of the tile clause applies to the
1524 innermost loop, and we work outwards from there. Seems
1525 backwards, but whatever. */
1526 for (ix = fd->collapse; ix--;)
1527 {
1528 const omp_for_data_loop *loop = &fd->loops[ix];
1529
1530 tree iter_type = TREE_TYPE (loop->v);
1531 tree plus_type = iter_type;
1532
1533 gcc_assert (loop->cond_code == fd->loop.cond_code);
1534
1535 if (POINTER_TYPE_P (iter_type))
1536 plus_type = sizetype;
1537
1538 if (tiling)
1539 {
1540 tree num = build_int_cst (integer_type_node, fd->collapse);
1541 tree loop_no = build_int_cst (integer_type_node, ix);
1542 tree tile = TREE_VALUE (tiling);
1543 gcall *call
1544 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1545 /* gwv-outer=*/integer_zero_node,
1546 /* gwv-inner=*/integer_zero_node);
1547
1548 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1549 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1550 gimple_call_set_lhs (call, counts[ix].tile);
1551 gimple_set_location (call, loc);
1552 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1553
1554 tiling = TREE_CHAIN (tiling);
1555 }
1556 else
1557 {
1558 counts[ix].tile = NULL;
1559 counts[ix].outer = loop->v;
1560 }
1561
1562 tree b = loop->n1;
1563 tree e = loop->n2;
1564 tree s = loop->step;
1565 bool up = loop->cond_code == LT_EXPR;
1566 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1567 bool negating;
1568 tree expr;
1569
1570 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1571 true, GSI_SAME_STMT);
1572 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1573 true, GSI_SAME_STMT);
1574
1575 /* Convert the step, avoiding possible unsigned->signed overflow. */
1576 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1577 if (negating)
1578 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1579 s = fold_convert (diff_type, s);
1580 if (negating)
1581 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1582 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1583 true, GSI_SAME_STMT);
1584
1585 /* Determine the range, avoiding possible unsigned->signed overflow. */
1586 negating = !up && TYPE_UNSIGNED (iter_type);
1587 expr = fold_build2 (MINUS_EXPR, plus_type,
1588 fold_convert (plus_type, negating ? b : e),
1589 fold_convert (plus_type, negating ? e : b));
1590 expr = fold_convert (diff_type, expr);
1591 if (negating)
1592 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1593 tree range = force_gimple_operand_gsi
1594 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1595
1596 /* Determine number of iterations. */
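      /* That is, iters = (range - dir + step) / step; e.g. a loop running
	 v = 0, 3, 6, 9 against a bound of 10 with step 3 gives
	 (10 - 1 + 3) / 3 == 4.  (Illustrative example only.)  */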
1597 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1598 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1599 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1600
1601 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1602 true, GSI_SAME_STMT);
1603
1604 counts[ix].base = b;
1605 counts[ix].iters = iters;
1606 counts[ix].step = s;
1607
1608 total = fold_build2 (MULT_EXPR, bound_type, total,
1609 fold_convert (bound_type, iters));
1610 }
1611
1612 return total;
1613 }
1614
1615 /* Emit initializers for collapsed loop members. INNER is true if
1616 this is for the element loop of a TILE. IVAR is the outer
1617 loop iteration variable, from which collapsed loop iteration values
1618 are calculated. COUNTS array has been initialized by
1619    expand_oacc_collapse_init.  */
1620
1621 static void
1622 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1623 gimple_stmt_iterator *gsi,
1624 const oacc_collapse *counts, tree ivar,
1625 tree diff_type)
1626 {
1627 tree ivar_type = TREE_TYPE (ivar);
1628
1629 /* The most rapidly changing iteration variable is the innermost
1630 one. */
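  /* (For example, with two collapsed loops of 4 (outer) and 5 (inner)
     iterations, the inner index is IVAR % 5 and the outer index is
     IVAR / 5; the loop below peels these factors off from the innermost
     loop outwards.)  */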
1631 for (int ix = fd->collapse; ix--;)
1632 {
1633 const omp_for_data_loop *loop = &fd->loops[ix];
1634 const oacc_collapse *collapse = &counts[ix];
1635 tree v = inner ? loop->v : collapse->outer;
1636 tree iter_type = TREE_TYPE (v);
1637 tree plus_type = iter_type;
1638 enum tree_code plus_code = PLUS_EXPR;
1639 tree expr;
1640
1641 if (POINTER_TYPE_P (iter_type))
1642 {
1643 plus_code = POINTER_PLUS_EXPR;
1644 plus_type = sizetype;
1645 }
1646
1647 expr = ivar;
1648 if (ix)
1649 {
1650 tree mod = fold_convert (ivar_type, collapse->iters);
1651 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1652 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1653 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1654 true, GSI_SAME_STMT);
1655 }
1656
1657 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1658 fold_convert (diff_type, collapse->step));
1659 expr = fold_build2 (plus_code, iter_type,
1660 inner ? collapse->outer : collapse->base,
1661 fold_convert (plus_type, expr));
1662 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1663 true, GSI_SAME_STMT);
1664 gassign *ass = gimple_build_assign (v, expr);
1665 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1666 }
1667 }
1668
1669 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1670 of the combined collapse > 1 loop constructs, generate code like:
1671 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1672 if (cond3 is <)
1673 adj = STEP3 - 1;
1674 else
1675 adj = STEP3 + 1;
1676 count3 = (adj + N32 - N31) / STEP3;
1677 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1678 if (cond2 is <)
1679 adj = STEP2 - 1;
1680 else
1681 adj = STEP2 + 1;
1682 count2 = (adj + N22 - N21) / STEP2;
1683 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1684 if (cond1 is <)
1685 adj = STEP1 - 1;
1686 else
1687 adj = STEP1 + 1;
1688 count1 = (adj + N12 - N11) / STEP1;
1689 count = count1 * count2 * count3;
1690 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1691 count = 0;
1692 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1693 of the combined loop constructs, just initialize COUNTS array
1694 from the _looptemp_ clauses. For loop nests with non-rectangular
1695 loops, do this only for the rectangular loops. Then pick
1696 the loops which reference outer vars in their bound expressions
1697    and the loops which they refer to, and for this sub-nest compute
1698    the number of iterations.  For triangular loops use Faulhaber's formula,
1699 otherwise as a fallback, compute by iterating the loops.
1700 If e.g. the sub-nest is
1701 for (I = N11; I COND1 N12; I += STEP1)
1702 for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
1703 for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
1704 do:
1705 COUNT = 0;
1706 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
1707 for (tmpj = M21 * tmpi + N21;
1708 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
1709 {
1710 int tmpk1 = M31 * tmpj + N31;
1711 int tmpk2 = M32 * tmpj + N32;
1712 if (tmpk1 COND3 tmpk2)
1713 {
1714 if (COND3 is <)
1715 adj = STEP3 - 1;
1716 else
1717 adj = STEP3 + 1;
1718 COUNT += (adj + tmpk2 - tmpk1) / STEP3;
1719 }
1720 }
1721 and finally multiply the counts of the rectangular loops not
1722    in the sub-nest with COUNT.  Also, in counts[fd->last_nonrect]
1723    store the number of iterations of the loops from fd->first_nonrect
1724 to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
1725 by the counts of rectangular loops not referenced in any non-rectangular
1726 loops sandwiched in between those. */
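
/* For instance (an illustrative case, not spelled out above), for the
   sub-nest
     for (I = 0; I < N; I++)
       for (J = 0; J < I; J++)
   the inner loop runs I times for each I, so by Faulhaber's formula
   COUNT = 0 + 1 + ... + (N - 1) = N * (N - 1) / 2, and that value is
   what ends up in counts[fd->last_nonrect].  */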
1727
1728 /* NOTE: It *could* be better to moosh all of the BBs together,
1729 creating one larger BB with all the computation and the unexpected
1730 jump at the end. I.e.
1731
1732 bool zero3, zero2, zero1, zero;
1733
1734 zero3 = N32 c3 N31;
1735 count3 = (N32 - N31) /[cl] STEP3;
1736 zero2 = N22 c2 N21;
1737 count2 = (N22 - N21) /[cl] STEP2;
1738 zero1 = N12 c1 N11;
1739 count1 = (N12 - N11) /[cl] STEP1;
1740 zero = zero3 || zero2 || zero1;
1741 count = count1 * count2 * count3;
1742 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1743
1744 After all, we expect zero to be false, and thus we expect to have to
1745 evaluate all of the comparison expressions, so short-circuiting
1746 oughtn't be a win. Since the condition isn't protecting a
1747 denominator, we're not concerned about divide-by-zero, so we can
1748 fully evaluate count even if a numerator turned out to be wrong.
1749
1750 It seems like putting this all together would create much better
1751 scheduling opportunities, and less pressure on the chip's branch
1752 predictor. */
1753
1754 static void
1755 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1756 basic_block &entry_bb, tree *counts,
1757 basic_block &zero_iter1_bb, int &first_zero_iter1,
1758 basic_block &zero_iter2_bb, int &first_zero_iter2,
1759 basic_block &l2_dom_bb)
1760 {
1761 tree t, type = TREE_TYPE (fd->loop.v);
1762 edge e, ne;
1763 int i;
1764
1765 /* Collapsed loops need work for expansion into SSA form. */
1766 gcc_assert (!gimple_in_ssa_p (cfun));
1767
1768 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1769 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1770 {
1771 gcc_assert (fd->ordered == 0);
1772 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1773 isn't supposed to be handled, as the inner loop doesn't
1774 use it. */
1775 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1776 OMP_CLAUSE__LOOPTEMP_);
1777 gcc_assert (innerc);
1778 for (i = 0; i < fd->collapse; i++)
1779 {
1780 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1781 OMP_CLAUSE__LOOPTEMP_);
1782 gcc_assert (innerc);
1783 if (i)
1784 counts[i] = OMP_CLAUSE_DECL (innerc);
1785 else
1786 counts[0] = NULL_TREE;
1787 }
1788 if (fd->non_rect
1789 && fd->last_nonrect == fd->first_nonrect + 1
1790 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
1791 {
1792 tree c[4];
1793 for (i = 0; i < 4; i++)
1794 {
1795 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1796 OMP_CLAUSE__LOOPTEMP_);
1797 gcc_assert (innerc);
1798 c[i] = OMP_CLAUSE_DECL (innerc);
1799 }
1800 counts[0] = c[0];
1801 fd->first_inner_iterations = c[1];
1802 fd->factor = c[2];
1803 fd->adjn1 = c[3];
1804 }
1805 return;
1806 }
1807
1808 for (i = fd->collapse; i < fd->ordered; i++)
1809 {
1810 tree itype = TREE_TYPE (fd->loops[i].v);
1811 counts[i] = NULL_TREE;
1812 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1813 fold_convert (itype, fd->loops[i].n1),
1814 fold_convert (itype, fd->loops[i].n2));
1815 if (t && integer_zerop (t))
1816 {
1817 for (i = fd->collapse; i < fd->ordered; i++)
1818 counts[i] = build_int_cst (type, 0);
1819 break;
1820 }
1821 }
1822 bool rect_count_seen = false;
1823 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1824 {
1825 tree itype = TREE_TYPE (fd->loops[i].v);
1826
1827 if (i >= fd->collapse && counts[i])
1828 continue;
1829 if (fd->non_rect)
1830 {
1831 /* Skip loops that use outer iterators in their expressions
1832 during this phase. */
1833 if (fd->loops[i].m1 || fd->loops[i].m2)
1834 {
1835 counts[i] = build_zero_cst (type);
1836 continue;
1837 }
1838 }
1839 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1840 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1841 fold_convert (itype, fd->loops[i].n1),
1842 fold_convert (itype, fd->loops[i].n2)))
1843 == NULL_TREE || !integer_onep (t)))
1844 {
1845 gcond *cond_stmt;
1846 tree n1, n2;
1847 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1848 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1849 true, GSI_SAME_STMT);
1850 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1851 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1852 true, GSI_SAME_STMT);
1853 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1854 NULL_TREE, NULL_TREE);
1855 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1856 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1857 expand_omp_regimplify_p, NULL, NULL)
1858 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1859 expand_omp_regimplify_p, NULL, NULL))
1860 {
1861 *gsi = gsi_for_stmt (cond_stmt);
1862 gimple_regimplify_operands (cond_stmt, gsi);
1863 }
1864 e = split_block (entry_bb, cond_stmt);
1865 basic_block &zero_iter_bb
1866 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1867 int &first_zero_iter
1868 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1869 if (zero_iter_bb == NULL)
1870 {
1871 gassign *assign_stmt;
1872 first_zero_iter = i;
1873 zero_iter_bb = create_empty_bb (entry_bb);
1874 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1875 *gsi = gsi_after_labels (zero_iter_bb);
1876 if (i < fd->collapse)
1877 assign_stmt = gimple_build_assign (fd->loop.n2,
1878 build_zero_cst (type));
1879 else
1880 {
1881 counts[i] = create_tmp_reg (type, ".count");
1882 assign_stmt
1883 = gimple_build_assign (counts[i], build_zero_cst (type));
1884 }
1885 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1886 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1887 entry_bb);
1888 }
1889 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1890 ne->probability = profile_probability::very_unlikely ();
1891 e->flags = EDGE_TRUE_VALUE;
1892 e->probability = ne->probability.invert ();
1893 if (l2_dom_bb == NULL)
1894 l2_dom_bb = entry_bb;
1895 entry_bb = e->dest;
1896 *gsi = gsi_last_nondebug_bb (entry_bb);
1897 }
1898
1899 if (POINTER_TYPE_P (itype))
1900 itype = signed_type_for (itype);
1901 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1902 ? -1 : 1));
1903 t = fold_build2 (PLUS_EXPR, itype,
1904 fold_convert (itype, fd->loops[i].step), t);
1905 t = fold_build2 (PLUS_EXPR, itype, t,
1906 fold_convert (itype, fd->loops[i].n2));
1907 t = fold_build2 (MINUS_EXPR, itype, t,
1908 fold_convert (itype, fd->loops[i].n1));
1909 /* ?? We could probably use CEIL_DIV_EXPR instead of
1910 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1911 generate the same code in the end because generically we
1912 don't know that the values involved must be negative for
1913 GT?? */
1914 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1915 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1916 fold_build1 (NEGATE_EXPR, itype, t),
1917 fold_build1 (NEGATE_EXPR, itype,
1918 fold_convert (itype,
1919 fd->loops[i].step)));
1920 else
1921 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1922 fold_convert (itype, fd->loops[i].step));
1923 t = fold_convert (type, t);
1924 if (TREE_CODE (t) == INTEGER_CST)
1925 counts[i] = t;
1926 else
1927 {
1928 if (i < fd->collapse || i != first_zero_iter2)
1929 counts[i] = create_tmp_reg (type, ".count");
1930 expand_omp_build_assign (gsi, counts[i], t);
1931 }
1932 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1933 {
1934 if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
1935 continue;
1936 if (!rect_count_seen)
1937 {
1938 t = counts[i];
1939 rect_count_seen = true;
1940 }
1941 else
1942 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1943 expand_omp_build_assign (gsi, fd->loop.n2, t);
1944 }
1945 }
1946 if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
1947 {
1948 gcc_assert (fd->last_nonrect != -1);
1949
1950 counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
1951 expand_omp_build_assign (gsi, counts[fd->last_nonrect],
1952 build_zero_cst (type));
1953 for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
1954 if (fd->loops[i].m1
1955 || fd->loops[i].m2
1956 || fd->loops[i].non_rect_referenced)
1957 break;
1958 if (i == fd->last_nonrect
1959 && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
1960 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
1961 {
1962 int o = fd->first_nonrect;
1963 tree itype = TREE_TYPE (fd->loops[o].v);
1964 tree n1o = create_tmp_reg (itype, ".n1o");
1965 t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
1966 expand_omp_build_assign (gsi, n1o, t);
1967 tree n2o = create_tmp_reg (itype, ".n2o");
1968 t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
1969 expand_omp_build_assign (gsi, n2o, t);
1970 if (fd->loops[i].m1 && fd->loops[i].m2)
1971 t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
1972 unshare_expr (fd->loops[i].m1));
1973 else if (fd->loops[i].m1)
1974 t = fold_unary (NEGATE_EXPR, itype,
1975 unshare_expr (fd->loops[i].m1));
1976 else
1977 t = unshare_expr (fd->loops[i].m2);
1978 tree m2minusm1
1979 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1980 true, GSI_SAME_STMT);
1981
1982 gimple_stmt_iterator gsi2 = *gsi;
1983 gsi_prev (&gsi2);
1984 e = split_block (entry_bb, gsi_stmt (gsi2));
1985 e = split_block (e->dest, (gimple *) NULL);
1986 basic_block bb1 = e->src;
1987 entry_bb = e->dest;
1988 *gsi = gsi_after_labels (entry_bb);
1989
1990 gsi2 = gsi_after_labels (bb1);
1991 tree ostep = fold_convert (itype, fd->loops[o].step);
1992 t = build_int_cst (itype, (fd->loops[o].cond_code
1993 == LT_EXPR ? -1 : 1));
1994 t = fold_build2 (PLUS_EXPR, itype, ostep, t);
1995 t = fold_build2 (PLUS_EXPR, itype, t, n2o);
1996 t = fold_build2 (MINUS_EXPR, itype, t, n1o);
1997 if (TYPE_UNSIGNED (itype)
1998 && fd->loops[o].cond_code == GT_EXPR)
1999 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2000 fold_build1 (NEGATE_EXPR, itype, t),
2001 fold_build1 (NEGATE_EXPR, itype, ostep));
2002 else
2003 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
2004 tree outer_niters
2005 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2006 true, GSI_SAME_STMT);
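	  /* The value the outer iteration variable has in its last
	     iteration: N1O + (OUTER_NITERS - 1) * OSTEP.  */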
2007 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2008 build_one_cst (itype));
2009 t = fold_build2 (MULT_EXPR, itype, t, ostep);
2010 t = fold_build2 (PLUS_EXPR, itype, n1o, t);
2011 tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2012 true, GSI_SAME_STMT);
2013 tree n1, n2, n1e, n2e;
2014 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2015 if (fd->loops[i].m1)
2016 {
2017 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2018 n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
2019 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2020 }
2021 else
2022 n1 = t;
2023 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2024 true, GSI_SAME_STMT);
2025 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2026 if (fd->loops[i].m2)
2027 {
2028 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2029 n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
2030 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2031 }
2032 else
2033 n2 = t;
2034 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2035 true, GSI_SAME_STMT);
2036 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2037 if (fd->loops[i].m1)
2038 {
2039 n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2040 n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
2041 n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
2042 }
2043 else
2044 n1e = t;
2045 n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
2046 true, GSI_SAME_STMT);
2047 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2048 if (fd->loops[i].m2)
2049 {
2050 n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2051 n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
2052 n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
2053 }
2054 else
2055 n2e = t;
2056 n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
2057 true, GSI_SAME_STMT);
2058 gcond *cond_stmt
2059 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2060 NULL_TREE, NULL_TREE);
2061 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2062 e = split_block (bb1, cond_stmt);
2063 e->flags = EDGE_TRUE_VALUE;
2064 e->probability = profile_probability::likely ().guessed ();
2065 basic_block bb2 = e->dest;
2066 gsi2 = gsi_after_labels (bb2);
2067
2068 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2069 NULL_TREE, NULL_TREE);
2070 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2071 e = split_block (bb2, cond_stmt);
2072 e->flags = EDGE_TRUE_VALUE;
2073 e->probability = profile_probability::likely ().guessed ();
2074 gsi2 = gsi_after_labels (e->dest);
2075
2076 tree step = fold_convert (itype, fd->loops[i].step);
2077 t = build_int_cst (itype, (fd->loops[i].cond_code
2078 == LT_EXPR ? -1 : 1));
2079 t = fold_build2 (PLUS_EXPR, itype, step, t);
2080 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2081 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2082 if (TYPE_UNSIGNED (itype)
2083 && fd->loops[i].cond_code == GT_EXPR)
2084 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2085 fold_build1 (NEGATE_EXPR, itype, t),
2086 fold_build1 (NEGATE_EXPR, itype, step));
2087 else
2088 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2089 tree first_inner_iterations
2090 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2091 true, GSI_SAME_STMT);
2092 t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
2093 if (TYPE_UNSIGNED (itype)
2094 && fd->loops[i].cond_code == GT_EXPR)
2095 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2096 fold_build1 (NEGATE_EXPR, itype, t),
2097 fold_build1 (NEGATE_EXPR, itype, step));
2098 else
2099 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2100 tree factor
2101 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2102 true, GSI_SAME_STMT);
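	  /* By Faulhaber's formula the total number of iterations of the
	     sub-nest is
	       OUTER_NITERS * FIRST_INNER_ITERATIONS
	       + FACTOR * OUTER_NITERS * (OUTER_NITERS - 1) / 2
	     where FACTOR is the change of the inner iteration count per
	     outer iteration.  */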
2103 t = fold_build2 (MINUS_EXPR, itype, outer_niters,
2104 build_one_cst (itype));
2105 t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
2106 t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
2107 t = fold_build2 (MULT_EXPR, itype, factor, t);
2108 t = fold_build2 (PLUS_EXPR, itype,
2109 fold_build2 (MULT_EXPR, itype, outer_niters,
2110 first_inner_iterations), t);
2111 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
2112 fold_convert (type, t));
2113
2114 basic_block bb3 = create_empty_bb (bb1);
2115 add_bb_to_loop (bb3, bb1->loop_father);
2116
2117 e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
2118 e->probability = profile_probability::unlikely ().guessed ();
2119
2120 gsi2 = gsi_after_labels (bb3);
2121 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
2122 NULL_TREE, NULL_TREE);
2123 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2124 e = split_block (bb3, cond_stmt);
2125 e->flags = EDGE_TRUE_VALUE;
2126 e->probability = profile_probability::likely ().guessed ();
2127 basic_block bb4 = e->dest;
2128
2129 ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
2130 ne->probability = e->probability.invert ();
2131
2132 basic_block bb5 = create_empty_bb (bb2);
2133 add_bb_to_loop (bb5, bb2->loop_father);
2134
2135 ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
2136 ne->probability = profile_probability::unlikely ().guessed ();
2137
2138 for (int j = 0; j < 2; j++)
2139 {
2140 gsi2 = gsi_after_labels (j ? bb5 : bb4);
2141 t = fold_build2 (MINUS_EXPR, itype,
2142 unshare_expr (fd->loops[i].n1),
2143 unshare_expr (fd->loops[i].n2));
2144 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
2145 tree tem
2146 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2147 true, GSI_SAME_STMT);
2148 t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
2149 t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
2150 t = fold_build2 (MINUS_EXPR, itype, tem, t);
2151 tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2152 true, GSI_SAME_STMT);
2153 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2154 if (fd->loops[i].m1)
2155 {
2156 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2157 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
2158 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2159 }
2160 else
2161 n1 = t;
2162 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2163 true, GSI_SAME_STMT);
2164 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2165 if (fd->loops[i].m2)
2166 {
2167 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2168 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
2169 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2170 }
2171 else
2172 n2 = t;
2173 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2174 true, GSI_SAME_STMT);
2175 expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
2176
2177 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2178 NULL_TREE, NULL_TREE);
2179 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2180 e = split_block (gsi_bb (gsi2), cond_stmt);
2181 e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
2182 e->probability = profile_probability::unlikely ().guessed ();
2183 ne = make_edge (e->src, bb1,
2184 j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
2185 ne->probability = e->probability.invert ();
2186 gsi2 = gsi_after_labels (e->dest);
2187
2188 t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
2189 expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
2190
2191 make_edge (e->dest, bb1, EDGE_FALLTHRU);
2192 }
2193
2194 set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
2195 set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
2196 set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
2197
2198 if (fd->first_nonrect + 1 == fd->last_nonrect)
2199 {
2200 fd->first_inner_iterations = first_inner_iterations;
2201 fd->factor = factor;
2202 fd->adjn1 = n1o;
2203 }
2204 }
2205 else
2206 {
2207 /* Fallback implementation. Evaluate the loops with m1/m2
2208 non-NULL as well as their outer loops at runtime using temporaries
2209 instead of the original iteration variables, and in the
2210 body just bump the counter. */
2211 gimple_stmt_iterator gsi2 = *gsi;
2212 gsi_prev (&gsi2);
2213 e = split_block (entry_bb, gsi_stmt (gsi2));
2214 e = split_block (e->dest, (gimple *) NULL);
2215 basic_block cur_bb = e->src;
2216 basic_block next_bb = e->dest;
2217 entry_bb = e->dest;
2218 *gsi = gsi_after_labels (entry_bb);
2219
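	  /* vs[] holds temporaries standing in for the iteration variables
	     of the loops evaluated at runtime (those with m1/m2 set, or
	     referenced by such loops).  */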
2220 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2221 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2222
2223 for (i = 0; i <= fd->last_nonrect; i++)
2224 {
2225 if (fd->loops[i].m1 == NULL_TREE
2226 && fd->loops[i].m2 == NULL_TREE
2227 && !fd->loops[i].non_rect_referenced)
2228 continue;
2229
2230 tree itype = TREE_TYPE (fd->loops[i].v);
2231
2232 gsi2 = gsi_after_labels (cur_bb);
2233 tree n1, n2;
2234 t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
2235 if (fd->loops[i].m1)
2236 {
2237 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
2238 n1 = fold_build2 (MULT_EXPR, itype,
2239 vs[i - fd->loops[i].outer], n1);
2240 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2241 }
2242 else
2243 n1 = t;
2244 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2245 true, GSI_SAME_STMT);
2246 if (i < fd->last_nonrect)
2247 {
2248 vs[i] = create_tmp_reg (itype, ".it");
2249 expand_omp_build_assign (&gsi2, vs[i], n1);
2250 }
2251 t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
2252 if (fd->loops[i].m2)
2253 {
2254 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2255 n2 = fold_build2 (MULT_EXPR, itype,
2256 vs[i - fd->loops[i].outer], n2);
2257 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2258 }
2259 else
2260 n2 = t;
2261 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2262 true, GSI_SAME_STMT);
2263 if (i == fd->last_nonrect)
2264 {
2265 gcond *cond_stmt
2266 = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
2267 NULL_TREE, NULL_TREE);
2268 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2269 e = split_block (cur_bb, cond_stmt);
2270 e->flags = EDGE_TRUE_VALUE;
2271 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2272 e->probability = profile_probability::likely ().guessed ();
2273 ne->probability = e->probability.invert ();
2274 gsi2 = gsi_after_labels (e->dest);
2275
2276 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
2277 ? -1 : 1));
2278 t = fold_build2 (PLUS_EXPR, itype,
2279 fold_convert (itype, fd->loops[i].step), t);
2280 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2281 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2282 tree step = fold_convert (itype, fd->loops[i].step);
2283 if (TYPE_UNSIGNED (itype)
2284 && fd->loops[i].cond_code == GT_EXPR)
2285 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2286 fold_build1 (NEGATE_EXPR, itype, t),
2287 fold_build1 (NEGATE_EXPR, itype, step));
2288 else
2289 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2290 t = fold_convert (type, t);
2291 t = fold_build2 (PLUS_EXPR, type,
2292 counts[fd->last_nonrect], t);
2293 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2294 true, GSI_SAME_STMT);
2295 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
2296 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2297 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2298 break;
2299 }
2300 e = split_block (cur_bb, last_stmt (cur_bb));
2301
2302 basic_block new_cur_bb = create_empty_bb (cur_bb);
2303 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2304
2305 gsi2 = gsi_after_labels (e->dest);
2306 tree step = fold_convert (itype,
2307 unshare_expr (fd->loops[i].step));
2308 t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
2309 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2310 true, GSI_SAME_STMT);
2311 expand_omp_build_assign (&gsi2, vs[i], t);
2312
2313 ne = split_block (e->dest, last_stmt (e->dest));
2314 gsi2 = gsi_after_labels (ne->dest);
2315
2316 gcond *cond_stmt
2317 = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
2318 NULL_TREE, NULL_TREE);
2319 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2320 edge e3, e4;
2321 if (next_bb == entry_bb)
2322 {
2323 e3 = find_edge (ne->dest, next_bb);
2324 e3->flags = EDGE_FALSE_VALUE;
2325 }
2326 else
2327 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2328 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2329 e4->probability = profile_probability::likely ().guessed ();
2330 e3->probability = e4->probability.invert ();
2331 basic_block esrc = e->src;
2332 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2333 cur_bb = new_cur_bb;
2334 basic_block latch_bb = next_bb;
2335 next_bb = e->dest;
2336 remove_edge (e);
2337 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2338 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2339 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2340 }
2341 }
2342 t = NULL_TREE;
2343 for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
2344 if (!fd->loops[i].non_rect_referenced
2345 && fd->loops[i].m1 == NULL_TREE
2346 && fd->loops[i].m2 == NULL_TREE)
2347 {
2348 if (t == NULL_TREE)
2349 t = counts[i];
2350 else
2351 t = fold_build2 (MULT_EXPR, type, t, counts[i]);
2352 }
2353 if (t)
2354 {
2355 t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
2356 expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
2357 }
2358 if (!rect_count_seen)
2359 t = counts[fd->last_nonrect];
2360 else
2361 t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
2362 counts[fd->last_nonrect]);
2363 expand_omp_build_assign (gsi, fd->loop.n2, t);
2364 }
2365 else if (fd->non_rect)
2366 {
2367 tree t = fd->loop.n2;
2368 gcc_assert (TREE_CODE (t) == INTEGER_CST);
2369 int non_rect_referenced = 0, non_rect = 0;
2370 for (i = 0; i < fd->collapse; i++)
2371 {
2372 if ((i < fd->first_nonrect || i > fd->last_nonrect)
2373 && !integer_zerop (counts[i]))
2374 t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
2375 if (fd->loops[i].non_rect_referenced)
2376 non_rect_referenced++;
2377 if (fd->loops[i].m1 || fd->loops[i].m2)
2378 non_rect++;
2379 }
2380 gcc_assert (non_rect == 1 && non_rect_referenced == 1);
2381 counts[fd->last_nonrect] = t;
2382 }
2383 }
2384
2385 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
2386 T = V;
2387 V3 = N31 + (T % count3) * STEP3;
2388 T = T / count3;
2389 V2 = N21 + (T % count2) * STEP2;
2390 T = T / count2;
2391 V1 = N11 + T * STEP1;
2392 if this loop doesn't have an inner loop construct combined with it.
2393 If it does have an inner loop construct combined with it and the
2394 iteration count isn't known constant, store values from counts array
2395 into its _looptemp_ temporaries instead.
2396 For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
2397 inclusive), use the count of all those loops together, and either
2398 find quadratic etc. equation roots, or as a fallback, do:
2399 COUNT = 0;
2400 for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
2401 for (tmpj = M21 * tmpi + N21;
2402 tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
2403 {
2404 int tmpk1 = M31 * tmpj + N31;
2405 int tmpk2 = M32 * tmpj + N32;
2406 if (tmpk1 COND3 tmpk2)
2407 {
2408 if (COND3 is <)
2409 adj = STEP3 - 1;
2410 else
2411 adj = STEP3 + 1;
2412 int temp = (adj + tmpk2 - tmpk1) / STEP3;
2413 if (COUNT + temp > T)
2414 {
2415 V1 = tmpi;
2416 V2 = tmpj;
2417 V3 = tmpk1 + (T - COUNT) * STEP3;
2418 goto done;
2419 }
2420 else
2421 COUNT += temp;
2422 }
2423 }
2424 done:;
2425 but for optional innermost or outermost rectangular loops that aren't
2426 referenced by other loop expressions keep doing the division/modulo. */
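
/* As a concrete (purely illustrative) example of the rectangular case, for
     for (V1 = 0; V1 < 4; V1++)
       for (V2 = 0; V2 < 3; V2++)
   count2 is 3, so logical iteration T = 7 yields
     V2 = 0 + (7 % 3) * 1 = 1;  T = 7 / 3 = 2;  V1 = 0 + 2 * 1 = 2;
   i.e. the eighth iteration of the collapsed nest executes V1 == 2,
   V2 == 1.  */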
2427
2428 static void
2429 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
2430 tree *counts, tree *nonrect_bounds,
2431 gimple *inner_stmt, tree startvar)
2432 {
2433 int i;
2434 if (gimple_omp_for_combined_p (fd->for_stmt))
2435 {
2436 /* If fd->loop.n2 is constant, then no propagation of the counts
2437 is needed, they are constant. */
2438 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
2439 return;
2440
2441 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
2442 ? gimple_omp_taskreg_clauses (inner_stmt)
2443 : gimple_omp_for_clauses (inner_stmt);
2444 /* First two _looptemp_ clauses are for istart/iend, counts[0]
2445 isn't supposed to be handled, as the inner loop doesn't
2446 use it. */
2447 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
2448 gcc_assert (innerc);
2449 int count = 0;
2450 if (fd->non_rect
2451 && fd->last_nonrect == fd->first_nonrect + 1
2452 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
2453 count = 4;
2454 for (i = 0; i < fd->collapse + count; i++)
2455 {
2456 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2457 OMP_CLAUSE__LOOPTEMP_);
2458 gcc_assert (innerc);
2459 if (i)
2460 {
2461 tree tem = OMP_CLAUSE_DECL (innerc);
2462 tree t;
2463 if (i < fd->collapse)
2464 t = counts[i];
2465 else
2466 switch (i - fd->collapse)
2467 {
2468 case 0: t = counts[0]; break;
2469 case 1: t = fd->first_inner_iterations; break;
2470 case 2: t = fd->factor; break;
2471 case 3: t = fd->adjn1; break;
2472 default: gcc_unreachable ();
2473 }
2474 t = fold_convert (TREE_TYPE (tem), t);
2475 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2476 false, GSI_CONTINUE_LINKING);
2477 gassign *stmt = gimple_build_assign (tem, t);
2478 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2479 }
2480 }
2481 return;
2482 }
2483
2484 tree type = TREE_TYPE (fd->loop.v);
2485 tree tem = create_tmp_reg (type, ".tem");
2486 gassign *stmt = gimple_build_assign (tem, startvar);
2487 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2488
2489 for (i = fd->collapse - 1; i >= 0; i--)
2490 {
2491 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
2492 itype = vtype;
2493 if (POINTER_TYPE_P (vtype))
2494 itype = signed_type_for (vtype);
2495 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2496 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
2497 else
2498 t = tem;
2499 if (i == fd->last_nonrect)
2500 {
2501 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
2502 false, GSI_CONTINUE_LINKING);
2503 tree stopval = t;
2504 tree idx = create_tmp_reg (type, ".count");
2505 expand_omp_build_assign (gsi, idx,
2506 build_zero_cst (type), true);
2507 basic_block bb_triang = NULL, bb_triang_dom = NULL;
2508 if (fd->first_nonrect + 1 == fd->last_nonrect
2509 && (TREE_CODE (fd->loop.n2) == INTEGER_CST
2510 || fd->first_inner_iterations)
2511 && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
2512 != CODE_FOR_nothing)
2513 && !integer_zerop (fd->loop.n2))
2514 {
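	      /* When the non-rectangular sub-nest is just a pair of loops
		 (a rectangular outer loop and an inner loop whose bounds
		 depend on it), the iteration variables can be recovered
		 from the logical iteration number directly by solving a
		 quadratic equation below, provided the target can expand
		 a sqrt instruction.  */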
2515 tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
2516 tree itype = TREE_TYPE (fd->loops[i].v);
2517 tree first_inner_iterations = fd->first_inner_iterations;
2518 tree factor = fd->factor;
2519 gcond *cond_stmt
2520 = gimple_build_cond (NE_EXPR, factor,
2521 build_zero_cst (TREE_TYPE (factor)),
2522 NULL_TREE, NULL_TREE);
2523 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2524 edge e = split_block (gsi_bb (*gsi), cond_stmt);
2525 basic_block bb0 = e->src;
2526 e->flags = EDGE_TRUE_VALUE;
2527 e->probability = profile_probability::likely ();
2528 bb_triang_dom = bb0;
2529 *gsi = gsi_after_labels (e->dest);
2530 tree slltype = long_long_integer_type_node;
2531 tree ulltype = long_long_unsigned_type_node;
2532 tree stopvalull = fold_convert (ulltype, stopval);
2533 stopvalull
2534 = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
2535 false, GSI_CONTINUE_LINKING);
2536 first_inner_iterations
2537 = fold_convert (slltype, first_inner_iterations);
2538 first_inner_iterations
2539 = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
2540 NULL_TREE, false,
2541 GSI_CONTINUE_LINKING);
2542 factor = fold_convert (slltype, factor);
2543 factor
2544 = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
2545 false, GSI_CONTINUE_LINKING);
2546 tree first_inner_iterationsd
2547 = fold_build1 (FLOAT_EXPR, double_type_node,
2548 first_inner_iterations);
2549 first_inner_iterationsd
2550 = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
2551 NULL_TREE, false,
2552 GSI_CONTINUE_LINKING);
2553 tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
2554 factor);
2555 factord = force_gimple_operand_gsi (gsi, factord, true,
2556 NULL_TREE, false,
2557 GSI_CONTINUE_LINKING);
2558 tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
2559 stopvalull);
2560 stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
2561 NULL_TREE, false,
2562 GSI_CONTINUE_LINKING);
2563 /* Temporarily disable flag_rounding_math, values will be
2564 decimal numbers divided by 2 and worst case imprecisions
2565 due to too large values ought to be caught later by the
2566 checks for fallback. */
2567 int save_flag_rounding_math = flag_rounding_math;
2568 flag_rounding_math = 0;
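	      /* The first C complete outer iterations cover
		 C * FIRST_INNER_ITERATIONS + FACTOR * C * (C - 1) / 2
		 logical iterations, so the outer index C for iteration
		 STOPVAL is the truncated positive root of
		 FACTOR/2 * C^2 + (FIRST_INNER_ITERATIONS - FACTOR/2) * C
		 == STOPVAL, i.e.
		 C = (sqrt (T3 * T3 + 2 * FACTOR * STOPVAL) - T3) / FACTOR
		 with T3 = FIRST_INNER_ITERATIONS - FACTOR / 2.  The
		 comparisons against D below guard against the floating
		 point computation being off.  */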
2569 t = fold_build2 (RDIV_EXPR, double_type_node, factord,
2570 build_real (double_type_node, dconst2));
2571 tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
2572 first_inner_iterationsd, t);
2573 t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
2574 GSI_CONTINUE_LINKING);
2575 t = fold_build2 (MULT_EXPR, double_type_node, factord,
2576 build_real (double_type_node, dconst2));
2577 t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
2578 t = fold_build2 (PLUS_EXPR, double_type_node, t,
2579 fold_build2 (MULT_EXPR, double_type_node,
2580 t3, t3));
2581 flag_rounding_math = save_flag_rounding_math;
2582 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2583 GSI_CONTINUE_LINKING);
2584 if (flag_exceptions
2585 && cfun->can_throw_non_call_exceptions
2586 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
2587 {
2588 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
2589 build_zero_cst (double_type_node));
2590 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
2591 false, GSI_CONTINUE_LINKING);
2592 cond_stmt = gimple_build_cond (NE_EXPR, tem,
2593 boolean_false_node,
2594 NULL_TREE, NULL_TREE);
2595 }
2596 else
2597 cond_stmt
2598 = gimple_build_cond (LT_EXPR, t,
2599 build_zero_cst (double_type_node),
2600 NULL_TREE, NULL_TREE);
2601 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2602 e = split_block (gsi_bb (*gsi), cond_stmt);
2603 basic_block bb1 = e->src;
2604 e->flags = EDGE_FALSE_VALUE;
2605 e->probability = profile_probability::very_likely ();
2606 *gsi = gsi_after_labels (e->dest);
2607 gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
2608 tree sqrtr = create_tmp_var (double_type_node);
2609 gimple_call_set_lhs (call, sqrtr);
2610 gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
2611 t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
2612 t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
2613 t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
2614 tree c = create_tmp_var (ulltype);
2615 tree d = create_tmp_var (ulltype);
2616 expand_omp_build_assign (gsi, c, t, true);
2617 t = fold_build2 (MINUS_EXPR, ulltype, c,
2618 build_one_cst (ulltype));
2619 t = fold_build2 (MULT_EXPR, ulltype, c, t);
2620 t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
2621 t = fold_build2 (MULT_EXPR, ulltype,
2622 fold_convert (ulltype, fd->factor), t);
2623 tree t2
2624 = fold_build2 (MULT_EXPR, ulltype, c,
2625 fold_convert (ulltype,
2626 fd->first_inner_iterations));
2627 t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
2628 expand_omp_build_assign (gsi, d, t, true);
2629 t = fold_build2 (MULT_EXPR, ulltype,
2630 fold_convert (ulltype, fd->factor), c);
2631 t = fold_build2 (PLUS_EXPR, ulltype,
2632 t, fold_convert (ulltype,
2633 fd->first_inner_iterations));
2634 t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2635 GSI_CONTINUE_LINKING);
2636 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
2637 NULL_TREE, NULL_TREE);
2638 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2639 e = split_block (gsi_bb (*gsi), cond_stmt);
2640 basic_block bb2 = e->src;
2641 e->flags = EDGE_TRUE_VALUE;
2642 e->probability = profile_probability::very_likely ();
2643 *gsi = gsi_after_labels (e->dest);
2644 t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
2645 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2646 GSI_CONTINUE_LINKING);
2647 cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
2648 NULL_TREE, NULL_TREE);
2649 gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
2650 e = split_block (gsi_bb (*gsi), cond_stmt);
2651 basic_block bb3 = e->src;
2652 e->flags = EDGE_FALSE_VALUE;
2653 e->probability = profile_probability::very_likely ();
2654 *gsi = gsi_after_labels (e->dest);
2655 t = fold_convert (itype, c);
2656 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
2657 t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
2658 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
2659 GSI_CONTINUE_LINKING);
2660 expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
2661 t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
2662 t2 = fold_convert (itype, t2);
2663 t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
2664 t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
2665 if (fd->loops[i].m1)
2666 {
2667 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
2668 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
2669 }
2670 expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
2671 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2672 bb_triang = e->src;
2673 *gsi = gsi_after_labels (e->dest);
2674 remove_edge (e);
2675 e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2676 e->probability = profile_probability::very_unlikely ();
2677 e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
2678 e->probability = profile_probability::very_unlikely ();
2679 e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
2680 e->probability = profile_probability::very_unlikely ();
2681
2682 basic_block bb4 = create_empty_bb (bb0);
2683 add_bb_to_loop (bb4, bb0->loop_father);
2684 e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
2685 e->probability = profile_probability::unlikely ();
2686 make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
2687 set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
2688 set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
2689 gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
2690 t2 = fold_build2 (TRUNC_DIV_EXPR, type,
2691 counts[i], counts[i - 1]);
2692 t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
2693 GSI_CONTINUE_LINKING);
2694 t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
2695 t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
2696 t = fold_convert (itype, t);
2697 t2 = fold_convert (itype, t2);
2698 t = fold_build2 (MULT_EXPR, itype, t,
2699 fold_convert (itype, fd->loops[i].step));
2700 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2701 t2 = fold_build2 (MULT_EXPR, itype, t2,
2702 fold_convert (itype, fd->loops[i - 1].step));
2703 t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
2704 t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
2705 false, GSI_CONTINUE_LINKING);
2706 stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
2707 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2708 if (fd->loops[i].m1)
2709 {
2710 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
2711 fd->loops[i - 1].v);
2712 t = fold_build2 (PLUS_EXPR, itype, t, t2);
2713 }
2714 t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
2715 false, GSI_CONTINUE_LINKING);
2716 stmt = gimple_build_assign (fd->loops[i].v, t);
2717 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
2718 }
2719 /* Fallback implementation. Evaluate the loops between
2720 fd->first_nonrect and fd->last_nonrect (inclusive) at
2721 runtime using temporaries instead of the original iteration
2722 variables; in the body just bump the counter and compare
2723 with the desired value. */
2724 gimple_stmt_iterator gsi2 = *gsi;
2725 basic_block entry_bb = gsi_bb (gsi2);
2726 edge e = split_block (entry_bb, gsi_stmt (gsi2));
2727 e = split_block (e->dest, (gimple *) NULL);
2728 basic_block dom_bb = NULL;
2729 basic_block cur_bb = e->src;
2730 basic_block next_bb = e->dest;
2731 entry_bb = e->dest;
2732 *gsi = gsi_after_labels (entry_bb);
2733
2734 tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
2735 tree n1 = NULL_TREE, n2 = NULL_TREE;
2736 memset (vs, 0, fd->last_nonrect * sizeof (tree));
2737
2738 for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
2739 {
2740 tree itype = TREE_TYPE (fd->loops[j].v);
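	      /* Rectangular loops in the sub-nest that are not referenced
		 by any non-rectangular loop are iterated with a plain
		 0 .. counts[j] counter instead of their original bounds
		 and step.  */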
2741 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2742 && fd->loops[j].m2 == NULL_TREE
2743 && !fd->loops[j].non_rect_referenced);
2744 gsi2 = gsi_after_labels (cur_bb);
2745 t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
2746 if (fd->loops[j].m1)
2747 {
2748 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
2749 n1 = fold_build2 (MULT_EXPR, itype,
2750 vs[j - fd->loops[j].outer], n1);
2751 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
2752 }
2753 else if (rect_p)
2754 n1 = build_zero_cst (type);
2755 else
2756 n1 = t;
2757 n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
2758 true, GSI_SAME_STMT);
2759 if (j < fd->last_nonrect)
2760 {
2761 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
2762 expand_omp_build_assign (&gsi2, vs[j], n1);
2763 }
2764 t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
2765 if (fd->loops[j].m2)
2766 {
2767 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
2768 n2 = fold_build2 (MULT_EXPR, itype,
2769 vs[j - fd->loops[j].outer], n2);
2770 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
2771 }
2772 else if (rect_p)
2773 n2 = counts[j];
2774 else
2775 n2 = t;
2776 n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
2777 true, GSI_SAME_STMT);
2778 if (j == fd->last_nonrect)
2779 {
2780 gcond *cond_stmt
2781 = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
2782 NULL_TREE, NULL_TREE);
2783 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2784 e = split_block (cur_bb, cond_stmt);
2785 e->flags = EDGE_TRUE_VALUE;
2786 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
2787 e->probability = profile_probability::likely ().guessed ();
2788 ne->probability = e->probability.invert ();
2789 gsi2 = gsi_after_labels (e->dest);
2790
2791 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
2792 ? -1 : 1));
2793 t = fold_build2 (PLUS_EXPR, itype,
2794 fold_convert (itype, fd->loops[j].step), t);
2795 t = fold_build2 (PLUS_EXPR, itype, t, n2);
2796 t = fold_build2 (MINUS_EXPR, itype, t, n1);
2797 tree step = fold_convert (itype, fd->loops[j].step);
2798 if (TYPE_UNSIGNED (itype)
2799 && fd->loops[j].cond_code == GT_EXPR)
2800 t = fold_build2 (TRUNC_DIV_EXPR, itype,
2801 fold_build1 (NEGATE_EXPR, itype, t),
2802 fold_build1 (NEGATE_EXPR, itype, step));
2803 else
2804 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
2805 t = fold_convert (type, t);
2806 t = fold_build2 (PLUS_EXPR, type, idx, t);
2807 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2808 true, GSI_SAME_STMT);
2809 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
2810 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
2811 cond_stmt
2812 = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
2813 NULL_TREE);
2814 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2815 e = split_block (gsi_bb (gsi2), cond_stmt);
2816 e->flags = EDGE_TRUE_VALUE;
2817 e->probability = profile_probability::likely ().guessed ();
2818 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
2819 ne->probability = e->probability.invert ();
2820 gsi2 = gsi_after_labels (e->dest);
2821 expand_omp_build_assign (&gsi2, idx, t);
2822 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
2823 break;
2824 }
2825 e = split_block (cur_bb, last_stmt (cur_bb));
2826
2827 basic_block new_cur_bb = create_empty_bb (cur_bb);
2828 add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
2829
2830 gsi2 = gsi_after_labels (e->dest);
2831 if (rect_p)
2832 t = fold_build2 (PLUS_EXPR, type, vs[j],
2833 build_one_cst (type));
2834 else
2835 {
2836 tree step
2837 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2838 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
2839 }
2840 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2841 true, GSI_SAME_STMT);
2842 expand_omp_build_assign (&gsi2, vs[j], t);
2843
2844 edge ne = split_block (e->dest, last_stmt (e->dest));
2845 gsi2 = gsi_after_labels (ne->dest);
2846
2847 gcond *cond_stmt;
2848 if (next_bb == entry_bb)
2849 /* No need to actually check the outermost condition. */
2850 cond_stmt
2851 = gimple_build_cond (EQ_EXPR, boolean_true_node,
2852 boolean_true_node,
2853 NULL_TREE, NULL_TREE);
2854 else
2855 cond_stmt
2856 = gimple_build_cond (rect_p ? LT_EXPR
2857 : fd->loops[j].cond_code,
2858 vs[j], n2, NULL_TREE, NULL_TREE);
2859 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
2860 edge e3, e4;
2861 if (next_bb == entry_bb)
2862 {
2863 e3 = find_edge (ne->dest, next_bb);
2864 e3->flags = EDGE_FALSE_VALUE;
2865 dom_bb = ne->dest;
2866 }
2867 else
2868 e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
2869 e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
2870 e4->probability = profile_probability::likely ().guessed ();
2871 e3->probability = e4->probability.invert ();
2872 basic_block esrc = e->src;
2873 make_edge (e->src, ne->dest, EDGE_FALLTHRU);
2874 cur_bb = new_cur_bb;
2875 basic_block latch_bb = next_bb;
2876 next_bb = e->dest;
2877 remove_edge (e);
2878 set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
2879 set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
2880 set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
2881 }
2882 for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
2883 {
2884 tree itype = TREE_TYPE (fd->loops[j].v);
2885 bool rect_p = (fd->loops[j].m1 == NULL_TREE
2886 && fd->loops[j].m2 == NULL_TREE
2887 && !fd->loops[j].non_rect_referenced);
2888 if (j == fd->last_nonrect)
2889 {
2890 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
2891 t = fold_convert (itype, t);
2892 tree t2
2893 = fold_convert (itype, unshare_expr (fd->loops[j].step));
2894 t = fold_build2 (MULT_EXPR, itype, t, t2);
2895 t = fold_build2 (PLUS_EXPR, itype, n1, t);
2896 }
2897 else if (rect_p)
2898 {
2899 t = fold_convert (itype, vs[j]);
2900 t = fold_build2 (MULT_EXPR, itype, t,
2901 fold_convert (itype, fd->loops[j].step));
2902 if (POINTER_TYPE_P (vtype))
2903 t = fold_build_pointer_plus (fd->loops[j].n1, t);
2904 else
2905 t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
2906 }
2907 else
2908 t = vs[j];
2909 t = force_gimple_operand_gsi (gsi, t, false,
2910 NULL_TREE, true,
2911 GSI_SAME_STMT);
2912 stmt = gimple_build_assign (fd->loops[j].v, t);
2913 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
2914 }
2915 if (gsi_end_p (*gsi))
2916 *gsi = gsi_last_bb (gsi_bb (*gsi));
2917 else
2918 gsi_prev (gsi);
2919 if (bb_triang)
2920 {
2921 e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
2922 make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
2923 *gsi = gsi_after_labels (e->dest);
2924 if (!gsi_end_p (*gsi))
2925 gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
2926 set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
2927 }
2928 }
2929 else
2930 {
2931 t = fold_convert (itype, t);
2932 t = fold_build2 (MULT_EXPR, itype, t,
2933 fold_convert (itype, fd->loops[i].step));
2934 if (POINTER_TYPE_P (vtype))
2935 t = fold_build_pointer_plus (fd->loops[i].n1, t);
2936 else
2937 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
2938 t = force_gimple_operand_gsi (gsi, t,
2939 DECL_P (fd->loops[i].v)
2940 && TREE_ADDRESSABLE (fd->loops[i].v),
2941 NULL_TREE, false,
2942 GSI_CONTINUE_LINKING);
2943 stmt = gimple_build_assign (fd->loops[i].v, t);
2944 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2945 }
2946 if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
2947 {
2948 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
2949 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
2950 false, GSI_CONTINUE_LINKING);
2951 stmt = gimple_build_assign (tem, t);
2952 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2953 }
2954 if (i == fd->last_nonrect)
2955 i = fd->first_nonrect;
2956 }
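  /* For non-rectangular loops with an M2 multiplier, precompute the
     initial value of the upper bound temporary:
     nonrect_bounds[i] = N2 + M2 * V of the loop it refers to.  */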
2957 if (fd->non_rect)
2958 for (i = 0; i <= fd->last_nonrect; i++)
2959 if (fd->loops[i].m2)
2960 {
2961 tree itype = TREE_TYPE (fd->loops[i].v);
2962
2963 tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
2964 t = fold_build2 (MULT_EXPR, itype,
2965 fd->loops[i - fd->loops[i].outer].v, t);
2966 t = fold_build2 (PLUS_EXPR, itype, t,
2967 fold_convert (itype,
2968 unshare_expr (fd->loops[i].n2)));
2969 nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
2970 t = force_gimple_operand_gsi (gsi, t, false,
2971 NULL_TREE, false,
2972 GSI_CONTINUE_LINKING);
2973 stmt = gimple_build_assign (nonrect_bounds[i], t);
2974 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
2975 }
2976 }
2977
2978 /* Helper function for expand_omp_for_*. Generate code like:
2979 L10:
2980 V3 += STEP3;
2981 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2982 L11:
2983 V3 = N31;
2984 V2 += STEP2;
2985 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2986 L12:
2987 V2 = N21;
2988 V1 += STEP1;
2989 goto BODY_BB;
2990 For non-rectangular loops, use temporaries stored in nonrect_bounds
2991 for the upper bounds if M?2 multiplier is present. Given e.g.
2992 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2993 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2994 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2995 for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
2996 do:
2997 L10:
2998 V4 += STEP4;
2999 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
3000 L11:
3001 V4 = N41 + M41 * V2; // This can be left out if the loop
3002 // refers to the immediate parent loop
3003 V3 += STEP3;
3004 if (V3 cond3 N32) goto BODY_BB; else goto L12;
3005 L12:
3006 V3 = N31;
3007 V2 += STEP2;
3008 if (V2 cond2 N22) goto L120; else goto L13;
3009 L120:
3010 V4 = N41 + M41 * V2;
3011 NONRECT_BOUND4 = N42 + M42 * V2;
3012 if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
3013 L13:
3014 V2 = N21;
3015 V1 += STEP1;
3016 goto L120; */
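
/* As an illustrative sketch of the simplest case, given
     for (V1 = 0; V1 < N1; V1++)
       for (V2 = 0; V2 < M22 * V1; V2 += STEP2)
   the inner bound is kept in NONRECT_BOUND2, recomputed as M22 * V1
   whenever V1 is bumped, and the inner loop's exit test compares V2
   against that temporary.  */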
3017
3018 static basic_block
3019 extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
3020 basic_block cont_bb, basic_block body_bb)
3021 {
3022 basic_block last_bb, bb, collapse_bb = NULL;
3023 int i;
3024 gimple_stmt_iterator gsi;
3025 edge e;
3026 tree t;
3027 gimple *stmt;
3028
3029 last_bb = cont_bb;
3030 for (i = fd->collapse - 1; i >= 0; i--)
3031 {
3032 tree vtype = TREE_TYPE (fd->loops[i].v);
3033
3034 bb = create_empty_bb (last_bb);
3035 add_bb_to_loop (bb, last_bb->loop_father);
3036 gsi = gsi_start_bb (bb);
3037
3038 if (i < fd->collapse - 1)
3039 {
3040 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
3041 e->probability
3042 = profile_probability::guessed_always ().apply_scale (1, 8);
3043
3044 struct omp_for_data_loop *l = &fd->loops[i + 1];
3045 if (l->m1 == NULL_TREE || l->outer != 1)
3046 {
3047 t = l->n1;
3048 if (l->m1)
3049 {
3050 tree t2
3051 = fold_build2 (MULT_EXPR, TREE_TYPE (t),
3052 fd->loops[i + 1 - l->outer].v, l->m1);
3053 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
3054 }
3055 t = force_gimple_operand_gsi (&gsi, t,
3056 DECL_P (l->v)
3057 && TREE_ADDRESSABLE (l->v),
3058 NULL_TREE, false,
3059 GSI_CONTINUE_LINKING);
3060 stmt = gimple_build_assign (l->v, t);
3061 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3062 }
3063 }
3064 else
3065 collapse_bb = bb;
3066
3067 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3068
3069 if (POINTER_TYPE_P (vtype))
3070 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
3071 else
3072 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
3073 t = force_gimple_operand_gsi (&gsi, t,
3074 DECL_P (fd->loops[i].v)
3075 && TREE_ADDRESSABLE (fd->loops[i].v),
3076 NULL_TREE, false, GSI_CONTINUE_LINKING);
3077 stmt = gimple_build_assign (fd->loops[i].v, t);
3078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3079
3080 if (fd->loops[i].non_rect_referenced)
3081 {
3082 basic_block update_bb = NULL, prev_bb = NULL;
3083 for (int j = i + 1; j <= fd->last_nonrect; j++)
3084 if (j - fd->loops[j].outer == i)
3085 {
3086 tree n1, n2;
3087 struct omp_for_data_loop *l = &fd->loops[j];
3088 basic_block this_bb = create_empty_bb (last_bb);
3089 add_bb_to_loop (this_bb, last_bb->loop_father);
3090 gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
3091 if (prev_bb)
3092 {
3093 e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
3094 e->probability
3095 = profile_probability::guessed_always ().apply_scale (7,
3096 8);
3097 set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
3098 }
3099 if (l->m1)
3100 {
3101 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
3102 fd->loops[i].v);
3103 t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
3104 n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3105 false,
3106 GSI_CONTINUE_LINKING);
3107 stmt = gimple_build_assign (l->v, n1);
3108 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3109 n1 = l->v;
3110 }
3111 else
3112 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
3113 NULL_TREE, false,
3114 GSI_CONTINUE_LINKING);
3115 if (l->m2)
3116 {
3117 t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
3118 fd->loops[i].v);
3119 t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
3120 t, unshare_expr (l->n2));
3121 n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3122 false,
3123 GSI_CONTINUE_LINKING);
3124 stmt = gimple_build_assign (nonrect_bounds[j], n2);
3125 gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
3126 n2 = nonrect_bounds[j];
3127 }
3128 else
3129 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
3130 true, NULL_TREE, false,
3131 GSI_CONTINUE_LINKING);
3132 gcond *cond_stmt
3133 = gimple_build_cond (l->cond_code, n1, n2,
3134 NULL_TREE, NULL_TREE);
3135 gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
3136 if (update_bb == NULL)
3137 update_bb = this_bb;
3138 e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
3139 e->probability
3140 = profile_probability::guessed_always ().apply_scale (1, 8);
3141 if (prev_bb == NULL)
3142 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
3143 prev_bb = this_bb;
3144 }
3145 e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
3146 e->probability
3147 = profile_probability::guessed_always ().apply_scale (7, 8);
3148 body_bb = update_bb;
3149 }
3150
3151 if (i > 0)
3152 {
3153 if (fd->loops[i].m2)
3154 t = nonrect_bounds[i];
3155 else
3156 t = unshare_expr (fd->loops[i].n2);
3157 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3158 false, GSI_CONTINUE_LINKING);
3159 tree v = fd->loops[i].v;
3160 if (DECL_P (v) && TREE_ADDRESSABLE (v))
3161 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
3162 false, GSI_CONTINUE_LINKING);
3163 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
3164 stmt = gimple_build_cond_empty (t);
3165 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
3166 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
3167 expand_omp_regimplify_p, NULL, NULL)
3168 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
3169 expand_omp_regimplify_p, NULL, NULL))
3170 gimple_regimplify_operands (stmt, &gsi);
3171 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
3172 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3173 }
3174 else
3175 make_edge (bb, body_bb, EDGE_FALLTHRU);
3176 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
3177 last_bb = bb;
3178 }
3179
3180 return collapse_bb;
3181 }
3182
3183 /* Expand #pragma omp ordered depend(source). */
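/* This amounts to a call to GOMP_doacross_post (or GOMP_doacross_ull_post
   when the iterator type is unsigned long long), passing the address of
   the doacross iteration counts.  */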
3184
3185 static void
3186 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3187 tree *counts, location_t loc)
3188 {
3189 enum built_in_function source_ix
3190 = fd->iter_type == long_integer_type_node
3191 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
3192 gimple *g
3193 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
3194 build_fold_addr_expr (counts[fd->ordered]));
3195 gimple_set_location (g, loc);
3196 gsi_insert_before (gsi, g, GSI_SAME_STMT);
3197 }
3198
3199 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
3200
3201 static void
3202 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
3203 tree *counts, tree c, location_t loc)
3204 {
3205 auto_vec<tree, 10> args;
3206 enum built_in_function sink_ix
3207 = fd->iter_type == long_integer_type_node
3208 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
3209 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
3210 int i;
3211 gimple_stmt_iterator gsi2 = *gsi;
3212 bool warned_step = false;
3213
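  /* First scan the offsets and, for the first dimension with a non-zero
     offset, warn if the sink vector waits for a lexically later
     iteration.  */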
3214 for (i = 0; i < fd->ordered; i++)
3215 {
3216 tree step = NULL_TREE;
3217 off = TREE_PURPOSE (deps);
3218 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3219 {
3220 step = TREE_OPERAND (off, 1);
3221 off = TREE_OPERAND (off, 0);
3222 }
3223 if (!integer_zerop (off))
3224 {
3225 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3226 || fd->loops[i].cond_code == GT_EXPR);
3227 bool forward = fd->loops[i].cond_code == LT_EXPR;
3228 if (step)
3229 {
3230 /* Non-simple Fortran DO loops. If step is variable,
3231 we don't know even the direction at compile time, so we can't
3232 warn. */
3233 if (TREE_CODE (step) != INTEGER_CST)
3234 break;
3235 forward = tree_int_cst_sgn (step) != -1;
3236 }
3237 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3238 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3239 "waiting for lexically later iteration");
3240 break;
3241 }
3242 deps = TREE_CHAIN (deps);
3243 }
3244 /* If all offsets corresponding to the collapsed loops are zero,
3245 this depend clause can be ignored. FIXME: but there is still a
3246 flush needed. We need to emit one __sync_synchronize () for it
3247 though (perhaps conditionally)? Solve this together with the
3248 conservative dependence folding optimization.
3249 if (i >= fd->collapse)
3250 return; */
3251
3252 deps = OMP_CLAUSE_DECL (c);
3253 gsi_prev (&gsi2);
3254 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
3255 edge e2 = split_block_after_labels (e1->dest);
3256
3257 gsi2 = gsi_after_labels (e1->dest);
3258 *gsi = gsi_last_bb (e1->src);
3259 for (i = 0; i < fd->ordered; i++)
3260 {
3261 tree itype = TREE_TYPE (fd->loops[i].v);
3262 tree step = NULL_TREE;
3263 tree orig_off = NULL_TREE;
3264 if (POINTER_TYPE_P (itype))
3265 itype = sizetype;
3266 if (i)
3267 deps = TREE_CHAIN (deps);
3268 off = TREE_PURPOSE (deps);
3269 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
3270 {
3271 step = TREE_OPERAND (off, 1);
3272 off = TREE_OPERAND (off, 0);
3273 gcc_assert (fd->loops[i].cond_code == LT_EXPR
3274 && integer_onep (fd->loops[i].step)
3275 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
3276 }
3277 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
3278 if (step)
3279 {
3280 off = fold_convert_loc (loc, itype, off);
3281 orig_off = off;
3282 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3283 }
3284
3285 if (integer_zerop (off))
3286 t = boolean_true_node;
3287 else
3288 {
3289 tree a;
3290 tree co = fold_convert_loc (loc, itype, off);
3291 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
3292 {
3293 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3294 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
3295 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
3296 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
3297 co);
3298 }
3299 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3300 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3301 fd->loops[i].v, co);
3302 else
3303 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
3304 fd->loops[i].v, co);
3305 if (step)
3306 {
3307 tree t1, t2;
3308 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3309 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3310 fd->loops[i].n1);
3311 else
3312 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3313 fd->loops[i].n2);
3314 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3315 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3316 fd->loops[i].n2);
3317 else
3318 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3319 fd->loops[i].n1);
3320 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
3321 step, build_int_cst (TREE_TYPE (step), 0));
3322 if (TREE_CODE (step) != INTEGER_CST)
3323 {
3324 t1 = unshare_expr (t1);
3325 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
3326 false, GSI_CONTINUE_LINKING);
3327 t2 = unshare_expr (t2);
3328 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
3329 false, GSI_CONTINUE_LINKING);
3330 }
3331 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
3332 t, t2, t1);
3333 }
3334 else if (fd->loops[i].cond_code == LT_EXPR)
3335 {
3336 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3337 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
3338 fd->loops[i].n1);
3339 else
3340 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
3341 fd->loops[i].n2);
3342 }
3343 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3344 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
3345 fd->loops[i].n2);
3346 else
3347 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
3348 fd->loops[i].n1);
3349 }
3350 if (cond)
3351 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
3352 else
3353 cond = t;
3354
3355 off = fold_convert_loc (loc, itype, off);
3356
3357 if (step
3358 || (fd->loops[i].cond_code == LT_EXPR
3359 ? !integer_onep (fd->loops[i].step)
3360 : !integer_minus_onep (fd->loops[i].step)))
3361 {
3362 if (step == NULL_TREE
3363 && TYPE_UNSIGNED (itype)
3364 && fd->loops[i].cond_code == GT_EXPR)
3365 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
3366 fold_build1_loc (loc, NEGATE_EXPR, itype,
3367 s));
3368 else
3369 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
3370 orig_off ? orig_off : off, s);
3371 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
3372 build_int_cst (itype, 0));
3373 if (integer_zerop (t) && !warned_step)
3374 {
3375 warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
3376 "refers to iteration never in the iteration "
3377 "space");
3378 warned_step = true;
3379 }
3380 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
3381 cond, t);
3382 }
3383
3384 if (i <= fd->collapse - 1 && fd->collapse > 1)
3385 t = fd->loop.v;
3386 else if (counts[i])
3387 t = counts[i];
3388 else
3389 {
3390 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3391 fd->loops[i].v, fd->loops[i].n1);
3392 t = fold_convert_loc (loc, fd->iter_type, t);
3393 }
3394 if (step)
3395 /* We have divided off by step already earlier. */;
3396 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
3397 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
3398 fold_build1_loc (loc, NEGATE_EXPR, itype,
3399 s));
3400 else
3401 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
3402 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
3403 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
3404 off = fold_convert_loc (loc, fd->iter_type, off);
3405 if (i <= fd->collapse - 1 && fd->collapse > 1)
3406 {
3407 if (i)
3408 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
3409 off);
3410 if (i < fd->collapse - 1)
3411 {
3412 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
3413 counts[i]);
3414 continue;
3415 }
3416 }
3417 off = unshare_expr (off);
3418 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
3419 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3420 true, GSI_SAME_STMT);
3421 args.safe_push (t);
3422 }
3423 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
3424 gimple_set_location (g, loc);
3425 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
3426
3427 cond = unshare_expr (cond);
3428 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
3429 GSI_CONTINUE_LINKING);
3430 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
3431 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
3432 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3433 e1->probability = e3->probability.invert ();
3434 e1->flags = EDGE_TRUE_VALUE;
3435 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
3436
3437 *gsi = gsi_after_labels (e2->dest);
3438 }
3439
3440 /* Expand all #pragma omp ordered depend(source) and
3441 #pragma omp ordered depend(sink:...) constructs in the current
3442 #pragma omp for ordered(n) region. */
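/* Illustrative example of a doacross loop handled here:

     #pragma omp for ordered(1)
     for (i = 1; i < n; i++)
       {
         #pragma omp ordered depend(sink: i - 1)
         a[i] += a[i - 1];
         #pragma omp ordered depend(source)
       }

   the depend(sink:) construct is expanded by expand_omp_ordered_sink,
   the depend(source) construct by expand_omp_ordered_source, and the
   original GIMPLE_OMP_ORDERED statements are then removed.  */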
3443
3444 static void
3445 expand_omp_ordered_source_sink (struct omp_region *region,
3446 struct omp_for_data *fd, tree *counts,
3447 basic_block cont_bb)
3448 {
3449 struct omp_region *inner;
3450 int i;
3451 for (i = fd->collapse - 1; i < fd->ordered; i++)
3452 if (i == fd->collapse - 1 && fd->collapse > 1)
3453 counts[i] = NULL_TREE;
3454 else if (i >= fd->collapse && !cont_bb)
3455 counts[i] = build_zero_cst (fd->iter_type);
3456 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
3457 && integer_onep (fd->loops[i].step))
3458 counts[i] = NULL_TREE;
3459 else
3460 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
3461 tree atype
3462 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
3463 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
3464 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
3465
3466 for (inner = region->inner; inner; inner = inner->next)
3467 if (inner->type == GIMPLE_OMP_ORDERED)
3468 {
3469 gomp_ordered *ord_stmt = inner->ord_stmt;
3470 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
3471 location_t loc = gimple_location (ord_stmt);
3472 tree c;
3473 for (c = gimple_omp_ordered_clauses (ord_stmt);
3474 c; c = OMP_CLAUSE_CHAIN (c))
3475 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
3476 break;
3477 if (c)
3478 expand_omp_ordered_source (&gsi, fd, counts, loc);
3479 for (c = gimple_omp_ordered_clauses (ord_stmt);
3480 c; c = OMP_CLAUSE_CHAIN (c))
3481 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
3482 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
3483 gsi_remove (&gsi, true);
3484 }
3485 }
3486
3487 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
3488 collapsed. */
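/* E.g. for #pragma omp for ordered(2) without collapse, only the
   outermost loop is distributed among the threads; the second loop is
   rebuilt here around the body, and its element of the .orditera
   array is updated on each iteration so that depend(source) and
   depend(sink:) can refer to it.  */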
3489
3490 static basic_block
3491 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
3492 basic_block cont_bb, basic_block body_bb,
3493 bool ordered_lastprivate)
3494 {
3495 if (fd->ordered == fd->collapse)
3496 return cont_bb;
3497
3498 if (!cont_bb)
3499 {
3500 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3501 for (int i = fd->collapse; i < fd->ordered; i++)
3502 {
3503 tree type = TREE_TYPE (fd->loops[i].v);
3504 tree n1 = fold_convert (type, fd->loops[i].n1);
3505 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
3506 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3507 size_int (i - fd->collapse + 1),
3508 NULL_TREE, NULL_TREE);
3509 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3510 }
3511 return NULL;
3512 }
3513
3514 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
3515 {
3516 tree t, type = TREE_TYPE (fd->loops[i].v);
3517 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3518 expand_omp_build_assign (&gsi, fd->loops[i].v,
3519 fold_convert (type, fd->loops[i].n1));
3520 if (counts[i])
3521 expand_omp_build_assign (&gsi, counts[i],
3522 build_zero_cst (fd->iter_type));
3523 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3524 size_int (i - fd->collapse + 1),
3525 NULL_TREE, NULL_TREE);
3526 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
3527 if (!gsi_end_p (gsi))
3528 gsi_prev (&gsi);
3529 else
3530 gsi = gsi_last_bb (body_bb);
3531 edge e1 = split_block (body_bb, gsi_stmt (gsi));
3532 basic_block new_body = e1->dest;
3533 if (body_bb == cont_bb)
3534 cont_bb = new_body;
3535 edge e2 = NULL;
3536 basic_block new_header;
3537 if (EDGE_COUNT (cont_bb->preds) > 0)
3538 {
3539 gsi = gsi_last_bb (cont_bb);
3540 if (POINTER_TYPE_P (type))
3541 t = fold_build_pointer_plus (fd->loops[i].v,
3542 fold_convert (sizetype,
3543 fd->loops[i].step));
3544 else
3545 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
3546 fold_convert (type, fd->loops[i].step));
3547 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
3548 if (counts[i])
3549 {
3550 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
3551 build_int_cst (fd->iter_type, 1));
3552 expand_omp_build_assign (&gsi, counts[i], t);
3553 t = counts[i];
3554 }
3555 else
3556 {
3557 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
3558 fd->loops[i].v, fd->loops[i].n1);
3559 t = fold_convert (fd->iter_type, t);
3560 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3561 true, GSI_SAME_STMT);
3562 }
3563 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3564 size_int (i - fd->collapse + 1),
3565 NULL_TREE, NULL_TREE);
3566 expand_omp_build_assign (&gsi, aref, t);
3567 gsi_prev (&gsi);
3568 e2 = split_block (cont_bb, gsi_stmt (gsi));
3569 new_header = e2->dest;
3570 }
3571 else
3572 new_header = cont_bb;
3573 gsi = gsi_after_labels (new_header);
3574 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
3575 true, GSI_SAME_STMT);
3576 tree n2
3577 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
3578 true, NULL_TREE, true, GSI_SAME_STMT);
3579 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
3580 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
3581 edge e3 = split_block (new_header, gsi_stmt (gsi));
3582 cont_bb = e3->dest;
3583 remove_edge (e1);
3584 make_edge (body_bb, new_header, EDGE_FALLTHRU);
3585 e3->flags = EDGE_FALSE_VALUE;
3586 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
3587 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
3588 e1->probability = e3->probability.invert ();
3589
3590 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
3591 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
3592
3593 if (e2)
3594 {
3595 class loop *loop = alloc_loop ();
3596 loop->header = new_header;
3597 loop->latch = e2->src;
3598 add_loop (loop, body_bb->loop_father);
3599 }
3600 }
3601
3602 /* If there are any lastprivate clauses and it is possible some loops
3603 might have zero iterations, ensure all the decls are initialized,
3604 otherwise we could crash evaluating C++ class iterators with lastprivate
3605 clauses. */
3606 bool need_inits = false;
3607 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
3608 if (need_inits)
3609 {
3610 tree type = TREE_TYPE (fd->loops[i].v);
3611 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
3612 expand_omp_build_assign (&gsi, fd->loops[i].v,
3613 fold_convert (type, fd->loops[i].n1));
3614 }
3615 else
3616 {
3617 tree type = TREE_TYPE (fd->loops[i].v);
3618 tree this_cond = fold_build2 (fd->loops[i].cond_code,
3619 boolean_type_node,
3620 fold_convert (type, fd->loops[i].n1),
3621 fold_convert (type, fd->loops[i].n2));
3622 if (!integer_onep (this_cond))
3623 need_inits = true;
3624 }
3625
3626 return cont_bb;
3627 }
3628
3629 /* A subroutine of expand_omp_for. Generate code for a parallel
3630 loop with any schedule. Given parameters:
3631
3632 for (V = N1; V cond N2; V += STEP) BODY;
3633
3634 where COND is "<" or ">", we generate pseudocode
3635
3636 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
3637 if (more) goto L0; else goto L3;
3638 L0:
3639 V = istart0;
3640 iend = iend0;
3641 L1:
3642 BODY;
3643 V += STEP;
3644 if (V cond iend) goto L1; else goto L2;
3645 L2:
3646 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3647 L3:
3648
3649 If this is a combined omp parallel loop, instead of the call to
3650 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
3651 If this is a gimple_omp_for_combined_p loop, then instead of assigning
3652 V and iend in L0 we assign the first two _looptemp_ clause decls of the
3653 inner GIMPLE_OMP_FOR and V += STEP; and
3654 if (V cond iend) goto L1; else goto L2; are removed.
3655
3656 For collapsed loops, given parameters:
3657 collapse(3)
3658 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
3659 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
3660 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
3661 BODY;
3662
3663 we generate pseudocode
3664
3665 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
3666 if (cond3 is <)
3667 adj = STEP3 - 1;
3668 else
3669 adj = STEP3 + 1;
3670 count3 = (adj + N32 - N31) / STEP3;
3671 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
3672 if (cond2 is <)
3673 adj = STEP2 - 1;
3674 else
3675 adj = STEP2 + 1;
3676 count2 = (adj + N22 - N21) / STEP2;
3677 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
3678 if (cond1 is <)
3679 adj = STEP1 - 1;
3680 else
3681 adj = STEP1 + 1;
3682 count1 = (adj + N12 - N11) / STEP1;
3683 count = count1 * count2 * count3;
3684 goto Z1;
3685 Z0:
3686 count = 0;
3687 Z1:
3688 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
3689 if (more) goto L0; else goto L3;
3690 L0:
3691 V = istart0;
3692 T = V;
3693 V3 = N31 + (T % count3) * STEP3;
3694 T = T / count3;
3695 V2 = N21 + (T % count2) * STEP2;
3696 T = T / count2;
3697 V1 = N11 + T * STEP1;
3698 iend = iend0;
3699 L1:
3700 BODY;
3701 V += 1;
3702 if (V < iend) goto L10; else goto L2;
3703 L10:
3704 V3 += STEP3;
3705 if (V3 cond3 N32) goto L1; else goto L11;
3706 L11:
3707 V3 = N31;
3708 V2 += STEP2;
3709 if (V2 cond2 N22) goto L1; else goto L12;
3710 L12:
3711 V2 = N21;
3712 V1 += STEP1;
3713 goto L1;
3714 L2:
3715 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
3716 L3:
3717
3718 */
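/* A rough user-level sketch of the above, assuming the dynamic
   schedule variant so that start_fn/next_fn are
   GOMP_loop_dynamic_start/GOMP_loop_dynamic_next:

     long istart0, iend0;
     if (GOMP_loop_dynamic_start (N1, N2, STEP, CHUNK, &istart0, &iend0))
       do
         for (V = istart0; V cond iend0; V += STEP)
           BODY;
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();

   with GOMP_loop_end_nowait or GOMP_loop_end_cancel emitted instead of
   GOMP_loop_end when nowait or cancellation is involved (see the exit
   block handling below).  */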
3719
3720 static void
3721 expand_omp_for_generic (struct omp_region *region,
3722 struct omp_for_data *fd,
3723 enum built_in_function start_fn,
3724 enum built_in_function next_fn,
3725 tree sched_arg,
3726 gimple *inner_stmt)
3727 {
3728 tree type, istart0, iend0, iend;
3729 tree t, vmain, vback, bias = NULL_TREE;
3730 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
3731 basic_block l2_bb = NULL, l3_bb = NULL;
3732 gimple_stmt_iterator gsi;
3733 gassign *assign_stmt;
3734 bool in_combined_parallel = is_combined_parallel (region);
3735 bool broken_loop = region->cont == NULL;
3736 edge e, ne;
3737 tree *counts = NULL;
3738 int i;
3739 bool ordered_lastprivate = false;
3740
3741 gcc_assert (!broken_loop || !in_combined_parallel);
3742 gcc_assert (fd->iter_type == long_integer_type_node
3743 || !in_combined_parallel);
3744
3745 entry_bb = region->entry;
3746 cont_bb = region->cont;
3747 collapse_bb = NULL;
3748 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3749 gcc_assert (broken_loop
3750 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
3751 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3752 l1_bb = single_succ (l0_bb);
3753 if (!broken_loop)
3754 {
3755 l2_bb = create_empty_bb (cont_bb);
3756 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
3757 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
3758 == l1_bb));
3759 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3760 }
3761 else
3762 l2_bb = NULL;
3763 l3_bb = BRANCH_EDGE (entry_bb)->dest;
3764 exit_bb = region->exit;
3765
3766 gsi = gsi_last_nondebug_bb (entry_bb);
3767
3768 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3769 if (fd->ordered
3770 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3771 OMP_CLAUSE_LASTPRIVATE))
3772 ordered_lastprivate = true;
3773 tree reductions = NULL_TREE;
3774 tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
3775 tree memv = NULL_TREE;
3776 if (fd->lastprivate_conditional)
3777 {
3778 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3779 OMP_CLAUSE__CONDTEMP_);
3780 if (fd->have_pointer_condtemp)
3781 condtemp = OMP_CLAUSE_DECL (c);
3782 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
3783 cond_var = OMP_CLAUSE_DECL (c);
3784 }
3785 if (sched_arg)
3786 {
3787 if (fd->have_reductemp)
3788 {
3789 tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3790 OMP_CLAUSE__REDUCTEMP_);
3791 reductions = OMP_CLAUSE_DECL (c);
3792 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3793 gimple *g = SSA_NAME_DEF_STMT (reductions);
3794 reductions = gimple_assign_rhs1 (g);
3795 OMP_CLAUSE_DECL (c) = reductions;
3796 entry_bb = gimple_bb (g);
3797 edge e = split_block (entry_bb, g);
3798 if (region->entry == entry_bb)
3799 region->entry = e->dest;
3800 gsi = gsi_last_bb (entry_bb);
3801 }
3802 else
3803 reductions = null_pointer_node;
3804 if (fd->have_pointer_condtemp)
3805 {
3806 tree type = TREE_TYPE (condtemp);
3807 memv = create_tmp_var (type);
3808 TREE_ADDRESSABLE (memv) = 1;
3809 unsigned HOST_WIDE_INT sz
3810 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
3811 sz *= fd->lastprivate_conditional;
3812 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
3813 false);
3814 mem = build_fold_addr_expr (memv);
3815 }
3816 else
3817 mem = null_pointer_node;
3818 }
3819 if (fd->collapse > 1 || fd->ordered)
3820 {
3821 int first_zero_iter1 = -1, first_zero_iter2 = -1;
3822 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
3823
3824 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
3825 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3826 zero_iter1_bb, first_zero_iter1,
3827 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
3828
3829 if (zero_iter1_bb)
3830 {
3831 /* Some counts[i] vars might be uninitialized if
3832 some loop has zero iterations. But the body shouldn't
3833 be executed in that case, so just avoid uninit warnings. */
3834 for (i = first_zero_iter1;
3835 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
3836 if (SSA_VAR_P (counts[i]))
3837 TREE_NO_WARNING (counts[i]) = 1;
3838 gsi_prev (&gsi);
3839 e = split_block (entry_bb, gsi_stmt (gsi));
3840 entry_bb = e->dest;
3841 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
3842 gsi = gsi_last_nondebug_bb (entry_bb);
3843 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3844 get_immediate_dominator (CDI_DOMINATORS,
3845 zero_iter1_bb));
3846 }
3847 if (zero_iter2_bb)
3848 {
3849 /* Some counts[i] vars might be uninitialized if
3850 some loop has zero iterations. But the body shouldn't
3851 be executed in that case, so just avoid uninit warnings. */
3852 for (i = first_zero_iter2; i < fd->ordered; i++)
3853 if (SSA_VAR_P (counts[i]))
3854 TREE_NO_WARNING (counts[i]) = 1;
3855 if (zero_iter1_bb)
3856 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3857 else
3858 {
3859 gsi_prev (&gsi);
3860 e = split_block (entry_bb, gsi_stmt (gsi));
3861 entry_bb = e->dest;
3862 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
3863 gsi = gsi_last_nondebug_bb (entry_bb);
3864 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
3865 get_immediate_dominator
3866 (CDI_DOMINATORS, zero_iter2_bb));
3867 }
3868 }
3869 if (fd->collapse == 1)
3870 {
3871 counts[0] = fd->loop.n2;
3872 fd->loop = fd->loops[0];
3873 }
3874 }
3875
3876 type = TREE_TYPE (fd->loop.v);
3877 istart0 = create_tmp_var (fd->iter_type, ".istart0");
3878 iend0 = create_tmp_var (fd->iter_type, ".iend0");
3879 TREE_ADDRESSABLE (istart0) = 1;
3880 TREE_ADDRESSABLE (iend0) = 1;
3881
3882 /* See if we need to bias by LLONG_MIN. */
3883 if (fd->iter_type == long_long_unsigned_type_node
3884 && TREE_CODE (type) == INTEGER_TYPE
3885 && !TYPE_UNSIGNED (type)
3886 && fd->ordered == 0)
3887 {
3888 tree n1, n2;
3889
3890 if (fd->loop.cond_code == LT_EXPR)
3891 {
3892 n1 = fd->loop.n1;
3893 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
3894 }
3895 else
3896 {
3897 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
3898 n2 = fd->loop.n1;
3899 }
3900 if (TREE_CODE (n1) != INTEGER_CST
3901 || TREE_CODE (n2) != INTEGER_CST
3902 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
3903 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
3904 }
3905
3906 gimple_stmt_iterator gsif = gsi;
3907 gsi_prev (&gsif);
3908
3909 tree arr = NULL_TREE;
3910 if (in_combined_parallel)
3911 {
3912 gcc_assert (fd->ordered == 0);
3913 /* In a combined parallel loop, emit a call to
3914 GOMP_loop_foo_next. */
3915 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3916 build_fold_addr_expr (istart0),
3917 build_fold_addr_expr (iend0));
3918 }
3919 else
3920 {
3921 tree t0, t1, t2, t3, t4;
3922 /* If this is not a combined parallel loop, emit a call to
3923 GOMP_loop_foo_start in ENTRY_BB. */
3924 t4 = build_fold_addr_expr (iend0);
3925 t3 = build_fold_addr_expr (istart0);
3926 if (fd->ordered)
3927 {
3928 t0 = build_int_cst (unsigned_type_node,
3929 fd->ordered - fd->collapse + 1);
3930 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
3931 fd->ordered
3932 - fd->collapse + 1),
3933 ".omp_counts");
3934 DECL_NAMELESS (arr) = 1;
3935 TREE_ADDRESSABLE (arr) = 1;
3936 TREE_STATIC (arr) = 1;
3937 vec<constructor_elt, va_gc> *v;
3938 vec_alloc (v, fd->ordered - fd->collapse + 1);
3939 int idx;
3940
3941 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
3942 {
3943 tree c;
3944 if (idx == 0 && fd->collapse > 1)
3945 c = fd->loop.n2;
3946 else
3947 c = counts[idx + fd->collapse - 1];
3948 tree purpose = size_int (idx);
3949 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
3950 if (TREE_CODE (c) != INTEGER_CST)
3951 TREE_STATIC (arr) = 0;
3952 }
3953
3954 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
3955 if (!TREE_STATIC (arr))
3956 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
3957 void_type_node, arr),
3958 true, NULL_TREE, true, GSI_SAME_STMT);
3959 t1 = build_fold_addr_expr (arr);
3960 t2 = NULL_TREE;
3961 }
3962 else
3963 {
3964 t2 = fold_convert (fd->iter_type, fd->loop.step);
3965 t1 = fd->loop.n2;
3966 t0 = fd->loop.n1;
3967 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3968 {
3969 tree innerc
3970 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3971 OMP_CLAUSE__LOOPTEMP_);
3972 gcc_assert (innerc);
3973 t0 = OMP_CLAUSE_DECL (innerc);
3974 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3975 OMP_CLAUSE__LOOPTEMP_);
3976 gcc_assert (innerc);
3977 t1 = OMP_CLAUSE_DECL (innerc);
3978 }
3979 if (POINTER_TYPE_P (TREE_TYPE (t0))
3980 && TYPE_PRECISION (TREE_TYPE (t0))
3981 != TYPE_PRECISION (fd->iter_type))
3982 {
3983 /* Avoid casting pointers to an integer of a different size. */
3984 tree itype = signed_type_for (type);
3985 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
3986 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
3987 }
3988 else
3989 {
3990 t1 = fold_convert (fd->iter_type, t1);
3991 t0 = fold_convert (fd->iter_type, t0);
3992 }
3993 if (bias)
3994 {
3995 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
3996 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
3997 }
3998 }
3999 if (fd->iter_type == long_integer_type_node || fd->ordered)
4000 {
4001 if (fd->chunk_size)
4002 {
4003 t = fold_convert (fd->iter_type, fd->chunk_size);
4004 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4005 if (sched_arg)
4006 {
4007 if (fd->ordered)
4008 t = build_call_expr (builtin_decl_explicit (start_fn),
4009 8, t0, t1, sched_arg, t, t3, t4,
4010 reductions, mem);
4011 else
4012 t = build_call_expr (builtin_decl_explicit (start_fn),
4013 9, t0, t1, t2, sched_arg, t, t3, t4,
4014 reductions, mem);
4015 }
4016 else if (fd->ordered)
4017 t = build_call_expr (builtin_decl_explicit (start_fn),
4018 5, t0, t1, t, t3, t4);
4019 else
4020 t = build_call_expr (builtin_decl_explicit (start_fn),
4021 6, t0, t1, t2, t, t3, t4);
4022 }
4023 else if (fd->ordered)
4024 t = build_call_expr (builtin_decl_explicit (start_fn),
4025 4, t0, t1, t3, t4);
4026 else
4027 t = build_call_expr (builtin_decl_explicit (start_fn),
4028 5, t0, t1, t2, t3, t4);
4029 }
4030 else
4031 {
4032 tree t5;
4033 tree c_bool_type;
4034 tree bfn_decl;
4035
4036 /* The GOMP_loop_ull_*start functions have an additional boolean
4037 argument, true for < loops and false for > loops.
4038 In Fortran, the C bool type can be different from
4039 boolean_type_node. */
4040 bfn_decl = builtin_decl_explicit (start_fn);
4041 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
4042 t5 = build_int_cst (c_bool_type,
4043 fd->loop.cond_code == LT_EXPR ? 1 : 0);
4044 if (fd->chunk_size)
4045 {
4046 tree bfn_decl = builtin_decl_explicit (start_fn);
4047 t = fold_convert (fd->iter_type, fd->chunk_size);
4048 t = omp_adjust_chunk_size (t, fd->simd_schedule);
4049 if (sched_arg)
4050 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
4051 t, t3, t4, reductions, mem);
4052 else
4053 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
4054 }
4055 else
4056 t = build_call_expr (builtin_decl_explicit (start_fn),
4057 6, t5, t0, t1, t2, t3, t4);
4058 }
4059 }
4060 if (TREE_TYPE (t) != boolean_type_node)
4061 t = fold_build2 (NE_EXPR, boolean_type_node,
4062 t, build_int_cst (TREE_TYPE (t), 0));
4063 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4064 true, GSI_SAME_STMT);
4065 if (arr && !TREE_STATIC (arr))
4066 {
4067 tree clobber = build_clobber (TREE_TYPE (arr));
4068 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
4069 GSI_SAME_STMT);
4070 }
4071 if (fd->have_pointer_condtemp)
4072 expand_omp_build_assign (&gsi, condtemp, memv, false);
4073 if (fd->have_reductemp)
4074 {
4075 gimple *g = gsi_stmt (gsi);
4076 gsi_remove (&gsi, true);
4077 release_ssa_name (gimple_assign_lhs (g));
4078
4079 entry_bb = region->entry;
4080 gsi = gsi_last_nondebug_bb (entry_bb);
4081
4082 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4083 }
4084 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4085
4086 /* Remove the GIMPLE_OMP_FOR statement. */
4087 gsi_remove (&gsi, true);
4088
4089 if (gsi_end_p (gsif))
4090 gsif = gsi_after_labels (gsi_bb (gsif));
4091 gsi_next (&gsif);
4092
4093 /* Iteration setup for sequential loop goes in L0_BB. */
4094 tree startvar = fd->loop.v;
4095 tree endvar = NULL_TREE;
4096
4097 if (gimple_omp_for_combined_p (fd->for_stmt))
4098 {
4099 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
4100 && gimple_omp_for_kind (inner_stmt)
4101 == GF_OMP_FOR_KIND_SIMD);
4102 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
4103 OMP_CLAUSE__LOOPTEMP_);
4104 gcc_assert (innerc);
4105 startvar = OMP_CLAUSE_DECL (innerc);
4106 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4107 OMP_CLAUSE__LOOPTEMP_);
4108 gcc_assert (innerc);
4109 endvar = OMP_CLAUSE_DECL (innerc);
4110 }
4111
4112 gsi = gsi_start_bb (l0_bb);
4113 t = istart0;
4114 if (fd->ordered && fd->collapse == 1)
4115 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4116 fold_convert (fd->iter_type, fd->loop.step));
4117 else if (bias)
4118 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4119 if (fd->ordered && fd->collapse == 1)
4120 {
4121 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4122 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4123 fd->loop.n1, fold_convert (sizetype, t));
4124 else
4125 {
4126 t = fold_convert (TREE_TYPE (startvar), t);
4127 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4128 fd->loop.n1, t);
4129 }
4130 }
4131 else
4132 {
4133 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4134 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4135 t = fold_convert (TREE_TYPE (startvar), t);
4136 }
4137 t = force_gimple_operand_gsi (&gsi, t,
4138 DECL_P (startvar)
4139 && TREE_ADDRESSABLE (startvar),
4140 NULL_TREE, false, GSI_CONTINUE_LINKING);
4141 assign_stmt = gimple_build_assign (startvar, t);
4142 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4143 if (cond_var)
4144 {
4145 tree itype = TREE_TYPE (cond_var);
4146 /* For the lastprivate(conditional:) itervar, we need an iteration
4147 counter that starts at a non-zero unsigned value and increases.
4148 Prefer as few IVs as possible, so use startvar itself if we can,
4149 or startvar + constant (those would be incremented by step),
4150 and as a last resort use s0 + 1, incremented by 1 on each
4151 iteration. */
4152 if ((fd->ordered && fd->collapse == 1)
4153 || bias
4154 || POINTER_TYPE_P (type)
4155 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4156 || fd->loop.cond_code != LT_EXPR)
4157 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
4158 build_int_cst (itype, 1));
4159 else if (tree_int_cst_sgn (fd->loop.n1) == 1)
4160 t = fold_convert (itype, t);
4161 else
4162 {
4163 tree c = fold_convert (itype, fd->loop.n1);
4164 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
4165 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
4166 }
4167 t = force_gimple_operand_gsi (&gsi, t, false,
4168 NULL_TREE, false, GSI_CONTINUE_LINKING);
4169 assign_stmt = gimple_build_assign (cond_var, t);
4170 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4171 }
4172
4173 t = iend0;
4174 if (fd->ordered && fd->collapse == 1)
4175 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
4176 fold_convert (fd->iter_type, fd->loop.step));
4177 else if (bias)
4178 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
4179 if (fd->ordered && fd->collapse == 1)
4180 {
4181 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4182 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
4183 fd->loop.n1, fold_convert (sizetype, t));
4184 else
4185 {
4186 t = fold_convert (TREE_TYPE (startvar), t);
4187 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
4188 fd->loop.n1, t);
4189 }
4190 }
4191 else
4192 {
4193 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
4194 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
4195 t = fold_convert (TREE_TYPE (startvar), t);
4196 }
4197 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4198 false, GSI_CONTINUE_LINKING);
4199 if (endvar)
4200 {
4201 assign_stmt = gimple_build_assign (endvar, iend);
4202 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4203 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
4204 assign_stmt = gimple_build_assign (fd->loop.v, iend);
4205 else
4206 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
4207 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4208 }
4209 /* Handle linear clause adjustments. */
4210 tree itercnt = NULL_TREE;
4211 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4212 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4213 c; c = OMP_CLAUSE_CHAIN (c))
4214 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4215 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4216 {
4217 tree d = OMP_CLAUSE_DECL (c);
4218 bool is_ref = omp_is_reference (d);
4219 tree t = d, a, dest;
4220 if (is_ref)
4221 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4222 tree type = TREE_TYPE (t);
4223 if (POINTER_TYPE_P (type))
4224 type = sizetype;
4225 dest = unshare_expr (t);
4226 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4227 expand_omp_build_assign (&gsif, v, t);
4228 if (itercnt == NULL_TREE)
4229 {
4230 itercnt = startvar;
4231 tree n1 = fd->loop.n1;
4232 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
4233 {
4234 itercnt
4235 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
4236 itercnt);
4237 n1 = fold_convert (TREE_TYPE (itercnt), n1);
4238 }
4239 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
4240 itercnt, n1);
4241 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
4242 itercnt, fd->loop.step);
4243 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4244 NULL_TREE, false,
4245 GSI_CONTINUE_LINKING);
4246 }
4247 a = fold_build2 (MULT_EXPR, type,
4248 fold_convert (type, itercnt),
4249 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4250 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4251 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4252 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4253 false, GSI_CONTINUE_LINKING);
4254 expand_omp_build_assign (&gsi, dest, t, true);
4255 }
4256 if (fd->collapse > 1)
4257 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
4258
4259 if (fd->ordered)
4260 {
4261 /* Until now, the counts array contained the number of iterations
4262 (or a variable containing it) for the ith loop. From now on, we
4263 need those counts only for the collapsed loops, and only from the
4264 2nd to the last collapsed one. Move them one element earlier;
4265 we'll use counts[fd->collapse - 1] for the first source/sink
4266 iteration counter and so on, and counts[fd->ordered]
4267 as the array holding the current counter values for
4268 depend(source). */
4269 if (fd->collapse > 1)
4270 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
4271 if (broken_loop)
4272 {
4273 int i;
4274 for (i = fd->collapse; i < fd->ordered; i++)
4275 {
4276 tree type = TREE_TYPE (fd->loops[i].v);
4277 tree this_cond
4278 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
4279 fold_convert (type, fd->loops[i].n1),
4280 fold_convert (type, fd->loops[i].n2));
4281 if (!integer_onep (this_cond))
4282 break;
4283 }
4284 if (i < fd->ordered)
4285 {
4286 cont_bb
4287 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
4288 add_bb_to_loop (cont_bb, l1_bb->loop_father);
4289 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
4290 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
4291 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4292 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
4293 make_edge (cont_bb, l1_bb, 0);
4294 l2_bb = create_empty_bb (cont_bb);
4295 broken_loop = false;
4296 }
4297 }
4298 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
4299 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
4300 ordered_lastprivate);
4301 if (counts[fd->collapse - 1])
4302 {
4303 gcc_assert (fd->collapse == 1);
4304 gsi = gsi_last_bb (l0_bb);
4305 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
4306 istart0, true);
4307 gsi = gsi_last_bb (cont_bb);
4308 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
4309 build_int_cst (fd->iter_type, 1));
4310 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
4311 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4312 size_zero_node, NULL_TREE, NULL_TREE);
4313 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
4314 t = counts[fd->collapse - 1];
4315 }
4316 else if (fd->collapse > 1)
4317 t = fd->loop.v;
4318 else
4319 {
4320 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4321 fd->loops[0].v, fd->loops[0].n1);
4322 t = fold_convert (fd->iter_type, t);
4323 }
4324 gsi = gsi_last_bb (l0_bb);
4325 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
4326 size_zero_node, NULL_TREE, NULL_TREE);
4327 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4328 false, GSI_CONTINUE_LINKING);
4329 expand_omp_build_assign (&gsi, aref, t, true);
4330 }
4331
4332 if (!broken_loop)
4333 {
4334 /* Code to control the increment and predicate for the sequential
4335 loop goes in the CONT_BB. */
4336 gsi = gsi_last_nondebug_bb (cont_bb);
4337 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4338 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
4339 vmain = gimple_omp_continue_control_use (cont_stmt);
4340 vback = gimple_omp_continue_control_def (cont_stmt);
4341
4342 if (cond_var)
4343 {
4344 tree itype = TREE_TYPE (cond_var);
4345 tree t2;
4346 if ((fd->ordered && fd->collapse == 1)
4347 || bias
4348 || POINTER_TYPE_P (type)
4349 || TREE_CODE (fd->loop.n1) != INTEGER_CST
4350 || fd->loop.cond_code != LT_EXPR)
4351 t2 = build_int_cst (itype, 1);
4352 else
4353 t2 = fold_convert (itype, fd->loop.step);
4354 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
4355 t2 = force_gimple_operand_gsi (&gsi, t2, false,
4356 NULL_TREE, true, GSI_SAME_STMT);
4357 assign_stmt = gimple_build_assign (cond_var, t2);
4358 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4359 }
4360
4361 if (!gimple_omp_for_combined_p (fd->for_stmt))
4362 {
4363 if (POINTER_TYPE_P (type))
4364 t = fold_build_pointer_plus (vmain, fd->loop.step);
4365 else
4366 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
4367 t = force_gimple_operand_gsi (&gsi, t,
4368 DECL_P (vback)
4369 && TREE_ADDRESSABLE (vback),
4370 NULL_TREE, true, GSI_SAME_STMT);
4371 assign_stmt = gimple_build_assign (vback, t);
4372 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4373
4374 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
4375 {
4376 tree tem;
4377 if (fd->collapse > 1)
4378 tem = fd->loop.v;
4379 else
4380 {
4381 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
4382 fd->loops[0].v, fd->loops[0].n1);
4383 tem = fold_convert (fd->iter_type, tem);
4384 }
4385 tree aref = build4 (ARRAY_REF, fd->iter_type,
4386 counts[fd->ordered], size_zero_node,
4387 NULL_TREE, NULL_TREE);
4388 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
4389 true, GSI_SAME_STMT);
4390 expand_omp_build_assign (&gsi, aref, tem);
4391 }
4392
4393 t = build2 (fd->loop.cond_code, boolean_type_node,
4394 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
4395 iend);
4396 gcond *cond_stmt = gimple_build_cond_empty (t);
4397 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4398 }
4399
4400 /* Remove GIMPLE_OMP_CONTINUE. */
4401 gsi_remove (&gsi, true);
4402
4403 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4404 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
4405
4406 /* Emit code to get the next parallel iteration in L2_BB. */
4407 gsi = gsi_start_bb (l2_bb);
4408
4409 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
4410 build_fold_addr_expr (istart0),
4411 build_fold_addr_expr (iend0));
4412 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4413 false, GSI_CONTINUE_LINKING);
4414 if (TREE_TYPE (t) != boolean_type_node)
4415 t = fold_build2 (NE_EXPR, boolean_type_node,
4416 t, build_int_cst (TREE_TYPE (t), 0));
4417 gcond *cond_stmt = gimple_build_cond_empty (t);
4418 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4419 }
4420
4421 /* Add the loop cleanup function. */
4422 gsi = gsi_last_nondebug_bb (exit_bb);
4423 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4424 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
4425 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4426 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4427 else
4428 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4429 gcall *call_stmt = gimple_build_call (t, 0);
4430 if (fd->ordered)
4431 {
4432 tree arr = counts[fd->ordered];
4433 tree clobber = build_clobber (TREE_TYPE (arr));
4434 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
4435 GSI_SAME_STMT);
4436 }
4437 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
4438 {
4439 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
4440 if (fd->have_reductemp)
4441 {
4442 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
4443 gimple_call_lhs (call_stmt));
4444 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4445 }
4446 }
4447 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
4448 gsi_remove (&gsi, true);
4449
4450 /* Connect the new blocks. */
4451 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
4452 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
4453
4454 if (!broken_loop)
4455 {
4456 gimple_seq phis;
4457
4458 e = find_edge (cont_bb, l3_bb);
4459 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
4460
4461 phis = phi_nodes (l3_bb);
4462 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
4463 {
4464 gimple *phi = gsi_stmt (gsi);
4465 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
4466 PHI_ARG_DEF_FROM_EDGE (phi, e));
4467 }
4468 remove_edge (e);
4469
4470 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
4471 e = find_edge (cont_bb, l1_bb);
4472 if (e == NULL)
4473 {
4474 e = BRANCH_EDGE (cont_bb);
4475 gcc_assert (single_succ (e->dest) == l1_bb);
4476 }
4477 if (gimple_omp_for_combined_p (fd->for_stmt))
4478 {
4479 remove_edge (e);
4480 e = NULL;
4481 }
4482 else if (fd->collapse > 1)
4483 {
4484 remove_edge (e);
4485 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4486 }
4487 else
4488 e->flags = EDGE_TRUE_VALUE;
4489 if (e)
4490 {
4491 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4492 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
4493 }
4494 else
4495 {
4496 e = find_edge (cont_bb, l2_bb);
4497 e->flags = EDGE_FALLTHRU;
4498 }
4499 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
4500
4501 if (gimple_in_ssa_p (cfun))
4502 {
4503 /* Add phis to the outer loop that connect to the phis in the inner,
4504 original loop, and move the loop entry value of the inner phi to
4505 the loop entry value of the outer phi. */
4506 gphi_iterator psi;
4507 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
4508 {
4509 location_t locus;
4510 gphi *nphi;
4511 gphi *exit_phi = psi.phi ();
4512
4513 if (virtual_operand_p (gimple_phi_result (exit_phi)))
4514 continue;
4515
4516 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
4517 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
4518
4519 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
4520 edge latch_to_l1 = find_edge (latch, l1_bb);
4521 gphi *inner_phi
4522 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
4523
4524 tree t = gimple_phi_result (exit_phi);
4525 tree new_res = copy_ssa_name (t, NULL);
4526 nphi = create_phi_node (new_res, l0_bb);
4527
4528 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
4529 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
4530 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
4531 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
4532 add_phi_arg (nphi, t, entry_to_l0, locus);
4533
4534 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
4535 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
4536
4537 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
4538 }
4539 }
4540
4541 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
4542 recompute_dominator (CDI_DOMINATORS, l2_bb));
4543 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
4544 recompute_dominator (CDI_DOMINATORS, l3_bb));
4545 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
4546 recompute_dominator (CDI_DOMINATORS, l0_bb));
4547 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
4548 recompute_dominator (CDI_DOMINATORS, l1_bb));
4549
4550 /* We enter expand_omp_for_generic with a loop. This original loop may
4551 have its own loop struct, or it may be part of an outer loop struct
4552 (which may be the fake loop). */
4553 class loop *outer_loop = entry_bb->loop_father;
4554 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
4555
4556 add_bb_to_loop (l2_bb, outer_loop);
4557
4558 /* We've added a new loop around the original loop. Allocate the
4559 corresponding loop struct. */
4560 class loop *new_loop = alloc_loop ();
4561 new_loop->header = l0_bb;
4562 new_loop->latch = l2_bb;
4563 add_loop (new_loop, outer_loop);
4564
4565 /* Allocate a loop structure for the original loop unless we already
4566 had one. */
4567 if (!orig_loop_has_loop_struct
4568 && !gimple_omp_for_combined_p (fd->for_stmt))
4569 {
4570 class loop *orig_loop = alloc_loop ();
4571 orig_loop->header = l1_bb;
4572 /* The loop may have multiple latches. */
4573 add_loop (orig_loop, new_loop);
4574 }
4575 }
4576 }
4577
4578 /* Helper function for expand_omp_for_static_nochunk. If PTR is NULL,
4579 compute the needed allocation size: for !ALLOC the size of the team
4580 allocations, for ALLOC the size of the per-thread allocation. SZ is
4581 the initial size needed for other purposes, ALLOC_ALIGN the guaranteed
4582 alignment of the allocation in bytes, CNT the number of elements of each
4583 array (for !ALLOC this is omp_get_num_threads (), for ALLOC the number
4584 of iterations handled by the current thread). If PTR is non-NULL, it is
4585 the start of the allocation and this routine shall assign pointers to
4586 the corresponding arrays to OMP_CLAUSE_DECL (c) of those _scantemp_ clauses. */
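/* Illustrative use: this helper is called once with PTR == NULL to
   compute the size of a single block covering all the non-control
   _scantemp_ arrays (element sizes times CNT, plus SZ and any
   alignment padding), and then again with PTR pointing at the start
   of that block so that a suitably aligned pointer for each array can
   be stored into the corresponding _scantemp_ decl.  */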
4587
4588 static tree
4589 expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
4590 unsigned HOST_WIDE_INT alloc_align, tree cnt,
4591 gimple_stmt_iterator *gsi, bool alloc)
4592 {
4593 tree eltsz = NULL_TREE;
4594 unsigned HOST_WIDE_INT preval = 0;
4595 if (ptr && sz)
4596 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4597 ptr, size_int (sz));
4598 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
4599 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
4600 && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
4601 && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
4602 {
4603 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
4604 unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
4605 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4606 {
4607 unsigned HOST_WIDE_INT szl
4608 = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
4609 szl = least_bit_hwi (szl);
4610 if (szl)
4611 al = MIN (al, szl);
4612 }
4613 if (ptr == NULL_TREE)
4614 {
4615 if (eltsz == NULL_TREE)
4616 eltsz = TYPE_SIZE_UNIT (pointee_type);
4617 else
4618 eltsz = size_binop (PLUS_EXPR, eltsz,
4619 TYPE_SIZE_UNIT (pointee_type));
4620 }
4621 if (preval == 0 && al <= alloc_align)
4622 {
4623 unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
4624 sz += diff;
4625 if (diff && ptr)
4626 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
4627 ptr, size_int (diff));
4628 }
4629 else if (al > preval)
4630 {
4631 if (ptr)
4632 {
4633 ptr = fold_convert (pointer_sized_int_node, ptr);
4634 ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
4635 build_int_cst (pointer_sized_int_node,
4636 al - 1));
4637 ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
4638 build_int_cst (pointer_sized_int_node,
4639 -(HOST_WIDE_INT) al));
4640 ptr = fold_convert (ptr_type_node, ptr);
4641 }
4642 else
4643 sz += al - 1;
4644 }
4645 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
4646 preval = al;
4647 else
4648 preval = 1;
4649 if (ptr)
4650 {
4651 expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
4652 ptr = OMP_CLAUSE_DECL (c);
4653 ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
4654 size_binop (MULT_EXPR, cnt,
4655 TYPE_SIZE_UNIT (pointee_type)));
4656 }
4657 }
4658
4659 if (ptr == NULL_TREE)
4660 {
4661 eltsz = size_binop (MULT_EXPR, eltsz, cnt);
4662 if (sz)
4663 eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
4664 return eltsz;
4665 }
4666 else
4667 return ptr;
4668 }
4669
4670 /* Return the last _looptemp_ clause if one has been created for
4671 lastprivate on distribute parallel for{, simd} or taskloop.
4672 FD is the loop data and INNERC should be the second _looptemp_
4673 clause (the one holding the end of the range).
4674 This is followed by collapse - 1 _looptemp_ clauses for the
4675 counts[1] and up, and for triangular loops followed by 4
4676 further _looptemp_ clauses (one for counts[0], one for first_inner_iterations,
4677 one for factor and one for adjn1). After this there is optionally one
4678 _looptemp_ clause that this function returns. */
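/* E.g. for a rectangular collapse(3) loop the _looptemp_ chain is, in
   order: start, end (INNERC), counts[1], counts[2] and then optionally
   the lastprivate clause returned here; for the triangular case the
   four extra clauses (counts[0], first_inner_iterations, factor and
   adjn1) come before the optional lastprivate one.  */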
4679
4680 static tree
4681 find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
4682 {
4683 gcc_assert (innerc);
4684 int count = fd->collapse - 1;
4685 if (fd->non_rect
4686 && fd->last_nonrect == fd->first_nonrect + 1
4687 && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
4688 count += 4;
4689 for (int i = 0; i < count; i++)
4690 {
4691 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4692 OMP_CLAUSE__LOOPTEMP_);
4693 gcc_assert (innerc);
4694 }
4695 return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4696 OMP_CLAUSE__LOOPTEMP_);
4697 }
4698
4699 /* A subroutine of expand_omp_for. Generate code for a parallel
4700 loop with static schedule and no specified chunk size. Given
4701 parameters:
4702
4703 for (V = N1; V cond N2; V += STEP) BODY;
4704
4705 where COND is "<" or ">", we generate pseudocode
4706
4707 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
4708 if (cond is <)
4709 adj = STEP - 1;
4710 else
4711 adj = STEP + 1;
4712 if ((__typeof (V)) -1 > 0 && cond is >)
4713 n = -(adj + N2 - N1) / -STEP;
4714 else
4715 n = (adj + N2 - N1) / STEP;
4716 q = n / nthreads;
4717 tt = n % nthreads;
4718 if (threadid < tt) goto L3; else goto L4;
4719 L3:
4720 tt = 0;
4721 q = q + 1;
4722 L4:
4723 s0 = q * threadid + tt;
4724 e0 = s0 + q;
4725 V = s0 * STEP + N1;
4726 if (s0 >= e0) goto L2; else goto L0;
4727 L0:
4728 e = e0 * STEP + N1;
4729 L1:
4730 BODY;
4731 V += STEP;
4732 if (V cond e) goto L1;
4733 L2:
4734 */
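/* A small worked example of the partitioning above (illustrative
   only): with n = 10 iterations and nthreads = 4, q = 10 / 4 = 2 and
   tt = 10 % 4 = 2, so threads 0 and 1 (threadid < tt) execute
   q + 1 = 3 iterations each ([0,3) and [3,6) of s0..e0), while
   threads 2 and 3 execute 2 iterations each ([6,8) and [8,10)).  */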
4735
4736 static void
4737 expand_omp_for_static_nochunk (struct omp_region *region,
4738 struct omp_for_data *fd,
4739 gimple *inner_stmt)
4740 {
4741 tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
4742 tree type, itype, vmain, vback;
4743 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
4744 basic_block body_bb, cont_bb, collapse_bb = NULL;
4745 basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
4746 basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
4747 gimple_stmt_iterator gsi, gsip;
4748 edge ep;
4749 bool broken_loop = region->cont == NULL;
4750 tree *counts = NULL;
4751 tree n1, n2, step;
4752 tree reductions = NULL_TREE;
4753 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
4754
4755 itype = type = TREE_TYPE (fd->loop.v);
4756 if (POINTER_TYPE_P (type))
4757 itype = signed_type_for (type);
4758
4759 entry_bb = region->entry;
4760 cont_bb = region->cont;
4761 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4762 fin_bb = BRANCH_EDGE (entry_bb)->dest;
4763 gcc_assert (broken_loop
4764 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
4765 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
4766 body_bb = single_succ (seq_start_bb);
4767 if (!broken_loop)
4768 {
4769 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4770 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4771 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4772 }
4773 exit_bb = region->exit;
4774
4775 /* Iteration space partitioning goes in ENTRY_BB. */
4776 gsi = gsi_last_nondebug_bb (entry_bb);
4777 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4778 gsip = gsi;
4779 gsi_prev (&gsip);
4780
4781 if (fd->collapse > 1)
4782 {
4783 int first_zero_iter = -1, dummy = -1;
4784 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4785
4786 counts = XALLOCAVEC (tree, fd->collapse);
4787 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4788 fin_bb, first_zero_iter,
4789 dummy_bb, dummy, l2_dom_bb);
4790 t = NULL_TREE;
4791 }
4792 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4793 t = integer_one_node;
4794 else
4795 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4796 fold_convert (type, fd->loop.n1),
4797 fold_convert (type, fd->loop.n2));
4798 if (fd->collapse == 1
4799 && TYPE_UNSIGNED (type)
4800 && (t == NULL_TREE || !integer_onep (t)))
4801 {
4802 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4803 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4804 true, GSI_SAME_STMT);
4805 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4806 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4807 true, GSI_SAME_STMT);
4808 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4809 NULL_TREE, NULL_TREE);
4810 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4811 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4812 expand_omp_regimplify_p, NULL, NULL)
4813 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4814 expand_omp_regimplify_p, NULL, NULL))
4815 {
4816 gsi = gsi_for_stmt (cond_stmt);
4817 gimple_regimplify_operands (cond_stmt, &gsi);
4818 }
4819 ep = split_block (entry_bb, cond_stmt);
4820 ep->flags = EDGE_TRUE_VALUE;
4821 entry_bb = ep->dest;
4822 ep->probability = profile_probability::very_likely ();
4823 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
4824 ep->probability = profile_probability::very_unlikely ();
4825 if (gimple_in_ssa_p (cfun))
4826 {
4827 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
4828 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4829 !gsi_end_p (gpi); gsi_next (&gpi))
4830 {
4831 gphi *phi = gpi.phi ();
4832 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4833 ep, UNKNOWN_LOCATION);
4834 }
4835 }
4836 gsi = gsi_last_bb (entry_bb);
4837 }
4838
4839 if (fd->lastprivate_conditional)
4840 {
4841 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4842 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
4843 if (fd->have_pointer_condtemp)
4844 condtemp = OMP_CLAUSE_DECL (c);
4845 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
4846 cond_var = OMP_CLAUSE_DECL (c);
4847 }
4848 if (fd->have_reductemp
4849 /* For scan, we don't want to reinitialize condtemp before the
4850 second loop. */
4851 || (fd->have_pointer_condtemp && !fd->have_scantemp)
4852 || fd->have_nonctrl_scantemp)
4853 {
4854 tree t1 = build_int_cst (long_integer_type_node, 0);
4855 tree t2 = build_int_cst (long_integer_type_node, 1);
4856 tree t3 = build_int_cstu (long_integer_type_node,
4857 (HOST_WIDE_INT_1U << 31) + 1);
4858 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4859 gimple_stmt_iterator gsi2 = gsi_none ();
4860 gimple *g = NULL;
4861 tree mem = null_pointer_node, memv = NULL_TREE;
4862 unsigned HOST_WIDE_INT condtemp_sz = 0;
4863 unsigned HOST_WIDE_INT alloc_align = 0;
4864 if (fd->have_reductemp)
4865 {
4866 gcc_assert (!fd->have_nonctrl_scantemp);
4867 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4868 reductions = OMP_CLAUSE_DECL (c);
4869 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4870 g = SSA_NAME_DEF_STMT (reductions);
4871 reductions = gimple_assign_rhs1 (g);
4872 OMP_CLAUSE_DECL (c) = reductions;
4873 gsi2 = gsi_for_stmt (g);
4874 }
4875 else
4876 {
4877 if (gsi_end_p (gsip))
4878 gsi2 = gsi_after_labels (region->entry);
4879 else
4880 gsi2 = gsip;
4881 reductions = null_pointer_node;
4882 }
4883 if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
4884 {
4885 tree type;
4886 if (fd->have_pointer_condtemp)
4887 type = TREE_TYPE (condtemp);
4888 else
4889 type = ptr_type_node;
4890 memv = create_tmp_var (type);
4891 TREE_ADDRESSABLE (memv) = 1;
4892 unsigned HOST_WIDE_INT sz = 0;
4893 tree size = NULL_TREE;
4894 if (fd->have_pointer_condtemp)
4895 {
4896 sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
4897 sz *= fd->lastprivate_conditional;
4898 condtemp_sz = sz;
4899 }
4900 if (fd->have_nonctrl_scantemp)
4901 {
4902 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4903 gimple *g = gimple_build_call (nthreads, 0);
4904 nthreads = create_tmp_var (integer_type_node);
4905 gimple_call_set_lhs (g, nthreads);
4906 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
4907 nthreads = fold_convert (sizetype, nthreads);
4908 alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
4909 size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
4910 alloc_align, nthreads, NULL,
4911 false);
4912 size = fold_convert (type, size);
4913 }
4914 else
4915 size = build_int_cst (type, sz);
4916 expand_omp_build_assign (&gsi2, memv, size, false);
4917 mem = build_fold_addr_expr (memv);
4918 }
4919 tree t
4920 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4921 9, t1, t2, t2, t3, t1, null_pointer_node,
4922 null_pointer_node, reductions, mem);
4923 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4924 true, GSI_SAME_STMT);
4925 if (fd->have_pointer_condtemp)
4926 expand_omp_build_assign (&gsi2, condtemp, memv, false);
4927 if (fd->have_nonctrl_scantemp)
4928 {
4929 tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
4930 expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
4931 alloc_align, nthreads, &gsi2, false);
4932 }
4933 if (fd->have_reductemp)
4934 {
4935 gsi_remove (&gsi2, true);
4936 release_ssa_name (gimple_assign_lhs (g));
4937 }
4938 }
4939 switch (gimple_omp_for_kind (fd->for_stmt))
4940 {
4941 case GF_OMP_FOR_KIND_FOR:
4942 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4943 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4944 break;
4945 case GF_OMP_FOR_KIND_DISTRIBUTE:
4946 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4947 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4948 break;
4949 default:
4950 gcc_unreachable ();
4951 }
4952 nthreads = build_call_expr (nthreads, 0);
4953 nthreads = fold_convert (itype, nthreads);
4954 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4955 true, GSI_SAME_STMT);
4956 threadid = build_call_expr (threadid, 0);
4957 threadid = fold_convert (itype, threadid);
4958 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4959 true, GSI_SAME_STMT);
4960
4961 n1 = fd->loop.n1;
4962 n2 = fd->loop.n2;
4963 step = fd->loop.step;
4964 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4965 {
4966 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4967 OMP_CLAUSE__LOOPTEMP_);
4968 gcc_assert (innerc);
4969 n1 = OMP_CLAUSE_DECL (innerc);
4970 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4971 OMP_CLAUSE__LOOPTEMP_);
4972 gcc_assert (innerc);
4973 n2 = OMP_CLAUSE_DECL (innerc);
4974 }
4975 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4976 true, NULL_TREE, true, GSI_SAME_STMT);
4977 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4978 true, NULL_TREE, true, GSI_SAME_STMT);
4979 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4980 true, NULL_TREE, true, GSI_SAME_STMT);
4981
4982 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4983 t = fold_build2 (PLUS_EXPR, itype, step, t);
4984 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4985 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4986 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4987 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4988 fold_build1 (NEGATE_EXPR, itype, t),
4989 fold_build1 (NEGATE_EXPR, itype, step));
4990 else
4991 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4992 t = fold_convert (itype, t);
4993 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
4994
4995 q = create_tmp_reg (itype, "q");
4996 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
4997 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
4998 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
4999
5000 tt = create_tmp_reg (itype, "tt");
5001 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
5002 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
5003 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
5004
5005 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
5006 gcond *cond_stmt = gimple_build_cond_empty (t);
5007 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5008
5009 second_bb = split_block (entry_bb, cond_stmt)->dest;
5010 gsi = gsi_last_nondebug_bb (second_bb);
5011 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5012
5013 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
5014 GSI_SAME_STMT);
5015 gassign *assign_stmt
5016 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
5017 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5018
5019 third_bb = split_block (second_bb, assign_stmt)->dest;
5020 gsi = gsi_last_nondebug_bb (third_bb);
5021 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5022
5023 if (fd->have_nonctrl_scantemp)
5024 {
5025 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5026 tree controlp = NULL_TREE, controlb = NULL_TREE;
5027 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5028 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5029 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5030 {
5031 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5032 controlb = OMP_CLAUSE_DECL (c);
5033 else
5034 controlp = OMP_CLAUSE_DECL (c);
5035 if (controlb && controlp)
5036 break;
5037 }
5038 gcc_assert (controlp && controlb);
5039 tree cnt = create_tmp_var (sizetype);
5040 gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
5041 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5042 unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
5043 tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
5044 alloc_align, cnt, NULL, true);
5045 tree size = create_tmp_var (sizetype);
5046 expand_omp_build_assign (&gsi, size, sz, false);
5047 tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
5048 size, size_int (16384));
5049 expand_omp_build_assign (&gsi, controlb, cmp);
5050 g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5051 NULL_TREE, NULL_TREE);
5052 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5053 fourth_bb = split_block (third_bb, g)->dest;
5054 gsi = gsi_last_nondebug_bb (fourth_bb);
5055 /* FIXME: Once we have allocators, this should use an allocator instead of plain malloc. */
5056 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
5057 gimple_call_set_lhs (g, controlp);
5058 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5059 expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
5060 &gsi, true);
5061 gsi_prev (&gsi);
5062 g = gsi_stmt (gsi);
5063 fifth_bb = split_block (fourth_bb, g)->dest;
5064 gsi = gsi_last_nondebug_bb (fifth_bb);
5065
5066 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
5067 gimple_call_set_lhs (g, controlp);
5068 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5069 tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
5070 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5071 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5072 && OMP_CLAUSE__SCANTEMP__ALLOC (c))
5073 {
5074 tree tmp = create_tmp_var (sizetype);
5075 tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
5076 g = gimple_build_assign (tmp, MULT_EXPR, cnt,
5077 TYPE_SIZE_UNIT (pointee_type));
5078 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5079 g = gimple_build_call (alloca_decl, 2, tmp,
5080 size_int (TYPE_ALIGN (pointee_type)));
5081 gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
5082 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5083 }
5084
5085 sixth_bb = split_block (fifth_bb, g)->dest;
5086 gsi = gsi_last_nondebug_bb (sixth_bb);
5087 }
5088
5089 t = build2 (MULT_EXPR, itype, q, threadid);
5090 t = build2 (PLUS_EXPR, itype, t, tt);
5091 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5092
5093 t = fold_build2 (PLUS_EXPR, itype, s0, q);
5094 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
5095
5096 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
5097 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
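/* Illustrative sketch with hypothetical values: for n = 10 iterations
   and nthreads = 4, q = 10 / 4 = 2 and tt = 10 % 4 = 2, so threads 0
   and 1 (threadid < tt) each take q + 1 = 3 iterations starting at
   s0 = 0 and s0 = 3, while threads 2 and 3 each take 2 iterations
   starting at s0 = 6 and s0 = 8; in every case e0 = s0 + q with the
   per-thread adjusted q.  */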
5098
5099 /* Remove the GIMPLE_OMP_FOR statement. */
5100 gsi_remove (&gsi, true);
5101
5102 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5103 gsi = gsi_start_bb (seq_start_bb);
5104
5105 tree startvar = fd->loop.v;
5106 tree endvar = NULL_TREE;
5107
5108 if (gimple_omp_for_combined_p (fd->for_stmt))
5109 {
5110 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5111 ? gimple_omp_parallel_clauses (inner_stmt)
5112 : gimple_omp_for_clauses (inner_stmt);
5113 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5114 gcc_assert (innerc);
5115 startvar = OMP_CLAUSE_DECL (innerc);
5116 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5117 OMP_CLAUSE__LOOPTEMP_);
5118 gcc_assert (innerc);
5119 endvar = OMP_CLAUSE_DECL (innerc);
5120 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5121 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5122 {
5123 innerc = find_lastprivate_looptemp (fd, innerc);
5124 if (innerc)
5125 {
5126 /* If needed (distribute parallel for with lastprivate),
5127 propagate down the total number of iterations. */
5128 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5129 fd->loop.n2);
5130 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5131 GSI_CONTINUE_LINKING);
5132 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5133 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5134 }
5135 }
5136 }
5137 t = fold_convert (itype, s0);
5138 t = fold_build2 (MULT_EXPR, itype, t, step);
5139 if (POINTER_TYPE_P (type))
5140 {
5141 t = fold_build_pointer_plus (n1, t);
5142 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5143 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5144 t = fold_convert (signed_type_for (type), t);
5145 }
5146 else
5147 t = fold_build2 (PLUS_EXPR, type, t, n1);
5148 t = fold_convert (TREE_TYPE (startvar), t);
5149 t = force_gimple_operand_gsi (&gsi, t,
5150 DECL_P (startvar)
5151 && TREE_ADDRESSABLE (startvar),
5152 NULL_TREE, false, GSI_CONTINUE_LINKING);
5153 assign_stmt = gimple_build_assign (startvar, t);
5154 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5155 if (cond_var)
5156 {
5157 tree itype = TREE_TYPE (cond_var);
5158 /* For the lastprivate(conditional:) itervar, we need an iteration
5159 counter that starts at a non-zero unsigned value and increases.
5160 Prefer as few IVs as possible, so if we can use startvar
5161 itself, use that, or startvar + constant (those would be
5162 incremented with step), and as a last resort use s0 + 1,
5163 incremented by 1 each iteration. */
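/* E.g. (hypothetical loops): for "for (i = 4; i < n; i++)" n1 is the
   positive constant 4, so startvar itself already starts non-zero and
   can serve as the counter; for "for (i = -3; i < n; i++)" the counter
   becomes startvar + 4, so its first value is 1.  */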
5164 if (POINTER_TYPE_P (type)
5165 || TREE_CODE (n1) != INTEGER_CST
5166 || fd->loop.cond_code != LT_EXPR)
5167 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5168 build_int_cst (itype, 1));
5169 else if (tree_int_cst_sgn (n1) == 1)
5170 t = fold_convert (itype, t);
5171 else
5172 {
5173 tree c = fold_convert (itype, n1);
5174 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5175 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5176 }
5177 t = force_gimple_operand_gsi (&gsi, t, false,
5178 NULL_TREE, false, GSI_CONTINUE_LINKING);
5179 assign_stmt = gimple_build_assign (cond_var, t);
5180 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5181 }
5182
5183 t = fold_convert (itype, e0);
5184 t = fold_build2 (MULT_EXPR, itype, t, step);
5185 if (POINTER_TYPE_P (type))
5186 {
5187 t = fold_build_pointer_plus (n1, t);
5188 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5189 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5190 t = fold_convert (signed_type_for (type), t);
5191 }
5192 else
5193 t = fold_build2 (PLUS_EXPR, type, t, n1);
5194 t = fold_convert (TREE_TYPE (startvar), t);
5195 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5196 false, GSI_CONTINUE_LINKING);
5197 if (endvar)
5198 {
5199 assign_stmt = gimple_build_assign (endvar, e);
5200 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5201 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5202 assign_stmt = gimple_build_assign (fd->loop.v, e);
5203 else
5204 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5205 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5206 }
5207 /* Handle linear clause adjustments. */
5208 tree itercnt = NULL_TREE;
5209 tree *nonrect_bounds = NULL;
5210 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5211 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5212 c; c = OMP_CLAUSE_CHAIN (c))
5213 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5214 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5215 {
5216 tree d = OMP_CLAUSE_DECL (c);
5217 bool is_ref = omp_is_reference (d);
5218 tree t = d, a, dest;
5219 if (is_ref)
5220 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5221 if (itercnt == NULL_TREE)
5222 {
5223 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5224 {
5225 itercnt = fold_build2 (MINUS_EXPR, itype,
5226 fold_convert (itype, n1),
5227 fold_convert (itype, fd->loop.n1));
5228 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
5229 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
5230 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5231 NULL_TREE, false,
5232 GSI_CONTINUE_LINKING);
5233 }
5234 else
5235 itercnt = s0;
5236 }
5237 tree type = TREE_TYPE (t);
5238 if (POINTER_TYPE_P (type))
5239 type = sizetype;
5240 a = fold_build2 (MULT_EXPR, type,
5241 fold_convert (type, itercnt),
5242 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5243 dest = unshare_expr (t);
5244 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5245 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
5246 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5247 false, GSI_CONTINUE_LINKING);
5248 expand_omp_build_assign (&gsi, dest, t, true);
5249 }
5250 if (fd->collapse > 1)
5251 {
5252 if (fd->non_rect)
5253 {
5254 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
5255 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
5256 }
5257 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
5258 startvar);
5259 }
5260
5261 if (!broken_loop)
5262 {
5263 /* The code controlling the sequential loop replaces the
5264 GIMPLE_OMP_CONTINUE. */
5265 gsi = gsi_last_nondebug_bb (cont_bb);
5266 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5267 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5268 vmain = gimple_omp_continue_control_use (cont_stmt);
5269 vback = gimple_omp_continue_control_def (cont_stmt);
5270
5271 if (cond_var)
5272 {
5273 tree itype = TREE_TYPE (cond_var);
5274 tree t2;
5275 if (POINTER_TYPE_P (type)
5276 || TREE_CODE (n1) != INTEGER_CST
5277 || fd->loop.cond_code != LT_EXPR)
5278 t2 = build_int_cst (itype, 1);
5279 else
5280 t2 = fold_convert (itype, step);
5281 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5282 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5283 NULL_TREE, true, GSI_SAME_STMT);
5284 assign_stmt = gimple_build_assign (cond_var, t2);
5285 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5286 }
5287
5288 if (!gimple_omp_for_combined_p (fd->for_stmt))
5289 {
5290 if (POINTER_TYPE_P (type))
5291 t = fold_build_pointer_plus (vmain, step);
5292 else
5293 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5294 t = force_gimple_operand_gsi (&gsi, t,
5295 DECL_P (vback)
5296 && TREE_ADDRESSABLE (vback),
5297 NULL_TREE, true, GSI_SAME_STMT);
5298 assign_stmt = gimple_build_assign (vback, t);
5299 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5300
5301 t = build2 (fd->loop.cond_code, boolean_type_node,
5302 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5303 ? t : vback, e);
5304 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5305 }
5306
5307 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5308 gsi_remove (&gsi, true);
5309
5310 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5311 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
5312 cont_bb, body_bb);
5313 }
5314
5315 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
5316 gsi = gsi_last_nondebug_bb (exit_bb);
5317 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
5318 {
5319 t = gimple_omp_return_lhs (gsi_stmt (gsi));
5320 if (fd->have_reductemp
5321 || ((fd->have_pointer_condtemp || fd->have_scantemp)
5322 && !fd->have_nonctrl_scantemp))
5323 {
5324 tree fn;
5325 if (t)
5326 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
5327 else
5328 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
5329 gcall *g = gimple_build_call (fn, 0);
5330 if (t)
5331 {
5332 gimple_call_set_lhs (g, t);
5333 if (fd->have_reductemp)
5334 gsi_insert_after (&gsi, gimple_build_assign (reductions,
5335 NOP_EXPR, t),
5336 GSI_SAME_STMT);
5337 }
5338 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5339 }
5340 else
5341 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
5342 }
5343 else if ((fd->have_pointer_condtemp || fd->have_scantemp)
5344 && !fd->have_nonctrl_scantemp)
5345 {
5346 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
5347 gcall *g = gimple_build_call (fn, 0);
5348 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
5349 }
5350 if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
5351 {
5352 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5353 tree controlp = NULL_TREE, controlb = NULL_TREE;
5354 for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
5355 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
5356 && OMP_CLAUSE__SCANTEMP__CONTROL (c))
5357 {
5358 if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
5359 controlb = OMP_CLAUSE_DECL (c);
5360 else
5361 controlp = OMP_CLAUSE_DECL (c);
5362 if (controlb && controlp)
5363 break;
5364 }
5365 gcc_assert (controlp && controlb);
5366 gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
5367 NULL_TREE, NULL_TREE);
5368 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5369 exit1_bb = split_block (exit_bb, g)->dest;
5370 gsi = gsi_after_labels (exit1_bb);
5371 g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
5372 controlp);
5373 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5374 exit2_bb = split_block (exit1_bb, g)->dest;
5375 gsi = gsi_after_labels (exit2_bb);
5376 g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
5377 controlp);
5378 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
5379 exit3_bb = split_block (exit2_bb, g)->dest;
5380 gsi = gsi_after_labels (exit3_bb);
5381 }
5382 gsi_remove (&gsi, true);
5383
5384 /* Connect all the blocks. */
5385 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
5386 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
5387 ep = find_edge (entry_bb, second_bb);
5388 ep->flags = EDGE_TRUE_VALUE;
5389 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
5390 if (fourth_bb)
5391 {
5392 ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
5393 ep->probability
5394 = profile_probability::guessed_always ().apply_scale (1, 2);
5395 ep = find_edge (third_bb, fourth_bb);
5396 ep->flags = EDGE_TRUE_VALUE;
5397 ep->probability
5398 = profile_probability::guessed_always ().apply_scale (1, 2);
5399 ep = find_edge (fourth_bb, fifth_bb);
5400 redirect_edge_and_branch (ep, sixth_bb);
5401 }
5402 else
5403 sixth_bb = third_bb;
5404 find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
5405 find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
5406 if (exit1_bb)
5407 {
5408 ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
5409 ep->probability
5410 = profile_probability::guessed_always ().apply_scale (1, 2);
5411 ep = find_edge (exit_bb, exit1_bb);
5412 ep->flags = EDGE_TRUE_VALUE;
5413 ep->probability
5414 = profile_probability::guessed_always ().apply_scale (1, 2);
5415 ep = find_edge (exit1_bb, exit2_bb);
5416 redirect_edge_and_branch (ep, exit3_bb);
5417 }
5418
5419 if (!broken_loop)
5420 {
5421 ep = find_edge (cont_bb, body_bb);
5422 if (ep == NULL)
5423 {
5424 ep = BRANCH_EDGE (cont_bb);
5425 gcc_assert (single_succ (ep->dest) == body_bb);
5426 }
5427 if (gimple_omp_for_combined_p (fd->for_stmt))
5428 {
5429 remove_edge (ep);
5430 ep = NULL;
5431 }
5432 else if (fd->collapse > 1)
5433 {
5434 remove_edge (ep);
5435 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5436 }
5437 else
5438 ep->flags = EDGE_TRUE_VALUE;
5439 find_edge (cont_bb, fin_bb)->flags
5440 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5441 }
5442
5443 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
5444 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
5445 if (fourth_bb)
5446 {
5447 set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
5448 set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
5449 }
5450 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
5451
5452 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5453 recompute_dominator (CDI_DOMINATORS, body_bb));
5454 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5455 recompute_dominator (CDI_DOMINATORS, fin_bb));
5456 if (exit1_bb)
5457 {
5458 set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
5459 set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
5460 }
5461
5462 class loop *loop = body_bb->loop_father;
5463 if (loop != entry_bb->loop_father)
5464 {
5465 gcc_assert (broken_loop || loop->header == body_bb);
5466 gcc_assert (broken_loop
5467 || loop->latch == region->cont
5468 || single_pred (loop->latch) == region->cont);
5469 return;
5470 }
5471
5472 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5473 {
5474 loop = alloc_loop ();
5475 loop->header = body_bb;
5476 if (collapse_bb == NULL)
5477 loop->latch = cont_bb;
5478 add_loop (loop, body_bb->loop_father);
5479 }
5480 }
5481
5482 /* Return the phi in E->DEST with ARG on edge E, or NULL if there is none. */
5483
5484 static gphi *
5485 find_phi_with_arg_on_edge (tree arg, edge e)
5486 {
5487 basic_block bb = e->dest;
5488
5489 for (gphi_iterator gpi = gsi_start_phis (bb);
5490 !gsi_end_p (gpi);
5491 gsi_next (&gpi))
5492 {
5493 gphi *phi = gpi.phi ();
5494 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
5495 return phi;
5496 }
5497
5498 return NULL;
5499 }
5500
5501 /* A subroutine of expand_omp_for. Generate code for a parallel
5502 loop with static schedule and a specified chunk size. Given
5503 parameters:
5504
5505 for (V = N1; V cond N2; V += STEP) BODY;
5506
5507 where COND is "<" or ">", we generate pseudocode
5508
5509 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
5510 if (cond is <)
5511 adj = STEP - 1;
5512 else
5513 adj = STEP + 1;
5514 if ((__typeof (V)) -1 > 0 && cond is >)
5515 n = -(adj + N2 - N1) / -STEP;
5516 else
5517 n = (adj + N2 - N1) / STEP;
5518 trip = 0;
5519 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
5520 here so that V is defined
5521 if the loop is not entered
5522 L0:
5523 s0 = (trip * nthreads + threadid) * CHUNK;
5524 e0 = min (s0 + CHUNK, n);
5525 if (s0 < n) goto L1; else goto L4;
5526 L1:
5527 V = s0 * STEP + N1;
5528 e = e0 * STEP + N1;
5529 L2:
5530 BODY;
5531 V += STEP;
5532 if (V cond e) goto L2; else goto L3;
5533 L3:
5534 trip += 1;
5535 goto L0;
5536 L4:
5537 */
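/* As a purely illustrative example, with N1 = 0, N2 = 10, STEP = 1,
   CHUNK = 3 and two threads: n = 10, and chunks are handed out
   round-robin, so thread 0 runs iterations 0-2 (trip 0) and 6-8
   (trip 1), while thread 1 runs 3-5 (trip 0) and 9 (trip 1, with e0
   clamped to n).  */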
5538
5539 static void
5540 expand_omp_for_static_chunk (struct omp_region *region,
5541 struct omp_for_data *fd, gimple *inner_stmt)
5542 {
5543 tree n, s0, e0, e, t;
5544 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
5545 tree type, itype, vmain, vback, vextra;
5546 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
5547 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
5548 gimple_stmt_iterator gsi, gsip;
5549 edge se;
5550 bool broken_loop = region->cont == NULL;
5551 tree *counts = NULL;
5552 tree n1, n2, step;
5553 tree reductions = NULL_TREE;
5554 tree cond_var = NULL_TREE, condtemp = NULL_TREE;
5555
5556 itype = type = TREE_TYPE (fd->loop.v);
5557 if (POINTER_TYPE_P (type))
5558 itype = signed_type_for (type);
5559
5560 entry_bb = region->entry;
5561 se = split_block (entry_bb, last_stmt (entry_bb));
5562 entry_bb = se->src;
5563 iter_part_bb = se->dest;
5564 cont_bb = region->cont;
5565 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
5566 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
5567 gcc_assert (broken_loop
5568 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
5569 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
5570 body_bb = single_succ (seq_start_bb);
5571 if (!broken_loop)
5572 {
5573 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
5574 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
5575 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5576 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
5577 }
5578 exit_bb = region->exit;
5579
5580 /* Trip and adjustment setup goes in ENTRY_BB. */
5581 gsi = gsi_last_nondebug_bb (entry_bb);
5582 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5583 gsip = gsi;
5584 gsi_prev (&gsip);
5585
5586 if (fd->collapse > 1)
5587 {
5588 int first_zero_iter = -1, dummy = -1;
5589 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5590
5591 counts = XALLOCAVEC (tree, fd->collapse);
5592 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5593 fin_bb, first_zero_iter,
5594 dummy_bb, dummy, l2_dom_bb);
5595 t = NULL_TREE;
5596 }
5597 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
5598 t = integer_one_node;
5599 else
5600 t = fold_binary (fd->loop.cond_code, boolean_type_node,
5601 fold_convert (type, fd->loop.n1),
5602 fold_convert (type, fd->loop.n2));
5603 if (fd->collapse == 1
5604 && TYPE_UNSIGNED (type)
5605 && (t == NULL_TREE || !integer_onep (t)))
5606 {
5607 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
5608 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
5609 true, GSI_SAME_STMT);
5610 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
5611 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
5612 true, GSI_SAME_STMT);
5613 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
5614 NULL_TREE, NULL_TREE);
5615 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
5616 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
5617 expand_omp_regimplify_p, NULL, NULL)
5618 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
5619 expand_omp_regimplify_p, NULL, NULL))
5620 {
5621 gsi = gsi_for_stmt (cond_stmt);
5622 gimple_regimplify_operands (cond_stmt, &gsi);
5623 }
5624 se = split_block (entry_bb, cond_stmt);
5625 se->flags = EDGE_TRUE_VALUE;
5626 entry_bb = se->dest;
5627 se->probability = profile_probability::very_likely ();
5628 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
5629 se->probability = profile_probability::very_unlikely ();
5630 if (gimple_in_ssa_p (cfun))
5631 {
5632 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
5633 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
5634 !gsi_end_p (gpi); gsi_next (&gpi))
5635 {
5636 gphi *phi = gpi.phi ();
5637 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
5638 se, UNKNOWN_LOCATION);
5639 }
5640 }
5641 gsi = gsi_last_bb (entry_bb);
5642 }
5643
5644 if (fd->lastprivate_conditional)
5645 {
5646 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5647 tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
5648 if (fd->have_pointer_condtemp)
5649 condtemp = OMP_CLAUSE_DECL (c);
5650 c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
5651 cond_var = OMP_CLAUSE_DECL (c);
5652 }
5653 if (fd->have_reductemp || fd->have_pointer_condtemp)
5654 {
5655 tree t1 = build_int_cst (long_integer_type_node, 0);
5656 tree t2 = build_int_cst (long_integer_type_node, 1);
5657 tree t3 = build_int_cstu (long_integer_type_node,
5658 (HOST_WIDE_INT_1U << 31) + 1);
5659 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
5660 gimple_stmt_iterator gsi2 = gsi_none ();
5661 gimple *g = NULL;
5662 tree mem = null_pointer_node, memv = NULL_TREE;
5663 if (fd->have_reductemp)
5664 {
5665 tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
5666 reductions = OMP_CLAUSE_DECL (c);
5667 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
5668 g = SSA_NAME_DEF_STMT (reductions);
5669 reductions = gimple_assign_rhs1 (g);
5670 OMP_CLAUSE_DECL (c) = reductions;
5671 gsi2 = gsi_for_stmt (g);
5672 }
5673 else
5674 {
5675 if (gsi_end_p (gsip))
5676 gsi2 = gsi_after_labels (region->entry);
5677 else
5678 gsi2 = gsip;
5679 reductions = null_pointer_node;
5680 }
5681 if (fd->have_pointer_condtemp)
5682 {
5683 tree type = TREE_TYPE (condtemp);
5684 memv = create_tmp_var (type);
5685 TREE_ADDRESSABLE (memv) = 1;
5686 unsigned HOST_WIDE_INT sz
5687 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
5688 sz *= fd->lastprivate_conditional;
5689 expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
5690 false);
5691 mem = build_fold_addr_expr (memv);
5692 }
5693 tree t
5694 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
5695 9, t1, t2, t2, t3, t1, null_pointer_node,
5696 null_pointer_node, reductions, mem);
5697 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
5698 true, GSI_SAME_STMT);
5699 if (fd->have_pointer_condtemp)
5700 expand_omp_build_assign (&gsi2, condtemp, memv, false);
5701 if (fd->have_reductemp)
5702 {
5703 gsi_remove (&gsi2, true);
5704 release_ssa_name (gimple_assign_lhs (g));
5705 }
5706 }
5707 switch (gimple_omp_for_kind (fd->for_stmt))
5708 {
5709 case GF_OMP_FOR_KIND_FOR:
5710 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
5711 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
5712 break;
5713 case GF_OMP_FOR_KIND_DISTRIBUTE:
5714 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
5715 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
5716 break;
5717 default:
5718 gcc_unreachable ();
5719 }
5720 nthreads = build_call_expr (nthreads, 0);
5721 nthreads = fold_convert (itype, nthreads);
5722 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
5723 true, GSI_SAME_STMT);
5724 threadid = build_call_expr (threadid, 0);
5725 threadid = fold_convert (itype, threadid);
5726 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
5727 true, GSI_SAME_STMT);
5728
5729 n1 = fd->loop.n1;
5730 n2 = fd->loop.n2;
5731 step = fd->loop.step;
5732 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5733 {
5734 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5735 OMP_CLAUSE__LOOPTEMP_);
5736 gcc_assert (innerc);
5737 n1 = OMP_CLAUSE_DECL (innerc);
5738 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5739 OMP_CLAUSE__LOOPTEMP_);
5740 gcc_assert (innerc);
5741 n2 = OMP_CLAUSE_DECL (innerc);
5742 }
5743 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5744 true, NULL_TREE, true, GSI_SAME_STMT);
5745 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5746 true, NULL_TREE, true, GSI_SAME_STMT);
5747 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5748 true, NULL_TREE, true, GSI_SAME_STMT);
5749 tree chunk_size = fold_convert (itype, fd->chunk_size);
5750 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
5751 chunk_size
5752 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
5753 GSI_SAME_STMT);
5754
5755 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
5756 t = fold_build2 (PLUS_EXPR, itype, step, t);
5757 t = fold_build2 (PLUS_EXPR, itype, t, n2);
5758 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
5759 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
5760 t = fold_build2 (TRUNC_DIV_EXPR, itype,
5761 fold_build1 (NEGATE_EXPR, itype, t),
5762 fold_build1 (NEGATE_EXPR, itype, step));
5763 else
5764 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
5765 t = fold_convert (itype, t);
5766 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5767 true, GSI_SAME_STMT);
5768
5769 trip_var = create_tmp_reg (itype, ".trip");
5770 if (gimple_in_ssa_p (cfun))
5771 {
5772 trip_init = make_ssa_name (trip_var);
5773 trip_main = make_ssa_name (trip_var);
5774 trip_back = make_ssa_name (trip_var);
5775 }
5776 else
5777 {
5778 trip_init = trip_var;
5779 trip_main = trip_var;
5780 trip_back = trip_var;
5781 }
5782
5783 gassign *assign_stmt
5784 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
5785 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5786
5787 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
5788 t = fold_build2 (MULT_EXPR, itype, t, step);
5789 if (POINTER_TYPE_P (type))
5790 t = fold_build_pointer_plus (n1, t);
5791 else
5792 t = fold_build2 (PLUS_EXPR, type, t, n1);
5793 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5794 true, GSI_SAME_STMT);
5795
5796 /* Remove the GIMPLE_OMP_FOR. */
5797 gsi_remove (&gsi, true);
5798
5799 gimple_stmt_iterator gsif = gsi;
5800
5801 /* Iteration space partitioning goes in ITER_PART_BB. */
5802 gsi = gsi_last_bb (iter_part_bb);
5803
5804 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
5805 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
5806 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
5807 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5808 false, GSI_CONTINUE_LINKING);
5809
5810 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
5811 t = fold_build2 (MIN_EXPR, itype, t, n);
5812 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5813 false, GSI_CONTINUE_LINKING);
5814
5815 t = build2 (LT_EXPR, boolean_type_node, s0, n);
5816 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
5817
5818 /* Setup code for sequential iteration goes in SEQ_START_BB. */
5819 gsi = gsi_start_bb (seq_start_bb);
5820
5821 tree startvar = fd->loop.v;
5822 tree endvar = NULL_TREE;
5823
5824 if (gimple_omp_for_combined_p (fd->for_stmt))
5825 {
5826 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
5827 ? gimple_omp_parallel_clauses (inner_stmt)
5828 : gimple_omp_for_clauses (inner_stmt);
5829 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5830 gcc_assert (innerc);
5831 startvar = OMP_CLAUSE_DECL (innerc);
5832 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5833 OMP_CLAUSE__LOOPTEMP_);
5834 gcc_assert (innerc);
5835 endvar = OMP_CLAUSE_DECL (innerc);
5836 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
5837 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
5838 {
5839 innerc = find_lastprivate_looptemp (fd, innerc);
5840 if (innerc)
5841 {
5842 /* If needed (distribute parallel for with lastprivate),
5843 propagate down the total number of iterations. */
5844 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
5845 fd->loop.n2);
5846 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
5847 GSI_CONTINUE_LINKING);
5848 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5849 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5850 }
5851 }
5852 }
5853
5854 t = fold_convert (itype, s0);
5855 t = fold_build2 (MULT_EXPR, itype, t, step);
5856 if (POINTER_TYPE_P (type))
5857 {
5858 t = fold_build_pointer_plus (n1, t);
5859 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5860 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5861 t = fold_convert (signed_type_for (type), t);
5862 }
5863 else
5864 t = fold_build2 (PLUS_EXPR, type, t, n1);
5865 t = fold_convert (TREE_TYPE (startvar), t);
5866 t = force_gimple_operand_gsi (&gsi, t,
5867 DECL_P (startvar)
5868 && TREE_ADDRESSABLE (startvar),
5869 NULL_TREE, false, GSI_CONTINUE_LINKING);
5870 assign_stmt = gimple_build_assign (startvar, t);
5871 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5872 if (cond_var)
5873 {
5874 tree itype = TREE_TYPE (cond_var);
5875 /* For the lastprivate(conditional:) itervar, we need an iteration
5876 counter that starts at a non-zero unsigned value and increases.
5877 Prefer as few IVs as possible, so if we can use startvar
5878 itself, use that, or startvar + constant (those would be
5879 incremented with step), and as a last resort use s0 + 1,
5880 incremented by 1 each iteration. */
5881 if (POINTER_TYPE_P (type)
5882 || TREE_CODE (n1) != INTEGER_CST
5883 || fd->loop.cond_code != LT_EXPR)
5884 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
5885 build_int_cst (itype, 1));
5886 else if (tree_int_cst_sgn (n1) == 1)
5887 t = fold_convert (itype, t);
5888 else
5889 {
5890 tree c = fold_convert (itype, n1);
5891 c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
5892 t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
5893 }
5894 t = force_gimple_operand_gsi (&gsi, t, false,
5895 NULL_TREE, false, GSI_CONTINUE_LINKING);
5896 assign_stmt = gimple_build_assign (cond_var, t);
5897 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5898 }
5899
5900 t = fold_convert (itype, e0);
5901 t = fold_build2 (MULT_EXPR, itype, t, step);
5902 if (POINTER_TYPE_P (type))
5903 {
5904 t = fold_build_pointer_plus (n1, t);
5905 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
5906 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
5907 t = fold_convert (signed_type_for (type), t);
5908 }
5909 else
5910 t = fold_build2 (PLUS_EXPR, type, t, n1);
5911 t = fold_convert (TREE_TYPE (startvar), t);
5912 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5913 false, GSI_CONTINUE_LINKING);
5914 if (endvar)
5915 {
5916 assign_stmt = gimple_build_assign (endvar, e);
5917 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5918 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5919 assign_stmt = gimple_build_assign (fd->loop.v, e);
5920 else
5921 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5922 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5923 }
5924 /* Handle linear clause adjustments. */
5925 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
5926 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
5927 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
5928 c; c = OMP_CLAUSE_CHAIN (c))
5929 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
5930 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
5931 {
5932 tree d = OMP_CLAUSE_DECL (c);
5933 bool is_ref = omp_is_reference (d);
5934 tree t = d, a, dest;
5935 if (is_ref)
5936 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
5937 tree type = TREE_TYPE (t);
5938 if (POINTER_TYPE_P (type))
5939 type = sizetype;
5940 dest = unshare_expr (t);
5941 tree v = create_tmp_var (TREE_TYPE (t), NULL);
5942 expand_omp_build_assign (&gsif, v, t);
5943 if (itercnt == NULL_TREE)
5944 {
5945 if (gimple_omp_for_combined_into_p (fd->for_stmt))
5946 {
5947 itercntbias
5948 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
5949 fold_convert (itype, fd->loop.n1));
5950 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
5951 itercntbias, step);
5952 itercntbias
5953 = force_gimple_operand_gsi (&gsif, itercntbias, true,
5954 NULL_TREE, true,
5955 GSI_SAME_STMT);
5956 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
5957 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
5958 NULL_TREE, false,
5959 GSI_CONTINUE_LINKING);
5960 }
5961 else
5962 itercnt = s0;
5963 }
5964 a = fold_build2 (MULT_EXPR, type,
5965 fold_convert (type, itercnt),
5966 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
5967 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
5968 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
5969 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5970 false, GSI_CONTINUE_LINKING);
5971 expand_omp_build_assign (&gsi, dest, t, true);
5972 }
5973 if (fd->collapse > 1)
5974 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
5975
5976 if (!broken_loop)
5977 {
5978 /* The code controlling the sequential loop goes in CONT_BB,
5979 replacing the GIMPLE_OMP_CONTINUE. */
5980 gsi = gsi_last_nondebug_bb (cont_bb);
5981 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5982 vmain = gimple_omp_continue_control_use (cont_stmt);
5983 vback = gimple_omp_continue_control_def (cont_stmt);
5984
5985 if (cond_var)
5986 {
5987 tree itype = TREE_TYPE (cond_var);
5988 tree t2;
5989 if (POINTER_TYPE_P (type)
5990 || TREE_CODE (n1) != INTEGER_CST
5991 || fd->loop.cond_code != LT_EXPR)
5992 t2 = build_int_cst (itype, 1);
5993 else
5994 t2 = fold_convert (itype, step);
5995 t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
5996 t2 = force_gimple_operand_gsi (&gsi, t2, false,
5997 NULL_TREE, true, GSI_SAME_STMT);
5998 assign_stmt = gimple_build_assign (cond_var, t2);
5999 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6000 }
6001
6002 if (!gimple_omp_for_combined_p (fd->for_stmt))
6003 {
6004 if (POINTER_TYPE_P (type))
6005 t = fold_build_pointer_plus (vmain, step);
6006 else
6007 t = fold_build2 (PLUS_EXPR, type, vmain, step);
6008 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
6009 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6010 true, GSI_SAME_STMT);
6011 assign_stmt = gimple_build_assign (vback, t);
6012 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
6013
6014 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
6015 t = build2 (EQ_EXPR, boolean_type_node,
6016 build_int_cst (itype, 0),
6017 build_int_cst (itype, 1));
6018 else
6019 t = build2 (fd->loop.cond_code, boolean_type_node,
6020 DECL_P (vback) && TREE_ADDRESSABLE (vback)
6021 ? t : vback, e);
6022 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
6023 }
6024
6025 /* Remove GIMPLE_OMP_CONTINUE. */
6026 gsi_remove (&gsi, true);
6027
6028 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
6029 collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
6030
6031 /* Trip update code goes into TRIP_UPDATE_BB. */
6032 gsi = gsi_start_bb (trip_update_bb);
6033
6034 t = build_int_cst (itype, 1);
6035 t = build2 (PLUS_EXPR, itype, trip_main, t);
6036 assign_stmt = gimple_build_assign (trip_back, t);
6037 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
6038 }
6039
6040 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
6041 gsi = gsi_last_nondebug_bb (exit_bb);
6042 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
6043 {
6044 t = gimple_omp_return_lhs (gsi_stmt (gsi));
6045 if (fd->have_reductemp || fd->have_pointer_condtemp)
6046 {
6047 tree fn;
6048 if (t)
6049 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
6050 else
6051 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
6052 gcall *g = gimple_build_call (fn, 0);
6053 if (t)
6054 {
6055 gimple_call_set_lhs (g, t);
6056 if (fd->have_reductemp)
6057 gsi_insert_after (&gsi, gimple_build_assign (reductions,
6058 NOP_EXPR, t),
6059 GSI_SAME_STMT);
6060 }
6061 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6062 }
6063 else
6064 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
6065 }
6066 else if (fd->have_pointer_condtemp)
6067 {
6068 tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
6069 gcall *g = gimple_build_call (fn, 0);
6070 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
6071 }
6072 gsi_remove (&gsi, true);
6073
6074 /* Connect the new blocks. */
6075 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
6076 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
6077
6078 if (!broken_loop)
6079 {
6080 se = find_edge (cont_bb, body_bb);
6081 if (se == NULL)
6082 {
6083 se = BRANCH_EDGE (cont_bb);
6084 gcc_assert (single_succ (se->dest) == body_bb);
6085 }
6086 if (gimple_omp_for_combined_p (fd->for_stmt))
6087 {
6088 remove_edge (se);
6089 se = NULL;
6090 }
6091 else if (fd->collapse > 1)
6092 {
6093 remove_edge (se);
6094 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
6095 }
6096 else
6097 se->flags = EDGE_TRUE_VALUE;
6098 find_edge (cont_bb, trip_update_bb)->flags
6099 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
6100
6101 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
6102 iter_part_bb);
6103 }
6104
6105 if (gimple_in_ssa_p (cfun))
6106 {
6107 gphi_iterator psi;
6108 gphi *phi;
6109 edge re, ene;
6110 edge_var_map *vm;
6111 size_t i;
6112
6113 gcc_assert (fd->collapse == 1 && !broken_loop);
6114
6115 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
6116 remove arguments of the phi nodes in fin_bb. We need to create
6117 appropriate phi nodes in iter_part_bb instead. */
6118 se = find_edge (iter_part_bb, fin_bb);
6119 re = single_succ_edge (trip_update_bb);
6120 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
6121 ene = single_succ_edge (entry_bb);
6122
6123 psi = gsi_start_phis (fin_bb);
6124 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
6125 gsi_next (&psi), ++i)
6126 {
6127 gphi *nphi;
6128 location_t locus;
6129
6130 phi = psi.phi ();
6131 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
6132 redirect_edge_var_map_def (vm), 0))
6133 continue;
6134
6135 t = gimple_phi_result (phi);
6136 gcc_assert (t == redirect_edge_var_map_result (vm));
6137
6138 if (!single_pred_p (fin_bb))
6139 t = copy_ssa_name (t, phi);
6140
6141 nphi = create_phi_node (t, iter_part_bb);
6142
6143 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
6144 locus = gimple_phi_arg_location_from_edge (phi, se);
6145
6146 /* A special case: fd->loop.v is not yet computed in
6147 iter_part_bb, so we need to use vextra instead. */
6148 if (t == fd->loop.v)
6149 t = vextra;
6150 add_phi_arg (nphi, t, ene, locus);
6151 locus = redirect_edge_var_map_location (vm);
6152 tree back_arg = redirect_edge_var_map_def (vm);
6153 add_phi_arg (nphi, back_arg, re, locus);
6154 edge ce = find_edge (cont_bb, body_bb);
6155 if (ce == NULL)
6156 {
6157 ce = BRANCH_EDGE (cont_bb);
6158 gcc_assert (single_succ (ce->dest) == body_bb);
6159 ce = single_succ_edge (ce->dest);
6160 }
6161 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
6162 gcc_assert (inner_loop_phi != NULL);
6163 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
6164 find_edge (seq_start_bb, body_bb), locus);
6165
6166 if (!single_pred_p (fin_bb))
6167 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
6168 }
6169 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
6170 redirect_edge_var_map_clear (re);
6171 if (single_pred_p (fin_bb))
6172 while (1)
6173 {
6174 psi = gsi_start_phis (fin_bb);
6175 if (gsi_end_p (psi))
6176 break;
6177 remove_phi_node (&psi, false);
6178 }
6179
6180 /* Make phi node for trip. */
6181 phi = create_phi_node (trip_main, iter_part_bb);
6182 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
6183 UNKNOWN_LOCATION);
6184 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
6185 UNKNOWN_LOCATION);
6186 }
6187
6188 if (!broken_loop)
6189 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
6190 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
6191 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
6192 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
6193 recompute_dominator (CDI_DOMINATORS, fin_bb));
6194 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
6195 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
6196 set_immediate_dominator (CDI_DOMINATORS, body_bb,
6197 recompute_dominator (CDI_DOMINATORS, body_bb));
6198
6199 if (!broken_loop)
6200 {
6201 class loop *loop = body_bb->loop_father;
6202 class loop *trip_loop = alloc_loop ();
6203 trip_loop->header = iter_part_bb;
6204 trip_loop->latch = trip_update_bb;
6205 add_loop (trip_loop, iter_part_bb->loop_father);
6206
6207 if (loop != entry_bb->loop_father)
6208 {
6209 gcc_assert (loop->header == body_bb);
6210 gcc_assert (loop->latch == region->cont
6211 || single_pred (loop->latch) == region->cont);
6212 trip_loop->inner = loop;
6213 return;
6214 }
6215
6216 if (!gimple_omp_for_combined_p (fd->for_stmt))
6217 {
6218 loop = alloc_loop ();
6219 loop->header = body_bb;
6220 if (collapse_bb == NULL)
6221 loop->latch = cont_bb;
6222 add_loop (loop, trip_loop);
6223 }
6224 }
6225 }
6226
6227 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
6228 loop. Given parameters:
6229
6230 for (V = N1; V cond N2; V += STEP) BODY;
6231
6232 where COND is "<" or ">", we generate pseudocode
6233
6234 V = N1;
6235 goto L1;
6236 L0:
6237 BODY;
6238 V += STEP;
6239 L1:
6240 if (V cond N2) goto L0; else goto L2;
6241 L2:
6242
6243 For collapsed loops, emit the outer loops as scalar
6244 and only try to vectorize the innermost loop. */
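/* As a purely illustrative example, a loop such as

     #pragma omp simd safelen(8)
     for (i = 0; i < n; i++)
       a[i] = b[i] * c[i];

   is expanded into the scalar form above, with the safelen and
   simduid information recorded on the loop so that the vectorizer
   can vectorize the body later.  */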
6245
6246 static void
6247 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
6248 {
6249 tree type, t;
6250 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
6251 gimple_stmt_iterator gsi;
6252 gimple *stmt;
6253 gcond *cond_stmt;
6254 bool broken_loop = region->cont == NULL;
6255 edge e, ne;
6256 tree *counts = NULL;
6257 int i;
6258 int safelen_int = INT_MAX;
6259 bool dont_vectorize = false;
6260 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6261 OMP_CLAUSE_SAFELEN);
6262 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6263 OMP_CLAUSE__SIMDUID_);
6264 tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6265 OMP_CLAUSE_IF);
6266 tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6267 OMP_CLAUSE_SIMDLEN);
6268 tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6269 OMP_CLAUSE__CONDTEMP_);
6270 tree n1, n2;
6271 tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
6272
6273 if (safelen)
6274 {
6275 poly_uint64 val;
6276 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
6277 if (!poly_int_tree_p (safelen, &val))
6278 safelen_int = 0;
6279 else
6280 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
6281 if (safelen_int == 1)
6282 safelen_int = 0;
6283 }
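/* E.g. (hypothetical clauses) safelen(4) gives safelen_int == 4, while
   safelen(1), which leaves no room for vectorization, is collapsed to
   safelen_int == 0.  */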
6284 if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
6285 || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
6286 {
6287 safelen_int = 0;
6288 dont_vectorize = true;
6289 }
6290 type = TREE_TYPE (fd->loop.v);
6291 entry_bb = region->entry;
6292 cont_bb = region->cont;
6293 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
6294 gcc_assert (broken_loop
6295 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
6296 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
6297 if (!broken_loop)
6298 {
6299 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
6300 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
6301 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
6302 l2_bb = BRANCH_EDGE (entry_bb)->dest;
6303 }
6304 else
6305 {
6306 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
6307 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
6308 l2_bb = single_succ (l1_bb);
6309 }
6310 exit_bb = region->exit;
6311 l2_dom_bb = NULL;
6312
6313 gsi = gsi_last_nondebug_bb (entry_bb);
6314
6315 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
6316 /* Not needed in SSA form right now. */
6317 gcc_assert (!gimple_in_ssa_p (cfun));
6318 if (fd->collapse > 1
6319 && (gimple_omp_for_combined_into_p (fd->for_stmt)
6320 || broken_loop))
6321 {
6322 int first_zero_iter = -1, dummy = -1;
6323 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
6324
6325 counts = XALLOCAVEC (tree, fd->collapse);
6326 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
6327 zero_iter_bb, first_zero_iter,
6328 dummy_bb, dummy, l2_dom_bb);
6329 }
6330 if (l2_dom_bb == NULL)
6331 l2_dom_bb = l1_bb;
6332
6333 n1 = fd->loop.n1;
6334 n2 = fd->loop.n2;
6335 if (gimple_omp_for_combined_into_p (fd->for_stmt))
6336 {
6337 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6338 OMP_CLAUSE__LOOPTEMP_);
6339 gcc_assert (innerc);
6340 n1 = OMP_CLAUSE_DECL (innerc);
6341 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
6342 OMP_CLAUSE__LOOPTEMP_);
6343 gcc_assert (innerc);
6344 n2 = OMP_CLAUSE_DECL (innerc);
6345 }
6346 tree step = fd->loop.step;
6347
6348 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
6349 OMP_CLAUSE__SIMT_);
6350 if (is_simt)
6351 {
6352 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
6353 is_simt = safelen_int > 1;
6354 }
6355 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
6356 if (is_simt)
6357 {
6358 simt_lane = create_tmp_var (unsigned_type_node);
6359 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
6360 gimple_call_set_lhs (g, simt_lane);
6361 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6362 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
6363 fold_convert (TREE_TYPE (step), simt_lane));
6364 n1 = fold_convert (type, n1);
6365 if (POINTER_TYPE_P (type))
6366 n1 = fold_build_pointer_plus (n1, offset);
6367 else
6368 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
6369
6370 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
6371 if (fd->collapse > 1)
6372 simt_maxlane = build_one_cst (unsigned_type_node);
6373 else if (safelen_int < omp_max_simt_vf ())
6374 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
6375 tree vf
6376 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
6377 unsigned_type_node, 0);
6378 if (simt_maxlane)
6379 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
6380 vf = fold_convert (TREE_TYPE (step), vf);
6381 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
6382 }
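/* Illustratively (hypothetical numbers): with STEP = 1 and a SIMT VF
   of 32, lane L starts at N1 + L and then advances by 32 each
   iteration, so the lanes cover the iteration space interleaved.  */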
6383
6384 tree n2var = NULL_TREE;
6385 tree n2v = NULL_TREE;
6386 tree *nonrect_bounds = NULL;
6387 tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
6388 if (fd->collapse > 1)
6389 {
6390 if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
6391 {
6392 if (fd->non_rect)
6393 {
6394 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
6395 memset (nonrect_bounds, 0,
6396 sizeof (tree) * (fd->last_nonrect + 1));
6397 }
6398 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6399 gcc_assert (entry_bb == gsi_bb (gsi));
6400 gcc_assert (fd->for_stmt == gsi_stmt (gsi));
6401 gsi_prev (&gsi);
6402 entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
6403 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
6404 NULL, n1);
6405 gsi = gsi_for_stmt (fd->for_stmt);
6406 }
6407 if (broken_loop)
6408 ;
6409 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
6410 {
6411 /* Compute in n2var the limit for the first innermost loop,
6412 i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt)
6413 where cnt is how many iterations the loop would have if
6414 all further iterations were assigned to the current task. */
6415 n2var = create_tmp_var (type);
6416 i = fd->collapse - 1;
6417 tree itype = TREE_TYPE (fd->loops[i].v);
6418 if (POINTER_TYPE_P (itype))
6419 itype = signed_type_for (itype);
6420 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
6421 ? -1 : 1));
6422 t = fold_build2 (PLUS_EXPR, itype,
6423 fold_convert (itype, fd->loops[i].step), t);
6424 t = fold_build2 (PLUS_EXPR, itype, t,
6425 fold_convert (itype, fd->loops[i].n2));
6426 if (fd->loops[i].m2)
6427 {
6428 tree t2 = fold_convert (itype,
6429 fd->loops[i - fd->loops[i].outer].v);
6430 tree t3 = fold_convert (itype, fd->loops[i].m2);
6431 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6432 t = fold_build2 (PLUS_EXPR, itype, t, t2);
6433 }
6434 t = fold_build2 (MINUS_EXPR, itype, t,
6435 fold_convert (itype, fd->loops[i].v));
6436 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
6437 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6438 fold_build1 (NEGATE_EXPR, itype, t),
6439 fold_build1 (NEGATE_EXPR, itype,
6440 fold_convert (itype,
6441 fd->loops[i].step)));
6442 else
6443 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6444 fold_convert (itype, fd->loops[i].step));
6445 t = fold_convert (type, t);
6446 tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
6447 min_arg1 = create_tmp_var (type);
6448 expand_omp_build_assign (&gsi, min_arg1, t2);
6449 min_arg2 = create_tmp_var (type);
6450 expand_omp_build_assign (&gsi, min_arg2, t);
6451 }
6452 else
6453 {
6454 if (TREE_CODE (n2) == INTEGER_CST)
6455 {
6456 /* Indicate for lastprivate handling that at least one iteration
6457 has been performed, without wasting runtime. */
6458 if (integer_nonzerop (n2))
6459 expand_omp_build_assign (&gsi, fd->loop.v,
6460 fold_convert (type, n2));
6461 else
6462 /* Indicate that no iteration has been performed. */
6463 expand_omp_build_assign (&gsi, fd->loop.v,
6464 build_one_cst (type));
6465 }
6466 else
6467 {
6468 expand_omp_build_assign (&gsi, fd->loop.v,
6469 build_zero_cst (type));
6470 expand_omp_build_assign (&gsi, n2, build_one_cst (type));
6471 }
6472 for (i = 0; i < fd->collapse; i++)
6473 {
6474 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
6475 if (fd->loops[i].m1)
6476 {
6477 tree t2
6478 = fold_convert (TREE_TYPE (t),
6479 fd->loops[i - fd->loops[i].outer].v);
6480 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
6481 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6482 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6483 }
6484 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6485 /* For normal non-combined collapsed loops just initialize
6486 the outermost iterator in the entry_bb. */
6487 if (!broken_loop)
6488 break;
6489 }
6490 }
6491 }
6492 else
6493 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
6494 tree altv = NULL_TREE, altn2 = NULL_TREE;
6495 if (fd->collapse == 1
6496 && !broken_loop
6497 && TREE_CODE (fd->loops[0].step) != INTEGER_CST)
6498 {
6499 /* The vectorizer currently punts on loops with non-constant steps
6500 for the main IV (it can't compute the number of iterations and
6501 gives up because of that). Since for OpenMP loops it is always
6502 possible to compute the number of iterations upfront, use an
6503 alternate IV as the loop iterator:
6504 altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
6505 for (i = n1, altv = 0; altv < altn2; altv++, i += step) */
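/* E.g. (hypothetical values), for n1 = 0, n2 = 10 and step = 3 this
   gives altn2 = (10 - 0 + 3 - 1) / 3 = 4, so altv runs over 0..3
   while the original IV takes the values 0, 3, 6 and 9.  */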
6506 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6507 expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
6508 tree itype = TREE_TYPE (fd->loop.v);
6509 if (POINTER_TYPE_P (itype))
6510 itype = signed_type_for (itype);
6511 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
6512 t = fold_build2 (PLUS_EXPR, itype,
6513 fold_convert (itype, fd->loop.step), t);
6514 t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
6515 t = fold_build2 (MINUS_EXPR, itype, t,
6516 fold_convert (itype, fd->loop.v));
6517 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
6518 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6519 fold_build1 (NEGATE_EXPR, itype, t),
6520 fold_build1 (NEGATE_EXPR, itype,
6521 fold_convert (itype, fd->loop.step)));
6522 else
6523 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
6524 fold_convert (itype, fd->loop.step));
6525 t = fold_convert (TREE_TYPE (altv), t);
6526 altn2 = create_tmp_var (TREE_TYPE (altv));
6527 expand_omp_build_assign (&gsi, altn2, t);
6528 tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
6529 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6530 true, GSI_SAME_STMT);
6531 t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
6532 gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6533 build_zero_cst (TREE_TYPE (altv)));
6534 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6535 }
6536 else if (fd->collapse > 1
6537 && !broken_loop
6538 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6539 && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
6540 {
6541 altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
6542 altn2 = create_tmp_var (TREE_TYPE (altv));
6543 }
6544 if (cond_var)
6545 {
6546 if (POINTER_TYPE_P (type)
6547 || TREE_CODE (n1) != INTEGER_CST
6548 || fd->loop.cond_code != LT_EXPR
6549 || tree_int_cst_sgn (n1) != 1)
6550 expand_omp_build_assign (&gsi, cond_var,
6551 build_one_cst (TREE_TYPE (cond_var)));
6552 else
6553 expand_omp_build_assign (&gsi, cond_var,
6554 fold_convert (TREE_TYPE (cond_var), n1));
6555 }
6556
6557 /* Remove the GIMPLE_OMP_FOR statement. */
6558 gsi_remove (&gsi, true);
6559
6560 if (!broken_loop)
6561 {
6562 /* Code to control the increment goes in the CONT_BB. */
6563 gsi = gsi_last_nondebug_bb (cont_bb);
6564 stmt = gsi_stmt (gsi);
6565 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
6566
6567 if (fd->collapse == 1
6568 || gimple_omp_for_combined_into_p (fd->for_stmt))
6569 {
6570 if (POINTER_TYPE_P (type))
6571 t = fold_build_pointer_plus (fd->loop.v, step);
6572 else
6573 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6574 expand_omp_build_assign (&gsi, fd->loop.v, t);
6575 }
6576 else if (TREE_CODE (n2) != INTEGER_CST)
6577 expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
6578 if (altv)
6579 {
6580 t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
6581 build_one_cst (TREE_TYPE (altv)));
6582 expand_omp_build_assign (&gsi, altv, t);
6583 }
6584
6585 if (fd->collapse > 1)
6586 {
6587 i = fd->collapse - 1;
6588 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6589 {
6590 t = fold_convert (sizetype, fd->loops[i].step);
6591 t = fold_build_pointer_plus (fd->loops[i].v, t);
6592 }
6593 else
6594 {
6595 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6596 fd->loops[i].step);
6597 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6598 fd->loops[i].v, t);
6599 }
6600 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6601 }
6602 if (cond_var)
6603 {
6604 if (POINTER_TYPE_P (type)
6605 || TREE_CODE (n1) != INTEGER_CST
6606 || fd->loop.cond_code != LT_EXPR
6607 || tree_int_cst_sgn (n1) != 1)
6608 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6609 build_one_cst (TREE_TYPE (cond_var)));
6610 else
6611 t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
6612 fold_convert (TREE_TYPE (cond_var), step));
6613 expand_omp_build_assign (&gsi, cond_var, t);
6614 }
6615
6616 /* Remove GIMPLE_OMP_CONTINUE. */
6617 gsi_remove (&gsi, true);
6618 }
6619
6620 /* Emit the condition in L1_BB. */
6621 gsi = gsi_start_bb (l1_bb);
6622
6623 if (altv)
6624 t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
6625 else if (fd->collapse > 1
6626 && !gimple_omp_for_combined_into_p (fd->for_stmt)
6627 && !broken_loop)
6628 {
6629 i = fd->collapse - 1;
6630 tree itype = TREE_TYPE (fd->loops[i].v);
6631 if (fd->loops[i].m2)
6632 t = n2v = create_tmp_var (itype);
6633 else
6634 t = fold_convert (itype, fd->loops[i].n2);
6635 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6636 false, GSI_CONTINUE_LINKING);
6637 tree v = fd->loops[i].v;
6638 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6639 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6640 false, GSI_CONTINUE_LINKING);
6641 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6642 }
6643 else
6644 {
6645 if (fd->collapse > 1 && !broken_loop)
6646 t = n2var;
6647 else
6648 t = fold_convert (type, n2);
6649 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6650 false, GSI_CONTINUE_LINKING);
6651 tree v = fd->loop.v;
6652 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6653 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6654 false, GSI_CONTINUE_LINKING);
6655 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
6656 }
6657 cond_stmt = gimple_build_cond_empty (t);
6658 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6659 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
6660 NULL, NULL)
6661 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
6662 NULL, NULL))
6663 {
6664 gsi = gsi_for_stmt (cond_stmt);
6665 gimple_regimplify_operands (cond_stmt, &gsi);
6666 }
6667
6668 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
6669 if (is_simt)
6670 {
6671 gsi = gsi_start_bb (l2_bb);
6672 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
6673 if (POINTER_TYPE_P (type))
6674 t = fold_build_pointer_plus (fd->loop.v, step);
6675 else
6676 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
6677 expand_omp_build_assign (&gsi, fd->loop.v, t);
6678 }
6679
6680 /* Remove GIMPLE_OMP_RETURN. */
6681 gsi = gsi_last_nondebug_bb (exit_bb);
6682 gsi_remove (&gsi, true);
6683
6684 /* Connect the new blocks. */
6685 remove_edge (FALLTHRU_EDGE (entry_bb));
6686
6687 if (!broken_loop)
6688 {
6689 remove_edge (BRANCH_EDGE (entry_bb));
6690 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
6691
6692 e = BRANCH_EDGE (l1_bb);
6693 ne = FALLTHRU_EDGE (l1_bb);
6694 e->flags = EDGE_TRUE_VALUE;
6695 }
6696 else
6697 {
6698 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6699
6700 ne = single_succ_edge (l1_bb);
6701 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
6702
6703 }
6704 ne->flags = EDGE_FALSE_VALUE;
6705 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
6706 ne->probability = e->probability.invert ();
6707
6708 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
6709 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
6710
6711 if (simt_maxlane)
6712 {
6713 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
6714 NULL_TREE, NULL_TREE);
6715 gsi = gsi_last_bb (entry_bb);
6716 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
6717 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
6718 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
6719 FALLTHRU_EDGE (entry_bb)->probability
6720 = profile_probability::guessed_always ().apply_scale (7, 8);
6721 BRANCH_EDGE (entry_bb)->probability
6722 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
6723 l2_dom_bb = entry_bb;
6724 }
6725 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
6726
6727 if (!broken_loop && fd->collapse > 1)
6728 {
6729 basic_block last_bb = l1_bb;
6730 basic_block init_bb = NULL;
6731 for (i = fd->collapse - 2; i >= 0; i--)
6732 {
6733 tree nextn2v = NULL_TREE;
6734 if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
6735 e = EDGE_SUCC (last_bb, 0);
6736 else
6737 e = EDGE_SUCC (last_bb, 1);
6738 basic_block bb = split_edge (e);
6739 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
6740 {
6741 t = fold_convert (sizetype, fd->loops[i].step);
6742 t = fold_build_pointer_plus (fd->loops[i].v, t);
6743 }
6744 else
6745 {
6746 t = fold_convert (TREE_TYPE (fd->loops[i].v),
6747 fd->loops[i].step);
6748 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
6749 fd->loops[i].v, t);
6750 }
6751 gsi = gsi_after_labels (bb);
6752 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
6753
6754 bb = split_block (bb, last_stmt (bb))->dest;
6755 gsi = gsi_start_bb (bb);
6756 tree itype = TREE_TYPE (fd->loops[i].v);
6757 if (fd->loops[i].m2)
6758 t = nextn2v = create_tmp_var (itype);
6759 else
6760 t = fold_convert (itype, fd->loops[i].n2);
6761 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
6762 false, GSI_CONTINUE_LINKING);
6763 tree v = fd->loops[i].v;
6764 if (DECL_P (v) && TREE_ADDRESSABLE (v))
6765 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
6766 false, GSI_CONTINUE_LINKING);
6767 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
6768 cond_stmt = gimple_build_cond_empty (t);
6769 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
6770 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
6771 expand_omp_regimplify_p, NULL, NULL)
6772 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
6773 expand_omp_regimplify_p, NULL, NULL))
6774 {
6775 gsi = gsi_for_stmt (cond_stmt);
6776 gimple_regimplify_operands (cond_stmt, &gsi);
6777 }
6778 ne = single_succ_edge (bb);
6779 ne->flags = EDGE_FALSE_VALUE;
6780
6781 init_bb = create_empty_bb (bb);
6782 set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
6783 add_bb_to_loop (init_bb, bb->loop_father);
6784 e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
6785 e->probability
6786 = profile_probability::guessed_always ().apply_scale (7, 8);
6787 ne->probability = e->probability.invert ();
6788
6789 gsi = gsi_after_labels (init_bb);
6790 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6791 fd->loops[i + 1].n1);
6792 if (fd->loops[i + 1].m1)
6793 {
6794 tree t2 = fold_convert (TREE_TYPE (t),
6795 fd->loops[i + 1
6796 - fd->loops[i + 1].outer].v);
6797 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
6798 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6799 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6800 }
6801 expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
6802 if (fd->loops[i + 1].m2)
6803 {
6804 if (i + 2 == fd->collapse && (n2var || altv))
6805 {
6806 gcc_assert (n2v == NULL_TREE);
6807 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
6808 }
6809 t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6810 fd->loops[i + 1].n2);
6811 tree t2 = fold_convert (TREE_TYPE (t),
6812 fd->loops[i + 1
6813 - fd->loops[i + 1].outer].v);
6814 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
6815 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
6816 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
6817 expand_omp_build_assign (&gsi, n2v, t);
6818 }
6819 if (i + 2 == fd->collapse && n2var)
6820 {
6821 /* For composite simd, n2 is the first iteration the current
6822 task shouldn't already handle, so we effectively want to use
6823 for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
6824 as the vectorized loop. Except the vectorizer will not
6825 vectorize that, so instead compute N2VAR as
6826 N2VAR = V + MIN (N2 - V, COUNTS3) and use
6827 for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
6828 as the loop to vectorize. */
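/* A worked illustration with made-up numbers: if this task was handed the
   logical iterations [V, N2) = [4, 10) and the innermost loop contributes
   COUNTS3 = 3 iterations per outer iteration, then
     N2VAR = 4 + MIN (10 - 4, 3) = 7,
   so the vectorized inner loop runs its 3 iterations unless fewer logical
   iterations remain for this task.  */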
6829 tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
6830 if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
6831 {
6832 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
6833 == LT_EXPR ? -1 : 1));
6834 t = fold_build2 (PLUS_EXPR, itype,
6835 fold_convert (itype,
6836 fd->loops[i + 1].step), t);
6837 if (fd->loops[i + 1].m2)
6838 t = fold_build2 (PLUS_EXPR, itype, t, n2v);
6839 else
6840 t = fold_build2 (PLUS_EXPR, itype, t,
6841 fold_convert (itype,
6842 fd->loops[i + 1].n2));
6843 t = fold_build2 (MINUS_EXPR, itype, t,
6844 fold_convert (itype, fd->loops[i + 1].v));
6845 tree step = fold_convert (itype, fd->loops[i + 1].step);
6846 if (TYPE_UNSIGNED (itype)
6847 && fd->loops[i + 1].cond_code == GT_EXPR)
6848 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6849 fold_build1 (NEGATE_EXPR, itype, t),
6850 fold_build1 (NEGATE_EXPR, itype, step));
6851 else
6852 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6853 t = fold_convert (type, t);
6854 }
6855 else
6856 t = counts[i + 1];
6857 expand_omp_build_assign (&gsi, min_arg1, t2);
6858 expand_omp_build_assign (&gsi, min_arg2, t);
6859 e = split_block (init_bb, last_stmt (init_bb));
6860 gsi = gsi_after_labels (e->dest);
6861 init_bb = e->dest;
6862 remove_edge (FALLTHRU_EDGE (entry_bb));
6863 make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
6864 set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
6865 set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
6866 t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
6867 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
6868 expand_omp_build_assign (&gsi, n2var, t);
6869 }
6870 if (i + 2 == fd->collapse && altv)
6871 {
6872 /* The vectorizer currently punts on loops with non-constant
6873 steps for the main IV (can't compute number of iterations
6874 and gives up because of that). Since for OpenMP loops it is
6875 always possible to compute the number of iterations upfront,
6876 use an alternate IV as the loop iterator. */
6877 expand_omp_build_assign (&gsi, altv,
6878 build_zero_cst (TREE_TYPE (altv)));
6879 tree itype = TREE_TYPE (fd->loops[i + 1].v);
6880 if (POINTER_TYPE_P (itype))
6881 itype = signed_type_for (itype);
6882 t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
6883 ? -1 : 1));
6884 t = fold_build2 (PLUS_EXPR, itype,
6885 fold_convert (itype, fd->loops[i + 1].step), t);
6886 t = fold_build2 (PLUS_EXPR, itype, t,
6887 fold_convert (itype,
6888 fd->loops[i + 1].m2
6889 ? n2v : fd->loops[i + 1].n2));
6890 t = fold_build2 (MINUS_EXPR, itype, t,
6891 fold_convert (itype, fd->loops[i + 1].v));
6892 tree step = fold_convert (itype, fd->loops[i + 1].step);
6893 if (TYPE_UNSIGNED (itype)
6894 && fd->loops[i + 1].cond_code == GT_EXPR)
6895 t = fold_build2 (TRUNC_DIV_EXPR, itype,
6896 fold_build1 (NEGATE_EXPR, itype, t),
6897 fold_build1 (NEGATE_EXPR, itype, step));
6898 else
6899 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
6900 t = fold_convert (TREE_TYPE (altv), t);
6901 expand_omp_build_assign (&gsi, altn2, t);
6902 tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
6903 fd->loops[i + 1].m2
6904 ? n2v : fd->loops[i + 1].n2);
6905 t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
6906 true, GSI_SAME_STMT);
6907 t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
6908 fd->loops[i + 1].v, t2);
6909 gassign *g
6910 = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
6911 build_zero_cst (TREE_TYPE (altv)));
6912 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
6913 }
6914 n2v = nextn2v;
6915
6916 make_edge (init_bb, last_bb, EDGE_FALLTHRU);
6917 if (!gimple_omp_for_combined_into_p (fd->for_stmt))
6918 {
6919 e = find_edge (entry_bb, last_bb);
6920 redirect_edge_succ (e, bb);
6921 set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
6922 set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
6923 }
6924
6925 last_bb = bb;
6926 }
6927 }
6928 if (!broken_loop)
6929 {
6930 class loop *loop = alloc_loop ();
6931 loop->header = l1_bb;
6932 loop->latch = cont_bb;
6933 add_loop (loop, l1_bb->loop_father);
6934 loop->safelen = safelen_int;
6935 if (simduid)
6936 {
6937 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
6938 cfun->has_simduid_loops = true;
6939 }
6940 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
6941 the loop. */
6942 if ((flag_tree_loop_vectorize
6943 || !global_options_set.x_flag_tree_loop_vectorize)
6944 && flag_tree_loop_optimize
6945 && loop->safelen > 1)
6946 {
6947 loop->force_vectorize = true;
6948 if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
6949 {
6950 unsigned HOST_WIDE_INT v
6951 = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
6952 if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
6953 loop->simdlen = v;
6954 }
6955 cfun->has_force_vectorize_loops = true;
6956 }
6957 else if (dont_vectorize)
6958 loop->dont_vectorize = true;
6959 }
6960 else if (simduid)
6961 cfun->has_simduid_loops = true;
6962 }
6963
6964 /* Taskloop construct is represented after gimplification with
6965 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
6966 in between them. This routine expands the outer GIMPLE_OMP_FOR,
6967 which should just compute all the needed loop temporaries
6968 for GIMPLE_OMP_TASK. */
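/* As an illustrative sketch (not taken from any testcase), a user-level

     #pragma omp taskloop
     for (i = 0; i < n; i++)
       body (i);

   is represented at this point roughly as

     GIMPLE_OMP_FOR      <- outer; expanded here, only fills loop temporaries
       GIMPLE_OMP_TASK
         GIMPLE_OMP_FOR  <- inner; iterates the range handed to each task

   so the code below merely materializes the N1/N2 values (biased if needed)
   into the _looptemp_ clauses consumed by the GIMPLE_OMP_TASK.  */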
6969
6970 static void
6971 expand_omp_taskloop_for_outer (struct omp_region *region,
6972 struct omp_for_data *fd,
6973 gimple *inner_stmt)
6974 {
6975 tree type, bias = NULL_TREE;
6976 basic_block entry_bb, cont_bb, exit_bb;
6977 gimple_stmt_iterator gsi;
6978 gassign *assign_stmt;
6979 tree *counts = NULL;
6980 int i;
6981
6982 gcc_assert (inner_stmt);
6983 gcc_assert (region->cont);
6984 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
6985 && gimple_omp_task_taskloop_p (inner_stmt));
6986 type = TREE_TYPE (fd->loop.v);
6987
6988 /* See if we need to bias by LLONG_MIN. */
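/* Illustrative rationale (a sketch, not a normative statement): the runtime
   iterates in fd->iter_type, here unsigned long long, while the user's
   iteration variable is signed.  Adding TYPE_MIN_VALUE (type) -- the
   LLONG_MIN bit pattern -- maps the signed range order-preservingly into
   unsigned space; e.g. a bound of -5 becomes 0x7ffffffffffffffb and +5
   becomes 0x8000000000000005, so unsigned comparisons still see -5 < +5.  */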
6989 if (fd->iter_type == long_long_unsigned_type_node
6990 && TREE_CODE (type) == INTEGER_TYPE
6991 && !TYPE_UNSIGNED (type))
6992 {
6993 tree n1, n2;
6994
6995 if (fd->loop.cond_code == LT_EXPR)
6996 {
6997 n1 = fd->loop.n1;
6998 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
6999 }
7000 else
7001 {
7002 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7003 n2 = fd->loop.n1;
7004 }
7005 if (TREE_CODE (n1) != INTEGER_CST
7006 || TREE_CODE (n2) != INTEGER_CST
7007 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7008 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7009 }
7010
7011 entry_bb = region->entry;
7012 cont_bb = region->cont;
7013 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7014 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
7015 exit_bb = region->exit;
7016
7017 gsi = gsi_last_nondebug_bb (entry_bb);
7018 gimple *for_stmt = gsi_stmt (gsi);
7019 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
7020 if (fd->collapse > 1)
7021 {
7022 int first_zero_iter = -1, dummy = -1;
7023 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
7024
7025 counts = XALLOCAVEC (tree, fd->collapse);
7026 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7027 zero_iter_bb, first_zero_iter,
7028 dummy_bb, dummy, l2_dom_bb);
7029
7030 if (zero_iter_bb)
7031 {
7032 /* Some counts[i] vars might be uninitialized if
7033 some loop has zero iterations. But the body shouldn't
7034 be executed in that case, so just avoid uninit warnings. */
7035 for (i = first_zero_iter; i < fd->collapse; i++)
7036 if (SSA_VAR_P (counts[i]))
7037 TREE_NO_WARNING (counts[i]) = 1;
7038 gsi_prev (&gsi);
7039 edge e = split_block (entry_bb, gsi_stmt (gsi));
7040 entry_bb = e->dest;
7041 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
7042 gsi = gsi_last_bb (entry_bb);
7043 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
7044 get_immediate_dominator (CDI_DOMINATORS,
7045 zero_iter_bb));
7046 }
7047 }
7048
7049 tree t0, t1;
7050 t1 = fd->loop.n2;
7051 t0 = fd->loop.n1;
7052 if (POINTER_TYPE_P (TREE_TYPE (t0))
7053 && TYPE_PRECISION (TREE_TYPE (t0))
7054 != TYPE_PRECISION (fd->iter_type))
7055 {
7056 /* Avoid casting pointers to an integer of a different size. */
7057 tree itype = signed_type_for (type);
7058 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
7059 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
7060 }
7061 else
7062 {
7063 t1 = fold_convert (fd->iter_type, t1);
7064 t0 = fold_convert (fd->iter_type, t0);
7065 }
7066 if (bias)
7067 {
7068 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
7069 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
7070 }
7071
7072 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
7073 OMP_CLAUSE__LOOPTEMP_);
7074 gcc_assert (innerc);
7075 tree startvar = OMP_CLAUSE_DECL (innerc);
7076 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7077 gcc_assert (innerc);
7078 tree endvar = OMP_CLAUSE_DECL (innerc);
7079 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
7080 {
7081 innerc = find_lastprivate_looptemp (fd, innerc);
7082 if (innerc)
7083 {
7084 /* If needed (inner taskloop has lastprivate clause), propagate
7085 down the total number of iterations. */
7086 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
7087 NULL_TREE, false,
7088 GSI_CONTINUE_LINKING);
7089 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
7090 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7091 }
7092 }
7093
7094 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
7095 GSI_CONTINUE_LINKING);
7096 assign_stmt = gimple_build_assign (startvar, t0);
7097 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7098
7099 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
7100 GSI_CONTINUE_LINKING);
7101 assign_stmt = gimple_build_assign (endvar, t1);
7102 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7103 if (fd->collapse > 1)
7104 expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
7105
7106 /* Remove the GIMPLE_OMP_FOR statement. */
7107 gsi = gsi_for_stmt (for_stmt);
7108 gsi_remove (&gsi, true);
7109
7110 gsi = gsi_last_nondebug_bb (cont_bb);
7111 gsi_remove (&gsi, true);
7112
7113 gsi = gsi_last_nondebug_bb (exit_bb);
7114 gsi_remove (&gsi, true);
7115
7116 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7117 remove_edge (BRANCH_EDGE (entry_bb));
7118 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
7119 remove_edge (BRANCH_EDGE (cont_bb));
7120 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
7121 set_immediate_dominator (CDI_DOMINATORS, region->entry,
7122 recompute_dominator (CDI_DOMINATORS, region->entry));
7123 }
7124
7125 /* Taskloop construct is represented after gimplification with
7126 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
7127 in between them. This routine expands the inner GIMPLE_OMP_FOR.
7128 GOMP_taskloop{,_ull} function arranges for each task to be given just
7129 a single range of iterations. */
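/* Illustratively (a sketch of the generated shape, not literal GIMPLE), the
   body of each task then looks roughly like

     V = start;   -- first OMP_CLAUSE__LOOPTEMP_ value passed by the runtime
     E = end;     -- second OMP_CLAUSE__LOOPTEMP_ value
     for (; V cond E; V += STEP)
       BODY;

   where "start" and "end" stand for the _looptemp_ decls read below.  */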
7130
7131 static void
7132 expand_omp_taskloop_for_inner (struct omp_region *region,
7133 struct omp_for_data *fd,
7134 gimple *inner_stmt)
7135 {
7136 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
7137 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
7138 basic_block fin_bb;
7139 gimple_stmt_iterator gsi;
7140 edge ep;
7141 bool broken_loop = region->cont == NULL;
7142 tree *counts = NULL;
7143 tree n1, n2, step;
7144
7145 itype = type = TREE_TYPE (fd->loop.v);
7146 if (POINTER_TYPE_P (type))
7147 itype = signed_type_for (type);
7148
7149 /* See if we need to bias by LLONG_MIN. */
7150 if (fd->iter_type == long_long_unsigned_type_node
7151 && TREE_CODE (type) == INTEGER_TYPE
7152 && !TYPE_UNSIGNED (type))
7153 {
7154 tree n1, n2;
7155
7156 if (fd->loop.cond_code == LT_EXPR)
7157 {
7158 n1 = fd->loop.n1;
7159 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
7160 }
7161 else
7162 {
7163 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
7164 n2 = fd->loop.n1;
7165 }
7166 if (TREE_CODE (n1) != INTEGER_CST
7167 || TREE_CODE (n2) != INTEGER_CST
7168 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
7169 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
7170 }
7171
7172 entry_bb = region->entry;
7173 cont_bb = region->cont;
7174 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
7175 fin_bb = BRANCH_EDGE (entry_bb)->dest;
7176 gcc_assert (broken_loop
7177 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
7178 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7179 if (!broken_loop)
7180 {
7181 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
7182 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
7183 }
7184 exit_bb = region->exit;
7185
7186 /* Iteration space partitioning goes in ENTRY_BB. */
7187 gsi = gsi_last_nondebug_bb (entry_bb);
7188 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
7189
7190 if (fd->collapse > 1)
7191 {
7192 int first_zero_iter = -1, dummy = -1;
7193 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
7194
7195 counts = XALLOCAVEC (tree, fd->collapse);
7196 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
7197 fin_bb, first_zero_iter,
7198 dummy_bb, dummy, l2_dom_bb);
7199 t = NULL_TREE;
7200 }
7201 else
7202 t = integer_one_node;
7203
7204 step = fd->loop.step;
7205 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
7206 OMP_CLAUSE__LOOPTEMP_);
7207 gcc_assert (innerc);
7208 n1 = OMP_CLAUSE_DECL (innerc);
7209 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
7210 gcc_assert (innerc);
7211 n2 = OMP_CLAUSE_DECL (innerc);
7212 if (bias)
7213 {
7214 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
7215 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
7216 }
7217 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7218 true, NULL_TREE, true, GSI_SAME_STMT);
7219 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
7220 true, NULL_TREE, true, GSI_SAME_STMT);
7221 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7222 true, NULL_TREE, true, GSI_SAME_STMT);
7223
7224 tree startvar = fd->loop.v;
7225 tree endvar = NULL_TREE;
7226
7227 if (gimple_omp_for_combined_p (fd->for_stmt))
7228 {
7229 tree clauses = gimple_omp_for_clauses (inner_stmt);
7230 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
7231 gcc_assert (innerc);
7232 startvar = OMP_CLAUSE_DECL (innerc);
7233 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
7234 OMP_CLAUSE__LOOPTEMP_);
7235 gcc_assert (innerc);
7236 endvar = OMP_CLAUSE_DECL (innerc);
7237 }
7238 t = fold_convert (TREE_TYPE (startvar), n1);
7239 t = force_gimple_operand_gsi (&gsi, t,
7240 DECL_P (startvar)
7241 && TREE_ADDRESSABLE (startvar),
7242 NULL_TREE, false, GSI_CONTINUE_LINKING);
7243 gimple *assign_stmt = gimple_build_assign (startvar, t);
7244 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7245
7246 t = fold_convert (TREE_TYPE (startvar), n2);
7247 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7248 false, GSI_CONTINUE_LINKING);
7249 if (endvar)
7250 {
7251 assign_stmt = gimple_build_assign (endvar, e);
7252 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7253 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
7254 assign_stmt = gimple_build_assign (fd->loop.v, e);
7255 else
7256 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
7257 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
7258 }
7259
7260 tree *nonrect_bounds = NULL;
7261 if (fd->collapse > 1)
7262 {
7263 if (fd->non_rect)
7264 {
7265 nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
7266 memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
7267 }
7268 gcc_assert (gsi_bb (gsi) == entry_bb);
7269 expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
7270 startvar);
7271 entry_bb = gsi_bb (gsi);
7272 }
7273
7274 if (!broken_loop)
7275 {
7276 /* The code controlling the sequential loop replaces the
7277 GIMPLE_OMP_CONTINUE. */
7278 gsi = gsi_last_nondebug_bb (cont_bb);
7279 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7280 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
7281 vmain = gimple_omp_continue_control_use (cont_stmt);
7282 vback = gimple_omp_continue_control_def (cont_stmt);
7283
7284 if (!gimple_omp_for_combined_p (fd->for_stmt))
7285 {
7286 if (POINTER_TYPE_P (type))
7287 t = fold_build_pointer_plus (vmain, step);
7288 else
7289 t = fold_build2 (PLUS_EXPR, type, vmain, step);
7290 t = force_gimple_operand_gsi (&gsi, t,
7291 DECL_P (vback)
7292 && TREE_ADDRESSABLE (vback),
7293 NULL_TREE, true, GSI_SAME_STMT);
7294 assign_stmt = gimple_build_assign (vback, t);
7295 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7296
7297 t = build2 (fd->loop.cond_code, boolean_type_node,
7298 DECL_P (vback) && TREE_ADDRESSABLE (vback)
7299 ? t : vback, e);
7300 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
7301 }
7302
7303 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7304 gsi_remove (&gsi, true);
7305
7306 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
7307 collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
7308 cont_bb, body_bb);
7309 }
7310
7311 /* Remove the GIMPLE_OMP_FOR statement. */
7312 gsi = gsi_for_stmt (fd->for_stmt);
7313 gsi_remove (&gsi, true);
7314
7315 /* Remove the GIMPLE_OMP_RETURN statement. */
7316 gsi = gsi_last_nondebug_bb (exit_bb);
7317 gsi_remove (&gsi, true);
7318
7319 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
7320 if (!broken_loop)
7321 remove_edge (BRANCH_EDGE (entry_bb));
7322 else
7323 {
7324 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
7325 region->outer->cont = NULL;
7326 }
7327
7328 /* Connect all the blocks. */
7329 if (!broken_loop)
7330 {
7331 ep = find_edge (cont_bb, body_bb);
7332 if (gimple_omp_for_combined_p (fd->for_stmt))
7333 {
7334 remove_edge (ep);
7335 ep = NULL;
7336 }
7337 else if (fd->collapse > 1)
7338 {
7339 remove_edge (ep);
7340 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
7341 }
7342 else
7343 ep->flags = EDGE_TRUE_VALUE;
7344 find_edge (cont_bb, fin_bb)->flags
7345 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
7346 }
7347
7348 set_immediate_dominator (CDI_DOMINATORS, body_bb,
7349 recompute_dominator (CDI_DOMINATORS, body_bb));
7350 if (!broken_loop)
7351 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
7352 recompute_dominator (CDI_DOMINATORS, fin_bb));
7353
7354 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
7355 {
7356 class loop *loop = alloc_loop ();
7357 loop->header = body_bb;
7358 if (collapse_bb == NULL)
7359 loop->latch = cont_bb;
7360 add_loop (loop, body_bb->loop_father);
7361 }
7362 }
7363
7364 /* A subroutine of expand_omp_for. Generate code for an OpenACC
7365 partitioned loop. The lowering here is abstracted, in that the
7366 loop parameters are passed through internal functions, which are
7367 further lowered by oacc_device_lower, once we get to the target
7368 compiler. The loop is of the form:
7369
7370 for (V = B; V LTGT E; V += S) {BODY}
7371
7372 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
7373 (constant 0 for no chunking) and we will have a GWV partitioning
7374 mask, specifying dimensions over which the loop is to be
7375 partitioned (see note below). We generate code that looks like
7376 (this ignores tiling):
7377
7378 <entry_bb> [incoming FALL->body, BRANCH->exit]
7379 typedef signedintify (typeof (V)) T; // underlying signed integral type
7380 T range = E - B;
7381 T chunk_no = 0;
7382 T DIR = LTGT == '<' ? +1 : -1;
7383 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
7384 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
7385
7386 <head_bb> [created by splitting end of entry_bb]
7387 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
7388 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
7389 if (!(offset LTGT bound)) goto bottom_bb;
7390
7391 <body_bb> [incoming]
7392 V = B + offset;
7393 {BODY}
7394
7395 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
7396 offset += step;
7397 if (offset LTGT bound) goto body_bb; [*]
7398
7399 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
7400 chunk_no++;
7401 if (chunk_no < chunk_max) goto head_bb;
7402
7403 <exit_bb> [incoming]
7404 V = B + ((range -/+ 1) / S +/- 1) * S [*]
7405
7406 [*] Needed if V live at end of loop. */
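/* For example (an illustrative sketch only; the exact shape depends on the
   user's clauses and the GWV partitioning chosen), a loop such as

     #pragma acc parallel loop gang
     for (i = 0; i < n; i++)
       a[i] = b[i];

   has its control flow expressed through IFN_GOACC_LOOP internal calls
   (the CHUNKS/STEP/OFFSET/BOUND variants sketched above), which
   oacc_device_lower resolves once the offload target is known.  */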
7407
7408 static void
7409 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
7410 {
7411 bool is_oacc_kernels_parallelized
7412 = (lookup_attribute ("oacc kernels parallelized",
7413 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7414 {
7415 bool is_oacc_kernels
7416 = (lookup_attribute ("oacc kernels",
7417 DECL_ATTRIBUTES (current_function_decl)) != NULL);
7418 if (is_oacc_kernels_parallelized)
7419 gcc_checking_assert (is_oacc_kernels);
7420 }
7421 gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
7422 /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
7423 for SSA specifics, and some are for 'parloops' OpenACC
7424 'kernels'-parallelized specifics. */
7425
7426 tree v = fd->loop.v;
7427 enum tree_code cond_code = fd->loop.cond_code;
7428 enum tree_code plus_code = PLUS_EXPR;
7429
7430 tree chunk_size = integer_minus_one_node;
7431 tree gwv = integer_zero_node;
7432 tree iter_type = TREE_TYPE (v);
7433 tree diff_type = iter_type;
7434 tree plus_type = iter_type;
7435 struct oacc_collapse *counts = NULL;
7436
7437 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
7438 == GF_OMP_FOR_KIND_OACC_LOOP);
7439 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
7440 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
7441
7442 if (POINTER_TYPE_P (iter_type))
7443 {
7444 plus_code = POINTER_PLUS_EXPR;
7445 plus_type = sizetype;
7446 }
7447 for (int ix = fd->collapse; ix--;)
7448 {
7449 tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
7450 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
7451 diff_type = diff_type2;
7452 }
7453 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
7454 diff_type = signed_type_for (diff_type);
7455 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
7456 diff_type = integer_type_node;
7457
7458 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
7459 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
7460 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
7461 basic_block bottom_bb = NULL;
7462
7463 /* entry_bb has two successors; the branch edge is to the exit
7464 block, fallthrough edge to body. */
7465 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
7466 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
7467
7468 /* If cont_bb is non-NULL, it has 2 successors. The branch successor goes
7469 to body_bb, or to a block whose only successor is body_bb. Its
7470 fallthrough successor is the final block (same as the branch
7471 successor of the entry_bb). */
7472 if (cont_bb)
7473 {
7474 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
7475 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
7476
7477 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
7478 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
7479 }
7480 else
7481 gcc_assert (!gimple_in_ssa_p (cfun));
7482
7483 /* The exit block only has entry_bb and cont_bb as predecessors. */
7484 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
7485
7486 tree chunk_no;
7487 tree chunk_max = NULL_TREE;
7488 tree bound, offset;
7489 tree step = create_tmp_var (diff_type, ".step");
7490 bool up = cond_code == LT_EXPR;
7491 tree dir = build_int_cst (diff_type, up ? +1 : -1);
7492 bool chunking = !gimple_in_ssa_p (cfun);
7493 bool negating;
7494
7495 /* Tiling vars. */
7496 tree tile_size = NULL_TREE;
7497 tree element_s = NULL_TREE;
7498 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
7499 basic_block elem_body_bb = NULL;
7500 basic_block elem_cont_bb = NULL;
7501
7502 /* SSA instances. */
7503 tree offset_incr = NULL_TREE;
7504 tree offset_init = NULL_TREE;
7505
7506 gimple_stmt_iterator gsi;
7507 gassign *ass;
7508 gcall *call;
7509 gimple *stmt;
7510 tree expr;
7511 location_t loc;
7512 edge split, be, fte;
7513
7514 /* Split the end of entry_bb to create head_bb. */
7515 split = split_block (entry_bb, last_stmt (entry_bb));
7516 basic_block head_bb = split->dest;
7517 entry_bb = split->src;
7518
7519 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
7520 gsi = gsi_last_nondebug_bb (entry_bb);
7521 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
7522 loc = gimple_location (for_stmt);
7523
7524 if (gimple_in_ssa_p (cfun))
7525 {
7526 offset_init = gimple_omp_for_index (for_stmt, 0);
7527 gcc_assert (integer_zerop (fd->loop.n1));
7528 /* The SSA parallelizer does gang parallelism. */
7529 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
7530 }
7531
7532 if (fd->collapse > 1 || fd->tiling)
7533 {
7534 gcc_assert (!gimple_in_ssa_p (cfun) && up);
7535 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
7536 tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
7537 TREE_TYPE (fd->loop.n2), loc);
7538
7539 if (SSA_VAR_P (fd->loop.n2))
7540 {
7541 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
7542 true, GSI_SAME_STMT);
7543 ass = gimple_build_assign (fd->loop.n2, total);
7544 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7545 }
7546 }
7547
7548 tree b = fd->loop.n1;
7549 tree e = fd->loop.n2;
7550 tree s = fd->loop.step;
7551
7552 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
7553 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
7554
7555 /* Convert the step, avoiding possible unsigned->signed overflow. */
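/* Illustrative example of the negate-convert-negate dance below (example
   values only): a descending loop may present its step as the unsigned
   value 0xffffffff, i.e. -1; negating first gives 1, which converts to the
   signed diff_type without overflow, and negating again restores -1.  */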
7556 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
7557 if (negating)
7558 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
7559 s = fold_convert (diff_type, s);
7560 if (negating)
7561 s = fold_build1 (NEGATE_EXPR, diff_type, s);
7562 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
7563
7564 if (!chunking)
7565 chunk_size = integer_zero_node;
7566 expr = fold_convert (diff_type, chunk_size);
7567 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
7568 NULL_TREE, true, GSI_SAME_STMT);
7569
7570 if (fd->tiling)
7571 {
7572 /* Determine the tile size and element step,
7573 modify the outer loop step size. */
7574 tile_size = create_tmp_var (diff_type, ".tile_size");
7575 expr = build_int_cst (diff_type, 1);
7576 for (int ix = 0; ix < fd->collapse; ix++)
7577 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
7578 expr = force_gimple_operand_gsi (&gsi, expr, true,
7579 NULL_TREE, true, GSI_SAME_STMT);
7580 ass = gimple_build_assign (tile_size, expr);
7581 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7582
7583 element_s = create_tmp_var (diff_type, ".element_s");
7584 ass = gimple_build_assign (element_s, s);
7585 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7586
7587 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
7588 s = force_gimple_operand_gsi (&gsi, expr, true,
7589 NULL_TREE, true, GSI_SAME_STMT);
7590 }
7591
7592 /* Determine the range, avoiding possible unsigned->signed overflow. */
7593 negating = !up && TYPE_UNSIGNED (iter_type);
7594 expr = fold_build2 (MINUS_EXPR, plus_type,
7595 fold_convert (plus_type, negating ? b : e),
7596 fold_convert (plus_type, negating ? e : b));
7597 expr = fold_convert (diff_type, expr);
7598 if (negating)
7599 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
7600 tree range = force_gimple_operand_gsi (&gsi, expr, true,
7601 NULL_TREE, true, GSI_SAME_STMT);
7602
7603 chunk_no = build_int_cst (diff_type, 0);
7604 if (chunking)
7605 {
7606 gcc_assert (!gimple_in_ssa_p (cfun));
7607
7608 expr = chunk_no;
7609 chunk_max = create_tmp_var (diff_type, ".chunk_max");
7610 chunk_no = create_tmp_var (diff_type, ".chunk_no");
7611
7612 ass = gimple_build_assign (chunk_no, expr);
7613 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7614
7615 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7616 build_int_cst (integer_type_node,
7617 IFN_GOACC_LOOP_CHUNKS),
7618 dir, range, s, chunk_size, gwv);
7619 gimple_call_set_lhs (call, chunk_max);
7620 gimple_set_location (call, loc);
7621 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7622 }
7623 else
7624 chunk_size = chunk_no;
7625
7626 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
7627 build_int_cst (integer_type_node,
7628 IFN_GOACC_LOOP_STEP),
7629 dir, range, s, chunk_size, gwv);
7630 gimple_call_set_lhs (call, step);
7631 gimple_set_location (call, loc);
7632 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7633
7634 /* Remove the GIMPLE_OMP_FOR. */
7635 gsi_remove (&gsi, true);
7636
7637 /* Fixup edges from head_bb. */
7638 be = BRANCH_EDGE (head_bb);
7639 fte = FALLTHRU_EDGE (head_bb);
7640 be->flags |= EDGE_FALSE_VALUE;
7641 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7642
7643 basic_block body_bb = fte->dest;
7644
7645 if (gimple_in_ssa_p (cfun))
7646 {
7647 gsi = gsi_last_nondebug_bb (cont_bb);
7648 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7649
7650 offset = gimple_omp_continue_control_use (cont_stmt);
7651 offset_incr = gimple_omp_continue_control_def (cont_stmt);
7652 }
7653 else
7654 {
7655 offset = create_tmp_var (diff_type, ".offset");
7656 offset_init = offset_incr = offset;
7657 }
7658 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
7659
7660 /* Loop offset & bound go into head_bb. */
7661 gsi = gsi_start_bb (head_bb);
7662
7663 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7664 build_int_cst (integer_type_node,
7665 IFN_GOACC_LOOP_OFFSET),
7666 dir, range, s,
7667 chunk_size, gwv, chunk_no);
7668 gimple_call_set_lhs (call, offset_init);
7669 gimple_set_location (call, loc);
7670 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7671
7672 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
7673 build_int_cst (integer_type_node,
7674 IFN_GOACC_LOOP_BOUND),
7675 dir, range, s,
7676 chunk_size, gwv, offset_init);
7677 gimple_call_set_lhs (call, bound);
7678 gimple_set_location (call, loc);
7679 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
7680
7681 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
7682 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7683 GSI_CONTINUE_LINKING);
7684
7685 /* V assignment goes into body_bb. */
7686 if (!gimple_in_ssa_p (cfun))
7687 {
7688 gsi = gsi_start_bb (body_bb);
7689
7690 expr = build2 (plus_code, iter_type, b,
7691 fold_convert (plus_type, offset));
7692 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7693 true, GSI_SAME_STMT);
7694 ass = gimple_build_assign (v, expr);
7695 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7696
7697 if (fd->collapse > 1 || fd->tiling)
7698 expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
7699
7700 if (fd->tiling)
7701 {
7702 /* Determine the range of the element loop -- usually simply
7703 the tile_size, but could be smaller if the final
7704 iteration of the outer loop is a partial tile. */
7705 tree e_range = create_tmp_var (diff_type, ".e_range");
7706
7707 expr = build2 (MIN_EXPR, diff_type,
7708 build2 (MINUS_EXPR, diff_type, bound, offset),
7709 build2 (MULT_EXPR, diff_type, tile_size,
7710 element_s));
7711 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7712 true, GSI_SAME_STMT);
7713 ass = gimple_build_assign (e_range, expr);
7714 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7715
7716 /* Determine bound, offset & step of inner loop. */
7717 e_bound = create_tmp_var (diff_type, ".e_bound");
7718 e_offset = create_tmp_var (diff_type, ".e_offset");
7719 e_step = create_tmp_var (diff_type, ".e_step");
7720
7721 /* Mark these as element loops. */
7722 tree t, e_gwv = integer_minus_one_node;
7723 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
7724
7725 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
7726 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7727 element_s, chunk, e_gwv, chunk);
7728 gimple_call_set_lhs (call, e_offset);
7729 gimple_set_location (call, loc);
7730 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7731
7732 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
7733 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
7734 element_s, chunk, e_gwv, e_offset);
7735 gimple_call_set_lhs (call, e_bound);
7736 gimple_set_location (call, loc);
7737 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7738
7739 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
7740 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
7741 element_s, chunk, e_gwv);
7742 gimple_call_set_lhs (call, e_step);
7743 gimple_set_location (call, loc);
7744 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
7745
7746 /* Add test and split block. */
7747 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7748 stmt = gimple_build_cond_empty (expr);
7749 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7750 split = split_block (body_bb, stmt);
7751 elem_body_bb = split->dest;
7752 if (cont_bb == body_bb)
7753 cont_bb = elem_body_bb;
7754 body_bb = split->src;
7755
7756 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
7757
7758 /* Add a dummy exit for the tiled block when cont_bb is missing. */
7759 if (cont_bb == NULL)
7760 {
7761 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
7762 e->probability = profile_probability::even ();
7763 split->probability = profile_probability::even ();
7764 }
7765
7766 /* Initialize the user's loop vars. */
7767 gsi = gsi_start_bb (elem_body_bb);
7768 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
7769 diff_type);
7770 }
7771 }
7772
7773 /* Loop increment goes into cont_bb. If this is not a loop, we
7774 will have spawned threads as if it were, and each one will
7775 execute one iteration. The specification is not explicit about
7776 whether such constructs are ill-formed or not, and they can
7777 occur, especially when noreturn routines are involved. */
7778 if (cont_bb)
7779 {
7780 gsi = gsi_last_nondebug_bb (cont_bb);
7781 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
7782 loc = gimple_location (cont_stmt);
7783
7784 if (fd->tiling)
7785 {
7786 /* Insert element loop increment and test. */
7787 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
7788 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7789 true, GSI_SAME_STMT);
7790 ass = gimple_build_assign (e_offset, expr);
7791 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7792 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
7793
7794 stmt = gimple_build_cond_empty (expr);
7795 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7796 split = split_block (cont_bb, stmt);
7797 elem_cont_bb = split->src;
7798 cont_bb = split->dest;
7799
7800 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7801 split->probability = profile_probability::unlikely ().guessed ();
7802 edge latch_edge
7803 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
7804 latch_edge->probability = profile_probability::likely ().guessed ();
7805
7806 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
7807 skip_edge->probability = profile_probability::unlikely ().guessed ();
7808 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
7809 loop_entry_edge->probability
7810 = profile_probability::likely ().guessed ();
7811
7812 gsi = gsi_for_stmt (cont_stmt);
7813 }
7814
7815 /* Increment offset. */
7816 if (gimple_in_ssa_p (cfun))
7817 expr = build2 (plus_code, iter_type, offset,
7818 fold_convert (plus_type, step));
7819 else
7820 expr = build2 (PLUS_EXPR, diff_type, offset, step);
7821 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7822 true, GSI_SAME_STMT);
7823 ass = gimple_build_assign (offset_incr, expr);
7824 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7825 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
7826 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
7827
7828 /* Remove the GIMPLE_OMP_CONTINUE. */
7829 gsi_remove (&gsi, true);
7830
7831 /* Fixup edges from cont_bb. */
7832 be = BRANCH_EDGE (cont_bb);
7833 fte = FALLTHRU_EDGE (cont_bb);
7834 be->flags |= EDGE_TRUE_VALUE;
7835 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7836
7837 if (chunking)
7838 {
7839 /* Split the beginning of exit_bb to make bottom_bb. We
7840 need to insert a nop at the start, because splitting is
7841 after a stmt, not before. */
7842 gsi = gsi_start_bb (exit_bb);
7843 stmt = gimple_build_nop ();
7844 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
7845 split = split_block (exit_bb, stmt);
7846 bottom_bb = split->src;
7847 exit_bb = split->dest;
7848 gsi = gsi_last_bb (bottom_bb);
7849
7850 /* Chunk increment and test goes into bottom_bb. */
7851 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
7852 build_int_cst (diff_type, 1));
7853 ass = gimple_build_assign (chunk_no, expr);
7854 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
7855
7856 /* Chunk test at end of bottom_bb. */
7857 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
7858 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
7859 GSI_CONTINUE_LINKING);
7860
7861 /* Fixup edges from bottom_bb. */
7862 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
7863 split->probability = profile_probability::unlikely ().guessed ();
7864 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
7865 latch_edge->probability = profile_probability::likely ().guessed ();
7866 }
7867 }
7868
7869 gsi = gsi_last_nondebug_bb (exit_bb);
7870 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7871 loc = gimple_location (gsi_stmt (gsi));
7872
7873 if (!gimple_in_ssa_p (cfun))
7874 {
7875 /* Insert the final value of V, in case it is live. This is the
7876 value for the only thread that survives past the join. */
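/* E.g. (illustrative numbers): for B = 0, E = 10, S = 3 and an ascending
   loop, range = 10 and the expression built below evaluates to
     V = 0 + ((10 - 1 + 3) / 3) * 3 = 12,
   the value V would hold after the equivalent sequential loop.  */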
7877 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
7878 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
7879 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
7880 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
7881 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
7882 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
7883 true, GSI_SAME_STMT);
7884 ass = gimple_build_assign (v, expr);
7885 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
7886 }
7887
7888 /* Remove the OMP_RETURN. */
7889 gsi_remove (&gsi, true);
7890
7891 if (cont_bb)
7892 {
7893 /* We now have one, two or three nested loops. Update the loop
7894 structures. */
7895 class loop *parent = entry_bb->loop_father;
7896 class loop *body = body_bb->loop_father;
7897
7898 if (chunking)
7899 {
7900 class loop *chunk_loop = alloc_loop ();
7901 chunk_loop->header = head_bb;
7902 chunk_loop->latch = bottom_bb;
7903 add_loop (chunk_loop, parent);
7904 parent = chunk_loop;
7905 }
7906 else if (parent != body)
7907 {
7908 gcc_assert (body->header == body_bb);
7909 gcc_assert (body->latch == cont_bb
7910 || single_pred (body->latch) == cont_bb);
7911 parent = NULL;
7912 }
7913
7914 if (parent)
7915 {
7916 class loop *body_loop = alloc_loop ();
7917 body_loop->header = body_bb;
7918 body_loop->latch = cont_bb;
7919 add_loop (body_loop, parent);
7920
7921 if (fd->tiling)
7922 {
7923 /* Insert tiling's element loop. */
7924 class loop *inner_loop = alloc_loop ();
7925 inner_loop->header = elem_body_bb;
7926 inner_loop->latch = elem_cont_bb;
7927 add_loop (inner_loop, body_loop);
7928 }
7929 }
7930 }
7931 }
7932
7933 /* Expand the OMP loop defined by REGION. */
7934
7935 static void
7936 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
7937 {
7938 struct omp_for_data fd;
7939 struct omp_for_data_loop *loops;
7940
7941 loops = XALLOCAVEC (struct omp_for_data_loop,
7942 gimple_omp_for_collapse (last_stmt (region->entry)));
7943 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
7944 &fd, loops);
7945 region->sched_kind = fd.sched_kind;
7946 region->sched_modifiers = fd.sched_modifiers;
7947 region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
7948 if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
7949 {
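/* Illustrative example of what the check below diagnoses (a sketch,
   assuming constant multipliers and steps): in

     for (i = 0; i < n; i += 2)
       for (j = i; j < 2 * i + 4; j += 3)

   we have m1 = 1 and m2 = 2, so (m2 - m1) * 2 == 2 is not a multiple of
   the inner step 3 and an error is emitted; with j += 2 the nest would be
   accepted.  */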
7950 for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
7951 if ((loops[i].m1 || loops[i].m2)
7952 && (loops[i].m1 == NULL_TREE
7953 || TREE_CODE (loops[i].m1) == INTEGER_CST)
7954 && (loops[i].m2 == NULL_TREE
7955 || TREE_CODE (loops[i].m2) == INTEGER_CST)
7956 && TREE_CODE (loops[i].step) == INTEGER_CST
7957 && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
7958 {
7959 tree t;
7960 tree itype = TREE_TYPE (loops[i].v);
7961 if (loops[i].m1 && loops[i].m2)
7962 t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
7963 else if (loops[i].m1)
7964 t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
7965 else
7966 t = loops[i].m2;
7967 t = fold_build2 (MULT_EXPR, itype, t,
7968 fold_convert (itype,
7969 loops[i - loops[i].outer].step));
7970 if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
7971 t = fold_build2 (TRUNC_MOD_EXPR, itype,
7972 fold_build1 (NEGATE_EXPR, itype, t),
7973 fold_build1 (NEGATE_EXPR, itype,
7974 fold_convert (itype,
7975 loops[i].step)));
7976 else
7977 t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
7978 fold_convert (itype, loops[i].step));
7979 if (integer_nonzerop (t))
7980 error_at (gimple_location (fd.for_stmt),
7981 "invalid OpenMP non-rectangular loop step; "
7982 "%<(%E - %E) * %E%> is not a multiple of loop %d "
7983 "step %qE",
7984 loops[i].m2 ? loops[i].m2 : integer_zero_node,
7985 loops[i].m1 ? loops[i].m1 : integer_zero_node,
7986 loops[i - loops[i].outer].step, i + 1,
7987 loops[i].step);
7988 }
7989 }
7990
7991 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
7992 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7993 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
7994 if (region->cont)
7995 {
7996 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
7997 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7998 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
7999 }
8000 else
8001 /* If there isn't a continue then this is a degenerate case where
8002 the introduction of abnormal edges during lowering will prevent
8003 original loops from being detected. Fix that up. */
8004 loops_state_set (LOOPS_NEED_FIXUP);
8005
8006 if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
8007 expand_omp_simd (region, &fd);
8008 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
8009 {
8010 gcc_assert (!inner_stmt && !fd.non_rect);
8011 expand_oacc_for (region, &fd);
8012 }
8013 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
8014 {
8015 if (gimple_omp_for_combined_into_p (fd.for_stmt))
8016 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
8017 else
8018 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
8019 }
8020 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
8021 && !fd.have_ordered)
8022 {
8023 if (fd.chunk_size == NULL)
8024 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
8025 else
8026 expand_omp_for_static_chunk (region, &fd, inner_stmt);
8027 }
8028 else
8029 {
8030 int fn_index, start_ix, next_ix;
8031 unsigned HOST_WIDE_INT sched = 0;
8032 tree sched_arg = NULL_TREE;
8033
8034 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
8035 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
8036 if (fd.chunk_size == NULL
8037 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
8038 fd.chunk_size = integer_zero_node;
8039 switch (fd.sched_kind)
8040 {
8041 case OMP_CLAUSE_SCHEDULE_RUNTIME:
8042 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
8043 && fd.lastprivate_conditional == 0)
8044 {
8045 gcc_assert (!fd.have_ordered);
8046 fn_index = 6;
8047 sched = 4;
8048 }
8049 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8050 && !fd.have_ordered
8051 && fd.lastprivate_conditional == 0)
8052 fn_index = 7;
8053 else
8054 {
8055 fn_index = 3;
8056 sched = (HOST_WIDE_INT_1U << 31);
8057 }
8058 break;
8059 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
8060 case OMP_CLAUSE_SCHEDULE_GUIDED:
8061 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
8062 && !fd.have_ordered
8063 && fd.lastprivate_conditional == 0)
8064 {
8065 fn_index = 3 + fd.sched_kind;
8066 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8067 break;
8068 }
8069 fn_index = fd.sched_kind;
8070 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
8071 sched += (HOST_WIDE_INT_1U << 31);
8072 break;
8073 case OMP_CLAUSE_SCHEDULE_STATIC:
8074 gcc_assert (fd.have_ordered);
8075 fn_index = 0;
8076 sched = (HOST_WIDE_INT_1U << 31) + 1;
8077 break;
8078 default:
8079 gcc_unreachable ();
8080 }
8081 if (!fd.ordered)
8082 fn_index += fd.have_ordered * 8;
8083 if (fd.ordered)
8084 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
8085 else
8086 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
8087 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
8088 if (fd.have_reductemp || fd.have_pointer_condtemp)
8089 {
8090 if (fd.ordered)
8091 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
8092 else if (fd.have_ordered)
8093 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
8094 else
8095 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
8096 sched_arg = build_int_cstu (long_integer_type_node, sched);
8097 if (!fd.chunk_size)
8098 fd.chunk_size = integer_zero_node;
8099 }
8100 if (fd.iter_type == long_long_unsigned_type_node)
8101 {
8102 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
8103 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
8104 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
8105 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
8106 }
8107 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
8108 (enum built_in_function) next_ix, sched_arg,
8109 inner_stmt);
8110 }
8111
8112 if (gimple_in_ssa_p (cfun))
8113 update_ssa (TODO_update_ssa_only_virtuals);
8114 }
8115
8116 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
8117
8118 v = GOMP_sections_start (n);
8119 L0:
8120 switch (v)
8121 {
8122 case 0:
8123 goto L2;
8124 case 1:
8125 section 1;
8126 goto L1;
8127 case 2:
8128 ...
8129 case n:
8130 ...
8131 default:
8132 abort ();
8133 }
8134 L1:
8135 v = GOMP_sections_next ();
8136 goto L0;
8137 L2:
8138 reduction;
8139
8140 If this is a combined parallel sections region, replace the call to
8141 GOMP_sections_start with a call to GOMP_sections_next. */
8142
8143 static void
8144 expand_omp_sections (struct omp_region *region)
8145 {
8146 tree t, u, vin = NULL, vmain, vnext, l2;
8147 unsigned len;
8148 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
8149 gimple_stmt_iterator si, switch_si;
8150 gomp_sections *sections_stmt;
8151 gimple *stmt;
8152 gomp_continue *cont;
8153 edge_iterator ei;
8154 edge e;
8155 struct omp_region *inner;
8156 unsigned i, casei;
8157 bool exit_reachable = region->cont != NULL;
8158
8159 gcc_assert (region->exit != NULL);
8160 entry_bb = region->entry;
8161 l0_bb = single_succ (entry_bb);
8162 l1_bb = region->cont;
8163 l2_bb = region->exit;
8164 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
8165 l2 = gimple_block_label (l2_bb);
8166 else
8167 {
8168 /* This can happen if there are reductions. */
8169 len = EDGE_COUNT (l0_bb->succs);
8170 gcc_assert (len > 0);
8171 e = EDGE_SUCC (l0_bb, len - 1);
8172 si = gsi_last_nondebug_bb (e->dest);
8173 l2 = NULL_TREE;
8174 if (gsi_end_p (si)
8175 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8176 l2 = gimple_block_label (e->dest);
8177 else
8178 FOR_EACH_EDGE (e, ei, l0_bb->succs)
8179 {
8180 si = gsi_last_nondebug_bb (e->dest);
8181 if (gsi_end_p (si)
8182 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
8183 {
8184 l2 = gimple_block_label (e->dest);
8185 break;
8186 }
8187 }
8188 }
8189 if (exit_reachable)
8190 default_bb = create_empty_bb (l1_bb->prev_bb);
8191 else
8192 default_bb = create_empty_bb (l0_bb);
8193
8194 /* We will build a switch() with enough cases for all the
8195 GIMPLE_OMP_SECTION regions, a '0' case taken when there is no more work
8196 and a default case to abort if something goes wrong. */
8197 len = EDGE_COUNT (l0_bb->succs);
8198
8199 /* Use vec::quick_push on label_vec throughout, since we know the size
8200 in advance. */
8201 auto_vec<tree> label_vec (len);
8202
8203 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
8204 GIMPLE_OMP_SECTIONS statement. */
8205 si = gsi_last_nondebug_bb (entry_bb);
8206 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
8207 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
8208 vin = gimple_omp_sections_control (sections_stmt);
8209 tree clauses = gimple_omp_sections_clauses (sections_stmt);
8210 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
8211 tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
8212 tree cond_var = NULL_TREE;
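/* With reduction or conditional-lastprivate temporaries present,
   GOMP_sections2_start is used instead of GOMP_sections_start: it also
   receives the reduction data and the address of a temporary whose
   initial value is the size needed for the conditional-lastprivate
   bookkeeping (one element per such clause).  */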
8213 if (reductmp || condtmp)
8214 {
8215 tree reductions = null_pointer_node, mem = null_pointer_node;
8216 tree memv = NULL_TREE, condtemp = NULL_TREE;
8217 gimple_stmt_iterator gsi = gsi_none ();
8218 gimple *g = NULL;
8219 if (reductmp)
8220 {
8221 reductions = OMP_CLAUSE_DECL (reductmp);
8222 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
8223 g = SSA_NAME_DEF_STMT (reductions);
8224 reductions = gimple_assign_rhs1 (g);
8225 OMP_CLAUSE_DECL (reductmp) = reductions;
8226 gsi = gsi_for_stmt (g);
8227 }
8228 else
8229 gsi = si;
8230 if (condtmp)
8231 {
8232 condtemp = OMP_CLAUSE_DECL (condtmp);
8233 tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
8234 OMP_CLAUSE__CONDTEMP_);
8235 cond_var = OMP_CLAUSE_DECL (c);
8236 tree type = TREE_TYPE (condtemp);
8237 memv = create_tmp_var (type);
8238 TREE_ADDRESSABLE (memv) = 1;
8239 unsigned cnt = 0;
8240 for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
8241 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
8242 && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
8243 ++cnt;
8244 unsigned HOST_WIDE_INT sz
8245 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
8246 expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
8247 false);
8248 mem = build_fold_addr_expr (memv);
8249 }
8250 t = build_int_cst (unsigned_type_node, len - 1);
8251 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
8252 stmt = gimple_build_call (u, 3, t, reductions, mem);
8253 gimple_call_set_lhs (stmt, vin);
8254 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
8255 if (condtmp)
8256 {
8257 expand_omp_build_assign (&gsi, condtemp, memv, false);
8258 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8259 vin, build_one_cst (TREE_TYPE (cond_var)));
8260 expand_omp_build_assign (&gsi, cond_var, t, false);
8261 }
8262 if (reductmp)
8263 {
8264 gsi_remove (&gsi, true);
8265 release_ssa_name (gimple_assign_lhs (g));
8266 }
8267 }
8268 else if (!is_combined_parallel (region))
8269 {
8270 /* If we are not inside a combined parallel+sections region,
8271 call GOMP_sections_start. */
8272 t = build_int_cst (unsigned_type_node, len - 1);
8273 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
8274 stmt = gimple_build_call (u, 1, t);
8275 }
8276 else
8277 {
8278 /* Otherwise, call GOMP_sections_next. */
8279 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8280 stmt = gimple_build_call (u, 0);
8281 }
8282 if (!reductmp && !condtmp)
8283 {
8284 gimple_call_set_lhs (stmt, vin);
8285 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8286 }
8287 gsi_remove (&si, true);
8288
8289 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
8290 L0_BB. */
8291 switch_si = gsi_last_nondebug_bb (l0_bb);
8292 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
8293 if (exit_reachable)
8294 {
8295 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
8296 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
8297 vmain = gimple_omp_continue_control_use (cont);
8298 vnext = gimple_omp_continue_control_def (cont);
8299 }
8300 else
8301 {
8302 vmain = vin;
8303 vnext = NULL_TREE;
8304 }
8305
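/* Case 0 means there is no more work: jump straight to L2.  */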
8306 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
8307 label_vec.quick_push (t);
8308 i = 1;
8309
8310 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
8311 for (inner = region->inner, casei = 1;
8312 inner;
8313 inner = inner->next, i++, casei++)
8314 {
8315 basic_block s_entry_bb, s_exit_bb;
8316
8317 /* Skip optional reduction region. */
8318 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
8319 {
8320 --i;
8321 --casei;
8322 continue;
8323 }
8324
8325 s_entry_bb = inner->entry;
8326 s_exit_bb = inner->exit;
8327
8328 t = gimple_block_label (s_entry_bb);
8329 u = build_int_cst (unsigned_type_node, casei);
8330 u = build_case_label (u, NULL, t);
8331 label_vec.quick_push (u);
8332
8333 si = gsi_last_nondebug_bb (s_entry_bb);
8334 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
8335 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
8336 gsi_remove (&si, true);
8337 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
8338
8339 if (s_exit_bb == NULL)
8340 continue;
8341
8342 si = gsi_last_nondebug_bb (s_exit_bb);
8343 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8344 gsi_remove (&si, true);
8345
8346 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
8347 }
8348
8349 /* Error handling code goes in DEFAULT_BB. */
8350 t = gimple_block_label (default_bb);
8351 u = build_case_label (NULL, NULL, t);
8352 make_edge (l0_bb, default_bb, 0);
8353 add_bb_to_loop (default_bb, current_loops->tree_root);
8354
8355 stmt = gimple_build_switch (vmain, u, label_vec);
8356 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
8357 gsi_remove (&switch_si, true);
8358
8359 si = gsi_start_bb (default_bb);
8360 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
8361 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
8362
8363 if (exit_reachable)
8364 {
8365 tree bfn_decl;
8366
8367 /* Code to get the next section goes in L1_BB. */
8368 si = gsi_last_nondebug_bb (l1_bb);
8369 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
8370
8371 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
8372 stmt = gimple_build_call (bfn_decl, 0);
8373 gimple_call_set_lhs (stmt, vnext);
8374 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8375 if (cond_var)
8376 {
8377 tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
8378 vnext, build_one_cst (TREE_TYPE (cond_var)));
8379 expand_omp_build_assign (&si, cond_var, t, false);
8380 }
8381 gsi_remove (&si, true);
8382
8383 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
8384 }
8385
8386 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
8387 si = gsi_last_nondebug_bb (l2_bb);
8388 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
8389 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
8390 else if (gimple_omp_return_lhs (gsi_stmt (si)))
8391 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
8392 else
8393 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
8394 stmt = gimple_build_call (t, 0);
8395 if (gimple_omp_return_lhs (gsi_stmt (si)))
8396 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
8397 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
8398 gsi_remove (&si, true);
8399
8400 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
8401 }
8402
8403 /* Expand code for an OpenMP single directive.  We've already expanded
8404 much of the code; here we simply place the GOMP_barrier call. */
8405
8406 static void
8407 expand_omp_single (struct omp_region *region)
8408 {
8409 basic_block entry_bb, exit_bb;
8410 gimple_stmt_iterator si;
8411
8412 entry_bb = region->entry;
8413 exit_bb = region->exit;
8414
8415 si = gsi_last_nondebug_bb (entry_bb);
8416 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
8417 gsi_remove (&si, true);
8418 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8419
8420 si = gsi_last_nondebug_bb (exit_bb);
8421 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
8422 {
8423 tree t = gimple_omp_return_lhs (gsi_stmt (si));
8424 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
8425 }
8426 gsi_remove (&si, true);
8427 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8428 }
8429
8430 /* Generic expansion for OpenMP synchronization directives: master,
8431 taskgroup, ordered and critical (host teams regions are handed off to
8432 expand_omp_taskreg).  All we need to do here is remove the entry and exit markers for REGION. */
8433
8434 static void
8435 expand_omp_synch (struct omp_region *region)
8436 {
8437 basic_block entry_bb, exit_bb;
8438 gimple_stmt_iterator si;
8439
8440 entry_bb = region->entry;
8441 exit_bb = region->exit;
8442
8443 si = gsi_last_nondebug_bb (entry_bb);
8444 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
8445 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
8446 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
8447 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
8448 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
8449 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
8450 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
8451 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
8452 {
8453 expand_omp_taskreg (region);
8454 return;
8455 }
8456 gsi_remove (&si, true);
8457 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
8458
8459 if (exit_bb)
8460 {
8461 si = gsi_last_nondebug_bb (exit_bb);
8462 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
8463 gsi_remove (&si, true);
8464 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
8465 }
8466 }
8467
8468 /* Translate enum omp_memory_order to enum memmodel.  The two enums
8469 use different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
8470 is 0.  */
8471
8472 static enum memmodel
8473 omp_memory_order_to_memmodel (enum omp_memory_order mo)
8474 {
8475 switch (mo)
8476 {
8477 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
8478 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
8479 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
8480 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
8481 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
8482 default: gcc_unreachable ();
8483 }
8484 }
8485
8486 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8487 operation as a normal volatile load. */
8488
8489 static bool
8490 expand_omp_atomic_load (basic_block load_bb, tree addr,
8491 tree loaded_val, int index)
8492 {
8493 enum built_in_function tmpbase;
8494 gimple_stmt_iterator gsi;
8495 basic_block store_bb;
8496 location_t loc;
8497 gimple *stmt;
8498 tree decl, call, type, itype;
8499
8500 gsi = gsi_last_nondebug_bb (load_bb);
8501 stmt = gsi_stmt (gsi);
8502 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8503 loc = gimple_location (stmt);
8504
8505 /* ??? If the target does not implement atomic_load_optab[mode], and mode
8506 is smaller than word size, then expand_atomic_load assumes that the load
8507 is atomic. We could avoid the builtin entirely in this case. */
8508
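/* INDEX is log2 of the access size; skipping over the generic _N entry
   selects the matching fixed-size __atomic_load_{1,2,4,8,16} builtin.  */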
8509 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8510 decl = builtin_decl_explicit (tmpbase);
8511 if (decl == NULL_TREE)
8512 return false;
8513
8514 type = TREE_TYPE (loaded_val);
8515 itype = TREE_TYPE (TREE_TYPE (decl));
8516
8517 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8518 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8519 call = build_call_expr_loc (loc, decl, 2, addr, mo);
8520 if (!useless_type_conversion_p (type, itype))
8521 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8522 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8523
8524 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8525 gsi_remove (&gsi, true);
8526
8527 store_bb = single_succ (load_bb);
8528 gsi = gsi_last_nondebug_bb (store_bb);
8529 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8530 gsi_remove (&gsi, true);
8531
8532 if (gimple_in_ssa_p (cfun))
8533 update_ssa (TODO_update_ssa_no_phi);
8534
8535 return true;
8536 }
8537
8538 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8539 operation as a normal volatile store. */
8540
8541 static bool
8542 expand_omp_atomic_store (basic_block load_bb, tree addr,
8543 tree loaded_val, tree stored_val, int index)
8544 {
8545 enum built_in_function tmpbase;
8546 gimple_stmt_iterator gsi;
8547 basic_block store_bb = single_succ (load_bb);
8548 location_t loc;
8549 gimple *stmt;
8550 tree decl, call, type, itype;
8551 machine_mode imode;
8552 bool exchange;
8553
8554 gsi = gsi_last_nondebug_bb (load_bb);
8555 stmt = gsi_stmt (gsi);
8556 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
8557
8558 /* If the load value is needed, then this isn't a store but an exchange. */
8559 exchange = gimple_omp_atomic_need_value_p (stmt);
8560
8561 gsi = gsi_last_nondebug_bb (store_bb);
8562 stmt = gsi_stmt (gsi);
8563 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
8564 loc = gimple_location (stmt);
8565
8566 /* ??? If the target does not implement atomic_store_optab[mode], and mode
8567 is smaller than word size, then expand_atomic_store assumes that the store
8568 is atomic. We could avoid the builtin entirely in this case. */
8569
8570 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
8571 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
8572 decl = builtin_decl_explicit (tmpbase);
8573 if (decl == NULL_TREE)
8574 return false;
8575
8576 type = TREE_TYPE (stored_val);
8577
8578 /* Dig out the type of the function's second argument. */
8579 itype = TREE_TYPE (decl);
8580 itype = TYPE_ARG_TYPES (itype);
8581 itype = TREE_CHAIN (itype);
8582 itype = TREE_VALUE (itype);
8583 imode = TYPE_MODE (itype);
8584
8585 if (exchange && !can_atomic_exchange_p (imode, true))
8586 return false;
8587
8588 if (!useless_type_conversion_p (itype, type))
8589 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
8590 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
8591 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
8592 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
8593 if (exchange)
8594 {
8595 if (!useless_type_conversion_p (type, itype))
8596 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
8597 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
8598 }
8599
8600 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8601 gsi_remove (&gsi, true);
8602
8603 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
8604 gsi = gsi_last_nondebug_bb (load_bb);
8605 gsi_remove (&gsi, true);
8606
8607 if (gimple_in_ssa_p (cfun))
8608 update_ssa (TODO_update_ssa_no_phi);
8609
8610 return true;
8611 }
8612
8613 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
8614 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
8615 size of the data type, and thus usable to find the index of the builtin
8616 decl. Returns false if the expression is not of the proper form. */
8617
8618 static bool
8619 expand_omp_atomic_fetch_op (basic_block load_bb,
8620 tree addr, tree loaded_val,
8621 tree stored_val, int index)
8622 {
8623 enum built_in_function oldbase, newbase, tmpbase;
8624 tree decl, itype, call;
8625 tree lhs, rhs;
8626 basic_block store_bb = single_succ (load_bb);
8627 gimple_stmt_iterator gsi;
8628 gimple *stmt;
8629 location_t loc;
8630 enum tree_code code;
8631 bool need_old, need_new;
8632 machine_mode imode;
8633
8634 /* We expect to find the following sequences:
8635
8636 load_bb:
8637 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
8638
8639 store_bb:
8640 val = tmp OP something; (or: something OP tmp)
8641 GIMPLE_OMP_ATOMIC_STORE (val)
8642
8643 ??? FIXME: Allow a more flexible sequence.
8644 Perhaps use data flow to pick the statements.
8645
8646 */
8647
8648 gsi = gsi_after_labels (store_bb);
8649 stmt = gsi_stmt (gsi);
8650 if (is_gimple_debug (stmt))
8651 {
8652 gsi_next_nondebug (&gsi);
8653 if (gsi_end_p (gsi))
8654 return false;
8655 stmt = gsi_stmt (gsi);
8656 }
8657 loc = gimple_location (stmt);
8658 if (!is_gimple_assign (stmt))
8659 return false;
8660 gsi_next_nondebug (&gsi);
8661 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
8662 return false;
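/* NEED_NEW: the value after the operation is used (op-fetch form);
   NEED_OLD: the value before the operation is used (fetch-op form).
   At most one of the two may be set.  */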
8663 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
8664 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
8665 enum omp_memory_order omo
8666 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
8667 enum memmodel mo = omp_memory_order_to_memmodel (omo);
8668 gcc_checking_assert (!need_old || !need_new);
8669
8670 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
8671 return false;
8672
8673 /* Check for one of the supported fetch-op operations. */
8674 code = gimple_assign_rhs_code (stmt);
8675 switch (code)
8676 {
8677 case PLUS_EXPR:
8678 case POINTER_PLUS_EXPR:
8679 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
8680 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
8681 break;
8682 case MINUS_EXPR:
8683 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
8684 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
8685 break;
8686 case BIT_AND_EXPR:
8687 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
8688 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
8689 break;
8690 case BIT_IOR_EXPR:
8691 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
8692 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
8693 break;
8694 case BIT_XOR_EXPR:
8695 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
8696 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
8697 break;
8698 default:
8699 return false;
8700 }
8701
8702 /* Make sure the expression is of the proper form. */
8703 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
8704 rhs = gimple_assign_rhs2 (stmt);
8705 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
8706 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
8707 rhs = gimple_assign_rhs1 (stmt);
8708 else
8709 return false;
8710
8711 tmpbase = ((enum built_in_function)
8712 ((need_new ? newbase : oldbase) + index + 1));
8713 decl = builtin_decl_explicit (tmpbase);
8714 if (decl == NULL_TREE)
8715 return false;
8716 itype = TREE_TYPE (TREE_TYPE (decl));
8717 imode = TYPE_MODE (itype);
8718
8719 /* We could test all of the various optabs involved, but the fact of the
8720 matter is that (with the exception of i486 vs i586 and xadd) all targets
8721 that support any atomic operation optab also implement compare-and-swap.
8722 Let optabs.c take care of expanding any compare-and-swap loop. */
8723 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
8724 return false;
8725
8726 gsi = gsi_last_nondebug_bb (load_bb);
8727 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
8728
8729 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
8730 It only requires that the operation happen atomically. Thus we can
8731 use the RELAXED memory model. */
8732 call = build_call_expr_loc (loc, decl, 3, addr,
8733 fold_convert_loc (loc, itype, rhs),
8734 build_int_cst (NULL, mo));
8735
8736 if (need_old || need_new)
8737 {
8738 lhs = need_old ? loaded_val : stored_val;
8739 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
8740 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
8741 }
8742 else
8743 call = fold_convert_loc (loc, void_type_node, call);
8744 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
8745 gsi_remove (&gsi, true);
8746
8747 gsi = gsi_last_nondebug_bb (store_bb);
8748 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
8749 gsi_remove (&gsi, true);
8750 gsi = gsi_last_nondebug_bb (store_bb);
8751 stmt = gsi_stmt (gsi);
8752 gsi_remove (&gsi, true);
8753
8754 if (gimple_in_ssa_p (cfun))
8755 {
8756 release_defs (stmt);
8757 update_ssa (TODO_update_ssa_no_phi);
8758 }
8759
8760 return true;
8761 }
8762
8763 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8764
8765 oldval = *addr;
8766 repeat:
8767 newval = rhs; // with oldval replacing *addr in rhs
8768 curval = __sync_val_compare_and_swap (addr, oldval, newval);
8769 if (curval != oldval)
8770 { oldval = curval; goto repeat; }
8771
8772 INDEX is log2 of the size of the data type, and thus usable to find the
8773 index of the builtin decl. */
8774
8775 static bool
8776 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
8777 tree addr, tree loaded_val, tree stored_val,
8778 int index)
8779 {
8780 tree loadedi, storedi, initial, new_storedi, old_vali;
8781 tree type, itype, cmpxchg, iaddr, atype;
8782 gimple_stmt_iterator si;
8783 basic_block loop_header = single_succ (load_bb);
8784 gimple *phi, *stmt;
8785 edge e;
8786 enum built_in_function fncode;
8787
8788 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
8789 order to use the RELAXED memory model effectively. */
8790 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
8791 + index + 1);
8792 cmpxchg = builtin_decl_explicit (fncode);
8793 if (cmpxchg == NULL_TREE)
8794 return false;
8795 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
8796 atype = type;
8797 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
8798
8799 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
8800 || !can_atomic_load_p (TYPE_MODE (itype)))
8801 return false;
8802
8803 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
8804 si = gsi_last_nondebug_bb (load_bb);
8805 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8806
8807 /* For floating-point values, we'll need to view-convert them to integers
8808 so that we can perform the atomic compare and swap. Simplify the
8809 following code by always setting up the "i"ntegral variables. */
8810 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
8811 {
8812 tree iaddr_val;
8813
8814 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
8815 true));
8816 atype = itype;
8817 iaddr_val
8818 = force_gimple_operand_gsi (&si,
8819 fold_convert (TREE_TYPE (iaddr), addr),
8820 false, NULL_TREE, true, GSI_SAME_STMT);
8821 stmt = gimple_build_assign (iaddr, iaddr_val);
8822 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8823 loadedi = create_tmp_var (itype);
8824 if (gimple_in_ssa_p (cfun))
8825 loadedi = make_ssa_name (loadedi);
8826 }
8827 else
8828 {
8829 iaddr = addr;
8830 loadedi = loaded_val;
8831 }
8832
8833 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
8834 tree loaddecl = builtin_decl_explicit (fncode);
8835 if (loaddecl)
8836 initial
8837 = fold_convert (atype,
8838 build_call_expr (loaddecl, 2, iaddr,
8839 build_int_cst (NULL_TREE,
8840 MEMMODEL_RELAXED)));
8841 else
8842 {
8843 tree off
8844 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
8845 true), 0);
8846 initial = build2 (MEM_REF, atype, iaddr, off);
8847 }
8848
8849 initial
8850 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
8851 GSI_SAME_STMT);
8852
8853 /* Move the value to the LOADEDI temporary. */
8854 if (gimple_in_ssa_p (cfun))
8855 {
8856 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
8857 phi = create_phi_node (loadedi, loop_header);
8858 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
8859 initial);
8860 }
8861 else
8862 gsi_insert_before (&si,
8863 gimple_build_assign (loadedi, initial),
8864 GSI_SAME_STMT);
8865 if (loadedi != loaded_val)
8866 {
8867 gimple_stmt_iterator gsi2;
8868 tree x;
8869
8870 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
8871 gsi2 = gsi_start_bb (loop_header);
8872 if (gimple_in_ssa_p (cfun))
8873 {
8874 gassign *stmt;
8875 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8876 true, GSI_SAME_STMT);
8877 stmt = gimple_build_assign (loaded_val, x);
8878 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
8879 }
8880 else
8881 {
8882 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
8883 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
8884 true, GSI_SAME_STMT);
8885 }
8886 }
8887 gsi_remove (&si, true);
8888
8889 si = gsi_last_nondebug_bb (store_bb);
8890 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
8891
8892 if (iaddr == addr)
8893 storedi = stored_val;
8894 else
8895 storedi
8896 = force_gimple_operand_gsi (&si,
8897 build1 (VIEW_CONVERT_EXPR, itype,
8898 stored_val), true, NULL_TREE, true,
8899 GSI_SAME_STMT);
8900
8901 /* Build the compare&swap statement. */
8902 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
8903 new_storedi = force_gimple_operand_gsi (&si,
8904 fold_convert (TREE_TYPE (loadedi),
8905 new_storedi),
8906 true, NULL_TREE,
8907 true, GSI_SAME_STMT);
8908
8909 if (gimple_in_ssa_p (cfun))
8910 old_vali = loadedi;
8911 else
8912 {
8913 old_vali = create_tmp_var (TREE_TYPE (loadedi));
8914 stmt = gimple_build_assign (old_vali, loadedi);
8915 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8916
8917 stmt = gimple_build_assign (loadedi, new_storedi);
8918 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8919 }
8920
8921 /* Note that we always perform the comparison as an integer, even for
8922 floating point. This allows the atomic operation to properly
8923 succeed even with NaNs and -0.0. */
8924 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
8925 stmt = gimple_build_cond_empty (ne);
8926 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
8927
8928 /* Update cfg. */
8929 e = single_succ_edge (store_bb);
8930 e->flags &= ~EDGE_FALLTHRU;
8931 e->flags |= EDGE_FALSE_VALUE;
8932 /* Expect no looping. */
8933 e->probability = profile_probability::guessed_always ();
8934
8935 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
8936 e->probability = profile_probability::guessed_never ();
8937
8938 /* Copy the new value to loadedi (we already did that before the condition
8939 if we are not in SSA). */
8940 if (gimple_in_ssa_p (cfun))
8941 {
8942 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
8943 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
8944 }
8945
8946 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
8947 gsi_remove (&si, true);
8948
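/* Record the compare-and-swap retry cycle as a proper loop, with
   LOOP_HEADER as its header and STORE_BB as its latch.  */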
8949 class loop *loop = alloc_loop ();
8950 loop->header = loop_header;
8951 loop->latch = store_bb;
8952 add_loop (loop, loop_header->loop_father);
8953
8954 if (gimple_in_ssa_p (cfun))
8955 update_ssa (TODO_update_ssa_no_phi);
8956
8957 return true;
8958 }
8959
8960 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
8961
8962 GOMP_atomic_start ();
8963 *addr = rhs;
8964 GOMP_atomic_end ();
8965
8966 The result is not globally atomic, but works so long as all parallel
8967 references are within #pragma omp atomic directives.  According to
8968 responses received from omp@openmp.org, this appears to be within spec,
8969 which makes sense, since that's how several other compilers handle
8970 this situation as well.
8971 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
8972 expanding. STORED_VAL is the operand of the matching
8973 GIMPLE_OMP_ATOMIC_STORE.
8974
8975 We replace
8976 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
8977 loaded_val = *addr;
8978
8979 and replace
8980 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
8981 *addr = stored_val;
8982 */
8983
8984 static bool
8985 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
8986 tree addr, tree loaded_val, tree stored_val)
8987 {
8988 gimple_stmt_iterator si;
8989 gassign *stmt;
8990 tree t;
8991
8992 si = gsi_last_nondebug_bb (load_bb);
8993 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
8994
8995 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
8996 t = build_call_expr (t, 0);
8997 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
8998
8999 tree mem = build_simple_mem_ref (addr);
9000 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
9001 TREE_OPERAND (mem, 1)
9002 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
9003 true),
9004 TREE_OPERAND (mem, 1));
9005 stmt = gimple_build_assign (loaded_val, mem);
9006 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9007 gsi_remove (&si, true);
9008
9009 si = gsi_last_nondebug_bb (store_bb);
9010 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
9011
9012 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
9013 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
9014
9015 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
9016 t = build_call_expr (t, 0);
9017 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
9018 gsi_remove (&si, true);
9019
9020 if (gimple_in_ssa_p (cfun))
9021 update_ssa (TODO_update_ssa_no_phi);
9022 return true;
9023 }
9024
9025 /* Expand a GIMPLE_OMP_ATOMIC statement.  We first try to expand it as
9026 a plain atomic load or store, then with expand_omp_atomic_fetch_op.
9027 If that fails, we try expand_omp_atomic_pipeline, and if that fails
9028 too, the ultimate fallback is wrapping the operation in a mutex
9029 (expand_omp_atomic_mutex).  REGION is the atomic region built
9030 by build_omp_regions_1 (). */
9031
9032 static void
9033 expand_omp_atomic (struct omp_region *region)
9034 {
9035 basic_block load_bb = region->entry, store_bb = region->exit;
9036 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
9037 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
9038 tree loaded_val = gimple_omp_atomic_load_lhs (load);
9039 tree addr = gimple_omp_atomic_load_rhs (load);
9040 tree stored_val = gimple_omp_atomic_store_val (store);
9041 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
9042 HOST_WIDE_INT index;
9043
9044 /* Make sure the type is one of the supported sizes. */
9045 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
9046 index = exact_log2 (index);
9047 if (index >= 0 && index <= 4)
9048 {
9049 unsigned int align = TYPE_ALIGN_UNIT (type);
9050
9051 /* __sync builtins require strict data alignment. */
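/* That is, the object must be at least as aligned as it is wide:
   exact_log2 (align) and INDEX are both log2 values in bytes.  */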
9052 if (exact_log2 (align) >= index)
9053 {
9054 /* Atomic load. */
9055 scalar_mode smode;
9056 if (loaded_val == stored_val
9057 && (is_int_mode (TYPE_MODE (type), &smode)
9058 || is_float_mode (TYPE_MODE (type), &smode))
9059 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9060 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
9061 return;
9062
9063 /* Atomic store. */
9064 if ((is_int_mode (TYPE_MODE (type), &smode)
9065 || is_float_mode (TYPE_MODE (type), &smode))
9066 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
9067 && store_bb == single_succ (load_bb)
9068 && first_stmt (store_bb) == store
9069 && expand_omp_atomic_store (load_bb, addr, loaded_val,
9070 stored_val, index))
9071 return;
9072
9073 /* When possible, use specialized atomic update functions. */
9074 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
9075 && store_bb == single_succ (load_bb)
9076 && expand_omp_atomic_fetch_op (load_bb, addr,
9077 loaded_val, stored_val, index))
9078 return;
9079
9080 /* If we don't have specialized __sync builtins, try and implement
9081 as a compare and swap loop. */
9082 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
9083 loaded_val, stored_val, index))
9084 return;
9085 }
9086 }
9087
9088 /* The ultimate fallback is wrapping the operation in a mutex. */
9089 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
9090 }
9091
9092 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
9093 at REGION_EXIT. */
9094
9095 static void
9096 mark_loops_in_oacc_kernels_region (basic_block region_entry,
9097 basic_block region_exit)
9098 {
9099 class loop *outer = region_entry->loop_father;
9100 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
9101
9102 /* Don't parallelize the kernels region if it contains more than one outer
9103 loop. */
9104 unsigned int nr_outer_loops = 0;
9105 class loop *single_outer = NULL;
9106 for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
9107 {
9108 gcc_assert (loop_outer (loop) == outer);
9109
9110 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
9111 continue;
9112
9113 if (region_exit != NULL
9114 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
9115 continue;
9116
9117 nr_outer_loops++;
9118 single_outer = loop;
9119 }
9120 if (nr_outer_loops != 1)
9121 return;
9122
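/* Also require the single outer loop to form a perfect nest: give up if
   any loop inside it has a sibling.  */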
9123 for (class loop *loop = single_outer->inner;
9124 loop != NULL;
9125 loop = loop->inner)
9126 if (loop->next)
9127 return;
9128
9129 /* Mark the loops in the region. */
9130 for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
9131 loop->in_oacc_kernels_region = true;
9132 }
9133
9134 /* Build target argument identifier from the DEVICE identifier, value
9135 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
9136
9137 static tree
9138 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
9139 {
9140 tree t = build_int_cst (integer_type_node, device);
9141 if (subsequent_param)
9142 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9143 build_int_cst (integer_type_node,
9144 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
9145 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9146 build_int_cst (integer_type_node, id));
9147 return t;
9148 }
9149
9150 /* Like above, but return it in a type that can be directly stored as an
9151 element of the argument array. */
9152
9153 static tree
9154 get_target_argument_identifier (int device, bool subsequent_param, int id)
9155 {
9156 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
9157 return fold_convert (ptr_type_node, t);
9158 }
9159
9160 /* Return a target argument consisting of DEVICE identifier, value identifier
9161 ID, and the actual VALUE. */
9162
9163 static tree
9164 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
9165 tree value)
9166 {
9167 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
9168 fold_convert (integer_type_node, value),
9169 build_int_cst (unsigned_type_node,
9170 GOMP_TARGET_ARG_VALUE_SHIFT));
9171 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
9172 get_target_argument_identifier_1 (device, false, id));
9173 t = fold_convert (ptr_type_node, t);
9174 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
9175 }
9176
9177 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
9178 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
9179 otherwise push an identifier (with DEVICE and ID) and the VALUE as two
9180 separate arguments. */
9181
9182 static void
9183 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
9184 int id, tree value, vec <tree> *args)
9185 {
9186 if (tree_fits_shwi_p (value)
9187 && tree_to_shwi (value) > -(1 << 15)
9188 && tree_to_shwi (value) < (1 << 15))
9189 args->quick_push (get_target_argument_value (gsi, device, id, value));
9190 else
9191 {
9192 args->quick_push (get_target_argument_identifier (device, true, id));
9193 value = fold_convert (ptr_type_node, value);
9194 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
9195 GSI_SAME_STMT);
9196 args->quick_push (value);
9197 }
9198 }
9199
9200 /* Create an array of arguments that is then passed to GOMP_target. */
9201
9202 static tree
9203 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
9204 {
9205 auto_vec <tree, 6> args;
9206 tree clauses = gimple_omp_target_clauses (tgt_stmt);
9207 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
9208 if (c)
9209 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
9210 else
9211 t = integer_minus_one_node;
9212 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9213 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
9214
9215 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
9216 if (c)
9217 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
9218 else
9219 t = integer_minus_one_node;
9220 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
9221 GOMP_TARGET_ARG_THREAD_LIMIT, t,
9222 &args);
9223
9224 /* Produce more, perhaps device specific, arguments here. */
9225
9226 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
9227 args.length () + 1),
9228 ".omp_target_args");
9229 for (unsigned i = 0; i < args.length (); i++)
9230 {
9231 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9232 build_int_cst (integer_type_node, i),
9233 NULL_TREE, NULL_TREE);
9234 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
9235 GSI_SAME_STMT);
9236 }
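/* Terminate the argument array with a null pointer.  */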
9237 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
9238 build_int_cst (integer_type_node, args.length ()),
9239 NULL_TREE, NULL_TREE);
9240 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
9241 GSI_SAME_STMT);
9242 TREE_ADDRESSABLE (argarray) = 1;
9243 return build_fold_addr_expr (argarray);
9244 }
9245
9246 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
9247
9248 static void
9249 expand_omp_target (struct omp_region *region)
9250 {
9251 basic_block entry_bb, exit_bb, new_bb;
9252 struct function *child_cfun;
9253 tree child_fn, block, t;
9254 gimple_stmt_iterator gsi;
9255 gomp_target *entry_stmt;
9256 gimple *stmt;
9257 edge e;
9258 bool offloaded, data_region;
9259 int target_kind;
9260
9261 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
9262 target_kind = gimple_omp_target_kind (entry_stmt);
9263 new_bb = region->entry;
9264
9265 offloaded = is_gimple_omp_offloaded (entry_stmt);
9266 switch (target_kind)
9267 {
9268 case GF_OMP_TARGET_KIND_REGION:
9269 case GF_OMP_TARGET_KIND_UPDATE:
9270 case GF_OMP_TARGET_KIND_ENTER_DATA:
9271 case GF_OMP_TARGET_KIND_EXIT_DATA:
9272 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9273 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9274 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9275 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9276 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9277 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9278 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9279 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9280 data_region = false;
9281 break;
9282 case GF_OMP_TARGET_KIND_DATA:
9283 case GF_OMP_TARGET_KIND_OACC_DATA:
9284 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9285 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9286 data_region = true;
9287 break;
9288 default:
9289 gcc_unreachable ();
9290 }
9291
9292 child_fn = NULL_TREE;
9293 child_cfun = NULL;
9294 if (offloaded)
9295 {
9296 child_fn = gimple_omp_target_child_fn (entry_stmt);
9297 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
9298 }
9299
9300 /* Supported by expand_omp_taskreg, but not here. */
9301 if (child_cfun != NULL)
9302 gcc_checking_assert (!child_cfun->cfg);
9303 gcc_checking_assert (!gimple_in_ssa_p (cfun));
9304
9305 entry_bb = region->entry;
9306 exit_bb = region->exit;
9307
9308 if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
9309 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
9310
9311 /* From here on, all OpenACC compute constructs are mapped to
9312 'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
9313 To distinguish between them, we attach attributes. */
9314 switch (target_kind)
9315 {
9316 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9317 DECL_ATTRIBUTES (child_fn)
9318 = tree_cons (get_identifier ("oacc parallel"),
9319 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9320 break;
9321 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9322 DECL_ATTRIBUTES (child_fn)
9323 = tree_cons (get_identifier ("oacc kernels"),
9324 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9325 break;
9326 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9327 DECL_ATTRIBUTES (child_fn)
9328 = tree_cons (get_identifier ("oacc serial"),
9329 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9330 break;
9331 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9332 DECL_ATTRIBUTES (child_fn)
9333 = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
9334 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9335 break;
9336 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9337 DECL_ATTRIBUTES (child_fn)
9338 = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
9339 NULL_TREE, DECL_ATTRIBUTES (child_fn));
9340 break;
9341 default:
9342 /* Make sure we don't miss any. */
9343 gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
9344 && is_gimple_omp_offloaded (entry_stmt)));
9345 break;
9346 }
9347
9348 if (offloaded)
9349 {
9350 unsigned srcidx, dstidx, num;
9351
9352 /* If the offloading region needs data sent from the parent
9353 function, then the very first statement (except possible
9354 tree profile counter updates) of the offloading body
9355 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
9356 &.OMP_DATA_O is passed as an argument to the child function,
9357 we need to replace it with the argument as seen by the child
9358 function.
9359
9360 In most cases, this will end up being the identity assignment
9361 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
9362 a function call that has been inlined, the original PARM_DECL
9363 .OMP_DATA_I may have been converted into a different local
9364 variable, in which case we need to keep the assignment. */
9365 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
9366 if (data_arg)
9367 {
9368 basic_block entry_succ_bb = single_succ (entry_bb);
9369 gimple_stmt_iterator gsi;
9370 tree arg;
9371 gimple *tgtcopy_stmt = NULL;
9372 tree sender = TREE_VEC_ELT (data_arg, 0);
9373
9374 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
9375 {
9376 gcc_assert (!gsi_end_p (gsi));
9377 stmt = gsi_stmt (gsi);
9378 if (gimple_code (stmt) != GIMPLE_ASSIGN)
9379 continue;
9380
9381 if (gimple_num_ops (stmt) == 2)
9382 {
9383 tree arg = gimple_assign_rhs1 (stmt);
9384
9385 /* We're ignoring the subcode because we're
9386 effectively doing a STRIP_NOPS. */
9387
9388 if (TREE_CODE (arg) == ADDR_EXPR
9389 && TREE_OPERAND (arg, 0) == sender)
9390 {
9391 tgtcopy_stmt = stmt;
9392 break;
9393 }
9394 }
9395 }
9396
9397 gcc_assert (tgtcopy_stmt != NULL);
9398 arg = DECL_ARGUMENTS (child_fn);
9399
9400 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
9401 gsi_remove (&gsi, true);
9402 }
9403
9404 /* Declare local variables needed in CHILD_CFUN. */
9405 block = DECL_INITIAL (child_fn);
9406 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
9407 /* The gimplifier could record temporaries in the offloading block
9408 rather than in containing function's local_decls chain,
9409 which would mean cgraph missed finalizing them. Do it now. */
9410 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
9411 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
9412 varpool_node::finalize_decl (t);
9413 DECL_SAVED_TREE (child_fn) = NULL;
9414 /* We'll create a CFG for child_fn, so no gimple body is needed. */
9415 gimple_set_body (child_fn, NULL);
9416 TREE_USED (block) = 1;
9417
9418 /* Reset DECL_CONTEXT on function arguments. */
9419 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
9420 DECL_CONTEXT (t) = child_fn;
9421
9422 /* Split ENTRY_BB at the GIMPLE_OMP_TARGET statement,
9423 so that the offloading body that follows it can be moved to the child function. */
9424 gsi = gsi_last_nondebug_bb (entry_bb);
9425 stmt = gsi_stmt (gsi);
9426 gcc_assert (stmt
9427 && gimple_code (stmt) == gimple_code (entry_stmt));
9428 e = split_block (entry_bb, stmt);
9429 gsi_remove (&gsi, true);
9430 entry_bb = e->dest;
9431 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
9432
9433 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
9434 if (exit_bb)
9435 {
9436 gsi = gsi_last_nondebug_bb (exit_bb);
9437 gcc_assert (!gsi_end_p (gsi)
9438 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
9439 stmt = gimple_build_return (NULL);
9440 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
9441 gsi_remove (&gsi, true);
9442 }
9443
9444 /* Move the offloading region into CHILD_CFUN. */
9445
9446 block = gimple_block (entry_stmt);
9447
9448 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
9449 if (exit_bb)
9450 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
9451 /* When the OMP expansion process cannot guarantee an up-to-date
9452 loop tree, arrange for the child function to fix up loops. */
9453 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9454 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
9455
9456 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
9457 num = vec_safe_length (child_cfun->local_decls);
9458 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
9459 {
9460 t = (*child_cfun->local_decls)[srcidx];
9461 if (DECL_CONTEXT (t) == cfun->decl)
9462 continue;
9463 if (srcidx != dstidx)
9464 (*child_cfun->local_decls)[dstidx] = t;
9465 dstidx++;
9466 }
9467 if (dstidx != num)
9468 vec_safe_truncate (child_cfun->local_decls, dstidx);
9469
9470 /* Inform the callgraph about the new function. */
9471 child_cfun->curr_properties = cfun->curr_properties;
9472 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
9473 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
9474 cgraph_node *node = cgraph_node::get_create (child_fn);
9475 node->parallelized_function = 1;
9476 cgraph_node::add_new_function (child_fn, true);
9477
9478 /* Add the new function to the offload table. */
9479 if (ENABLE_OFFLOADING)
9480 {
9481 if (in_lto_p)
9482 DECL_PRESERVE_P (child_fn) = 1;
9483 vec_safe_push (offload_funcs, child_fn);
9484 }
9485
9486 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
9487 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
9488
9489 /* Fix the callgraph edges for child_cfun. Those for cfun will be
9490 fixed in a following pass. */
9491 push_cfun (child_cfun);
9492 if (need_asm)
9493 assign_assembler_name_if_needed (child_fn);
9494 cgraph_edge::rebuild_edges ();
9495
9496 /* Some EH regions might become dead, see PR34608. If
9497 pass_cleanup_cfg isn't the first pass to happen with the
9498 new child, these dead EH edges might cause problems.
9499 Clean them up now. */
9500 if (flag_exceptions)
9501 {
9502 basic_block bb;
9503 bool changed = false;
9504
9505 FOR_EACH_BB_FN (bb, cfun)
9506 changed |= gimple_purge_dead_eh_edges (bb);
9507 if (changed)
9508 cleanup_tree_cfg ();
9509 }
9510 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
9511 verify_loop_structure ();
9512 pop_cfun ();
9513
9514 if (dump_file && !gimple_in_ssa_p (cfun))
9515 {
9516 omp_any_child_fn_dumped = true;
9517 dump_function_header (dump_file, child_fn, dump_flags);
9518 dump_function_to_file (child_fn, dump_file, dump_flags);
9519 }
9520
9521 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
9522 }
9523
9524 /* Emit a library call to launch the offloading region, or do data
9525 transfers. */
9526 tree t1, t2, t3, t4, depend, c, clauses;
9527 enum built_in_function start_ix;
9528 unsigned int flags_i = 0;
9529
9530 switch (gimple_omp_target_kind (entry_stmt))
9531 {
9532 case GF_OMP_TARGET_KIND_REGION:
9533 start_ix = BUILT_IN_GOMP_TARGET;
9534 break;
9535 case GF_OMP_TARGET_KIND_DATA:
9536 start_ix = BUILT_IN_GOMP_TARGET_DATA;
9537 break;
9538 case GF_OMP_TARGET_KIND_UPDATE:
9539 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
9540 break;
9541 case GF_OMP_TARGET_KIND_ENTER_DATA:
9542 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9543 break;
9544 case GF_OMP_TARGET_KIND_EXIT_DATA:
9545 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
9546 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
9547 break;
9548 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
9549 case GF_OMP_TARGET_KIND_OACC_KERNELS:
9550 case GF_OMP_TARGET_KIND_OACC_SERIAL:
9551 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
9552 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
9553 start_ix = BUILT_IN_GOACC_PARALLEL;
9554 break;
9555 case GF_OMP_TARGET_KIND_OACC_DATA:
9556 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
9557 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
9558 start_ix = BUILT_IN_GOACC_DATA_START;
9559 break;
9560 case GF_OMP_TARGET_KIND_OACC_UPDATE:
9561 start_ix = BUILT_IN_GOACC_UPDATE;
9562 break;
9563 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
9564 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
9565 break;
9566 case GF_OMP_TARGET_KIND_OACC_DECLARE:
9567 start_ix = BUILT_IN_GOACC_DECLARE;
9568 break;
9569 default:
9570 gcc_unreachable ();
9571 }
9572
9573 clauses = gimple_omp_target_clauses (entry_stmt);
9574
9575 tree device = NULL_TREE;
9576 location_t device_loc = UNKNOWN_LOCATION;
9577 tree goacc_flags = NULL_TREE;
9578 if (is_gimple_omp_oacc (entry_stmt))
9579 {
9580 /* By default, no GOACC_FLAGs are set. */
9581 goacc_flags = integer_zero_node;
9582 }
9583 else
9584 {
9585 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
9586 if (c)
9587 {
9588 device = OMP_CLAUSE_DEVICE_ID (c);
9589 device_loc = OMP_CLAUSE_LOCATION (c);
9590 }
9591 else
9592 {
9593 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
9594 library choose). */
9595 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
9596 device_loc = gimple_location (entry_stmt);
9597 }
9598
9599 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
9600 if (c)
9601 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
9602 }
9603
9604 /* By default, there is no conditional. */
9605 tree cond = NULL_TREE;
9606 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
9607 if (c)
9608 cond = OMP_CLAUSE_IF_EXPR (c);
9609 /* If we found the clause 'if (cond)', build:
9610 OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
9611 OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
9612 if (cond)
9613 {
9614 tree *tp;
9615 if (is_gimple_omp_oacc (entry_stmt))
9616 tp = &goacc_flags;
9617 else
9618 {
9619 /* Ensure 'device' is of the correct type. */
9620 device = fold_convert_loc (device_loc, integer_type_node, device);
9621
9622 tp = &device;
9623 }
9624
9625 cond = gimple_boolify (cond);
9626
9627 basic_block cond_bb, then_bb, else_bb;
9628 edge e;
9629 tree tmp_var;
9630
9631 tmp_var = create_tmp_var (TREE_TYPE (*tp));
9632 if (offloaded)
9633 e = split_block_after_labels (new_bb);
9634 else
9635 {
9636 gsi = gsi_last_nondebug_bb (new_bb);
9637 gsi_prev (&gsi);
9638 e = split_block (new_bb, gsi_stmt (gsi));
9639 }
9640 cond_bb = e->src;
9641 new_bb = e->dest;
9642 remove_edge (e);
9643
9644 then_bb = create_empty_bb (cond_bb);
9645 else_bb = create_empty_bb (then_bb);
9646 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
9647 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
9648
9649 stmt = gimple_build_cond_empty (cond);
9650 gsi = gsi_last_bb (cond_bb);
9651 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9652
9653 gsi = gsi_start_bb (then_bb);
9654 stmt = gimple_build_assign (tmp_var, *tp);
9655 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9656
9657 gsi = gsi_start_bb (else_bb);
9658 if (is_gimple_omp_oacc (entry_stmt))
9659 stmt = gimple_build_assign (tmp_var,
9660 BIT_IOR_EXPR,
9661 *tp,
9662 build_int_cst (integer_type_node,
9663 GOACC_FLAG_HOST_FALLBACK));
9664 else
9665 stmt = gimple_build_assign (tmp_var,
9666 build_int_cst (integer_type_node,
9667 GOMP_DEVICE_HOST_FALLBACK));
9668 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
9669
9670 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
9671 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
9672 add_bb_to_loop (then_bb, cond_bb->loop_father);
9673 add_bb_to_loop (else_bb, cond_bb->loop_father);
9674 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
9675 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
9676
9677 *tp = tmp_var;
9678
9679 gsi = gsi_last_nondebug_bb (new_bb);
9680 }
9681 else
9682 {
9683 gsi = gsi_last_nondebug_bb (new_bb);
9684
9685 if (device != NULL_TREE)
9686 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
9687 true, GSI_SAME_STMT);
9688 }
9689
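/* The data argument, if any, is a TREE_VEC of the mapping arrays; from
   it derive the number of map entries (T1) and the addresses of the
   host-address, size and map-kind arrays (T2, T3 and T4).  Without
   mappings, pass a zero count and null pointers instead.  */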
9690 t = gimple_omp_target_data_arg (entry_stmt);
9691 if (t == NULL)
9692 {
9693 t1 = size_zero_node;
9694 t2 = build_zero_cst (ptr_type_node);
9695 t3 = t2;
9696 t4 = t2;
9697 }
9698 else
9699 {
9700 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
9701 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
9702 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
9703 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
9704 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
9705 }
9706
9707 gimple *g;
9708 bool tagging = false;
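/* TAGGING is set for GOACC_PARALLEL launches, where the trailing launch
   arguments (async, wait, ...) are encoded with oacc_launch_pack tags
   and the list is terminated by a zero tag, instead of being passed
   positionally.  */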
9709 /* Reserve space for the maximum number of arguments used by any start_ix, not counting varargs. */
9710 auto_vec<tree, 11> args;
9711 if (is_gimple_omp_oacc (entry_stmt))
9712 {
9713 tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
9714 TREE_TYPE (goacc_flags), goacc_flags);
9715 goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
9716 NULL_TREE, true,
9717 GSI_SAME_STMT);
9718 args.quick_push (goacc_flags_m);
9719 }
9720 else
9721 args.quick_push (device);
9722 if (offloaded)
9723 args.quick_push (build_fold_addr_expr (child_fn));
9724 args.quick_push (t1);
9725 args.quick_push (t2);
9726 args.quick_push (t3);
9727 args.quick_push (t4);
9728 switch (start_ix)
9729 {
9730 case BUILT_IN_GOACC_DATA_START:
9731 case BUILT_IN_GOACC_DECLARE:
9732 case BUILT_IN_GOMP_TARGET_DATA:
9733 break;
9734 case BUILT_IN_GOMP_TARGET:
9735 case BUILT_IN_GOMP_TARGET_UPDATE:
9736 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
9737 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
9738 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
9739 if (c)
9740 depend = OMP_CLAUSE_DECL (c);
9741 else
9742 depend = build_int_cst (ptr_type_node, 0);
9743 args.quick_push (depend);
9744 if (start_ix == BUILT_IN_GOMP_TARGET)
9745 args.quick_push (get_target_arguments (&gsi, entry_stmt));
9746 break;
9747 case BUILT_IN_GOACC_PARALLEL:
9748 if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
9749 {
9750 tree dims = NULL_TREE;
9751 unsigned int ix;
9752
9753 /* For serial constructs we set all dimensions to 1. */
9754 for (ix = GOMP_DIM_MAX; ix--;)
9755 dims = tree_cons (NULL_TREE, integer_one_node, dims);
9756 oacc_replace_fn_attrib (child_fn, dims);
9757 }
9758 else
9759 oacc_set_fn_attrib (child_fn, clauses, &args);
9760 tagging = true;
9761 /* FALLTHRU */
9762 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
9763 case BUILT_IN_GOACC_UPDATE:
9764 {
9765 tree t_async = NULL_TREE;
9766
9767 /* If present, use the value specified by the respective
9768 clause, making sure that is of the correct type. */
9769 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
9770 if (c)
9771 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9772 integer_type_node,
9773 OMP_CLAUSE_ASYNC_EXPR (c));
9774 else if (!tagging)
9775 /* Default values for t_async. */
9776 t_async = fold_convert_loc (gimple_location (entry_stmt),
9777 integer_type_node,
9778 build_int_cst (integer_type_node,
9779 GOMP_ASYNC_SYNC));
9780 if (tagging && t_async)
9781 {
9782 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
9783
9784 if (TREE_CODE (t_async) == INTEGER_CST)
9785 {
9786 /* See if we can pack the async arg in to the tag's
9787 operand. */
9788 i_async = TREE_INT_CST_LOW (t_async);
9789 if (i_async < GOMP_LAUNCH_OP_MAX)
9790 t_async = NULL_TREE;
9791 else
9792 i_async = GOMP_LAUNCH_OP_MAX;
9793 }
9794 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
9795 i_async));
9796 }
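/* A sketch of the encoding, assuming the GOMP_LAUNCH_* layout from
   gomp-constants.h: a constant async queue that fits in the tag's operand
   field, e.g. async(3), is folded into the tag itself as
   oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE, 3) and no separate
   argument follows; otherwise the tag keeps GOMP_LAUNCH_OP_MAX and the
   actual value is pushed as the next call argument just below. */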
9797 if (t_async)
9798 args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
9799 NULL_TREE, true,
9800 GSI_SAME_STMT));
9801
9802 /* Save the argument index, and ... */
9803 unsigned t_wait_idx = args.length ();
9804 unsigned num_waits = 0;
9805 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
9806 if (!tagging || c)
9807 /* ... push a placeholder. */
9808 args.safe_push (integer_zero_node);
9809
9810 for (; c; c = OMP_CLAUSE_CHAIN (c))
9811 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
9812 {
9813 tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
9814 integer_type_node,
9815 OMP_CLAUSE_WAIT_EXPR (c));
9816 arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
9817 GSI_SAME_STMT);
9818 args.safe_push (arg);
9819 num_waits++;
9820 }
9821
9822 if (!tagging || num_waits)
9823 {
9824 tree len;
9825
9826 /* Now that we know the number, update the placeholder. */
9827 if (tagging)
9828 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
9829 else
9830 len = build_int_cst (integer_type_node, num_waits);
9831 len = fold_convert_loc (gimple_location (entry_stmt),
9832 unsigned_type_node, len);
9833 args[t_wait_idx] = len;
9834 }
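/* For example, "wait (1, 2)" under TAGGING leaves an
   oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, 2) tag in the
   placeholder slot, followed by the two queue ids; without TAGGING the
   placeholder simply becomes the wait count. */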
9835 }
9836 break;
9837 default:
9838 gcc_unreachable ();
9839 }
9840 if (tagging)
9841 /* Push terminal marker - zero. */
9842 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
9843
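/* ARGS now holds the complete argument list for START_IX.  As a rough
   sketch, assuming the usual libgomp prototypes (e.g. GOMP_target_ext in
   libgomp/target.c), a "#pragma omp target" region becomes a call of the
   form
     GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                      flags, depend, args);
   whereas the OpenACC entry points take the marshalled GOACC flags word
   first and, when TAGGING, end with a sequence of oacc_launch_pack tags
   terminated by the zero marker pushed above. */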
9844 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
9845 gimple_set_location (g, gimple_location (entry_stmt));
9846 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
9847 if (!offloaded)
9848 {
9849 g = gsi_stmt (gsi);
9850 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
9851 gsi_remove (&gsi, true);
9852 }
9853 if (data_region && region->exit)
9854 {
9855 gsi = gsi_last_nondebug_bb (region->exit);
9856 g = gsi_stmt (gsi);
9857 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
9858 gsi_remove (&gsi, true);
9859 }
9860 }
9861
9862 /* Expand the parallel region tree rooted at REGION. Expansion
9863 proceeds in depth-first order. Innermost regions are expanded
9864 first. This way, parallel regions that require a new function to
9865 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
9866 internal dependencies in their body. */
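/* For instance, for a "#pragma omp parallel" enclosing a "#pragma omp for",
   the inner GIMPLE_OMP_FOR region is expanded first, and only then is the
   enclosing GIMPLE_OMP_PARALLEL body outlined into its child function. */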
9867
9868 static void
9869 expand_omp (struct omp_region *region)
9870 {
9871 omp_any_child_fn_dumped = false;
9872 while (region)
9873 {
9874 location_t saved_location;
9875 gimple *inner_stmt = NULL;
9876
9877 /* First, determine whether this is a combined parallel+workshare
9878 region. */
9879 if (region->type == GIMPLE_OMP_PARALLEL)
9880 determine_parallel_type (region);
9881
9882 if (region->type == GIMPLE_OMP_FOR
9883 && gimple_omp_for_combined_p (last_stmt (region->entry)))
9884 inner_stmt = last_stmt (region->inner->entry);
9885
9886 if (region->inner)
9887 expand_omp (region->inner);
9888
9889 saved_location = input_location;
9890 if (gimple_has_location (last_stmt (region->entry)))
9891 input_location = gimple_location (last_stmt (region->entry));
9892
9893 switch (region->type)
9894 {
9895 case GIMPLE_OMP_PARALLEL:
9896 case GIMPLE_OMP_TASK:
9897 expand_omp_taskreg (region);
9898 break;
9899
9900 case GIMPLE_OMP_FOR:
9901 expand_omp_for (region, inner_stmt);
9902 break;
9903
9904 case GIMPLE_OMP_SECTIONS:
9905 expand_omp_sections (region);
9906 break;
9907
9908 case GIMPLE_OMP_SECTION:
9909 /* Individual omp sections are handled together with their
9910 parent GIMPLE_OMP_SECTIONS region. */
9911 break;
9912
9913 case GIMPLE_OMP_SINGLE:
9914 expand_omp_single (region);
9915 break;
9916
9917 case GIMPLE_OMP_ORDERED:
9918 {
9919 gomp_ordered *ord_stmt
9920 = as_a <gomp_ordered *> (last_stmt (region->entry));
9921 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
9922 OMP_CLAUSE_DEPEND))
9923 {
9924 /* We'll expand these when expanding the corresponding
9925 worksharing region with an ordered(n) clause. */
9926 gcc_assert (region->outer
9927 && region->outer->type == GIMPLE_OMP_FOR);
9928 region->ord_stmt = ord_stmt;
9929 break;
9930 }
9931 }
9932 /* FALLTHRU */
9933 case GIMPLE_OMP_MASTER:
9934 case GIMPLE_OMP_TASKGROUP:
9935 case GIMPLE_OMP_CRITICAL:
9936 case GIMPLE_OMP_TEAMS:
9937 expand_omp_synch (region);
9938 break;
9939
9940 case GIMPLE_OMP_ATOMIC_LOAD:
9941 expand_omp_atomic (region);
9942 break;
9943
9944 case GIMPLE_OMP_TARGET:
9945 expand_omp_target (region);
9946 break;
9947
9948 default:
9949 gcc_unreachable ();
9950 }
9951
9952 input_location = saved_location;
9953 region = region->next;
9954 }
9955 if (omp_any_child_fn_dumped)
9956 {
9957 if (dump_file)
9958 dump_function_header (dump_file, current_function_decl, dump_flags);
9959 omp_any_child_fn_dumped = false;
9960 }
9961 }
9962
9963 /* Helper for build_omp_regions. Scan the dominator tree starting at
9964 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
9965 true, the function ends once a single tree is built (otherwise, a whole
9966 forest of OMP constructs may be built). */
9967
9968 static void
9969 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
9970 bool single_tree)
9971 {
9972 gimple_stmt_iterator gsi;
9973 gimple *stmt;
9974 basic_block son;
9975
9976 gsi = gsi_last_nondebug_bb (bb);
9977 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
9978 {
9979 struct omp_region *region;
9980 enum gimple_code code;
9981
9982 stmt = gsi_stmt (gsi);
9983 code = gimple_code (stmt);
9984 if (code == GIMPLE_OMP_RETURN)
9985 {
9986 /* STMT is the return point out of region PARENT. Mark it
9987 as PARENT's exit point and continue with PARENT's own
9988 enclosing region as the current parent. */
9989 gcc_assert (parent);
9990 region = parent;
9991 region->exit = bb;
9992 parent = parent->outer;
9993 }
9994 else if (code == GIMPLE_OMP_ATOMIC_STORE)
9995 {
9996 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
9997 GIMPLE_OMP_RETURN, but matches with
9998 GIMPLE_OMP_ATOMIC_LOAD. */
9999 gcc_assert (parent);
10000 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
10001 region = parent;
10002 region->exit = bb;
10003 parent = parent->outer;
10004 }
10005 else if (code == GIMPLE_OMP_CONTINUE)
10006 {
10007 gcc_assert (parent);
10008 parent->cont = bb;
10009 }
10010 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
10011 {
10012 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
10013 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
10014 }
10015 else
10016 {
10017 region = new_omp_region (bb, code, parent);
10018 /* Otherwise... */
10019 if (code == GIMPLE_OMP_TARGET)
10020 {
10021 switch (gimple_omp_target_kind (stmt))
10022 {
10023 case GF_OMP_TARGET_KIND_REGION:
10024 case GF_OMP_TARGET_KIND_DATA:
10025 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10026 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10027 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10028 case GF_OMP_TARGET_KIND_OACC_DATA:
10029 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10030 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10031 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10032 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10033 break;
10034 case GF_OMP_TARGET_KIND_UPDATE:
10035 case GF_OMP_TARGET_KIND_ENTER_DATA:
10036 case GF_OMP_TARGET_KIND_EXIT_DATA:
10037 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10038 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
10039 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10040 /* ..., other than for those stand-alone directives... */
10041 region = NULL;
10042 break;
10043 default:
10044 gcc_unreachable ();
10045 }
10046 }
10047 else if (code == GIMPLE_OMP_ORDERED
10048 && omp_find_clause (gimple_omp_ordered_clauses
10049 (as_a <gomp_ordered *> (stmt)),
10050 OMP_CLAUSE_DEPEND))
10051 /* #pragma omp ordered depend is also just a stand-alone
10052 directive. */
10053 region = NULL;
10054 else if (code == GIMPLE_OMP_TASK
10055 && gimple_omp_task_taskwait_p (stmt))
10056 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
10057 region = NULL;
10058 /* ..., this directive becomes the parent for a new region. */
10059 if (region)
10060 parent = region;
10061 }
10062 }
10063
10064 if (single_tree && !parent)
10065 return;
10066
10067 for (son = first_dom_son (CDI_DOMINATORS, bb);
10068 son;
10069 son = next_dom_son (CDI_DOMINATORS, son))
10070 build_omp_regions_1 (son, parent, single_tree);
10071 }
10072
10073 /* Builds the tree of OMP regions rooted at ROOT, storing it in
10074 root_omp_region. */
10075
10076 static void
10077 build_omp_regions_root (basic_block root)
10078 {
10079 gcc_assert (root_omp_region == NULL);
10080 build_omp_regions_1 (root, NULL, true);
10081 gcc_assert (root_omp_region != NULL);
10082 }
10083
10084 /* Expands the OMP construct (and its subconstructs) starting in HEAD. */
10085
10086 void
10087 omp_expand_local (basic_block head)
10088 {
10089 build_omp_regions_root (head);
10090 if (dump_file && (dump_flags & TDF_DETAILS))
10091 {
10092 fprintf (dump_file, "\nOMP region tree\n\n");
10093 dump_omp_region (dump_file, root_omp_region, 0);
10094 fprintf (dump_file, "\n");
10095 }
10096
10097 remove_exit_barriers (root_omp_region);
10098 expand_omp (root_omp_region);
10099
10100 omp_free_regions ();
10101 }
10102
10103 /* Scan the CFG and build a tree of OMP regions, storing it in
10104 root_omp_region. */
10105
10106 static void
10107 build_omp_regions (void)
10108 {
10109 gcc_assert (root_omp_region == NULL);
10110 calculate_dominance_info (CDI_DOMINATORS);
10111 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
10112 }
10113
10114 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
10115
10116 static unsigned int
10117 execute_expand_omp (void)
10118 {
10119 build_omp_regions ();
10120
10121 if (!root_omp_region)
10122 return 0;
10123
10124 if (dump_file)
10125 {
10126 fprintf (dump_file, "\nOMP region tree\n\n");
10127 dump_omp_region (dump_file, root_omp_region, 0);
10128 fprintf (dump_file, "\n");
10129 }
10130
10131 remove_exit_barriers (root_omp_region);
10132
10133 expand_omp (root_omp_region);
10134
10135 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
10136 verify_loop_structure ();
10137 cleanup_tree_cfg ();
10138
10139 omp_free_regions ();
10140
10141 return 0;
10142 }
10143
10144 /* OMP expansion -- the default pass, run before creation of SSA form. */
10145
10146 namespace {
10147
10148 const pass_data pass_data_expand_omp =
10149 {
10150 GIMPLE_PASS, /* type */
10151 "ompexp", /* name */
10152 OPTGROUP_OMP, /* optinfo_flags */
10153 TV_NONE, /* tv_id */
10154 PROP_gimple_any, /* properties_required */
10155 PROP_gimple_eomp, /* properties_provided */
10156 0, /* properties_destroyed */
10157 0, /* todo_flags_start */
10158 0, /* todo_flags_finish */
10159 };
10160
10161 class pass_expand_omp : public gimple_opt_pass
10162 {
10163 public:
10164 pass_expand_omp (gcc::context *ctxt)
10165 : gimple_opt_pass (pass_data_expand_omp, ctxt)
10166 {}
10167
10168 /* opt_pass methods: */
10169 virtual unsigned int execute (function *)
10170 {
10171 bool gate = ((flag_openacc != 0 || flag_openmp != 0
10172 || flag_openmp_simd != 0)
10173 && !seen_error ());
10174
10175 /* This pass always runs, to provide PROP_gimple_eomp.
10176 But often, there is nothing to do. */
10177 if (!gate)
10178 return 0;
10179
10180 return execute_expand_omp ();
10181 }
10182
10183 }; // class pass_expand_omp
10184
10185 } // anon namespace
10186
10187 gimple_opt_pass *
10188 make_pass_expand_omp (gcc::context *ctxt)
10189 {
10190 return new pass_expand_omp (ctxt);
10191 }
10192
10193 namespace {
10194
10195 const pass_data pass_data_expand_omp_ssa =
10196 {
10197 GIMPLE_PASS, /* type */
10198 "ompexpssa", /* name */
10199 OPTGROUP_OMP, /* optinfo_flags */
10200 TV_NONE, /* tv_id */
10201 PROP_cfg | PROP_ssa, /* properties_required */
10202 PROP_gimple_eomp, /* properties_provided */
10203 0, /* properties_destroyed */
10204 0, /* todo_flags_start */
10205 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
10206 };
10207
10208 class pass_expand_omp_ssa : public gimple_opt_pass
10209 {
10210 public:
10211 pass_expand_omp_ssa (gcc::context *ctxt)
10212 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
10213 {}
10214
10215 /* opt_pass methods: */
10216 virtual bool gate (function *fun)
10217 {
10218 return !(fun->curr_properties & PROP_gimple_eomp);
10219 }
10220 virtual unsigned int execute (function *) { return execute_expand_omp (); }
10221 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
10222
10223 }; // class pass_expand_omp_ssa
10224
10225 } // anon namespace
10226
10227 gimple_opt_pass *
10228 make_pass_expand_omp_ssa (gcc::context *ctxt)
10229 {
10230 return new pass_expand_omp_ssa (ctxt);
10231 }
10232
10233 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
10234 GIMPLE_* codes. */
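/* The boolean result tells the caller whether the usual fallthru edge to
   the next basic block should still be created for BB. */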
10235
10236 bool
10237 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
10238 int *region_idx)
10239 {
10240 gimple *last = last_stmt (bb);
10241 enum gimple_code code = gimple_code (last);
10242 struct omp_region *cur_region = *region;
10243 bool fallthru = false;
10244
10245 switch (code)
10246 {
10247 case GIMPLE_OMP_PARALLEL:
10248 case GIMPLE_OMP_FOR:
10249 case GIMPLE_OMP_SINGLE:
10250 case GIMPLE_OMP_TEAMS:
10251 case GIMPLE_OMP_MASTER:
10252 case GIMPLE_OMP_TASKGROUP:
10253 case GIMPLE_OMP_CRITICAL:
10254 case GIMPLE_OMP_SECTION:
10255 cur_region = new_omp_region (bb, code, cur_region);
10256 fallthru = true;
10257 break;
10258
10259 case GIMPLE_OMP_TASK:
10260 cur_region = new_omp_region (bb, code, cur_region);
10261 fallthru = true;
10262 if (gimple_omp_task_taskwait_p (last))
10263 cur_region = cur_region->outer;
10264 break;
10265
10266 case GIMPLE_OMP_ORDERED:
10267 cur_region = new_omp_region (bb, code, cur_region);
10268 fallthru = true;
10269 if (omp_find_clause (gimple_omp_ordered_clauses
10270 (as_a <gomp_ordered *> (last)),
10271 OMP_CLAUSE_DEPEND))
10272 cur_region = cur_region->outer;
10273 break;
10274
10275 case GIMPLE_OMP_TARGET:
10276 cur_region = new_omp_region (bb, code, cur_region);
10277 fallthru = true;
10278 switch (gimple_omp_target_kind (last))
10279 {
10280 case GF_OMP_TARGET_KIND_REGION:
10281 case GF_OMP_TARGET_KIND_DATA:
10282 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
10283 case GF_OMP_TARGET_KIND_OACC_KERNELS:
10284 case GF_OMP_TARGET_KIND_OACC_SERIAL:
10285 case GF_OMP_TARGET_KIND_OACC_DATA:
10286 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
10287 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
10288 case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
10289 case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
10290 break;
10291 case GF_OMP_TARGET_KIND_UPDATE:
10292 case GF_OMP_TARGET_KIND_ENTER_DATA:
10293 case GF_OMP_TARGET_KIND_EXIT_DATA:
10294 case GF_OMP_TARGET_KIND_OACC_UPDATE:
10295 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
10296 case GF_OMP_TARGET_KIND_OACC_DECLARE:
10297 cur_region = cur_region->outer;
10298 break;
10299 default:
10300 gcc_unreachable ();
10301 }
10302 break;
10303
10304 case GIMPLE_OMP_SECTIONS:
10305 cur_region = new_omp_region (bb, code, cur_region);
10306 fallthru = true;
10307 break;
10308
10309 case GIMPLE_OMP_SECTIONS_SWITCH:
10310 fallthru = false;
10311 break;
10312
10313 case GIMPLE_OMP_ATOMIC_LOAD:
10314 case GIMPLE_OMP_ATOMIC_STORE:
10315 fallthru = true;
10316 break;
10317
10318 case GIMPLE_OMP_RETURN:
10319 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
10320 somewhere other than the next block. That edge will be
10321 created later. */
10322 cur_region->exit = bb;
10323 if (cur_region->type == GIMPLE_OMP_TASK)
10324 /* Add an edge corresponding to not scheduling the task
10325 immediately. */
10326 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
10327 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
10328 cur_region = cur_region->outer;
10329 break;
10330
10331 case GIMPLE_OMP_CONTINUE:
10332 cur_region->cont = bb;
10333 switch (cur_region->type)
10334 {
10335 case GIMPLE_OMP_FOR:
10336 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
10337 successor edges as abnormal to prevent splitting
10338 them. */
10339 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
10340 /* Make the loopback edge. */
10341 make_edge (bb, single_succ (cur_region->entry),
10342 EDGE_ABNORMAL);
10343
10344 /* Create an edge from GIMPLE_OMP_FOR to exit, which
10345 corresponds to the case that the body of the loop
10346 is not executed at all. */
10347 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
10348 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
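/* Taken together: the loop body is only entered via the abnormal edge out
   of the GIMPLE_OMP_FOR block, iterates via the loopback edge from the
   GIMPLE_OMP_CONTINUE block, and the block following the continue is
   reachable both from the GIMPLE_OMP_FOR block (zero-iteration case) and
   from the continue block itself. */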
10349 fallthru = false;
10350 break;
10351
10352 case GIMPLE_OMP_SECTIONS:
10353 /* Wire up the edges into and out of the nested sections. */
10354 {
10355 basic_block switch_bb = single_succ (cur_region->entry);
10356
10357 struct omp_region *i;
10358 for (i = cur_region->inner; i ; i = i->next)
10359 {
10360 gcc_assert (i->type == GIMPLE_OMP_SECTION);
10361 make_edge (switch_bb, i->entry, 0);
10362 make_edge (i->exit, bb, EDGE_FALLTHRU);
10363 }
10364
10365 /* Make the loopback edge to the block with
10366 GIMPLE_OMP_SECTIONS_SWITCH. */
10367 make_edge (bb, switch_bb, 0);
10368
10369 /* Make the edge from the switch to exit. */
10370 make_edge (switch_bb, bb->next_bb, 0);
10371 fallthru = false;
10372 }
10373 break;
10374
10375 case GIMPLE_OMP_TASK:
10376 fallthru = true;
10377 break;
10378
10379 default:
10380 gcc_unreachable ();
10381 }
10382 break;
10383
10384 default:
10385 gcc_unreachable ();
10386 }
10387
10388 if (*region != cur_region)
10389 {
10390 *region = cur_region;
10391 if (cur_region)
10392 *region_idx = cur_region->entry->index;
10393 else
10394 *region_idx = 0;
10395 }
10396
10397 return fallthru;
10398 }