1/* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5Copyright (C) 2005-2018 Free Software Foundation, Inc.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "memmodel.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "gimple.h"
32#include "cfghooks.h"
33#include "tree-pass.h"
34#include "ssa.h"
35#include "optabs.h"
36#include "cgraph.h"
37#include "pretty-print.h"
38#include "diagnostic-core.h"
39#include "fold-const.h"
40#include "stor-layout.h"
41#include "cfganal.h"
42#include "internal-fn.h"
43#include "gimplify.h"
44#include "gimple-iterator.h"
45#include "gimplify-me.h"
46#include "gimple-walk.h"
47#include "tree-cfg.h"
48#include "tree-into-ssa.h"
49#include "tree-ssa.h"
50#include "splay-tree.h"
51#include "cfgloop.h"
52#include "omp-general.h"
53#include "omp-offload.h"
54#include "tree-cfgcleanup.h"
55#include "symbol-summary.h"
56#include "gomp-constants.h"
57#include "gimple-pretty-print.h"
58#include "hsa-common.h"
59#include "stringpool.h"
60#include "attribs.h"
61
62/* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
65
66struct omp_region
67{
68 /* The enclosing region. */
69 struct omp_region *outer;
70
71 /* First child region. */
72 struct omp_region *inner;
73
74 /* Next peer region. */
75 struct omp_region *next;
76
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
79
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
82
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
85
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
90
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
93
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
96
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
99
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
102
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
106};
107
108static struct omp_region *root_omp_region;
109static bool omp_any_child_fn_dumped;
110
111static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113static gphi *find_phi_with_arg_on_edge (tree, edge);
114static void expand_omp (struct omp_region *region);
115
116/* Return true if REGION is a combined parallel+workshare region. */
117
118static inline bool
119is_combined_parallel (struct omp_region *region)
120{
121 return region->is_combined_parallel;
122}
123
124/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
128
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
134
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
137
138 Is lowered into:
139
140 # BLOCK 2 (PAR_ENTRY_BB)
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
143
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
149
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
154
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
160
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
165
166static bool
167workshare_safe_to_combine_p (basic_block ws_entry_bb)
168{
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
171
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
174
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176 if (gimple_omp_for_kind (ws_stmt) != GF_OMP_FOR_KIND_FOR)
177 return false;
178
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
180
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
185
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
196
197 return true;
198}
199
200/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
202
203static tree
204omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
205{
206 if (!simd_schedule || integer_zerop (chunk_size))
207 return chunk_size;
208
209 poly_uint64 vf = omp_max_vf ();
210 if (known_eq (vf, 1U))
211 return chunk_size;
212
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
218}
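/* Illustrative sketch (values assumed, not taken from the code above):
   if omp_max_vf () returns 8 and the incoming chunk_size is 10, the
   expression built above evaluates to (10 + 7) & -8 == 16, i.e. the
   chunk size is rounded up to the next multiple of the vectorization
   factor so a simd chunk is never split in the middle of a vector.  */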
219
220/* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
222 expanded. */
223
224static vec<tree, va_gc> *
225get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
226{
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
230
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
232 {
233 struct omp_for_data fd;
234 tree n1, n2;
235
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
239
240 if (gimple_omp_for_combined_into_p (for_stmt))
241 {
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
251 }
252
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
254
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
257
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
260
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
263
264 if (fd.chunk_size)
265 {
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
269 }
270
271 return ws_args;
272 }
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
274 {
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
283 }
284
285 gcc_unreachable ();
286}
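/* Rough illustration (example values assumed): for
     #pragma omp parallel for schedule (dynamic, 16)
     for (i = 0; i < n; i++) ...
   the vector built above holds the loop bounds and step converted to
   long plus the (possibly simd-adjusted) chunk size, roughly
   { (long) 0, (long) n, (long) 1, (long) 16 }; for a sections region it
   holds a single element, the section count.  */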
287
288/* Discover whether REGION is a combined parallel+workshare region. */
289
290static void
291determine_parallel_type (struct omp_region *region)
292{
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
295
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
300
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
306
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
313
314 /* Give up for task reductions on the parallel; while it is implementable,
315 adding another big set of APIs or slowing down the normal paths is
316 not acceptable. */
317 tree pclauses = gimple_omp_parallel_clauses (last_stmt (par_entry_bb));
318 if (omp_find_clause (pclauses, OMP_CLAUSE__REDUCTEMP_))
319 return;
320
321 if (single_succ (par_entry_bb) == ws_entry_bb
322 && single_succ (ws_exit_bb) == par_exit_bb
323 && workshare_safe_to_combine_p (ws_entry_bb)
324 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
325 || (last_and_only_stmt (ws_entry_bb)
326 && last_and_only_stmt (par_exit_bb))))
327 {
328 gimple *par_stmt = last_stmt (par_entry_bb);
329 gimple *ws_stmt = last_stmt (ws_entry_bb);
330
331 if (region->inner->type == GIMPLE_OMP_FOR)
332 {
333 /* If this is a combined parallel loop, we need to determine
334 whether or not to use the combined library calls. There
335 are two cases where we do not apply the transformation:
336 static loops and any kind of ordered loop. In the first
337 case, we already open code the loop so there is no need
338 to do anything else. In the latter case, the combined
339 parallel loop call would still need extra synchronization
340 to implement ordered semantics, so there would not be any
341 gain in using the combined call. */
342 tree clauses = gimple_omp_for_clauses (ws_stmt);
343 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
344 if (c == NULL
345 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
346 == OMP_CLAUSE_SCHEDULE_STATIC)
347 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
348 || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_))
349 return;
350 }
351 else if (region->inner->type == GIMPLE_OMP_SECTIONS
352 && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
353 OMP_CLAUSE__REDUCTEMP_))
354 return;
355
356 region->is_combined_parallel = true;
357 region->inner->is_combined_parallel = true;
358 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
359 }
360}
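/* Sketch of the effect (example assumed): for
     #pragma omp parallel
     #pragma omp for schedule (guided)
   with nothing between the two directives, both regions get marked
   is_combined_parallel and a single combined GOMP_parallel_loop_* call
   is emitted later; a static schedule, an ordered clause or a reduction
   temporary keeps the two regions separate, as checked above.  */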
361
362/* Debugging dumps for parallel regions. */
363void dump_omp_region (FILE *, struct omp_region *, int);
364void debug_omp_region (struct omp_region *);
365void debug_all_omp_regions (void);
366
367/* Dump the parallel region tree rooted at REGION. */
368
369void
370dump_omp_region (FILE *file, struct omp_region *region, int indent)
371{
372 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
373 gimple_code_name[region->type]);
374
375 if (region->inner)
376 dump_omp_region (file, region->inner, indent + 4);
377
378 if (region->cont)
379 {
380 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
381 region->cont->index);
382 }
383
384 if (region->exit)
385 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
386 region->exit->index);
387 else
388 fprintf (file, "%*s[no exit marker]\n", indent, "");
389
390 if (region->next)
391 dump_omp_region (file, region->next, indent);
392}
393
394DEBUG_FUNCTION void
395debug_omp_region (struct omp_region *region)
396{
397 dump_omp_region (stderr, region, 0);
398}
399
400DEBUG_FUNCTION void
401debug_all_omp_regions (void)
402{
403 dump_omp_region (stderr, root_omp_region, 0);
404}
405
406/* Create a new parallel region starting at STMT inside region PARENT. */
407
408static struct omp_region *
409new_omp_region (basic_block bb, enum gimple_code type,
410 struct omp_region *parent)
411{
412 struct omp_region *region = XCNEW (struct omp_region);
413
414 region->outer = parent;
415 region->entry = bb;
416 region->type = type;
417
418 if (parent)
419 {
420 /* This is a nested region. Add it to the list of inner
421 regions in PARENT. */
422 region->next = parent->inner;
423 parent->inner = region;
424 }
425 else
426 {
427 /* This is a toplevel region. Add it to the list of toplevel
428 regions in ROOT_OMP_REGION. */
429 region->next = root_omp_region;
430 root_omp_region = region;
431 }
432
433 return region;
434}
435
436/* Release the memory associated with the region tree rooted at REGION. */
437
438static void
439free_omp_region_1 (struct omp_region *region)
440{
441 struct omp_region *i, *n;
442
443 for (i = region->inner; i ; i = n)
444 {
445 n = i->next;
446 free_omp_region_1 (i);
447 }
448
449 free (region);
450}
451
452/* Release the memory for the entire omp region tree. */
453
454void
455omp_free_regions (void)
456{
457 struct omp_region *r, *n;
458 for (r = root_omp_region; r ; r = n)
459 {
460 n = r->next;
461 free_omp_region_1 (r);
462 }
463 root_omp_region = NULL;
464}
465
466/* A convenience function to build an empty GIMPLE_COND with just the
467 condition. */
468
469static gcond *
470gimple_build_cond_empty (tree cond)
471{
472 enum tree_code pred_code;
473 tree lhs, rhs;
474
475 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
476 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
477}
478
479/* Return true if a parallel REGION is within a declare target function or
480 within a target region and is not a part of a gridified target. */
481
482static bool
483parallel_needs_hsa_kernel_p (struct omp_region *region)
484{
485 bool indirect = false;
486 for (region = region->outer; region; region = region->outer)
487 {
488 if (region->type == GIMPLE_OMP_PARALLEL)
489 indirect = true;
490 else if (region->type == GIMPLE_OMP_TARGET)
491 {
492 gomp_target *tgt_stmt
493 = as_a <gomp_target *> (last_stmt (region->entry));
494
495 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
496 OMP_CLAUSE__GRIDDIM_))
497 return indirect;
498 else
499 return true;
500 }
501 }
502
503 if (lookup_attribute ("omp declare target",
504 DECL_ATTRIBUTES (current_function_decl)))
505 return true;
506
507 return false;
508}
509
510/* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
511 Add CHILD_FNDECL to decl chain of the supercontext of the block
512 ENTRY_BLOCK - this is the block which originally contained the
513 code from which CHILD_FNDECL was created.
514
515 Together, these actions ensure that the debug info for the outlined
516 function will be emitted with the correct lexical scope. */
517
518static void
519adjust_context_and_scope (struct omp_region *region, tree entry_block,
520 tree child_fndecl)
521{
522 tree parent_fndecl = NULL_TREE;
523 gimple *entry_stmt;
524 /* OMP expansion expands inner regions before outer ones, so if
525 we e.g. have explicit task region nested in parallel region, when
526 expanding the task region current_function_decl will be the original
527 source function, but we actually want to use as context the child
528 function of the parallel. */
529 for (region = region->outer;
530 region && parent_fndecl == NULL_TREE; region = region->outer)
531 switch (region->type)
532 {
533 case GIMPLE_OMP_PARALLEL:
534 case GIMPLE_OMP_TASK:
535 case GIMPLE_OMP_TEAMS:
536 entry_stmt = last_stmt (region->entry);
537 parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
538 break;
539 case GIMPLE_OMP_TARGET:
540 entry_stmt = last_stmt (region->entry);
541 parent_fndecl
542 = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
543 break;
544 default:
545 break;
546 }
547
548 if (parent_fndecl == NULL_TREE)
549 parent_fndecl = current_function_decl;
550 DECL_CONTEXT (child_fndecl) = parent_fndecl;
551
552 if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
553 {
554 tree b = BLOCK_SUPERCONTEXT (entry_block);
555 if (TREE_CODE (b) == BLOCK)
556 {
557 DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
558 BLOCK_VARS (b) = child_fndecl;
559 }
560 }
561}
562
563/* Build the function calls to GOMP_parallel etc to actually
564 generate the parallel operation. REGION is the parallel region
565 being expanded. BB is the block where to insert the code. WS_ARGS
566 will be set if this is a call to a combined parallel+workshare
567 construct, it contains the list of additional arguments needed by
568 the workshare construct. */
569
570static void
571expand_parallel_call (struct omp_region *region, basic_block bb,
572 gomp_parallel *entry_stmt,
573 vec<tree, va_gc> *ws_args)
574{
575 tree t, t1, t2, val, cond, c, clauses, flags;
576 gimple_stmt_iterator gsi;
577 gimple *stmt;
578 enum built_in_function start_ix;
579 int start_ix2;
580 location_t clause_loc;
581 vec<tree, va_gc> *args;
582
583 clauses = gimple_omp_parallel_clauses (entry_stmt);
584
585 /* Determine what flavor of GOMP_parallel we will be
586 emitting. */
587 start_ix = BUILT_IN_GOMP_PARALLEL;
588 tree rtmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
589 if (rtmp)
590 start_ix = BUILT_IN_GOMP_PARALLEL_REDUCTIONS;
591 else if (is_combined_parallel (region))
592 {
593 switch (region->inner->type)
594 {
595 case GIMPLE_OMP_FOR:
596 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
597 switch (region->inner->sched_kind)
598 {
599 case OMP_CLAUSE_SCHEDULE_RUNTIME:
600 if ((region->inner->sched_modifiers
601 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
602 start_ix2 = 6;
603 else if ((region->inner->sched_modifiers
604 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
605 start_ix2 = 7;
606 else
607 start_ix2 = 3;
608 break;
609 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
610 case OMP_CLAUSE_SCHEDULE_GUIDED:
611 if ((region->inner->sched_modifiers
612 & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
613 {
614 start_ix2 = 3 + region->inner->sched_kind;
615 break;
616 }
617 /* FALLTHRU */
618 default:
619 start_ix2 = region->inner->sched_kind;
620 break;
621 }
622 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
623 start_ix = (enum built_in_function) start_ix2;
624 break;
625 case GIMPLE_OMP_SECTIONS:
626 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
627 break;
628 default:
629 gcc_unreachable ();
630 }
631 }
632
633 /* By default, the value of NUM_THREADS is zero (selected at run time)
634 and there is no conditional. */
635 cond = NULL_TREE;
636 val = build_int_cst (unsigned_type_node, 0);
637 flags = build_int_cst (unsigned_type_node, 0);
638
639 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
640 if (c)
641 cond = OMP_CLAUSE_IF_EXPR (c);
642
643 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
644 if (c)
645 {
646 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
647 clause_loc = OMP_CLAUSE_LOCATION (c);
648 }
649 else
650 clause_loc = gimple_location (entry_stmt);
651
652 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
653 if (c)
654 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
655
656 /* Ensure 'val' is of the correct type. */
657 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
658
659 /* If we found the clause 'if (cond)', build either
660 (cond != 0) or (cond ? val : 1u). */
661 if (cond)
662 {
663 cond = gimple_boolify (cond);
664
665 if (integer_zerop (val))
666 val = fold_build2_loc (clause_loc,
667 EQ_EXPR, unsigned_type_node, cond,
668 build_int_cst (TREE_TYPE (cond), 0));
669 else
670 {
671 basic_block cond_bb, then_bb, else_bb;
672 edge e, e_then, e_else;
673 tree tmp_then, tmp_else, tmp_join, tmp_var;
674
675 tmp_var = create_tmp_var (TREE_TYPE (val));
676 if (gimple_in_ssa_p (cfun))
677 {
678 tmp_then = make_ssa_name (tmp_var);
679 tmp_else = make_ssa_name (tmp_var);
680 tmp_join = make_ssa_name (tmp_var);
681 }
682 else
683 {
684 tmp_then = tmp_var;
685 tmp_else = tmp_var;
686 tmp_join = tmp_var;
687 }
688
689 e = split_block_after_labels (bb);
690 cond_bb = e->src;
691 bb = e->dest;
692 remove_edge (e);
693
694 then_bb = create_empty_bb (cond_bb);
695 else_bb = create_empty_bb (then_bb);
696 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
697 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
698
699 stmt = gimple_build_cond_empty (cond);
700 gsi = gsi_start_bb (cond_bb);
701 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
702
703 gsi = gsi_start_bb (then_bb);
704 expand_omp_build_assign (&gsi, tmp_then, val, true);
705
706 gsi = gsi_start_bb (else_bb);
707 expand_omp_build_assign (&gsi, tmp_else,
708 build_int_cst (unsigned_type_node, 1),
709 true);
710
711 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
712 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
713 add_bb_to_loop (then_bb, cond_bb->loop_father);
714 add_bb_to_loop (else_bb, cond_bb->loop_father);
715 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
716 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
717
718 if (gimple_in_ssa_p (cfun))
719 {
720 gphi *phi = create_phi_node (tmp_join, bb);
721 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
722 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
723 }
724
725 val = tmp_join;
726 }
727
728 gsi = gsi_start_bb (bb);
729 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
730 false, GSI_CONTINUE_LINKING);
731 }
732
733 gsi = gsi_last_nondebug_bb (bb);
734 t = gimple_omp_parallel_data_arg (entry_stmt);
735 if (t == NULL)
736 t1 = null_pointer_node;
737 else
738 t1 = build_fold_addr_expr (t);
739 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
740 t2 = build_fold_addr_expr (child_fndecl);
741
742 vec_alloc (args, 4 + vec_safe_length (ws_args));
743 args->quick_push (t2);
744 args->quick_push (t1);
745 args->quick_push (val);
746 if (ws_args)
747 args->splice (*ws_args);
748 args->quick_push (flags);
749
750 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
751 builtin_decl_explicit (start_ix), args);
752
753 if (rtmp)
754 {
755 tree type = TREE_TYPE (OMP_CLAUSE_DECL (rtmp));
756 t = build2 (MODIFY_EXPR, type, OMP_CLAUSE_DECL (rtmp),
757 fold_convert (type,
758 fold_convert (pointer_sized_int_node, t)));
759 }
760 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
761 false, GSI_CONTINUE_LINKING);
762
763 if (hsa_gen_requested_p ()
764 && parallel_needs_hsa_kernel_p (region))
765 {
766 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
767 hsa_register_kernel (child_cnode);
768 }
769}
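/* Rough illustration (identifiers assumed): for
     #pragma omp parallel num_threads (4)
   this emits at the end of BB something like
     GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 4, 0);
   i.e. the outlined child function, the address of the shared data
   record, the thread count and the flags word; for a combined region
   the workshare arguments collected in WS_ARGS are spliced in before
   the flags.  */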
770
771/* Build the function call to GOMP_task to actually
772 generate the task operation. BB is the block where to insert the code. */
773
774static void
775expand_task_call (struct omp_region *region, basic_block bb,
776 gomp_task *entry_stmt)
777{
778 tree t1, t2, t3;
779 gimple_stmt_iterator gsi;
780 location_t loc = gimple_location (entry_stmt);
781
782 tree clauses = gimple_omp_task_clauses (entry_stmt);
783
784 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
785 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
786 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
787 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
788 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
789 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
790
791 unsigned int iflags
792 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
793 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
794 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
795
796 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
797 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
798 tree num_tasks = NULL_TREE;
799 bool ull = false;
800 if (taskloop_p)
801 {
802 gimple *g = last_stmt (region->outer->entry);
803 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
804 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
805 struct omp_for_data fd;
806 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
807 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
808 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
809 OMP_CLAUSE__LOOPTEMP_);
810 startvar = OMP_CLAUSE_DECL (startvar);
811 endvar = OMP_CLAUSE_DECL (endvar);
812 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
813 if (fd.loop.cond_code == LT_EXPR)
814 iflags |= GOMP_TASK_FLAG_UP;
815 tree tclauses = gimple_omp_for_clauses (g);
816 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
817 if (num_tasks)
818 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
819 else
820 {
821 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
822 if (num_tasks)
823 {
824 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
825 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
826 }
827 else
828 num_tasks = integer_zero_node;
829 }
830 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
831 if (ifc == NULL_TREE)
832 iflags |= GOMP_TASK_FLAG_IF;
833 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
834 iflags |= GOMP_TASK_FLAG_NOGROUP;
835 ull = fd.iter_type == long_long_unsigned_type_node;
836 if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
837 iflags |= GOMP_TASK_FLAG_REDUCTION;
838 }
839 else if (priority)
840 iflags |= GOMP_TASK_FLAG_PRIORITY;
841
842 tree flags = build_int_cst (unsigned_type_node, iflags);
843
844 tree cond = boolean_true_node;
845 if (ifc)
846 {
847 if (taskloop_p)
848 {
849 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
850 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
851 build_int_cst (unsigned_type_node,
852 GOMP_TASK_FLAG_IF),
853 build_int_cst (unsigned_type_node, 0));
854 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
855 flags, t);
856 }
857 else
858 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
859 }
860
861 if (finalc)
862 {
863 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
864 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
865 build_int_cst (unsigned_type_node,
866 GOMP_TASK_FLAG_FINAL),
867 build_int_cst (unsigned_type_node, 0));
868 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
869 }
870 if (depend)
871 depend = OMP_CLAUSE_DECL (depend);
872 else
873 depend = build_int_cst (ptr_type_node, 0);
874 if (priority)
875 priority = fold_convert (integer_type_node,
876 OMP_CLAUSE_PRIORITY_EXPR (priority));
877 else
878 priority = integer_zero_node;
879
880 gsi = gsi_last_nondebug_bb (bb);
881 tree t = gimple_omp_task_data_arg (entry_stmt);
882 if (t == NULL)
883 t2 = null_pointer_node;
884 else
885 t2 = build_fold_addr_expr_loc (loc, t);
886 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
887 t = gimple_omp_task_copy_fn (entry_stmt);
888 if (t == NULL)
889 t3 = null_pointer_node;
890 else
891 t3 = build_fold_addr_expr_loc (loc, t);
892
893 if (taskloop_p)
894 t = build_call_expr (ull
895 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
896 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
897 11, t1, t2, t3,
898 gimple_omp_task_arg_size (entry_stmt),
899 gimple_omp_task_arg_align (entry_stmt), flags,
900 num_tasks, priority, startvar, endvar, step);
901 else
902 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
903 9, t1, t2, t3,
904 gimple_omp_task_arg_size (entry_stmt),
905 gimple_omp_task_arg_align (entry_stmt), cond, flags,
906 depend, priority);
907
908 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
909 false, GSI_CONTINUE_LINKING);
910}
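/* Rough illustration (identifiers assumed): for a plain
     #pragma omp task
   this emits something along the lines of
     GOMP_task (foo._omp_fn.1, &.omp_data_o.2, 0B, 32, 8, 1, 0, 0B, 0);
   i.e. child function, data, copy function (or null), argument block
   size and alignment, the if-clause value, the flags, the depend vector
   (or null) and the priority; a taskloop instead calls
   GOMP_taskloop{,_ull} with the task count, iteration bounds and step
   appended, as built above.  */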
911
912/* Build the function call to GOMP_taskwait_depend to actually
913 generate the taskwait operation. BB is the block where to insert the
914 code. */
915
916static void
917expand_taskwait_call (basic_block bb, gomp_task *entry_stmt)
918{
919 tree clauses = gimple_omp_task_clauses (entry_stmt);
920 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
921 if (depend == NULL_TREE)
922 return;
923
924 depend = OMP_CLAUSE_DECL (depend);
925
926 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
927 tree t
928 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASKWAIT_DEPEND),
929 1, depend);
930
931 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
932 false, GSI_CONTINUE_LINKING);
933}
934
935/* Build the function call to GOMP_teams_reg to actually
936 generate the host teams operation. REGION is the teams region
937 being expanded. BB is the block where to insert the code. */
938
939static void
940expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
941{
942 tree clauses = gimple_omp_teams_clauses (entry_stmt);
943 tree num_teams = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
944 if (num_teams == NULL_TREE)
945 num_teams = build_int_cst (unsigned_type_node, 0);
946 else
947 {
948 num_teams = OMP_CLAUSE_NUM_TEAMS_EXPR (num_teams);
949 num_teams = fold_convert (unsigned_type_node, num_teams);
950 }
951 tree thread_limit = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
952 if (thread_limit == NULL_TREE)
953 thread_limit = build_int_cst (unsigned_type_node, 0);
954 else
955 {
956 thread_limit = OMP_CLAUSE_THREAD_LIMIT_EXPR (thread_limit);
957 thread_limit = fold_convert (unsigned_type_node, thread_limit);
958 }
959
960 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (bb);
961 tree t = gimple_omp_teams_data_arg (entry_stmt), t1;
962 if (t == NULL)
963 t1 = null_pointer_node;
964 else
965 t1 = build_fold_addr_expr (t);
966 tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
967 tree t2 = build_fold_addr_expr (child_fndecl);
968
969 vec<tree, va_gc> *args;
970 vec_alloc (args, 5);
971 args->quick_push (t2);
972 args->quick_push (t1);
973 args->quick_push (num_teams);
974 args->quick_push (thread_limit);
975 /* For future extensibility. */
976 args->quick_push (build_zero_cst (unsigned_type_node));
977
978 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
979 builtin_decl_explicit (BUILT_IN_GOMP_TEAMS_REG),
980 args);
981
982 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
983 false, GSI_CONTINUE_LINKING);
984}
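/* Rough illustration (identifiers assumed): for a host
     #pragma omp teams num_teams (8) thread_limit (16)
   this emits
     GOMP_teams_reg (foo._omp_fn.2, &.omp_data_o.3, 8, 16, 0);
   with the trailing zero being the reserved flags argument pushed
   above.  */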
985
986/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
987
988static tree
989vec2chain (vec<tree, va_gc> *v)
990{
991 tree chain = NULL_TREE, t;
992 unsigned ix;
993
994 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
995 {
996 DECL_CHAIN (t) = chain;
997 chain = t;
998 }
999
1000 return chain;
1001}
1002
1003/* Remove barriers in REGION->EXIT's block. Note that this is only
1004 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
1005 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
1006 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
1007 removed. */
1008
1009static void
1010remove_exit_barrier (struct omp_region *region)
1011{
1012 gimple_stmt_iterator gsi;
1013 basic_block exit_bb;
1014 edge_iterator ei;
1015 edge e;
1016 gimple *stmt;
1017 int any_addressable_vars = -1;
1018
1019 exit_bb = region->exit;
1020
1021 /* If the parallel region doesn't return, we don't have REGION->EXIT
1022 block at all. */
1023 if (! exit_bb)
1024 return;
1025
1026 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
1027 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
1028 statements that can appear in between are extremely limited -- no
1029 memory operations at all. Here, we allow nothing at all, so the
1030 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
1031 gsi = gsi_last_nondebug_bb (exit_bb);
1032 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1033 gsi_prev_nondebug (&gsi);
1034 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
1035 return;
1036
1037 FOR_EACH_EDGE (e, ei, exit_bb->preds)
1038 {
1039 gsi = gsi_last_nondebug_bb (e->src);
1040 if (gsi_end_p (gsi))
1041 continue;
1042 stmt = gsi_stmt (gsi);
1043 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
1044 && !gimple_omp_return_nowait_p (stmt))
1045 {
1046 /* OpenMP 3.0 tasks unfortunately prevent this optimization
1047 in many cases. If there could be tasks queued, the barrier
1048 might be needed to let the tasks run before some local
1049 variable of the parallel that the task uses as shared
1050 runs out of scope. The task can be spawned either
1051 from within current function (this would be easy to check)
1052 or from some function it calls and gets passed an address
1053 of such a variable. */
1054 if (any_addressable_vars < 0)
1055 {
1056 gomp_parallel *parallel_stmt
1057 = as_a <gomp_parallel *> (last_stmt (region->entry));
1058 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
1059 tree local_decls, block, decl;
1060 unsigned ix;
1061
1062 any_addressable_vars = 0;
1063 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
1064 if (TREE_ADDRESSABLE (decl))
1065 {
1066 any_addressable_vars = 1;
1067 break;
1068 }
1069 for (block = gimple_block (stmt);
1070 !any_addressable_vars
1071 && block
1072 && TREE_CODE (block) == BLOCK;
1073 block = BLOCK_SUPERCONTEXT (block))
1074 {
1075 for (local_decls = BLOCK_VARS (block);
1076 local_decls;
1077 local_decls = DECL_CHAIN (local_decls))
1078 if (TREE_ADDRESSABLE (local_decls))
1079 {
1080 any_addressable_vars = 1;
1081 break;
1082 }
1083 if (block == gimple_block (parallel_stmt))
1084 break;
1085 }
1086 }
1087 if (!any_addressable_vars)
1088 gimple_omp_return_set_nowait (stmt);
1089 }
1090 }
1091}
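/* Illustrative sketch (example assumed): in
     #pragma omp parallel
     {
       #pragma omp for
       for (...) ...
     }
   the implicit barrier at the end of the for construct is immediately
   followed by the join barrier of the enclosing parallel, so the inner
   GIMPLE_OMP_RETURN can be marked nowait, unless an addressable local
   might still be needed by a queued task, as checked above.  */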
1092
1093static void
1094remove_exit_barriers (struct omp_region *region)
1095{
1096 if (region->type == GIMPLE_OMP_PARALLEL)
1097 remove_exit_barrier (region);
1098
1099 if (region->inner)
1100 {
1101 region = region->inner;
1102 remove_exit_barriers (region);
1103 while (region->next)
1104 {
1105 region = region->next;
1106 remove_exit_barriers (region);
1107 }
1108 }
1109}
1110
1111/* Optimize omp_get_thread_num () and omp_get_num_threads ()
1112 calls. These can't be declared as const functions, but
1113 within one parallel body they are constant, so they can be
1114 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1115 which are declared const. Similarly for task body, except
1116 that in untied task omp_get_thread_num () can change at any task
1117 scheduling point. */
1118
1119static void
1120optimize_omp_library_calls (gimple *entry_stmt)
1121{
1122 basic_block bb;
1123 gimple_stmt_iterator gsi;
1124 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1125 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1126 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1127 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1128 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1129 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1130 OMP_CLAUSE_UNTIED) != NULL);
1131
1132 FOR_EACH_BB_FN (bb, cfun)
1133 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1134 {
1135 gimple *call = gsi_stmt (gsi);
1136 tree decl;
1137
1138 if (is_gimple_call (call)
1139 && (decl = gimple_call_fndecl (call))
1140 && DECL_EXTERNAL (decl)
1141 && TREE_PUBLIC (decl)
1142 && DECL_INITIAL (decl) == NULL)
1143 {
1144 tree built_in;
1145
1146 if (DECL_NAME (decl) == thr_num_id)
1147 {
1148 /* In #pragma omp task untied omp_get_thread_num () can change
1149 during the execution of the task region. */
1150 if (untied_task)
1151 continue;
1152 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1153 }
1154 else if (DECL_NAME (decl) == num_thr_id)
1155 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1156 else
1157 continue;
1158
1159 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1160 || gimple_call_num_args (call) != 0)
1161 continue;
1162
1163 if (flag_exceptions && !TREE_NOTHROW (decl))
1164 continue;
1165
1166 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1167 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1168 TREE_TYPE (TREE_TYPE (built_in))))
1169 continue;
1170
1171 gimple_call_set_fndecl (call, built_in);
1172 }
1173 }
1174}
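/* Sketch of the effect (example assumed): inside an outlined parallel
   body a call such as
     D.2345 = omp_get_thread_num ();
   is redirected to the const-declared __builtin_omp_get_thread_num (),
   which lets later passes CSE repeated queries; in an untied task the
   thread number can change at scheduling points, so such calls are left
   untouched there.  */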
1175
1176/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1177 regimplified. */
1178
1179static tree
1180expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1181{
1182 tree t = *tp;
1183
1184 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1185 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1186 return t;
1187
1188 if (TREE_CODE (t) == ADDR_EXPR)
1189 recompute_tree_invariant_for_addr_expr (t);
1190
1191 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1192 return NULL_TREE;
1193}
1194
1195/* Prepend or append TO = FROM assignment before or after *GSI_P. */
1196
1197static void
1198expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1199 bool after)
1200{
1201 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1202 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1203 !after, after ? GSI_CONTINUE_LINKING
1204 : GSI_SAME_STMT);
1205 gimple *stmt = gimple_build_assign (to, from);
1206 if (after)
1207 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1208 else
1209 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1210 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1211 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1212 {
1213 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1214 gimple_regimplify_operands (stmt, &gsi);
1215 }
1216}
1217
1218/* Expand the OpenMP parallel or task directive starting at REGION. */
1219
1220static void
1221expand_omp_taskreg (struct omp_region *region)
1222{
1223 basic_block entry_bb, exit_bb, new_bb;
1224 struct function *child_cfun;
1225 tree child_fn, block, t;
1226 gimple_stmt_iterator gsi;
1227 gimple *entry_stmt, *stmt;
1228 edge e;
1229 vec<tree, va_gc> *ws_args;
1230
1231 entry_stmt = last_stmt (region->entry);
1232 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1233 && gimple_omp_task_taskwait_p (entry_stmt))
1234 {
1235 new_bb = region->entry;
1236 gsi = gsi_last_nondebug_bb (region->entry);
1237 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1238 gsi_remove (&gsi, true);
1239 expand_taskwait_call (new_bb, as_a <gomp_task *> (entry_stmt));
1240 return;
1241 }
1242
1243 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1244 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1245
1246 entry_bb = region->entry;
1247 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1248 exit_bb = region->cont;
1249 else
1250 exit_bb = region->exit;
1251
1252 if (is_combined_parallel (region))
1253 ws_args = region->ws_args;
1254 else
1255 ws_args = NULL;
1256
1257 if (child_cfun->cfg)
1258 {
1259 /* Due to inlining, it may happen that we have already outlined
1260 the region, in which case all we need to do is make the
1261 sub-graph unreachable and emit the parallel call. */
1262 edge entry_succ_e, exit_succ_e;
1263
1264 entry_succ_e = single_succ_edge (entry_bb);
1265
1266 gsi = gsi_last_nondebug_bb (entry_bb);
1267 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1268 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK
1269 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TEAMS);
1270 gsi_remove (&gsi, true);
1271
1272 new_bb = entry_bb;
1273 if (exit_bb)
1274 {
1275 exit_succ_e = single_succ_edge (exit_bb);
1276 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1277 }
1278 remove_edge_and_dominated_blocks (entry_succ_e);
1279 }
1280 else
1281 {
1282 unsigned srcidx, dstidx, num;
1283
1284 /* If the parallel region needs data sent from the parent
1285 function, then the very first statement (except possible
1286 tree profile counter updates) of the parallel body
1287 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1288 &.OMP_DATA_O is passed as an argument to the child function,
1289 we need to replace it with the argument as seen by the child
1290 function.
1291
1292 In most cases, this will end up being the identity assignment
1293 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1294 a function call that has been inlined, the original PARM_DECL
1295 .OMP_DATA_I may have been converted into a different local
1296 variable. In which case, we need to keep the assignment. */
1297 if (gimple_omp_taskreg_data_arg (entry_stmt))
1298 {
1299 basic_block entry_succ_bb
1300 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1301 : FALLTHRU_EDGE (entry_bb)->dest;
1302 tree arg;
1303 gimple *parcopy_stmt = NULL;
1304
1305 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1306 {
1307 gimple *stmt;
1308
1309 gcc_assert (!gsi_end_p (gsi));
1310 stmt = gsi_stmt (gsi);
1311 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1312 continue;
1313
1314 if (gimple_num_ops (stmt) == 2)
1315 {
1316 tree arg = gimple_assign_rhs1 (stmt);
1317
1318 /* We're ignoring the subcode because we're
1319 effectively doing a STRIP_NOPS. */
1320
1321 if (TREE_CODE (arg) == ADDR_EXPR
1322 && (TREE_OPERAND (arg, 0)
1323 == gimple_omp_taskreg_data_arg (entry_stmt)))
1324 {
1325 parcopy_stmt = stmt;
1326 break;
1327 }
1328 }
1329 }
1330
1331 gcc_assert (parcopy_stmt != NULL);
1332 arg = DECL_ARGUMENTS (child_fn);
1333
1334 if (!gimple_in_ssa_p (cfun))
1335 {
1336 if (gimple_assign_lhs (parcopy_stmt) == arg)
1337 gsi_remove (&gsi, true);
1338 else
1339 {
1340 /* ?? Is setting the subcode really necessary ?? */
1341 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1342 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1343 }
1344 }
1345 else
1346 {
1347 tree lhs = gimple_assign_lhs (parcopy_stmt);
1348 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1349 /* We'd like to set the rhs to the default def in the child_fn,
1350 but it's too early to create ssa names in the child_fn.
1351 Instead, we set the rhs to the parm. In
1352 move_sese_region_to_fn, we introduce a default def for the
1353 parm, map the parm to its default def, and once we encounter
1354 this stmt, replace the parm with the default def. */
1355 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1356 update_stmt (parcopy_stmt);
1357 }
1358 }
1359
1360 /* Declare local variables needed in CHILD_CFUN. */
1361 block = DECL_INITIAL (child_fn);
1362 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1363 /* The gimplifier could record temporaries in parallel/task block
1364 rather than in containing function's local_decls chain,
1365 which would mean cgraph missed finalizing them. Do it now. */
1366 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1367 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1368 varpool_node::finalize_decl (t);
1369 DECL_SAVED_TREE (child_fn) = NULL;
1370 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1371 gimple_set_body (child_fn, NULL);
1372 TREE_USED (block) = 1;
1373
1374 /* Reset DECL_CONTEXT on function arguments. */
1375 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1376 DECL_CONTEXT (t) = child_fn;
1377
1378 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1379 so that it can be moved to the child function. */
1380 gsi = gsi_last_nondebug_bb (entry_bb);
1381 stmt = gsi_stmt (gsi);
1382 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1383 || gimple_code (stmt) == GIMPLE_OMP_TASK
1384 || gimple_code (stmt) == GIMPLE_OMP_TEAMS));
1385 e = split_block (entry_bb, stmt);
1386 gsi_remove (&gsi, true);
1387 entry_bb = e->dest;
1388 edge e2 = NULL;
1389 if (gimple_code (entry_stmt) != GIMPLE_OMP_TASK)
1390 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1391 else
1392 {
1393 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1394 gcc_assert (e2->dest == region->exit);
1395 remove_edge (BRANCH_EDGE (entry_bb));
1396 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1397 gsi = gsi_last_nondebug_bb (region->exit);
1398 gcc_assert (!gsi_end_p (gsi)
1399 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1400 gsi_remove (&gsi, true);
1401 }
1402
1403 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1404 if (exit_bb)
1405 {
1406 gsi = gsi_last_nondebug_bb (exit_bb);
1407 gcc_assert (!gsi_end_p (gsi)
1408 && (gimple_code (gsi_stmt (gsi))
1409 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1410 stmt = gimple_build_return (NULL);
1411 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1412 gsi_remove (&gsi, true);
1413 }
1414
1415 /* Move the parallel region into CHILD_CFUN. */
1416
1417 if (gimple_in_ssa_p (cfun))
1418 {
1419 init_tree_ssa (child_cfun);
1420 init_ssa_operands (child_cfun);
1421 child_cfun->gimple_df->in_ssa_p = true;
1422 block = NULL_TREE;
1423 }
1424 else
1425 block = gimple_block (entry_stmt);
1426
1427 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1428 if (exit_bb)
1429 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1430 if (e2)
1431 {
1432 basic_block dest_bb = e2->dest;
1433 if (!exit_bb)
1434 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1435 remove_edge (e2);
1436 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1437 }
1438 /* When the OMP expansion process cannot guarantee an up-to-date
1439 loop tree arrange for the child function to fixup loops. */
1440 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1441 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1442
1443 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1444 num = vec_safe_length (child_cfun->local_decls);
1445 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1446 {
1447 t = (*child_cfun->local_decls)[srcidx];
1448 if (DECL_CONTEXT (t) == cfun->decl)
1449 continue;
1450 if (srcidx != dstidx)
1451 (*child_cfun->local_decls)[dstidx] = t;
1452 dstidx++;
1453 }
1454 if (dstidx != num)
1455 vec_safe_truncate (child_cfun->local_decls, dstidx);
1456
1457 /* Inform the callgraph about the new function. */
1458 child_cfun->curr_properties = cfun->curr_properties;
1459 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1460 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1461 cgraph_node *node = cgraph_node::get_create (child_fn);
1462 node->parallelized_function = 1;
1463 cgraph_node::add_new_function (child_fn, true);
1464
1465 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1466 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1467
1468 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1469 fixed in a following pass. */
1470 push_cfun (child_cfun);
1471 if (need_asm)
1472 assign_assembler_name_if_needed (child_fn);
1473
1474 if (optimize)
1475 optimize_omp_library_calls (entry_stmt);
1476 update_max_bb_count ();
1477 cgraph_edge::rebuild_edges ();
1478
1479 /* Some EH regions might become dead, see PR34608. If
1480 pass_cleanup_cfg isn't the first pass to happen with the
1481 new child, these dead EH edges might cause problems.
1482 Clean them up now. */
1483 if (flag_exceptions)
1484 {
1485 basic_block bb;
1486 bool changed = false;
1487
1488 FOR_EACH_BB_FN (bb, cfun)
1489 changed |= gimple_purge_dead_eh_edges (bb);
1490 if (changed)
1491 cleanup_tree_cfg ();
1492 }
1493 if (gimple_in_ssa_p (cfun))
1494 update_ssa (TODO_update_ssa);
1495 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1496 verify_loop_structure ();
1497 pop_cfun ();
1498
1499 if (dump_file && !gimple_in_ssa_p (cfun))
1500 {
1501 omp_any_child_fn_dumped = true;
1502 dump_function_header (dump_file, child_fn, dump_flags);
1503 dump_function_to_file (child_fn, dump_file, dump_flags);
1504 }
1505 }
1506
1507 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
1508
1509 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1510 expand_parallel_call (region, new_bb,
1511 as_a <gomp_parallel *> (entry_stmt), ws_args);
1512 else if (gimple_code (entry_stmt) == GIMPLE_OMP_TEAMS)
1513 expand_teams_call (new_bb, as_a <gomp_teams *> (entry_stmt));
1514 else
1515 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1516 if (gimple_in_ssa_p (cfun))
1517 update_ssa (TODO_update_ssa_only_virtuals);
1518}
1519
1520/* Information about members of an OpenACC collapsed loop nest. */
1521
1522struct oacc_collapse
1523{
1524 tree base; /* Base value. */
1525 tree iters; /* Number of steps. */
1526 tree step; /* Step size. */
1527 tree tile; /* Tile increment (if tiled). */
1528 tree outer; /* Tile iterator var. */
1529};
1530
1531/* Helper for expand_oacc_for. Determine collapsed loop information.
1532 Fill in COUNTS array. Emit any initialization code before GSI.
1533 Return the calculated outer loop bound of BOUND_TYPE. */
1534
1535static tree
1536expand_oacc_collapse_init (const struct omp_for_data *fd,
1537 gimple_stmt_iterator *gsi,
1538 oacc_collapse *counts, tree bound_type,
1539 location_t loc)
1540{
1541 tree tiling = fd->tiling;
1542 tree total = build_int_cst (bound_type, 1);
1543 int ix;
1544
1545 gcc_assert (integer_onep (fd->loop.step));
1546 gcc_assert (integer_zerop (fd->loop.n1));
1547
1548 /* When tiling, the first operand of the tile clause applies to the
1549 innermost loop, and we work outwards from there. Seems
1550 backwards, but whatever. */
1551 for (ix = fd->collapse; ix--;)
1552 {
1553 const omp_for_data_loop *loop = &fd->loops[ix];
1554
1555 tree iter_type = TREE_TYPE (loop->v);
1556 tree diff_type = iter_type;
1557 tree plus_type = iter_type;
1558
1559 gcc_assert (loop->cond_code == fd->loop.cond_code);
1560
1561 if (POINTER_TYPE_P (iter_type))
1562 plus_type = sizetype;
1563 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1564 diff_type = signed_type_for (diff_type);
1565 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
1566 diff_type = integer_type_node;
1567
1568 if (tiling)
1569 {
1570 tree num = build_int_cst (integer_type_node, fd->collapse);
1571 tree loop_no = build_int_cst (integer_type_node, ix);
1572 tree tile = TREE_VALUE (tiling);
1573 gcall *call
1574 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1575 /* gwv-outer=*/integer_zero_node,
1576 /* gwv-inner=*/integer_zero_node);
1577
1578 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1579 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1580 gimple_call_set_lhs (call, counts[ix].tile);
1581 gimple_set_location (call, loc);
1582 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1583
1584 tiling = TREE_CHAIN (tiling);
1585 }
1586 else
1587 {
1588 counts[ix].tile = NULL;
1589 counts[ix].outer = loop->v;
1590 }
1591
1592 tree b = loop->n1;
1593 tree e = loop->n2;
1594 tree s = loop->step;
1595 bool up = loop->cond_code == LT_EXPR;
1596 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1597 bool negating;
1598 tree expr;
1599
1600 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1601 true, GSI_SAME_STMT);
1602 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1603 true, GSI_SAME_STMT);
1604
1605 /* Convert the step, avoiding possible unsigned->signed overflow. */
1606 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1607 if (negating)
1608 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1609 s = fold_convert (diff_type, s);
1610 if (negating)
1611 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1612 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1613 true, GSI_SAME_STMT);
1614
1615 /* Determine the range, avoiding possible unsigned->signed overflow. */
1616 negating = !up && TYPE_UNSIGNED (iter_type);
1617 expr = fold_build2 (MINUS_EXPR, plus_type,
1618 fold_convert (plus_type, negating ? b : e),
1619 fold_convert (plus_type, negating ? e : b));
1620 expr = fold_convert (diff_type, expr);
1621 if (negating)
1622 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1623 tree range = force_gimple_operand_gsi
1624 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1625
1626 /* Determine number of iterations. */
1627 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1628 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1629 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1630
1631 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1632 true, GSI_SAME_STMT);
1633
1634 counts[ix].base = b;
1635 counts[ix].iters = iters;
1636 counts[ix].step = s;
1637
1638 total = fold_build2 (MULT_EXPR, bound_type, total,
1639 fold_convert (bound_type, iters));
1640 }
1641
1642 return total;
1643}
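/* Worked example (values assumed): for a member loop
     for (i = 0; i < 10; i++)
   with cond_code LT_EXPR and step 1, the range computed above is 10,
   dir is 1, and the iteration count is (10 - 1 + 1) / 1 == 10; TOTAL is
   then the product of the per-loop counts across the collapsed nest.  */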
1644
1645/* Emit initializers for collapsed loop members. INNER is true if
1646 this is for the element loop of a TILE. IVAR is the outer
1647 loop iteration variable, from which collapsed loop iteration values
1648 are calculated. COUNTS array has been initialized by
1649 expand_oacc_collapse_inits. */
1650
1651static void
1652expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1653 gimple_stmt_iterator *gsi,
1654 const oacc_collapse *counts, tree ivar)
1655{
1656 tree ivar_type = TREE_TYPE (ivar);
1657
1658 /* The most rapidly changing iteration variable is the innermost
1659 one. */
1660 for (int ix = fd->collapse; ix--;)
1661 {
1662 const omp_for_data_loop *loop = &fd->loops[ix];
1663 const oacc_collapse *collapse = &counts[ix];
1664 tree v = inner ? loop->v : collapse->outer;
1665 tree iter_type = TREE_TYPE (v);
1666 tree diff_type = TREE_TYPE (collapse->step);
1667 tree plus_type = iter_type;
1668 enum tree_code plus_code = PLUS_EXPR;
1669 tree expr;
1670
1671 if (POINTER_TYPE_P (iter_type))
1672 {
1673 plus_code = POINTER_PLUS_EXPR;
1674 plus_type = sizetype;
1675 }
1676
1677 expr = ivar;
1678 if (ix)
1679 {
1680 tree mod = fold_convert (ivar_type, collapse->iters);
1681 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1682 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1683 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1684 true, GSI_SAME_STMT);
1685 }
1686
1687 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1688 collapse->step);
1689 expr = fold_build2 (plus_code, iter_type,
1690 inner ? collapse->outer : collapse->base,
1691 fold_convert (plus_type, expr));
1692 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1693 true, GSI_SAME_STMT);
1694 gassign *ass = gimple_build_assign (v, expr);
1695 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1696 }
1697}
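/* Worked example (values assumed): with collapse(2) and an inner loop of
   8 iterations, a linear iterator value IVAR of 19 is decomposed above
   into an inner index of 19 % 8 == 3 and an outer index of 19 / 8 == 2,
   each then scaled by the loop step and added to the loop base (or, for
   the element loop of a tile, to the tile's outer iterator).  */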
1698
1699/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1700 of the combined collapse > 1 loop constructs, generate code like:
1701 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1702 if (cond3 is <)
1703 adj = STEP3 - 1;
1704 else
1705 adj = STEP3 + 1;
1706 count3 = (adj + N32 - N31) / STEP3;
1707 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1708 if (cond2 is <)
1709 adj = STEP2 - 1;
1710 else
1711 adj = STEP2 + 1;
1712 count2 = (adj + N22 - N21) / STEP2;
1713 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1714 if (cond1 is <)
1715 adj = STEP1 - 1;
1716 else
1717 adj = STEP1 + 1;
1718 count1 = (adj + N12 - N11) / STEP1;
1719 count = count1 * count2 * count3;
1720 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1721 count = 0;
1722 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1723 of the combined loop constructs, just initialize COUNTS array
1724 from the _looptemp_ clauses. */
1725
1726/* NOTE: It *could* be better to moosh all of the BBs together,
1727 creating one larger BB with all the computation and the unexpected
1728 jump at the end. I.e.
1729
1730 bool zero3, zero2, zero1, zero;
1731
1732 zero3 = N32 c3 N31;
1733 count3 = (N32 - N31) /[cl] STEP3;
1734 zero2 = N22 c2 N21;
1735 count2 = (N22 - N21) /[cl] STEP2;
1736 zero1 = N12 c1 N11;
1737 count1 = (N12 - N11) /[cl] STEP1;
1738 zero = zero3 || zero2 || zero1;
1739 count = count1 * count2 * count3;
1740 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1741
1742	 After all, we expect zero to be false, and thus we expect to have to
1743	 evaluate all of the comparison expressions, so short-circuiting
1744 oughtn't be a win. Since the condition isn't protecting a
1745 denominator, we're not concerned about divide-by-zero, so we can
1746 fully evaluate count even if a numerator turned out to be wrong.
1747
1748 It seems like putting this all together would create much better
1749 scheduling opportunities, and less pressure on the chip's branch
1750 predictor. */
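/* As a concrete illustration of the count computation above (the loop
   bounds here are invented for this note, not taken from any testcase):
   for collapse(2) with
     for (i = 0; i < 10; i += 3)
       for (j = 20; j > 5; j -= 4)
   the generated code computes
     count1 = (STEP1 - 1 + N12 - N11) / STEP1 = (3 - 1 + 10 - 0) / 3 = 4
     count2 = (STEP2 + 1 + N22 - N21) / STEP2 = (-4 + 1 + 5 - 20) / -4 = 4
     count  = count1 * count2 = 16
   which matches the 4 values taken by i (0, 3, 6, 9) and the 4 values
   taken by j (20, 16, 12, 8).  */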
1751
1752static void
1753expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1754 basic_block &entry_bb, tree *counts,
1755 basic_block &zero_iter1_bb, int &first_zero_iter1,
1756 basic_block &zero_iter2_bb, int &first_zero_iter2,
1757 basic_block &l2_dom_bb)
1758{
1759 tree t, type = TREE_TYPE (fd->loop.v);
1760 edge e, ne;
1761 int i;
1762
1763 /* Collapsed loops need work for expansion into SSA form. */
1764 gcc_assert (!gimple_in_ssa_p (cfun));
1765
1766 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1767 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1768 {
1769 gcc_assert (fd->ordered == 0);
1770 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1771 isn't supposed to be handled, as the inner loop doesn't
1772 use it. */
1773 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1774 OMP_CLAUSE__LOOPTEMP_);
1775 gcc_assert (innerc);
1776 for (i = 0; i < fd->collapse; i++)
1777 {
1778 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1779 OMP_CLAUSE__LOOPTEMP_);
1780 gcc_assert (innerc);
1781 if (i)
1782 counts[i] = OMP_CLAUSE_DECL (innerc);
1783 else
1784 counts[0] = NULL_TREE;
1785 }
1786 return;
1787 }
1788
1789 for (i = fd->collapse; i < fd->ordered; i++)
1790 {
1791 tree itype = TREE_TYPE (fd->loops[i].v);
1792 counts[i] = NULL_TREE;
1793 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1794 fold_convert (itype, fd->loops[i].n1),
1795 fold_convert (itype, fd->loops[i].n2));
1796 if (t && integer_zerop (t))
1797 {
1798 for (i = fd->collapse; i < fd->ordered; i++)
1799 counts[i] = build_int_cst (type, 0);
1800 break;
1801 }
1802 }
1803 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1804 {
1805 tree itype = TREE_TYPE (fd->loops[i].v);
1806
1807 if (i >= fd->collapse && counts[i])
1808 continue;
1809 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1810 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1811 fold_convert (itype, fd->loops[i].n1),
1812 fold_convert (itype, fd->loops[i].n2)))
1813 == NULL_TREE || !integer_onep (t)))
1814 {
1815 gcond *cond_stmt;
1816 tree n1, n2;
1817 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1818 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1819 true, GSI_SAME_STMT);
1820 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1821 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1822 true, GSI_SAME_STMT);
1823 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1824 NULL_TREE, NULL_TREE);
1825 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1826 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1827 expand_omp_regimplify_p, NULL, NULL)
1828 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1829 expand_omp_regimplify_p, NULL, NULL))
1830 {
1831 *gsi = gsi_for_stmt (cond_stmt);
1832 gimple_regimplify_operands (cond_stmt, gsi);
1833 }
1834 e = split_block (entry_bb, cond_stmt);
1835 basic_block &zero_iter_bb
1836 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1837 int &first_zero_iter
1838 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1839 if (zero_iter_bb == NULL)
1840 {
1841 gassign *assign_stmt;
1842 first_zero_iter = i;
1843 zero_iter_bb = create_empty_bb (entry_bb);
1844 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1845 *gsi = gsi_after_labels (zero_iter_bb);
1846 if (i < fd->collapse)
1847 assign_stmt = gimple_build_assign (fd->loop.n2,
1848 build_zero_cst (type));
1849 else
1850 {
1851 counts[i] = create_tmp_reg (type, ".count");
1852 assign_stmt
1853 = gimple_build_assign (counts[i], build_zero_cst (type));
1854 }
1855 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1856 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1857 entry_bb);
1858 }
1859 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
357067f2 1860 ne->probability = profile_probability::very_unlikely ();
629b3d75 1861 e->flags = EDGE_TRUE_VALUE;
357067f2 1862 e->probability = ne->probability.invert ();
629b3d75
MJ
1863 if (l2_dom_bb == NULL)
1864 l2_dom_bb = entry_bb;
1865 entry_bb = e->dest;
65f4b875 1866 *gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
1867 }
1868
1869 if (POINTER_TYPE_P (itype))
1870 itype = signed_type_for (itype);
1871 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1872 ? -1 : 1));
1873 t = fold_build2 (PLUS_EXPR, itype,
1874 fold_convert (itype, fd->loops[i].step), t);
1875 t = fold_build2 (PLUS_EXPR, itype, t,
1876 fold_convert (itype, fd->loops[i].n2));
1877 t = fold_build2 (MINUS_EXPR, itype, t,
1878 fold_convert (itype, fd->loops[i].n1));
1879	  /* ?? We could probably use CEIL_DIV_EXPR instead of
1880	     TRUNC_DIV_EXPR and adjust by hand, unless we can't
1881	     generate the same code in the end because generically we
1882	     don't know that the values involved must be negative for
1883	     GT.  ?? */
1884 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1885 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1886 fold_build1 (NEGATE_EXPR, itype, t),
1887 fold_build1 (NEGATE_EXPR, itype,
1888 fold_convert (itype,
1889 fd->loops[i].step)));
1890 else
1891 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1892 fold_convert (itype, fd->loops[i].step));
1893 t = fold_convert (type, t);
1894 if (TREE_CODE (t) == INTEGER_CST)
1895 counts[i] = t;
1896 else
1897 {
1898 if (i < fd->collapse || i != first_zero_iter2)
1899 counts[i] = create_tmp_reg (type, ".count");
1900 expand_omp_build_assign (gsi, counts[i], t);
1901 }
1902 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1903 {
1904 if (i == 0)
1905 t = counts[0];
1906 else
1907 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1908 expand_omp_build_assign (gsi, fd->loop.n2, t);
1909 }
1910 }
1911}
1912
1913/* Helper function for expand_omp_{for_*,simd}. Generate code like:
1914 T = V;
1915 V3 = N31 + (T % count3) * STEP3;
1916 T = T / count3;
1917 V2 = N21 + (T % count2) * STEP2;
1918 T = T / count2;
1919 V1 = N11 + T * STEP1;
1920 if this loop doesn't have an inner loop construct combined with it.
1921 If it does have an inner loop construct combined with it and the
1922 iteration count isn't known constant, store values from counts array
1923 into its _looptemp_ temporaries instead. */
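/* A small worked example (hypothetical numbers, continuing the collapse(2)
   nest sketched earlier with count2 = 4): for logical iteration V = 9 the
   generated code computes
     T  = 9;
     V2 = N21 + (T % count2) * STEP2 = 20 + (9 % 4) * -4 = 16;
     T  = T / count2                 = 9 / 4             = 2;
     V1 = N11 + T * STEP1            = 0 + 2 * 3         = 6;
   i.e. the 9th logical iteration (counting from 0) runs with
   i == 6 and j == 16.  */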
1924
1925static void
1926expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1927 tree *counts, gimple *inner_stmt, tree startvar)
1928{
1929 int i;
1930 if (gimple_omp_for_combined_p (fd->for_stmt))
1931 {
1932 /* If fd->loop.n2 is constant, then no propagation of the counts
1933 is needed, they are constant. */
1934 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1935 return;
1936
1937 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1938 ? gimple_omp_taskreg_clauses (inner_stmt)
1939 : gimple_omp_for_clauses (inner_stmt);
1940 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1941 isn't supposed to be handled, as the inner loop doesn't
1942 use it. */
1943 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1944 gcc_assert (innerc);
1945 for (i = 0; i < fd->collapse; i++)
1946 {
1947 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1948 OMP_CLAUSE__LOOPTEMP_);
1949 gcc_assert (innerc);
1950 if (i)
1951 {
1952 tree tem = OMP_CLAUSE_DECL (innerc);
1953 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1954 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1955 false, GSI_CONTINUE_LINKING);
1956 gassign *stmt = gimple_build_assign (tem, t);
1957 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1958 }
1959 }
1960 return;
1961 }
1962
1963 tree type = TREE_TYPE (fd->loop.v);
1964 tree tem = create_tmp_reg (type, ".tem");
1965 gassign *stmt = gimple_build_assign (tem, startvar);
1966 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1967
1968 for (i = fd->collapse - 1; i >= 0; i--)
1969 {
1970 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1971 itype = vtype;
1972 if (POINTER_TYPE_P (vtype))
1973 itype = signed_type_for (vtype);
1974 if (i != 0)
1975 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1976 else
1977 t = tem;
1978 t = fold_convert (itype, t);
1979 t = fold_build2 (MULT_EXPR, itype, t,
1980 fold_convert (itype, fd->loops[i].step));
1981 if (POINTER_TYPE_P (vtype))
1982 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1983 else
1984 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1985 t = force_gimple_operand_gsi (gsi, t,
1986 DECL_P (fd->loops[i].v)
1987 && TREE_ADDRESSABLE (fd->loops[i].v),
1988 NULL_TREE, false,
1989 GSI_CONTINUE_LINKING);
1990 stmt = gimple_build_assign (fd->loops[i].v, t);
1991 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1992 if (i != 0)
1993 {
1994 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1995 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1996 false, GSI_CONTINUE_LINKING);
1997 stmt = gimple_build_assign (tem, t);
1998 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1999 }
2000 }
2001}
2002
2003/* Helper function for expand_omp_for_*. Generate code like:
2004 L10:
2005 V3 += STEP3;
2006 if (V3 cond3 N32) goto BODY_BB; else goto L11;
2007 L11:
2008 V3 = N31;
2009 V2 += STEP2;
2010 if (V2 cond2 N22) goto BODY_BB; else goto L12;
2011 L12:
2012 V2 = N21;
2013 V1 += STEP1;
2014 goto BODY_BB; */
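/* For instance (same hypothetical collapse(2) nest as above), the chain of
   blocks generated here amounts to
     L10: j += -4; if (j > 5) goto BODY_BB; else goto L11;
     L11: j = 20; i += 3; goto BODY_BB;
   i.e. only when the innermost variable wraps around do we reset it and
   step the next outer one.  */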
2015
2016static basic_block
2017extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
2018 basic_block body_bb)
2019{
2020 basic_block last_bb, bb, collapse_bb = NULL;
2021 int i;
2022 gimple_stmt_iterator gsi;
2023 edge e;
2024 tree t;
2025 gimple *stmt;
2026
2027 last_bb = cont_bb;
2028 for (i = fd->collapse - 1; i >= 0; i--)
2029 {
2030 tree vtype = TREE_TYPE (fd->loops[i].v);
2031
2032 bb = create_empty_bb (last_bb);
2033 add_bb_to_loop (bb, last_bb->loop_father);
2034 gsi = gsi_start_bb (bb);
2035
2036 if (i < fd->collapse - 1)
2037 {
2038 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
357067f2 2039 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2040
2041 t = fd->loops[i + 1].n1;
2042 t = force_gimple_operand_gsi (&gsi, t,
2043 DECL_P (fd->loops[i + 1].v)
2044 && TREE_ADDRESSABLE (fd->loops[i
2045 + 1].v),
2046 NULL_TREE, false,
2047 GSI_CONTINUE_LINKING);
2048 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
2049 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2050 }
2051 else
2052 collapse_bb = bb;
2053
2054 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
2055
2056 if (POINTER_TYPE_P (vtype))
2057 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
2058 else
2059 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
2060 t = force_gimple_operand_gsi (&gsi, t,
2061 DECL_P (fd->loops[i].v)
2062 && TREE_ADDRESSABLE (fd->loops[i].v),
2063 NULL_TREE, false, GSI_CONTINUE_LINKING);
2064 stmt = gimple_build_assign (fd->loops[i].v, t);
2065 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2066
2067 if (i > 0)
2068 {
2069 t = fd->loops[i].n2;
2070 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2071 false, GSI_CONTINUE_LINKING);
2072 tree v = fd->loops[i].v;
2073 if (DECL_P (v) && TREE_ADDRESSABLE (v))
2074 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
2075 false, GSI_CONTINUE_LINKING);
2076 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
2077 stmt = gimple_build_cond_empty (t);
2078 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
2079 if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
2080 expand_omp_regimplify_p, NULL, NULL)
2081 || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
2082 expand_omp_regimplify_p, NULL, NULL))
2083 gimple_regimplify_operands (stmt, &gsi);
629b3d75 2084 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
357067f2 2085 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
2086 }
2087 else
2088 make_edge (bb, body_bb, EDGE_FALLTHRU);
2089 last_bb = bb;
2090 }
2091
2092 return collapse_bb;
2093}
2094
2095/* Expand #pragma omp ordered depend(source). */
2096
2097static void
2098expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2099 tree *counts, location_t loc)
2100{
2101 enum built_in_function source_ix
2102 = fd->iter_type == long_integer_type_node
2103 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
2104 gimple *g
2105 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
2106 build_fold_addr_expr (counts[fd->ordered]));
2107 gimple_set_location (g, loc);
2108 gsi_insert_before (gsi, g, GSI_SAME_STMT);
2109}
2110
2111/* Expand a single depend from #pragma omp ordered depend(sink:...). */
2112
2113static void
2114expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
2115 tree *counts, tree c, location_t loc)
2116{
2117 auto_vec<tree, 10> args;
2118 enum built_in_function sink_ix
2119 = fd->iter_type == long_integer_type_node
2120 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2121 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2122 int i;
2123 gimple_stmt_iterator gsi2 = *gsi;
2124 bool warned_step = false;
2125
2126 for (i = 0; i < fd->ordered; i++)
2127 {
2128 tree step = NULL_TREE;
2129 off = TREE_PURPOSE (deps);
2130 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2131 {
2132 step = TREE_OPERAND (off, 1);
2133 off = TREE_OPERAND (off, 0);
2134 }
2135 if (!integer_zerop (off))
2136 {
2137 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2138 || fd->loops[i].cond_code == GT_EXPR);
2139 bool forward = fd->loops[i].cond_code == LT_EXPR;
2140 if (step)
2141 {
2142	      /* Non-simple Fortran DO loops.  If the step is variable,
2143		 we don't even know the direction at compile time, so
2144		 we can't warn.  */
2145 if (TREE_CODE (step) != INTEGER_CST)
2146 break;
2147 forward = tree_int_cst_sgn (step) != -1;
2148 }
2149 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2150 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2151 "lexically later iteration");
2152 break;
2153 }
2154 deps = TREE_CHAIN (deps);
2155 }
2156 /* If all offsets corresponding to the collapsed loops are zero,
2157 this depend clause can be ignored. FIXME: but there is still a
2158 flush needed. We need to emit one __sync_synchronize () for it
2159 though (perhaps conditionally)? Solve this together with the
2160 conservative dependence folding optimization.
2161 if (i >= fd->collapse)
2162 return; */
2163
2164 deps = OMP_CLAUSE_DECL (c);
2165 gsi_prev (&gsi2);
2166 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2167 edge e2 = split_block_after_labels (e1->dest);
2168
2169 gsi2 = gsi_after_labels (e1->dest);
2170 *gsi = gsi_last_bb (e1->src);
2171 for (i = 0; i < fd->ordered; i++)
2172 {
2173 tree itype = TREE_TYPE (fd->loops[i].v);
2174 tree step = NULL_TREE;
2175 tree orig_off = NULL_TREE;
2176 if (POINTER_TYPE_P (itype))
2177 itype = sizetype;
2178 if (i)
2179 deps = TREE_CHAIN (deps);
2180 off = TREE_PURPOSE (deps);
2181 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2182 {
2183 step = TREE_OPERAND (off, 1);
2184 off = TREE_OPERAND (off, 0);
2185 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2186 && integer_onep (fd->loops[i].step)
2187 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2188 }
2189 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2190 if (step)
2191 {
2192 off = fold_convert_loc (loc, itype, off);
2193 orig_off = off;
2194 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2195 }
2196
2197 if (integer_zerop (off))
2198 t = boolean_true_node;
2199 else
2200 {
2201 tree a;
2202 tree co = fold_convert_loc (loc, itype, off);
2203 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2204 {
2205 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2206 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2207 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2208 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2209 co);
2210 }
2211 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2212 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2213 fd->loops[i].v, co);
2214 else
2215 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2216 fd->loops[i].v, co);
2217 if (step)
2218 {
2219 tree t1, t2;
2220 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2221 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2222 fd->loops[i].n1);
2223 else
2224 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2225 fd->loops[i].n2);
2226 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2227 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2228 fd->loops[i].n2);
2229 else
2230 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2231 fd->loops[i].n1);
2232 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2233 step, build_int_cst (TREE_TYPE (step), 0));
2234 if (TREE_CODE (step) != INTEGER_CST)
2235 {
2236 t1 = unshare_expr (t1);
2237 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2238 false, GSI_CONTINUE_LINKING);
2239 t2 = unshare_expr (t2);
2240 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2241 false, GSI_CONTINUE_LINKING);
2242 }
2243 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2244 t, t2, t1);
2245 }
2246 else if (fd->loops[i].cond_code == LT_EXPR)
2247 {
2248 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2249 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2250 fd->loops[i].n1);
2251 else
2252 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2253 fd->loops[i].n2);
2254 }
2255 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2256 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2257 fd->loops[i].n2);
2258 else
2259 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2260 fd->loops[i].n1);
2261 }
2262 if (cond)
2263 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2264 else
2265 cond = t;
2266
2267 off = fold_convert_loc (loc, itype, off);
2268
2269 if (step
2270 || (fd->loops[i].cond_code == LT_EXPR
2271 ? !integer_onep (fd->loops[i].step)
2272 : !integer_minus_onep (fd->loops[i].step)))
2273 {
2274 if (step == NULL_TREE
2275 && TYPE_UNSIGNED (itype)
2276 && fd->loops[i].cond_code == GT_EXPR)
2277 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2278 fold_build1_loc (loc, NEGATE_EXPR, itype,
2279 s));
2280 else
2281 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2282 orig_off ? orig_off : off, s);
2283 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2284 build_int_cst (itype, 0));
2285 if (integer_zerop (t) && !warned_step)
2286 {
2287 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2288 "in the iteration space");
2289 warned_step = true;
2290 }
2291 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2292 cond, t);
2293 }
2294
2295 if (i <= fd->collapse - 1 && fd->collapse > 1)
2296 t = fd->loop.v;
2297 else if (counts[i])
2298 t = counts[i];
2299 else
2300 {
2301 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2302 fd->loops[i].v, fd->loops[i].n1);
2303 t = fold_convert_loc (loc, fd->iter_type, t);
2304 }
2305 if (step)
2306 /* We have divided off by step already earlier. */;
2307 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2308 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2309 fold_build1_loc (loc, NEGATE_EXPR, itype,
2310 s));
2311 else
2312 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2313 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2314 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2315 off = fold_convert_loc (loc, fd->iter_type, off);
2316 if (i <= fd->collapse - 1 && fd->collapse > 1)
2317 {
2318 if (i)
2319 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2320 off);
2321 if (i < fd->collapse - 1)
2322 {
2323 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2324 counts[i]);
2325 continue;
2326 }
2327 }
2328 off = unshare_expr (off);
2329 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2330 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2331 true, GSI_SAME_STMT);
2332 args.safe_push (t);
2333 }
2334 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2335 gimple_set_location (g, loc);
2336 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2337
2338 cond = unshare_expr (cond);
2339 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2340 GSI_CONTINUE_LINKING);
2341 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2342 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2343 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2344 e1->probability = e3->probability.invert ();
2345 e1->flags = EDGE_TRUE_VALUE;
2346 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2347
2348 *gsi = gsi_after_labels (e2->dest);
2349}
2350
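/* To make the two helpers above concrete (a hedged sketch; the user loop
   below is invented for illustration), a doacross loop such as
     #pragma omp for ordered(2)
     for (i = 1; i < n; i++)
       for (j = 1; j < m; j++)
	 {
	   #pragma omp ordered depend(sink: i-1, j) depend(sink: i, j-1)
	   a[i][j] = f (a[i-1][j], a[i][j-1]);
	   #pragma omp ordered depend(source)
	 }
   is expanded so that each depend(sink: ...) becomes a guarded call to
   GOMP_doacross_wait (or GOMP_doacross_ull_wait) with the translated
   iteration numbers, and depend(source) becomes a GOMP_doacross_post
   (or GOMP_doacross_ull_post) on the current counter array.  */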
2351/* Expand all #pragma omp ordered depend(source) and
2352 #pragma omp ordered depend(sink:...) constructs in the current
2353 #pragma omp for ordered(n) region. */
2354
2355static void
2356expand_omp_ordered_source_sink (struct omp_region *region,
2357 struct omp_for_data *fd, tree *counts,
2358 basic_block cont_bb)
2359{
2360 struct omp_region *inner;
2361 int i;
2362 for (i = fd->collapse - 1; i < fd->ordered; i++)
2363 if (i == fd->collapse - 1 && fd->collapse > 1)
2364 counts[i] = NULL_TREE;
2365 else if (i >= fd->collapse && !cont_bb)
2366 counts[i] = build_zero_cst (fd->iter_type);
2367 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2368 && integer_onep (fd->loops[i].step))
2369 counts[i] = NULL_TREE;
2370 else
2371 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2372 tree atype
2373 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2374 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2375 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2376
2377 for (inner = region->inner; inner; inner = inner->next)
2378 if (inner->type == GIMPLE_OMP_ORDERED)
2379 {
2380 gomp_ordered *ord_stmt = inner->ord_stmt;
2381 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2382 location_t loc = gimple_location (ord_stmt);
2383 tree c;
2384 for (c = gimple_omp_ordered_clauses (ord_stmt);
2385 c; c = OMP_CLAUSE_CHAIN (c))
2386 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2387 break;
2388 if (c)
2389 expand_omp_ordered_source (&gsi, fd, counts, loc);
2390 for (c = gimple_omp_ordered_clauses (ord_stmt);
2391 c; c = OMP_CLAUSE_CHAIN (c))
2392 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2393 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2394 gsi_remove (&gsi, true);
2395 }
2396}
2397
2398/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2399 collapsed. */
2400
2401static basic_block
2402expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2403 basic_block cont_bb, basic_block body_bb,
2404 bool ordered_lastprivate)
2405{
2406 if (fd->ordered == fd->collapse)
2407 return cont_bb;
2408
2409 if (!cont_bb)
2410 {
2411 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2412 for (int i = fd->collapse; i < fd->ordered; i++)
2413 {
2414 tree type = TREE_TYPE (fd->loops[i].v);
2415 tree n1 = fold_convert (type, fd->loops[i].n1);
2416 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2417 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2418 size_int (i - fd->collapse + 1),
2419 NULL_TREE, NULL_TREE);
2420 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2421 }
2422 return NULL;
2423 }
2424
2425 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2426 {
2427 tree t, type = TREE_TYPE (fd->loops[i].v);
2428 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2429 expand_omp_build_assign (&gsi, fd->loops[i].v,
2430 fold_convert (type, fd->loops[i].n1));
2431 if (counts[i])
2432 expand_omp_build_assign (&gsi, counts[i],
2433 build_zero_cst (fd->iter_type));
2434 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2435 size_int (i - fd->collapse + 1),
2436 NULL_TREE, NULL_TREE);
2437 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2438 if (!gsi_end_p (gsi))
2439 gsi_prev (&gsi);
2440 else
2441 gsi = gsi_last_bb (body_bb);
2442 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2443 basic_block new_body = e1->dest;
2444 if (body_bb == cont_bb)
2445 cont_bb = new_body;
2446 edge e2 = NULL;
2447 basic_block new_header;
2448 if (EDGE_COUNT (cont_bb->preds) > 0)
2449 {
2450 gsi = gsi_last_bb (cont_bb);
2451 if (POINTER_TYPE_P (type))
2452 t = fold_build_pointer_plus (fd->loops[i].v,
2453 fold_convert (sizetype,
2454 fd->loops[i].step));
2455 else
2456 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2457 fold_convert (type, fd->loops[i].step));
2458 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2459 if (counts[i])
2460 {
2461 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2462 build_int_cst (fd->iter_type, 1));
2463 expand_omp_build_assign (&gsi, counts[i], t);
2464 t = counts[i];
2465 }
2466 else
2467 {
2468 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2469 fd->loops[i].v, fd->loops[i].n1);
2470 t = fold_convert (fd->iter_type, t);
2471 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2472 true, GSI_SAME_STMT);
2473 }
2474 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2475 size_int (i - fd->collapse + 1),
2476 NULL_TREE, NULL_TREE);
2477 expand_omp_build_assign (&gsi, aref, t);
2478 gsi_prev (&gsi);
2479 e2 = split_block (cont_bb, gsi_stmt (gsi));
2480 new_header = e2->dest;
2481 }
2482 else
2483 new_header = cont_bb;
2484 gsi = gsi_after_labels (new_header);
2485 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2486 true, GSI_SAME_STMT);
2487 tree n2
2488 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2489 true, NULL_TREE, true, GSI_SAME_STMT);
2490 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2491 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2492 edge e3 = split_block (new_header, gsi_stmt (gsi));
2493 cont_bb = e3->dest;
2494 remove_edge (e1);
2495 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2496 e3->flags = EDGE_FALSE_VALUE;
357067f2 2497 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
629b3d75 2498 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
357067f2 2499 e1->probability = e3->probability.invert ();
2500
2501 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2502 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2503
2504 if (e2)
2505 {
2506 struct loop *loop = alloc_loop ();
2507 loop->header = new_header;
2508 loop->latch = e2->src;
2509 add_loop (loop, body_bb->loop_father);
2510 }
2511 }
2512
2513 /* If there are any lastprivate clauses and it is possible some loops
2514 might have zero iterations, ensure all the decls are initialized,
2515 otherwise we could crash evaluating C++ class iterators with lastprivate
2516 clauses. */
2517 bool need_inits = false;
2518 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2519 if (need_inits)
2520 {
2521 tree type = TREE_TYPE (fd->loops[i].v);
2522 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2523 expand_omp_build_assign (&gsi, fd->loops[i].v,
2524 fold_convert (type, fd->loops[i].n1));
2525 }
2526 else
2527 {
2528 tree type = TREE_TYPE (fd->loops[i].v);
2529 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2530 boolean_type_node,
2531 fold_convert (type, fd->loops[i].n1),
2532 fold_convert (type, fd->loops[i].n2));
2533 if (!integer_onep (this_cond))
2534 need_inits = true;
2535 }
2536
2537 return cont_bb;
2538}
2539
2540/* A subroutine of expand_omp_for. Generate code for a parallel
2541 loop with any schedule. Given parameters:
2542
2543 for (V = N1; V cond N2; V += STEP) BODY;
2544
2545 where COND is "<" or ">", we generate pseudocode
2546
2547 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2548 if (more) goto L0; else goto L3;
2549 L0:
2550 V = istart0;
2551 iend = iend0;
2552 L1:
2553 BODY;
2554 V += STEP;
2555 if (V cond iend) goto L1; else goto L2;
2556 L2:
2557 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2558 L3:
2559
2560 If this is a combined omp parallel loop, instead of the call to
2561 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2562 If this is gimple_omp_for_combined_p loop, then instead of assigning
2563 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2564 inner GIMPLE_OMP_FOR and V += STEP; and
2565 if (V cond iend) goto L1; else goto L2; are removed.
2566
2567 For collapsed loops, given parameters:
2568 collapse(3)
2569 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2570 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2571 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2572 BODY;
2573
2574 we generate pseudocode
2575
2576 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2577 if (cond3 is <)
2578 adj = STEP3 - 1;
2579 else
2580 adj = STEP3 + 1;
2581 count3 = (adj + N32 - N31) / STEP3;
2582 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2583 if (cond2 is <)
2584 adj = STEP2 - 1;
2585 else
2586 adj = STEP2 + 1;
2587 count2 = (adj + N22 - N21) / STEP2;
2588 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2589 if (cond1 is <)
2590 adj = STEP1 - 1;
2591 else
2592 adj = STEP1 + 1;
2593 count1 = (adj + N12 - N11) / STEP1;
2594 count = count1 * count2 * count3;
2595 goto Z1;
2596 Z0:
2597 count = 0;
2598 Z1:
2599 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2600 if (more) goto L0; else goto L3;
2601 L0:
2602 V = istart0;
2603 T = V;
2604 V3 = N31 + (T % count3) * STEP3;
2605 T = T / count3;
2606 V2 = N21 + (T % count2) * STEP2;
2607 T = T / count2;
2608 V1 = N11 + T * STEP1;
2609 iend = iend0;
2610 L1:
2611 BODY;
2612 V += 1;
2613 if (V < iend) goto L10; else goto L2;
2614 L10:
2615 V3 += STEP3;
2616 if (V3 cond3 N32) goto L1; else goto L11;
2617 L11:
2618 V3 = N31;
2619 V2 += STEP2;
2620 if (V2 cond2 N22) goto L1; else goto L12;
2621 L12:
2622 V2 = N21;
2623 V1 += STEP1;
2624 goto L1;
2625 L2:
2626 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2627 L3:
2628
2629 */
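/* As a rough illustration of what the GOMP_loop_foo_* pattern above means
   at the source level (a sketch only; the chunk size 4 and the variable
   names are made up here), a loop like
     #pragma omp parallel for schedule(dynamic, 4)
     for (i = 0; i < n; i++) body (i);
   behaves as if each thread ran
     long istart, iend;
     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart, &iend))
       do
	 for (i = istart; i < iend; i++) body (i);
       while (GOMP_loop_dynamic_next (&istart, &iend));
     GOMP_loop_end ();
   with the combined-parallel and collapsed variants adjusting which start
   and next functions are called, as described above.  */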
2630
2631static void
2632expand_omp_for_generic (struct omp_region *region,
2633 struct omp_for_data *fd,
2634 enum built_in_function start_fn,
2635 enum built_in_function next_fn,
28567c40 2636 tree sched_arg,
2637 gimple *inner_stmt)
2638{
2639 tree type, istart0, iend0, iend;
2640 tree t, vmain, vback, bias = NULL_TREE;
2641 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2642 basic_block l2_bb = NULL, l3_bb = NULL;
2643 gimple_stmt_iterator gsi;
2644 gassign *assign_stmt;
2645 bool in_combined_parallel = is_combined_parallel (region);
2646 bool broken_loop = region->cont == NULL;
2647 edge e, ne;
2648 tree *counts = NULL;
2649 int i;
2650 bool ordered_lastprivate = false;
2651
2652 gcc_assert (!broken_loop || !in_combined_parallel);
2653 gcc_assert (fd->iter_type == long_integer_type_node
2654 || !in_combined_parallel);
2655
2656 entry_bb = region->entry;
2657 cont_bb = region->cont;
2658 collapse_bb = NULL;
2659 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2660 gcc_assert (broken_loop
2661 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2662 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2663 l1_bb = single_succ (l0_bb);
2664 if (!broken_loop)
2665 {
2666 l2_bb = create_empty_bb (cont_bb);
2667 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2668 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2669 == l1_bb));
2670 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2671 }
2672 else
2673 l2_bb = NULL;
2674 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2675 exit_bb = region->exit;
2676
65f4b875 2677 gsi = gsi_last_nondebug_bb (entry_bb);
2678
2679 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2680 if (fd->ordered
2681 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2682 OMP_CLAUSE_LASTPRIVATE))
2683 ordered_lastprivate = false;
2684 tree reductions = NULL_TREE;
2685 tree mem = NULL_TREE;
2686 if (sched_arg)
2687 {
2688 if (fd->have_reductemp)
2689 {
2690 tree c = omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2691 OMP_CLAUSE__REDUCTEMP_);
2692 reductions = OMP_CLAUSE_DECL (c);
2693 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
2694 gimple *g = SSA_NAME_DEF_STMT (reductions);
2695 reductions = gimple_assign_rhs1 (g);
2696 OMP_CLAUSE_DECL (c) = reductions;
2697 entry_bb = gimple_bb (g);
2698 edge e = split_block (entry_bb, g);
2699 if (region->entry == entry_bb)
2700 region->entry = e->dest;
2701 gsi = gsi_last_bb (entry_bb);
2702 }
2703 else
2704 reductions = null_pointer_node;
2705 /* For now. */
2706 mem = null_pointer_node;
2707 }
2708 if (fd->collapse > 1 || fd->ordered)
2709 {
2710 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2711 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2712
2713 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2714 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2715 zero_iter1_bb, first_zero_iter1,
2716 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2717
2718 if (zero_iter1_bb)
2719 {
2720 /* Some counts[i] vars might be uninitialized if
2721 some loop has zero iterations. But the body shouldn't
2722 be executed in that case, so just avoid uninit warnings. */
2723 for (i = first_zero_iter1;
2724 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2725 if (SSA_VAR_P (counts[i]))
2726 TREE_NO_WARNING (counts[i]) = 1;
2727 gsi_prev (&gsi);
2728 e = split_block (entry_bb, gsi_stmt (gsi));
2729 entry_bb = e->dest;
2730 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
65f4b875 2731 gsi = gsi_last_nondebug_bb (entry_bb);
2732 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2733 get_immediate_dominator (CDI_DOMINATORS,
2734 zero_iter1_bb));
2735 }
2736 if (zero_iter2_bb)
2737 {
2738 /* Some counts[i] vars might be uninitialized if
2739 some loop has zero iterations. But the body shouldn't
2740 be executed in that case, so just avoid uninit warnings. */
2741 for (i = first_zero_iter2; i < fd->ordered; i++)
2742 if (SSA_VAR_P (counts[i]))
2743 TREE_NO_WARNING (counts[i]) = 1;
2744 if (zero_iter1_bb)
2745 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2746 else
2747 {
2748 gsi_prev (&gsi);
2749 e = split_block (entry_bb, gsi_stmt (gsi));
2750 entry_bb = e->dest;
2751 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
65f4b875 2752 gsi = gsi_last_nondebug_bb (entry_bb);
2753 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2754 get_immediate_dominator
2755 (CDI_DOMINATORS, zero_iter2_bb));
2756 }
2757 }
2758 if (fd->collapse == 1)
2759 {
2760 counts[0] = fd->loop.n2;
2761 fd->loop = fd->loops[0];
2762 }
2763 }
2764
2765 type = TREE_TYPE (fd->loop.v);
2766 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2767 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2768 TREE_ADDRESSABLE (istart0) = 1;
2769 TREE_ADDRESSABLE (iend0) = 1;
2770
2771 /* See if we need to bias by LLONG_MIN. */
2772 if (fd->iter_type == long_long_unsigned_type_node
2773 && TREE_CODE (type) == INTEGER_TYPE
2774 && !TYPE_UNSIGNED (type)
2775 && fd->ordered == 0)
2776 {
2777 tree n1, n2;
2778
2779 if (fd->loop.cond_code == LT_EXPR)
2780 {
2781 n1 = fd->loop.n1;
2782 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2783 }
2784 else
2785 {
2786 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2787 n2 = fd->loop.n1;
2788 }
2789 if (TREE_CODE (n1) != INTEGER_CST
2790 || TREE_CODE (n2) != INTEGER_CST
2791 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2792 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2793 }
2794
2795 gimple_stmt_iterator gsif = gsi;
2796 gsi_prev (&gsif);
2797
2798 tree arr = NULL_TREE;
2799 if (in_combined_parallel)
2800 {
2801 gcc_assert (fd->ordered == 0);
2802 /* In a combined parallel loop, emit a call to
2803 GOMP_loop_foo_next. */
2804 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2805 build_fold_addr_expr (istart0),
2806 build_fold_addr_expr (iend0));
2807 }
2808 else
2809 {
2810 tree t0, t1, t2, t3, t4;
2811 /* If this is not a combined parallel loop, emit a call to
2812 GOMP_loop_foo_start in ENTRY_BB. */
2813 t4 = build_fold_addr_expr (iend0);
2814 t3 = build_fold_addr_expr (istart0);
2815 if (fd->ordered)
2816 {
2817 t0 = build_int_cst (unsigned_type_node,
2818 fd->ordered - fd->collapse + 1);
2819 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2820 fd->ordered
2821 - fd->collapse + 1),
2822 ".omp_counts");
2823 DECL_NAMELESS (arr) = 1;
2824 TREE_ADDRESSABLE (arr) = 1;
2825 TREE_STATIC (arr) = 1;
2826 vec<constructor_elt, va_gc> *v;
2827 vec_alloc (v, fd->ordered - fd->collapse + 1);
2828 int idx;
2829
2830 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2831 {
2832 tree c;
2833 if (idx == 0 && fd->collapse > 1)
2834 c = fd->loop.n2;
2835 else
2836 c = counts[idx + fd->collapse - 1];
2837 tree purpose = size_int (idx);
2838 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2839 if (TREE_CODE (c) != INTEGER_CST)
2840 TREE_STATIC (arr) = 0;
2841 }
2842
2843 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2844 if (!TREE_STATIC (arr))
2845 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2846 void_type_node, arr),
2847 true, NULL_TREE, true, GSI_SAME_STMT);
2848 t1 = build_fold_addr_expr (arr);
2849 t2 = NULL_TREE;
2850 }
2851 else
2852 {
2853 t2 = fold_convert (fd->iter_type, fd->loop.step);
2854 t1 = fd->loop.n2;
2855 t0 = fd->loop.n1;
2856 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2857 {
2858 tree innerc
2859 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2860 OMP_CLAUSE__LOOPTEMP_);
2861 gcc_assert (innerc);
2862 t0 = OMP_CLAUSE_DECL (innerc);
2863 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2864 OMP_CLAUSE__LOOPTEMP_);
2865 gcc_assert (innerc);
2866 t1 = OMP_CLAUSE_DECL (innerc);
2867 }
2868 if (POINTER_TYPE_P (TREE_TYPE (t0))
2869 && TYPE_PRECISION (TREE_TYPE (t0))
2870 != TYPE_PRECISION (fd->iter_type))
2871 {
2872 /* Avoid casting pointers to integer of a different size. */
2873 tree itype = signed_type_for (type);
2874 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2875 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2876 }
2877 else
2878 {
2879 t1 = fold_convert (fd->iter_type, t1);
2880 t0 = fold_convert (fd->iter_type, t0);
2881 }
2882 if (bias)
2883 {
2884 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2885 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2886 }
2887 }
2888 if (fd->iter_type == long_integer_type_node || fd->ordered)
2889 {
2890 if (fd->chunk_size)
2891 {
2892 t = fold_convert (fd->iter_type, fd->chunk_size);
2893 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2894 if (sched_arg)
2895 {
2896 if (fd->ordered)
2897 t = build_call_expr (builtin_decl_explicit (start_fn),
2898 8, t0, t1, sched_arg, t, t3, t4,
2899 reductions, mem);
2900 else
2901 t = build_call_expr (builtin_decl_explicit (start_fn),
2902 9, t0, t1, t2, sched_arg, t, t3, t4,
2903 reductions, mem);
2904 }
2905 else if (fd->ordered)
2906 t = build_call_expr (builtin_decl_explicit (start_fn),
2907 5, t0, t1, t, t3, t4);
2908 else
2909 t = build_call_expr (builtin_decl_explicit (start_fn),
2910 6, t0, t1, t2, t, t3, t4);
2911 }
2912 else if (fd->ordered)
2913 t = build_call_expr (builtin_decl_explicit (start_fn),
2914 4, t0, t1, t3, t4);
2915 else
2916 t = build_call_expr (builtin_decl_explicit (start_fn),
2917 5, t0, t1, t2, t3, t4);
2918 }
2919 else
2920 {
2921 tree t5;
2922 tree c_bool_type;
2923 tree bfn_decl;
2924
2925 /* The GOMP_loop_ull_*start functions have additional boolean
2926 argument, true for < loops and false for > loops.
2927 In Fortran, the C bool type can be different from
2928 boolean_type_node. */
2929 bfn_decl = builtin_decl_explicit (start_fn);
2930 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2931 t5 = build_int_cst (c_bool_type,
2932 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2933 if (fd->chunk_size)
2934 {
2935 tree bfn_decl = builtin_decl_explicit (start_fn);
2936 t = fold_convert (fd->iter_type, fd->chunk_size);
2937 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2938 if (sched_arg)
2939 t = build_call_expr (bfn_decl, 10, t5, t0, t1, t2, sched_arg,
2940 t, t3, t4, reductions, mem);
2941 else
2942 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2943 }
2944 else
2945 t = build_call_expr (builtin_decl_explicit (start_fn),
2946 6, t5, t0, t1, t2, t3, t4);
2947 }
2948 }
2949 if (TREE_TYPE (t) != boolean_type_node)
2950 t = fold_build2 (NE_EXPR, boolean_type_node,
2951 t, build_int_cst (TREE_TYPE (t), 0));
2952 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
01914336 2953 true, GSI_SAME_STMT);
2954 if (arr && !TREE_STATIC (arr))
2955 {
2956 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2957 TREE_THIS_VOLATILE (clobber) = 1;
2958 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2959 GSI_SAME_STMT);
2960 }
2961 if (fd->have_reductemp)
2962 {
2963 gimple *g = gsi_stmt (gsi);
2964 gsi_remove (&gsi, true);
2965 release_ssa_name (gimple_assign_lhs (g));
2966
2967 entry_bb = region->entry;
2968 gsi = gsi_last_nondebug_bb (entry_bb);
2969
2970 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2971 }
2972 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2973
2974 /* Remove the GIMPLE_OMP_FOR statement. */
2975 gsi_remove (&gsi, true);
2976
2977 if (gsi_end_p (gsif))
2978 gsif = gsi_after_labels (gsi_bb (gsif));
2979 gsi_next (&gsif);
2980
2981 /* Iteration setup for sequential loop goes in L0_BB. */
2982 tree startvar = fd->loop.v;
2983 tree endvar = NULL_TREE;
2984
2985 if (gimple_omp_for_combined_p (fd->for_stmt))
2986 {
2987 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2988 && gimple_omp_for_kind (inner_stmt)
2989 == GF_OMP_FOR_KIND_SIMD);
2990 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2991 OMP_CLAUSE__LOOPTEMP_);
2992 gcc_assert (innerc);
2993 startvar = OMP_CLAUSE_DECL (innerc);
2994 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2995 OMP_CLAUSE__LOOPTEMP_);
2996 gcc_assert (innerc);
2997 endvar = OMP_CLAUSE_DECL (innerc);
2998 }
2999
3000 gsi = gsi_start_bb (l0_bb);
3001 t = istart0;
3002 if (fd->ordered && fd->collapse == 1)
3003 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3004 fold_convert (fd->iter_type, fd->loop.step));
3005 else if (bias)
3006 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3007 if (fd->ordered && fd->collapse == 1)
3008 {
3009 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3010 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3011 fd->loop.n1, fold_convert (sizetype, t));
3012 else
3013 {
3014 t = fold_convert (TREE_TYPE (startvar), t);
3015 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3016 fd->loop.n1, t);
3017 }
3018 }
3019 else
3020 {
3021 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3022 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3023 t = fold_convert (TREE_TYPE (startvar), t);
3024 }
3025 t = force_gimple_operand_gsi (&gsi, t,
3026 DECL_P (startvar)
3027 && TREE_ADDRESSABLE (startvar),
3028 NULL_TREE, false, GSI_CONTINUE_LINKING);
3029 assign_stmt = gimple_build_assign (startvar, t);
3030 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3031
3032 t = iend0;
3033 if (fd->ordered && fd->collapse == 1)
3034 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
3035 fold_convert (fd->iter_type, fd->loop.step));
3036 else if (bias)
3037 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
3038 if (fd->ordered && fd->collapse == 1)
3039 {
3040 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3041 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
3042 fd->loop.n1, fold_convert (sizetype, t));
3043 else
3044 {
3045 t = fold_convert (TREE_TYPE (startvar), t);
3046 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
3047 fd->loop.n1, t);
3048 }
3049 }
3050 else
3051 {
3052 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
3053 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
3054 t = fold_convert (TREE_TYPE (startvar), t);
3055 }
3056 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3057 false, GSI_CONTINUE_LINKING);
3058 if (endvar)
3059 {
3060 assign_stmt = gimple_build_assign (endvar, iend);
3061 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3062 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
3063 assign_stmt = gimple_build_assign (fd->loop.v, iend);
3064 else
3065 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
3066 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3067 }
3068 /* Handle linear clause adjustments. */
3069 tree itercnt = NULL_TREE;
3070 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3071 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3072 c; c = OMP_CLAUSE_CHAIN (c))
3073 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3074 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3075 {
3076 tree d = OMP_CLAUSE_DECL (c);
3077 bool is_ref = omp_is_reference (d);
3078 tree t = d, a, dest;
3079 if (is_ref)
3080 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3081 tree type = TREE_TYPE (t);
3082 if (POINTER_TYPE_P (type))
3083 type = sizetype;
3084 dest = unshare_expr (t);
3085 tree v = create_tmp_var (TREE_TYPE (t), NULL);
3086 expand_omp_build_assign (&gsif, v, t);
3087 if (itercnt == NULL_TREE)
3088 {
3089 itercnt = startvar;
3090 tree n1 = fd->loop.n1;
3091 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
3092 {
3093 itercnt
3094 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
3095 itercnt);
3096 n1 = fold_convert (TREE_TYPE (itercnt), n1);
3097 }
3098 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
3099 itercnt, n1);
3100 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
3101 itercnt, fd->loop.step);
3102 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3103 NULL_TREE, false,
3104 GSI_CONTINUE_LINKING);
3105 }
3106 a = fold_build2 (MULT_EXPR, type,
3107 fold_convert (type, itercnt),
3108 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3109 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3110 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
3111 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3112 false, GSI_CONTINUE_LINKING);
3113 assign_stmt = gimple_build_assign (dest, t);
3114 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3115 }
3116 if (fd->collapse > 1)
3117 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3118
3119 if (fd->ordered)
3120 {
3121	      /* Until now, the counts array contained the number of iterations
3122		 (or a variable containing it) for the ith loop.  From now on,
3123		 we need those counts only for the collapsed loops, and only
3124		 for the 2nd through the last collapsed one.  Move them one
3125		 element earlier; we'll use counts[fd->collapse - 1] for the
3126		 first source/sink iteration counter and so on, and
3127		 counts[fd->ordered] as the array holding the current counter
3128		 values for depend(source).  */
3129 if (fd->collapse > 1)
3130 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
3131 if (broken_loop)
3132 {
3133 int i;
3134 for (i = fd->collapse; i < fd->ordered; i++)
3135 {
3136 tree type = TREE_TYPE (fd->loops[i].v);
3137 tree this_cond
3138 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
3139 fold_convert (type, fd->loops[i].n1),
3140 fold_convert (type, fd->loops[i].n2));
3141 if (!integer_onep (this_cond))
3142 break;
3143 }
3144 if (i < fd->ordered)
3145 {
3146 cont_bb
3147 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
3148 add_bb_to_loop (cont_bb, l1_bb->loop_father);
3149 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
3150 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
3151 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
3152 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
3153 make_edge (cont_bb, l1_bb, 0);
3154 l2_bb = create_empty_bb (cont_bb);
3155 broken_loop = false;
3156 }
3157 }
3158 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
3159 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
3160 ordered_lastprivate);
3161 if (counts[fd->collapse - 1])
3162 {
3163 gcc_assert (fd->collapse == 1);
3164 gsi = gsi_last_bb (l0_bb);
3165 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
3166 istart0, true);
3167 gsi = gsi_last_bb (cont_bb);
3168 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
3169 build_int_cst (fd->iter_type, 1));
3170 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3171 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3172 size_zero_node, NULL_TREE, NULL_TREE);
3173 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3174 t = counts[fd->collapse - 1];
3175 }
3176 else if (fd->collapse > 1)
3177 t = fd->loop.v;
3178 else
3179 {
3180 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3181 fd->loops[0].v, fd->loops[0].n1);
3182 t = fold_convert (fd->iter_type, t);
3183 }
3184 gsi = gsi_last_bb (l0_bb);
3185 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3186 size_zero_node, NULL_TREE, NULL_TREE);
3187 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3188 false, GSI_CONTINUE_LINKING);
3189 expand_omp_build_assign (&gsi, aref, t, true);
3190 }
3191
3192 if (!broken_loop)
3193 {
3194 /* Code to control the increment and predicate for the sequential
3195 loop goes in the CONT_BB. */
65f4b875 3196 gsi = gsi_last_nondebug_bb (cont_bb);
3197 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3198 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3199 vmain = gimple_omp_continue_control_use (cont_stmt);
3200 vback = gimple_omp_continue_control_def (cont_stmt);
3201
3202 if (!gimple_omp_for_combined_p (fd->for_stmt))
3203 {
3204 if (POINTER_TYPE_P (type))
3205 t = fold_build_pointer_plus (vmain, fd->loop.step);
3206 else
3207 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3208 t = force_gimple_operand_gsi (&gsi, t,
3209 DECL_P (vback)
3210 && TREE_ADDRESSABLE (vback),
3211 NULL_TREE, true, GSI_SAME_STMT);
3212 assign_stmt = gimple_build_assign (vback, t);
3213 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3214
3215 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3216 {
d1ffbd43 3217 tree tem;
629b3d75 3218 if (fd->collapse > 1)
d1ffbd43 3219 tem = fd->loop.v;
3220 else
3221 {
3222 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3223 fd->loops[0].v, fd->loops[0].n1);
3224 tem = fold_convert (fd->iter_type, tem);
3225 }
3226 tree aref = build4 (ARRAY_REF, fd->iter_type,
3227 counts[fd->ordered], size_zero_node,
3228 NULL_TREE, NULL_TREE);
3229 tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
3230 true, GSI_SAME_STMT);
3231 expand_omp_build_assign (&gsi, aref, tem);
3232 }
3233
3234 t = build2 (fd->loop.cond_code, boolean_type_node,
3235 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3236 iend);
3237 gcond *cond_stmt = gimple_build_cond_empty (t);
3238 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3239 }
3240
3241 /* Remove GIMPLE_OMP_CONTINUE. */
3242 gsi_remove (&gsi, true);
3243
3244 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3245 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3246
3247 /* Emit code to get the next parallel iteration in L2_BB. */
3248 gsi = gsi_start_bb (l2_bb);
3249
3250 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3251 build_fold_addr_expr (istart0),
3252 build_fold_addr_expr (iend0));
3253 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3254 false, GSI_CONTINUE_LINKING);
3255 if (TREE_TYPE (t) != boolean_type_node)
3256 t = fold_build2 (NE_EXPR, boolean_type_node,
3257 t, build_int_cst (TREE_TYPE (t), 0));
3258 gcond *cond_stmt = gimple_build_cond_empty (t);
3259 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3260 }
3261
3262 /* Add the loop cleanup function. */
65f4b875 3263 gsi = gsi_last_nondebug_bb (exit_bb);
3264 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3265 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3266 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3267 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3268 else
3269 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3270 gcall *call_stmt = gimple_build_call (t, 0);
3271 if (fd->ordered)
3272 {
3273 tree arr = counts[fd->ordered];
3274 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3275 TREE_THIS_VOLATILE (clobber) = 1;
3276 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3277 GSI_SAME_STMT);
3278 }
3279 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3280 {
3281 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3282 if (fd->have_reductemp)
3283 {
3284 gimple *g = gimple_build_assign (reductions, NOP_EXPR,
3285 gimple_call_lhs (call_stmt));
3286 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3287 }
3288 }
3289 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3290 gsi_remove (&gsi, true);
3291
3292 /* Connect the new blocks. */
3293 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3294 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3295
3296 if (!broken_loop)
3297 {
3298 gimple_seq phis;
3299
3300 e = find_edge (cont_bb, l3_bb);
3301 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3302
3303 phis = phi_nodes (l3_bb);
3304 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3305 {
3306 gimple *phi = gsi_stmt (gsi);
3307 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3308 PHI_ARG_DEF_FROM_EDGE (phi, e));
3309 }
3310 remove_edge (e);
3311
3312 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3313 e = find_edge (cont_bb, l1_bb);
3314 if (e == NULL)
3315 {
3316 e = BRANCH_EDGE (cont_bb);
3317 gcc_assert (single_succ (e->dest) == l1_bb);
3318 }
3319 if (gimple_omp_for_combined_p (fd->for_stmt))
3320 {
3321 remove_edge (e);
3322 e = NULL;
3323 }
3324 else if (fd->collapse > 1)
3325 {
3326 remove_edge (e);
3327 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3328 }
3329 else
3330 e->flags = EDGE_TRUE_VALUE;
3331 if (e)
3332 {
3333 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3334 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3335 }
3336 else
3337 {
3338 e = find_edge (cont_bb, l2_bb);
3339 e->flags = EDGE_FALLTHRU;
3340 }
3341 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3342
3343 if (gimple_in_ssa_p (cfun))
3344 {
3345 /* Add phis to the outer loop that connect to the phis in the inner,
3346 original loop, and move the loop entry value of the inner phi to
3347 the loop entry value of the outer phi. */
3348 gphi_iterator psi;
3349 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3350 {
620e594b 3351 location_t locus;
3352 gphi *nphi;
3353 gphi *exit_phi = psi.phi ();
3354
3355 if (virtual_operand_p (gimple_phi_result (exit_phi)))
3356 continue;
3357
3358 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3359 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3360
3361 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3362 edge latch_to_l1 = find_edge (latch, l1_bb);
3363 gphi *inner_phi
3364 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3365
3366 tree t = gimple_phi_result (exit_phi);
3367 tree new_res = copy_ssa_name (t, NULL);
3368 nphi = create_phi_node (new_res, l0_bb);
3369
3370 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3371 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3372 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3373 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3374 add_phi_arg (nphi, t, entry_to_l0, locus);
3375
3376 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3377 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3378
3379 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
164485b5 3380 }
3381 }
3382
3383 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3384 recompute_dominator (CDI_DOMINATORS, l2_bb));
3385 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3386 recompute_dominator (CDI_DOMINATORS, l3_bb));
3387 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3388 recompute_dominator (CDI_DOMINATORS, l0_bb));
3389 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3390 recompute_dominator (CDI_DOMINATORS, l1_bb));
3391
3392 /* We enter expand_omp_for_generic with a loop. This original loop may
3393 have its own loop struct, or it may be part of an outer loop struct
3394 (which may be the fake loop). */
3395 struct loop *outer_loop = entry_bb->loop_father;
3396 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3397
3398 add_bb_to_loop (l2_bb, outer_loop);
3399
3400 /* We've added a new loop around the original loop. Allocate the
3401 corresponding loop struct. */
3402 struct loop *new_loop = alloc_loop ();
3403 new_loop->header = l0_bb;
3404 new_loop->latch = l2_bb;
3405 add_loop (new_loop, outer_loop);
3406
3407 /* Allocate a loop structure for the original loop unless we already
3408 had one. */
3409 if (!orig_loop_has_loop_struct
3410 && !gimple_omp_for_combined_p (fd->for_stmt))
3411 {
3412 struct loop *orig_loop = alloc_loop ();
3413 orig_loop->header = l1_bb;
3414 /* The loop may have multiple latches. */
3415 add_loop (orig_loop, new_loop);
3416 }
3417 }
3418}
3419
3420/* A subroutine of expand_omp_for. Generate code for a parallel
3421 loop with static schedule and no specified chunk size. Given
3422 parameters:
3423
3424 for (V = N1; V cond N2; V += STEP) BODY;
3425
3426 where COND is "<" or ">", we generate pseudocode
3427
3428 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3429 if (cond is <)
3430 adj = STEP - 1;
3431 else
3432 adj = STEP + 1;
3433 if ((__typeof (V)) -1 > 0 && cond is >)
3434 n = -(adj + N2 - N1) / -STEP;
3435 else
3436 n = (adj + N2 - N1) / STEP;
3437 q = n / nthreads;
3438 tt = n % nthreads;
3439 if (threadid < tt) goto L3; else goto L4;
3440 L3:
3441 tt = 0;
3442 q = q + 1;
3443 L4:
3444 s0 = q * threadid + tt;
3445 e0 = s0 + q;
3446 V = s0 * STEP + N1;
3447 if (s0 >= e0) goto L2; else goto L0;
3448 L0:
3449 e = e0 * STEP + N1;
3450 L1:
3451 BODY;
3452 V += STEP;
3453 if (V cond e) goto L1;
3454 L2:
3455*/
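/* As a concrete illustration of the partitioning above (a sketch with
   plain integers, not code the compiler emits):

     q = n / nthreads;  tt = n % nthreads;
     if (threadid < tt) { tt = 0; q = q + 1; }
     s0 = q * threadid + tt;  e0 = s0 + q;

   e.g. n = 10, nthreads = 4 gives q = 2, tt = 2, so threads 0 and 1 get
   the 3-iteration blocks [0,3) and [3,6) while threads 2 and 3 get the
   2-iteration blocks [6,8) and [8,10); the first n % nthreads threads
   absorb the remainder and the blocks exactly cover [0,n).  */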
3456
3457static void
3458expand_omp_for_static_nochunk (struct omp_region *region,
3459 struct omp_for_data *fd,
3460 gimple *inner_stmt)
3461{
3462 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3463 tree type, itype, vmain, vback;
3464 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3465 basic_block body_bb, cont_bb, collapse_bb = NULL;
3466 basic_block fin_bb;
3467 gimple_stmt_iterator gsi;
3468 edge ep;
3469 bool broken_loop = region->cont == NULL;
3470 tree *counts = NULL;
3471 tree n1, n2, step;
28567c40 3472 tree reductions = NULL_TREE;
3473
3474 itype = type = TREE_TYPE (fd->loop.v);
3475 if (POINTER_TYPE_P (type))
3476 itype = signed_type_for (type);
3477
3478 entry_bb = region->entry;
3479 cont_bb = region->cont;
3480 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3481 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3482 gcc_assert (broken_loop
3483 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3484 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3485 body_bb = single_succ (seq_start_bb);
3486 if (!broken_loop)
3487 {
3488 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3489 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3490 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3491 }
3492 exit_bb = region->exit;
3493
3494 /* Iteration space partitioning goes in ENTRY_BB. */
65f4b875 3495 gsi = gsi_last_nondebug_bb (entry_bb);
3496 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3497
3498 if (fd->collapse > 1)
3499 {
3500 int first_zero_iter = -1, dummy = -1;
3501 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3502
3503 counts = XALLOCAVEC (tree, fd->collapse);
3504 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3505 fin_bb, first_zero_iter,
3506 dummy_bb, dummy, l2_dom_bb);
3507 t = NULL_TREE;
3508 }
3509 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3510 t = integer_one_node;
3511 else
3512 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3513 fold_convert (type, fd->loop.n1),
3514 fold_convert (type, fd->loop.n2));
3515 if (fd->collapse == 1
3516 && TYPE_UNSIGNED (type)
3517 && (t == NULL_TREE || !integer_onep (t)))
3518 {
3519 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3520 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3521 true, GSI_SAME_STMT);
3522 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3523 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3524 true, GSI_SAME_STMT);
3525 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3526 NULL_TREE, NULL_TREE);
3527 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3528 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3529 expand_omp_regimplify_p, NULL, NULL)
3530 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3531 expand_omp_regimplify_p, NULL, NULL))
3532 {
3533 gsi = gsi_for_stmt (cond_stmt);
3534 gimple_regimplify_operands (cond_stmt, &gsi);
3535 }
3536 ep = split_block (entry_bb, cond_stmt);
3537 ep->flags = EDGE_TRUE_VALUE;
3538 entry_bb = ep->dest;
357067f2 3539 ep->probability = profile_probability::very_likely ();
629b3d75 3540 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
357067f2 3541 ep->probability = profile_probability::very_unlikely ();
3542 if (gimple_in_ssa_p (cfun))
3543 {
3544 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3545 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3546 !gsi_end_p (gpi); gsi_next (&gpi))
3547 {
3548 gphi *phi = gpi.phi ();
3549 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3550 ep, UNKNOWN_LOCATION);
3551 }
3552 }
3553 gsi = gsi_last_bb (entry_bb);
3554 }
3555
3556 if (fd->have_reductemp)
3557 {
3558 tree t1 = build_int_cst (long_integer_type_node, 0);
3559 tree t2 = build_int_cst (long_integer_type_node, 1);
3560 tree t3 = build_int_cstu (long_integer_type_node,
3561 (HOST_WIDE_INT_1U << 31) + 1);
3562 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
3563 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
3564 reductions = OMP_CLAUSE_DECL (clauses);
3565 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
3566 gimple *g = SSA_NAME_DEF_STMT (reductions);
3567 reductions = gimple_assign_rhs1 (g);
3568 OMP_CLAUSE_DECL (clauses) = reductions;
3569 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
3570 tree t
3571 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
3572 9, t1, t2, t2, t3, t1, null_pointer_node,
3573 null_pointer_node, reductions, null_pointer_node);
3574 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
3575 true, GSI_SAME_STMT);
3576 gsi_remove (&gsi2, true);
3577 release_ssa_name (gimple_assign_lhs (g));
3578 }
3579 switch (gimple_omp_for_kind (fd->for_stmt))
3580 {
3581 case GF_OMP_FOR_KIND_FOR:
3582 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3583 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3584 break;
3585 case GF_OMP_FOR_KIND_DISTRIBUTE:
3586 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3587 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3588 break;
3589 default:
3590 gcc_unreachable ();
3591 }
3592 nthreads = build_call_expr (nthreads, 0);
3593 nthreads = fold_convert (itype, nthreads);
3594 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3595 true, GSI_SAME_STMT);
3596 threadid = build_call_expr (threadid, 0);
3597 threadid = fold_convert (itype, threadid);
3598 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3599 true, GSI_SAME_STMT);
3600
3601 n1 = fd->loop.n1;
3602 n2 = fd->loop.n2;
3603 step = fd->loop.step;
3604 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3605 {
3606 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3607 OMP_CLAUSE__LOOPTEMP_);
3608 gcc_assert (innerc);
3609 n1 = OMP_CLAUSE_DECL (innerc);
3610 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3611 OMP_CLAUSE__LOOPTEMP_);
3612 gcc_assert (innerc);
3613 n2 = OMP_CLAUSE_DECL (innerc);
3614 }
3615 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3616 true, NULL_TREE, true, GSI_SAME_STMT);
3617 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3618 true, NULL_TREE, true, GSI_SAME_STMT);
3619 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3620 true, NULL_TREE, true, GSI_SAME_STMT);
3621
3622 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3623 t = fold_build2 (PLUS_EXPR, itype, step, t);
3624 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3625 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3626 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3627 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3628 fold_build1 (NEGATE_EXPR, itype, t),
3629 fold_build1 (NEGATE_EXPR, itype, step));
3630 else
3631 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3632 t = fold_convert (itype, t);
3633 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3634
3635 q = create_tmp_reg (itype, "q");
3636 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3637 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3638 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3639
3640 tt = create_tmp_reg (itype, "tt");
3641 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3642 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3643 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3644
3645 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3646 gcond *cond_stmt = gimple_build_cond_empty (t);
3647 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3648
3649 second_bb = split_block (entry_bb, cond_stmt)->dest;
65f4b875 3650 gsi = gsi_last_nondebug_bb (second_bb);
3651 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3652
3653 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3654 GSI_SAME_STMT);
3655 gassign *assign_stmt
3656 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3657 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3658
3659 third_bb = split_block (second_bb, assign_stmt)->dest;
65f4b875 3660 gsi = gsi_last_nondebug_bb (third_bb);
3661 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3662
3663 t = build2 (MULT_EXPR, itype, q, threadid);
3664 t = build2 (PLUS_EXPR, itype, t, tt);
3665 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3666
3667 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3668 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3669
3670 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3671 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3672
3673 /* Remove the GIMPLE_OMP_FOR statement. */
3674 gsi_remove (&gsi, true);
3675
3676 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3677 gsi = gsi_start_bb (seq_start_bb);
3678
3679 tree startvar = fd->loop.v;
3680 tree endvar = NULL_TREE;
3681
3682 if (gimple_omp_for_combined_p (fd->for_stmt))
3683 {
3684 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3685 ? gimple_omp_parallel_clauses (inner_stmt)
3686 : gimple_omp_for_clauses (inner_stmt);
3687 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3688 gcc_assert (innerc);
3689 startvar = OMP_CLAUSE_DECL (innerc);
3690 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3691 OMP_CLAUSE__LOOPTEMP_);
3692 gcc_assert (innerc);
3693 endvar = OMP_CLAUSE_DECL (innerc);
3694 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3695 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3696 {
3697 int i;
3698 for (i = 1; i < fd->collapse; i++)
3699 {
3700 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3701 OMP_CLAUSE__LOOPTEMP_);
3702 gcc_assert (innerc);
3703 }
3704 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3705 OMP_CLAUSE__LOOPTEMP_);
3706 if (innerc)
3707 {
3708 /* If needed (distribute parallel for with lastprivate),
3709 propagate down the total number of iterations. */
3710 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3711 fd->loop.n2);
3712 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3713 GSI_CONTINUE_LINKING);
3714 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3715 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3716 }
3717 }
3718 }
3719 t = fold_convert (itype, s0);
3720 t = fold_build2 (MULT_EXPR, itype, t, step);
3721 if (POINTER_TYPE_P (type))
3722 {
3723 t = fold_build_pointer_plus (n1, t);
3724 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3725 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3726 t = fold_convert (signed_type_for (type), t);
3727 }
3728 else
3729 t = fold_build2 (PLUS_EXPR, type, t, n1);
3730 t = fold_convert (TREE_TYPE (startvar), t);
3731 t = force_gimple_operand_gsi (&gsi, t,
3732 DECL_P (startvar)
3733 && TREE_ADDRESSABLE (startvar),
3734 NULL_TREE, false, GSI_CONTINUE_LINKING);
3735 assign_stmt = gimple_build_assign (startvar, t);
3736 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3737
3738 t = fold_convert (itype, e0);
3739 t = fold_build2 (MULT_EXPR, itype, t, step);
3740 if (POINTER_TYPE_P (type))
3741 {
3742 t = fold_build_pointer_plus (n1, t);
3743 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
3744 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
3745 t = fold_convert (signed_type_for (type), t);
3746 }
3747 else
3748 t = fold_build2 (PLUS_EXPR, type, t, n1);
3749 t = fold_convert (TREE_TYPE (startvar), t);
3750 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3751 false, GSI_CONTINUE_LINKING);
3752 if (endvar)
3753 {
3754 assign_stmt = gimple_build_assign (endvar, e);
3755 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3756 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3757 assign_stmt = gimple_build_assign (fd->loop.v, e);
3758 else
3759 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3760 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3761 }
3762 /* Handle linear clause adjustments. */
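  /* E.g. for a clause  linear(x:2)  each thread enters its block with
     x = x_orig + s0 * 2 (an illustrative value; in general the linear
     variable is advanced by its linear step times the number of logical
     iterations that precede this thread's block, computed below as
     itercnt).  */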
3763 tree itercnt = NULL_TREE;
3764 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3765 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3766 c; c = OMP_CLAUSE_CHAIN (c))
3767 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3768 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3769 {
3770 tree d = OMP_CLAUSE_DECL (c);
3771 bool is_ref = omp_is_reference (d);
3772 tree t = d, a, dest;
3773 if (is_ref)
3774 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3775 if (itercnt == NULL_TREE)
3776 {
3777 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3778 {
3779 itercnt = fold_build2 (MINUS_EXPR, itype,
3780 fold_convert (itype, n1),
3781 fold_convert (itype, fd->loop.n1));
3782 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3783 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3784 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3785 NULL_TREE, false,
3786 GSI_CONTINUE_LINKING);
3787 }
3788 else
3789 itercnt = s0;
3790 }
3791 tree type = TREE_TYPE (t);
3792 if (POINTER_TYPE_P (type))
3793 type = sizetype;
3794 a = fold_build2 (MULT_EXPR, type,
3795 fold_convert (type, itercnt),
3796 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3797 dest = unshare_expr (t);
3798 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3799 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3800 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3801 false, GSI_CONTINUE_LINKING);
3802 assign_stmt = gimple_build_assign (dest, t);
3803 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3804 }
3805 if (fd->collapse > 1)
3806 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3807
3808 if (!broken_loop)
3809 {
3810 /* The code controlling the sequential loop replaces the
3811 GIMPLE_OMP_CONTINUE. */
65f4b875 3812 gsi = gsi_last_nondebug_bb (cont_bb);
3813 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3814 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3815 vmain = gimple_omp_continue_control_use (cont_stmt);
3816 vback = gimple_omp_continue_control_def (cont_stmt);
3817
3818 if (!gimple_omp_for_combined_p (fd->for_stmt))
3819 {
3820 if (POINTER_TYPE_P (type))
3821 t = fold_build_pointer_plus (vmain, step);
3822 else
3823 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3824 t = force_gimple_operand_gsi (&gsi, t,
3825 DECL_P (vback)
3826 && TREE_ADDRESSABLE (vback),
3827 NULL_TREE, true, GSI_SAME_STMT);
3828 assign_stmt = gimple_build_assign (vback, t);
3829 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3830
3831 t = build2 (fd->loop.cond_code, boolean_type_node,
3832 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3833 ? t : vback, e);
3834 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3835 }
3836
3837 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3838 gsi_remove (&gsi, true);
3839
3840 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3841 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3842 }
3843
3844 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
65f4b875 3845 gsi = gsi_last_nondebug_bb (exit_bb);
3846 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3847 {
3848 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3849 if (fd->have_reductemp)
3850 {
3851 tree fn;
3852 if (t)
3853 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3854 else
3855 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3856 gcall *g = gimple_build_call (fn, 0);
3857 if (t)
3858 {
3859 gimple_call_set_lhs (g, t);
3860 gsi_insert_after (&gsi, gimple_build_assign (reductions,
3861 NOP_EXPR, t),
3862 GSI_SAME_STMT);
3863 }
3864 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
3865 }
3866 else
3867 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3868 }
3869 gsi_remove (&gsi, true);
3870
3871 /* Connect all the blocks. */
3872 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
357067f2 3873 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3874 ep = find_edge (entry_bb, second_bb);
3875 ep->flags = EDGE_TRUE_VALUE;
357067f2 3876 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3877 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3878 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3879
3880 if (!broken_loop)
3881 {
3882 ep = find_edge (cont_bb, body_bb);
3883 if (ep == NULL)
3884 {
3885 ep = BRANCH_EDGE (cont_bb);
3886 gcc_assert (single_succ (ep->dest) == body_bb);
3887 }
3888 if (gimple_omp_for_combined_p (fd->for_stmt))
3889 {
3890 remove_edge (ep);
3891 ep = NULL;
3892 }
3893 else if (fd->collapse > 1)
3894 {
3895 remove_edge (ep);
3896 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3897 }
3898 else
3899 ep->flags = EDGE_TRUE_VALUE;
3900 find_edge (cont_bb, fin_bb)->flags
3901 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3902 }
3903
3904 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3905 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3906 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3907
3908 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3909 recompute_dominator (CDI_DOMINATORS, body_bb));
3910 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3911 recompute_dominator (CDI_DOMINATORS, fin_bb));
3912
3913 struct loop *loop = body_bb->loop_father;
3914 if (loop != entry_bb->loop_father)
3915 {
3916 gcc_assert (broken_loop || loop->header == body_bb);
3917 gcc_assert (broken_loop
3918 || loop->latch == region->cont
3919 || single_pred (loop->latch) == region->cont);
3920 return;
3921 }
3922
3923 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3924 {
3925 loop = alloc_loop ();
3926 loop->header = body_bb;
3927 if (collapse_bb == NULL)
3928 loop->latch = cont_bb;
3929 add_loop (loop, body_bb->loop_father);
3930 }
3931}
3932
3933/* Return phi in E->DEST with ARG on edge E. */
3934
3935static gphi *
3936find_phi_with_arg_on_edge (tree arg, edge e)
3937{
3938 basic_block bb = e->dest;
3939
3940 for (gphi_iterator gpi = gsi_start_phis (bb);
3941 !gsi_end_p (gpi);
3942 gsi_next (&gpi))
3943 {
3944 gphi *phi = gpi.phi ();
3945 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3946 return phi;
3947 }
3948
3949 return NULL;
3950}
3951
3952/* A subroutine of expand_omp_for. Generate code for a parallel
3953 loop with static schedule and a specified chunk size. Given
3954 parameters:
3955
3956 for (V = N1; V cond N2; V += STEP) BODY;
3957
3958 where COND is "<" or ">", we generate pseudocode
3959
3960 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3961 if (cond is <)
3962 adj = STEP - 1;
3963 else
3964 adj = STEP + 1;
3965 if ((__typeof (V)) -1 > 0 && cond is >)
3966 n = -(adj + N2 - N1) / -STEP;
3967 else
3968 n = (adj + N2 - N1) / STEP;
3969 trip = 0;
3970 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3971 here so that V is defined
3972 if the loop is not entered
3973 L0:
3974 s0 = (trip * nthreads + threadid) * CHUNK;
01914336 3975 e0 = min (s0 + CHUNK, n);
3976 if (s0 < n) goto L1; else goto L4;
3977 L1:
3978 V = s0 * STEP + N1;
3979 e = e0 * STEP + N1;
3980 L2:
3981 BODY;
3982 V += STEP;
3983 if (V cond e) goto L2; else goto L3;
3984 L3:
3985 trip += 1;
3986 goto L0;
3987 L4:
3988*/
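/* As a concrete illustration (again only a sketch): with n = 10,
   nthreads = 2 and CHUNK = 3, chunks are handed out round-robin by the
   trip counter:
     trip 0: thread 0 runs [0,3),  thread 1 runs [3,6)
     trip 1: thread 0 runs [6,9),  thread 1 runs [9,10)  (e0 clamped to n)
     trip 2: both threads see s0 >= n and leave the loop at L4.  */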
3989
3990static void
3991expand_omp_for_static_chunk (struct omp_region *region,
3992 struct omp_for_data *fd, gimple *inner_stmt)
3993{
3994 tree n, s0, e0, e, t;
3995 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3996 tree type, itype, vmain, vback, vextra;
3997 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3998 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3999 gimple_stmt_iterator gsi;
4000 edge se;
4001 bool broken_loop = region->cont == NULL;
4002 tree *counts = NULL;
4003 tree n1, n2, step;
28567c40 4004 tree reductions = NULL_TREE;
4005
4006 itype = type = TREE_TYPE (fd->loop.v);
4007 if (POINTER_TYPE_P (type))
4008 itype = signed_type_for (type);
4009
4010 entry_bb = region->entry;
4011 se = split_block (entry_bb, last_stmt (entry_bb));
4012 entry_bb = se->src;
4013 iter_part_bb = se->dest;
4014 cont_bb = region->cont;
4015 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
4016 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
4017 gcc_assert (broken_loop
4018 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
4019 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
4020 body_bb = single_succ (seq_start_bb);
4021 if (!broken_loop)
4022 {
4023 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
4024 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
4025 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4026 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
4027 }
4028 exit_bb = region->exit;
4029
4030 /* Trip and adjustment setup goes in ENTRY_BB. */
65f4b875 4031 gsi = gsi_last_nondebug_bb (entry_bb);
4032 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4033
4034 if (fd->collapse > 1)
4035 {
4036 int first_zero_iter = -1, dummy = -1;
4037 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
4038
4039 counts = XALLOCAVEC (tree, fd->collapse);
4040 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4041 fin_bb, first_zero_iter,
4042 dummy_bb, dummy, l2_dom_bb);
4043 t = NULL_TREE;
4044 }
4045 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
4046 t = integer_one_node;
4047 else
4048 t = fold_binary (fd->loop.cond_code, boolean_type_node,
4049 fold_convert (type, fd->loop.n1),
4050 fold_convert (type, fd->loop.n2));
4051 if (fd->collapse == 1
4052 && TYPE_UNSIGNED (type)
4053 && (t == NULL_TREE || !integer_onep (t)))
4054 {
4055 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
4056 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
4057 true, GSI_SAME_STMT);
4058 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
4059 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
4060 true, GSI_SAME_STMT);
4061 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
4062 NULL_TREE, NULL_TREE);
4063 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
4064 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
4065 expand_omp_regimplify_p, NULL, NULL)
4066 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
4067 expand_omp_regimplify_p, NULL, NULL))
4068 {
4069 gsi = gsi_for_stmt (cond_stmt);
4070 gimple_regimplify_operands (cond_stmt, &gsi);
4071 }
4072 se = split_block (entry_bb, cond_stmt);
4073 se->flags = EDGE_TRUE_VALUE;
4074 entry_bb = se->dest;
357067f2 4075 se->probability = profile_probability::very_likely ();
629b3d75 4076 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
357067f2 4077 se->probability = profile_probability::very_unlikely ();
4078 if (gimple_in_ssa_p (cfun))
4079 {
4080 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
4081 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
4082 !gsi_end_p (gpi); gsi_next (&gpi))
4083 {
4084 gphi *phi = gpi.phi ();
4085 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
4086 se, UNKNOWN_LOCATION);
4087 }
4088 }
4089 gsi = gsi_last_bb (entry_bb);
4090 }
4091
4092 if (fd->have_reductemp)
4093 {
4094 tree t1 = build_int_cst (long_integer_type_node, 0);
4095 tree t2 = build_int_cst (long_integer_type_node, 1);
4096 tree t3 = build_int_cstu (long_integer_type_node,
4097 (HOST_WIDE_INT_1U << 31) + 1);
4098 tree clauses = gimple_omp_for_clauses (fd->for_stmt);
4099 clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
4100 reductions = OMP_CLAUSE_DECL (clauses);
4101 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
4102 gimple *g = SSA_NAME_DEF_STMT (reductions);
4103 reductions = gimple_assign_rhs1 (g);
4104 OMP_CLAUSE_DECL (clauses) = reductions;
4105 gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
4106 tree t
4107 = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
4108 9, t1, t2, t2, t3, t1, null_pointer_node,
4109 null_pointer_node, reductions, null_pointer_node);
4110 force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
4111 true, GSI_SAME_STMT);
4112 gsi_remove (&gsi2, true);
4113 release_ssa_name (gimple_assign_lhs (g));
4114 }
4115 switch (gimple_omp_for_kind (fd->for_stmt))
4116 {
4117 case GF_OMP_FOR_KIND_FOR:
4118 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4119 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
4120 break;
4121 case GF_OMP_FOR_KIND_DISTRIBUTE:
4122 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
4123 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
4124 break;
4125 default:
4126 gcc_unreachable ();
4127 }
4128 nthreads = build_call_expr (nthreads, 0);
4129 nthreads = fold_convert (itype, nthreads);
4130 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
4131 true, GSI_SAME_STMT);
4132 threadid = build_call_expr (threadid, 0);
4133 threadid = fold_convert (itype, threadid);
4134 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
4135 true, GSI_SAME_STMT);
4136
4137 n1 = fd->loop.n1;
4138 n2 = fd->loop.n2;
4139 step = fd->loop.step;
4140 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4141 {
4142 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4143 OMP_CLAUSE__LOOPTEMP_);
4144 gcc_assert (innerc);
4145 n1 = OMP_CLAUSE_DECL (innerc);
4146 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4147 OMP_CLAUSE__LOOPTEMP_);
4148 gcc_assert (innerc);
4149 n2 = OMP_CLAUSE_DECL (innerc);
4150 }
4151 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
4152 true, NULL_TREE, true, GSI_SAME_STMT);
4153 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
4154 true, NULL_TREE, true, GSI_SAME_STMT);
4155 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
4156 true, NULL_TREE, true, GSI_SAME_STMT);
4157 tree chunk_size = fold_convert (itype, fd->chunk_size);
4158 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
4159 chunk_size
4160 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
4161 GSI_SAME_STMT);
4162
4163 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
4164 t = fold_build2 (PLUS_EXPR, itype, step, t);
4165 t = fold_build2 (PLUS_EXPR, itype, t, n2);
4166 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
4167 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
4168 t = fold_build2 (TRUNC_DIV_EXPR, itype,
4169 fold_build1 (NEGATE_EXPR, itype, t),
4170 fold_build1 (NEGATE_EXPR, itype, step));
4171 else
4172 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
4173 t = fold_convert (itype, t);
4174 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4175 true, GSI_SAME_STMT);
4176
4177 trip_var = create_tmp_reg (itype, ".trip");
4178 if (gimple_in_ssa_p (cfun))
4179 {
4180 trip_init = make_ssa_name (trip_var);
4181 trip_main = make_ssa_name (trip_var);
4182 trip_back = make_ssa_name (trip_var);
4183 }
4184 else
4185 {
4186 trip_init = trip_var;
4187 trip_main = trip_var;
4188 trip_back = trip_var;
4189 }
4190
4191 gassign *assign_stmt
4192 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
4193 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4194
4195 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
4196 t = fold_build2 (MULT_EXPR, itype, t, step);
4197 if (POINTER_TYPE_P (type))
4198 t = fold_build_pointer_plus (n1, t);
4199 else
4200 t = fold_build2 (PLUS_EXPR, type, t, n1);
4201 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4202 true, GSI_SAME_STMT);
4203
4204 /* Remove the GIMPLE_OMP_FOR. */
4205 gsi_remove (&gsi, true);
4206
4207 gimple_stmt_iterator gsif = gsi;
4208
4209 /* Iteration space partitioning goes in ITER_PART_BB. */
4210 gsi = gsi_last_bb (iter_part_bb);
4211
4212 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
4213 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
4214 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
4215 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4216 false, GSI_CONTINUE_LINKING);
4217
4218 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
4219 t = fold_build2 (MIN_EXPR, itype, t, n);
4220 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4221 false, GSI_CONTINUE_LINKING);
4222
4223 t = build2 (LT_EXPR, boolean_type_node, s0, n);
4224 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
4225
4226 /* Setup code for sequential iteration goes in SEQ_START_BB. */
4227 gsi = gsi_start_bb (seq_start_bb);
4228
4229 tree startvar = fd->loop.v;
4230 tree endvar = NULL_TREE;
4231
4232 if (gimple_omp_for_combined_p (fd->for_stmt))
4233 {
4234 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
4235 ? gimple_omp_parallel_clauses (inner_stmt)
4236 : gimple_omp_for_clauses (inner_stmt);
4237 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
4238 gcc_assert (innerc);
4239 startvar = OMP_CLAUSE_DECL (innerc);
4240 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4241 OMP_CLAUSE__LOOPTEMP_);
4242 gcc_assert (innerc);
4243 endvar = OMP_CLAUSE_DECL (innerc);
4244 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
4245 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
4246 {
4247 int i;
4248 for (i = 1; i < fd->collapse; i++)
4249 {
4250 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4251 OMP_CLAUSE__LOOPTEMP_);
4252 gcc_assert (innerc);
4253 }
4254 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4255 OMP_CLAUSE__LOOPTEMP_);
4256 if (innerc)
4257 {
4258 /* If needed (distribute parallel for with lastprivate),
4259 propagate down the total number of iterations. */
4260 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4261 fd->loop.n2);
4262 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4263 GSI_CONTINUE_LINKING);
4264 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4265 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4266 }
4267 }
4268 }
4269
4270 t = fold_convert (itype, s0);
4271 t = fold_build2 (MULT_EXPR, itype, t, step);
4272 if (POINTER_TYPE_P (type))
4273 {
4274 t = fold_build_pointer_plus (n1, t);
4275 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4276 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4277 t = fold_convert (signed_type_for (type), t);
4278 }
4279 else
4280 t = fold_build2 (PLUS_EXPR, type, t, n1);
4281 t = fold_convert (TREE_TYPE (startvar), t);
4282 t = force_gimple_operand_gsi (&gsi, t,
4283 DECL_P (startvar)
4284 && TREE_ADDRESSABLE (startvar),
4285 NULL_TREE, false, GSI_CONTINUE_LINKING);
4286 assign_stmt = gimple_build_assign (startvar, t);
4287 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4288
4289 t = fold_convert (itype, e0);
4290 t = fold_build2 (MULT_EXPR, itype, t, step);
4291 if (POINTER_TYPE_P (type))
4292 {
4293 t = fold_build_pointer_plus (n1, t);
4294 if (!POINTER_TYPE_P (TREE_TYPE (startvar))
4295 && TYPE_PRECISION (TREE_TYPE (startvar)) > TYPE_PRECISION (type))
4296 t = fold_convert (signed_type_for (type), t);
4297 }
4298 else
4299 t = fold_build2 (PLUS_EXPR, type, t, n1);
4300 t = fold_convert (TREE_TYPE (startvar), t);
4301 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4302 false, GSI_CONTINUE_LINKING);
4303 if (endvar)
4304 {
4305 assign_stmt = gimple_build_assign (endvar, e);
4306 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4307 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4308 assign_stmt = gimple_build_assign (fd->loop.v, e);
4309 else
4310 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4311 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4312 }
4313 /* Handle linear clause adjustments. */
4314 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4315 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4316 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4317 c; c = OMP_CLAUSE_CHAIN (c))
4318 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4319 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4320 {
4321 tree d = OMP_CLAUSE_DECL (c);
4322 bool is_ref = omp_is_reference (d);
4323 tree t = d, a, dest;
4324 if (is_ref)
4325 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4326 tree type = TREE_TYPE (t);
4327 if (POINTER_TYPE_P (type))
4328 type = sizetype;
4329 dest = unshare_expr (t);
4330 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4331 expand_omp_build_assign (&gsif, v, t);
4332 if (itercnt == NULL_TREE)
4333 {
4334 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4335 {
4336 itercntbias
4337 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4338 fold_convert (itype, fd->loop.n1));
4339 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4340 itercntbias, step);
4341 itercntbias
4342 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4343 NULL_TREE, true,
4344 GSI_SAME_STMT);
4345 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4346 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4347 NULL_TREE, false,
4348 GSI_CONTINUE_LINKING);
4349 }
4350 else
4351 itercnt = s0;
4352 }
4353 a = fold_build2 (MULT_EXPR, type,
4354 fold_convert (type, itercnt),
4355 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4356 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4357 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4358 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4359 false, GSI_CONTINUE_LINKING);
4360 assign_stmt = gimple_build_assign (dest, t);
4361 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4362 }
4363 if (fd->collapse > 1)
4364 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4365
4366 if (!broken_loop)
4367 {
4368 /* The code controlling the sequential loop goes in CONT_BB,
4369 replacing the GIMPLE_OMP_CONTINUE. */
65f4b875 4370 gsi = gsi_last_nondebug_bb (cont_bb);
4371 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4372 vmain = gimple_omp_continue_control_use (cont_stmt);
4373 vback = gimple_omp_continue_control_def (cont_stmt);
4374
4375 if (!gimple_omp_for_combined_p (fd->for_stmt))
4376 {
4377 if (POINTER_TYPE_P (type))
4378 t = fold_build_pointer_plus (vmain, step);
4379 else
4380 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4381 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4382 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4383 true, GSI_SAME_STMT);
4384 assign_stmt = gimple_build_assign (vback, t);
4385 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4386
4387 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4388 t = build2 (EQ_EXPR, boolean_type_node,
4389 build_int_cst (itype, 0),
4390 build_int_cst (itype, 1));
4391 else
4392 t = build2 (fd->loop.cond_code, boolean_type_node,
4393 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4394 ? t : vback, e);
4395 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4396 }
4397
4398 /* Remove GIMPLE_OMP_CONTINUE. */
4399 gsi_remove (&gsi, true);
4400
4401 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4402 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4403
4404 /* Trip update code goes into TRIP_UPDATE_BB. */
4405 gsi = gsi_start_bb (trip_update_bb);
4406
4407 t = build_int_cst (itype, 1);
4408 t = build2 (PLUS_EXPR, itype, trip_main, t);
4409 assign_stmt = gimple_build_assign (trip_back, t);
4410 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4411 }
4412
4413 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
65f4b875 4414 gsi = gsi_last_nondebug_bb (exit_bb);
4415 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4416 {
4417 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4418 if (fd->have_reductemp)
4419 {
4420 tree fn;
4421 if (t)
4422 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
4423 else
4424 fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
4425 gcall *g = gimple_build_call (fn, 0);
4426 if (t)
4427 {
4428 gimple_call_set_lhs (g, t);
4429 gsi_insert_after (&gsi, gimple_build_assign (reductions,
4430 NOP_EXPR, t),
4431 GSI_SAME_STMT);
4432 }
4433 gsi_insert_after (&gsi, g, GSI_SAME_STMT);
4434 }
4435 else
4436 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4437 }
4438 gsi_remove (&gsi, true);
4439
4440 /* Connect the new blocks. */
4441 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4442 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4443
4444 if (!broken_loop)
4445 {
4446 se = find_edge (cont_bb, body_bb);
4447 if (se == NULL)
4448 {
4449 se = BRANCH_EDGE (cont_bb);
4450 gcc_assert (single_succ (se->dest) == body_bb);
4451 }
4452 if (gimple_omp_for_combined_p (fd->for_stmt))
4453 {
4454 remove_edge (se);
4455 se = NULL;
4456 }
4457 else if (fd->collapse > 1)
4458 {
4459 remove_edge (se);
4460 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4461 }
4462 else
4463 se->flags = EDGE_TRUE_VALUE;
4464 find_edge (cont_bb, trip_update_bb)->flags
4465 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4466
4467 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4468 iter_part_bb);
4469 }
4470
4471 if (gimple_in_ssa_p (cfun))
4472 {
4473 gphi_iterator psi;
4474 gphi *phi;
4475 edge re, ene;
4476 edge_var_map *vm;
4477 size_t i;
4478
4479 gcc_assert (fd->collapse == 1 && !broken_loop);
4480
4481 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4482 remove arguments of the phi nodes in fin_bb. We need to create
4483 appropriate phi nodes in iter_part_bb instead. */
4484 se = find_edge (iter_part_bb, fin_bb);
4485 re = single_succ_edge (trip_update_bb);
4486 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4487 ene = single_succ_edge (entry_bb);
4488
4489 psi = gsi_start_phis (fin_bb);
4490 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4491 gsi_next (&psi), ++i)
4492 {
4493 gphi *nphi;
620e594b 4494 location_t locus;
4495
4496 phi = psi.phi ();
4497 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4498 redirect_edge_var_map_def (vm), 0))
4499 continue;
4500
4501 t = gimple_phi_result (phi);
4502 gcc_assert (t == redirect_edge_var_map_result (vm));
4503
4504 if (!single_pred_p (fin_bb))
4505 t = copy_ssa_name (t, phi);
4506
4507 nphi = create_phi_node (t, iter_part_bb);
4508
4509 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4510 locus = gimple_phi_arg_location_from_edge (phi, se);
4511
4512 /* A special case -- fd->loop.v is not yet computed in
4513 iter_part_bb, we need to use vextra instead. */
4514 if (t == fd->loop.v)
4515 t = vextra;
4516 add_phi_arg (nphi, t, ene, locus);
4517 locus = redirect_edge_var_map_location (vm);
4518 tree back_arg = redirect_edge_var_map_def (vm);
4519 add_phi_arg (nphi, back_arg, re, locus);
4520 edge ce = find_edge (cont_bb, body_bb);
4521 if (ce == NULL)
4522 {
4523 ce = BRANCH_EDGE (cont_bb);
4524 gcc_assert (single_succ (ce->dest) == body_bb);
4525 ce = single_succ_edge (ce->dest);
4526 }
4527 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4528 gcc_assert (inner_loop_phi != NULL);
4529 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4530 find_edge (seq_start_bb, body_bb), locus);
4531
4532 if (!single_pred_p (fin_bb))
4533 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4534 }
4535 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4536 redirect_edge_var_map_clear (re);
4537 if (single_pred_p (fin_bb))
4538 while (1)
4539 {
4540 psi = gsi_start_phis (fin_bb);
4541 if (gsi_end_p (psi))
4542 break;
4543 remove_phi_node (&psi, false);
4544 }
4545
4546 /* Make phi node for trip. */
4547 phi = create_phi_node (trip_main, iter_part_bb);
4548 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4549 UNKNOWN_LOCATION);
4550 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4551 UNKNOWN_LOCATION);
4552 }
4553
4554 if (!broken_loop)
4555 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4556 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4557 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4558 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4559 recompute_dominator (CDI_DOMINATORS, fin_bb));
4560 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4561 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4562 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4563 recompute_dominator (CDI_DOMINATORS, body_bb));
4564
4565 if (!broken_loop)
4566 {
4567 struct loop *loop = body_bb->loop_father;
4568 struct loop *trip_loop = alloc_loop ();
4569 trip_loop->header = iter_part_bb;
4570 trip_loop->latch = trip_update_bb;
4571 add_loop (trip_loop, iter_part_bb->loop_father);
4572
4573 if (loop != entry_bb->loop_father)
4574 {
4575 gcc_assert (loop->header == body_bb);
4576 gcc_assert (loop->latch == region->cont
4577 || single_pred (loop->latch) == region->cont);
4578 trip_loop->inner = loop;
4579 return;
4580 }
4581
4582 if (!gimple_omp_for_combined_p (fd->for_stmt))
4583 {
4584 loop = alloc_loop ();
4585 loop->header = body_bb;
4586 if (collapse_bb == NULL)
4587 loop->latch = cont_bb;
4588 add_loop (loop, trip_loop);
4589 }
4590 }
4591}
4592
4593/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4594 loop. Given parameters:
4595
4596 for (V = N1; V cond N2; V += STEP) BODY;
4597
4598 where COND is "<" or ">", we generate pseudocode
4599
4600 V = N1;
4601 goto L1;
4602 L0:
4603 BODY;
4604 V += STEP;
4605 L1:
4606 if (V cond N2) goto L0; else goto L2;
4607 L2:
4608
4609 For collapsed loops, given parameters:
4610 collapse(3)
4611 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4612 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4613 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4614 BODY;
4615
4616 we generate pseudocode
4617
4618 if (cond3 is <)
4619 adj = STEP3 - 1;
4620 else
4621 adj = STEP3 + 1;
4622 count3 = (adj + N32 - N31) / STEP3;
4623 if (cond2 is <)
4624 adj = STEP2 - 1;
4625 else
4626 adj = STEP2 + 1;
4627 count2 = (adj + N22 - N21) / STEP2;
4628 if (cond1 is <)
4629 adj = STEP1 - 1;
4630 else
4631 adj = STEP1 + 1;
4632 count1 = (adj + N12 - N11) / STEP1;
4633 count = count1 * count2 * count3;
4634 V = 0;
4635 V1 = N11;
4636 V2 = N21;
4637 V3 = N31;
4638 goto L1;
4639 L0:
4640 BODY;
4641 V += 1;
4642 V3 += STEP3;
4643 V2 += (V3 cond3 N32) ? 0 : STEP2;
4644 V3 = (V3 cond3 N32) ? V3 : N31;
4645 V1 += (V2 cond2 N22) ? 0 : STEP1;
4646 V2 = (V2 cond2 N22) ? V2 : N21;
4647 L1:
4648 if (V < count) goto L0; else goto L2;
4649 L2:
4650
4651 */
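/* As a small illustration of the collapsed-loop bookkeeping (sketch only):
   with collapse(2), count1 = 2 and count2 = 3, count = 6 and the single
   counter V runs 0..5 while the pair (V1, V2) visits
     (N11, N21) (N11, N21+STEP2) (N11, N21+2*STEP2)
     (N11+STEP1, N21) (N11+STEP1, N21+STEP2) (N11+STEP1, N21+2*STEP2);
   whenever the innermost variable would leave its range it is reset to
   its N1 value and the next-outer variable advances by its step, exactly
   as the conditional updates above describe.  */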
4652
4653static void
4654expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4655{
4656 tree type, t;
4657 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4658 gimple_stmt_iterator gsi;
4659 gimple *stmt;
4660 gcond *cond_stmt;
4661 bool broken_loop = region->cont == NULL;
4662 edge e, ne;
4663 tree *counts = NULL;
4664 int i;
4665 int safelen_int = INT_MAX;
4666 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4667 OMP_CLAUSE_SAFELEN);
4668 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4669 OMP_CLAUSE__SIMDUID_);
4670 tree n1, n2;
4671
4672 if (safelen)
4673 {
9d2f08ab 4674 poly_uint64 val;
629b3d75 4675 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
9d2f08ab 4676 if (!poly_int_tree_p (safelen, &val))
629b3d75 4677 safelen_int = 0;
4678 else
4679 safelen_int = MIN (constant_lower_bound (val), INT_MAX);
4680 if (safelen_int == 1)
4681 safelen_int = 0;
4682 }
4683 type = TREE_TYPE (fd->loop.v);
4684 entry_bb = region->entry;
4685 cont_bb = region->cont;
4686 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4687 gcc_assert (broken_loop
4688 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4689 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4690 if (!broken_loop)
4691 {
4692 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4693 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4694 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4695 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4696 }
4697 else
4698 {
4699 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4700 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4701 l2_bb = single_succ (l1_bb);
4702 }
4703 exit_bb = region->exit;
4704 l2_dom_bb = NULL;
4705
65f4b875 4706 gsi = gsi_last_nondebug_bb (entry_bb);
4707
4708 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4709 /* Not needed in SSA form right now. */
4710 gcc_assert (!gimple_in_ssa_p (cfun));
4711 if (fd->collapse > 1)
4712 {
4713 int first_zero_iter = -1, dummy = -1;
4714 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4715
4716 counts = XALLOCAVEC (tree, fd->collapse);
4717 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4718 zero_iter_bb, first_zero_iter,
4719 dummy_bb, dummy, l2_dom_bb);
4720 }
4721 if (l2_dom_bb == NULL)
4722 l2_dom_bb = l1_bb;
4723
4724 n1 = fd->loop.n1;
4725 n2 = fd->loop.n2;
4726 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4727 {
4728 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4729 OMP_CLAUSE__LOOPTEMP_);
4730 gcc_assert (innerc);
4731 n1 = OMP_CLAUSE_DECL (innerc);
4732 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4733 OMP_CLAUSE__LOOPTEMP_);
4734 gcc_assert (innerc);
4735 n2 = OMP_CLAUSE_DECL (innerc);
4736 }
4737 tree step = fd->loop.step;
4738
4739 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4740 OMP_CLAUSE__SIMT_);
4741 if (is_simt)
4742 {
4743 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4744 is_simt = safelen_int > 1;
4745 }
4746 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4747 if (is_simt)
4748 {
4749 simt_lane = create_tmp_var (unsigned_type_node);
4750 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4751 gimple_call_set_lhs (g, simt_lane);
4752 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4753 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4754 fold_convert (TREE_TYPE (step), simt_lane));
4755 n1 = fold_convert (type, n1);
4756 if (POINTER_TYPE_P (type))
4757 n1 = fold_build_pointer_plus (n1, offset);
4758 else
4759 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4760
4761 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4762 if (fd->collapse > 1)
4763 simt_maxlane = build_one_cst (unsigned_type_node);
4764 else if (safelen_int < omp_max_simt_vf ())
4765 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4766 tree vf
4767 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4768 unsigned_type_node, 0);
4769 if (simt_maxlane)
4770 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4771 vf = fold_convert (TREE_TYPE (step), vf);
4772 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4773 }
4774
4775 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4776 if (fd->collapse > 1)
4777 {
4778 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4779 {
4780 gsi_prev (&gsi);
4781 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4782 gsi_next (&gsi);
4783 }
4784 else
4785 for (i = 0; i < fd->collapse; i++)
4786 {
4787 tree itype = TREE_TYPE (fd->loops[i].v);
4788 if (POINTER_TYPE_P (itype))
4789 itype = signed_type_for (itype);
4790 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4791 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4792 }
4793 }
4794
4795 /* Remove the GIMPLE_OMP_FOR statement. */
4796 gsi_remove (&gsi, true);
4797
4798 if (!broken_loop)
4799 {
4800 /* Code to control the increment goes in the CONT_BB. */
65f4b875 4801 gsi = gsi_last_nondebug_bb (cont_bb);
4802 stmt = gsi_stmt (gsi);
4803 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4804
4805 if (POINTER_TYPE_P (type))
4806 t = fold_build_pointer_plus (fd->loop.v, step);
4807 else
4808 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4809 expand_omp_build_assign (&gsi, fd->loop.v, t);
4810
4811 if (fd->collapse > 1)
4812 {
4813 i = fd->collapse - 1;
4814 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4815 {
4816 t = fold_convert (sizetype, fd->loops[i].step);
4817 t = fold_build_pointer_plus (fd->loops[i].v, t);
4818 }
4819 else
4820 {
4821 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4822 fd->loops[i].step);
4823 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4824 fd->loops[i].v, t);
4825 }
4826 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4827
4828 for (i = fd->collapse - 1; i > 0; i--)
4829 {
4830 tree itype = TREE_TYPE (fd->loops[i].v);
4831 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4832 if (POINTER_TYPE_P (itype2))
4833 itype2 = signed_type_for (itype2);
4834 t = fold_convert (itype2, fd->loops[i - 1].step);
4835 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4836 GSI_SAME_STMT);
4837 t = build3 (COND_EXPR, itype2,
4838 build2 (fd->loops[i].cond_code, boolean_type_node,
4839 fd->loops[i].v,
4840 fold_convert (itype, fd->loops[i].n2)),
bcc6842b 4841 build_int_cst (itype2, 0), t);
4842 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4843 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4844 else
4845 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4846 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4847
4848 t = fold_convert (itype, fd->loops[i].n1);
4849 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4850 GSI_SAME_STMT);
4851 t = build3 (COND_EXPR, itype,
4852 build2 (fd->loops[i].cond_code, boolean_type_node,
4853 fd->loops[i].v,
4854 fold_convert (itype, fd->loops[i].n2)),
bcc6842b 4855 fd->loops[i].v, t);
4856 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4857 }
4858 }
4859
4860 /* Remove GIMPLE_OMP_CONTINUE. */
4861 gsi_remove (&gsi, true);
4862 }
4863
4864 /* Emit the condition in L1_BB. */
4865 gsi = gsi_start_bb (l1_bb);
4866
4867 t = fold_convert (type, n2);
4868 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4869 false, GSI_CONTINUE_LINKING);
4870 tree v = fd->loop.v;
4871 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4872 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4873 false, GSI_CONTINUE_LINKING);
4874 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4875 cond_stmt = gimple_build_cond_empty (t);
4876 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4877 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4878 NULL, NULL)
4879 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4880 NULL, NULL))
4881 {
4882 gsi = gsi_for_stmt (cond_stmt);
4883 gimple_regimplify_operands (cond_stmt, &gsi);
4884 }
4885
4886 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4887 if (is_simt)
4888 {
4889 gsi = gsi_start_bb (l2_bb);
4890 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4891 if (POINTER_TYPE_P (type))
4892 t = fold_build_pointer_plus (fd->loop.v, step);
4893 else
4894 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4895 expand_omp_build_assign (&gsi, fd->loop.v, t);
4896 }
4897
4898 /* Remove GIMPLE_OMP_RETURN. */
65f4b875 4899 gsi = gsi_last_nondebug_bb (exit_bb);
4900 gsi_remove (&gsi, true);
4901
4902 /* Connect the new blocks. */
4903 remove_edge (FALLTHRU_EDGE (entry_bb));
4904
4905 if (!broken_loop)
4906 {
4907 remove_edge (BRANCH_EDGE (entry_bb));
4908 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4909
4910 e = BRANCH_EDGE (l1_bb);
4911 ne = FALLTHRU_EDGE (l1_bb);
4912 e->flags = EDGE_TRUE_VALUE;
4913 }
4914 else
4915 {
4916 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4917
4918 ne = single_succ_edge (l1_bb);
4919 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4920
4921 }
4922 ne->flags = EDGE_FALSE_VALUE;
4923 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4924 ne->probability = e->probability.invert ();
4925
4926 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4927 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4928
4929 if (simt_maxlane)
4930 {
4931 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4932 NULL_TREE, NULL_TREE);
4933 gsi = gsi_last_bb (entry_bb);
4934 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4935 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4936 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4937 FALLTHRU_EDGE (entry_bb)->probability
4938 = profile_probability::guessed_always ().apply_scale (7, 8);
4939 BRANCH_EDGE (entry_bb)->probability
4940 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4941 l2_dom_bb = entry_bb;
4942 }
4943 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4944
4945 if (!broken_loop)
4946 {
4947 struct loop *loop = alloc_loop ();
4948 loop->header = l1_bb;
4949 loop->latch = cont_bb;
4950 add_loop (loop, l1_bb->loop_father);
4951 loop->safelen = safelen_int;
4952 if (simduid)
4953 {
4954 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4955 cfun->has_simduid_loops = true;
4956 }
4957 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4958 the loop. */
4959 if ((flag_tree_loop_vectorize
26d476cd 4960 || !global_options_set.x_flag_tree_loop_vectorize)
4961 && flag_tree_loop_optimize
4962 && loop->safelen > 1)
4963 {
4964 loop->force_vectorize = true;
4965 cfun->has_force_vectorize_loops = true;
4966 }
4967 }
4968 else if (simduid)
4969 cfun->has_simduid_loops = true;
4970}
4971
4972/* Taskloop construct is represented after gimplification with
4973 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4974 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4975 which should just compute all the needed loop temporaries
4976 for GIMPLE_OMP_TASK. */
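/* As an illustrative sketch (source level, not the exact gimplified
   representation), a construct such as

     void
     f (int *a, int n)
     {
       #pragma omp taskloop
       for (int i = 0; i < n; i++)
         a[i] = 2 * i;
     }

   is split into an outer GIMPLE_OMP_FOR that only evaluates the bounds
   into _looptemp_ clauses, the GIMPLE_OMP_TASK carrying the taskloop
   clauses, and an inner GIMPLE_OMP_FOR running over the range handed to
   each task.  */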
4977
4978static void
4979expand_omp_taskloop_for_outer (struct omp_region *region,
4980 struct omp_for_data *fd,
4981 gimple *inner_stmt)
4982{
4983 tree type, bias = NULL_TREE;
4984 basic_block entry_bb, cont_bb, exit_bb;
4985 gimple_stmt_iterator gsi;
4986 gassign *assign_stmt;
4987 tree *counts = NULL;
4988 int i;
4989
4990 gcc_assert (inner_stmt);
4991 gcc_assert (region->cont);
4992 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4993 && gimple_omp_task_taskloop_p (inner_stmt));
4994 type = TREE_TYPE (fd->loop.v);
4995
4996 /* See if we need to bias by LLONG_MIN. */
4997 if (fd->iter_type == long_long_unsigned_type_node
4998 && TREE_CODE (type) == INTEGER_TYPE
4999 && !TYPE_UNSIGNED (type))
5000 {
5001 tree n1, n2;
5002
5003 if (fd->loop.cond_code == LT_EXPR)
5004 {
5005 n1 = fd->loop.n1;
5006 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5007 }
5008 else
5009 {
5010 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5011 n2 = fd->loop.n1;
5012 }
5013 if (TREE_CODE (n1) != INTEGER_CST
5014 || TREE_CODE (n2) != INTEGER_CST
5015 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5016 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5017 }
5018
5019 entry_bb = region->entry;
5020 cont_bb = region->cont;
5021 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5022 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
5023 exit_bb = region->exit;
5024
65f4b875 5025 gsi = gsi_last_nondebug_bb (entry_bb);
5026 gimple *for_stmt = gsi_stmt (gsi);
5027 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
5028 if (fd->collapse > 1)
5029 {
5030 int first_zero_iter = -1, dummy = -1;
5031 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
5032
5033 counts = XALLOCAVEC (tree, fd->collapse);
5034 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5035 zero_iter_bb, first_zero_iter,
5036 dummy_bb, dummy, l2_dom_bb);
5037
5038 if (zero_iter_bb)
5039 {
5040 /* Some counts[i] vars might be uninitialized if
5041 some loop has zero iterations. But the body shouldn't
5042 be executed in that case, so just avoid uninit warnings. */
5043 for (i = first_zero_iter; i < fd->collapse; i++)
5044 if (SSA_VAR_P (counts[i]))
5045 TREE_NO_WARNING (counts[i]) = 1;
5046 gsi_prev (&gsi);
5047 edge e = split_block (entry_bb, gsi_stmt (gsi));
5048 entry_bb = e->dest;
5049 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
5050 gsi = gsi_last_bb (entry_bb);
5051 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
5052 get_immediate_dominator (CDI_DOMINATORS,
5053 zero_iter_bb));
5054 }
5055 }
5056
5057 tree t0, t1;
5058 t1 = fd->loop.n2;
5059 t0 = fd->loop.n1;
5060 if (POINTER_TYPE_P (TREE_TYPE (t0))
5061 && TYPE_PRECISION (TREE_TYPE (t0))
5062 != TYPE_PRECISION (fd->iter_type))
5063 {
5064 /* Avoid casting pointers to integer of a different size. */
5065 tree itype = signed_type_for (type);
5066 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
5067 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
5068 }
5069 else
5070 {
5071 t1 = fold_convert (fd->iter_type, t1);
5072 t0 = fold_convert (fd->iter_type, t0);
5073 }
5074 if (bias)
5075 {
5076 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
5077 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
5078 }
5079
5080 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
5081 OMP_CLAUSE__LOOPTEMP_);
5082 gcc_assert (innerc);
5083 tree startvar = OMP_CLAUSE_DECL (innerc);
5084 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5085 gcc_assert (innerc);
5086 tree endvar = OMP_CLAUSE_DECL (innerc);
5087 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
5088 {
5089 gcc_assert (innerc);
5090 for (i = 1; i < fd->collapse; i++)
5091 {
5092 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5093 OMP_CLAUSE__LOOPTEMP_);
5094 gcc_assert (innerc);
5095 }
5096 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5097 OMP_CLAUSE__LOOPTEMP_);
5098 if (innerc)
5099 {
5100 /* If needed (inner taskloop has lastprivate clause), propagate
5101 down the total number of iterations. */
5102 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5103 NULL_TREE, false,
5104 GSI_CONTINUE_LINKING);
5105 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5106 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5107 }
5108 }
5109
5110 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5111 GSI_CONTINUE_LINKING);
5112 assign_stmt = gimple_build_assign (startvar, t0);
5113 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5114
5115 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5116 GSI_CONTINUE_LINKING);
5117 assign_stmt = gimple_build_assign (endvar, t1);
5118 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5119 if (fd->collapse > 1)
5120 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5121
5122 /* Remove the GIMPLE_OMP_FOR statement. */
5123 gsi = gsi_for_stmt (for_stmt);
5124 gsi_remove (&gsi, true);
5125
65f4b875 5126 gsi = gsi_last_nondebug_bb (cont_bb);
5127 gsi_remove (&gsi, true);
5128
65f4b875 5129 gsi = gsi_last_nondebug_bb (exit_bb);
5130 gsi_remove (&gsi, true);
5131
357067f2 5132 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
629b3d75 5133 remove_edge (BRANCH_EDGE (entry_bb));
357067f2 5134 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5135 remove_edge (BRANCH_EDGE (cont_bb));
5136 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5137 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5138 recompute_dominator (CDI_DOMINATORS, region->entry));
5139}
5140
5141/* Taskloop construct is represented after gimplification with
5142 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
5143 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5144 GOMP_taskloop{,_ull} function arranges for each task to be given just
5145 a single range of iterations. */
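/* A rough sketch, not the precise generated GIMPLE: each task spawned by
   GOMP_taskloop{,_ull} receives its start and end through the first two
   _looptemp_ clauses, so the expanded body of one task behaves like

     for (V = start; V cond end; V += STEP)
       {BODY}

   with cond and STEP taken from the original loop.  */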
5146
5147static void
5148expand_omp_taskloop_for_inner (struct omp_region *region,
5149 struct omp_for_data *fd,
5150 gimple *inner_stmt)
5151{
5152 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5153 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5154 basic_block fin_bb;
5155 gimple_stmt_iterator gsi;
5156 edge ep;
5157 bool broken_loop = region->cont == NULL;
5158 tree *counts = NULL;
5159 tree n1, n2, step;
5160
5161 itype = type = TREE_TYPE (fd->loop.v);
5162 if (POINTER_TYPE_P (type))
5163 itype = signed_type_for (type);
5164
5165 /* See if we need to bias by LLONG_MIN. */
5166 if (fd->iter_type == long_long_unsigned_type_node
5167 && TREE_CODE (type) == INTEGER_TYPE
5168 && !TYPE_UNSIGNED (type))
5169 {
5170 tree n1, n2;
5171
5172 if (fd->loop.cond_code == LT_EXPR)
5173 {
5174 n1 = fd->loop.n1;
5175 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5176 }
5177 else
5178 {
5179 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5180 n2 = fd->loop.n1;
5181 }
5182 if (TREE_CODE (n1) != INTEGER_CST
5183 || TREE_CODE (n2) != INTEGER_CST
5184 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5185 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5186 }
5187
5188 entry_bb = region->entry;
5189 cont_bb = region->cont;
5190 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5191 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5192 gcc_assert (broken_loop
5193 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5194 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5195 if (!broken_loop)
5196 {
5197 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5198 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5199 }
5200 exit_bb = region->exit;
5201
5202 /* Iteration space partitioning goes in ENTRY_BB. */
65f4b875 5203 gsi = gsi_last_nondebug_bb (entry_bb);
5204 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5205
5206 if (fd->collapse > 1)
5207 {
5208 int first_zero_iter = -1, dummy = -1;
5209 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5210
5211 counts = XALLOCAVEC (tree, fd->collapse);
5212 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5213 fin_bb, first_zero_iter,
5214 dummy_bb, dummy, l2_dom_bb);
5215 t = NULL_TREE;
5216 }
5217 else
5218 t = integer_one_node;
5219
5220 step = fd->loop.step;
5221 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5222 OMP_CLAUSE__LOOPTEMP_);
5223 gcc_assert (innerc);
5224 n1 = OMP_CLAUSE_DECL (innerc);
5225 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5226 gcc_assert (innerc);
5227 n2 = OMP_CLAUSE_DECL (innerc);
5228 if (bias)
5229 {
5230 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5231 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5232 }
5233 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5234 true, NULL_TREE, true, GSI_SAME_STMT);
5235 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5236 true, NULL_TREE, true, GSI_SAME_STMT);
5237 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5238 true, NULL_TREE, true, GSI_SAME_STMT);
5239
5240 tree startvar = fd->loop.v;
5241 tree endvar = NULL_TREE;
5242
5243 if (gimple_omp_for_combined_p (fd->for_stmt))
5244 {
5245 tree clauses = gimple_omp_for_clauses (inner_stmt);
5246 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5247 gcc_assert (innerc);
5248 startvar = OMP_CLAUSE_DECL (innerc);
5249 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5250 OMP_CLAUSE__LOOPTEMP_);
5251 gcc_assert (innerc);
5252 endvar = OMP_CLAUSE_DECL (innerc);
5253 }
5254 t = fold_convert (TREE_TYPE (startvar), n1);
5255 t = force_gimple_operand_gsi (&gsi, t,
5256 DECL_P (startvar)
5257 && TREE_ADDRESSABLE (startvar),
5258 NULL_TREE, false, GSI_CONTINUE_LINKING);
5259 gimple *assign_stmt = gimple_build_assign (startvar, t);
5260 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5261
5262 t = fold_convert (TREE_TYPE (startvar), n2);
5263 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5264 false, GSI_CONTINUE_LINKING);
5265 if (endvar)
5266 {
5267 assign_stmt = gimple_build_assign (endvar, e);
5268 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5269 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5270 assign_stmt = gimple_build_assign (fd->loop.v, e);
5271 else
5272 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5273 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5274 }
5275 if (fd->collapse > 1)
5276 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5277
5278 if (!broken_loop)
5279 {
5280 /* The code controlling the sequential loop replaces the
5281 GIMPLE_OMP_CONTINUE. */
65f4b875 5282 gsi = gsi_last_nondebug_bb (cont_bb);
5283 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5284 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5285 vmain = gimple_omp_continue_control_use (cont_stmt);
5286 vback = gimple_omp_continue_control_def (cont_stmt);
5287
5288 if (!gimple_omp_for_combined_p (fd->for_stmt))
5289 {
5290 if (POINTER_TYPE_P (type))
5291 t = fold_build_pointer_plus (vmain, step);
5292 else
5293 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5294 t = force_gimple_operand_gsi (&gsi, t,
5295 DECL_P (vback)
5296 && TREE_ADDRESSABLE (vback),
5297 NULL_TREE, true, GSI_SAME_STMT);
5298 assign_stmt = gimple_build_assign (vback, t);
5299 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5300
5301 t = build2 (fd->loop.cond_code, boolean_type_node,
5302 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5303 ? t : vback, e);
5304 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5305 }
5306
5307 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5308 gsi_remove (&gsi, true);
5309
5310 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5311 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5312 }
5313
5314 /* Remove the GIMPLE_OMP_FOR statement. */
5315 gsi = gsi_for_stmt (fd->for_stmt);
5316 gsi_remove (&gsi, true);
5317
5318 /* Remove the GIMPLE_OMP_RETURN statement. */
65f4b875 5319 gsi = gsi_last_nondebug_bb (exit_bb);
5320 gsi_remove (&gsi, true);
5321
357067f2 5322 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5323 if (!broken_loop)
5324 remove_edge (BRANCH_EDGE (entry_bb));
5325 else
5326 {
5327 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5328 region->outer->cont = NULL;
5329 }
5330
5331 /* Connect all the blocks. */
5332 if (!broken_loop)
5333 {
5334 ep = find_edge (cont_bb, body_bb);
5335 if (gimple_omp_for_combined_p (fd->for_stmt))
5336 {
5337 remove_edge (ep);
5338 ep = NULL;
5339 }
5340 else if (fd->collapse > 1)
5341 {
5342 remove_edge (ep);
5343 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5344 }
5345 else
5346 ep->flags = EDGE_TRUE_VALUE;
5347 find_edge (cont_bb, fin_bb)->flags
5348 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5349 }
5350
5351 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5352 recompute_dominator (CDI_DOMINATORS, body_bb));
5353 if (!broken_loop)
5354 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5355 recompute_dominator (CDI_DOMINATORS, fin_bb));
5356
5357 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5358 {
5359 struct loop *loop = alloc_loop ();
5360 loop->header = body_bb;
5361 if (collapse_bb == NULL)
5362 loop->latch = cont_bb;
5363 add_loop (loop, body_bb->loop_father);
5364 }
5365}
5366
5367/* A subroutine of expand_omp_for. Generate code for an OpenACC
5368 partitioned loop. The lowering here is abstracted, in that the
5369 loop parameters are passed through internal functions, which are
5370 further lowered by oacc_device_lower, once we get to the target
5371 compiler. The loop is of the form:
5372
5373 for (V = B; V LTGT E; V += S) {BODY}
5374
5375 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5376 (constant 0 for no chunking) and we will have a GWV partitioning
5377 mask, specifying dimensions over which the loop is to be
5378 partitioned (see note below). We generate code that looks like
5379 (this ignores tiling):
5380
5381 <entry_bb> [incoming FALL->body, BRANCH->exit]
5382 typedef signedintify (typeof (V)) T; // underlying signed integral type
5383 T range = E - B;
5384 T chunk_no = 0;
5385 T DIR = LTGT == '<' ? +1 : -1;
5386 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5387 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5388
5389 <head_bb> [created by splitting end of entry_bb]
5390 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5391 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5392 if (!(offset LTGT bound)) goto bottom_bb;
5393
5394 <body_bb> [incoming]
5395 V = B + offset;
5396 {BODY}
5397
5398 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5399 offset += step;
5400 if (offset LTGT bound) goto body_bb; [*]
5401
5402 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5403 chunk_no++;
 5404 if (chunk_no < chunk_max) goto head_bb;
5405
5406 <exit_bb> [incoming]
5407 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5408
02889d23 5409 [*] Needed if V live at end of loop. */
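/* As a purely illustrative example (any equivalent OpenACC loop would do),
   a source loop like

     #pragma acc parallel loop
     for (int i = 0; i < n; i++)
       a[i] += b[i];

   ends up here and is emitted in terms of the IFN_GOACC_LOOP calls
   sketched above, which oacc_device_lower later resolves for the selected
   target.  */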
5410
5411static void
5412expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5413{
5414 tree v = fd->loop.v;
5415 enum tree_code cond_code = fd->loop.cond_code;
5416 enum tree_code plus_code = PLUS_EXPR;
5417
5418 tree chunk_size = integer_minus_one_node;
5419 tree gwv = integer_zero_node;
5420 tree iter_type = TREE_TYPE (v);
5421 tree diff_type = iter_type;
5422 tree plus_type = iter_type;
5423 struct oacc_collapse *counts = NULL;
5424
5425 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5426 == GF_OMP_FOR_KIND_OACC_LOOP);
5427 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5428 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5429
5430 if (POINTER_TYPE_P (iter_type))
5431 {
5432 plus_code = POINTER_PLUS_EXPR;
5433 plus_type = sizetype;
5434 }
5435 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5436 diff_type = signed_type_for (diff_type);
5437 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5438 diff_type = integer_type_node;
5439
5440 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5441 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5442 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5443 basic_block bottom_bb = NULL;
5444
 5445 /* entry_bb has two successors; the branch edge is to the exit
5446 block, fallthrough edge to body. */
5447 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5448 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5449
5450 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5451 body_bb, or to a block whose only successor is the body_bb. Its
5452 fallthrough successor is the final block (same as the branch
5453 successor of the entry_bb). */
5454 if (cont_bb)
5455 {
5456 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5457 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5458
5459 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5460 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5461 }
5462 else
5463 gcc_assert (!gimple_in_ssa_p (cfun));
5464
5465 /* The exit block only has entry_bb and cont_bb as predecessors. */
5466 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5467
5468 tree chunk_no;
5469 tree chunk_max = NULL_TREE;
5470 tree bound, offset;
5471 tree step = create_tmp_var (diff_type, ".step");
5472 bool up = cond_code == LT_EXPR;
5473 tree dir = build_int_cst (diff_type, up ? +1 : -1);
02889d23 5474 bool chunking = !gimple_in_ssa_p (cfun);
5475 bool negating;
5476
5477 /* Tiling vars. */
5478 tree tile_size = NULL_TREE;
5479 tree element_s = NULL_TREE;
5480 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5481 basic_block elem_body_bb = NULL;
5482 basic_block elem_cont_bb = NULL;
5483
5484 /* SSA instances. */
5485 tree offset_incr = NULL_TREE;
5486 tree offset_init = NULL_TREE;
5487
5488 gimple_stmt_iterator gsi;
5489 gassign *ass;
5490 gcall *call;
5491 gimple *stmt;
5492 tree expr;
5493 location_t loc;
5494 edge split, be, fte;
5495
5496 /* Split the end of entry_bb to create head_bb. */
5497 split = split_block (entry_bb, last_stmt (entry_bb));
5498 basic_block head_bb = split->dest;
5499 entry_bb = split->src;
5500
5501 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
65f4b875 5502 gsi = gsi_last_nondebug_bb (entry_bb);
5503 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5504 loc = gimple_location (for_stmt);
5505
5506 if (gimple_in_ssa_p (cfun))
5507 {
5508 offset_init = gimple_omp_for_index (for_stmt, 0);
5509 gcc_assert (integer_zerop (fd->loop.n1));
5510 /* The SSA parallelizer does gang parallelism. */
5511 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5512 }
5513
02889d23 5514 if (fd->collapse > 1 || fd->tiling)
629b3d75 5515 {
02889d23 5516 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5517 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5518 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
02889d23 5519 TREE_TYPE (fd->loop.n2), loc);
5520
5521 if (SSA_VAR_P (fd->loop.n2))
5522 {
5523 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5524 true, GSI_SAME_STMT);
5525 ass = gimple_build_assign (fd->loop.n2, total);
5526 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5527 }
5528 }
5529
5530 tree b = fd->loop.n1;
5531 tree e = fd->loop.n2;
5532 tree s = fd->loop.step;
5533
5534 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5535 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5536
01914336 5537 /* Convert the step, avoiding possible unsigned->signed overflow. */
5538 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5539 if (negating)
5540 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5541 s = fold_convert (diff_type, s);
5542 if (negating)
5543 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5544 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5545
5546 if (!chunking)
5547 chunk_size = integer_zero_node;
5548 expr = fold_convert (diff_type, chunk_size);
5549 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5550 NULL_TREE, true, GSI_SAME_STMT);
5551
5552 if (fd->tiling)
5553 {
5554 /* Determine the tile size and element step,
5555 modify the outer loop step size. */
5556 tile_size = create_tmp_var (diff_type, ".tile_size");
5557 expr = build_int_cst (diff_type, 1);
5558 for (int ix = 0; ix < fd->collapse; ix++)
5559 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5560 expr = force_gimple_operand_gsi (&gsi, expr, true,
5561 NULL_TREE, true, GSI_SAME_STMT);
5562 ass = gimple_build_assign (tile_size, expr);
5563 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5564
5565 element_s = create_tmp_var (diff_type, ".element_s");
5566 ass = gimple_build_assign (element_s, s);
5567 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5568
5569 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5570 s = force_gimple_operand_gsi (&gsi, expr, true,
5571 NULL_TREE, true, GSI_SAME_STMT);
5572 }
5573
01914336 5574 /* Determine the range, avoiding possible unsigned->signed overflow. */
5575 negating = !up && TYPE_UNSIGNED (iter_type);
5576 expr = fold_build2 (MINUS_EXPR, plus_type,
5577 fold_convert (plus_type, negating ? b : e),
5578 fold_convert (plus_type, negating ? e : b));
5579 expr = fold_convert (diff_type, expr);
5580 if (negating)
5581 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5582 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5583 NULL_TREE, true, GSI_SAME_STMT);
5584
5585 chunk_no = build_int_cst (diff_type, 0);
5586 if (chunking)
5587 {
5588 gcc_assert (!gimple_in_ssa_p (cfun));
5589
5590 expr = chunk_no;
5591 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5592 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5593
5594 ass = gimple_build_assign (chunk_no, expr);
5595 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5596
5597 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5598 build_int_cst (integer_type_node,
5599 IFN_GOACC_LOOP_CHUNKS),
5600 dir, range, s, chunk_size, gwv);
5601 gimple_call_set_lhs (call, chunk_max);
5602 gimple_set_location (call, loc);
5603 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5604 }
5605 else
5606 chunk_size = chunk_no;
5607
5608 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5609 build_int_cst (integer_type_node,
5610 IFN_GOACC_LOOP_STEP),
5611 dir, range, s, chunk_size, gwv);
5612 gimple_call_set_lhs (call, step);
5613 gimple_set_location (call, loc);
5614 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5615
5616 /* Remove the GIMPLE_OMP_FOR. */
5617 gsi_remove (&gsi, true);
5618
01914336 5619 /* Fixup edges from head_bb. */
5620 be = BRANCH_EDGE (head_bb);
5621 fte = FALLTHRU_EDGE (head_bb);
5622 be->flags |= EDGE_FALSE_VALUE;
5623 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5624
5625 basic_block body_bb = fte->dest;
5626
5627 if (gimple_in_ssa_p (cfun))
5628 {
65f4b875 5629 gsi = gsi_last_nondebug_bb (cont_bb);
5630 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5631
5632 offset = gimple_omp_continue_control_use (cont_stmt);
5633 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5634 }
5635 else
5636 {
5637 offset = create_tmp_var (diff_type, ".offset");
5638 offset_init = offset_incr = offset;
5639 }
5640 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5641
5642 /* Loop offset & bound go into head_bb. */
5643 gsi = gsi_start_bb (head_bb);
5644
5645 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5646 build_int_cst (integer_type_node,
5647 IFN_GOACC_LOOP_OFFSET),
5648 dir, range, s,
5649 chunk_size, gwv, chunk_no);
5650 gimple_call_set_lhs (call, offset_init);
5651 gimple_set_location (call, loc);
5652 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5653
5654 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5655 build_int_cst (integer_type_node,
5656 IFN_GOACC_LOOP_BOUND),
5657 dir, range, s,
5658 chunk_size, gwv, offset_init);
5659 gimple_call_set_lhs (call, bound);
5660 gimple_set_location (call, loc);
5661 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5662
5663 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5664 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5665 GSI_CONTINUE_LINKING);
5666
5667 /* V assignment goes into body_bb. */
5668 if (!gimple_in_ssa_p (cfun))
5669 {
5670 gsi = gsi_start_bb (body_bb);
5671
5672 expr = build2 (plus_code, iter_type, b,
5673 fold_convert (plus_type, offset));
5674 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5675 true, GSI_SAME_STMT);
5676 ass = gimple_build_assign (v, expr);
5677 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5678
5679 if (fd->collapse > 1 || fd->tiling)
5680 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5681
5682 if (fd->tiling)
5683 {
5684 /* Determine the range of the element loop -- usually simply
5685 the tile_size, but could be smaller if the final
5686 iteration of the outer loop is a partial tile. */
5687 tree e_range = create_tmp_var (diff_type, ".e_range");
5688
5689 expr = build2 (MIN_EXPR, diff_type,
5690 build2 (MINUS_EXPR, diff_type, bound, offset),
5691 build2 (MULT_EXPR, diff_type, tile_size,
5692 element_s));
5693 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5694 true, GSI_SAME_STMT);
5695 ass = gimple_build_assign (e_range, expr);
5696 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5697
5698 /* Determine bound, offset & step of inner loop. */
5699 e_bound = create_tmp_var (diff_type, ".e_bound");
5700 e_offset = create_tmp_var (diff_type, ".e_offset");
5701 e_step = create_tmp_var (diff_type, ".e_step");
5702
5703 /* Mark these as element loops. */
5704 tree t, e_gwv = integer_minus_one_node;
5705 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5706
5707 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5708 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5709 element_s, chunk, e_gwv, chunk);
5710 gimple_call_set_lhs (call, e_offset);
5711 gimple_set_location (call, loc);
5712 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5713
5714 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5715 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5716 element_s, chunk, e_gwv, e_offset);
5717 gimple_call_set_lhs (call, e_bound);
5718 gimple_set_location (call, loc);
5719 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5720
5721 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5722 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5723 element_s, chunk, e_gwv);
5724 gimple_call_set_lhs (call, e_step);
5725 gimple_set_location (call, loc);
5726 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5727
5728 /* Add test and split block. */
5729 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5730 stmt = gimple_build_cond_empty (expr);
5731 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5732 split = split_block (body_bb, stmt);
5733 elem_body_bb = split->dest;
5734 if (cont_bb == body_bb)
5735 cont_bb = elem_body_bb;
5736 body_bb = split->src;
5737
5738 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5739
5740 /* Add a dummy exit for the tiled block when cont_bb is missing. */
5741 if (cont_bb == NULL)
5742 {
5743 edge e = make_edge (body_bb, exit_bb, EDGE_FALSE_VALUE);
5744 e->probability = profile_probability::even ();
5745 split->probability = profile_probability::even ();
5746 }
5747
5748 /* Initialize the user's loop vars. */
5749 gsi = gsi_start_bb (elem_body_bb);
5750 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5751 }
5752 }
5753
5754 /* Loop increment goes into cont_bb. If this is not a loop, we
5755 will have spawned threads as if it was, and each one will
5756 execute one iteration. The specification is not explicit about
5757 whether such constructs are ill-formed or not, and they can
5758 occur, especially when noreturn routines are involved. */
5759 if (cont_bb)
5760 {
65f4b875 5761 gsi = gsi_last_nondebug_bb (cont_bb);
5762 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5763 loc = gimple_location (cont_stmt);
5764
5765 if (fd->tiling)
5766 {
5767 /* Insert element loop increment and test. */
5768 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5769 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5770 true, GSI_SAME_STMT);
5771 ass = gimple_build_assign (e_offset, expr);
5772 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5773 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5774
5775 stmt = gimple_build_cond_empty (expr);
5776 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5777 split = split_block (cont_bb, stmt);
5778 elem_cont_bb = split->src;
5779 cont_bb = split->dest;
5780
5781 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5782 split->probability = profile_probability::unlikely ().guessed ();
5783 edge latch_edge
5784 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5785 latch_edge->probability = profile_probability::likely ().guessed ();
5786
5787 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5788 skip_edge->probability = profile_probability::unlikely ().guessed ();
5789 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5790 loop_entry_edge->probability
5791 = profile_probability::likely ().guessed ();
5792
5793 gsi = gsi_for_stmt (cont_stmt);
5794 }
5795
5796 /* Increment offset. */
5797 if (gimple_in_ssa_p (cfun))
5798 expr = build2 (plus_code, iter_type, offset,
5799 fold_convert (plus_type, step));
5800 else
5801 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5802 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5803 true, GSI_SAME_STMT);
5804 ass = gimple_build_assign (offset_incr, expr);
5805 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5806 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5807 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5808
5809 /* Remove the GIMPLE_OMP_CONTINUE. */
5810 gsi_remove (&gsi, true);
5811
01914336 5812 /* Fixup edges from cont_bb. */
5813 be = BRANCH_EDGE (cont_bb);
5814 fte = FALLTHRU_EDGE (cont_bb);
5815 be->flags |= EDGE_TRUE_VALUE;
5816 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5817
5818 if (chunking)
5819 {
5820 /* Split the beginning of exit_bb to make bottom_bb. We
5821 need to insert a nop at the start, because splitting is
01914336 5822 after a stmt, not before. */
5823 gsi = gsi_start_bb (exit_bb);
5824 stmt = gimple_build_nop ();
5825 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5826 split = split_block (exit_bb, stmt);
5827 bottom_bb = split->src;
5828 exit_bb = split->dest;
5829 gsi = gsi_last_bb (bottom_bb);
5830
5831 /* Chunk increment and test goes into bottom_bb. */
5832 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5833 build_int_cst (diff_type, 1));
5834 ass = gimple_build_assign (chunk_no, expr);
5835 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5836
5837 /* Chunk test at end of bottom_bb. */
5838 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5839 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5840 GSI_CONTINUE_LINKING);
5841
01914336 5842 /* Fixup edges from bottom_bb. */
629b3d75 5843 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5844 split->probability = profile_probability::unlikely ().guessed ();
5845 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5846 latch_edge->probability = profile_probability::likely ().guessed ();
5847 }
5848 }
5849
65f4b875 5850 gsi = gsi_last_nondebug_bb (exit_bb);
5851 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5852 loc = gimple_location (gsi_stmt (gsi));
5853
5854 if (!gimple_in_ssa_p (cfun))
5855 {
5856 /* Insert the final value of V, in case it is live. This is the
5857 value for the only thread that survives past the join. */
5858 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5859 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5860 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5861 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5862 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5863 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5864 true, GSI_SAME_STMT);
5865 ass = gimple_build_assign (v, expr);
5866 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5867 }
5868
01914336 5869 /* Remove the OMP_RETURN. */
5870 gsi_remove (&gsi, true);
5871
5872 if (cont_bb)
5873 {
02889d23 5874 /* We now have one, two or three nested loops. Update the loop
5875 structures. */
5876 struct loop *parent = entry_bb->loop_father;
5877 struct loop *body = body_bb->loop_father;
5878
5879 if (chunking)
5880 {
5881 struct loop *chunk_loop = alloc_loop ();
5882 chunk_loop->header = head_bb;
5883 chunk_loop->latch = bottom_bb;
5884 add_loop (chunk_loop, parent);
5885 parent = chunk_loop;
5886 }
5887 else if (parent != body)
5888 {
5889 gcc_assert (body->header == body_bb);
5890 gcc_assert (body->latch == cont_bb
5891 || single_pred (body->latch) == cont_bb);
5892 parent = NULL;
5893 }
5894
5895 if (parent)
5896 {
5897 struct loop *body_loop = alloc_loop ();
5898 body_loop->header = body_bb;
5899 body_loop->latch = cont_bb;
5900 add_loop (body_loop, parent);
5901
5902 if (fd->tiling)
5903 {
5904 /* Insert tiling's element loop. */
5905 struct loop *inner_loop = alloc_loop ();
5906 inner_loop->header = elem_body_bb;
5907 inner_loop->latch = elem_cont_bb;
5908 add_loop (inner_loop, body_loop);
5909 }
5910 }
5911 }
5912}
5913
5914/* Expand the OMP loop defined by REGION. */
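/* For the generic (non-static, non-simd) schedules dispatched below, the
   shape of the code produced by expand_omp_for_generic is roughly (a
   condensed sketch; ordered, collapse and reductions add more):

     if (GOMP_loop_<sched>_start (N1, N2, STEP, CHUNK, &istart, &iend))
       do
         {
           for (V = istart; V cond iend; V += STEP)
             {BODY}
         }
       while (GOMP_loop_<sched>_next (&istart, &iend));
     GOMP_loop_end ();  */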
5915
5916static void
5917expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5918{
5919 struct omp_for_data fd;
5920 struct omp_for_data_loop *loops;
5921
5922 loops
5923 = (struct omp_for_data_loop *)
5924 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5925 * sizeof (struct omp_for_data_loop));
5926 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5927 &fd, loops);
5928 region->sched_kind = fd.sched_kind;
5929 region->sched_modifiers = fd.sched_modifiers;
5930
5931 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5932 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5933 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5934 if (region->cont)
5935 {
5936 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5937 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5938 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5939 }
5940 else
 5941 /* If there isn't a continue then this is a degenerate case where
5942 the introduction of abnormal edges during lowering will prevent
5943 original loops from being detected. Fix that up. */
5944 loops_state_set (LOOPS_NEED_FIXUP);
5945
5946 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5947 expand_omp_simd (region, &fd);
5948 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5949 {
5950 gcc_assert (!inner_stmt);
5951 expand_oacc_for (region, &fd);
5952 }
5953 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5954 {
5955 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5956 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5957 else
5958 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5959 }
5960 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5961 && !fd.have_ordered)
5962 {
5963 if (fd.chunk_size == NULL)
5964 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5965 else
5966 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5967 }
5968 else
5969 {
5970 int fn_index, start_ix, next_ix;
5971 unsigned HOST_WIDE_INT sched = 0;
5972 tree sched_arg = NULL_TREE;
5973
5974 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5975 == GF_OMP_FOR_KIND_FOR);
5976 if (fd.chunk_size == NULL
5977 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5978 fd.chunk_size = integer_zero_node;
5979 switch (fd.sched_kind)
5980 {
5981 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5982 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
5983 {
5984 gcc_assert (!fd.have_ordered);
5985 fn_index = 6;
5986 sched = 4;
5987 }
5988 else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
5989 && !fd.have_ordered)
5990 fn_index = 7;
5991 else
5992 {
5993 fn_index = 3;
5994 sched = (HOST_WIDE_INT_1U << 31);
5995 }
5996 break;
5997 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5998 case OMP_CLAUSE_SCHEDULE_GUIDED:
28567c40 5999 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
6000 && !fd.have_ordered)
6001 {
6002 fn_index = 3 + fd.sched_kind;
28567c40 6003 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
629b3d75
MJ
6004 break;
6005 }
629b3d75 6006 fn_index = fd.sched_kind;
6007 sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
6008 sched += (HOST_WIDE_INT_1U << 31);
629b3d75 6009 break;
6010 case OMP_CLAUSE_SCHEDULE_STATIC:
6011 gcc_assert (fd.have_ordered);
6012 fn_index = 0;
6013 sched = (HOST_WIDE_INT_1U << 31) + 1;
6014 break;
6015 default:
6016 gcc_unreachable ();
6017 }
6018 if (!fd.ordered)
28567c40 6019 fn_index += fd.have_ordered * 8;
6020 if (fd.ordered)
6021 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
6022 else
6023 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
6024 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
6025 if (fd.have_reductemp)
6026 {
6027 if (fd.ordered)
6028 start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
6029 else if (fd.have_ordered)
6030 start_ix = (int)BUILT_IN_GOMP_LOOP_ORDERED_START;
6031 else
6032 start_ix = (int)BUILT_IN_GOMP_LOOP_START;
6033 sched_arg = build_int_cstu (long_integer_type_node, sched);
6034 if (!fd.chunk_size)
6035 fd.chunk_size = integer_zero_node;
6036 }
6037 if (fd.iter_type == long_long_unsigned_type_node)
6038 {
6039 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
6040 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
6041 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
6042 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
6043 }
6044 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
6045 (enum built_in_function) next_ix, sched_arg,
6046 inner_stmt);
6047 }
6048
6049 if (gimple_in_ssa_p (cfun))
6050 update_ssa (TODO_update_ssa_only_virtuals);
6051}
6052
6053/* Expand code for an OpenMP sections directive. In pseudo code, we generate
6054
6055 v = GOMP_sections_start (n);
6056 L0:
6057 switch (v)
6058 {
6059 case 0:
6060 goto L2;
6061 case 1:
6062 section 1;
6063 goto L1;
6064 case 2:
6065 ...
6066 case n:
6067 ...
6068 default:
6069 abort ();
6070 }
6071 L1:
6072 v = GOMP_sections_next ();
6073 goto L0;
6074 L2:
6075 reduction;
6076
6077 If this is a combined parallel sections, replace the call to
6078 GOMP_sections_start with call to GOMP_sections_next. */
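/* For instance (an illustrative source-level example), the body of

     #pragma omp sections
     {
       #pragma omp section
         foo ();
       #pragma omp section
         bar ();
     }

   supplies cases 1 and 2 of the switch above, while case 0 is taken once
   GOMP_sections_next reports that no work is left.  */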
6079
6080static void
6081expand_omp_sections (struct omp_region *region)
6082{
6083 tree t, u, vin = NULL, vmain, vnext, l2;
6084 unsigned len;
6085 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
6086 gimple_stmt_iterator si, switch_si;
6087 gomp_sections *sections_stmt;
6088 gimple *stmt;
6089 gomp_continue *cont;
6090 edge_iterator ei;
6091 edge e;
6092 struct omp_region *inner;
6093 unsigned i, casei;
6094 bool exit_reachable = region->cont != NULL;
6095
6096 gcc_assert (region->exit != NULL);
6097 entry_bb = region->entry;
6098 l0_bb = single_succ (entry_bb);
6099 l1_bb = region->cont;
6100 l2_bb = region->exit;
6101 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
6102 l2 = gimple_block_label (l2_bb);
6103 else
6104 {
6105 /* This can happen if there are reductions. */
6106 len = EDGE_COUNT (l0_bb->succs);
6107 gcc_assert (len > 0);
6108 e = EDGE_SUCC (l0_bb, len - 1);
65f4b875 6109 si = gsi_last_nondebug_bb (e->dest);
6110 l2 = NULL_TREE;
6111 if (gsi_end_p (si)
01914336 6112 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6113 l2 = gimple_block_label (e->dest);
6114 else
6115 FOR_EACH_EDGE (e, ei, l0_bb->succs)
6116 {
65f4b875 6117 si = gsi_last_nondebug_bb (e->dest);
6118 if (gsi_end_p (si)
6119 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
6120 {
6121 l2 = gimple_block_label (e->dest);
6122 break;
6123 }
6124 }
6125 }
6126 if (exit_reachable)
6127 default_bb = create_empty_bb (l1_bb->prev_bb);
6128 else
6129 default_bb = create_empty_bb (l0_bb);
6130
6131 /* We will build a switch() with enough cases for all the
6132 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
6133 and a default case to abort if something goes wrong. */
6134 len = EDGE_COUNT (l0_bb->succs);
6135
6136 /* Use vec::quick_push on label_vec throughout, since we know the size
6137 in advance. */
6138 auto_vec<tree> label_vec (len);
6139
6140 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6141 GIMPLE_OMP_SECTIONS statement. */
65f4b875 6142 si = gsi_last_nondebug_bb (entry_bb);
6143 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6144 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6145 vin = gimple_omp_sections_control (sections_stmt);
6146 tree clauses = gimple_omp_sections_clauses (sections_stmt);
6147 tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
6148 if (reductmp)
6149 {
6150 tree reductions = OMP_CLAUSE_DECL (reductmp);
6151 gcc_assert (TREE_CODE (reductions) == SSA_NAME);
6152 gimple *g = SSA_NAME_DEF_STMT (reductions);
6153 reductions = gimple_assign_rhs1 (g);
6154 OMP_CLAUSE_DECL (reductmp) = reductions;
6155 gimple_stmt_iterator gsi = gsi_for_stmt (g);
6156 t = build_int_cst (unsigned_type_node, len - 1);
6157 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
6158 stmt = gimple_build_call (u, 3, t, reductions, null_pointer_node);
6159 gimple_call_set_lhs (stmt, vin);
6160 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
6161 gsi_remove (&gsi, true);
6162 release_ssa_name (gimple_assign_lhs (g));
6163 }
6164 else if (!is_combined_parallel (region))
6165 {
6166 /* If we are not inside a combined parallel+sections region,
6167 call GOMP_sections_start. */
6168 t = build_int_cst (unsigned_type_node, len - 1);
6169 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6170 stmt = gimple_build_call (u, 1, t);
6171 }
6172 else
6173 {
6174 /* Otherwise, call GOMP_sections_next. */
6175 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6176 stmt = gimple_build_call (u, 0);
6177 }
6178 if (!reductmp)
6179 {
6180 gimple_call_set_lhs (stmt, vin);
6181 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6182 }
6183 gsi_remove (&si, true);
6184
6185 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6186 L0_BB. */
65f4b875 6187 switch_si = gsi_last_nondebug_bb (l0_bb);
6188 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6189 if (exit_reachable)
6190 {
6191 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6192 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6193 vmain = gimple_omp_continue_control_use (cont);
6194 vnext = gimple_omp_continue_control_def (cont);
6195 }
6196 else
6197 {
6198 vmain = vin;
6199 vnext = NULL_TREE;
6200 }
6201
6202 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6203 label_vec.quick_push (t);
6204 i = 1;
6205
6206 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6207 for (inner = region->inner, casei = 1;
6208 inner;
6209 inner = inner->next, i++, casei++)
6210 {
6211 basic_block s_entry_bb, s_exit_bb;
6212
6213 /* Skip optional reduction region. */
6214 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6215 {
6216 --i;
6217 --casei;
6218 continue;
6219 }
6220
6221 s_entry_bb = inner->entry;
6222 s_exit_bb = inner->exit;
6223
6224 t = gimple_block_label (s_entry_bb);
6225 u = build_int_cst (unsigned_type_node, casei);
6226 u = build_case_label (u, NULL, t);
6227 label_vec.quick_push (u);
6228
65f4b875 6229 si = gsi_last_nondebug_bb (s_entry_bb);
6230 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6231 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6232 gsi_remove (&si, true);
6233 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6234
6235 if (s_exit_bb == NULL)
6236 continue;
6237
65f4b875 6238 si = gsi_last_nondebug_bb (s_exit_bb);
6239 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6240 gsi_remove (&si, true);
6241
6242 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6243 }
6244
6245 /* Error handling code goes in DEFAULT_BB. */
6246 t = gimple_block_label (default_bb);
6247 u = build_case_label (NULL, NULL, t);
6248 make_edge (l0_bb, default_bb, 0);
6249 add_bb_to_loop (default_bb, current_loops->tree_root);
6250
6251 stmt = gimple_build_switch (vmain, u, label_vec);
6252 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6253 gsi_remove (&switch_si, true);
6254
6255 si = gsi_start_bb (default_bb);
6256 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6257 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6258
6259 if (exit_reachable)
6260 {
6261 tree bfn_decl;
6262
6263 /* Code to get the next section goes in L1_BB. */
65f4b875 6264 si = gsi_last_nondebug_bb (l1_bb);
6265 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6266
6267 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6268 stmt = gimple_build_call (bfn_decl, 0);
6269 gimple_call_set_lhs (stmt, vnext);
6270 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6271 gsi_remove (&si, true);
6272
6273 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6274 }
6275
6276 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
65f4b875 6277 si = gsi_last_nondebug_bb (l2_bb);
6278 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6279 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6280 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6281 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6282 else
6283 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6284 stmt = gimple_build_call (t, 0);
6285 if (gimple_omp_return_lhs (gsi_stmt (si)))
6286 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6287 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6288 gsi_remove (&si, true);
6289
6290 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6291}
6292
6293/* Expand code for an OpenMP single directive. We've already expanded
6294 much of the code, here we simply place the GOMP_barrier call. */
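/* E.g. (illustrative only), for

     #pragma omp single
     x = init ();

   the code selecting the executing thread was already emitted during
   lowering; all that is added here is the GOMP_barrier call at the exit
   of the region, and even that is skipped when a nowait clause was
   given.  */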
6295
6296static void
6297expand_omp_single (struct omp_region *region)
6298{
6299 basic_block entry_bb, exit_bb;
6300 gimple_stmt_iterator si;
6301
6302 entry_bb = region->entry;
6303 exit_bb = region->exit;
6304
65f4b875 6305 si = gsi_last_nondebug_bb (entry_bb);
6306 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6307 gsi_remove (&si, true);
6308 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6309
65f4b875 6310 si = gsi_last_nondebug_bb (exit_bb);
6311 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6312 {
6313 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6314 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6315 }
6316 gsi_remove (&si, true);
6317 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6318}
6319
6320/* Generic expansion for OpenMP synchronization directives: master,
6321 ordered and critical. All we need to do here is remove the entry
6322 and exit markers for REGION. */
6323
6324static void
6325expand_omp_synch (struct omp_region *region)
6326{
6327 basic_block entry_bb, exit_bb;
6328 gimple_stmt_iterator si;
6329
6330 entry_bb = region->entry;
6331 exit_bb = region->exit;
6332
65f4b875 6333 si = gsi_last_nondebug_bb (entry_bb);
6334 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6335 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6336 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6337 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6338 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6339 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6340 if (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS
6341 && gimple_omp_teams_host (as_a <gomp_teams *> (gsi_stmt (si))))
6342 {
6343 expand_omp_taskreg (region);
6344 return;
6345 }
6346 gsi_remove (&si, true);
6347 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6348
6349 if (exit_bb)
6350 {
65f4b875 6351 si = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
6352 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6353 gsi_remove (&si, true);
6354 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6355 }
6356}
6357
6358/* Translate enum omp_memory_order to enum memmodel. The two enums
6359 are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
6360 is 0. */
6361
6362static enum memmodel
6363omp_memory_order_to_memmodel (enum omp_memory_order mo)
6364{
6365 switch (mo)
6366 {
6367 case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
6368 case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
6369 case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
6370 case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
6371 case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
6372 default: gcc_unreachable ();
6373 }
6374}
6375
6376/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6377 operation as a normal volatile load. */
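/* A rough illustration (assuming a 4-byte int so the size index picks the
   _4 variant of the builtin):

     #pragma omp atomic read
     v = *p;

   is emitted approximately as

     v = __atomic_load_4 (p, <memmodel>);

   with <memmodel> derived via omp_memory_order_to_memmodel above.  */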
6378
6379static bool
6380expand_omp_atomic_load (basic_block load_bb, tree addr,
6381 tree loaded_val, int index)
6382{
6383 enum built_in_function tmpbase;
6384 gimple_stmt_iterator gsi;
6385 basic_block store_bb;
6386 location_t loc;
6387 gimple *stmt;
6388 tree decl, call, type, itype;
6389
65f4b875 6390 gsi = gsi_last_nondebug_bb (load_bb);
6391 stmt = gsi_stmt (gsi);
6392 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6393 loc = gimple_location (stmt);
6394
6395 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6396 is smaller than word size, then expand_atomic_load assumes that the load
6397 is atomic. We could avoid the builtin entirely in this case. */
6398
6399 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6400 decl = builtin_decl_explicit (tmpbase);
6401 if (decl == NULL_TREE)
6402 return false;
6403
6404 type = TREE_TYPE (loaded_val);
6405 itype = TREE_TYPE (TREE_TYPE (decl));
6406
6407 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6408 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6409 call = build_call_expr_loc (loc, decl, 2, addr, mo);
6410 if (!useless_type_conversion_p (type, itype))
6411 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6412 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6413
6414 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6415 gsi_remove (&gsi, true);
6416
6417 store_bb = single_succ (load_bb);
65f4b875 6418 gsi = gsi_last_nondebug_bb (store_bb);
6419 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6420 gsi_remove (&gsi, true);
6421
6422 if (gimple_in_ssa_p (cfun))
6423 update_ssa (TODO_update_ssa_no_phi);
6424
6425 return true;
6426}
6427
6428/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6429 operation as a normal volatile store. */
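/* A rough illustration (again assuming a 4-byte operand):

     #pragma omp atomic write
     *p = v;

   becomes approximately __atomic_store_4 (p, v, <memmodel>), or an
   __atomic_exchange_4 call when the previous value is needed, i.e. when
   gimple_omp_atomic_need_value_p is true below.  */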
6430
6431static bool
6432expand_omp_atomic_store (basic_block load_bb, tree addr,
6433 tree loaded_val, tree stored_val, int index)
6434{
6435 enum built_in_function tmpbase;
6436 gimple_stmt_iterator gsi;
6437 basic_block store_bb = single_succ (load_bb);
6438 location_t loc;
6439 gimple *stmt;
6440 tree decl, call, type, itype;
6441 machine_mode imode;
6442 bool exchange;
6443
65f4b875 6444 gsi = gsi_last_nondebug_bb (load_bb);
6445 stmt = gsi_stmt (gsi);
6446 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6447
6448 /* If the load value is needed, then this isn't a store but an exchange. */
6449 exchange = gimple_omp_atomic_need_value_p (stmt);
6450
65f4b875 6451 gsi = gsi_last_nondebug_bb (store_bb);
6452 stmt = gsi_stmt (gsi);
6453 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6454 loc = gimple_location (stmt);
6455
6456 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6457 is smaller than word size, then expand_atomic_store assumes that the store
6458 is atomic. We could avoid the builtin entirely in this case. */
6459
6460 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6461 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6462 decl = builtin_decl_explicit (tmpbase);
6463 if (decl == NULL_TREE)
6464 return false;
6465
6466 type = TREE_TYPE (stored_val);
6467
6468 /* Dig out the type of the function's second argument. */
6469 itype = TREE_TYPE (decl);
6470 itype = TYPE_ARG_TYPES (itype);
6471 itype = TREE_CHAIN (itype);
6472 itype = TREE_VALUE (itype);
6473 imode = TYPE_MODE (itype);
6474
6475 if (exchange && !can_atomic_exchange_p (imode, true))
6476 return false;
6477
6478 if (!useless_type_conversion_p (itype, type))
6479 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6480 enum omp_memory_order omo = gimple_omp_atomic_memory_order (stmt);
6481 tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
6482 call = build_call_expr_loc (loc, decl, 3, addr, stored_val, mo);
6483 if (exchange)
6484 {
6485 if (!useless_type_conversion_p (type, itype))
6486 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6487 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6488 }
6489
6490 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6491 gsi_remove (&gsi, true);
6492
6493 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
65f4b875 6494 gsi = gsi_last_nondebug_bb (load_bb);
6495 gsi_remove (&gsi, true);
6496
6497 if (gimple_in_ssa_p (cfun))
6498 update_ssa (TODO_update_ssa_no_phi);
6499
6500 return true;
6501}
6502
6503/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6504 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6505 size of the data type, and thus usable to find the index of the builtin
6506 decl. Returns false if the expression is not of the proper form. */
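/* A rough illustration (assuming a 4-byte int): an update such as

     #pragma omp atomic
     x = x + 1;

   matches the PLUS_EXPR case below and is emitted approximately as

     __atomic_fetch_add_4 (&x, 1, <memmodel>);

   with the __atomic_add_fetch_4 form used instead when the updated value
   must be returned.  */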
6507
6508static bool
6509expand_omp_atomic_fetch_op (basic_block load_bb,
6510 tree addr, tree loaded_val,
6511 tree stored_val, int index)
6512{
6513 enum built_in_function oldbase, newbase, tmpbase;
6514 tree decl, itype, call;
6515 tree lhs, rhs;
6516 basic_block store_bb = single_succ (load_bb);
6517 gimple_stmt_iterator gsi;
6518 gimple *stmt;
6519 location_t loc;
6520 enum tree_code code;
6521 bool need_old, need_new;
6522 machine_mode imode;
6523
6524 /* We expect to find the following sequences:
6525
6526 load_bb:
6527 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6528
6529 store_bb:
6530 val = tmp OP something; (or: something OP tmp)
6531 GIMPLE_OMP_STORE (val)
6532
6533 ???FIXME: Allow a more flexible sequence.
6534 Perhaps use data flow to pick the statements.
6535
6536 */
6537
6538 gsi = gsi_after_labels (store_bb);
6539 stmt = gsi_stmt (gsi);
6540 if (is_gimple_debug (stmt))
6541 {
6542 gsi_next_nondebug (&gsi);
6543 if (gsi_end_p (gsi))
6544 return false;
6545 stmt = gsi_stmt (gsi);
6546 }
6547 loc = gimple_location (stmt);
6548 if (!is_gimple_assign (stmt))
6549 return false;
65f4b875 6550 gsi_next_nondebug (&gsi);
6551 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6552 return false;
6553 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6554 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
28567c40
JJ
6555 enum omp_memory_order omo
6556 = gimple_omp_atomic_memory_order (last_stmt (load_bb));
6557 enum memmodel mo = omp_memory_order_to_memmodel (omo);
629b3d75
MJ
6558 gcc_checking_assert (!need_old || !need_new);
6559
6560 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6561 return false;
6562
6563 /* Check for one of the supported fetch-op operations. */
6564 code = gimple_assign_rhs_code (stmt);
6565 switch (code)
6566 {
6567 case PLUS_EXPR:
6568 case POINTER_PLUS_EXPR:
6569 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6570 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6571 break;
6572 case MINUS_EXPR:
6573 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6574 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6575 break;
6576 case BIT_AND_EXPR:
6577 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6578 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6579 break;
6580 case BIT_IOR_EXPR:
6581 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6582 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6583 break;
6584 case BIT_XOR_EXPR:
6585 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6586 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6587 break;
6588 default:
6589 return false;
6590 }
6591
6592 /* Make sure the expression is of the proper form. */
6593 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6594 rhs = gimple_assign_rhs2 (stmt);
6595 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6596 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6597 rhs = gimple_assign_rhs1 (stmt);
6598 else
6599 return false;
6600
6601 tmpbase = ((enum built_in_function)
6602 ((need_new ? newbase : oldbase) + index + 1));
6603 decl = builtin_decl_explicit (tmpbase);
6604 if (decl == NULL_TREE)
6605 return false;
6606 itype = TREE_TYPE (TREE_TYPE (decl));
6607 imode = TYPE_MODE (itype);
6608
6609 /* We could test all of the various optabs involved, but the fact of the
6610 matter is that (with the exception of i486 vs i586 and xadd) all targets
6611 that support any atomic operation optab also implement compare-and-swap.
6612 Let optabs.c take care of expanding any compare-and-swap loop. */
dc06356a 6613 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
629b3d75
MJ
6614 return false;
6615
65f4b875 6616 gsi = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6617 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6618
6619 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6620 It only requires that the operation happen atomically, so pass down the
6621 memory model requested by the construct (relaxed by default). */
6622 call = build_call_expr_loc (loc, decl, 3, addr,
6623 fold_convert_loc (loc, itype, rhs),
28567c40 6624 build_int_cst (NULL, mo));
629b3d75
MJ
6625
6626 if (need_old || need_new)
6627 {
6628 lhs = need_old ? loaded_val : stored_val;
6629 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6630 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6631 }
6632 else
6633 call = fold_convert_loc (loc, void_type_node, call);
6634 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6635 gsi_remove (&gsi, true);
6636
65f4b875 6637 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6638 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6639 gsi_remove (&gsi, true);
65f4b875 6640 gsi = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6641 stmt = gsi_stmt (gsi);
6642 gsi_remove (&gsi, true);
6643
6644 if (gimple_in_ssa_p (cfun))
6645 {
6646 release_defs (stmt);
6647 update_ssa (TODO_update_ssa_no_phi);
6648 }
6649
6650 return true;
6651}
6652
6653/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6654
6655 oldval = *addr;
6656 repeat:
01914336 6657 newval = rhs; // with oldval replacing *addr in rhs
629b3d75
MJ
6658 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6659 if (oldval != newval)
6660 goto repeat;
6661
6662 INDEX is log2 of the size of the data type, and thus usable to find the
6663 index of the builtin decl. */
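/* Illustrative sketch: for a 'double' operand, ADDR is reinterpreted as a
   pointer to the same-sized integer type, the loop retries with
   __sync_val_compare_and_swap_8 on the integer images, and only the final
   value is view-converted back to 'double'.  Comparing the integer images
   rather than the doubles is what lets the loop terminate for NaN and
   -0.0 (see the comment further below).  */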
6664
6665static bool
6666expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6667 tree addr, tree loaded_val, tree stored_val,
6668 int index)
6669{
6670 tree loadedi, storedi, initial, new_storedi, old_vali;
b4e47472 6671 tree type, itype, cmpxchg, iaddr, atype;
629b3d75
MJ
6672 gimple_stmt_iterator si;
6673 basic_block loop_header = single_succ (load_bb);
6674 gimple *phi, *stmt;
6675 edge e;
6676 enum built_in_function fncode;
6677
6678 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6679 order to use the RELAXED memory model effectively. */
6680 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6681 + index + 1);
6682 cmpxchg = builtin_decl_explicit (fncode);
6683 if (cmpxchg == NULL_TREE)
6684 return false;
b4e47472
JJ
6685 type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
6686 atype = type;
629b3d75
MJ
6687 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6688
dc06356a
JJ
6689 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6690 || !can_atomic_load_p (TYPE_MODE (itype)))
629b3d75
MJ
6691 return false;
6692
6693 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
65f4b875 6694 si = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6695 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6696
6697 /* For floating-point values, we'll need to view-convert them to integers
6698 so that we can perform the atomic compare and swap. Simplify the
6699 following code by always setting up the "i"ntegral variables. */
6700 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6701 {
6702 tree iaddr_val;
6703
6704 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6705 true));
b4e47472 6706 atype = itype;
629b3d75
MJ
6707 iaddr_val
6708 = force_gimple_operand_gsi (&si,
6709 fold_convert (TREE_TYPE (iaddr), addr),
6710 false, NULL_TREE, true, GSI_SAME_STMT);
6711 stmt = gimple_build_assign (iaddr, iaddr_val);
6712 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6713 loadedi = create_tmp_var (itype);
6714 if (gimple_in_ssa_p (cfun))
6715 loadedi = make_ssa_name (loadedi);
6716 }
6717 else
6718 {
6719 iaddr = addr;
6720 loadedi = loaded_val;
6721 }
6722
6723 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6724 tree loaddecl = builtin_decl_explicit (fncode);
6725 if (loaddecl)
6726 initial
b4e47472 6727 = fold_convert (atype,
629b3d75
MJ
6728 build_call_expr (loaddecl, 2, iaddr,
6729 build_int_cst (NULL_TREE,
6730 MEMMODEL_RELAXED)));
6731 else
b4e47472
JJ
6732 {
6733 tree off
6734 = build_int_cst (build_pointer_type_for_mode (atype, ptr_mode,
6735 true), 0);
6736 initial = build2 (MEM_REF, atype, iaddr, off);
6737 }
629b3d75
MJ
6738
6739 initial
6740 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6741 GSI_SAME_STMT);
6742
6743 /* Move the value to the LOADEDI temporary. */
6744 if (gimple_in_ssa_p (cfun))
6745 {
6746 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6747 phi = create_phi_node (loadedi, loop_header);
6748 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6749 initial);
6750 }
6751 else
6752 gsi_insert_before (&si,
6753 gimple_build_assign (loadedi, initial),
6754 GSI_SAME_STMT);
6755 if (loadedi != loaded_val)
6756 {
6757 gimple_stmt_iterator gsi2;
6758 tree x;
6759
6760 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6761 gsi2 = gsi_start_bb (loop_header);
6762 if (gimple_in_ssa_p (cfun))
6763 {
6764 gassign *stmt;
6765 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6766 true, GSI_SAME_STMT);
6767 stmt = gimple_build_assign (loaded_val, x);
6768 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6769 }
6770 else
6771 {
6772 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6773 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6774 true, GSI_SAME_STMT);
6775 }
6776 }
6777 gsi_remove (&si, true);
6778
65f4b875 6779 si = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6780 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6781
6782 if (iaddr == addr)
6783 storedi = stored_val;
6784 else
01914336
MJ
6785 storedi
6786 = force_gimple_operand_gsi (&si,
6787 build1 (VIEW_CONVERT_EXPR, itype,
6788 stored_val), true, NULL_TREE, true,
6789 GSI_SAME_STMT);
629b3d75
MJ
6790
6791 /* Build the compare&swap statement. */
6792 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6793 new_storedi = force_gimple_operand_gsi (&si,
6794 fold_convert (TREE_TYPE (loadedi),
6795 new_storedi),
6796 true, NULL_TREE,
6797 true, GSI_SAME_STMT);
6798
6799 if (gimple_in_ssa_p (cfun))
6800 old_vali = loadedi;
6801 else
6802 {
6803 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6804 stmt = gimple_build_assign (old_vali, loadedi);
6805 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6806
6807 stmt = gimple_build_assign (loadedi, new_storedi);
6808 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6809 }
6810
6811 /* Note that we always perform the comparison as an integer, even for
6812 floating point. This allows the atomic operation to properly
6813 succeed even with NaNs and -0.0. */
01914336
MJ
6814 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6815 stmt = gimple_build_cond_empty (ne);
629b3d75
MJ
6816 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6817
6818 /* Update cfg. */
6819 e = single_succ_edge (store_bb);
6820 e->flags &= ~EDGE_FALLTHRU;
6821 e->flags |= EDGE_FALSE_VALUE;
357067f2
JH
6822 /* Expect no looping. */
6823 e->probability = profile_probability::guessed_always ();
629b3d75
MJ
6824
6825 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
357067f2 6826 e->probability = profile_probability::guessed_never ();
629b3d75
MJ
6827
6828 /* Copy the new value to loadedi (we already did that before the condition
6829 if we are not in SSA). */
6830 if (gimple_in_ssa_p (cfun))
6831 {
6832 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6833 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6834 }
6835
6836 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6837 gsi_remove (&si, true);
6838
6839 struct loop *loop = alloc_loop ();
6840 loop->header = loop_header;
6841 loop->latch = store_bb;
6842 add_loop (loop, loop_header->loop_father);
6843
6844 if (gimple_in_ssa_p (cfun))
6845 update_ssa (TODO_update_ssa_no_phi);
6846
6847 return true;
6848}
6849
6850/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6851
01914336
MJ
6852 GOMP_atomic_start ();
6853 *addr = rhs;
6854 GOMP_atomic_end ();
629b3d75
MJ
6855
6856 The result is not globally atomic, but works so long as all parallel
6857 references are within #pragma omp atomic directives. According to
6858 responses received from omp@openmp.org, this appears to be within spec.
6859 That makes sense, since that's how several other compilers handle
6860 this situation as well.
6861 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6862 expanding. STORED_VAL is the operand of the matching
6863 GIMPLE_OMP_ATOMIC_STORE.
6864
6865 We replace
6866 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6867 loaded_val = *addr;
6868
6869 and replace
6870 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6871 *addr = stored_val;
6872*/
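/* Note that this path is only taken when none of the lock-free expansions
   apply, e.g. because the operand size is not a supported power of two,
   the operand is insufficiently aligned, or no compare-and-swap pattern is
   available (see expand_omp_atomic below).  */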
6873
6874static bool
6875expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6876 tree addr, tree loaded_val, tree stored_val)
6877{
6878 gimple_stmt_iterator si;
6879 gassign *stmt;
6880 tree t;
6881
65f4b875 6882 si = gsi_last_nondebug_bb (load_bb);
629b3d75
MJ
6883 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6884
6885 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6886 t = build_call_expr (t, 0);
6887 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6888
b4e47472
JJ
6889 tree mem = build_simple_mem_ref (addr);
6890 TREE_TYPE (mem) = TREE_TYPE (loaded_val);
6891 TREE_OPERAND (mem, 1)
6892 = fold_convert (build_pointer_type_for_mode (TREE_TYPE (mem), ptr_mode,
6893 true),
6894 TREE_OPERAND (mem, 1));
6895 stmt = gimple_build_assign (loaded_val, mem);
629b3d75
MJ
6896 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6897 gsi_remove (&si, true);
6898
65f4b875 6899 si = gsi_last_nondebug_bb (store_bb);
629b3d75
MJ
6900 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6901
b4e47472 6902 stmt = gimple_build_assign (unshare_expr (mem), stored_val);
629b3d75
MJ
6903 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6904
6905 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6906 t = build_call_expr (t, 0);
6907 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6908 gsi_remove (&si, true);
6909
6910 if (gimple_in_ssa_p (cfun))
6911 update_ssa (TODO_update_ssa_no_phi);
6912 return true;
6913}
6914
6915/* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
01914336 6916 using expand_omp_atomic_fetch_op. If that fails, we try to
629b3d75
MJ
6917 call expand_omp_atomic_pipeline, and if it fails too, the
6918 ultimate fallback is wrapping the operation in a mutex
6919 (expand_omp_atomic_mutex). REGION is the atomic region built
6920 by build_omp_regions_1(). */
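/* Illustrative sketch of the ladder: for
     #pragma omp atomic
     x = x + y;
   on a suitably aligned int, expand_omp_atomic_fetch_op succeeds and emits
   an __atomic_fetch_add call.  For a form it cannot pattern-match, such as
     x = y - x;
   expand_omp_atomic_pipeline emits a compare-and-swap loop instead, and
   only when even compare-and-swap is unavailable is the access bracketed
   by GOMP_atomic_start/GOMP_atomic_end.  */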
6921
6922static void
6923expand_omp_atomic (struct omp_region *region)
6924{
6925 basic_block load_bb = region->entry, store_bb = region->exit;
6926 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6927 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6928 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6929 tree addr = gimple_omp_atomic_load_rhs (load);
6930 tree stored_val = gimple_omp_atomic_store_val (store);
b4e47472 6931 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
629b3d75
MJ
6932 HOST_WIDE_INT index;
6933
6934 /* Make sure the type is one of the supported sizes. */
6935 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6936 index = exact_log2 (index);
6937 if (index >= 0 && index <= 4)
6938 {
6939 unsigned int align = TYPE_ALIGN_UNIT (type);
6940
6941 /* __sync builtins require strict data alignment. */
6942 if (exact_log2 (align) >= index)
6943 {
6944 /* Atomic load. */
3bd8f481 6945 scalar_mode smode;
629b3d75 6946 if (loaded_val == stored_val
3bd8f481
RS
6947 && (is_int_mode (TYPE_MODE (type), &smode)
6948 || is_float_mode (TYPE_MODE (type), &smode))
6949 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
629b3d75
MJ
6950 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6951 return;
6952
6953 /* Atomic store. */
3bd8f481
RS
6954 if ((is_int_mode (TYPE_MODE (type), &smode)
6955 || is_float_mode (TYPE_MODE (type), &smode))
6956 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
629b3d75
MJ
6957 && store_bb == single_succ (load_bb)
6958 && first_stmt (store_bb) == store
6959 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6960 stored_val, index))
6961 return;
6962
6963 /* When possible, use specialized atomic update functions. */
6964 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6965 && store_bb == single_succ (load_bb)
6966 && expand_omp_atomic_fetch_op (load_bb, addr,
6967 loaded_val, stored_val, index))
6968 return;
6969
6970 /* If we don't have specialized __sync builtins, try to implement
6971 this as a compare-and-swap loop. */
6972 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6973 loaded_val, stored_val, index))
6974 return;
6975 }
6976 }
6977
6978 /* The ultimate fallback is wrapping the operation in a mutex. */
6979 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6980}
6981
6982/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6983 at REGION_EXIT. */
6984
6985static void
6986mark_loops_in_oacc_kernels_region (basic_block region_entry,
6987 basic_block region_exit)
6988{
6989 struct loop *outer = region_entry->loop_father;
6990 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6991
6992 /* Don't parallelize the kernels region if it contains more than one outer
6993 loop. */
6994 unsigned int nr_outer_loops = 0;
6995 struct loop *single_outer = NULL;
6996 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6997 {
6998 gcc_assert (loop_outer (loop) == outer);
6999
7000 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
7001 continue;
7002
7003 if (region_exit != NULL
7004 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
7005 continue;
7006
7007 nr_outer_loops++;
7008 single_outer = loop;
7009 }
7010 if (nr_outer_loops != 1)
7011 return;
7012
01914336
MJ
7013 for (struct loop *loop = single_outer->inner;
7014 loop != NULL;
7015 loop = loop->inner)
629b3d75
MJ
7016 if (loop->next)
7017 return;
7018
7019 /* Mark the loops in the region. */
7020 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
7021 loop->in_oacc_kernels_region = true;
7022}
7023
7024/* Types used to pass grid and workgroup sizes to kernel invocation. */
7025
7026struct GTY(()) grid_launch_attributes_trees
7027{
7028 tree kernel_dim_array_type;
7029 tree kernel_lattrs_dimnum_decl;
7030 tree kernel_lattrs_grid_decl;
7031 tree kernel_lattrs_group_decl;
7032 tree kernel_launch_attributes_type;
7033};
7034
7035static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
7036
7037/* Create types used to pass kernel launch attributes to target. */
7038
7039static void
7040grid_create_kernel_launch_attr_types (void)
7041{
7042 if (grid_attr_trees)
7043 return;
7044 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
7045
7046 tree dim_arr_index_type
7047 = build_index_type (build_int_cst (integer_type_node, 2));
7048 grid_attr_trees->kernel_dim_array_type
7049 = build_array_type (uint32_type_node, dim_arr_index_type);
7050
7051 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
7052 grid_attr_trees->kernel_lattrs_dimnum_decl
7053 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
7054 uint32_type_node);
7055 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
7056
7057 grid_attr_trees->kernel_lattrs_grid_decl
7058 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
7059 grid_attr_trees->kernel_dim_array_type);
7060 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
7061 = grid_attr_trees->kernel_lattrs_dimnum_decl;
7062 grid_attr_trees->kernel_lattrs_group_decl
7063 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
7064 grid_attr_trees->kernel_dim_array_type);
7065 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
7066 = grid_attr_trees->kernel_lattrs_grid_decl;
7067 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
7068 "__gomp_kernel_launch_attributes",
7069 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
7070}
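/* Illustratively, the record built above corresponds to
     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };
   the arrays have three elements because dim_arr_index_type covers the
   index range [0, 2].  */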
7071
7072/* Insert before the current statement in GSI a store of VALUE to INDEX of
7073 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
7074 of type uint32_type_node. */
7075
7076static void
7077grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
7078 tree fld_decl, int index, tree value)
7079{
7080 tree ref = build4 (ARRAY_REF, uint32_type_node,
7081 build3 (COMPONENT_REF,
7082 grid_attr_trees->kernel_dim_array_type,
7083 range_var, fld_decl, NULL_TREE),
7084 build_int_cst (integer_type_node, index),
7085 NULL_TREE, NULL_TREE);
7086 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
7087}
7088
7089/* Return a tree representation of a pointer to a structure with grid and
7090 work-group size information. Statements filling that information will be
7091 inserted before GSI; TGT_STMT is the target statement which has the
7092 necessary information in it. */
7093
7094static tree
7095grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
7096 gomp_target *tgt_stmt)
7097{
7098 grid_create_kernel_launch_attr_types ();
7099 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
7100 "__kernel_launch_attrs");
7101
7102 unsigned max_dim = 0;
7103 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
7104 clause;
7105 clause = OMP_CLAUSE_CHAIN (clause))
7106 {
7107 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
7108 continue;
7109
7110 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
7111 max_dim = MAX (dim, max_dim);
7112
7113 grid_insert_store_range_dim (gsi, lattrs,
7114 grid_attr_trees->kernel_lattrs_grid_decl,
7115 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
7116 grid_insert_store_range_dim (gsi, lattrs,
7117 grid_attr_trees->kernel_lattrs_group_decl,
7118 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
7119 }
7120
7121 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
7122 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
7123 gcc_checking_assert (max_dim <= 2);
7124 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
7125 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
7126 GSI_SAME_STMT);
7127 TREE_ADDRESSABLE (lattrs) = 1;
7128 return build_fold_addr_expr (lattrs);
7129}
7130
7131/* Build target argument identifier from the DEVICE identifier, value
7132 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
7133
7134static tree
7135get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
7136{
7137 tree t = build_int_cst (integer_type_node, device);
7138 if (subseqent_param)
7139 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7140 build_int_cst (integer_type_node,
7141 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
7142 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7143 build_int_cst (integer_type_node, id));
7144 return t;
7145}
7146
7147/* Like above, but return it in a type that can be directly stored as an element
7148 of the argument array. */
7149
7150static tree
7151get_target_argument_identifier (int device, bool subseqent_param, int id)
7152{
7153 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
7154 return fold_convert (ptr_type_node, t);
7155}
7156
7157/* Return a target argument consisting of DEVICE identifier, value identifier
7158 ID, and the actual VALUE. */
7159
7160static tree
7161get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
7162 tree value)
7163{
7164 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
7165 fold_convert (integer_type_node, value),
7166 build_int_cst (unsigned_type_node,
7167 GOMP_TARGET_ARG_VALUE_SHIFT));
7168 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
7169 get_target_argument_identifier_1 (device, false, id));
7170 t = fold_convert (ptr_type_node, t);
7171 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
7172}
7173
7174/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
7175 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
7176 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
7177 arguments. */
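/* Illustrative sketch: for 'num_teams (4)' the constant fits into 16 bits,
   so a single encoded element
     (4 << GOMP_TARGET_ARG_VALUE_SHIFT)
       | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS
   is pushed; for a value only known at run time, an identifier with
   GOMP_TARGET_ARG_SUBSEQUENT_PARAM set is pushed first and the value
   follows as a separate pointer-sized element.  */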
7178
7179static void
7180push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
7181 int id, tree value, vec <tree> *args)
7182{
7183 if (tree_fits_shwi_p (value)
7184 && tree_to_shwi (value) > -(1 << 15)
7185 && tree_to_shwi (value) < (1 << 15))
7186 args->quick_push (get_target_argument_value (gsi, device, id, value));
7187 else
7188 {
7189 args->quick_push (get_target_argument_identifier (device, true, id));
7190 value = fold_convert (ptr_type_node, value);
7191 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
7192 GSI_SAME_STMT);
7193 args->quick_push (value);
7194 }
7195}
7196
01914336 7197/* Create an array of arguments that is then passed to GOMP_target. */
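/* Illustratively, for a plain '#pragma omp target' the array built below
   ends up as
     { <num_teams element>, <thread_limit element>, NULL }
   with the HSA launch-attribute pair appended before the terminating NULL
   when a _griddim_ clause is present.  */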
629b3d75
MJ
7198
7199static tree
7200get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7201{
7202 auto_vec <tree, 6> args;
7203 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7204 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7205 if (c)
7206 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7207 else
7208 t = integer_minus_one_node;
7209 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7210 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7211
7212 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7213 if (c)
7214 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7215 else
7216 t = integer_minus_one_node;
7217 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7218 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7219 &args);
7220
7221 /* Add HSA-specific grid sizes, if available. */
7222 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7223 OMP_CLAUSE__GRIDDIM_))
7224 {
01914336
MJ
7225 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7226 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
629b3d75
MJ
7227 args.quick_push (t);
7228 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7229 }
7230
7231 /* Produce more, perhaps device specific, arguments here. */
7232
7233 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7234 args.length () + 1),
7235 ".omp_target_args");
7236 for (unsigned i = 0; i < args.length (); i++)
7237 {
7238 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7239 build_int_cst (integer_type_node, i),
7240 NULL_TREE, NULL_TREE);
7241 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7242 GSI_SAME_STMT);
7243 }
7244 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7245 build_int_cst (integer_type_node, args.length ()),
7246 NULL_TREE, NULL_TREE);
7247 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7248 GSI_SAME_STMT);
7249 TREE_ADDRESSABLE (argarray) = 1;
7250 return build_fold_addr_expr (argarray);
7251}
7252
7253/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7254
7255static void
7256expand_omp_target (struct omp_region *region)
7257{
7258 basic_block entry_bb, exit_bb, new_bb;
7259 struct function *child_cfun;
7260 tree child_fn, block, t;
7261 gimple_stmt_iterator gsi;
7262 gomp_target *entry_stmt;
7263 gimple *stmt;
7264 edge e;
7265 bool offloaded, data_region;
7266
7267 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7268 new_bb = region->entry;
7269
7270 offloaded = is_gimple_omp_offloaded (entry_stmt);
7271 switch (gimple_omp_target_kind (entry_stmt))
7272 {
7273 case GF_OMP_TARGET_KIND_REGION:
7274 case GF_OMP_TARGET_KIND_UPDATE:
7275 case GF_OMP_TARGET_KIND_ENTER_DATA:
7276 case GF_OMP_TARGET_KIND_EXIT_DATA:
7277 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7278 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7279 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7280 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7281 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7282 data_region = false;
7283 break;
7284 case GF_OMP_TARGET_KIND_DATA:
7285 case GF_OMP_TARGET_KIND_OACC_DATA:
7286 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7287 data_region = true;
7288 break;
7289 default:
7290 gcc_unreachable ();
7291 }
7292
7293 child_fn = NULL_TREE;
7294 child_cfun = NULL;
7295 if (offloaded)
7296 {
7297 child_fn = gimple_omp_target_child_fn (entry_stmt);
7298 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7299 }
7300
7301 /* Supported by expand_omp_taskreg, but not here. */
7302 if (child_cfun != NULL)
7303 gcc_checking_assert (!child_cfun->cfg);
7304 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7305
7306 entry_bb = region->entry;
7307 exit_bb = region->exit;
7308
7309 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
25651634
TS
7310 {
7311 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7312
7313 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7314 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7315 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7316 DECL_ATTRIBUTES (child_fn)
7317 = tree_cons (get_identifier ("oacc kernels"),
7318 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7319 }
629b3d75
MJ
7320
7321 if (offloaded)
7322 {
7323 unsigned srcidx, dstidx, num;
7324
7325 /* If the offloading region needs data sent from the parent
7326 function, then the very first statement (except possible
7327 tree profile counter updates) of the offloading body
7328 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7329 &.OMP_DATA_O is passed as an argument to the child function,
7330 we need to replace it with the argument as seen by the child
7331 function.
7332
7333 In most cases, this will end up being the identity assignment
7334 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7335 a function call that has been inlined, the original PARM_DECL
7336 .OMP_DATA_I may have been converted into a different local
7337 variable, in which case we need to keep the assignment. */
7338 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7339 if (data_arg)
7340 {
7341 basic_block entry_succ_bb = single_succ (entry_bb);
7342 gimple_stmt_iterator gsi;
7343 tree arg;
7344 gimple *tgtcopy_stmt = NULL;
7345 tree sender = TREE_VEC_ELT (data_arg, 0);
7346
7347 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7348 {
7349 gcc_assert (!gsi_end_p (gsi));
7350 stmt = gsi_stmt (gsi);
7351 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7352 continue;
7353
7354 if (gimple_num_ops (stmt) == 2)
7355 {
7356 tree arg = gimple_assign_rhs1 (stmt);
7357
7358 /* We're ignoring the subcode because we're
7359 effectively doing a STRIP_NOPS. */
7360
7361 if (TREE_CODE (arg) == ADDR_EXPR
7362 && TREE_OPERAND (arg, 0) == sender)
7363 {
7364 tgtcopy_stmt = stmt;
7365 break;
7366 }
7367 }
7368 }
7369
7370 gcc_assert (tgtcopy_stmt != NULL);
7371 arg = DECL_ARGUMENTS (child_fn);
7372
7373 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7374 gsi_remove (&gsi, true);
7375 }
7376
7377 /* Declare local variables needed in CHILD_CFUN. */
7378 block = DECL_INITIAL (child_fn);
7379 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7380 /* The gimplifier could record temporaries in the offloading block
7381 rather than in the containing function's local_decls chain,
7382 which would mean cgraph missed finalizing them. Do it now. */
7383 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7384 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7385 varpool_node::finalize_decl (t);
7386 DECL_SAVED_TREE (child_fn) = NULL;
7387 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7388 gimple_set_body (child_fn, NULL);
7389 TREE_USED (block) = 1;
7390
7391 /* Reset DECL_CONTEXT on function arguments. */
7392 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7393 DECL_CONTEXT (t) = child_fn;
7394
7395 /* Split ENTRY_BB at GIMPLE_*,
7396 so that it can be moved to the child function. */
65f4b875 7397 gsi = gsi_last_nondebug_bb (entry_bb);
629b3d75
MJ
7398 stmt = gsi_stmt (gsi);
7399 gcc_assert (stmt
7400 && gimple_code (stmt) == gimple_code (entry_stmt));
7401 e = split_block (entry_bb, stmt);
7402 gsi_remove (&gsi, true);
7403 entry_bb = e->dest;
7404 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7405
7406 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7407 if (exit_bb)
7408 {
65f4b875 7409 gsi = gsi_last_nondebug_bb (exit_bb);
629b3d75
MJ
7410 gcc_assert (!gsi_end_p (gsi)
7411 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7412 stmt = gimple_build_return (NULL);
7413 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7414 gsi_remove (&gsi, true);
7415 }
7416
7417 /* Move the offloading region into CHILD_CFUN. */
7418
7419 block = gimple_block (entry_stmt);
7420
7421 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7422 if (exit_bb)
7423 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7424 /* When the OMP expansion process cannot guarantee an up-to-date
7425 loop tree, arrange for the child function to fix up loops. */
7426 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7427 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7428
7429 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7430 num = vec_safe_length (child_cfun->local_decls);
7431 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7432 {
7433 t = (*child_cfun->local_decls)[srcidx];
7434 if (DECL_CONTEXT (t) == cfun->decl)
7435 continue;
7436 if (srcidx != dstidx)
7437 (*child_cfun->local_decls)[dstidx] = t;
7438 dstidx++;
7439 }
7440 if (dstidx != num)
7441 vec_safe_truncate (child_cfun->local_decls, dstidx);
7442
7443 /* Inform the callgraph about the new function. */
7444 child_cfun->curr_properties = cfun->curr_properties;
7445 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7446 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7447 cgraph_node *node = cgraph_node::get_create (child_fn);
7448 node->parallelized_function = 1;
7449 cgraph_node::add_new_function (child_fn, true);
7450
7451 /* Add the new function to the offload table. */
7452 if (ENABLE_OFFLOADING)
60bf575c
TV
7453 {
7454 if (in_lto_p)
7455 DECL_PRESERVE_P (child_fn) = 1;
7456 vec_safe_push (offload_funcs, child_fn);
7457 }
629b3d75
MJ
7458
7459 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7460 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7461
7462 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7463 fixed in a following pass. */
7464 push_cfun (child_cfun);
7465 if (need_asm)
9579db35 7466 assign_assembler_name_if_needed (child_fn);
629b3d75
MJ
7467 cgraph_edge::rebuild_edges ();
7468
7469 /* Some EH regions might become dead, see PR34608. If
7470 pass_cleanup_cfg isn't the first pass to happen with the
7471 new child, these dead EH edges might cause problems.
7472 Clean them up now. */
7473 if (flag_exceptions)
7474 {
7475 basic_block bb;
7476 bool changed = false;
7477
7478 FOR_EACH_BB_FN (bb, cfun)
7479 changed |= gimple_purge_dead_eh_edges (bb);
7480 if (changed)
7481 cleanup_tree_cfg ();
7482 }
7483 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7484 verify_loop_structure ();
7485 pop_cfun ();
7486
7487 if (dump_file && !gimple_in_ssa_p (cfun))
7488 {
7489 omp_any_child_fn_dumped = true;
7490 dump_function_header (dump_file, child_fn, dump_flags);
7491 dump_function_to_file (child_fn, dump_file, dump_flags);
7492 }
4ccc4e30
JJ
7493
7494 adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
629b3d75
MJ
7495 }
7496
7497 /* Emit a library call to launch the offloading region, or do data
7498 transfers. */
7499 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7500 enum built_in_function start_ix;
7501 location_t clause_loc;
7502 unsigned int flags_i = 0;
629b3d75
MJ
7503
7504 switch (gimple_omp_target_kind (entry_stmt))
7505 {
7506 case GF_OMP_TARGET_KIND_REGION:
7507 start_ix = BUILT_IN_GOMP_TARGET;
7508 break;
7509 case GF_OMP_TARGET_KIND_DATA:
7510 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7511 break;
7512 case GF_OMP_TARGET_KIND_UPDATE:
7513 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7514 break;
7515 case GF_OMP_TARGET_KIND_ENTER_DATA:
7516 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7517 break;
7518 case GF_OMP_TARGET_KIND_EXIT_DATA:
7519 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7520 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7521 break;
7522 case GF_OMP_TARGET_KIND_OACC_KERNELS:
629b3d75
MJ
7523 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7524 start_ix = BUILT_IN_GOACC_PARALLEL;
7525 break;
7526 case GF_OMP_TARGET_KIND_OACC_DATA:
7527 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7528 start_ix = BUILT_IN_GOACC_DATA_START;
7529 break;
7530 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7531 start_ix = BUILT_IN_GOACC_UPDATE;
7532 break;
7533 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7534 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7535 break;
7536 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7537 start_ix = BUILT_IN_GOACC_DECLARE;
7538 break;
7539 default:
7540 gcc_unreachable ();
7541 }
7542
7543 clauses = gimple_omp_target_clauses (entry_stmt);
7544
7545 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7546 library choose) and there is no conditional. */
7547 cond = NULL_TREE;
7548 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7549
7550 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7551 if (c)
7552 cond = OMP_CLAUSE_IF_EXPR (c);
7553
7554 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7555 if (c)
7556 {
7557 /* Even if we pass it to all library function calls, it is currently only
7558 defined/used for the OpenMP target ones. */
7559 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7560 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7561 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7562 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7563
7564 device = OMP_CLAUSE_DEVICE_ID (c);
7565 clause_loc = OMP_CLAUSE_LOCATION (c);
7566 }
7567 else
7568 clause_loc = gimple_location (entry_stmt);
7569
7570 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7571 if (c)
7572 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7573
7574 /* Ensure 'device' is of the correct type. */
7575 device = fold_convert_loc (clause_loc, integer_type_node, device);
7576
7577 /* If we found the clause 'if (cond)', build
7578 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7579 if (cond)
7580 {
7581 cond = gimple_boolify (cond);
7582
7583 basic_block cond_bb, then_bb, else_bb;
7584 edge e;
7585 tree tmp_var;
7586
7587 tmp_var = create_tmp_var (TREE_TYPE (device));
7588 if (offloaded)
7589 e = split_block_after_labels (new_bb);
7590 else
7591 {
65f4b875 7592 gsi = gsi_last_nondebug_bb (new_bb);
629b3d75
MJ
7593 gsi_prev (&gsi);
7594 e = split_block (new_bb, gsi_stmt (gsi));
7595 }
7596 cond_bb = e->src;
7597 new_bb = e->dest;
7598 remove_edge (e);
7599
7600 then_bb = create_empty_bb (cond_bb);
7601 else_bb = create_empty_bb (then_bb);
7602 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7603 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7604
7605 stmt = gimple_build_cond_empty (cond);
7606 gsi = gsi_last_bb (cond_bb);
7607 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7608
7609 gsi = gsi_start_bb (then_bb);
7610 stmt = gimple_build_assign (tmp_var, device);
7611 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7612
7613 gsi = gsi_start_bb (else_bb);
7614 stmt = gimple_build_assign (tmp_var,
7615 build_int_cst (integer_type_node,
7616 GOMP_DEVICE_HOST_FALLBACK));
7617 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7618
7619 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7620 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7621 add_bb_to_loop (then_bb, cond_bb->loop_father);
7622 add_bb_to_loop (else_bb, cond_bb->loop_father);
7623 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7624 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7625
7626 device = tmp_var;
65f4b875 7627 gsi = gsi_last_nondebug_bb (new_bb);
629b3d75
MJ
7628 }
7629 else
7630 {
65f4b875 7631 gsi = gsi_last_nondebug_bb (new_bb);
629b3d75
MJ
7632 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7633 true, GSI_SAME_STMT);
7634 }
7635
7636 t = gimple_omp_target_data_arg (entry_stmt);
7637 if (t == NULL)
7638 {
7639 t1 = size_zero_node;
7640 t2 = build_zero_cst (ptr_type_node);
7641 t3 = t2;
7642 t4 = t2;
7643 }
7644 else
7645 {
7646 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7647 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7648 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7649 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7650 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7651 }
7652
7653 gimple *g;
7654 bool tagging = false;
7655 /* The maximum number used by any start_ix, without varargs. */
7656 auto_vec<tree, 11> args;
7657 args.quick_push (device);
7658 if (offloaded)
7659 args.quick_push (build_fold_addr_expr (child_fn));
7660 args.quick_push (t1);
7661 args.quick_push (t2);
7662 args.quick_push (t3);
7663 args.quick_push (t4);
7664 switch (start_ix)
7665 {
7666 case BUILT_IN_GOACC_DATA_START:
7667 case BUILT_IN_GOACC_DECLARE:
7668 case BUILT_IN_GOMP_TARGET_DATA:
7669 break;
7670 case BUILT_IN_GOMP_TARGET:
7671 case BUILT_IN_GOMP_TARGET_UPDATE:
7672 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7673 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7674 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7675 if (c)
7676 depend = OMP_CLAUSE_DECL (c);
7677 else
7678 depend = build_int_cst (ptr_type_node, 0);
7679 args.quick_push (depend);
7680 if (start_ix == BUILT_IN_GOMP_TARGET)
7681 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7682 break;
7683 case BUILT_IN_GOACC_PARALLEL:
25651634
TS
7684 oacc_set_fn_attrib (child_fn, clauses, &args);
7685 tagging = true;
629b3d75
MJ
7686 /* FALLTHRU */
7687 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7688 case BUILT_IN_GOACC_UPDATE:
7689 {
7690 tree t_async = NULL_TREE;
7691
7692 /* If present, use the value specified by the respective
7693 clause, making sure that is of the correct type. */
7694 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7695 if (c)
7696 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7697 integer_type_node,
7698 OMP_CLAUSE_ASYNC_EXPR (c));
7699 else if (!tagging)
7700 /* Default values for t_async. */
7701 t_async = fold_convert_loc (gimple_location (entry_stmt),
7702 integer_type_node,
7703 build_int_cst (integer_type_node,
7704 GOMP_ASYNC_SYNC));
7705 if (tagging && t_async)
7706 {
7707 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7708
7709 if (TREE_CODE (t_async) == INTEGER_CST)
7710 {
7711 /* See if we can pack the async arg into the tag's
7712 operand. */
7713 i_async = TREE_INT_CST_LOW (t_async);
7714 if (i_async < GOMP_LAUNCH_OP_MAX)
7715 t_async = NULL_TREE;
7716 else
7717 i_async = GOMP_LAUNCH_OP_MAX;
7718 }
7719 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7720 i_async));
7721 }
7722 if (t_async)
7723 args.safe_push (t_async);
7724
7725 /* Save the argument index, and ... */
7726 unsigned t_wait_idx = args.length ();
7727 unsigned num_waits = 0;
7728 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7729 if (!tagging || c)
7730 /* ... push a placeholder. */
7731 args.safe_push (integer_zero_node);
7732
7733 for (; c; c = OMP_CLAUSE_CHAIN (c))
7734 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7735 {
7736 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7737 integer_type_node,
7738 OMP_CLAUSE_WAIT_EXPR (c)));
7739 num_waits++;
7740 }
7741
7742 if (!tagging || num_waits)
7743 {
7744 tree len;
7745
7746 /* Now that we know the number, update the placeholder. */
7747 if (tagging)
7748 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7749 else
7750 len = build_int_cst (integer_type_node, num_waits);
7751 len = fold_convert_loc (gimple_location (entry_stmt),
7752 unsigned_type_node, len);
7753 args[t_wait_idx] = len;
7754 }
7755 }
7756 break;
7757 default:
7758 gcc_unreachable ();
7759 }
7760 if (tagging)
7761 /* Push terminal marker - zero. */
7762 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7763
7764 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7765 gimple_set_location (g, gimple_location (entry_stmt));
7766 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7767 if (!offloaded)
7768 {
7769 g = gsi_stmt (gsi);
7770 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7771 gsi_remove (&gsi, true);
7772 }
7773 if (data_region && region->exit)
7774 {
65f4b875 7775 gsi = gsi_last_nondebug_bb (region->exit);
629b3d75
MJ
7776 g = gsi_stmt (gsi);
7777 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7778 gsi_remove (&gsi, true);
7779 }
7780}
7781
7782/* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
7783 iteration variable derived from the thread number. INTRA_GROUP means this
7784 is an expansion of a loop iterating over work-items within a separate
01914336 7785 iteration over groups. */
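/* Illustrative sketch: a gridified
     for (i = N1; i < N2; i += STEP)
   body collapses into the straight-line code
     i = N1 + <thread id in dimension 0> * STEP;
     <body>
   so that each HSA work-item (or work-group, for the group iteration)
   executes exactly one iteration; the id comes from the HSA workitemabsid,
   workitemid or workgroupid builtins selected below.  */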
629b3d75
MJ
7786
7787static void
7788grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7789{
7790 gimple_stmt_iterator gsi;
7791 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7792 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7793 == GF_OMP_FOR_KIND_GRID_LOOP);
7794 size_t collapse = gimple_omp_for_collapse (for_stmt);
7795 struct omp_for_data_loop *loops
7796 = XALLOCAVEC (struct omp_for_data_loop,
01914336 7797 gimple_omp_for_collapse (for_stmt));
629b3d75
MJ
7798 struct omp_for_data fd;
7799
7800 remove_edge (BRANCH_EDGE (kfor->entry));
7801 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7802
7803 gcc_assert (kfor->cont);
7804 omp_extract_for_data (for_stmt, &fd, loops);
7805
7806 gsi = gsi_start_bb (body_bb);
7807
7808 for (size_t dim = 0; dim < collapse; dim++)
7809 {
7810 tree type, itype;
7811 itype = type = TREE_TYPE (fd.loops[dim].v);
7812 if (POINTER_TYPE_P (type))
7813 itype = signed_type_for (type);
7814
7815 tree n1 = fd.loops[dim].n1;
7816 tree step = fd.loops[dim].step;
7817 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7818 true, NULL_TREE, true, GSI_SAME_STMT);
7819 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7820 true, NULL_TREE, true, GSI_SAME_STMT);
7821 tree threadid;
7822 if (gimple_omp_for_grid_group_iter (for_stmt))
7823 {
7824 gcc_checking_assert (!intra_group);
7825 threadid = build_call_expr (builtin_decl_explicit
7826 (BUILT_IN_HSA_WORKGROUPID), 1,
7827 build_int_cstu (unsigned_type_node, dim));
7828 }
7829 else if (intra_group)
7830 threadid = build_call_expr (builtin_decl_explicit
7831 (BUILT_IN_HSA_WORKITEMID), 1,
7832 build_int_cstu (unsigned_type_node, dim));
7833 else
7834 threadid = build_call_expr (builtin_decl_explicit
7835 (BUILT_IN_HSA_WORKITEMABSID), 1,
7836 build_int_cstu (unsigned_type_node, dim));
7837 threadid = fold_convert (itype, threadid);
7838 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7839 true, GSI_SAME_STMT);
7840
7841 tree startvar = fd.loops[dim].v;
7842 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7843 if (POINTER_TYPE_P (type))
7844 t = fold_build_pointer_plus (n1, t);
7845 else
7846 t = fold_build2 (PLUS_EXPR, type, t, n1);
7847 t = fold_convert (type, t);
7848 t = force_gimple_operand_gsi (&gsi, t,
7849 DECL_P (startvar)
7850 && TREE_ADDRESSABLE (startvar),
7851 NULL_TREE, true, GSI_SAME_STMT);
7852 gassign *assign_stmt = gimple_build_assign (startvar, t);
7853 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7854 }
01914336 7855 /* Remove the omp for statement. */
65f4b875 7856 gsi = gsi_last_nondebug_bb (kfor->entry);
629b3d75
MJ
7857 gsi_remove (&gsi, true);
7858
7859 /* Remove the GIMPLE_OMP_CONTINUE statement. */
65f4b875 7860 gsi = gsi_last_nondebug_bb (kfor->cont);
629b3d75
MJ
7861 gcc_assert (!gsi_end_p (gsi)
7862 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7863 gsi_remove (&gsi, true);
7864
7865 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
65f4b875 7866 gsi = gsi_last_nondebug_bb (kfor->exit);
629b3d75
MJ
7867 gcc_assert (!gsi_end_p (gsi)
7868 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7869 if (intra_group)
7870 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7871 gsi_remove (&gsi, true);
7872
7873 /* Fixup the much simpler CFG. */
7874 remove_edge (find_edge (kfor->cont, body_bb));
7875
7876 if (kfor->cont != body_bb)
7877 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7878 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7879}
7880
7881/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7882 argument_decls. */
7883
7884struct grid_arg_decl_map
7885{
7886 tree old_arg;
7887 tree new_arg;
7888};
7889
7890/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7891 pertaining to kernel function. */
7892
7893static tree
7894grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7895{
7896 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7897 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7898 tree t = *tp;
7899
7900 if (t == adm->old_arg)
7901 *tp = adm->new_arg;
7902 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7903 return NULL_TREE;
7904}
7905
7906/* If the TARGET region contains a kernel body for-loop, remove its region from the
01914336 7907 TARGET and expand it in HSA gridified kernel fashion. */
629b3d75
MJ
7908
7909static void
7910grid_expand_target_grid_body (struct omp_region *target)
7911{
7912 if (!hsa_gen_requested_p ())
7913 return;
7914
7915 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7916 struct omp_region **pp;
7917
7918 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7919 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7920 break;
7921
7922 struct omp_region *gpukernel = *pp;
7923
7924 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7925 if (!gpukernel)
7926 {
7927 /* HSA cannot handle OACC stuff. */
7928 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7929 return;
7930 gcc_checking_assert (orig_child_fndecl);
7931 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7932 OMP_CLAUSE__GRIDDIM_));
7933 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7934
7935 hsa_register_kernel (n);
7936 return;
7937 }
7938
7939 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7940 OMP_CLAUSE__GRIDDIM_));
01914336
MJ
7941 tree inside_block
7942 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
629b3d75
MJ
7943 *pp = gpukernel->next;
7944 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7945 if ((*pp)->type == GIMPLE_OMP_FOR)
7946 break;
7947
7948 struct omp_region *kfor = *pp;
7949 gcc_assert (kfor);
7950 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7951 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7952 *pp = kfor->next;
7953 if (kfor->inner)
7954 {
7955 if (gimple_omp_for_grid_group_iter (for_stmt))
7956 {
7957 struct omp_region **next_pp;
7958 for (pp = &kfor->inner; *pp; pp = next_pp)
7959 {
7960 next_pp = &(*pp)->next;
7961 if ((*pp)->type != GIMPLE_OMP_FOR)
7962 continue;
7963 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7964 gcc_assert (gimple_omp_for_kind (inner)
7965 == GF_OMP_FOR_KIND_GRID_LOOP);
7966 grid_expand_omp_for_loop (*pp, true);
7967 *pp = (*pp)->next;
7968 next_pp = pp;
7969 }
7970 }
7971 expand_omp (kfor->inner);
7972 }
7973 if (gpukernel->inner)
7974 expand_omp (gpukernel->inner);
7975
7976 tree kern_fndecl = copy_node (orig_child_fndecl);
7958186b
MP
7977 DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
7978 "kernel");
629b3d75
MJ
7979 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7980 tree tgtblock = gimple_block (tgt_stmt);
7981 tree fniniblock = make_node (BLOCK);
dc16b007 7982 BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
629b3d75
MJ
7983 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7984 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7985 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7986 DECL_INITIAL (kern_fndecl) = fniniblock;
7987 push_struct_function (kern_fndecl);
7988 cfun->function_end_locus = gimple_location (tgt_stmt);
7989 init_tree_ssa (cfun);
7990 pop_cfun ();
7991
7992 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7993 gcc_assert (!DECL_CHAIN (old_parm_decl));
7994 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7995 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7996 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7997 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7998 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7999 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
8000 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
8001 kern_cfun->curr_properties = cfun->curr_properties;
8002
8003 grid_expand_omp_for_loop (kfor, false);
8004
01914336 8005 /* Remove the omp for statement. */
65f4b875 8006 gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
629b3d75
MJ
8007 gsi_remove (&gsi, true);
8008 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
8009 return. */
65f4b875 8010 gsi = gsi_last_nondebug_bb (gpukernel->exit);
629b3d75
MJ
8011 gcc_assert (!gsi_end_p (gsi)
8012 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
8013 gimple *ret_stmt = gimple_build_return (NULL);
8014 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
8015 gsi_remove (&gsi, true);
8016
8017 /* Statements in the first BB in the target construct have been produced by
8018 target lowering and must be copied inside the GPUKERNEL, with the two
8019 exceptions of the first OMP statement and the OMP_DATA assignment
8020 statement. */
8021 gsi = gsi_start_bb (single_succ (gpukernel->entry));
8022 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
8023 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
8024 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
8025 !gsi_end_p (tsi); gsi_next (&tsi))
8026 {
8027 gimple *stmt = gsi_stmt (tsi);
8028 if (is_gimple_omp (stmt))
8029 break;
8030 if (sender
8031 && is_gimple_assign (stmt)
8032 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
8033 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
8034 continue;
8035 gimple *copy = gimple_copy (stmt);
8036 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
8037 gimple_set_block (copy, fniniblock);
8038 }
8039
8040 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
8041 gpukernel->exit, inside_block);
8042
8043 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
8044 kcn->mark_force_output ();
8045 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
8046
8047 hsa_register_kernel (kcn, orig_child);
8048
8049 cgraph_node::add_new_function (kern_fndecl, true);
8050 push_cfun (kern_cfun);
8051 cgraph_edge::rebuild_edges ();
8052
8053 /* Re-map any mention of the PARM_DECL of the original function to the
8054 PARM_DECL of the new one.
8055
8056 TODO: It would be great if lowering produced references into the GPU
8057 kernel decl straight away and we did not have to do this. */
8058 struct grid_arg_decl_map adm;
8059 adm.old_arg = old_parm_decl;
8060 adm.new_arg = new_parm_decl;
8061 basic_block bb;
8062 FOR_EACH_BB_FN (bb, kern_cfun)
8063 {
8064 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
8065 {
8066 gimple *stmt = gsi_stmt (gsi);
8067 struct walk_stmt_info wi;
8068 memset (&wi, 0, sizeof (wi));
8069 wi.info = &adm;
8070 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
8071 }
8072 }
8073 pop_cfun ();
8074
8075 return;
8076}
8077
8078/* Expand the parallel region tree rooted at REGION. Expansion
8079 proceeds in depth-first order. Innermost regions are expanded
8080 first. This way, parallel regions that require a new function to
8081 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
8082 internal dependencies in their body. */
8083
8084static void
8085expand_omp (struct omp_region *region)
8086{
8087 omp_any_child_fn_dumped = false;
8088 while (region)
8089 {
8090 location_t saved_location;
8091 gimple *inner_stmt = NULL;
8092
8093 /* First, determine whether this is a combined parallel+workshare
01914336 8094 region. */
629b3d75
MJ
8095 if (region->type == GIMPLE_OMP_PARALLEL)
8096 determine_parallel_type (region);
8097 else if (region->type == GIMPLE_OMP_TARGET)
8098 grid_expand_target_grid_body (region);
8099
8100 if (region->type == GIMPLE_OMP_FOR
8101 && gimple_omp_for_combined_p (last_stmt (region->entry)))
8102 inner_stmt = last_stmt (region->inner->entry);
8103
8104 if (region->inner)
8105 expand_omp (region->inner);
8106
8107 saved_location = input_location;
8108 if (gimple_has_location (last_stmt (region->entry)))
8109 input_location = gimple_location (last_stmt (region->entry));
8110
8111 switch (region->type)
8112 {
8113 case GIMPLE_OMP_PARALLEL:
8114 case GIMPLE_OMP_TASK:
8115 expand_omp_taskreg (region);
8116 break;
8117
8118 case GIMPLE_OMP_FOR:
8119 expand_omp_for (region, inner_stmt);
8120 break;
8121
8122 case GIMPLE_OMP_SECTIONS:
8123 expand_omp_sections (region);
8124 break;
8125
8126 case GIMPLE_OMP_SECTION:
8127 /* Individual omp sections are handled together with their
8128 parent GIMPLE_OMP_SECTIONS region. */
8129 break;
8130
8131 case GIMPLE_OMP_SINGLE:
8132 expand_omp_single (region);
8133 break;
8134
8135 case GIMPLE_OMP_ORDERED:
8136 {
8137 gomp_ordered *ord_stmt
8138 = as_a <gomp_ordered *> (last_stmt (region->entry));
8139 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
8140 OMP_CLAUSE_DEPEND))
8141 {
8142 /* We'll expand these when expanding the corresponding
8143 worksharing region with an ordered(n) clause. */
8144 gcc_assert (region->outer
8145 && region->outer->type == GIMPLE_OMP_FOR);
8146 region->ord_stmt = ord_stmt;
8147 break;
8148 }
8149 }
8150 /* FALLTHRU */
8151 case GIMPLE_OMP_MASTER:
8152 case GIMPLE_OMP_TASKGROUP:
8153 case GIMPLE_OMP_CRITICAL:
8154 case GIMPLE_OMP_TEAMS:
8155 expand_omp_synch (region);
8156 break;
8157
8158 case GIMPLE_OMP_ATOMIC_LOAD:
8159 expand_omp_atomic (region);
8160 break;
8161
8162 case GIMPLE_OMP_TARGET:
8163 expand_omp_target (region);
8164 break;
8165
8166 default:
8167 gcc_unreachable ();
8168 }
8169
8170 input_location = saved_location;
8171 region = region->next;
8172 }
8173 if (omp_any_child_fn_dumped)
8174 {
8175 if (dump_file)
8176 dump_function_header (dump_file, current_function_decl, dump_flags);
8177 omp_any_child_fn_dumped = false;
8178 }
8179}
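
/* Illustrative sketch only (example user code, not emitted by this pass):
   for a combined construct such as

     #pragma omp parallel for
     for (int i = 0; i < n; i++)
       c[i] = a[i] + b[i];

   the nested GIMPLE_OMP_FOR region is expanded before the enclosing
   GIMPLE_OMP_PARALLEL region, so by the time expand_omp_taskreg outlines
   the parallel body into a child function and emits the call into libgomp
   (GOMP_parallel and friends), the loop inside that body has already been
   rewritten into explicit iteration bookkeeping.  */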
8180
8181/* Helper for build_omp_regions. Scan the dominator tree starting at
8182 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
8183 true, the function ends once a single tree is built (otherwise, the
8184 whole forest of OMP constructs may be built). */
8185
8186static void
8187build_omp_regions_1 (basic_block bb, struct omp_region *parent,
8188 bool single_tree)
8189{
8190 gimple_stmt_iterator gsi;
8191 gimple *stmt;
8192 basic_block son;
8193
8194 gsi = gsi_last_nondebug_bb (bb);
8195 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8196 {
8197 struct omp_region *region;
8198 enum gimple_code code;
8199
8200 stmt = gsi_stmt (gsi);
8201 code = gimple_code (stmt);
8202 if (code == GIMPLE_OMP_RETURN)
8203 {
8204 /* STMT is the return point out of region PARENT. Mark it
8205 as the exit point and make PARENT the immediately
8206 enclosing region. */
8207 gcc_assert (parent);
8208 region = parent;
8209 region->exit = bb;
8210 parent = parent->outer;
8211 }
8212 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8213 {
8214 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8215 GIMPLE_OMP_RETURN, but matches with
8216 GIMPLE_OMP_ATOMIC_LOAD. */
8217 gcc_assert (parent);
8218 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8219 region = parent;
8220 region->exit = bb;
8221 parent = parent->outer;
8222 }
8223 else if (code == GIMPLE_OMP_CONTINUE)
8224 {
8225 gcc_assert (parent);
8226 parent->cont = bb;
8227 }
8228 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8229 {
8230 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8231 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8232 }
8233 else
8234 {
8235 region = new_omp_region (bb, code, parent);
8236 /* Otherwise... */
8237 if (code == GIMPLE_OMP_TARGET)
8238 {
8239 switch (gimple_omp_target_kind (stmt))
8240 {
8241 case GF_OMP_TARGET_KIND_REGION:
8242 case GF_OMP_TARGET_KIND_DATA:
8243 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8244 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8245 case GF_OMP_TARGET_KIND_OACC_DATA:
8246 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8247 break;
8248 case GF_OMP_TARGET_KIND_UPDATE:
8249 case GF_OMP_TARGET_KIND_ENTER_DATA:
8250 case GF_OMP_TARGET_KIND_EXIT_DATA:
8251 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8252 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8253 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8254 /* ..., other than for those stand-alone directives... */
8255 region = NULL;
8256 break;
8257 default:
8258 gcc_unreachable ();
8259 }
8260 }
8261 else if (code == GIMPLE_OMP_ORDERED
8262 && omp_find_clause (gimple_omp_ordered_clauses
8263 (as_a <gomp_ordered *> (stmt)),
8264 OMP_CLAUSE_DEPEND))
8265 /* #pragma omp ordered depend is also just a stand-alone
8266 directive. */
8267 region = NULL;
8268 else if (code == GIMPLE_OMP_TASK
8269 && gimple_omp_task_taskwait_p (stmt))
8270 /* #pragma omp taskwait depend(...) is a stand-alone directive. */
8271 region = NULL;
8272 /* ..., this directive becomes the parent for a new region. */
8273 if (region)
8274 parent = region;
8275 }
8276 }
8277
8278 if (single_tree && !parent)
8279 return;
8280
8281 for (son = first_dom_son (CDI_DOMINATORS, bb);
8282 son;
8283 son = next_dom_son (CDI_DOMINATORS, son))
8284 build_omp_regions_1 (son, parent, single_tree);
8285}
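
/* A hedged illustration of the bookkeeping above (example user code and an
   assumed, typical statement order):

     #pragma omp parallel
     {
       #pragma omp single
         x++;
     }

   Walking the dominator tree, GIMPLE_OMP_PARALLEL opens a region,
   GIMPLE_OMP_SINGLE opens a region nested inside it, and each of the two
   GIMPLE_OMP_RETURN statements that follow records the exit block of the
   current region and pops PARENT back to its outer region.  Stand-alone
   directives such as "#pragma omp target update" have no matching
   GIMPLE_OMP_RETURN, which is why no region is created for them.  */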
8286
8287/* Builds the tree of OMP regions rooted at ROOT, storing it to
8288 root_omp_region. */
8289
8290static void
8291build_omp_regions_root (basic_block root)
8292{
8293 gcc_assert (root_omp_region == NULL);
8294 build_omp_regions_1 (root, NULL, true);
8295 gcc_assert (root_omp_region != NULL);
8296}
8297
8298/* Expands the OMP construct (and its subconstructs) starting in HEAD. */
8299
8300void
8301omp_expand_local (basic_block head)
8302{
8303 build_omp_regions_root (head);
8304 if (dump_file && (dump_flags & TDF_DETAILS))
8305 {
8306 fprintf (dump_file, "\nOMP region tree\n\n");
8307 dump_omp_region (dump_file, root_omp_region, 0);
8308 fprintf (dump_file, "\n");
8309 }
8310
8311 remove_exit_barriers (root_omp_region);
8312 expand_omp (root_omp_region);
8313
8314 omp_free_regions ();
8315}
8316
8317/* Scan the CFG and build a tree of OMP regions for the current function,
8318 storing its root in root_omp_region. */
8319
8320static void
8321build_omp_regions (void)
8322{
8323 gcc_assert (root_omp_region == NULL);
8324 calculate_dominance_info (CDI_DOMINATORS);
8325 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8326}
8327
8328/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8329
8330static unsigned int
8331execute_expand_omp (void)
8332{
8333 build_omp_regions ();
8334
8335 if (!root_omp_region)
8336 return 0;
8337
8338 if (dump_file)
8339 {
8340 fprintf (dump_file, "\nOMP region tree\n\n");
8341 dump_omp_region (dump_file, root_omp_region, 0);
8342 fprintf (dump_file, "\n");
8343 }
8344
8345 remove_exit_barriers (root_omp_region);
8346
8347 expand_omp (root_omp_region);
8348
8349 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8350 verify_loop_structure ();
8351 cleanup_tree_cfg ();
8352
8353 omp_free_regions ();
8354
8355 return 0;
8356}
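
/* Usage note, assuming the usual dump machinery: compiling with, say,
   "gcc -fopenmp -fdump-tree-ompexp test.c" makes the "OMP region tree"
   printed above, together with any outlined child functions, land in the
   test.c.*.ompexp dump file, which is a convenient way to inspect what
   this pass produced.  */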
8357
8358/* OMP expansion -- the default pass, run before creation of SSA form. */
8359
8360namespace {
8361
8362const pass_data pass_data_expand_omp =
8363{
8364 GIMPLE_PASS, /* type */
8365 "ompexp", /* name */
8366 OPTGROUP_OMP, /* optinfo_flags */
8367 TV_NONE, /* tv_id */
8368 PROP_gimple_any, /* properties_required */
8369 PROP_gimple_eomp, /* properties_provided */
8370 0, /* properties_destroyed */
8371 0, /* todo_flags_start */
8372 0, /* todo_flags_finish */
8373};
8374
8375class pass_expand_omp : public gimple_opt_pass
8376{
8377public:
8378 pass_expand_omp (gcc::context *ctxt)
8379 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8380 {}
8381
8382 /* opt_pass methods: */
8383 virtual unsigned int execute (function *)
8384 {
8385 bool gate = ((flag_openacc != 0 || flag_openmp != 0
8386 || flag_openmp_simd != 0)
8387 && !seen_error ());
8388
8389 /* This pass always runs, to provide PROP_gimple_eomp.
8390 But often, there is nothing to do. */
8391 if (!gate)
8392 return 0;
8393
8394 return execute_expand_omp ();
8395 }
8396
8397}; // class pass_expand_omp
8398
8399} // anon namespace
8400
8401gimple_opt_pass *
8402make_pass_expand_omp (gcc::context *ctxt)
8403{
8404 return new pass_expand_omp (ctxt);
8405}
8406
8407namespace {
8408
8409const pass_data pass_data_expand_omp_ssa =
8410{
8411 GIMPLE_PASS, /* type */
8412 "ompexpssa", /* name */
8413 OPTGROUP_OMP, /* optinfo_flags */
8414 TV_NONE, /* tv_id */
8415 PROP_cfg | PROP_ssa, /* properties_required */
8416 PROP_gimple_eomp, /* properties_provided */
8417 0, /* properties_destroyed */
8418 0, /* todo_flags_start */
8419 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8420};
8421
8422class pass_expand_omp_ssa : public gimple_opt_pass
8423{
8424public:
8425 pass_expand_omp_ssa (gcc::context *ctxt)
8426 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8427 {}
8428
8429 /* opt_pass methods: */
8430 virtual bool gate (function *fun)
8431 {
8432 return !(fun->curr_properties & PROP_gimple_eomp);
8433 }
8434 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8435 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8436
8437}; // class pass_expand_omp_ssa
8438
8439} // anon namespace
8440
8441gimple_opt_pass *
8442make_pass_expand_omp_ssa (gcc::context *ctxt)
8443{
8444 return new pass_expand_omp_ssa (ctxt);
8445}
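
/* How the two variants relate (a reading of the pass_data above, not of the
   full pass pipeline): "ompexp" provides PROP_gimple_eomp, and the gate of
   "ompexpssa" fires only when that property is missing, so the SSA variant
   is presumably meant for functions whose OMP regions were introduced after
   the initial expansion had already run (auto-parallelization is one such
   producer).  */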
8446
8447/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8448 GIMPLE_* codes. */
8449
8450bool
8451omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8452 int *region_idx)
8453{
8454 gimple *last = last_stmt (bb);
8455 enum gimple_code code = gimple_code (last);
8456 struct omp_region *cur_region = *region;
8457 bool fallthru = false;
8458
8459 switch (code)
8460 {
8461 case GIMPLE_OMP_PARALLEL:
8462 case GIMPLE_OMP_FOR:
8463 case GIMPLE_OMP_SINGLE:
8464 case GIMPLE_OMP_TEAMS:
8465 case GIMPLE_OMP_MASTER:
8466 case GIMPLE_OMP_TASKGROUP:
8467 case GIMPLE_OMP_CRITICAL:
8468 case GIMPLE_OMP_SECTION:
8469 case GIMPLE_OMP_GRID_BODY:
8470 cur_region = new_omp_region (bb, code, cur_region);
8471 fallthru = true;
8472 break;
8473
8474 case GIMPLE_OMP_TASK:
8475 cur_region = new_omp_region (bb, code, cur_region);
8476 fallthru = true;
8477 if (gimple_omp_task_taskwait_p (last))
8478 cur_region = cur_region->outer;
8479 break;
8480
8481 case GIMPLE_OMP_ORDERED:
8482 cur_region = new_omp_region (bb, code, cur_region);
8483 fallthru = true;
8484 if (omp_find_clause (gimple_omp_ordered_clauses
8485 (as_a <gomp_ordered *> (last)),
8486 OMP_CLAUSE_DEPEND))
8487 cur_region = cur_region->outer;
8488 break;
8489
8490 case GIMPLE_OMP_TARGET:
8491 cur_region = new_omp_region (bb, code, cur_region);
8492 fallthru = true;
8493 switch (gimple_omp_target_kind (last))
8494 {
8495 case GF_OMP_TARGET_KIND_REGION:
8496 case GF_OMP_TARGET_KIND_DATA:
8497 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8498 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8499 case GF_OMP_TARGET_KIND_OACC_DATA:
8500 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8501 break;
8502 case GF_OMP_TARGET_KIND_UPDATE:
8503 case GF_OMP_TARGET_KIND_ENTER_DATA:
8504 case GF_OMP_TARGET_KIND_EXIT_DATA:
8505 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8506 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8507 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8508 cur_region = cur_region->outer;
8509 break;
8510 default:
8511 gcc_unreachable ();
8512 }
8513 break;
8514
8515 case GIMPLE_OMP_SECTIONS:
8516 cur_region = new_omp_region (bb, code, cur_region);
8517 fallthru = true;
8518 break;
8519
8520 case GIMPLE_OMP_SECTIONS_SWITCH:
8521 fallthru = false;
8522 break;
8523
8524 case GIMPLE_OMP_ATOMIC_LOAD:
8525 case GIMPLE_OMP_ATOMIC_STORE:
8526 fallthru = true;
8527 break;
8528
8529 case GIMPLE_OMP_RETURN:
8530 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8531 somewhere other than the next block. This will be
8532 created later. */
8533 cur_region->exit = bb;
8534 if (cur_region->type == GIMPLE_OMP_TASK)
8535 /* Add an edge corresponding to not scheduling the task
8536 immediately. */
8537 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8538 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8539 cur_region = cur_region->outer;
8540 break;
8541
8542 case GIMPLE_OMP_CONTINUE:
8543 cur_region->cont = bb;
8544 switch (cur_region->type)
8545 {
8546 case GIMPLE_OMP_FOR:
8547 /* Mark the successor edges of the GIMPLE_OMP_FOR and
8548 GIMPLE_OMP_CONTINUE blocks as abnormal to prevent
8549 them from being split. */
8550 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8551 /* Make the loopback edge. */
8552 make_edge (bb, single_succ (cur_region->entry),
8553 EDGE_ABNORMAL);
8554
8555 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8556 corresponds to the case that the body of the loop
8557 is not executed at all. */
8558 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8559 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8560 fallthru = false;
8561 break;
8562
8563 case GIMPLE_OMP_SECTIONS:
8564 /* Wire up the edges into and out of the nested sections. */
8565 {
8566 basic_block switch_bb = single_succ (cur_region->entry);
8567
8568 struct omp_region *i;
8569 for (i = cur_region->inner; i ; i = i->next)
8570 {
8571 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8572 make_edge (switch_bb, i->entry, 0);
8573 make_edge (i->exit, bb, EDGE_FALLTHRU);
8574 }
8575
8576 /* Make the loopback edge to the block with
8577 GIMPLE_OMP_SECTIONS_SWITCH. */
8578 make_edge (bb, switch_bb, 0);
8579
8580 /* Make the edge from the switch to exit. */
8581 make_edge (switch_bb, bb->next_bb, 0);
8582 fallthru = false;
8583 }
8584 break;
8585
8586 case GIMPLE_OMP_TASK:
8587 fallthru = true;
8588 break;
8589
8590 default:
8591 gcc_unreachable ();
8592 }
8593 break;
8594
8595 default:
8596 gcc_unreachable ();
8597 }
8598
8599 if (*region != cur_region)
8600 {
8601 *region = cur_region;
8602 if (cur_region)
8603 *region_idx = cur_region->entry->index;
8604 else
8605 *region_idx = 0;
8606 }
8607
8608 return fallthru;
8609}
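
/* Approximate sketch (assumed layout; block names are illustrative) of the
   edges created above for a worksharing loop region once its
   GIMPLE_OMP_CONTINUE block is reached:

     GIMPLE_OMP_FOR  --(abnormal, zero-trip)-->  block after the CONTINUE
          |  (abnormal)
          v
     loop body ...
          |
          v
     GIMPLE_OMP_CONTINUE  --(abnormal loopback)-->  start of loop body
          |
          v  (fallthru | abnormal)
     block after the CONTINUE (towards the GIMPLE_OMP_RETURN)

   Every edge created here carries EDGE_ABNORMAL so that CFG cleanups leave
   it alone until expand_omp_for rewrites the region into an ordinary loop
   (with libgomp scheduling calls where the schedule requires them).  */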
8610
8611#include "gt-omp-expand.h"