1/* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2017 Free Software Foundation, Inc.
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "memmodel.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "gimple.h"
32#include "cfghooks.h"
33#include "tree-pass.h"
34#include "ssa.h"
35#include "optabs.h"
36#include "cgraph.h"
37#include "pretty-print.h"
38#include "diagnostic-core.h"
39#include "fold-const.h"
40#include "stor-layout.h"
41#include "cfganal.h"
42#include "internal-fn.h"
43#include "gimplify.h"
44#include "gimple-iterator.h"
45#include "gimplify-me.h"
46#include "gimple-walk.h"
47#include "tree-cfg.h"
48#include "tree-into-ssa.h"
49#include "tree-ssa.h"
50#include "splay-tree.h"
51#include "cfgloop.h"
52#include "omp-general.h"
53#include "omp-offload.h"
54#include "tree-cfgcleanup.h"
55#include "symbol-summary.h"
56#include "cilk.h"
57#include "gomp-constants.h"
58#include "gimple-pretty-print.h"
59#include "hsa-common.h"
60#include "debug.h"
61
62
63/* OMP region information. Every parallel and workshare
64 directive is enclosed between two markers, the OMP_* directive
65 and a corresponding GIMPLE_OMP_RETURN statement. */
66
67struct omp_region
68{
69 /* The enclosing region. */
70 struct omp_region *outer;
71
72 /* First child region. */
73 struct omp_region *inner;
74
75 /* Next peer region. */
76 struct omp_region *next;
77
78 /* Block containing the omp directive as its last stmt. */
79 basic_block entry;
80
81 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
82 basic_block exit;
83
84 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
85 basic_block cont;
86
87 /* If this is a combined parallel+workshare region, this is a list
88 of additional arguments needed by the combined parallel+workshare
89 library call. */
90 vec<tree, va_gc> *ws_args;
91
92 /* The code for the omp directive of this region. */
93 enum gimple_code type;
94
95 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
96 enum omp_clause_schedule_kind sched_kind;
97
98 /* Schedule modifiers. */
99 unsigned char sched_modifiers;
100
101 /* True if this is a combined parallel+workshare region. */
102 bool is_combined_parallel;
103
104 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
105 a depend clause. */
106 gomp_ordered *ord_stmt;
107};
108
109static struct omp_region *root_omp_region;
110static bool omp_any_child_fn_dumped;
111
112static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
113 bool = false);
114static gphi *find_phi_with_arg_on_edge (tree, edge);
115static void expand_omp (struct omp_region *region);
116
117/* Return true if REGION is a combined parallel+workshare region. */
118
119static inline bool
120is_combined_parallel (struct omp_region *region)
121{
122 return region->is_combined_parallel;
123}
124
125/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
126 is the immediate dominator of PAR_ENTRY_BB, return true if there
127 are no data dependencies that would prevent expanding the parallel
128 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
129
130 When expanding a combined parallel+workshare region, the call to
131 the child function may need additional arguments in the case of
132 GIMPLE_OMP_FOR regions. In some cases, these arguments are
133 computed out of variables passed in from the parent to the child
134 via 'struct .omp_data_s'. For instance:
135
136 #pragma omp parallel for schedule (guided, i * 4)
137 for (j ...)
138
139 Is lowered into:
140
141 # BLOCK 2 (PAR_ENTRY_BB)
142 .omp_data_o.i = i;
143 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
144
145 # BLOCK 3 (WS_ENTRY_BB)
146 .omp_data_i = &.omp_data_o;
147 D.1667 = .omp_data_i->i;
148 D.1598 = D.1667 * 4;
149 #pragma omp for schedule (guided, D.1598)
150
151 When we outline the parallel region, the call to the child function
152 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
153 that value is computed *after* the call site. So, in principle we
154 cannot do the transformation.
155
156 To see whether the code in WS_ENTRY_BB blocks the combined
157 parallel+workshare call, we collect all the variables used in the
 158 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
159 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
160 call.
161
162 FIXME. If we had the SSA form built at this point, we could merely
163 hoist the code in block 3 into block 2 and be done with it. But at
164 this point we don't have dataflow information and though we could
165 hack something up here, it is really not worth the aggravation. */
166
167static bool
168workshare_safe_to_combine_p (basic_block ws_entry_bb)
169{
170 struct omp_for_data fd;
171 gimple *ws_stmt = last_stmt (ws_entry_bb);
172
173 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
174 return true;
175
176 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
177
178 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
179
180 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
181 return false;
182 if (fd.iter_type != long_integer_type_node)
183 return false;
184
185 /* FIXME. We give up too easily here. If any of these arguments
186 are not constants, they will likely involve variables that have
187 been mapped into fields of .omp_data_s for sharing with the child
188 function. With appropriate data flow, it would be possible to
189 see through this. */
190 if (!is_gimple_min_invariant (fd.loop.n1)
191 || !is_gimple_min_invariant (fd.loop.n2)
192 || !is_gimple_min_invariant (fd.loop.step)
193 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
194 return false;
195
196 return true;
197}
198
199/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
200 presence (SIMD_SCHEDULE). */
201
202static tree
203omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
204{
205 if (!simd_schedule)
206 return chunk_size;
207
208 int vf = omp_max_vf ();
209 if (vf == 1)
210 return chunk_size;
211
212 tree type = TREE_TYPE (chunk_size);
213 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
214 build_int_cst (type, vf - 1));
215 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
216 build_int_cst (type, -vf));
217}
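
/* Worked example (illustrative values only, not taken from the code above):
   with omp_max_vf () returning vf = 8 and a requested chunk_size of 13, the
   folds above compute (13 + 7) & -8 = 20 & ~7 = 16, i.e. the chunk size
   rounded up to the next multiple of the vectorization factor.  */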
218
219/* Collect additional arguments needed to emit a combined
220 parallel+workshare call. WS_STMT is the workshare directive being
221 expanded. */
222
223static vec<tree, va_gc> *
224get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
225{
226 tree t;
227 location_t loc = gimple_location (ws_stmt);
228 vec<tree, va_gc> *ws_args;
229
230 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
231 {
232 struct omp_for_data fd;
233 tree n1, n2;
234
235 omp_extract_for_data (for_stmt, &fd, NULL);
236 n1 = fd.loop.n1;
237 n2 = fd.loop.n2;
238
239 if (gimple_omp_for_combined_into_p (for_stmt))
240 {
241 tree innerc
242 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
243 OMP_CLAUSE__LOOPTEMP_);
244 gcc_assert (innerc);
245 n1 = OMP_CLAUSE_DECL (innerc);
246 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
247 OMP_CLAUSE__LOOPTEMP_);
248 gcc_assert (innerc);
249 n2 = OMP_CLAUSE_DECL (innerc);
250 }
251
252 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
253
254 t = fold_convert_loc (loc, long_integer_type_node, n1);
255 ws_args->quick_push (t);
256
257 t = fold_convert_loc (loc, long_integer_type_node, n2);
258 ws_args->quick_push (t);
259
260 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
261 ws_args->quick_push (t);
262
263 if (fd.chunk_size)
264 {
265 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
266 t = omp_adjust_chunk_size (t, fd.simd_schedule);
267 ws_args->quick_push (t);
268 }
269
270 return ws_args;
271 }
272 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
273 {
274 /* Number of sections is equal to the number of edges from the
275 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
276 the exit of the sections region. */
277 basic_block bb = single_succ (gimple_bb (ws_stmt));
278 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
279 vec_alloc (ws_args, 1);
280 ws_args->quick_push (t);
281 return ws_args;
282 }
283
284 gcc_unreachable ();
285}
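
/* Illustrative sketch (hypothetical source, not derived from the code
   above): for a combined construct such as

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++) ...

   the vector built here holds the loop bounds, step and chunk size, each
   converted to long: { (long) 0, (long) n, (long) 1, (long) 4 }.  For a
   combined parallel+sections region it holds a single element, the number
   of sections.  */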
286
287/* Discover whether REGION is a combined parallel+workshare region. */
288
289static void
290determine_parallel_type (struct omp_region *region)
291{
292 basic_block par_entry_bb, par_exit_bb;
293 basic_block ws_entry_bb, ws_exit_bb;
294
295 if (region == NULL || region->inner == NULL
296 || region->exit == NULL || region->inner->exit == NULL
297 || region->inner->cont == NULL)
298 return;
299
300 /* We only support parallel+for and parallel+sections. */
301 if (region->type != GIMPLE_OMP_PARALLEL
302 || (region->inner->type != GIMPLE_OMP_FOR
303 && region->inner->type != GIMPLE_OMP_SECTIONS))
304 return;
305
306 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
307 WS_EXIT_BB -> PAR_EXIT_BB. */
308 par_entry_bb = region->entry;
309 par_exit_bb = region->exit;
310 ws_entry_bb = region->inner->entry;
311 ws_exit_bb = region->inner->exit;
312
313 if (single_succ (par_entry_bb) == ws_entry_bb
314 && single_succ (ws_exit_bb) == par_exit_bb
315 && workshare_safe_to_combine_p (ws_entry_bb)
316 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
317 || (last_and_only_stmt (ws_entry_bb)
318 && last_and_only_stmt (par_exit_bb))))
319 {
320 gimple *par_stmt = last_stmt (par_entry_bb);
321 gimple *ws_stmt = last_stmt (ws_entry_bb);
322
323 if (region->inner->type == GIMPLE_OMP_FOR)
324 {
325 /* If this is a combined parallel loop, we need to determine
326 whether or not to use the combined library calls. There
327 are two cases where we do not apply the transformation:
328 static loops and any kind of ordered loop. In the first
329 case, we already open code the loop so there is no need
330 to do anything else. In the latter case, the combined
331 parallel loop call would still need extra synchronization
332 to implement ordered semantics, so there would not be any
333 gain in using the combined call. */
334 tree clauses = gimple_omp_for_clauses (ws_stmt);
335 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
336 if (c == NULL
337 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
338 == OMP_CLAUSE_SCHEDULE_STATIC)
339 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
340 {
341 region->is_combined_parallel = false;
342 region->inner->is_combined_parallel = false;
343 return;
344 }
345 }
346
347 region->is_combined_parallel = true;
348 region->inner->is_combined_parallel = true;
349 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
350 }
351}
352
353/* Debugging dumps for parallel regions. */
354void dump_omp_region (FILE *, struct omp_region *, int);
355void debug_omp_region (struct omp_region *);
356void debug_all_omp_regions (void);
357
358/* Dump the parallel region tree rooted at REGION. */
359
360void
361dump_omp_region (FILE *file, struct omp_region *region, int indent)
362{
363 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
364 gimple_code_name[region->type]);
365
366 if (region->inner)
367 dump_omp_region (file, region->inner, indent + 4);
368
369 if (region->cont)
370 {
371 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
372 region->cont->index);
373 }
374
375 if (region->exit)
376 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
377 region->exit->index);
378 else
379 fprintf (file, "%*s[no exit marker]\n", indent, "");
380
381 if (region->next)
382 dump_omp_region (file, region->next, indent);
383}
384
385DEBUG_FUNCTION void
386debug_omp_region (struct omp_region *region)
387{
388 dump_omp_region (stderr, region, 0);
389}
390
391DEBUG_FUNCTION void
392debug_all_omp_regions (void)
393{
394 dump_omp_region (stderr, root_omp_region, 0);
395}
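
/* A hedged example of the dump format produced above (block numbers
   invented): a parallel region enclosing a for region prints as

     bb 2: gimple_omp_parallel
         bb 3: gimple_omp_for
         bb 5: GIMPLE_OMP_CONTINUE
         bb 6: GIMPLE_OMP_RETURN
     bb 7: GIMPLE_OMP_RETURN  */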
396
397/* Create a new parallel region starting at STMT inside region PARENT. */
398
399static struct omp_region *
400new_omp_region (basic_block bb, enum gimple_code type,
401 struct omp_region *parent)
402{
403 struct omp_region *region = XCNEW (struct omp_region);
404
405 region->outer = parent;
406 region->entry = bb;
407 region->type = type;
408
409 if (parent)
410 {
411 /* This is a nested region. Add it to the list of inner
412 regions in PARENT. */
413 region->next = parent->inner;
414 parent->inner = region;
415 }
416 else
417 {
418 /* This is a toplevel region. Add it to the list of toplevel
419 regions in ROOT_OMP_REGION. */
420 region->next = root_omp_region;
421 root_omp_region = region;
422 }
423
424 return region;
425}
426
427/* Release the memory associated with the region tree rooted at REGION. */
428
429static void
430free_omp_region_1 (struct omp_region *region)
431{
432 struct omp_region *i, *n;
433
434 for (i = region->inner; i ; i = n)
435 {
436 n = i->next;
437 free_omp_region_1 (i);
438 }
439
440 free (region);
441}
442
443/* Release the memory for the entire omp region tree. */
444
445void
446omp_free_regions (void)
447{
448 struct omp_region *r, *n;
449 for (r = root_omp_region; r ; r = n)
450 {
451 n = r->next;
452 free_omp_region_1 (r);
453 }
454 root_omp_region = NULL;
455}
456
457/* A convenience function to build an empty GIMPLE_COND with just the
458 condition. */
459
460static gcond *
461gimple_build_cond_empty (tree cond)
462{
463 enum tree_code pred_code;
464 tree lhs, rhs;
465
466 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
467 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
468}
469
470/* Return true if a parallel REGION is within a declare target function or
471 within a target region and is not a part of a gridified target. */
472
473static bool
474parallel_needs_hsa_kernel_p (struct omp_region *region)
475{
476 bool indirect = false;
477 for (region = region->outer; region; region = region->outer)
478 {
479 if (region->type == GIMPLE_OMP_PARALLEL)
480 indirect = true;
481 else if (region->type == GIMPLE_OMP_TARGET)
482 {
483 gomp_target *tgt_stmt
484 = as_a <gomp_target *> (last_stmt (region->entry));
485
486 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
487 OMP_CLAUSE__GRIDDIM_))
488 return indirect;
489 else
490 return true;
491 }
492 }
493
494 if (lookup_attribute ("omp declare target",
495 DECL_ATTRIBUTES (current_function_decl)))
496 return true;
497
498 return false;
499}
500
501/* Build the function calls to GOMP_parallel_start etc to actually
502 generate the parallel operation. REGION is the parallel region
503 being expanded. BB is the block where to insert the code. WS_ARGS
504 will be set if this is a call to a combined parallel+workshare
505 construct, it contains the list of additional arguments needed by
506 the workshare construct. */
507
508static void
509expand_parallel_call (struct omp_region *region, basic_block bb,
510 gomp_parallel *entry_stmt,
511 vec<tree, va_gc> *ws_args)
512{
513 tree t, t1, t2, val, cond, c, clauses, flags;
514 gimple_stmt_iterator gsi;
515 gimple *stmt;
516 enum built_in_function start_ix;
517 int start_ix2;
518 location_t clause_loc;
519 vec<tree, va_gc> *args;
520
521 clauses = gimple_omp_parallel_clauses (entry_stmt);
522
523 /* Determine what flavor of GOMP_parallel we will be
524 emitting. */
525 start_ix = BUILT_IN_GOMP_PARALLEL;
526 if (is_combined_parallel (region))
527 {
528 switch (region->inner->type)
529 {
530 case GIMPLE_OMP_FOR:
531 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
532 switch (region->inner->sched_kind)
533 {
534 case OMP_CLAUSE_SCHEDULE_RUNTIME:
535 start_ix2 = 3;
536 break;
537 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
538 case OMP_CLAUSE_SCHEDULE_GUIDED:
539 if (region->inner->sched_modifiers
540 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
541 {
542 start_ix2 = 3 + region->inner->sched_kind;
543 break;
544 }
545 /* FALLTHRU */
546 default:
547 start_ix2 = region->inner->sched_kind;
548 break;
549 }
550 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
551 start_ix = (enum built_in_function) start_ix2;
552 break;
553 case GIMPLE_OMP_SECTIONS:
554 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
555 break;
556 default:
557 gcc_unreachable ();
558 }
559 }
560
561 /* By default, the value of NUM_THREADS is zero (selected at run time)
562 and there is no conditional. */
563 cond = NULL_TREE;
564 val = build_int_cst (unsigned_type_node, 0);
565 flags = build_int_cst (unsigned_type_node, 0);
566
567 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
568 if (c)
569 cond = OMP_CLAUSE_IF_EXPR (c);
570
571 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
572 if (c)
573 {
574 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
575 clause_loc = OMP_CLAUSE_LOCATION (c);
576 }
577 else
578 clause_loc = gimple_location (entry_stmt);
579
580 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
581 if (c)
582 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
583
584 /* Ensure 'val' is of the correct type. */
585 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
586
587 /* If we found the clause 'if (cond)', build either
588 (cond != 0) or (cond ? val : 1u). */
589 if (cond)
590 {
591 cond = gimple_boolify (cond);
592
593 if (integer_zerop (val))
594 val = fold_build2_loc (clause_loc,
595 EQ_EXPR, unsigned_type_node, cond,
596 build_int_cst (TREE_TYPE (cond), 0));
597 else
598 {
599 basic_block cond_bb, then_bb, else_bb;
600 edge e, e_then, e_else;
601 tree tmp_then, tmp_else, tmp_join, tmp_var;
602
603 tmp_var = create_tmp_var (TREE_TYPE (val));
604 if (gimple_in_ssa_p (cfun))
605 {
606 tmp_then = make_ssa_name (tmp_var);
607 tmp_else = make_ssa_name (tmp_var);
608 tmp_join = make_ssa_name (tmp_var);
609 }
610 else
611 {
612 tmp_then = tmp_var;
613 tmp_else = tmp_var;
614 tmp_join = tmp_var;
615 }
616
617 e = split_block_after_labels (bb);
618 cond_bb = e->src;
619 bb = e->dest;
620 remove_edge (e);
621
622 then_bb = create_empty_bb (cond_bb);
623 else_bb = create_empty_bb (then_bb);
624 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
625 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
626
627 stmt = gimple_build_cond_empty (cond);
628 gsi = gsi_start_bb (cond_bb);
629 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
630
631 gsi = gsi_start_bb (then_bb);
632 expand_omp_build_assign (&gsi, tmp_then, val, true);
633
634 gsi = gsi_start_bb (else_bb);
635 expand_omp_build_assign (&gsi, tmp_else,
636 build_int_cst (unsigned_type_node, 1),
637 true);
638
639 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
640 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
641 add_bb_to_loop (then_bb, cond_bb->loop_father);
642 add_bb_to_loop (else_bb, cond_bb->loop_father);
643 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
644 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
645
646 if (gimple_in_ssa_p (cfun))
647 {
648 gphi *phi = create_phi_node (tmp_join, bb);
649 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
650 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
651 }
652
653 val = tmp_join;
654 }
655
656 gsi = gsi_start_bb (bb);
657 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
658 false, GSI_CONTINUE_LINKING);
659 }
660
661 gsi = gsi_last_bb (bb);
662 t = gimple_omp_parallel_data_arg (entry_stmt);
663 if (t == NULL)
664 t1 = null_pointer_node;
665 else
666 t1 = build_fold_addr_expr (t);
667 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
668 t2 = build_fold_addr_expr (child_fndecl);
669
670 vec_alloc (args, 4 + vec_safe_length (ws_args));
671 args->quick_push (t2);
672 args->quick_push (t1);
673 args->quick_push (val);
674 if (ws_args)
675 args->splice (*ws_args);
676 args->quick_push (flags);
677
678 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
679 builtin_decl_explicit (start_ix), args);
680
681 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
682 false, GSI_CONTINUE_LINKING);
683
684 if (hsa_gen_requested_p ()
685 && parallel_needs_hsa_kernel_p (region))
686 {
687 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
688 hsa_register_kernel (child_cnode);
689 }
690}
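
/* Illustration with invented names: a plain

     #pragma omp parallel num_threads (4)

   is expanded by the code above into a call of the form

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 4, 0);

   whereas a combined parallel + dynamic loop region selects one of the
   GOMP_parallel_loop_* entry points instead and splices the workshare
   arguments collected in WS_ARGS between the thread count and the flags
   argument.  */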
691
692/* Insert a function call whose name is FUNC_NAME with the information from
693 ENTRY_STMT into the basic_block BB. */
694
695static void
696expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
697 vec <tree, va_gc> *ws_args)
698{
699 tree t, t1, t2;
700 gimple_stmt_iterator gsi;
701 vec <tree, va_gc> *args;
702
703 gcc_assert (vec_safe_length (ws_args) == 2);
704 tree func_name = (*ws_args)[0];
705 tree grain = (*ws_args)[1];
706
707 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
708 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
709 gcc_assert (count != NULL_TREE);
710 count = OMP_CLAUSE_OPERAND (count, 0);
711
712 gsi = gsi_last_bb (bb);
713 t = gimple_omp_parallel_data_arg (entry_stmt);
714 if (t == NULL)
715 t1 = null_pointer_node;
716 else
717 t1 = build_fold_addr_expr (t);
718 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
719
720 vec_alloc (args, 4);
721 args->quick_push (t2);
722 args->quick_push (t1);
723 args->quick_push (count);
724 args->quick_push (grain);
725 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
726
727 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
728 GSI_CONTINUE_LINKING);
729}
730
731/* Build the function call to GOMP_task to actually
732 generate the task operation. BB is the block where to insert the code. */
733
734static void
735expand_task_call (struct omp_region *region, basic_block bb,
736 gomp_task *entry_stmt)
737{
738 tree t1, t2, t3;
739 gimple_stmt_iterator gsi;
740 location_t loc = gimple_location (entry_stmt);
741
742 tree clauses = gimple_omp_task_clauses (entry_stmt);
743
744 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
745 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
746 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
747 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
748 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
749 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
750
751 unsigned int iflags
752 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
753 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
754 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
755
756 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
757 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
758 tree num_tasks = NULL_TREE;
759 bool ull = false;
760 if (taskloop_p)
761 {
762 gimple *g = last_stmt (region->outer->entry);
763 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
764 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
765 struct omp_for_data fd;
766 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
767 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
768 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
769 OMP_CLAUSE__LOOPTEMP_);
770 startvar = OMP_CLAUSE_DECL (startvar);
771 endvar = OMP_CLAUSE_DECL (endvar);
772 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
773 if (fd.loop.cond_code == LT_EXPR)
774 iflags |= GOMP_TASK_FLAG_UP;
775 tree tclauses = gimple_omp_for_clauses (g);
776 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
777 if (num_tasks)
778 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
779 else
780 {
781 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
782 if (num_tasks)
783 {
784 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
785 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
786 }
787 else
788 num_tasks = integer_zero_node;
789 }
790 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
791 if (ifc == NULL_TREE)
792 iflags |= GOMP_TASK_FLAG_IF;
793 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
794 iflags |= GOMP_TASK_FLAG_NOGROUP;
795 ull = fd.iter_type == long_long_unsigned_type_node;
796 }
797 else if (priority)
798 iflags |= GOMP_TASK_FLAG_PRIORITY;
799
800 tree flags = build_int_cst (unsigned_type_node, iflags);
801
802 tree cond = boolean_true_node;
803 if (ifc)
804 {
805 if (taskloop_p)
806 {
807 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
808 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
809 build_int_cst (unsigned_type_node,
810 GOMP_TASK_FLAG_IF),
811 build_int_cst (unsigned_type_node, 0));
812 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
813 flags, t);
814 }
815 else
816 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
817 }
818
819 if (finalc)
820 {
821 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
822 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
823 build_int_cst (unsigned_type_node,
824 GOMP_TASK_FLAG_FINAL),
825 build_int_cst (unsigned_type_node, 0));
826 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
827 }
828 if (depend)
829 depend = OMP_CLAUSE_DECL (depend);
830 else
831 depend = build_int_cst (ptr_type_node, 0);
832 if (priority)
833 priority = fold_convert (integer_type_node,
834 OMP_CLAUSE_PRIORITY_EXPR (priority));
835 else
836 priority = integer_zero_node;
837
838 gsi = gsi_last_bb (bb);
839 tree t = gimple_omp_task_data_arg (entry_stmt);
840 if (t == NULL)
841 t2 = null_pointer_node;
842 else
843 t2 = build_fold_addr_expr_loc (loc, t);
844 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
845 t = gimple_omp_task_copy_fn (entry_stmt);
846 if (t == NULL)
847 t3 = null_pointer_node;
848 else
849 t3 = build_fold_addr_expr_loc (loc, t);
850
851 if (taskloop_p)
852 t = build_call_expr (ull
853 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
854 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
855 11, t1, t2, t3,
856 gimple_omp_task_arg_size (entry_stmt),
857 gimple_omp_task_arg_align (entry_stmt), flags,
858 num_tasks, priority, startvar, endvar, step);
859 else
860 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
861 9, t1, t2, t3,
862 gimple_omp_task_arg_size (entry_stmt),
863 gimple_omp_task_arg_align (entry_stmt), cond, flags,
864 depend, priority);
865
866 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
867 false, GSI_CONTINUE_LINKING);
868}
869
870/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
871
872static tree
873vec2chain (vec<tree, va_gc> *v)
874{
875 tree chain = NULL_TREE, t;
876 unsigned ix;
877
878 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
879 {
880 DECL_CHAIN (t) = chain;
881 chain = t;
882 }
883
884 return chain;
885}
886
887/* Remove barriers in REGION->EXIT's block. Note that this is only
888 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
889 is an implicit barrier, any workshare inside the GIMPLE_OMP_PARALLEL that
890 left a barrier at the end of the GIMPLE_OMP_PARALLEL region can now be
891 removed. */
892
893static void
894remove_exit_barrier (struct omp_region *region)
895{
896 gimple_stmt_iterator gsi;
897 basic_block exit_bb;
898 edge_iterator ei;
899 edge e;
900 gimple *stmt;
901 int any_addressable_vars = -1;
902
903 exit_bb = region->exit;
904
905 /* If the parallel region doesn't return, we don't have REGION->EXIT
906 block at all. */
907 if (! exit_bb)
908 return;
909
910 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
911 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
912 statements that can appear in between are extremely limited -- no
913 memory operations at all. Here, we allow nothing at all, so the
914 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
915 gsi = gsi_last_bb (exit_bb);
916 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
917 gsi_prev (&gsi);
918 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
919 return;
920
921 FOR_EACH_EDGE (e, ei, exit_bb->preds)
922 {
923 gsi = gsi_last_bb (e->src);
924 if (gsi_end_p (gsi))
925 continue;
926 stmt = gsi_stmt (gsi);
927 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
928 && !gimple_omp_return_nowait_p (stmt))
929 {
930 /* OpenMP 3.0 tasks unfortunately prevent this optimization
931 in many cases. If there could be tasks queued, the barrier
932 might be needed to let the tasks run before some local
933 variable of the parallel that the task uses as shared
934 runs out of scope. The task can be spawned either
935 from within current function (this would be easy to check)
936 or from some function it calls and gets passed an address
937 of such a variable. */
938 if (any_addressable_vars < 0)
939 {
940 gomp_parallel *parallel_stmt
941 = as_a <gomp_parallel *> (last_stmt (region->entry));
942 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
943 tree local_decls, block, decl;
944 unsigned ix;
945
946 any_addressable_vars = 0;
947 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
948 if (TREE_ADDRESSABLE (decl))
949 {
950 any_addressable_vars = 1;
951 break;
952 }
953 for (block = gimple_block (stmt);
954 !any_addressable_vars
955 && block
956 && TREE_CODE (block) == BLOCK;
957 block = BLOCK_SUPERCONTEXT (block))
958 {
959 for (local_decls = BLOCK_VARS (block);
960 local_decls;
961 local_decls = DECL_CHAIN (local_decls))
962 if (TREE_ADDRESSABLE (local_decls))
963 {
964 any_addressable_vars = 1;
965 break;
966 }
967 if (block == gimple_block (parallel_stmt))
968 break;
969 }
970 }
971 if (!any_addressable_vars)
972 gimple_omp_return_set_nowait (stmt);
973 }
974 }
975}
976
977static void
978remove_exit_barriers (struct omp_region *region)
979{
980 if (region->type == GIMPLE_OMP_PARALLEL)
981 remove_exit_barrier (region);
982
983 if (region->inner)
984 {
985 region = region->inner;
986 remove_exit_barriers (region);
987 while (region->next)
988 {
989 region = region->next;
990 remove_exit_barriers (region);
991 }
992 }
993}
994
995/* Optimize omp_get_thread_num () and omp_get_num_threads ()
996 calls. These can't be declared as const functions, but
997 within one parallel body they are constant, so they can be
998 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
999 which are declared const. Similarly for task body, except
1000 that in untied task omp_get_thread_num () can change at any task
1001 scheduling point. */
1002
1003static void
1004optimize_omp_library_calls (gimple *entry_stmt)
1005{
1006 basic_block bb;
1007 gimple_stmt_iterator gsi;
1008 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1009 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1010 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1011 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1012 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1013 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1014 OMP_CLAUSE_UNTIED) != NULL);
1015
1016 FOR_EACH_BB_FN (bb, cfun)
1017 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1018 {
1019 gimple *call = gsi_stmt (gsi);
1020 tree decl;
1021
1022 if (is_gimple_call (call)
1023 && (decl = gimple_call_fndecl (call))
1024 && DECL_EXTERNAL (decl)
1025 && TREE_PUBLIC (decl)
1026 && DECL_INITIAL (decl) == NULL)
1027 {
1028 tree built_in;
1029
1030 if (DECL_NAME (decl) == thr_num_id)
1031 {
1032 /* In #pragma omp task untied omp_get_thread_num () can change
1033 during the execution of the task region. */
1034 if (untied_task)
1035 continue;
1036 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1037 }
1038 else if (DECL_NAME (decl) == num_thr_id)
1039 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1040 else
1041 continue;
1042
1043 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1044 || gimple_call_num_args (call) != 0)
1045 continue;
1046
1047 if (flag_exceptions && !TREE_NOTHROW (decl))
1048 continue;
1049
1050 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1051 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1052 TREE_TYPE (TREE_TYPE (built_in))))
1053 continue;
1054
1055 gimple_call_set_fndecl (call, built_in);
1056 }
1057 }
1058}
1059
1060/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1061 regimplified. */
1062
1063static tree
1064expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1065{
1066 tree t = *tp;
1067
1068 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1069 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1070 return t;
1071
1072 if (TREE_CODE (t) == ADDR_EXPR)
1073 recompute_tree_invariant_for_addr_expr (t);
1074
1075 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1076 return NULL_TREE;
1077}
1078
1079/* Prepend or append TO = FROM assignment before or after *GSI_P. */
1080
1081static void
1082expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1083 bool after)
1084{
1085 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1086 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1087 !after, after ? GSI_CONTINUE_LINKING
1088 : GSI_SAME_STMT);
1089 gimple *stmt = gimple_build_assign (to, from);
1090 if (after)
1091 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1092 else
1093 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1094 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1095 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1096 {
1097 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1098 gimple_regimplify_operands (stmt, &gsi);
1099 }
1100}
1101
1102/* Expand the OpenMP parallel or task directive starting at REGION. */
1103
1104static void
1105expand_omp_taskreg (struct omp_region *region)
1106{
1107 basic_block entry_bb, exit_bb, new_bb;
1108 struct function *child_cfun;
1109 tree child_fn, block, t;
1110 gimple_stmt_iterator gsi;
1111 gimple *entry_stmt, *stmt;
1112 edge e;
1113 vec<tree, va_gc> *ws_args;
1114
1115 entry_stmt = last_stmt (region->entry);
1116 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1117 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1118
1119 entry_bb = region->entry;
1120 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1121 exit_bb = region->cont;
1122 else
1123 exit_bb = region->exit;
1124
1125 bool is_cilk_for
1126 = (flag_cilkplus
1127 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1128 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1129 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1130
1131 if (is_cilk_for)
1132 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1133 and the inner statement contains the name of the built-in function
1134 and grain. */
1135 ws_args = region->inner->ws_args;
1136 else if (is_combined_parallel (region))
1137 ws_args = region->ws_args;
1138 else
1139 ws_args = NULL;
1140
1141 if (child_cfun->cfg)
1142 {
1143 /* Due to inlining, it may happen that we have already outlined
1144 the region, in which case all we need to do is make the
1145 sub-graph unreachable and emit the parallel call. */
1146 edge entry_succ_e, exit_succ_e;
1147
1148 entry_succ_e = single_succ_edge (entry_bb);
1149
1150 gsi = gsi_last_bb (entry_bb);
1151 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1152 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1153 gsi_remove (&gsi, true);
1154
1155 new_bb = entry_bb;
1156 if (exit_bb)
1157 {
1158 exit_succ_e = single_succ_edge (exit_bb);
1159 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1160 }
1161 remove_edge_and_dominated_blocks (entry_succ_e);
1162 }
1163 else
1164 {
1165 unsigned srcidx, dstidx, num;
1166
1167 /* If the parallel region needs data sent from the parent
1168 function, then the very first statement (except possible
1169 tree profile counter updates) of the parallel body
1170 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1171 &.OMP_DATA_O is passed as an argument to the child function,
1172 we need to replace it with the argument as seen by the child
1173 function.
1174
1175 In most cases, this will end up being the identity assignment
1176 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1177 a function call that has been inlined, the original PARM_DECL
1178 .OMP_DATA_I may have been converted into a different local
 1179 variable, in which case we need to keep the assignment. */
1180 if (gimple_omp_taskreg_data_arg (entry_stmt))
1181 {
1182 basic_block entry_succ_bb
1183 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1184 : FALLTHRU_EDGE (entry_bb)->dest;
1185 tree arg;
1186 gimple *parcopy_stmt = NULL;
1187
1188 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1189 {
1190 gimple *stmt;
1191
1192 gcc_assert (!gsi_end_p (gsi));
1193 stmt = gsi_stmt (gsi);
1194 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1195 continue;
1196
1197 if (gimple_num_ops (stmt) == 2)
1198 {
1199 tree arg = gimple_assign_rhs1 (stmt);
1200
 1201 /* We're ignoring the subcode because we're
1202 effectively doing a STRIP_NOPS. */
1203
1204 if (TREE_CODE (arg) == ADDR_EXPR
1205 && TREE_OPERAND (arg, 0)
 1206 == gimple_omp_taskreg_data_arg (entry_stmt))
1207 {
1208 parcopy_stmt = stmt;
1209 break;
1210 }
1211 }
1212 }
1213
1214 gcc_assert (parcopy_stmt != NULL);
1215 arg = DECL_ARGUMENTS (child_fn);
1216
1217 if (!gimple_in_ssa_p (cfun))
1218 {
1219 if (gimple_assign_lhs (parcopy_stmt) == arg)
1220 gsi_remove (&gsi, true);
1221 else
1222 {
 1223 /* ?? Is setting the subcode really necessary ?? */
1224 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1225 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1226 }
1227 }
1228 else
1229 {
1230 tree lhs = gimple_assign_lhs (parcopy_stmt);
1231 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1232 /* We'd like to set the rhs to the default def in the child_fn,
1233 but it's too early to create ssa names in the child_fn.
1234 Instead, we set the rhs to the parm. In
1235 move_sese_region_to_fn, we introduce a default def for the
 1236 parm, map the parm to its default def, and once we encounter
1237 this stmt, replace the parm with the default def. */
1238 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1239 update_stmt (parcopy_stmt);
1240 }
1241 }
1242
1243 /* Declare local variables needed in CHILD_CFUN. */
1244 block = DECL_INITIAL (child_fn);
1245 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1246 /* The gimplifier could record temporaries in parallel/task block
1247 rather than in containing function's local_decls chain,
1248 which would mean cgraph missed finalizing them. Do it now. */
1249 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1250 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1251 varpool_node::finalize_decl (t);
1252 DECL_SAVED_TREE (child_fn) = NULL;
1253 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1254 gimple_set_body (child_fn, NULL);
1255 TREE_USED (block) = 1;
1256
1257 /* Reset DECL_CONTEXT on function arguments. */
1258 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1259 DECL_CONTEXT (t) = child_fn;
1260
1261 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1262 so that it can be moved to the child function. */
1263 gsi = gsi_last_bb (entry_bb);
1264 stmt = gsi_stmt (gsi);
1265 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1266 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1267 e = split_block (entry_bb, stmt);
1268 gsi_remove (&gsi, true);
1269 entry_bb = e->dest;
1270 edge e2 = NULL;
1271 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1272 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1273 else
1274 {
1275 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1276 gcc_assert (e2->dest == region->exit);
1277 remove_edge (BRANCH_EDGE (entry_bb));
1278 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1279 gsi = gsi_last_bb (region->exit);
1280 gcc_assert (!gsi_end_p (gsi)
1281 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1282 gsi_remove (&gsi, true);
1283 }
1284
1285 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1286 if (exit_bb)
1287 {
1288 gsi = gsi_last_bb (exit_bb);
1289 gcc_assert (!gsi_end_p (gsi)
1290 && (gimple_code (gsi_stmt (gsi))
1291 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1292 stmt = gimple_build_return (NULL);
1293 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1294 gsi_remove (&gsi, true);
1295 }
1296
1297 /* Move the parallel region into CHILD_CFUN. */
1298
1299 if (gimple_in_ssa_p (cfun))
1300 {
1301 init_tree_ssa (child_cfun);
1302 init_ssa_operands (child_cfun);
1303 child_cfun->gimple_df->in_ssa_p = true;
1304 block = NULL_TREE;
1305 }
1306 else
1307 block = gimple_block (entry_stmt);
1308
1309 /* Make sure to generate early debug for the function before
1310 outlining anything. */
1311 if (! gimple_in_ssa_p (cfun))
1312 (*debug_hooks->early_global_decl) (cfun->decl);
1313
1314 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1315 if (exit_bb)
1316 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1317 if (e2)
1318 {
1319 basic_block dest_bb = e2->dest;
1320 if (!exit_bb)
1321 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1322 remove_edge (e2);
1323 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1324 }
1325 /* When the OMP expansion process cannot guarantee an up-to-date
 1326 loop tree, arrange for the child function to fixup loops. */
1327 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1328 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1329
1330 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1331 num = vec_safe_length (child_cfun->local_decls);
1332 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1333 {
1334 t = (*child_cfun->local_decls)[srcidx];
1335 if (DECL_CONTEXT (t) == cfun->decl)
1336 continue;
1337 if (srcidx != dstidx)
1338 (*child_cfun->local_decls)[dstidx] = t;
1339 dstidx++;
1340 }
1341 if (dstidx != num)
1342 vec_safe_truncate (child_cfun->local_decls, dstidx);
1343
1344 /* Inform the callgraph about the new function. */
1345 child_cfun->curr_properties = cfun->curr_properties;
1346 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1347 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1348 cgraph_node *node = cgraph_node::get_create (child_fn);
1349 node->parallelized_function = 1;
1350 cgraph_node::add_new_function (child_fn, true);
1351
1352 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1353 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1354
1355 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1356 fixed in a following pass. */
1357 push_cfun (child_cfun);
1358 if (need_asm)
 1359 assign_assembler_name_if_needed (child_fn);
1360
1361 if (optimize)
1362 optimize_omp_library_calls (entry_stmt);
1363 cgraph_edge::rebuild_edges ();
1364
1365 /* Some EH regions might become dead, see PR34608. If
1366 pass_cleanup_cfg isn't the first pass to happen with the
1367 new child, these dead EH edges might cause problems.
1368 Clean them up now. */
1369 if (flag_exceptions)
1370 {
1371 basic_block bb;
1372 bool changed = false;
1373
1374 FOR_EACH_BB_FN (bb, cfun)
1375 changed |= gimple_purge_dead_eh_edges (bb);
1376 if (changed)
1377 cleanup_tree_cfg ();
1378 }
1379 if (gimple_in_ssa_p (cfun))
1380 update_ssa (TODO_update_ssa);
1381 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1382 verify_loop_structure ();
1383 pop_cfun ();
1384
1385 if (dump_file && !gimple_in_ssa_p (cfun))
1386 {
1387 omp_any_child_fn_dumped = true;
1388 dump_function_header (dump_file, child_fn, dump_flags);
1389 dump_function_to_file (child_fn, dump_file, dump_flags);
1390 }
1391 }
1392
1393 /* Emit a library call to launch the children threads. */
1394 if (is_cilk_for)
1395 expand_cilk_for_call (new_bb,
1396 as_a <gomp_parallel *> (entry_stmt), ws_args);
1397 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1398 expand_parallel_call (region, new_bb,
1399 as_a <gomp_parallel *> (entry_stmt), ws_args);
1400 else
1401 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1402 if (gimple_in_ssa_p (cfun))
1403 update_ssa (TODO_update_ssa_only_virtuals);
1404}
1405
1406/* Information about members of an OpenACC collapsed loop nest. */
1407
1408struct oacc_collapse
1409{
 1410 tree base; /* Base value. */
1411 tree iters; /* Number of steps. */
1412 tree step; /* step size. */
1413};
1414
1415/* Helper for expand_oacc_for. Determine collapsed loop information.
1416 Fill in COUNTS array. Emit any initialization code before GSI.
1417 Return the calculated outer loop bound of BOUND_TYPE. */
1418
1419static tree
1420expand_oacc_collapse_init (const struct omp_for_data *fd,
1421 gimple_stmt_iterator *gsi,
1422 oacc_collapse *counts, tree bound_type)
1423{
1424 tree total = build_int_cst (bound_type, 1);
1425 int ix;
1426
1427 gcc_assert (integer_onep (fd->loop.step));
1428 gcc_assert (integer_zerop (fd->loop.n1));
1429
1430 for (ix = 0; ix != fd->collapse; ix++)
1431 {
1432 const omp_for_data_loop *loop = &fd->loops[ix];
1433
1434 tree iter_type = TREE_TYPE (loop->v);
1435 tree diff_type = iter_type;
1436 tree plus_type = iter_type;
1437
1438 gcc_assert (loop->cond_code == fd->loop.cond_code);
1439
1440 if (POINTER_TYPE_P (iter_type))
1441 plus_type = sizetype;
1442 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1443 diff_type = signed_type_for (diff_type);
1444
1445 tree b = loop->n1;
1446 tree e = loop->n2;
1447 tree s = loop->step;
1448 bool up = loop->cond_code == LT_EXPR;
1449 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1450 bool negating;
1451 tree expr;
1452
1453 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1454 true, GSI_SAME_STMT);
1455 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1456 true, GSI_SAME_STMT);
1457
 1458 /* Convert the step, avoiding possible unsigned->signed overflow. */
1459 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1460 if (negating)
1461 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1462 s = fold_convert (diff_type, s);
1463 if (negating)
1464 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1465 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1466 true, GSI_SAME_STMT);
1467
 1468 /* Determine the range, avoiding possible unsigned->signed overflow. */
1469 negating = !up && TYPE_UNSIGNED (iter_type);
1470 expr = fold_build2 (MINUS_EXPR, plus_type,
1471 fold_convert (plus_type, negating ? b : e),
1472 fold_convert (plus_type, negating ? e : b));
1473 expr = fold_convert (diff_type, expr);
1474 if (negating)
1475 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1476 tree range = force_gimple_operand_gsi
1477 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1478
1479 /* Determine number of iterations. */
1480 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1481 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1482 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1483
1484 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1485 true, GSI_SAME_STMT);
1486
1487 counts[ix].base = b;
1488 counts[ix].iters = iters;
1489 counts[ix].step = s;
1490
1491 total = fold_build2 (MULT_EXPR, bound_type, total,
1492 fold_convert (bound_type, iters));
1493 }
1494
1495 return total;
1496}
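
/* A small worked example of the iteration count computed above (assumed
   inputs): for a collapsed member loop

     for (i = 0; i < 10; i += 3)

   we have b = 0, e = 10, s = 3 and dir = +1, so range = 10 and
   iters = (range - dir + s) / s = (10 - 1 + 3) / 3 = 4, matching the four
   iterations i = 0, 3, 6, 9.  */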
1497
1498/* Emit initializers for collapsed loop members. IVAR is the outer
1499 loop iteration variable, from which collapsed loop iteration values
1500 are calculated. COUNTS array has been initialized by
 1501 expand_oacc_collapse_init. */
1502
1503static void
1504expand_oacc_collapse_vars (const struct omp_for_data *fd,
1505 gimple_stmt_iterator *gsi,
1506 const oacc_collapse *counts, tree ivar)
1507{
1508 tree ivar_type = TREE_TYPE (ivar);
1509
1510 /* The most rapidly changing iteration variable is the innermost
1511 one. */
1512 for (int ix = fd->collapse; ix--;)
1513 {
1514 const omp_for_data_loop *loop = &fd->loops[ix];
1515 const oacc_collapse *collapse = &counts[ix];
1516 tree iter_type = TREE_TYPE (loop->v);
1517 tree diff_type = TREE_TYPE (collapse->step);
1518 tree plus_type = iter_type;
1519 enum tree_code plus_code = PLUS_EXPR;
1520 tree expr;
1521
1522 if (POINTER_TYPE_P (iter_type))
1523 {
1524 plus_code = POINTER_PLUS_EXPR;
1525 plus_type = sizetype;
1526 }
1527
1528 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
1529 fold_convert (ivar_type, collapse->iters));
1530 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1531 collapse->step);
1532 expr = fold_build2 (plus_code, iter_type, collapse->base,
1533 fold_convert (plus_type, expr));
1534 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1535 true, GSI_SAME_STMT);
1536 gassign *ass = gimple_build_assign (loop->v, expr);
1537 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1538
1539 if (ix)
1540 {
1541 expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
1542 fold_convert (ivar_type, collapse->iters));
1543 ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1544 true, GSI_SAME_STMT);
1545 }
1546 }
1547}
1548
1549/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1550 of the combined collapse > 1 loop constructs, generate code like:
1551 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1552 if (cond3 is <)
1553 adj = STEP3 - 1;
1554 else
1555 adj = STEP3 + 1;
1556 count3 = (adj + N32 - N31) / STEP3;
1557 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1558 if (cond2 is <)
1559 adj = STEP2 - 1;
1560 else
1561 adj = STEP2 + 1;
1562 count2 = (adj + N22 - N21) / STEP2;
1563 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1564 if (cond1 is <)
1565 adj = STEP1 - 1;
1566 else
1567 adj = STEP1 + 1;
1568 count1 = (adj + N12 - N11) / STEP1;
1569 count = count1 * count2 * count3;
1570 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1571 count = 0;
1572 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1573 of the combined loop constructs, just initialize COUNTS array
1574 from the _looptemp_ clauses. */
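
/* For instance (hypothetical constant bounds), for
     #pragma omp for collapse(2)
     for (i = 0; i < 4; i++)
       for (j = 0; j < 5; j++)
   the scheme above yields count2 = (1 - 1 + 5 - 0) / 1 = 5,
   count1 = (1 - 1 + 4 - 0) / 1 = 4 and count = 4 * 5 = 20 logical
   iterations.  */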
1575
1576/* NOTE: It *could* be better to moosh all of the BBs together,
1577 creating one larger BB with all the computation and the unexpected
1578 jump at the end. I.e.
1579
1580 bool zero3, zero2, zero1, zero;
1581
1582 zero3 = N32 c3 N31;
1583 count3 = (N32 - N31) /[cl] STEP3;
1584 zero2 = N22 c2 N21;
1585 count2 = (N22 - N21) /[cl] STEP2;
1586 zero1 = N12 c1 N11;
1587 count1 = (N12 - N11) /[cl] STEP1;
1588 zero = zero3 || zero2 || zero1;
1589 count = count1 * count2 * count3;
1590 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1591
1592 After all, we expect the zero=false, and thus we expect to have to
1593 evaluate all of the comparison expressions, so short-circuiting
1594 oughtn't be a win. Since the condition isn't protecting a
1595 denominator, we're not concerned about divide-by-zero, so we can
1596 fully evaluate count even if a numerator turned out to be wrong.
1597
1598 It seems like putting this all together would create much better
1599 scheduling opportunities, and less pressure on the chip's branch
1600 predictor. */
1601
1602static void
1603expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1604 basic_block &entry_bb, tree *counts,
1605 basic_block &zero_iter1_bb, int &first_zero_iter1,
1606 basic_block &zero_iter2_bb, int &first_zero_iter2,
1607 basic_block &l2_dom_bb)
1608{
1609 tree t, type = TREE_TYPE (fd->loop.v);
1610 edge e, ne;
1611 int i;
1612
1613 /* Collapsed loops need work for expansion into SSA form. */
1614 gcc_assert (!gimple_in_ssa_p (cfun));
1615
1616 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1617 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1618 {
1619 gcc_assert (fd->ordered == 0);
1620 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1621 isn't supposed to be handled, as the inner loop doesn't
1622 use it. */
1623 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1624 OMP_CLAUSE__LOOPTEMP_);
1625 gcc_assert (innerc);
1626 for (i = 0; i < fd->collapse; i++)
1627 {
1628 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1629 OMP_CLAUSE__LOOPTEMP_);
1630 gcc_assert (innerc);
1631 if (i)
1632 counts[i] = OMP_CLAUSE_DECL (innerc);
1633 else
1634 counts[0] = NULL_TREE;
1635 }
1636 return;
1637 }
1638
1639 for (i = fd->collapse; i < fd->ordered; i++)
1640 {
1641 tree itype = TREE_TYPE (fd->loops[i].v);
1642 counts[i] = NULL_TREE;
1643 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1644 fold_convert (itype, fd->loops[i].n1),
1645 fold_convert (itype, fd->loops[i].n2));
1646 if (t && integer_zerop (t))
1647 {
1648 for (i = fd->collapse; i < fd->ordered; i++)
1649 counts[i] = build_int_cst (type, 0);
1650 break;
1651 }
1652 }
1653 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1654 {
1655 tree itype = TREE_TYPE (fd->loops[i].v);
1656
1657 if (i >= fd->collapse && counts[i])
1658 continue;
1659 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1660 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1661 fold_convert (itype, fd->loops[i].n1),
1662 fold_convert (itype, fd->loops[i].n2)))
1663 == NULL_TREE || !integer_onep (t)))
1664 {
1665 gcond *cond_stmt;
1666 tree n1, n2;
1667 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1668 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1669 true, GSI_SAME_STMT);
1670 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1671 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1672 true, GSI_SAME_STMT);
1673 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1674 NULL_TREE, NULL_TREE);
1675 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1676 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1677 expand_omp_regimplify_p, NULL, NULL)
1678 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1679 expand_omp_regimplify_p, NULL, NULL))
1680 {
1681 *gsi = gsi_for_stmt (cond_stmt);
1682 gimple_regimplify_operands (cond_stmt, gsi);
1683 }
1684 e = split_block (entry_bb, cond_stmt);
1685 basic_block &zero_iter_bb
1686 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1687 int &first_zero_iter
1688 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1689 if (zero_iter_bb == NULL)
1690 {
1691 gassign *assign_stmt;
1692 first_zero_iter = i;
1693 zero_iter_bb = create_empty_bb (entry_bb);
1694 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1695 *gsi = gsi_after_labels (zero_iter_bb);
1696 if (i < fd->collapse)
1697 assign_stmt = gimple_build_assign (fd->loop.n2,
1698 build_zero_cst (type));
1699 else
1700 {
1701 counts[i] = create_tmp_reg (type, ".count");
1702 assign_stmt
1703 = gimple_build_assign (counts[i], build_zero_cst (type));
1704 }
1705 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1706 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1707 entry_bb);
1708 }
1709 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1710 ne->probability = REG_BR_PROB_BASE / 2000 - 1;
1711 e->flags = EDGE_TRUE_VALUE;
1712 e->probability = REG_BR_PROB_BASE - ne->probability;
1713 if (l2_dom_bb == NULL)
1714 l2_dom_bb = entry_bb;
1715 entry_bb = e->dest;
1716 *gsi = gsi_last_bb (entry_bb);
1717 }
1718
1719 if (POINTER_TYPE_P (itype))
1720 itype = signed_type_for (itype);
1721 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1722 ? -1 : 1));
1723 t = fold_build2 (PLUS_EXPR, itype,
1724 fold_convert (itype, fd->loops[i].step), t);
1725 t = fold_build2 (PLUS_EXPR, itype, t,
1726 fold_convert (itype, fd->loops[i].n2));
1727 t = fold_build2 (MINUS_EXPR, itype, t,
1728 fold_convert (itype, fd->loops[i].n1));
1729 /* ?? We could probably use CEIL_DIV_EXPR instead of
1730 TRUNC_DIV_EXPR and adjusting by hand. Unless we can't
1731 generate the same code in the end because generically we
1732 don't know that the values involved must be negative for
1733 GT?? */
1734 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1735 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1736 fold_build1 (NEGATE_EXPR, itype, t),
1737 fold_build1 (NEGATE_EXPR, itype,
1738 fold_convert (itype,
1739 fd->loops[i].step)));
1740 else
1741 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1742 fold_convert (itype, fd->loops[i].step));
1743 t = fold_convert (type, t);
1744 if (TREE_CODE (t) == INTEGER_CST)
1745 counts[i] = t;
1746 else
1747 {
1748 if (i < fd->collapse || i != first_zero_iter2)
1749 counts[i] = create_tmp_reg (type, ".count");
1750 expand_omp_build_assign (gsi, counts[i], t);
1751 }
1752 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1753 {
1754 if (i == 0)
1755 t = counts[0];
1756 else
1757 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1758 expand_omp_build_assign (gsi, fd->loop.n2, t);
1759 }
1760 }
1761}
1762
1763/* Helper function for expand_omp_{for_*,simd}. Generate code like:
1764 T = V;
1765 V3 = N31 + (T % count3) * STEP3;
1766 T = T / count3;
1767 V2 = N21 + (T % count2) * STEP2;
1768 T = T / count2;
1769 V1 = N11 + T * STEP1;
1770 if this loop doesn't have an inner loop construct combined with it.
1771 If it does have an inner loop construct combined with it and the
1772 iteration count isn't known constant, store values from counts array
1773 into its _looptemp_ temporaries instead. */
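/* For illustration: with collapse(3) and counts = { count1 = 2,
   count2 = 3, count3 = 4 }, the logical iteration T = 17 is decomposed
   as
     i3 = 17 % 4 = 1;  T = 17 / 4 = 4;
     i2 = 4 % 3 = 1;   T = 4 / 3 = 1;
     i1 = 1;
   so V3 = N31 + 1 * STEP3, V2 = N21 + 1 * STEP2 and V1 = N11 + 1 * STEP1,
   consistent with 1 * (3 * 4) + 1 * 4 + 1 == 17.  */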
1774
1775static void
1776expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1777 tree *counts, gimple *inner_stmt, tree startvar)
1778{
1779 int i;
1780 if (gimple_omp_for_combined_p (fd->for_stmt))
1781 {
1782 /* If fd->loop.n2 is constant, then no propagation of the counts
1783 is needed; they are constant. */
1784 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1785 return;
1786
1787 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1788 ? gimple_omp_taskreg_clauses (inner_stmt)
1789 : gimple_omp_for_clauses (inner_stmt);
1790 /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1791 isn't supposed to be handled, as the inner loop doesn't
1792 use it. */
1793 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1794 gcc_assert (innerc);
1795 for (i = 0; i < fd->collapse; i++)
1796 {
1797 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1798 OMP_CLAUSE__LOOPTEMP_);
1799 gcc_assert (innerc);
1800 if (i)
1801 {
1802 tree tem = OMP_CLAUSE_DECL (innerc);
1803 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1804 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1805 false, GSI_CONTINUE_LINKING);
1806 gassign *stmt = gimple_build_assign (tem, t);
1807 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1808 }
1809 }
1810 return;
1811 }
1812
1813 tree type = TREE_TYPE (fd->loop.v);
1814 tree tem = create_tmp_reg (type, ".tem");
1815 gassign *stmt = gimple_build_assign (tem, startvar);
1816 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1817
1818 for (i = fd->collapse - 1; i >= 0; i--)
1819 {
1820 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1821 itype = vtype;
1822 if (POINTER_TYPE_P (vtype))
1823 itype = signed_type_for (vtype);
1824 if (i != 0)
1825 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1826 else
1827 t = tem;
1828 t = fold_convert (itype, t);
1829 t = fold_build2 (MULT_EXPR, itype, t,
1830 fold_convert (itype, fd->loops[i].step));
1831 if (POINTER_TYPE_P (vtype))
1832 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1833 else
1834 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1835 t = force_gimple_operand_gsi (gsi, t,
1836 DECL_P (fd->loops[i].v)
1837 && TREE_ADDRESSABLE (fd->loops[i].v),
1838 NULL_TREE, false,
1839 GSI_CONTINUE_LINKING);
1840 stmt = gimple_build_assign (fd->loops[i].v, t);
1841 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1842 if (i != 0)
1843 {
1844 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1845 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1846 false, GSI_CONTINUE_LINKING);
1847 stmt = gimple_build_assign (tem, t);
1848 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1849 }
1850 }
1851}
1852
1853/* Helper function for expand_omp_for_*. Generate code like:
1854 L10:
1855 V3 += STEP3;
1856 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1857 L11:
1858 V3 = N31;
1859 V2 += STEP2;
1860 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1861 L12:
1862 V2 = N21;
1863 V1 += STEP1;
1864 goto BODY_BB; */
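/* In other words, this emits the "odometer" increment for the collapsed
   nest: the innermost variable is advanced first, and each variable that
   runs past its bound is reset to its lower bound while the next outer
   one is advanced.  E.g. (V1, V2, V3) == (0, 1, 3) with count3 == 4,
   N31 == 0 and unit steps advances to (0, 2, 0).  */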
1865
1866static basic_block
1867extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1868 basic_block body_bb)
1869{
1870 basic_block last_bb, bb, collapse_bb = NULL;
1871 int i;
1872 gimple_stmt_iterator gsi;
1873 edge e;
1874 tree t;
1875 gimple *stmt;
1876
1877 last_bb = cont_bb;
1878 for (i = fd->collapse - 1; i >= 0; i--)
1879 {
1880 tree vtype = TREE_TYPE (fd->loops[i].v);
1881
1882 bb = create_empty_bb (last_bb);
1883 add_bb_to_loop (bb, last_bb->loop_father);
1884 gsi = gsi_start_bb (bb);
1885
1886 if (i < fd->collapse - 1)
1887 {
1888 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1889 e->probability = REG_BR_PROB_BASE / 8;
1890
1891 t = fd->loops[i + 1].n1;
1892 t = force_gimple_operand_gsi (&gsi, t,
1893 DECL_P (fd->loops[i + 1].v)
1894 && TREE_ADDRESSABLE (fd->loops[i
1895 + 1].v),
1896 NULL_TREE, false,
1897 GSI_CONTINUE_LINKING);
1898 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1899 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1900 }
1901 else
1902 collapse_bb = bb;
1903
1904 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1905
1906 if (POINTER_TYPE_P (vtype))
1907 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1908 else
1909 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1910 t = force_gimple_operand_gsi (&gsi, t,
1911 DECL_P (fd->loops[i].v)
1912 && TREE_ADDRESSABLE (fd->loops[i].v),
1913 NULL_TREE, false, GSI_CONTINUE_LINKING);
1914 stmt = gimple_build_assign (fd->loops[i].v, t);
1915 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1916
1917 if (i > 0)
1918 {
1919 t = fd->loops[i].n2;
1920 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1921 false, GSI_CONTINUE_LINKING);
1922 tree v = fd->loops[i].v;
1923 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1924 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1925 false, GSI_CONTINUE_LINKING);
1926 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1927 stmt = gimple_build_cond_empty (t);
1928 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1929 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1930 e->probability = REG_BR_PROB_BASE * 7 / 8;
1931 }
1932 else
1933 make_edge (bb, body_bb, EDGE_FALLTHRU);
1934 last_bb = bb;
1935 }
1936
1937 return collapse_bb;
1938}
1939
1940/* Expand #pragma omp ordered depend(source). */
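/* As a sketch, in

     #pragma omp for ordered(2)
     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         {
           #pragma omp ordered depend(source)
           ...
         }

   the depend(source) directive becomes roughly
   GOMP_doacross_post (&.orditera[0]), i.e. a call to the libgomp
   doacross post entry point with the address of the array that holds
   the current iteration vector (counts[fd->ordered] below); the _ull_
   variant is used when the iteration type is not long.  */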
1941
1942static void
1943expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1944 tree *counts, location_t loc)
1945{
1946 enum built_in_function source_ix
1947 = fd->iter_type == long_integer_type_node
1948 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1949 gimple *g
1950 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1951 build_fold_addr_expr (counts[fd->ordered]));
1952 gimple_set_location (g, loc);
1953 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1954}
1955
1956/* Expand a single depend from #pragma omp ordered depend(sink:...). */
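/* As a sketch, a directive such as

     #pragma omp ordered depend(sink: i - 1, j)

   inside an ordered(2) loop nest over i and j is expanded into a guard
   that checks the referenced iteration really exists (e.g. that i - 1
   does not fall before the lower bound of the first loop) and, under
   that guard, a call roughly of the form
   GOMP_doacross_wait (iter_i_minus_1, iter_j), where the arguments are
   the logical iteration numbers computed below.  An offset that would
   wait on a lexically later iteration only draws a warning.  */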
1957
1958static void
1959expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1960 tree *counts, tree c, location_t loc)
1961{
1962 auto_vec<tree, 10> args;
1963 enum built_in_function sink_ix
1964 = fd->iter_type == long_integer_type_node
1965 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
1966 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
1967 int i;
1968 gimple_stmt_iterator gsi2 = *gsi;
1969 bool warned_step = false;
1970
1971 for (i = 0; i < fd->ordered; i++)
1972 {
1973 tree step = NULL_TREE;
1974 off = TREE_PURPOSE (deps);
1975 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
1976 {
1977 step = TREE_OPERAND (off, 1);
1978 off = TREE_OPERAND (off, 0);
1979 }
1980 if (!integer_zerop (off))
1981 {
1982 gcc_assert (fd->loops[i].cond_code == LT_EXPR
1983 || fd->loops[i].cond_code == GT_EXPR);
1984 bool forward = fd->loops[i].cond_code == LT_EXPR;
1985 if (step)
1986 {
1987 /* Non-simple Fortran DO loops.  If step is variable, we don't
1988 even know the direction at compile time, so we can't
1989 warn. */
1990 if (TREE_CODE (step) != INTEGER_CST)
1991 break;
1992 forward = tree_int_cst_sgn (step) != -1;
1993 }
1994 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
1995 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
1996 "lexically later iteration");
1997 break;
1998 }
1999 deps = TREE_CHAIN (deps);
2000 }
2001 /* If all offsets corresponding to the collapsed loops are zero,
2002 this depend clause can be ignored. FIXME: but there is still a
2003 flush needed. We need to emit one __sync_synchronize () for it
2004 though (perhaps conditionally)? Solve this together with the
2005 conservative dependence folding optimization.
2006 if (i >= fd->collapse)
2007 return; */
2008
2009 deps = OMP_CLAUSE_DECL (c);
2010 gsi_prev (&gsi2);
2011 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2012 edge e2 = split_block_after_labels (e1->dest);
2013
2014 gsi2 = gsi_after_labels (e1->dest);
2015 *gsi = gsi_last_bb (e1->src);
2016 for (i = 0; i < fd->ordered; i++)
2017 {
2018 tree itype = TREE_TYPE (fd->loops[i].v);
2019 tree step = NULL_TREE;
2020 tree orig_off = NULL_TREE;
2021 if (POINTER_TYPE_P (itype))
2022 itype = sizetype;
2023 if (i)
2024 deps = TREE_CHAIN (deps);
2025 off = TREE_PURPOSE (deps);
2026 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2027 {
2028 step = TREE_OPERAND (off, 1);
2029 off = TREE_OPERAND (off, 0);
2030 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2031 && integer_onep (fd->loops[i].step)
2032 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2033 }
2034 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2035 if (step)
2036 {
2037 off = fold_convert_loc (loc, itype, off);
2038 orig_off = off;
2039 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2040 }
2041
2042 if (integer_zerop (off))
2043 t = boolean_true_node;
2044 else
2045 {
2046 tree a;
2047 tree co = fold_convert_loc (loc, itype, off);
2048 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2049 {
2050 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2051 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2052 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2053 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2054 co);
2055 }
2056 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2057 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2058 fd->loops[i].v, co);
2059 else
2060 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2061 fd->loops[i].v, co);
2062 if (step)
2063 {
2064 tree t1, t2;
2065 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2066 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2067 fd->loops[i].n1);
2068 else
2069 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2070 fd->loops[i].n2);
2071 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2072 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2073 fd->loops[i].n2);
2074 else
2075 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2076 fd->loops[i].n1);
2077 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2078 step, build_int_cst (TREE_TYPE (step), 0));
2079 if (TREE_CODE (step) != INTEGER_CST)
2080 {
2081 t1 = unshare_expr (t1);
2082 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2083 false, GSI_CONTINUE_LINKING);
2084 t2 = unshare_expr (t2);
2085 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2086 false, GSI_CONTINUE_LINKING);
2087 }
2088 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2089 t, t2, t1);
2090 }
2091 else if (fd->loops[i].cond_code == LT_EXPR)
2092 {
2093 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2094 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2095 fd->loops[i].n1);
2096 else
2097 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2098 fd->loops[i].n2);
2099 }
2100 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2101 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2102 fd->loops[i].n2);
2103 else
2104 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2105 fd->loops[i].n1);
2106 }
2107 if (cond)
2108 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2109 else
2110 cond = t;
2111
2112 off = fold_convert_loc (loc, itype, off);
2113
2114 if (step
2115 || (fd->loops[i].cond_code == LT_EXPR
2116 ? !integer_onep (fd->loops[i].step)
2117 : !integer_minus_onep (fd->loops[i].step)))
2118 {
2119 if (step == NULL_TREE
2120 && TYPE_UNSIGNED (itype)
2121 && fd->loops[i].cond_code == GT_EXPR)
2122 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2123 fold_build1_loc (loc, NEGATE_EXPR, itype,
2124 s));
2125 else
2126 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2127 orig_off ? orig_off : off, s);
2128 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2129 build_int_cst (itype, 0));
2130 if (integer_zerop (t) && !warned_step)
2131 {
2132 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2133 "in the iteration space");
2134 warned_step = true;
2135 }
2136 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2137 cond, t);
2138 }
2139
2140 if (i <= fd->collapse - 1 && fd->collapse > 1)
2141 t = fd->loop.v;
2142 else if (counts[i])
2143 t = counts[i];
2144 else
2145 {
2146 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2147 fd->loops[i].v, fd->loops[i].n1);
2148 t = fold_convert_loc (loc, fd->iter_type, t);
2149 }
2150 if (step)
2151 /* We have already divided off by step earlier. */;
2152 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2153 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2154 fold_build1_loc (loc, NEGATE_EXPR, itype,
2155 s));
2156 else
2157 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2158 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2159 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2160 off = fold_convert_loc (loc, fd->iter_type, off);
2161 if (i <= fd->collapse - 1 && fd->collapse > 1)
2162 {
2163 if (i)
2164 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2165 off);
2166 if (i < fd->collapse - 1)
2167 {
2168 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2169 counts[i]);
2170 continue;
2171 }
2172 }
2173 off = unshare_expr (off);
2174 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2175 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2176 true, GSI_SAME_STMT);
2177 args.safe_push (t);
2178 }
2179 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2180 gimple_set_location (g, loc);
2181 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2182
2183 cond = unshare_expr (cond);
2184 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2185 GSI_CONTINUE_LINKING);
2186 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2187 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2188 e3->probability = REG_BR_PROB_BASE / 8;
2189 e1->probability = REG_BR_PROB_BASE - e3->probability;
2190 e1->flags = EDGE_TRUE_VALUE;
2191 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2192
2193 *gsi = gsi_after_labels (e2->dest);
2194}
2195
2196/* Expand all #pragma omp ordered depend(source) and
2197 #pragma omp ordered depend(sink:...) constructs in the current
2198 #pragma omp for ordered(n) region. */
2199
2200static void
2201expand_omp_ordered_source_sink (struct omp_region *region,
2202 struct omp_for_data *fd, tree *counts,
2203 basic_block cont_bb)
2204{
2205 struct omp_region *inner;
2206 int i;
2207 for (i = fd->collapse - 1; i < fd->ordered; i++)
2208 if (i == fd->collapse - 1 && fd->collapse > 1)
2209 counts[i] = NULL_TREE;
2210 else if (i >= fd->collapse && !cont_bb)
2211 counts[i] = build_zero_cst (fd->iter_type);
2212 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2213 && integer_onep (fd->loops[i].step))
2214 counts[i] = NULL_TREE;
2215 else
2216 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2217 tree atype
2218 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2219 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2220 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2221
2222 for (inner = region->inner; inner; inner = inner->next)
2223 if (inner->type == GIMPLE_OMP_ORDERED)
2224 {
2225 gomp_ordered *ord_stmt = inner->ord_stmt;
2226 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2227 location_t loc = gimple_location (ord_stmt);
2228 tree c;
2229 for (c = gimple_omp_ordered_clauses (ord_stmt);
2230 c; c = OMP_CLAUSE_CHAIN (c))
2231 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2232 break;
2233 if (c)
2234 expand_omp_ordered_source (&gsi, fd, counts, loc);
2235 for (c = gimple_omp_ordered_clauses (ord_stmt);
2236 c; c = OMP_CLAUSE_CHAIN (c))
2237 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2238 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2239 gsi_remove (&gsi, true);
2240 }
2241}
2242
2243/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2244 collapsed. */
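/* For example, with ordered(2) and no collapse clause only the outermost
   loop is divided among the threads; the inner loop is rebuilt here
   around the body (header, latch and exit blocks are created below) so
   that its control variable and the corresponding element of the
   .orditera array stay up to date for the depend(source)/depend(sink)
   bookkeeping.  */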
2245
2246static basic_block
2247expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2248 basic_block cont_bb, basic_block body_bb,
2249 bool ordered_lastprivate)
2250{
2251 if (fd->ordered == fd->collapse)
2252 return cont_bb;
2253
2254 if (!cont_bb)
2255 {
2256 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2257 for (int i = fd->collapse; i < fd->ordered; i++)
2258 {
2259 tree type = TREE_TYPE (fd->loops[i].v);
2260 tree n1 = fold_convert (type, fd->loops[i].n1);
2261 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2262 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2263 size_int (i - fd->collapse + 1),
2264 NULL_TREE, NULL_TREE);
2265 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2266 }
2267 return NULL;
2268 }
2269
2270 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2271 {
2272 tree t, type = TREE_TYPE (fd->loops[i].v);
2273 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2274 expand_omp_build_assign (&gsi, fd->loops[i].v,
2275 fold_convert (type, fd->loops[i].n1));
2276 if (counts[i])
2277 expand_omp_build_assign (&gsi, counts[i],
2278 build_zero_cst (fd->iter_type));
2279 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2280 size_int (i - fd->collapse + 1),
2281 NULL_TREE, NULL_TREE);
2282 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2283 if (!gsi_end_p (gsi))
2284 gsi_prev (&gsi);
2285 else
2286 gsi = gsi_last_bb (body_bb);
2287 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2288 basic_block new_body = e1->dest;
2289 if (body_bb == cont_bb)
2290 cont_bb = new_body;
2291 edge e2 = NULL;
2292 basic_block new_header;
2293 if (EDGE_COUNT (cont_bb->preds) > 0)
2294 {
2295 gsi = gsi_last_bb (cont_bb);
2296 if (POINTER_TYPE_P (type))
2297 t = fold_build_pointer_plus (fd->loops[i].v,
2298 fold_convert (sizetype,
2299 fd->loops[i].step));
2300 else
2301 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2302 fold_convert (type, fd->loops[i].step));
2303 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2304 if (counts[i])
2305 {
2306 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2307 build_int_cst (fd->iter_type, 1));
2308 expand_omp_build_assign (&gsi, counts[i], t);
2309 t = counts[i];
2310 }
2311 else
2312 {
2313 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2314 fd->loops[i].v, fd->loops[i].n1);
2315 t = fold_convert (fd->iter_type, t);
2316 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2317 true, GSI_SAME_STMT);
2318 }
2319 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2320 size_int (i - fd->collapse + 1),
2321 NULL_TREE, NULL_TREE);
2322 expand_omp_build_assign (&gsi, aref, t);
2323 gsi_prev (&gsi);
2324 e2 = split_block (cont_bb, gsi_stmt (gsi));
2325 new_header = e2->dest;
2326 }
2327 else
2328 new_header = cont_bb;
2329 gsi = gsi_after_labels (new_header);
2330 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2331 true, GSI_SAME_STMT);
2332 tree n2
2333 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2334 true, NULL_TREE, true, GSI_SAME_STMT);
2335 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2336 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2337 edge e3 = split_block (new_header, gsi_stmt (gsi));
2338 cont_bb = e3->dest;
2339 remove_edge (e1);
2340 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2341 e3->flags = EDGE_FALSE_VALUE;
2342 e3->probability = REG_BR_PROB_BASE / 8;
2343 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2344 e1->probability = REG_BR_PROB_BASE - e3->probability;
2345
2346 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2347 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2348
2349 if (e2)
2350 {
2351 struct loop *loop = alloc_loop ();
2352 loop->header = new_header;
2353 loop->latch = e2->src;
2354 add_loop (loop, body_bb->loop_father);
2355 }
2356 }
2357
2358 /* If there are any lastprivate clauses and it is possible some loops
2359 might have zero iterations, ensure all the decls are initialized;
2360 otherwise we could crash evaluating C++ class iterators with lastprivate
2361 clauses. */
2362 bool need_inits = false;
2363 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2364 if (need_inits)
2365 {
2366 tree type = TREE_TYPE (fd->loops[i].v);
2367 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2368 expand_omp_build_assign (&gsi, fd->loops[i].v,
2369 fold_convert (type, fd->loops[i].n1));
2370 }
2371 else
2372 {
2373 tree type = TREE_TYPE (fd->loops[i].v);
2374 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2375 boolean_type_node,
2376 fold_convert (type, fd->loops[i].n1),
2377 fold_convert (type, fd->loops[i].n2));
2378 if (!integer_onep (this_cond))
2379 need_inits = true;
2380 }
2381
2382 return cont_bb;
2383}
2384
2385/* A subroutine of expand_omp_for. Generate code for a parallel
2386 loop with any schedule. Given parameters:
2387
2388 for (V = N1; V cond N2; V += STEP) BODY;
2389
2390 where COND is "<" or ">", we generate pseudocode
2391
2392 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2393 if (more) goto L0; else goto L3;
2394 L0:
2395 V = istart0;
2396 iend = iend0;
2397 L1:
2398 BODY;
2399 V += STEP;
2400 if (V cond iend) goto L1; else goto L2;
2401 L2:
2402 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2403 L3:
2404
2405 If this is a combined omp parallel loop, instead of the call to
2406 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2407 If this is gimple_omp_for_combined_p loop, then instead of assigning
2408 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2409 inner GIMPLE_OMP_FOR and V += STEP; and
2410 if (V cond iend) goto L1; else goto L2; are removed.
2411
2412 For collapsed loops, given parameters:
2413 collapse(3)
2414 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2415 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2416 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2417 BODY;
2418
2419 we generate pseudocode
2420
2421 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2422 if (cond3 is <)
2423 adj = STEP3 - 1;
2424 else
2425 adj = STEP3 + 1;
2426 count3 = (adj + N32 - N31) / STEP3;
2427 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2428 if (cond2 is <)
2429 adj = STEP2 - 1;
2430 else
2431 adj = STEP2 + 1;
2432 count2 = (adj + N22 - N21) / STEP2;
2433 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2434 if (cond1 is <)
2435 adj = STEP1 - 1;
2436 else
2437 adj = STEP1 + 1;
2438 count1 = (adj + N12 - N11) / STEP1;
2439 count = count1 * count2 * count3;
2440 goto Z1;
2441 Z0:
2442 count = 0;
2443 Z1:
2444 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2445 if (more) goto L0; else goto L3;
2446 L0:
2447 V = istart0;
2448 T = V;
2449 V3 = N31 + (T % count3) * STEP3;
2450 T = T / count3;
2451 V2 = N21 + (T % count2) * STEP2;
2452 T = T / count2;
2453 V1 = N11 + T * STEP1;
2454 iend = iend0;
2455 L1:
2456 BODY;
2457 V += 1;
2458 if (V < iend) goto L10; else goto L2;
2459 L10:
2460 V3 += STEP3;
2461 if (V3 cond3 N32) goto L1; else goto L11;
2462 L11:
2463 V3 = N31;
2464 V2 += STEP2;
2465 if (V2 cond2 N22) goto L1; else goto L12;
2466 L12:
2467 V2 = N21;
2468 V1 += STEP1;
2469 goto L1;
2470 L2:
2471 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2472 L3:
2473
2474 */
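/* As a concrete instance (for illustration only), a loop such as

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       body (i);

   reaches this function with START_FN and NEXT_FN naming the dynamic
   flavor of the runtime entry points, so the GOMP_loop_foo_start and
   GOMP_loop_foo_next calls in the pseudocode above become
   GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0) and
   GOMP_loop_dynamic_next (&istart0, &iend0).  */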
2475
2476static void
2477expand_omp_for_generic (struct omp_region *region,
2478 struct omp_for_data *fd,
2479 enum built_in_function start_fn,
2480 enum built_in_function next_fn,
2481 gimple *inner_stmt)
2482{
2483 tree type, istart0, iend0, iend;
2484 tree t, vmain, vback, bias = NULL_TREE;
2485 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2486 basic_block l2_bb = NULL, l3_bb = NULL;
2487 gimple_stmt_iterator gsi;
2488 gassign *assign_stmt;
2489 bool in_combined_parallel = is_combined_parallel (region);
2490 bool broken_loop = region->cont == NULL;
2491 edge e, ne;
2492 tree *counts = NULL;
2493 int i;
2494 bool ordered_lastprivate = false;
2495
2496 gcc_assert (!broken_loop || !in_combined_parallel);
2497 gcc_assert (fd->iter_type == long_integer_type_node
2498 || !in_combined_parallel);
2499
2500 entry_bb = region->entry;
2501 cont_bb = region->cont;
2502 collapse_bb = NULL;
2503 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2504 gcc_assert (broken_loop
2505 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2506 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2507 l1_bb = single_succ (l0_bb);
2508 if (!broken_loop)
2509 {
2510 l2_bb = create_empty_bb (cont_bb);
2511 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2512 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2513 == l1_bb));
2514 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2515 }
2516 else
2517 l2_bb = NULL;
2518 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2519 exit_bb = region->exit;
2520
2521 gsi = gsi_last_bb (entry_bb);
2522
2523 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2524 if (fd->ordered
2525 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2526 OMP_CLAUSE_LASTPRIVATE))
2527 ordered_lastprivate = false;
2528 if (fd->collapse > 1 || fd->ordered)
2529 {
2530 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2531 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2532
2533 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2534 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2535 zero_iter1_bb, first_zero_iter1,
2536 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2537
2538 if (zero_iter1_bb)
2539 {
2540 /* Some counts[i] vars might be uninitialized if
2541 some loop has zero iterations. But the body shouldn't
2542 be executed in that case, so just avoid uninit warnings. */
2543 for (i = first_zero_iter1;
2544 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2545 if (SSA_VAR_P (counts[i]))
2546 TREE_NO_WARNING (counts[i]) = 1;
2547 gsi_prev (&gsi);
2548 e = split_block (entry_bb, gsi_stmt (gsi));
2549 entry_bb = e->dest;
2550 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2551 gsi = gsi_last_bb (entry_bb);
2552 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2553 get_immediate_dominator (CDI_DOMINATORS,
2554 zero_iter1_bb));
2555 }
2556 if (zero_iter2_bb)
2557 {
2558 /* Some counts[i] vars might be uninitialized if
2559 some loop has zero iterations. But the body shouldn't
2560 be executed in that case, so just avoid uninit warnings. */
2561 for (i = first_zero_iter2; i < fd->ordered; i++)
2562 if (SSA_VAR_P (counts[i]))
2563 TREE_NO_WARNING (counts[i]) = 1;
2564 if (zero_iter1_bb)
2565 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2566 else
2567 {
2568 gsi_prev (&gsi);
2569 e = split_block (entry_bb, gsi_stmt (gsi));
2570 entry_bb = e->dest;
2571 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2572 gsi = gsi_last_bb (entry_bb);
2573 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2574 get_immediate_dominator
2575 (CDI_DOMINATORS, zero_iter2_bb));
2576 }
2577 }
2578 if (fd->collapse == 1)
2579 {
2580 counts[0] = fd->loop.n2;
2581 fd->loop = fd->loops[0];
2582 }
2583 }
2584
2585 type = TREE_TYPE (fd->loop.v);
2586 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2587 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2588 TREE_ADDRESSABLE (istart0) = 1;
2589 TREE_ADDRESSABLE (iend0) = 1;
2590
2591 /* See if we need to bias by LLONG_MIN. */
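/* The runtime interface used here works on unsigned long long values,
   so if the signed iteration variable could take negative values we add
   TYPE_MIN_VALUE of its type to both bounds, which maps the signed range
   monotonically onto the unsigned range; the same bias is subtracted
   again below when istart0/iend0 are converted back to the loop
   variable's type.  */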
2592 if (fd->iter_type == long_long_unsigned_type_node
2593 && TREE_CODE (type) == INTEGER_TYPE
2594 && !TYPE_UNSIGNED (type)
2595 && fd->ordered == 0)
2596 {
2597 tree n1, n2;
2598
2599 if (fd->loop.cond_code == LT_EXPR)
2600 {
2601 n1 = fd->loop.n1;
2602 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2603 }
2604 else
2605 {
2606 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2607 n2 = fd->loop.n1;
2608 }
2609 if (TREE_CODE (n1) != INTEGER_CST
2610 || TREE_CODE (n2) != INTEGER_CST
2611 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2612 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2613 }
2614
2615 gimple_stmt_iterator gsif = gsi;
2616 gsi_prev (&gsif);
2617
2618 tree arr = NULL_TREE;
2619 if (in_combined_parallel)
2620 {
2621 gcc_assert (fd->ordered == 0);
2622 /* In a combined parallel loop, emit a call to
2623 GOMP_loop_foo_next. */
2624 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2625 build_fold_addr_expr (istart0),
2626 build_fold_addr_expr (iend0));
2627 }
2628 else
2629 {
2630 tree t0, t1, t2, t3, t4;
2631 /* If this is not a combined parallel loop, emit a call to
2632 GOMP_loop_foo_start in ENTRY_BB. */
2633 t4 = build_fold_addr_expr (iend0);
2634 t3 = build_fold_addr_expr (istart0);
2635 if (fd->ordered)
2636 {
2637 t0 = build_int_cst (unsigned_type_node,
2638 fd->ordered - fd->collapse + 1);
2639 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2640 fd->ordered
2641 - fd->collapse + 1),
2642 ".omp_counts");
2643 DECL_NAMELESS (arr) = 1;
2644 TREE_ADDRESSABLE (arr) = 1;
2645 TREE_STATIC (arr) = 1;
2646 vec<constructor_elt, va_gc> *v;
2647 vec_alloc (v, fd->ordered - fd->collapse + 1);
2648 int idx;
2649
2650 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2651 {
2652 tree c;
2653 if (idx == 0 && fd->collapse > 1)
2654 c = fd->loop.n2;
2655 else
2656 c = counts[idx + fd->collapse - 1];
2657 tree purpose = size_int (idx);
2658 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2659 if (TREE_CODE (c) != INTEGER_CST)
2660 TREE_STATIC (arr) = 0;
2661 }
2662
2663 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2664 if (!TREE_STATIC (arr))
2665 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2666 void_type_node, arr),
2667 true, NULL_TREE, true, GSI_SAME_STMT);
2668 t1 = build_fold_addr_expr (arr);
2669 t2 = NULL_TREE;
2670 }
2671 else
2672 {
2673 t2 = fold_convert (fd->iter_type, fd->loop.step);
2674 t1 = fd->loop.n2;
2675 t0 = fd->loop.n1;
2676 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2677 {
2678 tree innerc
2679 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2680 OMP_CLAUSE__LOOPTEMP_);
2681 gcc_assert (innerc);
2682 t0 = OMP_CLAUSE_DECL (innerc);
2683 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2684 OMP_CLAUSE__LOOPTEMP_);
2685 gcc_assert (innerc);
2686 t1 = OMP_CLAUSE_DECL (innerc);
2687 }
2688 if (POINTER_TYPE_P (TREE_TYPE (t0))
2689 && TYPE_PRECISION (TREE_TYPE (t0))
2690 != TYPE_PRECISION (fd->iter_type))
2691 {
2692 /* Avoid casting pointers to integer of a different size. */
2693 tree itype = signed_type_for (type);
2694 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2695 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2696 }
2697 else
2698 {
2699 t1 = fold_convert (fd->iter_type, t1);
2700 t0 = fold_convert (fd->iter_type, t0);
2701 }
2702 if (bias)
2703 {
2704 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2705 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2706 }
2707 }
2708 if (fd->iter_type == long_integer_type_node || fd->ordered)
2709 {
2710 if (fd->chunk_size)
2711 {
2712 t = fold_convert (fd->iter_type, fd->chunk_size);
2713 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2714 if (fd->ordered)
2715 t = build_call_expr (builtin_decl_explicit (start_fn),
2716 5, t0, t1, t, t3, t4);
2717 else
2718 t = build_call_expr (builtin_decl_explicit (start_fn),
2719 6, t0, t1, t2, t, t3, t4);
2720 }
2721 else if (fd->ordered)
2722 t = build_call_expr (builtin_decl_explicit (start_fn),
2723 4, t0, t1, t3, t4);
2724 else
2725 t = build_call_expr (builtin_decl_explicit (start_fn),
2726 5, t0, t1, t2, t3, t4);
2727 }
2728 else
2729 {
2730 tree t5;
2731 tree c_bool_type;
2732 tree bfn_decl;
2733
2734 /* The GOMP_loop_ull_*start functions have an additional boolean
2735 argument, true for < loops and false for > loops.
2736 In Fortran, the C bool type can be different from
2737 boolean_type_node. */
2738 bfn_decl = builtin_decl_explicit (start_fn);
2739 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2740 t5 = build_int_cst (c_bool_type,
2741 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2742 if (fd->chunk_size)
2743 {
2744 tree bfn_decl = builtin_decl_explicit (start_fn);
2745 t = fold_convert (fd->iter_type, fd->chunk_size);
2746 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2747 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2748 }
2749 else
2750 t = build_call_expr (builtin_decl_explicit (start_fn),
2751 6, t5, t0, t1, t2, t3, t4);
2752 }
2753 }
2754 if (TREE_TYPE (t) != boolean_type_node)
2755 t = fold_build2 (NE_EXPR, boolean_type_node,
2756 t, build_int_cst (TREE_TYPE (t), 0));
2757 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2758 true, GSI_SAME_STMT);
2759 if (arr && !TREE_STATIC (arr))
2760 {
2761 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2762 TREE_THIS_VOLATILE (clobber) = 1;
2763 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2764 GSI_SAME_STMT);
2765 }
2766 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2767
2768 /* Remove the GIMPLE_OMP_FOR statement. */
2769 gsi_remove (&gsi, true);
2770
2771 if (gsi_end_p (gsif))
2772 gsif = gsi_after_labels (gsi_bb (gsif));
2773 gsi_next (&gsif);
2774
2775 /* Iteration setup for sequential loop goes in L0_BB. */
2776 tree startvar = fd->loop.v;
2777 tree endvar = NULL_TREE;
2778
2779 if (gimple_omp_for_combined_p (fd->for_stmt))
2780 {
2781 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2782 && gimple_omp_for_kind (inner_stmt)
2783 == GF_OMP_FOR_KIND_SIMD);
2784 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2785 OMP_CLAUSE__LOOPTEMP_);
2786 gcc_assert (innerc);
2787 startvar = OMP_CLAUSE_DECL (innerc);
2788 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2789 OMP_CLAUSE__LOOPTEMP_);
2790 gcc_assert (innerc);
2791 endvar = OMP_CLAUSE_DECL (innerc);
2792 }
2793
2794 gsi = gsi_start_bb (l0_bb);
2795 t = istart0;
2796 if (fd->ordered && fd->collapse == 1)
2797 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2798 fold_convert (fd->iter_type, fd->loop.step));
2799 else if (bias)
2800 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2801 if (fd->ordered && fd->collapse == 1)
2802 {
2803 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2804 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2805 fd->loop.n1, fold_convert (sizetype, t));
2806 else
2807 {
2808 t = fold_convert (TREE_TYPE (startvar), t);
2809 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2810 fd->loop.n1, t);
2811 }
2812 }
2813 else
2814 {
2815 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2816 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2817 t = fold_convert (TREE_TYPE (startvar), t);
2818 }
2819 t = force_gimple_operand_gsi (&gsi, t,
2820 DECL_P (startvar)
2821 && TREE_ADDRESSABLE (startvar),
2822 NULL_TREE, false, GSI_CONTINUE_LINKING);
2823 assign_stmt = gimple_build_assign (startvar, t);
2824 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2825
2826 t = iend0;
2827 if (fd->ordered && fd->collapse == 1)
2828 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2829 fold_convert (fd->iter_type, fd->loop.step));
2830 else if (bias)
2831 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2832 if (fd->ordered && fd->collapse == 1)
2833 {
2834 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2835 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2836 fd->loop.n1, fold_convert (sizetype, t));
2837 else
2838 {
2839 t = fold_convert (TREE_TYPE (startvar), t);
2840 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2841 fd->loop.n1, t);
2842 }
2843 }
2844 else
2845 {
2846 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2847 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2848 t = fold_convert (TREE_TYPE (startvar), t);
2849 }
2850 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2851 false, GSI_CONTINUE_LINKING);
2852 if (endvar)
2853 {
2854 assign_stmt = gimple_build_assign (endvar, iend);
2855 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2856 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2857 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2858 else
2859 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2860 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2861 }
2862 /* Handle linear clause adjustments. */
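/* For illustration, for a clause such as linear(x:2) on a worksharing
   loop, the code below computes the current logical iteration number
   ITERCNT = (startvar - N1) / STEP and assigns x = x0 + ITERCNT * 2,
   where x0 is the incoming value of x saved into a temporary at loop
   setup (via the GSIF iterator above).  */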
2863 tree itercnt = NULL_TREE;
2864 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2865 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2866 c; c = OMP_CLAUSE_CHAIN (c))
2867 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2868 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2869 {
2870 tree d = OMP_CLAUSE_DECL (c);
2871 bool is_ref = omp_is_reference (d);
2872 tree t = d, a, dest;
2873 if (is_ref)
2874 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2875 tree type = TREE_TYPE (t);
2876 if (POINTER_TYPE_P (type))
2877 type = sizetype;
2878 dest = unshare_expr (t);
2879 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2880 expand_omp_build_assign (&gsif, v, t);
2881 if (itercnt == NULL_TREE)
2882 {
2883 itercnt = startvar;
2884 tree n1 = fd->loop.n1;
2885 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2886 {
2887 itercnt
2888 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2889 itercnt);
2890 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2891 }
2892 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2893 itercnt, n1);
2894 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2895 itercnt, fd->loop.step);
2896 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2897 NULL_TREE, false,
2898 GSI_CONTINUE_LINKING);
2899 }
2900 a = fold_build2 (MULT_EXPR, type,
2901 fold_convert (type, itercnt),
2902 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2903 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2904 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2905 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2906 false, GSI_CONTINUE_LINKING);
2907 assign_stmt = gimple_build_assign (dest, t);
2908 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2909 }
2910 if (fd->collapse > 1)
2911 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2912
2913 if (fd->ordered)
2914 {
2915 /* Until now, the counts array contained the number of iterations
2916 (or a variable holding it) for the ith loop.  From now on, we need
2917 those counts only for the collapsed loops, and only from the 2nd
2918 to the last collapsed one.  Move them one element earlier;
2919 we'll use counts[fd->collapse - 1] for the first source/sink
2920 iteration counter and so on, and counts[fd->ordered]
2921 as the array holding the current counter values for
2922 depend(source). */
2923 if (fd->collapse > 1)
2924 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2925 if (broken_loop)
2926 {
2927 int i;
2928 for (i = fd->collapse; i < fd->ordered; i++)
2929 {
2930 tree type = TREE_TYPE (fd->loops[i].v);
2931 tree this_cond
2932 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2933 fold_convert (type, fd->loops[i].n1),
2934 fold_convert (type, fd->loops[i].n2));
2935 if (!integer_onep (this_cond))
2936 break;
2937 }
2938 if (i < fd->ordered)
2939 {
2940 cont_bb
2941 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2942 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2943 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2944 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2945 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2946 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2947 make_edge (cont_bb, l1_bb, 0);
2948 l2_bb = create_empty_bb (cont_bb);
2949 broken_loop = false;
2950 }
2951 }
2952 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2953 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2954 ordered_lastprivate);
2955 if (counts[fd->collapse - 1])
2956 {
2957 gcc_assert (fd->collapse == 1);
2958 gsi = gsi_last_bb (l0_bb);
2959 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2960 istart0, true);
2961 gsi = gsi_last_bb (cont_bb);
2962 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2963 build_int_cst (fd->iter_type, 1));
2964 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2965 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2966 size_zero_node, NULL_TREE, NULL_TREE);
2967 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
2968 t = counts[fd->collapse - 1];
2969 }
2970 else if (fd->collapse > 1)
2971 t = fd->loop.v;
2972 else
2973 {
2974 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
2975 fd->loops[0].v, fd->loops[0].n1);
2976 t = fold_convert (fd->iter_type, t);
2977 }
2978 gsi = gsi_last_bb (l0_bb);
2979 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2980 size_zero_node, NULL_TREE, NULL_TREE);
2981 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2982 false, GSI_CONTINUE_LINKING);
2983 expand_omp_build_assign (&gsi, aref, t, true);
2984 }
2985
2986 if (!broken_loop)
2987 {
2988 /* Code to control the increment and predicate for the sequential
2989 loop goes in the CONT_BB. */
2990 gsi = gsi_last_bb (cont_bb);
2991 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
2992 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
2993 vmain = gimple_omp_continue_control_use (cont_stmt);
2994 vback = gimple_omp_continue_control_def (cont_stmt);
2995
2996 if (!gimple_omp_for_combined_p (fd->for_stmt))
2997 {
2998 if (POINTER_TYPE_P (type))
2999 t = fold_build_pointer_plus (vmain, fd->loop.step);
3000 else
3001 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3002 t = force_gimple_operand_gsi (&gsi, t,
3003 DECL_P (vback)
3004 && TREE_ADDRESSABLE (vback),
3005 NULL_TREE, true, GSI_SAME_STMT);
3006 assign_stmt = gimple_build_assign (vback, t);
3007 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3008
3009 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3010 {
3011 if (fd->collapse > 1)
3012 t = fd->loop.v;
3013 else
3014 {
3015 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3016 fd->loops[0].v, fd->loops[0].n1);
3017 t = fold_convert (fd->iter_type, t);
3018 }
3019 tree aref = build4 (ARRAY_REF, fd->iter_type,
3020 counts[fd->ordered], size_zero_node,
3021 NULL_TREE, NULL_TREE);
3022 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3023 true, GSI_SAME_STMT);
3024 expand_omp_build_assign (&gsi, aref, t);
3025 }
3026
3027 t = build2 (fd->loop.cond_code, boolean_type_node,
3028 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3029 iend);
3030 gcond *cond_stmt = gimple_build_cond_empty (t);
3031 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3032 }
3033
3034 /* Remove GIMPLE_OMP_CONTINUE. */
3035 gsi_remove (&gsi, true);
3036
3037 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3038 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3039
3040 /* Emit code to get the next parallel iteration in L2_BB. */
3041 gsi = gsi_start_bb (l2_bb);
3042
3043 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3044 build_fold_addr_expr (istart0),
3045 build_fold_addr_expr (iend0));
3046 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3047 false, GSI_CONTINUE_LINKING);
3048 if (TREE_TYPE (t) != boolean_type_node)
3049 t = fold_build2 (NE_EXPR, boolean_type_node,
3050 t, build_int_cst (TREE_TYPE (t), 0));
3051 gcond *cond_stmt = gimple_build_cond_empty (t);
3052 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3053 }
3054
3055 /* Add the loop cleanup function. */
3056 gsi = gsi_last_bb (exit_bb);
3057 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3058 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3059 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3060 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3061 else
3062 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3063 gcall *call_stmt = gimple_build_call (t, 0);
3064 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3065 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3066 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3067 if (fd->ordered)
3068 {
3069 tree arr = counts[fd->ordered];
3070 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3071 TREE_THIS_VOLATILE (clobber) = 1;
3072 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3073 GSI_SAME_STMT);
3074 }
3075 gsi_remove (&gsi, true);
3076
3077 /* Connect the new blocks. */
3078 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3079 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3080
3081 if (!broken_loop)
3082 {
3083 gimple_seq phis;
3084
3085 e = find_edge (cont_bb, l3_bb);
3086 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3087
3088 phis = phi_nodes (l3_bb);
3089 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3090 {
3091 gimple *phi = gsi_stmt (gsi);
3092 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3093 PHI_ARG_DEF_FROM_EDGE (phi, e));
3094 }
3095 remove_edge (e);
3096
3097 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3098 e = find_edge (cont_bb, l1_bb);
3099 if (e == NULL)
3100 {
3101 e = BRANCH_EDGE (cont_bb);
3102 gcc_assert (single_succ (e->dest) == l1_bb);
3103 }
3104 if (gimple_omp_for_combined_p (fd->for_stmt))
3105 {
3106 remove_edge (e);
3107 e = NULL;
3108 }
3109 else if (fd->collapse > 1)
3110 {
3111 remove_edge (e);
3112 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3113 }
3114 else
3115 e->flags = EDGE_TRUE_VALUE;
3116 if (e)
3117 {
3118 e->probability = REG_BR_PROB_BASE * 7 / 8;
3119 find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
3120 }
3121 else
3122 {
3123 e = find_edge (cont_bb, l2_bb);
3124 e->flags = EDGE_FALLTHRU;
3125 }
3126 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3127
3128 if (gimple_in_ssa_p (cfun))
3129 {
3130 /* Add phis to the outer loop that connect to the phis in the inner,
3131 original loop, and move the loop entry value of the inner phi to
3132 the loop entry value of the outer phi. */
3133 gphi_iterator psi;
3134 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3135 {
3136 source_location locus;
3137 gphi *nphi;
3138 gphi *exit_phi = psi.phi ();
3139
3140 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3141 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3142
3143 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3144 edge latch_to_l1 = find_edge (latch, l1_bb);
3145 gphi *inner_phi
3146 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3147
3148 tree t = gimple_phi_result (exit_phi);
3149 tree new_res = copy_ssa_name (t, NULL);
3150 nphi = create_phi_node (new_res, l0_bb);
3151
3152 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3153 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3154 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3155 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3156 add_phi_arg (nphi, t, entry_to_l0, locus);
3157
3158 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3159 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3160
3161 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3162 };
3163 }
3164
3165 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3166 recompute_dominator (CDI_DOMINATORS, l2_bb));
3167 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3168 recompute_dominator (CDI_DOMINATORS, l3_bb));
3169 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3170 recompute_dominator (CDI_DOMINATORS, l0_bb));
3171 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3172 recompute_dominator (CDI_DOMINATORS, l1_bb));
3173
3174 /* We enter expand_omp_for_generic with a loop. This original loop may
3175 have its own loop struct, or it may be part of an outer loop struct
3176 (which may be the fake loop). */
3177 struct loop *outer_loop = entry_bb->loop_father;
3178 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3179
3180 add_bb_to_loop (l2_bb, outer_loop);
3181
3182 /* We've added a new loop around the original loop. Allocate the
3183 corresponding loop struct. */
3184 struct loop *new_loop = alloc_loop ();
3185 new_loop->header = l0_bb;
3186 new_loop->latch = l2_bb;
3187 add_loop (new_loop, outer_loop);
3188
3189 /* Allocate a loop structure for the original loop unless we already
3190 had one. */
3191 if (!orig_loop_has_loop_struct
3192 && !gimple_omp_for_combined_p (fd->for_stmt))
3193 {
3194 struct loop *orig_loop = alloc_loop ();
3195 orig_loop->header = l1_bb;
3196 /* The loop may have multiple latches. */
3197 add_loop (orig_loop, new_loop);
3198 }
3199 }
3200}
3201
3202/* A subroutine of expand_omp_for. Generate code for a parallel
3203 loop with static schedule and no specified chunk size. Given
3204 parameters:
3205
3206 for (V = N1; V cond N2; V += STEP) BODY;
3207
3208 where COND is "<" or ">", we generate pseudocode
3209
3210 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3211 if (cond is <)
3212 adj = STEP - 1;
3213 else
3214 adj = STEP + 1;
3215 if ((__typeof (V)) -1 > 0 && cond is >)
3216 n = -(adj + N2 - N1) / -STEP;
3217 else
3218 n = (adj + N2 - N1) / STEP;
3219 q = n / nthreads;
3220 tt = n % nthreads;
3221 if (threadid < tt) goto L3; else goto L4;
3222 L3:
3223 tt = 0;
3224 q = q + 1;
3225 L4:
3226 s0 = q * threadid + tt;
3227 e0 = s0 + q;
3228 V = s0 * STEP + N1;
3229 if (s0 >= e0) goto L2; else goto L0;
3230 L0:
3231 e = e0 * STEP + N1;
3232 L1:
3233 BODY;
3234 V += STEP;
3235 if (V cond e) goto L1;
3236 L2:
3237*/
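/* Worked example (for illustration): with n = 10 iterations and
   nthreads = 4, q = 10 / 4 = 2 and tt = 10 % 4 = 2, so threads 0 and 1
   (threadid < tt) each take q + 1 = 3 iterations and threads 2 and 3
   each take 2, covering 3 + 3 + 2 + 2 = 10.  Thread 2, for instance,
   ends up with s0 = 2 * 2 + 2 = 6 and e0 = 8, i.e. iterations 6 and 7.  */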
3238
3239static void
3240expand_omp_for_static_nochunk (struct omp_region *region,
3241 struct omp_for_data *fd,
3242 gimple *inner_stmt)
3243{
3244 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3245 tree type, itype, vmain, vback;
3246 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3247 basic_block body_bb, cont_bb, collapse_bb = NULL;
3248 basic_block fin_bb;
3249 gimple_stmt_iterator gsi;
3250 edge ep;
3251 bool broken_loop = region->cont == NULL;
3252 tree *counts = NULL;
3253 tree n1, n2, step;
3254
3255 itype = type = TREE_TYPE (fd->loop.v);
3256 if (POINTER_TYPE_P (type))
3257 itype = signed_type_for (type);
3258
3259 entry_bb = region->entry;
3260 cont_bb = region->cont;
3261 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3262 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3263 gcc_assert (broken_loop
3264 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3265 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3266 body_bb = single_succ (seq_start_bb);
3267 if (!broken_loop)
3268 {
3269 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3270 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3271 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3272 }
3273 exit_bb = region->exit;
3274
3275 /* Iteration space partitioning goes in ENTRY_BB. */
3276 gsi = gsi_last_bb (entry_bb);
3277 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3278
3279 if (fd->collapse > 1)
3280 {
3281 int first_zero_iter = -1, dummy = -1;
3282 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3283
3284 counts = XALLOCAVEC (tree, fd->collapse);
3285 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3286 fin_bb, first_zero_iter,
3287 dummy_bb, dummy, l2_dom_bb);
3288 t = NULL_TREE;
3289 }
3290 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3291 t = integer_one_node;
3292 else
3293 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3294 fold_convert (type, fd->loop.n1),
3295 fold_convert (type, fd->loop.n2));
3296 if (fd->collapse == 1
3297 && TYPE_UNSIGNED (type)
3298 && (t == NULL_TREE || !integer_onep (t)))
3299 {
3300 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3301 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3302 true, GSI_SAME_STMT);
3303 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3304 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3305 true, GSI_SAME_STMT);
3306 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3307 NULL_TREE, NULL_TREE);
3308 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3309 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3310 expand_omp_regimplify_p, NULL, NULL)
3311 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3312 expand_omp_regimplify_p, NULL, NULL))
3313 {
3314 gsi = gsi_for_stmt (cond_stmt);
3315 gimple_regimplify_operands (cond_stmt, &gsi);
3316 }
3317 ep = split_block (entry_bb, cond_stmt);
3318 ep->flags = EDGE_TRUE_VALUE;
3319 entry_bb = ep->dest;
3320 ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3321 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3322 ep->probability = REG_BR_PROB_BASE / 2000 - 1;
3323 if (gimple_in_ssa_p (cfun))
3324 {
3325 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3326 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3327 !gsi_end_p (gpi); gsi_next (&gpi))
3328 {
3329 gphi *phi = gpi.phi ();
3330 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3331 ep, UNKNOWN_LOCATION);
3332 }
3333 }
3334 gsi = gsi_last_bb (entry_bb);
3335 }
3336
3337 switch (gimple_omp_for_kind (fd->for_stmt))
3338 {
3339 case GF_OMP_FOR_KIND_FOR:
3340 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3341 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3342 break;
3343 case GF_OMP_FOR_KIND_DISTRIBUTE:
3344 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3345 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3346 break;
3347 default:
3348 gcc_unreachable ();
3349 }
3350 nthreads = build_call_expr (nthreads, 0);
3351 nthreads = fold_convert (itype, nthreads);
3352 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3353 true, GSI_SAME_STMT);
3354 threadid = build_call_expr (threadid, 0);
3355 threadid = fold_convert (itype, threadid);
3356 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3357 true, GSI_SAME_STMT);
3358
3359 n1 = fd->loop.n1;
3360 n2 = fd->loop.n2;
3361 step = fd->loop.step;
3362 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3363 {
3364 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3365 OMP_CLAUSE__LOOPTEMP_);
3366 gcc_assert (innerc);
3367 n1 = OMP_CLAUSE_DECL (innerc);
3368 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3369 OMP_CLAUSE__LOOPTEMP_);
3370 gcc_assert (innerc);
3371 n2 = OMP_CLAUSE_DECL (innerc);
3372 }
3373 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3374 true, NULL_TREE, true, GSI_SAME_STMT);
3375 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3376 true, NULL_TREE, true, GSI_SAME_STMT);
3377 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3378 true, NULL_TREE, true, GSI_SAME_STMT);
3379
3380 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3381 t = fold_build2 (PLUS_EXPR, itype, step, t);
3382 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3383 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3384 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3385 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3386 fold_build1 (NEGATE_EXPR, itype, t),
3387 fold_build1 (NEGATE_EXPR, itype, step));
3388 else
3389 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3390 t = fold_convert (itype, t);
3391 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3392
3393 q = create_tmp_reg (itype, "q");
3394 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3395 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3396 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3397
3398 tt = create_tmp_reg (itype, "tt");
3399 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3400 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3401 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3402
3403 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3404 gcond *cond_stmt = gimple_build_cond_empty (t);
3405 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3406
3407 second_bb = split_block (entry_bb, cond_stmt)->dest;
3408 gsi = gsi_last_bb (second_bb);
3409 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3410
3411 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3412 GSI_SAME_STMT);
3413 gassign *assign_stmt
3414 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3415 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3416
3417 third_bb = split_block (second_bb, assign_stmt)->dest;
3418 gsi = gsi_last_bb (third_bb);
3419 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3420
3421 t = build2 (MULT_EXPR, itype, q, threadid);
3422 t = build2 (PLUS_EXPR, itype, t, tt);
3423 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3424
3425 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3426 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3427
3428 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3429 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3430
3431 /* Remove the GIMPLE_OMP_FOR statement. */
3432 gsi_remove (&gsi, true);
3433
3434 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3435 gsi = gsi_start_bb (seq_start_bb);
3436
3437 tree startvar = fd->loop.v;
3438 tree endvar = NULL_TREE;
3439
3440 if (gimple_omp_for_combined_p (fd->for_stmt))
3441 {
3442 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3443 ? gimple_omp_parallel_clauses (inner_stmt)
3444 : gimple_omp_for_clauses (inner_stmt);
3445 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3446 gcc_assert (innerc);
3447 startvar = OMP_CLAUSE_DECL (innerc);
3448 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3449 OMP_CLAUSE__LOOPTEMP_);
3450 gcc_assert (innerc);
3451 endvar = OMP_CLAUSE_DECL (innerc);
3452 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3453 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3454 {
3455 int i;
3456 for (i = 1; i < fd->collapse; i++)
3457 {
3458 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3459 OMP_CLAUSE__LOOPTEMP_);
3460 gcc_assert (innerc);
3461 }
3462 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3463 OMP_CLAUSE__LOOPTEMP_);
3464 if (innerc)
3465 {
3466 /* If needed (distribute parallel for with lastprivate),
3467 propagate down the total number of iterations. */
3468 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3469 fd->loop.n2);
3470 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3471 GSI_CONTINUE_LINKING);
3472 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3473 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3474 }
3475 }
3476 }
3477 t = fold_convert (itype, s0);
3478 t = fold_build2 (MULT_EXPR, itype, t, step);
3479 if (POINTER_TYPE_P (type))
3480 t = fold_build_pointer_plus (n1, t);
3481 else
3482 t = fold_build2 (PLUS_EXPR, type, t, n1);
3483 t = fold_convert (TREE_TYPE (startvar), t);
3484 t = force_gimple_operand_gsi (&gsi, t,
3485 DECL_P (startvar)
3486 && TREE_ADDRESSABLE (startvar),
3487 NULL_TREE, false, GSI_CONTINUE_LINKING);
3488 assign_stmt = gimple_build_assign (startvar, t);
3489 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3490
3491 t = fold_convert (itype, e0);
3492 t = fold_build2 (MULT_EXPR, itype, t, step);
3493 if (POINTER_TYPE_P (type))
3494 t = fold_build_pointer_plus (n1, t);
3495 else
3496 t = fold_build2 (PLUS_EXPR, type, t, n1);
3497 t = fold_convert (TREE_TYPE (startvar), t);
3498 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3499 false, GSI_CONTINUE_LINKING);
3500 if (endvar)
3501 {
3502 assign_stmt = gimple_build_assign (endvar, e);
3503 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3504 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3505 assign_stmt = gimple_build_assign (fd->loop.v, e);
3506 else
3507 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3508 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3509 }
3510 /* Handle linear clause adjustments. */
3511 tree itercnt = NULL_TREE;
3512 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3513 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3514 c; c = OMP_CLAUSE_CHAIN (c))
3515 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3516 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3517 {
3518 tree d = OMP_CLAUSE_DECL (c);
3519 bool is_ref = omp_is_reference (d);
3520 tree t = d, a, dest;
3521 if (is_ref)
3522 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3523 if (itercnt == NULL_TREE)
3524 {
3525 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3526 {
3527 itercnt = fold_build2 (MINUS_EXPR, itype,
3528 fold_convert (itype, n1),
3529 fold_convert (itype, fd->loop.n1));
3530 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3531 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3532 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3533 NULL_TREE, false,
3534 GSI_CONTINUE_LINKING);
3535 }
3536 else
3537 itercnt = s0;
3538 }
3539 tree type = TREE_TYPE (t);
3540 if (POINTER_TYPE_P (type))
3541 type = sizetype;
3542 a = fold_build2 (MULT_EXPR, type,
3543 fold_convert (type, itercnt),
3544 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3545 dest = unshare_expr (t);
3546 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3547 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3548 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3549 false, GSI_CONTINUE_LINKING);
3550 assign_stmt = gimple_build_assign (dest, t);
3551 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3552 }
3553 if (fd->collapse > 1)
3554 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3555
3556 if (!broken_loop)
3557 {
3558 /* The code controlling the sequential loop replaces the
3559 GIMPLE_OMP_CONTINUE. */
3560 gsi = gsi_last_bb (cont_bb);
3561 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3562 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3563 vmain = gimple_omp_continue_control_use (cont_stmt);
3564 vback = gimple_omp_continue_control_def (cont_stmt);
3565
3566 if (!gimple_omp_for_combined_p (fd->for_stmt))
3567 {
3568 if (POINTER_TYPE_P (type))
3569 t = fold_build_pointer_plus (vmain, step);
3570 else
3571 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3572 t = force_gimple_operand_gsi (&gsi, t,
3573 DECL_P (vback)
3574 && TREE_ADDRESSABLE (vback),
3575 NULL_TREE, true, GSI_SAME_STMT);
3576 assign_stmt = gimple_build_assign (vback, t);
3577 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3578
3579 t = build2 (fd->loop.cond_code, boolean_type_node,
3580 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3581 ? t : vback, e);
3582 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3583 }
3584
3585 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3586 gsi_remove (&gsi, true);
3587
3588 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3589 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3590 }
3591
3592 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3593 gsi = gsi_last_bb (exit_bb);
3594 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3595 {
3596 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3597 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3598 }
3599 gsi_remove (&gsi, true);
3600
3601 /* Connect all the blocks. */
3602 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3603 ep->probability = REG_BR_PROB_BASE / 4 * 3;
3604 ep = find_edge (entry_bb, second_bb);
3605 ep->flags = EDGE_TRUE_VALUE;
3606 ep->probability = REG_BR_PROB_BASE / 4;
3607 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3608 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3609
3610 if (!broken_loop)
3611 {
3612 ep = find_edge (cont_bb, body_bb);
3613 if (ep == NULL)
3614 {
3615 ep = BRANCH_EDGE (cont_bb);
3616 gcc_assert (single_succ (ep->dest) == body_bb);
3617 }
3618 if (gimple_omp_for_combined_p (fd->for_stmt))
3619 {
3620 remove_edge (ep);
3621 ep = NULL;
3622 }
3623 else if (fd->collapse > 1)
3624 {
3625 remove_edge (ep);
3626 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3627 }
3628 else
3629 ep->flags = EDGE_TRUE_VALUE;
3630 find_edge (cont_bb, fin_bb)->flags
3631 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3632 }
3633
3634 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3635 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3636 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3637
3638 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3639 recompute_dominator (CDI_DOMINATORS, body_bb));
3640 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3641 recompute_dominator (CDI_DOMINATORS, fin_bb));
3642
3643 struct loop *loop = body_bb->loop_father;
3644 if (loop != entry_bb->loop_father)
3645 {
3646 gcc_assert (broken_loop || loop->header == body_bb);
3647 gcc_assert (broken_loop
3648 || loop->latch == region->cont
3649 || single_pred (loop->latch) == region->cont);
3650 return;
3651 }
3652
3653 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3654 {
3655 loop = alloc_loop ();
3656 loop->header = body_bb;
3657 if (collapse_bb == NULL)
3658 loop->latch = cont_bb;
3659 add_loop (loop, body_bb->loop_father);
3660 }
3661}
3662
3663/* Return phi in E->DEST with ARG on edge E. */
3664
3665static gphi *
3666find_phi_with_arg_on_edge (tree arg, edge e)
3667{
3668 basic_block bb = e->dest;
3669
3670 for (gphi_iterator gpi = gsi_start_phis (bb);
3671 !gsi_end_p (gpi);
3672 gsi_next (&gpi))
3673 {
3674 gphi *phi = gpi.phi ();
3675 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3676 return phi;
3677 }
3678
3679 return NULL;
3680}
3681
3682/* A subroutine of expand_omp_for. Generate code for a parallel
3683 loop with static schedule and a specified chunk size. Given
3684 parameters:
3685
3686 for (V = N1; V cond N2; V += STEP) BODY;
3687
3688 where COND is "<" or ">", we generate pseudocode
3689
3690 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3691 if (cond is <)
3692 adj = STEP - 1;
3693 else
3694 adj = STEP + 1;
3695 if ((__typeof (V)) -1 > 0 && cond is >)
3696 n = -(adj + N2 - N1) / -STEP;
3697 else
3698 n = (adj + N2 - N1) / STEP;
3699 trip = 0;
3700 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3701 here so that V is defined
3702 if the loop is not entered
3703 L0:
3704 s0 = (trip * nthreads + threadid) * CHUNK;
3705 e0 = min (s0 + CHUNK, n);
3706 if (s0 < n) goto L1; else goto L4;
3707 L1:
3708 V = s0 * STEP + N1;
3709 e = e0 * STEP + N1;
3710 L2:
3711 BODY;
3712 V += STEP;
3713 if (V cond e) goto L2; else goto L3;
3714 L3:
3715 trip += 1;
3716 goto L0;
3717 L4:
3718*/
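
/* Purely illustrative worked example (not part of the expansion itself),
   assuming hypothetical values n = 103 iterations, nthreads = 4,
   threadid = 1 and CHUNK = 10 plugged into the pseudocode above:

     trip 0: s0 = (0 * 4 + 1) * 10 = 10,  e0 = min (20, 103)  = 20
     trip 1: s0 = (1 * 4 + 1) * 10 = 50,  e0 = min (60, 103)  = 60
     trip 2: s0 = (2 * 4 + 1) * 10 = 90,  e0 = min (100, 103) = 100
     trip 3: s0 = 130 >= 103, so this thread exits at L4

   i.e. each thread repeatedly grabs every nthreads-th chunk of CHUNK
   iterations until s0 runs past n.  */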
3719
3720static void
3721expand_omp_for_static_chunk (struct omp_region *region,
3722 struct omp_for_data *fd, gimple *inner_stmt)
3723{
3724 tree n, s0, e0, e, t;
3725 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3726 tree type, itype, vmain, vback, vextra;
3727 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3728 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3729 gimple_stmt_iterator gsi;
3730 edge se;
3731 bool broken_loop = region->cont == NULL;
3732 tree *counts = NULL;
3733 tree n1, n2, step;
3734
3735 itype = type = TREE_TYPE (fd->loop.v);
3736 if (POINTER_TYPE_P (type))
3737 itype = signed_type_for (type);
3738
3739 entry_bb = region->entry;
3740 se = split_block (entry_bb, last_stmt (entry_bb));
3741 entry_bb = se->src;
3742 iter_part_bb = se->dest;
3743 cont_bb = region->cont;
3744 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3745 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3746 gcc_assert (broken_loop
3747 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3748 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3749 body_bb = single_succ (seq_start_bb);
3750 if (!broken_loop)
3751 {
3752 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3753 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3754 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3755 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3756 }
3757 exit_bb = region->exit;
3758
3759 /* Trip and adjustment setup goes in ENTRY_BB. */
3760 gsi = gsi_last_bb (entry_bb);
3761 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3762
3763 if (fd->collapse > 1)
3764 {
3765 int first_zero_iter = -1, dummy = -1;
3766 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3767
3768 counts = XALLOCAVEC (tree, fd->collapse);
3769 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3770 fin_bb, first_zero_iter,
3771 dummy_bb, dummy, l2_dom_bb);
3772 t = NULL_TREE;
3773 }
3774 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3775 t = integer_one_node;
3776 else
3777 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3778 fold_convert (type, fd->loop.n1),
3779 fold_convert (type, fd->loop.n2));
3780 if (fd->collapse == 1
3781 && TYPE_UNSIGNED (type)
3782 && (t == NULL_TREE || !integer_onep (t)))
3783 {
3784 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3785 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3786 true, GSI_SAME_STMT);
3787 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3788 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3789 true, GSI_SAME_STMT);
3790 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3791 NULL_TREE, NULL_TREE);
3792 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3793 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3794 expand_omp_regimplify_p, NULL, NULL)
3795 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3796 expand_omp_regimplify_p, NULL, NULL))
3797 {
3798 gsi = gsi_for_stmt (cond_stmt);
3799 gimple_regimplify_operands (cond_stmt, &gsi);
3800 }
3801 se = split_block (entry_bb, cond_stmt);
3802 se->flags = EDGE_TRUE_VALUE;
3803 entry_bb = se->dest;
3804 se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3805 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3806 se->probability = REG_BR_PROB_BASE / 2000 - 1;
3807 if (gimple_in_ssa_p (cfun))
3808 {
3809 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3810 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3811 !gsi_end_p (gpi); gsi_next (&gpi))
3812 {
3813 gphi *phi = gpi.phi ();
3814 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3815 se, UNKNOWN_LOCATION);
3816 }
3817 }
3818 gsi = gsi_last_bb (entry_bb);
3819 }
3820
3821 switch (gimple_omp_for_kind (fd->for_stmt))
3822 {
3823 case GF_OMP_FOR_KIND_FOR:
3824 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3825 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3826 break;
3827 case GF_OMP_FOR_KIND_DISTRIBUTE:
3828 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3829 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3830 break;
3831 default:
3832 gcc_unreachable ();
3833 }
3834 nthreads = build_call_expr (nthreads, 0);
3835 nthreads = fold_convert (itype, nthreads);
3836 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3837 true, GSI_SAME_STMT);
3838 threadid = build_call_expr (threadid, 0);
3839 threadid = fold_convert (itype, threadid);
3840 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3841 true, GSI_SAME_STMT);
3842
3843 n1 = fd->loop.n1;
3844 n2 = fd->loop.n2;
3845 step = fd->loop.step;
3846 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3847 {
3848 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3849 OMP_CLAUSE__LOOPTEMP_);
3850 gcc_assert (innerc);
3851 n1 = OMP_CLAUSE_DECL (innerc);
3852 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3853 OMP_CLAUSE__LOOPTEMP_);
3854 gcc_assert (innerc);
3855 n2 = OMP_CLAUSE_DECL (innerc);
3856 }
3857 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3858 true, NULL_TREE, true, GSI_SAME_STMT);
3859 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3860 true, NULL_TREE, true, GSI_SAME_STMT);
3861 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3862 true, NULL_TREE, true, GSI_SAME_STMT);
3863 tree chunk_size = fold_convert (itype, fd->chunk_size);
3864 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3865 chunk_size
3866 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3867 GSI_SAME_STMT);
3868
3869 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3870 t = fold_build2 (PLUS_EXPR, itype, step, t);
3871 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3872 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3873 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3874 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3875 fold_build1 (NEGATE_EXPR, itype, t),
3876 fold_build1 (NEGATE_EXPR, itype, step));
3877 else
3878 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3879 t = fold_convert (itype, t);
3880 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3881 true, GSI_SAME_STMT);
3882
3883 trip_var = create_tmp_reg (itype, ".trip");
3884 if (gimple_in_ssa_p (cfun))
3885 {
3886 trip_init = make_ssa_name (trip_var);
3887 trip_main = make_ssa_name (trip_var);
3888 trip_back = make_ssa_name (trip_var);
3889 }
3890 else
3891 {
3892 trip_init = trip_var;
3893 trip_main = trip_var;
3894 trip_back = trip_var;
3895 }
3896
3897 gassign *assign_stmt
3898 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3899 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3900
3901 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3902 t = fold_build2 (MULT_EXPR, itype, t, step);
3903 if (POINTER_TYPE_P (type))
3904 t = fold_build_pointer_plus (n1, t);
3905 else
3906 t = fold_build2 (PLUS_EXPR, type, t, n1);
3907 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3908 true, GSI_SAME_STMT);
3909
3910 /* Remove the GIMPLE_OMP_FOR. */
3911 gsi_remove (&gsi, true);
3912
3913 gimple_stmt_iterator gsif = gsi;
3914
3915 /* Iteration space partitioning goes in ITER_PART_BB. */
3916 gsi = gsi_last_bb (iter_part_bb);
3917
3918 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3919 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3920 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3921 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3922 false, GSI_CONTINUE_LINKING);
3923
3924 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3925 t = fold_build2 (MIN_EXPR, itype, t, n);
3926 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3927 false, GSI_CONTINUE_LINKING);
3928
3929 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3930 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3931
3932 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3933 gsi = gsi_start_bb (seq_start_bb);
3934
3935 tree startvar = fd->loop.v;
3936 tree endvar = NULL_TREE;
3937
3938 if (gimple_omp_for_combined_p (fd->for_stmt))
3939 {
3940 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3941 ? gimple_omp_parallel_clauses (inner_stmt)
3942 : gimple_omp_for_clauses (inner_stmt);
3943 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3944 gcc_assert (innerc);
3945 startvar = OMP_CLAUSE_DECL (innerc);
3946 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3947 OMP_CLAUSE__LOOPTEMP_);
3948 gcc_assert (innerc);
3949 endvar = OMP_CLAUSE_DECL (innerc);
3950 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3951 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3952 {
3953 int i;
3954 for (i = 1; i < fd->collapse; i++)
3955 {
3956 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3957 OMP_CLAUSE__LOOPTEMP_);
3958 gcc_assert (innerc);
3959 }
3960 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3961 OMP_CLAUSE__LOOPTEMP_);
3962 if (innerc)
3963 {
3964 /* If needed (distribute parallel for with lastprivate),
3965 propagate down the total number of iterations. */
3966 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3967 fd->loop.n2);
3968 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3969 GSI_CONTINUE_LINKING);
3970 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3971 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3972 }
3973 }
3974 }
3975
3976 t = fold_convert (itype, s0);
3977 t = fold_build2 (MULT_EXPR, itype, t, step);
3978 if (POINTER_TYPE_P (type))
3979 t = fold_build_pointer_plus (n1, t);
3980 else
3981 t = fold_build2 (PLUS_EXPR, type, t, n1);
3982 t = fold_convert (TREE_TYPE (startvar), t);
3983 t = force_gimple_operand_gsi (&gsi, t,
3984 DECL_P (startvar)
3985 && TREE_ADDRESSABLE (startvar),
3986 NULL_TREE, false, GSI_CONTINUE_LINKING);
3987 assign_stmt = gimple_build_assign (startvar, t);
3988 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3989
3990 t = fold_convert (itype, e0);
3991 t = fold_build2 (MULT_EXPR, itype, t, step);
3992 if (POINTER_TYPE_P (type))
3993 t = fold_build_pointer_plus (n1, t);
3994 else
3995 t = fold_build2 (PLUS_EXPR, type, t, n1);
3996 t = fold_convert (TREE_TYPE (startvar), t);
3997 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3998 false, GSI_CONTINUE_LINKING);
3999 if (endvar)
4000 {
4001 assign_stmt = gimple_build_assign (endvar, e);
4002 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4003 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4004 assign_stmt = gimple_build_assign (fd->loop.v, e);
4005 else
4006 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4007 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4008 }
4009 /* Handle linear clause adjustments. */
4010 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4011 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4012 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4013 c; c = OMP_CLAUSE_CHAIN (c))
4014 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4015 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4016 {
4017 tree d = OMP_CLAUSE_DECL (c);
4018 bool is_ref = omp_is_reference (d);
4019 tree t = d, a, dest;
4020 if (is_ref)
4021 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4022 tree type = TREE_TYPE (t);
4023 if (POINTER_TYPE_P (type))
4024 type = sizetype;
4025 dest = unshare_expr (t);
4026 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4027 expand_omp_build_assign (&gsif, v, t);
4028 if (itercnt == NULL_TREE)
4029 {
4030 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4031 {
4032 itercntbias
4033 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4034 fold_convert (itype, fd->loop.n1));
4035 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4036 itercntbias, step);
4037 itercntbias
4038 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4039 NULL_TREE, true,
4040 GSI_SAME_STMT);
4041 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4042 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4043 NULL_TREE, false,
4044 GSI_CONTINUE_LINKING);
4045 }
4046 else
4047 itercnt = s0;
4048 }
4049 a = fold_build2 (MULT_EXPR, type,
4050 fold_convert (type, itercnt),
4051 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4052 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4053 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4054 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4055 false, GSI_CONTINUE_LINKING);
4056 assign_stmt = gimple_build_assign (dest, t);
4057 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4058 }
4059 if (fd->collapse > 1)
4060 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4061
4062 if (!broken_loop)
4063 {
4064 /* The code controlling the sequential loop goes in CONT_BB,
4065 replacing the GIMPLE_OMP_CONTINUE. */
4066 gsi = gsi_last_bb (cont_bb);
4067 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4068 vmain = gimple_omp_continue_control_use (cont_stmt);
4069 vback = gimple_omp_continue_control_def (cont_stmt);
4070
4071 if (!gimple_omp_for_combined_p (fd->for_stmt))
4072 {
4073 if (POINTER_TYPE_P (type))
4074 t = fold_build_pointer_plus (vmain, step);
4075 else
4076 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4077 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4078 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4079 true, GSI_SAME_STMT);
4080 assign_stmt = gimple_build_assign (vback, t);
4081 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4082
4083 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4084 t = build2 (EQ_EXPR, boolean_type_node,
4085 build_int_cst (itype, 0),
4086 build_int_cst (itype, 1));
4087 else
4088 t = build2 (fd->loop.cond_code, boolean_type_node,
4089 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4090 ? t : vback, e);
4091 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4092 }
4093
4094 /* Remove GIMPLE_OMP_CONTINUE. */
4095 gsi_remove (&gsi, true);
4096
4097 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4098 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4099
4100 /* Trip update code goes into TRIP_UPDATE_BB. */
4101 gsi = gsi_start_bb (trip_update_bb);
4102
4103 t = build_int_cst (itype, 1);
4104 t = build2 (PLUS_EXPR, itype, trip_main, t);
4105 assign_stmt = gimple_build_assign (trip_back, t);
4106 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4107 }
4108
4109 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4110 gsi = gsi_last_bb (exit_bb);
4111 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4112 {
4113 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4114 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4115 }
4116 gsi_remove (&gsi, true);
4117
4118 /* Connect the new blocks. */
4119 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4120 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4121
4122 if (!broken_loop)
4123 {
4124 se = find_edge (cont_bb, body_bb);
4125 if (se == NULL)
4126 {
4127 se = BRANCH_EDGE (cont_bb);
4128 gcc_assert (single_succ (se->dest) == body_bb);
4129 }
4130 if (gimple_omp_for_combined_p (fd->for_stmt))
4131 {
4132 remove_edge (se);
4133 se = NULL;
4134 }
4135 else if (fd->collapse > 1)
4136 {
4137 remove_edge (se);
4138 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4139 }
4140 else
4141 se->flags = EDGE_TRUE_VALUE;
4142 find_edge (cont_bb, trip_update_bb)->flags
4143 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4144
4145 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4146 iter_part_bb);
4147 }
4148
4149 if (gimple_in_ssa_p (cfun))
4150 {
4151 gphi_iterator psi;
4152 gphi *phi;
4153 edge re, ene;
4154 edge_var_map *vm;
4155 size_t i;
4156
4157 gcc_assert (fd->collapse == 1 && !broken_loop);
4158
4159 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4160 remove arguments of the phi nodes in fin_bb. We need to create
4161 appropriate phi nodes in iter_part_bb instead. */
4162 se = find_edge (iter_part_bb, fin_bb);
4163 re = single_succ_edge (trip_update_bb);
4164 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4165 ene = single_succ_edge (entry_bb);
4166
4167 psi = gsi_start_phis (fin_bb);
4168 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4169 gsi_next (&psi), ++i)
4170 {
4171 gphi *nphi;
4172 source_location locus;
4173
4174 phi = psi.phi ();
4175 t = gimple_phi_result (phi);
4176 gcc_assert (t == redirect_edge_var_map_result (vm));
4177
4178 if (!single_pred_p (fin_bb))
4179 t = copy_ssa_name (t, phi);
4180
4181 nphi = create_phi_node (t, iter_part_bb);
4182
4183 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4184 locus = gimple_phi_arg_location_from_edge (phi, se);
4185
4186 /* A special case -- fd->loop.v is not yet computed in
4187 iter_part_bb, we need to use vextra instead. */
4188 if (t == fd->loop.v)
4189 t = vextra;
4190 add_phi_arg (nphi, t, ene, locus);
4191 locus = redirect_edge_var_map_location (vm);
4192 tree back_arg = redirect_edge_var_map_def (vm);
4193 add_phi_arg (nphi, back_arg, re, locus);
4194 edge ce = find_edge (cont_bb, body_bb);
4195 if (ce == NULL)
4196 {
4197 ce = BRANCH_EDGE (cont_bb);
4198 gcc_assert (single_succ (ce->dest) == body_bb);
4199 ce = single_succ_edge (ce->dest);
4200 }
4201 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4202 gcc_assert (inner_loop_phi != NULL);
4203 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4204 find_edge (seq_start_bb, body_bb), locus);
4205
4206 if (!single_pred_p (fin_bb))
4207 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4208 }
4209 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4210 redirect_edge_var_map_clear (re);
4211 if (single_pred_p (fin_bb))
4212 while (1)
4213 {
4214 psi = gsi_start_phis (fin_bb);
4215 if (gsi_end_p (psi))
4216 break;
4217 remove_phi_node (&psi, false);
4218 }
4219
4220 /* Make phi node for trip. */
4221 phi = create_phi_node (trip_main, iter_part_bb);
4222 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4223 UNKNOWN_LOCATION);
4224 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4225 UNKNOWN_LOCATION);
4226 }
4227
4228 if (!broken_loop)
4229 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4230 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4231 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4232 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4233 recompute_dominator (CDI_DOMINATORS, fin_bb));
4234 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4235 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4236 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4237 recompute_dominator (CDI_DOMINATORS, body_bb));
4238
4239 if (!broken_loop)
4240 {
4241 struct loop *loop = body_bb->loop_father;
4242 struct loop *trip_loop = alloc_loop ();
4243 trip_loop->header = iter_part_bb;
4244 trip_loop->latch = trip_update_bb;
4245 add_loop (trip_loop, iter_part_bb->loop_father);
4246
4247 if (loop != entry_bb->loop_father)
4248 {
4249 gcc_assert (loop->header == body_bb);
4250 gcc_assert (loop->latch == region->cont
4251 || single_pred (loop->latch) == region->cont);
4252 trip_loop->inner = loop;
4253 return;
4254 }
4255
4256 if (!gimple_omp_for_combined_p (fd->for_stmt))
4257 {
4258 loop = alloc_loop ();
4259 loop->header = body_bb;
4260 if (collapse_bb == NULL)
4261 loop->latch = cont_bb;
4262 add_loop (loop, trip_loop);
4263 }
4264 }
4265}
4266
4267/* A subroutine of expand_omp_for. Generate code for a _Cilk_for loop.
4268 Given parameters:
4269 for (V = N1; V cond N2; V += STEP) BODY;
4270
4271 where COND is "<" or ">" or "!=", we generate pseudocode
4272
4273 for (ind_var = low; ind_var < high; ind_var++)
4274 {
4275 V = n1 + (ind_var * STEP)
4276
4277 <BODY>
4278 }
4279
4280 In the above pseudocode, low and high are function parameters of the
4281 child function. In the function below, we insert a temporary
4282 variable that makes calls to two OMP functions that will not be
4283 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4284 with _Cilk_for). These functions are replaced with low and high
4285 by the function that handles taskreg. */
4286
4287
4288static void
4289expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4290{
4291 bool broken_loop = region->cont == NULL;
4292 basic_block entry_bb = region->entry;
4293 basic_block cont_bb = region->cont;
4294
4295 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4296 gcc_assert (broken_loop
4297 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4298 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4299 basic_block l1_bb, l2_bb;
4300
4301 if (!broken_loop)
4302 {
4303 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4304 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4305 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4306 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4307 }
4308 else
4309 {
4310 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4311 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4312 l2_bb = single_succ (l1_bb);
4313 }
4314 basic_block exit_bb = region->exit;
4315 basic_block l2_dom_bb = NULL;
4316
4317 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4318
4319 /* Below statements until the "tree high_val = ..." are pseudo statements
4320 used to pass information to be used by expand_omp_taskreg.
4321 low_val and high_val will be replaced by the __low and __high
4322 parameter from the child function.
4323
4324 The call_exprs part is a place-holder; it is mainly used
4325 to tell the top-level part unambiguously that this is
4326 where we should put low and high (reasoning given in the header
4327 comment). */
4328
4329 gomp_parallel *par_stmt
4330 = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4331 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4332 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4333 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4334 {
4335 if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
4336 high_val = t;
4337 else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
4338 low_val = t;
4339 }
4340 gcc_assert (low_val && high_val);
4341
4342 tree type = TREE_TYPE (low_val);
4343 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4344 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4345
4346 /* Not needed in SSA form right now. */
4347 gcc_assert (!gimple_in_ssa_p (cfun));
4348 if (l2_dom_bb == NULL)
4349 l2_dom_bb = l1_bb;
4350
4351 tree n1 = low_val;
4352 tree n2 = high_val;
4353
4354 gimple *stmt = gimple_build_assign (ind_var, n1);
4355
4356 /* Replace the GIMPLE_OMP_FOR statement. */
4357 gsi_replace (&gsi, stmt, true);
4358
4359 if (!broken_loop)
4360 {
4361 /* Code to control the increment goes in the CONT_BB. */
4362 gsi = gsi_last_bb (cont_bb);
4363 stmt = gsi_stmt (gsi);
4364 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4365 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4366 build_one_cst (type));
4367
4368 /* Replace GIMPLE_OMP_CONTINUE. */
4369 gsi_replace (&gsi, stmt, true);
4370 }
4371
4372 /* Emit the condition in L1_BB. */
4373 gsi = gsi_after_labels (l1_bb);
4374 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4375 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4376 fd->loop.step);
4377 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4378 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4379 fd->loop.n1, fold_convert (sizetype, t));
4380 else
4381 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4382 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4383 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4384 expand_omp_build_assign (&gsi, fd->loop.v, t);
4385
4386 /* The condition is always '<' since the runtime will fill in the low
4387 and high values. */
4388 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4389 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4390
4391 /* Remove GIMPLE_OMP_RETURN. */
4392 gsi = gsi_last_bb (exit_bb);
4393 gsi_remove (&gsi, true);
4394
4395 /* Connect the new blocks. */
4396 remove_edge (FALLTHRU_EDGE (entry_bb));
4397
4398 edge e, ne;
4399 if (!broken_loop)
4400 {
4401 remove_edge (BRANCH_EDGE (entry_bb));
4402 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4403
4404 e = BRANCH_EDGE (l1_bb);
4405 ne = FALLTHRU_EDGE (l1_bb);
4406 e->flags = EDGE_TRUE_VALUE;
4407 }
4408 else
4409 {
4410 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4411
4412 ne = single_succ_edge (l1_bb);
4413 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4414
4415 }
4416 ne->flags = EDGE_FALSE_VALUE;
4417 e->probability = REG_BR_PROB_BASE * 7 / 8;
4418 ne->probability = REG_BR_PROB_BASE / 8;
4419
4420 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4421 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4422 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4423
4424 if (!broken_loop)
4425 {
4426 struct loop *loop = alloc_loop ();
4427 loop->header = l1_bb;
4428 loop->latch = cont_bb;
4429 add_loop (loop, l1_bb->loop_father);
4430 loop->safelen = INT_MAX;
4431 }
4432
4433 /* Pick the correct library function based on the precision of the
4434 induction variable type. */
4435 tree lib_fun = NULL_TREE;
4436 if (TYPE_PRECISION (type) == 32)
4437 lib_fun = cilk_for_32_fndecl;
4438 else if (TYPE_PRECISION (type) == 64)
4439 lib_fun = cilk_for_64_fndecl;
4440 else
4441 gcc_unreachable ();
4442
4443 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4444
4445 /* WS_ARGS contains the library function flavor to call:
4446 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
4447 user-defined grain value. If the user does not define one, then zero
4448 is passed in by the parser. */
4449 vec_alloc (region->ws_args, 2);
4450 region->ws_args->quick_push (lib_fun);
4451 region->ws_args->quick_push (fd->chunk_size);
4452}
4453
4454/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4455 loop. Given parameters:
4456
4457 for (V = N1; V cond N2; V += STEP) BODY;
4458
4459 where COND is "<" or ">", we generate pseudocode
4460
4461 V = N1;
4462 goto L1;
4463 L0:
4464 BODY;
4465 V += STEP;
4466 L1:
4467 if (V cond N2) goto L0; else goto L2;
4468 L2:
4469
4470 For collapsed loops, given parameters:
4471 collapse(3)
4472 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4473 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4474 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4475 BODY;
4476
4477 we generate pseudocode
4478
4479 if (cond3 is <)
4480 adj = STEP3 - 1;
4481 else
4482 adj = STEP3 + 1;
4483 count3 = (adj + N32 - N31) / STEP3;
4484 if (cond2 is <)
4485 adj = STEP2 - 1;
4486 else
4487 adj = STEP2 + 1;
4488 count2 = (adj + N22 - N21) / STEP2;
4489 if (cond1 is <)
4490 adj = STEP1 - 1;
4491 else
4492 adj = STEP1 + 1;
4493 count1 = (adj + N12 - N11) / STEP1;
4494 count = count1 * count2 * count3;
4495 V = 0;
4496 V1 = N11;
4497 V2 = N21;
4498 V3 = N31;
4499 goto L1;
4500 L0:
4501 BODY;
4502 V += 1;
4503 V3 += STEP3;
4504 V2 += (V3 cond3 N32) ? 0 : STEP2;
4505 V3 = (V3 cond3 N32) ? V3 : N31;
4506 V1 += (V2 cond2 N22) ? 0 : STEP1;
4507 V2 = (V2 cond2 N22) ? V2 : N21;
4508 L1:
4509 if (V < count) goto L0; else goto L2;
4510 L2:
4511
4512 */
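
/* Illustrative trace (assuming a hypothetical collapse(3) nest with
   N11 = N21 = N31 = 0, N12 = N22 = N32 = 2 and all steps 1, so
   count1 = count2 = count3 = 2 and count = 8) of the update sequence
   in the pseudocode above:

     V:            0  ->  1  ->  2  ->  3  ->  4  -> ...
     (V1,V2,V3): (0,0,0) -> (0,0,1) -> (0,1,0) -> (0,1,1) -> (1,0,0) -> ...

   The collapsed counters behave like an odometer: V3 is bumped every
   iteration, and each time it wraps back to N31 it carries into V2,
   which in turn carries into V1, while the single linear counter V is
   compared against count to terminate the loop.  */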
4513
4514static void
4515expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4516{
4517 tree type, t;
4518 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4519 gimple_stmt_iterator gsi;
4520 gimple *stmt;
4521 gcond *cond_stmt;
4522 bool broken_loop = region->cont == NULL;
4523 edge e, ne;
4524 tree *counts = NULL;
4525 int i;
4526 int safelen_int = INT_MAX;
4527 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4528 OMP_CLAUSE_SAFELEN);
4529 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4530 OMP_CLAUSE__SIMDUID_);
4531 tree n1, n2;
4532
4533 if (safelen)
4534 {
4535 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4536 if (TREE_CODE (safelen) != INTEGER_CST)
4537 safelen_int = 0;
4538 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4539 safelen_int = tree_to_uhwi (safelen);
4540 if (safelen_int == 1)
4541 safelen_int = 0;
4542 }
4543 type = TREE_TYPE (fd->loop.v);
4544 entry_bb = region->entry;
4545 cont_bb = region->cont;
4546 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4547 gcc_assert (broken_loop
4548 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4549 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4550 if (!broken_loop)
4551 {
4552 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4553 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4554 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4555 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4556 }
4557 else
4558 {
4559 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4560 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4561 l2_bb = single_succ (l1_bb);
4562 }
4563 exit_bb = region->exit;
4564 l2_dom_bb = NULL;
4565
4566 gsi = gsi_last_bb (entry_bb);
4567
4568 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4569 /* Not needed in SSA form right now. */
4570 gcc_assert (!gimple_in_ssa_p (cfun));
4571 if (fd->collapse > 1)
4572 {
4573 int first_zero_iter = -1, dummy = -1;
4574 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4575
4576 counts = XALLOCAVEC (tree, fd->collapse);
4577 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4578 zero_iter_bb, first_zero_iter,
4579 dummy_bb, dummy, l2_dom_bb);
4580 }
4581 if (l2_dom_bb == NULL)
4582 l2_dom_bb = l1_bb;
4583
4584 n1 = fd->loop.n1;
4585 n2 = fd->loop.n2;
4586 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4587 {
4588 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4589 OMP_CLAUSE__LOOPTEMP_);
4590 gcc_assert (innerc);
4591 n1 = OMP_CLAUSE_DECL (innerc);
4592 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4593 OMP_CLAUSE__LOOPTEMP_);
4594 gcc_assert (innerc);
4595 n2 = OMP_CLAUSE_DECL (innerc);
4596 }
4597 tree step = fd->loop.step;
4598
4599 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4600 OMP_CLAUSE__SIMT_);
4601 if (is_simt)
4602 {
4603 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4604 is_simt = safelen_int > 1;
4605 }
4606 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4607 if (is_simt)
4608 {
4609 simt_lane = create_tmp_var (unsigned_type_node);
4610 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4611 gimple_call_set_lhs (g, simt_lane);
4612 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4613 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4614 fold_convert (TREE_TYPE (step), simt_lane));
4615 n1 = fold_convert (type, n1);
4616 if (POINTER_TYPE_P (type))
4617 n1 = fold_build_pointer_plus (n1, offset);
4618 else
4619 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4620
4621 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4622 if (fd->collapse > 1)
4623 simt_maxlane = build_one_cst (unsigned_type_node);
4624 else if (safelen_int < omp_max_simt_vf ())
4625 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4626 tree vf
4627 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4628 unsigned_type_node, 0);
4629 if (simt_maxlane)
4630 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4631 vf = fold_convert (TREE_TYPE (step), vf);
4632 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4633 }
4634
4635 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4636 if (fd->collapse > 1)
4637 {
4638 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4639 {
4640 gsi_prev (&gsi);
4641 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4642 gsi_next (&gsi);
4643 }
4644 else
4645 for (i = 0; i < fd->collapse; i++)
4646 {
4647 tree itype = TREE_TYPE (fd->loops[i].v);
4648 if (POINTER_TYPE_P (itype))
4649 itype = signed_type_for (itype);
4650 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4651 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4652 }
4653 }
4654
4655 /* Remove the GIMPLE_OMP_FOR statement. */
4656 gsi_remove (&gsi, true);
4657
4658 if (!broken_loop)
4659 {
4660 /* Code to control the increment goes in the CONT_BB. */
4661 gsi = gsi_last_bb (cont_bb);
4662 stmt = gsi_stmt (gsi);
4663 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4664
4665 if (POINTER_TYPE_P (type))
4666 t = fold_build_pointer_plus (fd->loop.v, step);
4667 else
4668 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4669 expand_omp_build_assign (&gsi, fd->loop.v, t);
4670
4671 if (fd->collapse > 1)
4672 {
4673 i = fd->collapse - 1;
4674 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4675 {
4676 t = fold_convert (sizetype, fd->loops[i].step);
4677 t = fold_build_pointer_plus (fd->loops[i].v, t);
4678 }
4679 else
4680 {
4681 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4682 fd->loops[i].step);
4683 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4684 fd->loops[i].v, t);
4685 }
4686 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4687
4688 for (i = fd->collapse - 1; i > 0; i--)
4689 {
4690 tree itype = TREE_TYPE (fd->loops[i].v);
4691 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4692 if (POINTER_TYPE_P (itype2))
4693 itype2 = signed_type_for (itype2);
4694 t = build3 (COND_EXPR, itype2,
4695 build2 (fd->loops[i].cond_code, boolean_type_node,
4696 fd->loops[i].v,
4697 fold_convert (itype, fd->loops[i].n2)),
4698 build_int_cst (itype2, 0),
4699 fold_convert (itype2, fd->loops[i - 1].step));
4700 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4701 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4702 else
4703 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4704 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4705
4706 t = build3 (COND_EXPR, itype,
4707 build2 (fd->loops[i].cond_code, boolean_type_node,
4708 fd->loops[i].v,
4709 fold_convert (itype, fd->loops[i].n2)),
4710 fd->loops[i].v,
4711 fold_convert (itype, fd->loops[i].n1));
4712 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4713 }
4714 }
4715
4716 /* Remove GIMPLE_OMP_CONTINUE. */
4717 gsi_remove (&gsi, true);
4718 }
4719
4720 /* Emit the condition in L1_BB. */
4721 gsi = gsi_start_bb (l1_bb);
4722
4723 t = fold_convert (type, n2);
4724 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4725 false, GSI_CONTINUE_LINKING);
4726 tree v = fd->loop.v;
4727 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4728 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4729 false, GSI_CONTINUE_LINKING);
4730 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4731 cond_stmt = gimple_build_cond_empty (t);
4732 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4733 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4734 NULL, NULL)
4735 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4736 NULL, NULL))
4737 {
4738 gsi = gsi_for_stmt (cond_stmt);
4739 gimple_regimplify_operands (cond_stmt, &gsi);
4740 }
4741
4742 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4743 if (is_simt)
4744 {
4745 gsi = gsi_start_bb (l2_bb);
4746 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4747 if (POINTER_TYPE_P (type))
4748 t = fold_build_pointer_plus (fd->loop.v, step);
4749 else
4750 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4751 expand_omp_build_assign (&gsi, fd->loop.v, t);
4752 }
4753
4754 /* Remove GIMPLE_OMP_RETURN. */
4755 gsi = gsi_last_bb (exit_bb);
4756 gsi_remove (&gsi, true);
4757
4758 /* Connect the new blocks. */
4759 remove_edge (FALLTHRU_EDGE (entry_bb));
4760
4761 if (!broken_loop)
4762 {
4763 remove_edge (BRANCH_EDGE (entry_bb));
4764 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4765
4766 e = BRANCH_EDGE (l1_bb);
4767 ne = FALLTHRU_EDGE (l1_bb);
4768 e->flags = EDGE_TRUE_VALUE;
4769 }
4770 else
4771 {
4772 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4773
4774 ne = single_succ_edge (l1_bb);
4775 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4776
4777 }
4778 ne->flags = EDGE_FALSE_VALUE;
4779 e->probability = REG_BR_PROB_BASE * 7 / 8;
4780 ne->probability = REG_BR_PROB_BASE / 8;
4781
4782 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4783 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4784
4785 if (simt_maxlane)
4786 {
4787 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4788 NULL_TREE, NULL_TREE);
4789 gsi = gsi_last_bb (entry_bb);
4790 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4791 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4792 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4793 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
4794 BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
4795 l2_dom_bb = entry_bb;
4796 }
4797 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4798
4799 if (!broken_loop)
4800 {
4801 struct loop *loop = alloc_loop ();
4802 loop->header = l1_bb;
4803 loop->latch = cont_bb;
4804 add_loop (loop, l1_bb->loop_father);
4805 loop->safelen = safelen_int;
4806 if (simduid)
4807 {
4808 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4809 cfun->has_simduid_loops = true;
4810 }
4811 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4812 the loop. */
4813 if ((flag_tree_loop_vectorize
4814 || (!global_options_set.x_flag_tree_loop_vectorize
4815	      && !global_options_set.x_flag_tree_vectorize))
4816 && flag_tree_loop_optimize
4817 && loop->safelen > 1)
4818 {
4819 loop->force_vectorize = true;
4820 cfun->has_force_vectorize_loops = true;
4821 }
4822 }
4823 else if (simduid)
4824 cfun->has_simduid_loops = true;
4825}
4826
4827/* The taskloop construct is represented after gimplification with
4828 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4829 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4830 which should just compute all the needed loop temporaries
4831 for GIMPLE_OMP_TASK. */
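
/* A rough sketch (assumed shape, not literal GIMPLE) of the nesting that
   the two taskloop expansion routines below rely on:

     GIMPLE_OMP_FOR        <- outer; expand_omp_taskloop_for_outer only
       GIMPLE_OMP_TASK        computes the _looptemp_ start/end values
         GIMPLE_OMP_FOR     <- inner; expand_omp_taskloop_for_inner runs
           BODY                the single iteration range handed to each
                               task by GOMP_taskloop{,_ull}.  */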
4832
4833static void
4834expand_omp_taskloop_for_outer (struct omp_region *region,
4835 struct omp_for_data *fd,
4836 gimple *inner_stmt)
4837{
4838 tree type, bias = NULL_TREE;
4839 basic_block entry_bb, cont_bb, exit_bb;
4840 gimple_stmt_iterator gsi;
4841 gassign *assign_stmt;
4842 tree *counts = NULL;
4843 int i;
4844
4845 gcc_assert (inner_stmt);
4846 gcc_assert (region->cont);
4847 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4848 && gimple_omp_task_taskloop_p (inner_stmt));
4849 type = TREE_TYPE (fd->loop.v);
4850
4851 /* See if we need to bias by LLONG_MIN. */
4852 if (fd->iter_type == long_long_unsigned_type_node
4853 && TREE_CODE (type) == INTEGER_TYPE
4854 && !TYPE_UNSIGNED (type))
4855 {
4856 tree n1, n2;
4857
4858 if (fd->loop.cond_code == LT_EXPR)
4859 {
4860 n1 = fd->loop.n1;
4861 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4862 }
4863 else
4864 {
4865 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4866 n2 = fd->loop.n1;
4867 }
4868 if (TREE_CODE (n1) != INTEGER_CST
4869 || TREE_CODE (n2) != INTEGER_CST
4870 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4871 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4872 }
4873
4874 entry_bb = region->entry;
4875 cont_bb = region->cont;
4876 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4877 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4878 exit_bb = region->exit;
4879
4880 gsi = gsi_last_bb (entry_bb);
4881 gimple *for_stmt = gsi_stmt (gsi);
4882 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4883 if (fd->collapse > 1)
4884 {
4885 int first_zero_iter = -1, dummy = -1;
4886 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4887
4888 counts = XALLOCAVEC (tree, fd->collapse);
4889 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4890 zero_iter_bb, first_zero_iter,
4891 dummy_bb, dummy, l2_dom_bb);
4892
4893 if (zero_iter_bb)
4894 {
4895 /* Some counts[i] vars might be uninitialized if
4896 some loop has zero iterations. But the body shouldn't
4897 be executed in that case, so just avoid uninit warnings. */
4898 for (i = first_zero_iter; i < fd->collapse; i++)
4899 if (SSA_VAR_P (counts[i]))
4900 TREE_NO_WARNING (counts[i]) = 1;
4901 gsi_prev (&gsi);
4902 edge e = split_block (entry_bb, gsi_stmt (gsi));
4903 entry_bb = e->dest;
4904 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4905 gsi = gsi_last_bb (entry_bb);
4906 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4907 get_immediate_dominator (CDI_DOMINATORS,
4908 zero_iter_bb));
4909 }
4910 }
4911
4912 tree t0, t1;
4913 t1 = fd->loop.n2;
4914 t0 = fd->loop.n1;
4915 if (POINTER_TYPE_P (TREE_TYPE (t0))
4916 && TYPE_PRECISION (TREE_TYPE (t0))
4917 != TYPE_PRECISION (fd->iter_type))
4918 {
4919 /* Avoid casting pointers to integer of a different size. */
4920 tree itype = signed_type_for (type);
4921 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4922 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4923 }
4924 else
4925 {
4926 t1 = fold_convert (fd->iter_type, t1);
4927 t0 = fold_convert (fd->iter_type, t0);
4928 }
4929 if (bias)
4930 {
4931 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4932 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4933 }
4934
4935 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4936 OMP_CLAUSE__LOOPTEMP_);
4937 gcc_assert (innerc);
4938 tree startvar = OMP_CLAUSE_DECL (innerc);
4939 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4940 gcc_assert (innerc);
4941 tree endvar = OMP_CLAUSE_DECL (innerc);
4942 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4943 {
4944 gcc_assert (innerc);
4945 for (i = 1; i < fd->collapse; i++)
4946 {
4947 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4948 OMP_CLAUSE__LOOPTEMP_);
4949 gcc_assert (innerc);
4950 }
4951 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4952 OMP_CLAUSE__LOOPTEMP_);
4953 if (innerc)
4954 {
4955 /* If needed (inner taskloop has lastprivate clause), propagate
4956 down the total number of iterations. */
4957 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4958 NULL_TREE, false,
4959 GSI_CONTINUE_LINKING);
4960 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4961 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4962 }
4963 }
4964
4965 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4966 GSI_CONTINUE_LINKING);
4967 assign_stmt = gimple_build_assign (startvar, t0);
4968 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4969
4970 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4971 GSI_CONTINUE_LINKING);
4972 assign_stmt = gimple_build_assign (endvar, t1);
4973 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4974 if (fd->collapse > 1)
4975 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4976
4977 /* Remove the GIMPLE_OMP_FOR statement. */
4978 gsi = gsi_for_stmt (for_stmt);
4979 gsi_remove (&gsi, true);
4980
4981 gsi = gsi_last_bb (cont_bb);
4982 gsi_remove (&gsi, true);
4983
4984 gsi = gsi_last_bb (exit_bb);
4985 gsi_remove (&gsi, true);
4986
4987 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
4988 remove_edge (BRANCH_EDGE (entry_bb));
4989 FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
4990 remove_edge (BRANCH_EDGE (cont_bb));
4991 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4992 set_immediate_dominator (CDI_DOMINATORS, region->entry,
4993 recompute_dominator (CDI_DOMINATORS, region->entry));
4994}
4995
4996/* The taskloop construct is represented after gimplification with
4997 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4998 in between them. This routine expands the inner GIMPLE_OMP_FOR.
4999 The GOMP_taskloop{,_ull} function arranges for each task to be given just
5000 a single range of iterations. */
5001
5002static void
5003expand_omp_taskloop_for_inner (struct omp_region *region,
5004 struct omp_for_data *fd,
5005 gimple *inner_stmt)
5006{
5007 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5008 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5009 basic_block fin_bb;
5010 gimple_stmt_iterator gsi;
5011 edge ep;
5012 bool broken_loop = region->cont == NULL;
5013 tree *counts = NULL;
5014 tree n1, n2, step;
5015
5016 itype = type = TREE_TYPE (fd->loop.v);
5017 if (POINTER_TYPE_P (type))
5018 itype = signed_type_for (type);
5019
5020 /* See if we need to bias by LLONG_MIN. */
5021 if (fd->iter_type == long_long_unsigned_type_node
5022 && TREE_CODE (type) == INTEGER_TYPE
5023 && !TYPE_UNSIGNED (type))
5024 {
5025 tree n1, n2;
5026
5027 if (fd->loop.cond_code == LT_EXPR)
5028 {
5029 n1 = fd->loop.n1;
5030 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5031 }
5032 else
5033 {
5034 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5035 n2 = fd->loop.n1;
5036 }
5037 if (TREE_CODE (n1) != INTEGER_CST
5038 || TREE_CODE (n2) != INTEGER_CST
5039 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5040 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5041 }
5042
5043 entry_bb = region->entry;
5044 cont_bb = region->cont;
5045 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5046 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5047 gcc_assert (broken_loop
5048 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5049 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5050 if (!broken_loop)
5051 {
5052 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5053 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5054 }
5055 exit_bb = region->exit;
5056
5057 /* Iteration space partitioning goes in ENTRY_BB. */
5058 gsi = gsi_last_bb (entry_bb);
5059 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5060
5061 if (fd->collapse > 1)
5062 {
5063 int first_zero_iter = -1, dummy = -1;
5064 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5065
5066 counts = XALLOCAVEC (tree, fd->collapse);
5067 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5068 fin_bb, first_zero_iter,
5069 dummy_bb, dummy, l2_dom_bb);
5070 t = NULL_TREE;
5071 }
5072 else
5073 t = integer_one_node;
5074
5075 step = fd->loop.step;
5076 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5077 OMP_CLAUSE__LOOPTEMP_);
5078 gcc_assert (innerc);
5079 n1 = OMP_CLAUSE_DECL (innerc);
5080 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5081 gcc_assert (innerc);
5082 n2 = OMP_CLAUSE_DECL (innerc);
5083 if (bias)
5084 {
5085 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5086 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5087 }
5088 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5089 true, NULL_TREE, true, GSI_SAME_STMT);
5090 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5091 true, NULL_TREE, true, GSI_SAME_STMT);
5092 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5093 true, NULL_TREE, true, GSI_SAME_STMT);
5094
5095 tree startvar = fd->loop.v;
5096 tree endvar = NULL_TREE;
5097
5098 if (gimple_omp_for_combined_p (fd->for_stmt))
5099 {
5100 tree clauses = gimple_omp_for_clauses (inner_stmt);
5101 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5102 gcc_assert (innerc);
5103 startvar = OMP_CLAUSE_DECL (innerc);
5104 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5105 OMP_CLAUSE__LOOPTEMP_);
5106 gcc_assert (innerc);
5107 endvar = OMP_CLAUSE_DECL (innerc);
5108 }
5109 t = fold_convert (TREE_TYPE (startvar), n1);
5110 t = force_gimple_operand_gsi (&gsi, t,
5111 DECL_P (startvar)
5112 && TREE_ADDRESSABLE (startvar),
5113 NULL_TREE, false, GSI_CONTINUE_LINKING);
5114 gimple *assign_stmt = gimple_build_assign (startvar, t);
5115 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5116
5117 t = fold_convert (TREE_TYPE (startvar), n2);
5118 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5119 false, GSI_CONTINUE_LINKING);
5120 if (endvar)
5121 {
5122 assign_stmt = gimple_build_assign (endvar, e);
5123 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5124 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5125 assign_stmt = gimple_build_assign (fd->loop.v, e);
5126 else
5127 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5128 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5129 }
5130 if (fd->collapse > 1)
5131 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5132
5133 if (!broken_loop)
5134 {
5135 /* The code controlling the sequential loop replaces the
5136 GIMPLE_OMP_CONTINUE. */
5137 gsi = gsi_last_bb (cont_bb);
5138 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5139 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5140 vmain = gimple_omp_continue_control_use (cont_stmt);
5141 vback = gimple_omp_continue_control_def (cont_stmt);
5142
5143 if (!gimple_omp_for_combined_p (fd->for_stmt))
5144 {
5145 if (POINTER_TYPE_P (type))
5146 t = fold_build_pointer_plus (vmain, step);
5147 else
5148 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5149 t = force_gimple_operand_gsi (&gsi, t,
5150 DECL_P (vback)
5151 && TREE_ADDRESSABLE (vback),
5152 NULL_TREE, true, GSI_SAME_STMT);
5153 assign_stmt = gimple_build_assign (vback, t);
5154 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5155
5156 t = build2 (fd->loop.cond_code, boolean_type_node,
5157 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5158 ? t : vback, e);
5159 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5160 }
5161
5162 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5163 gsi_remove (&gsi, true);
5164
5165 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5166 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5167 }
5168
5169 /* Remove the GIMPLE_OMP_FOR statement. */
5170 gsi = gsi_for_stmt (fd->for_stmt);
5171 gsi_remove (&gsi, true);
5172
5173 /* Remove the GIMPLE_OMP_RETURN statement. */
5174 gsi = gsi_last_bb (exit_bb);
5175 gsi_remove (&gsi, true);
5176
5177 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5178 if (!broken_loop)
5179 remove_edge (BRANCH_EDGE (entry_bb));
5180 else
5181 {
5182 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5183 region->outer->cont = NULL;
5184 }
5185
5186 /* Connect all the blocks. */
5187 if (!broken_loop)
5188 {
5189 ep = find_edge (cont_bb, body_bb);
5190 if (gimple_omp_for_combined_p (fd->for_stmt))
5191 {
5192 remove_edge (ep);
5193 ep = NULL;
5194 }
5195 else if (fd->collapse > 1)
5196 {
5197 remove_edge (ep);
5198 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5199 }
5200 else
5201 ep->flags = EDGE_TRUE_VALUE;
5202 find_edge (cont_bb, fin_bb)->flags
5203 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5204 }
5205
5206 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5207 recompute_dominator (CDI_DOMINATORS, body_bb));
5208 if (!broken_loop)
5209 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5210 recompute_dominator (CDI_DOMINATORS, fin_bb));
5211
5212 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5213 {
5214 struct loop *loop = alloc_loop ();
5215 loop->header = body_bb;
5216 if (collapse_bb == NULL)
5217 loop->latch = cont_bb;
5218 add_loop (loop, body_bb->loop_father);
5219 }
5220}
5221
5222/* A subroutine of expand_omp_for. Generate code for an OpenACC
5223 partitioned loop. The lowering here is abstracted, in that the
5224 loop parameters are passed through internal functions, which are
5225 further lowered by oacc_device_lower, once we get to the target
5226 compiler. The loop is of the form:
5227
5228 for (V = B; V LTGT E; V += S) {BODY}
5229
5230 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5231 (constant 0 for no chunking) and we will have a GWV partitioning
5232 mask, specifying dimensions over which the loop is to be
5233 partitioned (see note below). We generate code that looks like:
5234
5235 <entry_bb> [incoming FALL->body, BRANCH->exit]
5236 typedef signedintify (typeof (V)) T; // underlying signed integral type
5237 T range = E - B;
5238 T chunk_no = 0;
5239 T DIR = LTGT == '<' ? +1 : -1;
5240 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5241 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5242
5243 <head_bb> [created by splitting end of entry_bb]
5244 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5245 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5246 if (!(offset LTGT bound)) goto bottom_bb;
5247
5248 <body_bb> [incoming]
5249 V = B + offset;
5250 {BODY}
5251
5252 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5253 offset += step;
5254 if (offset LTGT bound) goto body_bb; [*]
5255
5256 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5257 chunk_no++;
5258 if (chunk_no < chunk_max) goto head_bb;
5259
5260 <exit_bb> [incoming]
5261 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5262
5263 [*] Needed if V live at end of loop
5264
5265 Note: CHUNKING & GWV mask are specified explicitly here. This is a
5266 transition, and will be specified by a more general mechanism shortly.
5267 */
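/* As a concrete sketch (illustrative only, not emitted verbatim): for

     #pragma acc loop gang
     for (i = 0; i < n; i++)
       body (i);

   B = 0, E = n, S = 1, DIR = +1, CHUNK_SIZE = 0 and GWV has only the
   gang bit set, so the IFN_GOACC_LOOP calls built below amount to

     chunk_max = GOACC_LOOP (CHUNKS, +1, n, 1, 0, GWV);
     step      = GOACC_LOOP (STEP,   +1, n, 1, 0, GWV);
     offset    = GOACC_LOOP (OFFSET, +1, n, 1, 0, GWV, chunk_no);
     bound     = GOACC_LOOP (BOUND,  +1, n, 1, 0, GWV, offset);

   and are rewritten into target-specific code by oacc_device_lower.  */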
5268
5269static void
5270expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5271{
5272 tree v = fd->loop.v;
5273 enum tree_code cond_code = fd->loop.cond_code;
5274 enum tree_code plus_code = PLUS_EXPR;
5275
5276 tree chunk_size = integer_minus_one_node;
5277 tree gwv = integer_zero_node;
5278 tree iter_type = TREE_TYPE (v);
5279 tree diff_type = iter_type;
5280 tree plus_type = iter_type;
5281 struct oacc_collapse *counts = NULL;
5282
5283 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5284 == GF_OMP_FOR_KIND_OACC_LOOP);
5285 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5286 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5287
5288 if (POINTER_TYPE_P (iter_type))
5289 {
5290 plus_code = POINTER_PLUS_EXPR;
5291 plus_type = sizetype;
5292 }
5293 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5294 diff_type = signed_type_for (diff_type);
5295
5296 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5297 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5298 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5299 basic_block bottom_bb = NULL;
5300
5301 /* entry_bb has two successors; the branch edge is to the exit
5302 block, fallthrough edge to body. */
5303 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5304 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5305
5306 /* If cont_bb non-NULL, it has 2 successors. The branch successor is
5307 body_bb, or to a block whose only successor is the body_bb. Its
5308 fallthrough successor is the final block (same as the branch
5309 successor of the entry_bb). */
5310 if (cont_bb)
5311 {
5312 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5313 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5314
5315 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5316 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5317 }
5318 else
5319 gcc_assert (!gimple_in_ssa_p (cfun));
5320
5321 /* The exit block only has entry_bb and cont_bb as predecessors. */
5322 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5323
5324 tree chunk_no;
5325 tree chunk_max = NULL_TREE;
5326 tree bound, offset;
5327 tree step = create_tmp_var (diff_type, ".step");
5328 bool up = cond_code == LT_EXPR;
5329 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5330 bool chunking = !gimple_in_ssa_p (cfun);
5331 bool negating;
5332
5333 /* SSA instances. */
5334 tree offset_incr = NULL_TREE;
5335 tree offset_init = NULL_TREE;
5336
5337 gimple_stmt_iterator gsi;
5338 gassign *ass;
5339 gcall *call;
5340 gimple *stmt;
5341 tree expr;
5342 location_t loc;
5343 edge split, be, fte;
5344
5345 /* Split the end of entry_bb to create head_bb. */
5346 split = split_block (entry_bb, last_stmt (entry_bb));
5347 basic_block head_bb = split->dest;
5348 entry_bb = split->src;
5349
5350 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5351 gsi = gsi_last_bb (entry_bb);
5352 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5353 loc = gimple_location (for_stmt);
5354
5355 if (gimple_in_ssa_p (cfun))
5356 {
5357 offset_init = gimple_omp_for_index (for_stmt, 0);
5358 gcc_assert (integer_zerop (fd->loop.n1));
5359 /* The SSA parallelizer does gang parallelism. */
5360 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5361 }
5362
5363 if (fd->collapse > 1)
5364 {
5365 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5366 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5367 TREE_TYPE (fd->loop.n2));
5368
5369 if (SSA_VAR_P (fd->loop.n2))
5370 {
5371 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5372 true, GSI_SAME_STMT);
5373 ass = gimple_build_assign (fd->loop.n2, total);
5374 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5375 }
5376
5377 }
5378
5379 tree b = fd->loop.n1;
5380 tree e = fd->loop.n2;
5381 tree s = fd->loop.step;
5382
5383 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5384 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5385
5386 /* Convert the step, avoiding possible unsigned->signed overflow. */
5387 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5388 if (negating)
5389 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5390 s = fold_convert (diff_type, s);
5391 if (negating)
5392 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5393 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5394
5395 if (!chunking)
5396 chunk_size = integer_zero_node;
5397 expr = fold_convert (diff_type, chunk_size);
5398 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5399 NULL_TREE, true, GSI_SAME_STMT);
5400 /* Determine the range, avoiding possible unsigned->signed overflow. */
5401 negating = !up && TYPE_UNSIGNED (iter_type);
5402 expr = fold_build2 (MINUS_EXPR, plus_type,
5403 fold_convert (plus_type, negating ? b : e),
5404 fold_convert (plus_type, negating ? e : b));
5405 expr = fold_convert (diff_type, expr);
5406 if (negating)
5407 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5408 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5409 NULL_TREE, true, GSI_SAME_STMT);
5410
5411 chunk_no = build_int_cst (diff_type, 0);
5412 if (chunking)
5413 {
5414 gcc_assert (!gimple_in_ssa_p (cfun));
5415
5416 expr = chunk_no;
5417 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5418 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5419
5420 ass = gimple_build_assign (chunk_no, expr);
5421 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5422
5423 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5424 build_int_cst (integer_type_node,
5425 IFN_GOACC_LOOP_CHUNKS),
5426 dir, range, s, chunk_size, gwv);
5427 gimple_call_set_lhs (call, chunk_max);
5428 gimple_set_location (call, loc);
5429 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5430 }
5431 else
5432 chunk_size = chunk_no;
5433
5434 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5435 build_int_cst (integer_type_node,
5436 IFN_GOACC_LOOP_STEP),
5437 dir, range, s, chunk_size, gwv);
5438 gimple_call_set_lhs (call, step);
5439 gimple_set_location (call, loc);
5440 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5441
5442 /* Remove the GIMPLE_OMP_FOR. */
5443 gsi_remove (&gsi, true);
5444
5445 /* Fixup edges from head_bb. */
5446 be = BRANCH_EDGE (head_bb);
5447 fte = FALLTHRU_EDGE (head_bb);
5448 be->flags |= EDGE_FALSE_VALUE;
5449 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5450
5451 basic_block body_bb = fte->dest;
5452
5453 if (gimple_in_ssa_p (cfun))
5454 {
5455 gsi = gsi_last_bb (cont_bb);
5456 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5457
5458 offset = gimple_omp_continue_control_use (cont_stmt);
5459 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5460 }
5461 else
5462 {
5463 offset = create_tmp_var (diff_type, ".offset");
5464 offset_init = offset_incr = offset;
5465 }
5466 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5467
5468 /* Loop offset & bound go into head_bb. */
5469 gsi = gsi_start_bb (head_bb);
5470
5471 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5472 build_int_cst (integer_type_node,
5473 IFN_GOACC_LOOP_OFFSET),
5474 dir, range, s,
5475 chunk_size, gwv, chunk_no);
5476 gimple_call_set_lhs (call, offset_init);
5477 gimple_set_location (call, loc);
5478 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5479
5480 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5481 build_int_cst (integer_type_node,
5482 IFN_GOACC_LOOP_BOUND),
5483 dir, range, s,
5484 chunk_size, gwv, offset_init);
5485 gimple_call_set_lhs (call, bound);
5486 gimple_set_location (call, loc);
5487 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5488
5489 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5490 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5491 GSI_CONTINUE_LINKING);
5492
5493 /* V assignment goes into body_bb. */
5494 if (!gimple_in_ssa_p (cfun))
5495 {
5496 gsi = gsi_start_bb (body_bb);
5497
5498 expr = build2 (plus_code, iter_type, b,
5499 fold_convert (plus_type, offset));
5500 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5501 true, GSI_SAME_STMT);
5502 ass = gimple_build_assign (v, expr);
5503 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5504 if (fd->collapse > 1)
5505 expand_oacc_collapse_vars (fd, &gsi, counts, v);
5506 }
5507
5508 /* Loop increment goes into cont_bb. If this is not a loop, we
5509 will have spawned threads as if it was, and each one will
5510 execute one iteration. The specification is not explicit about
5511 whether such constructs are ill-formed or not, and they can
5512 occur, especially when noreturn routines are involved. */
5513 if (cont_bb)
5514 {
5515 gsi = gsi_last_bb (cont_bb);
5516 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5517 loc = gimple_location (cont_stmt);
5518
5519 /* Increment offset. */
5520 if (gimple_in_ssa_p (cfun))
5521 expr = build2 (plus_code, iter_type, offset,
5522 fold_convert (plus_type, step));
5523 else
5524 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5525 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5526 true, GSI_SAME_STMT);
5527 ass = gimple_build_assign (offset_incr, expr);
5528 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5529 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5530 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5531
5532 /* Remove the GIMPLE_OMP_CONTINUE. */
5533 gsi_remove (&gsi, true);
5534
5535 /* Fixup edges from cont_bb. */
5536 be = BRANCH_EDGE (cont_bb);
5537 fte = FALLTHRU_EDGE (cont_bb);
5538 be->flags |= EDGE_TRUE_VALUE;
5539 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5540
5541 if (chunking)
5542 {
5543 /* Split the beginning of exit_bb to make bottom_bb. We
5544 need to insert a nop at the start, because splitting is
5545 after a stmt, not before. */
5546 gsi = gsi_start_bb (exit_bb);
5547 stmt = gimple_build_nop ();
5548 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5549 split = split_block (exit_bb, stmt);
5550 bottom_bb = split->src;
5551 exit_bb = split->dest;
5552 gsi = gsi_last_bb (bottom_bb);
5553
5554 /* Chunk increment and test goes into bottom_bb. */
5555 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5556 build_int_cst (diff_type, 1));
5557 ass = gimple_build_assign (chunk_no, expr);
5558 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5559
5560 /* Chunk test at end of bottom_bb. */
5561 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5562 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5563 GSI_CONTINUE_LINKING);
5564
5565 /* Fixup edges from bottom_bb. */
5566 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5567 make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5568 }
5569 }
5570
5571 gsi = gsi_last_bb (exit_bb);
5572 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5573 loc = gimple_location (gsi_stmt (gsi));
5574
5575 if (!gimple_in_ssa_p (cfun))
5576 {
5577 /* Insert the final value of V, in case it is live. This is the
5578 value for the only thread that survives past the join. */
5579 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5580 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5581 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5582 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5583 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5584 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5585 true, GSI_SAME_STMT);
5586 ass = gimple_build_assign (v, expr);
5587 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5588 }
5589
5590 /* Remove the OMP_RETURN. */
5591 gsi_remove (&gsi, true);
5592
5593 if (cont_bb)
5594 {
5595 /* We now have one or two nested loops. Update the loop
5596 structures. */
5597 struct loop *parent = entry_bb->loop_father;
5598 struct loop *body = body_bb->loop_father;
5599
5600 if (chunking)
5601 {
5602 struct loop *chunk_loop = alloc_loop ();
5603 chunk_loop->header = head_bb;
5604 chunk_loop->latch = bottom_bb;
5605 add_loop (chunk_loop, parent);
5606 parent = chunk_loop;
5607 }
5608 else if (parent != body)
5609 {
5610 gcc_assert (body->header == body_bb);
5611 gcc_assert (body->latch == cont_bb
5612 || single_pred (body->latch) == cont_bb);
5613 parent = NULL;
5614 }
5615
5616 if (parent)
5617 {
5618 struct loop *body_loop = alloc_loop ();
5619 body_loop->header = body_bb;
5620 body_loop->latch = cont_bb;
5621 add_loop (body_loop, parent);
5622 }
5623 }
5624}
5625
5626/* Expand the OMP loop defined by REGION. */
5627
5628static void
5629expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5630{
5631 struct omp_for_data fd;
5632 struct omp_for_data_loop *loops;
5633
5634 loops
5635 = (struct omp_for_data_loop *)
5636 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5637 * sizeof (struct omp_for_data_loop));
5638 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5639 &fd, loops);
5640 region->sched_kind = fd.sched_kind;
5641 region->sched_modifiers = fd.sched_modifiers;
5642
5643 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5644 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5645 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5646 if (region->cont)
5647 {
5648 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5649 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5650 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5651 }
5652 else
5653 /* If there isn't a continue then this is a degenerate case where
5654 the introduction of abnormal edges during lowering will prevent
5655 original loops from being detected. Fix that up. */
5656 loops_state_set (LOOPS_NEED_FIXUP);
5657
5658 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5659 expand_omp_simd (region, &fd);
5660 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5661 expand_cilk_for (region, &fd);
5662 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5663 {
5664 gcc_assert (!inner_stmt);
5665 expand_oacc_for (region, &fd);
5666 }
5667 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5668 {
5669 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5670 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5671 else
5672 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5673 }
5674 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5675 && !fd.have_ordered)
5676 {
5677 if (fd.chunk_size == NULL)
5678 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5679 else
5680 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5681 }
5682 else
5683 {
5684 int fn_index, start_ix, next_ix;
5685
5686 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5687 == GF_OMP_FOR_KIND_FOR);
5688 if (fd.chunk_size == NULL
5689 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5690 fd.chunk_size = integer_zero_node;
5691 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5692 switch (fd.sched_kind)
5693 {
5694 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5695 fn_index = 3;
5696 break;
5697 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5698 case OMP_CLAUSE_SCHEDULE_GUIDED:
5699 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5700 && !fd.ordered
5701 && !fd.have_ordered)
5702 {
5703 fn_index = 3 + fd.sched_kind;
5704 break;
5705 }
5706 /* FALLTHRU */
5707 default:
5708 fn_index = fd.sched_kind;
5709 break;
5710 }
5711 if (!fd.ordered)
5712 fn_index += fd.have_ordered * 6;
5713 if (fd.ordered)
5714 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5715 else
5716 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5717 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5718 if (fd.iter_type == long_long_unsigned_type_node)
5719 {
5720 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5721 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5722 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5723 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5724 }
5725 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5726 (enum built_in_function) next_ix, inner_stmt);
5727 }
5728
5729 if (gimple_in_ssa_p (cfun))
5730 update_ssa (TODO_update_ssa_only_virtuals);
5731}
5732
5733/* Expand code for an OpenMP sections directive. In pseudo code, we generate
5734
5735 v = GOMP_sections_start (n);
5736 L0:
5737 switch (v)
5738 {
5739 case 0:
5740 goto L2;
5741 case 1:
5742 section 1;
5743 goto L1;
5744 case 2:
5745 ...
5746 case n:
5747 ...
5748 default:
5749 abort ();
5750 }
5751 L1:
5752 v = GOMP_sections_next ();
5753 goto L0;
5754 L2:
5755 reduction;
5756
5757 If this is a combined parallel sections construct, replace the call to
5758 GOMP_sections_start with a call to GOMP_sections_next. */
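/* For example (a sketch only), with two sections

     #pragma omp sections
     {
       #pragma omp section
       work1 ();
       #pragma omp section
       work2 ();
     }

   the switch built below dispatches on v = GOMP_sections_start (2):
   case 1 runs work1, case 2 runs work2, case 0 leaves the construct,
   and the default case traps.  */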
5759
5760static void
5761expand_omp_sections (struct omp_region *region)
5762{
5763 tree t, u, vin = NULL, vmain, vnext, l2;
5764 unsigned len;
5765 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5766 gimple_stmt_iterator si, switch_si;
5767 gomp_sections *sections_stmt;
5768 gimple *stmt;
5769 gomp_continue *cont;
5770 edge_iterator ei;
5771 edge e;
5772 struct omp_region *inner;
5773 unsigned i, casei;
5774 bool exit_reachable = region->cont != NULL;
5775
5776 gcc_assert (region->exit != NULL);
5777 entry_bb = region->entry;
5778 l0_bb = single_succ (entry_bb);
5779 l1_bb = region->cont;
5780 l2_bb = region->exit;
5781 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5782 l2 = gimple_block_label (l2_bb);
5783 else
5784 {
5785 /* This can happen if there are reductions. */
5786 len = EDGE_COUNT (l0_bb->succs);
5787 gcc_assert (len > 0);
5788 e = EDGE_SUCC (l0_bb, len - 1);
5789 si = gsi_last_bb (e->dest);
5790 l2 = NULL_TREE;
5791 if (gsi_end_p (si)
5792 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5793 l2 = gimple_block_label (e->dest);
5794 else
5795 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5796 {
5797 si = gsi_last_bb (e->dest);
5798 if (gsi_end_p (si)
5799 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5800 {
5801 l2 = gimple_block_label (e->dest);
5802 break;
5803 }
5804 }
5805 }
5806 if (exit_reachable)
5807 default_bb = create_empty_bb (l1_bb->prev_bb);
5808 else
5809 default_bb = create_empty_bb (l0_bb);
5810
5811 /* We will build a switch() with enough cases for all the
5812 GIMPLE_OMP_SECTION regions, a '0' case to handle the end of more work
5813 and a default case to abort if something goes wrong. */
5814 len = EDGE_COUNT (l0_bb->succs);
5815
5816 /* Use vec::quick_push on label_vec throughout, since we know the size
5817 in advance. */
5818 auto_vec<tree> label_vec (len);
5819
5820 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5821 GIMPLE_OMP_SECTIONS statement. */
5822 si = gsi_last_bb (entry_bb);
5823 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5824 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5825 vin = gimple_omp_sections_control (sections_stmt);
5826 if (!is_combined_parallel (region))
5827 {
5828 /* If we are not inside a combined parallel+sections region,
5829 call GOMP_sections_start. */
5830 t = build_int_cst (unsigned_type_node, len - 1);
5831 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5832 stmt = gimple_build_call (u, 1, t);
5833 }
5834 else
5835 {
5836 /* Otherwise, call GOMP_sections_next. */
5837 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5838 stmt = gimple_build_call (u, 0);
5839 }
5840 gimple_call_set_lhs (stmt, vin);
5841 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5842 gsi_remove (&si, true);
5843
5844 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5845 L0_BB. */
5846 switch_si = gsi_last_bb (l0_bb);
5847 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5848 if (exit_reachable)
5849 {
5850 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5851 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5852 vmain = gimple_omp_continue_control_use (cont);
5853 vnext = gimple_omp_continue_control_def (cont);
5854 }
5855 else
5856 {
5857 vmain = vin;
5858 vnext = NULL_TREE;
5859 }
5860
5861 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5862 label_vec.quick_push (t);
5863 i = 1;
5864
5865 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5866 for (inner = region->inner, casei = 1;
5867 inner;
5868 inner = inner->next, i++, casei++)
5869 {
5870 basic_block s_entry_bb, s_exit_bb;
5871
5872 /* Skip optional reduction region. */
5873 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5874 {
5875 --i;
5876 --casei;
5877 continue;
5878 }
5879
5880 s_entry_bb = inner->entry;
5881 s_exit_bb = inner->exit;
5882
5883 t = gimple_block_label (s_entry_bb);
5884 u = build_int_cst (unsigned_type_node, casei);
5885 u = build_case_label (u, NULL, t);
5886 label_vec.quick_push (u);
5887
5888 si = gsi_last_bb (s_entry_bb);
5889 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5890 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5891 gsi_remove (&si, true);
5892 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5893
5894 if (s_exit_bb == NULL)
5895 continue;
5896
5897 si = gsi_last_bb (s_exit_bb);
5898 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5899 gsi_remove (&si, true);
5900
5901 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5902 }
5903
5904 /* Error handling code goes in DEFAULT_BB. */
5905 t = gimple_block_label (default_bb);
5906 u = build_case_label (NULL, NULL, t);
5907 make_edge (l0_bb, default_bb, 0);
5908 add_bb_to_loop (default_bb, current_loops->tree_root);
5909
5910 stmt = gimple_build_switch (vmain, u, label_vec);
5911 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5912 gsi_remove (&switch_si, true);
5913
5914 si = gsi_start_bb (default_bb);
5915 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5916 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5917
5918 if (exit_reachable)
5919 {
5920 tree bfn_decl;
5921
5922 /* Code to get the next section goes in L1_BB. */
5923 si = gsi_last_bb (l1_bb);
5924 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5925
5926 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5927 stmt = gimple_build_call (bfn_decl, 0);
5928 gimple_call_set_lhs (stmt, vnext);
5929 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5930 gsi_remove (&si, true);
5931
5932 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5933 }
5934
5935 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
5936 si = gsi_last_bb (l2_bb);
5937 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5938 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5939 else if (gimple_omp_return_lhs (gsi_stmt (si)))
5940 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5941 else
5942 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5943 stmt = gimple_build_call (t, 0);
5944 if (gimple_omp_return_lhs (gsi_stmt (si)))
5945 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5946 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5947 gsi_remove (&si, true);
5948
5949 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5950}
5951
5952/* Expand code for an OpenMP single directive. We've already expanded
5953 much of the code; here we simply place the GOMP_barrier call. */
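/* A sketch, for illustration: for

     #pragma omp single
       work ();

   the body and the GOMP_single_start test were already emitted during
   lowering; all that is left here is to drop the entry/exit markers and,
   unless a nowait clause was present, insert the implicit barrier via
   omp_build_barrier at the exit.  */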
5954
5955static void
5956expand_omp_single (struct omp_region *region)
5957{
5958 basic_block entry_bb, exit_bb;
5959 gimple_stmt_iterator si;
5960
5961 entry_bb = region->entry;
5962 exit_bb = region->exit;
5963
5964 si = gsi_last_bb (entry_bb);
5965 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5966 gsi_remove (&si, true);
5967 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5968
5969 si = gsi_last_bb (exit_bb);
5970 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5971 {
5972 tree t = gimple_omp_return_lhs (gsi_stmt (si));
5973 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5974 }
5975 gsi_remove (&si, true);
5976 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5977}
5978
5979/* Generic expansion for OpenMP synchronization directives: master,
5980 ordered and critical. All we need to do here is remove the entry
5981 and exit markers for REGION. */
5982
5983static void
5984expand_omp_synch (struct omp_region *region)
5985{
5986 basic_block entry_bb, exit_bb;
5987 gimple_stmt_iterator si;
5988
5989 entry_bb = region->entry;
5990 exit_bb = region->exit;
5991
5992 si = gsi_last_bb (entry_bb);
5993 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5994 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5995 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
5996 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
5997 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
5998 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
5999 gsi_remove (&si, true);
6000 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6001
6002 if (exit_bb)
6003 {
6004 si = gsi_last_bb (exit_bb);
6005 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6006 gsi_remove (&si, true);
6007 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6008 }
6009}
6010
6011/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6012 operation as a normal volatile load. */
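/* For instance (a sketch; the exact builtin depends on INDEX and on
   whether the target provides it): for

     long x;
     #pragma omp atomic read
       v = x;

   INDEX would be 3 on a target where long is 8 bytes, and the call
   built below amounts to

     v = __atomic_load_8 (&x, MEMMODEL_RELAXED);

   with MEMMODEL_SEQ_CST instead when the seq_cst clause is present.  */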
6013
6014static bool
6015expand_omp_atomic_load (basic_block load_bb, tree addr,
6016 tree loaded_val, int index)
6017{
6018 enum built_in_function tmpbase;
6019 gimple_stmt_iterator gsi;
6020 basic_block store_bb;
6021 location_t loc;
6022 gimple *stmt;
6023 tree decl, call, type, itype;
6024
6025 gsi = gsi_last_bb (load_bb);
6026 stmt = gsi_stmt (gsi);
6027 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6028 loc = gimple_location (stmt);
6029
6030 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6031 is smaller than word size, then expand_atomic_load assumes that the load
6032 is atomic. We could avoid the builtin entirely in this case. */
6033
6034 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6035 decl = builtin_decl_explicit (tmpbase);
6036 if (decl == NULL_TREE)
6037 return false;
6038
6039 type = TREE_TYPE (loaded_val);
6040 itype = TREE_TYPE (TREE_TYPE (decl));
6041
6042 call = build_call_expr_loc (loc, decl, 2, addr,
6043 build_int_cst (NULL,
6044 gimple_omp_atomic_seq_cst_p (stmt)
6045 ? MEMMODEL_SEQ_CST
6046 : MEMMODEL_RELAXED));
6047 if (!useless_type_conversion_p (type, itype))
6048 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6049 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6050
6051 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6052 gsi_remove (&gsi, true);
6053
6054 store_bb = single_succ (load_bb);
6055 gsi = gsi_last_bb (store_bb);
6056 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6057 gsi_remove (&gsi, true);
6058
6059 if (gimple_in_ssa_p (cfun))
6060 update_ssa (TODO_update_ssa_no_phi);
6061
6062 return true;
6063}
6064
6065/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6066 operation as a normal volatile store. */
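/* Similarly, a sketch: for

     #pragma omp atomic write
       x = expr;

   this emits roughly __atomic_store_8 (&x, expr, MEMMODEL_RELAXED) for
   an 8-byte X, while a capture form that still needs the previous value
   takes the exchange path below and uses __atomic_exchange_8 instead.
   The exact builtin again depends on INDEX.  */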
6067
6068static bool
6069expand_omp_atomic_store (basic_block load_bb, tree addr,
6070 tree loaded_val, tree stored_val, int index)
6071{
6072 enum built_in_function tmpbase;
6073 gimple_stmt_iterator gsi;
6074 basic_block store_bb = single_succ (load_bb);
6075 location_t loc;
6076 gimple *stmt;
6077 tree decl, call, type, itype;
6078 machine_mode imode;
6079 bool exchange;
6080
6081 gsi = gsi_last_bb (load_bb);
6082 stmt = gsi_stmt (gsi);
6083 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6084
6085 /* If the load value is needed, then this isn't a store but an exchange. */
6086 exchange = gimple_omp_atomic_need_value_p (stmt);
6087
6088 gsi = gsi_last_bb (store_bb);
6089 stmt = gsi_stmt (gsi);
6090 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6091 loc = gimple_location (stmt);
6092
6093 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6094 is smaller than word size, then expand_atomic_store assumes that the store
6095 is atomic. We could avoid the builtin entirely in this case. */
6096
6097 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6098 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6099 decl = builtin_decl_explicit (tmpbase);
6100 if (decl == NULL_TREE)
6101 return false;
6102
6103 type = TREE_TYPE (stored_val);
6104
6105 /* Dig out the type of the function's second argument. */
6106 itype = TREE_TYPE (decl);
6107 itype = TYPE_ARG_TYPES (itype);
6108 itype = TREE_CHAIN (itype);
6109 itype = TREE_VALUE (itype);
6110 imode = TYPE_MODE (itype);
6111
6112 if (exchange && !can_atomic_exchange_p (imode, true))
6113 return false;
6114
6115 if (!useless_type_conversion_p (itype, type))
6116 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6117 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6118 build_int_cst (NULL,
6119 gimple_omp_atomic_seq_cst_p (stmt)
6120 ? MEMMODEL_SEQ_CST
6121 : MEMMODEL_RELAXED));
6122 if (exchange)
6123 {
6124 if (!useless_type_conversion_p (type, itype))
6125 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6126 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6127 }
6128
6129 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6130 gsi_remove (&gsi, true);
6131
6132 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6133 gsi = gsi_last_bb (load_bb);
6134 gsi_remove (&gsi, true);
6135
6136 if (gimple_in_ssa_p (cfun))
6137 update_ssa (TODO_update_ssa_no_phi);
6138
6139 return true;
6140}
6141
6142/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6143 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6144 size of the data type, and thus usable to find the index of the builtin
6145 decl. Returns false if the expression is not of the proper form. */
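/* For example (a sketch; the builtin chosen depends on INDEX and on
   whether the old or new value is needed):

     int x;
     #pragma omp atomic
       x += inc;

   matches the PLUS_EXPR case below and becomes roughly

     __atomic_fetch_add_4 (&x, inc, MEMMODEL_RELAXED);

   whereas a capture form such as "v = x += inc;" needs the new value and
   would use __atomic_add_fetch_4, assigning its result to v.  */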
6146
6147static bool
6148expand_omp_atomic_fetch_op (basic_block load_bb,
6149 tree addr, tree loaded_val,
6150 tree stored_val, int index)
6151{
6152 enum built_in_function oldbase, newbase, tmpbase;
6153 tree decl, itype, call;
6154 tree lhs, rhs;
6155 basic_block store_bb = single_succ (load_bb);
6156 gimple_stmt_iterator gsi;
6157 gimple *stmt;
6158 location_t loc;
6159 enum tree_code code;
6160 bool need_old, need_new;
6161 machine_mode imode;
6162 bool seq_cst;
6163
6164 /* We expect to find the following sequences:
6165
6166 load_bb:
6167 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6168
6169 store_bb:
6170 val = tmp OP something; (or: something OP tmp)
6171 GIMPLE_OMP_STORE (val)
6172
6173 ???FIXME: Allow a more flexible sequence.
6174 Perhaps use data flow to pick the statements.
6175
6176 */
6177
6178 gsi = gsi_after_labels (store_bb);
6179 stmt = gsi_stmt (gsi);
6180 loc = gimple_location (stmt);
6181 if (!is_gimple_assign (stmt))
6182 return false;
6183 gsi_next (&gsi);
6184 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6185 return false;
6186 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6187 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6188 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6189 gcc_checking_assert (!need_old || !need_new);
6190
6191 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6192 return false;
6193
6194 /* Check for one of the supported fetch-op operations. */
6195 code = gimple_assign_rhs_code (stmt);
6196 switch (code)
6197 {
6198 case PLUS_EXPR:
6199 case POINTER_PLUS_EXPR:
6200 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6201 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6202 break;
6203 case MINUS_EXPR:
6204 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6205 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6206 break;
6207 case BIT_AND_EXPR:
6208 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6209 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6210 break;
6211 case BIT_IOR_EXPR:
6212 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6213 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6214 break;
6215 case BIT_XOR_EXPR:
6216 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6217 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6218 break;
6219 default:
6220 return false;
6221 }
6222
6223 /* Make sure the expression is of the proper form. */
6224 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6225 rhs = gimple_assign_rhs2 (stmt);
6226 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6227 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6228 rhs = gimple_assign_rhs1 (stmt);
6229 else
6230 return false;
6231
6232 tmpbase = ((enum built_in_function)
6233 ((need_new ? newbase : oldbase) + index + 1));
6234 decl = builtin_decl_explicit (tmpbase);
6235 if (decl == NULL_TREE)
6236 return false;
6237 itype = TREE_TYPE (TREE_TYPE (decl));
6238 imode = TYPE_MODE (itype);
6239
6240 /* We could test all of the various optabs involved, but the fact of the
6241 matter is that (with the exception of i486 vs i586 and xadd) all targets
6242 that support any atomic operation optab also implement compare-and-swap.
6243 Let optabs.c take care of expanding any compare-and-swap loop. */
6244 if (!can_compare_and_swap_p (imode, true))
6245 return false;
6246
6247 gsi = gsi_last_bb (load_bb);
6248 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6249
6250 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6251 It only requires that the operation happen atomically. Thus we can
6252 use the RELAXED memory model. */
6253 call = build_call_expr_loc (loc, decl, 3, addr,
6254 fold_convert_loc (loc, itype, rhs),
6255 build_int_cst (NULL,
6256 seq_cst ? MEMMODEL_SEQ_CST
6257 : MEMMODEL_RELAXED));
6258
6259 if (need_old || need_new)
6260 {
6261 lhs = need_old ? loaded_val : stored_val;
6262 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6263 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6264 }
6265 else
6266 call = fold_convert_loc (loc, void_type_node, call);
6267 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6268 gsi_remove (&gsi, true);
6269
6270 gsi = gsi_last_bb (store_bb);
6271 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6272 gsi_remove (&gsi, true);
6273 gsi = gsi_last_bb (store_bb);
6274 stmt = gsi_stmt (gsi);
6275 gsi_remove (&gsi, true);
6276
6277 if (gimple_in_ssa_p (cfun))
6278 {
6279 release_defs (stmt);
6280 update_ssa (TODO_update_ssa_no_phi);
6281 }
6282
6283 return true;
6284}
6285
6286/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6287
6288 oldval = *addr;
6289 repeat:
6290 newval = rhs; // with oldval replacing *addr in rhs
6291 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6292 if (oldval != newval)
6293 goto repeat;
6294
6295 INDEX is log2 of the size of the data type, and thus usable to find the
6296 index of the builtin decl. */
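/* As an illustration (a sketch, not the emitted gimple): for

     double x;
     #pragma omp atomic
       x += d;

   no fetch-op builtin applies, so the value is view-converted to a
   same-sized integer and the loop below is built around
   __sync_val_compare_and_swap_8, retrying until the location still
   holds the value that was originally loaded.  */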
6297
6298static bool
6299expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6300 tree addr, tree loaded_val, tree stored_val,
6301 int index)
6302{
6303 tree loadedi, storedi, initial, new_storedi, old_vali;
6304 tree type, itype, cmpxchg, iaddr;
6305 gimple_stmt_iterator si;
6306 basic_block loop_header = single_succ (load_bb);
6307 gimple *phi, *stmt;
6308 edge e;
6309 enum built_in_function fncode;
6310
6311 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6312 order to use the RELAXED memory model effectively. */
6313 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6314 + index + 1);
6315 cmpxchg = builtin_decl_explicit (fncode);
6316 if (cmpxchg == NULL_TREE)
6317 return false;
6318 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6319 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6320
6321 if (!can_compare_and_swap_p (TYPE_MODE (itype), true))
6322 return false;
6323
6324 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6325 si = gsi_last_bb (load_bb);
6326 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6327
6328 /* For floating-point values, we'll need to view-convert them to integers
6329 so that we can perform the atomic compare and swap. Simplify the
6330 following code by always setting up the "i"ntegral variables. */
6331 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6332 {
6333 tree iaddr_val;
6334
6335 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6336 true));
6337 iaddr_val
6338 = force_gimple_operand_gsi (&si,
6339 fold_convert (TREE_TYPE (iaddr), addr),
6340 false, NULL_TREE, true, GSI_SAME_STMT);
6341 stmt = gimple_build_assign (iaddr, iaddr_val);
6342 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6343 loadedi = create_tmp_var (itype);
6344 if (gimple_in_ssa_p (cfun))
6345 loadedi = make_ssa_name (loadedi);
6346 }
6347 else
6348 {
6349 iaddr = addr;
6350 loadedi = loaded_val;
6351 }
6352
6353 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6354 tree loaddecl = builtin_decl_explicit (fncode);
6355 if (loaddecl)
6356 initial
6357 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6358 build_call_expr (loaddecl, 2, iaddr,
6359 build_int_cst (NULL_TREE,
6360 MEMMODEL_RELAXED)));
6361 else
6362 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6363 build_int_cst (TREE_TYPE (iaddr), 0));
6364
6365 initial
6366 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6367 GSI_SAME_STMT);
6368
6369 /* Move the value to the LOADEDI temporary. */
6370 if (gimple_in_ssa_p (cfun))
6371 {
6372 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6373 phi = create_phi_node (loadedi, loop_header);
6374 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6375 initial);
6376 }
6377 else
6378 gsi_insert_before (&si,
6379 gimple_build_assign (loadedi, initial),
6380 GSI_SAME_STMT);
6381 if (loadedi != loaded_val)
6382 {
6383 gimple_stmt_iterator gsi2;
6384 tree x;
6385
6386 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6387 gsi2 = gsi_start_bb (loop_header);
6388 if (gimple_in_ssa_p (cfun))
6389 {
6390 gassign *stmt;
6391 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6392 true, GSI_SAME_STMT);
6393 stmt = gimple_build_assign (loaded_val, x);
6394 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6395 }
6396 else
6397 {
6398 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6399 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6400 true, GSI_SAME_STMT);
6401 }
6402 }
6403 gsi_remove (&si, true);
6404
6405 si = gsi_last_bb (store_bb);
6406 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6407
6408 if (iaddr == addr)
6409 storedi = stored_val;
6410 else
6411 storedi
6412 = force_gimple_operand_gsi (&si,
6413 build1 (VIEW_CONVERT_EXPR, itype,
6414 stored_val), true, NULL_TREE, true,
6415 GSI_SAME_STMT);
6416
6417 /* Build the compare&swap statement. */
6418 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6419 new_storedi = force_gimple_operand_gsi (&si,
6420 fold_convert (TREE_TYPE (loadedi),
6421 new_storedi),
6422 true, NULL_TREE,
6423 true, GSI_SAME_STMT);
6424
6425 if (gimple_in_ssa_p (cfun))
6426 old_vali = loadedi;
6427 else
6428 {
6429 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6430 stmt = gimple_build_assign (old_vali, loadedi);
6431 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6432
6433 stmt = gimple_build_assign (loadedi, new_storedi);
6434 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6435 }
6436
6437 /* Note that we always perform the comparison as an integer, even for
6438 floating point. This allows the atomic operation to properly
6439 succeed even with NaNs and -0.0. */
6440 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6441 stmt = gimple_build_cond_empty (ne);
6442 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6443
6444 /* Update cfg. */
6445 e = single_succ_edge (store_bb);
6446 e->flags &= ~EDGE_FALLTHRU;
6447 e->flags |= EDGE_FALSE_VALUE;
6448
6449 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6450
6451 /* Copy the new value to loadedi (we already did that before the condition
6452 if we are not in SSA). */
6453 if (gimple_in_ssa_p (cfun))
6454 {
6455 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6456 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6457 }
6458
6459 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6460 gsi_remove (&si, true);
6461
6462 struct loop *loop = alloc_loop ();
6463 loop->header = loop_header;
6464 loop->latch = store_bb;
6465 add_loop (loop, loop_header->loop_father);
6466
6467 if (gimple_in_ssa_p (cfun))
6468 update_ssa (TODO_update_ssa_no_phi);
6469
6470 return true;
6471}
6472
6473/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6474
6475 GOMP_atomic_start ();
6476 *addr = rhs;
6477 GOMP_atomic_end ();
6478
6479 The result is not globally atomic, but works so long as all parallel
6480 references are within #pragma omp atomic directives. According to
6481 responses received from omp@openmp.org, this appears to be within spec.
6482 Which makes sense, since that's how several other compilers handle
6483 this situation as well.
6484 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6485 expanding. STORED_VAL is the operand of the matching
6486 GIMPLE_OMP_ATOMIC_STORE.
6487
6488 We replace
6489 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6490 loaded_val = *addr;
6491
6492 and replace
6493 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6494 *addr = stored_val;
6495*/
6496
6497static bool
6498expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6499 tree addr, tree loaded_val, tree stored_val)
6500{
6501 gimple_stmt_iterator si;
6502 gassign *stmt;
6503 tree t;
6504
6505 si = gsi_last_bb (load_bb);
6506 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6507
6508 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6509 t = build_call_expr (t, 0);
6510 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6511
6512 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6513 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6514 gsi_remove (&si, true);
6515
6516 si = gsi_last_bb (store_bb);
6517 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6518
6519 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6520 stored_val);
6521 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6522
6523 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6524 t = build_call_expr (t, 0);
6525 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6526 gsi_remove (&si, true);
6527
6528 if (gimple_in_ssa_p (cfun))
6529 update_ssa (TODO_update_ssa_no_phi);
6530 return true;
6531}
6532
6533/* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand it
6534 using expand_omp_atomic_fetch_op. If that fails, we try to
6535 call expand_omp_atomic_pipeline, and if it fails too, the
6536 ultimate fallback is wrapping the operation in a mutex
6537 (expand_omp_atomic_mutex). REGION is the atomic region built
6538 by build_omp_regions_1(). */
6539
6540static void
6541expand_omp_atomic (struct omp_region *region)
6542{
6543 basic_block load_bb = region->entry, store_bb = region->exit;
6544 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6545 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6546 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6547 tree addr = gimple_omp_atomic_load_rhs (load);
6548 tree stored_val = gimple_omp_atomic_store_val (store);
6549 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6550 HOST_WIDE_INT index;
6551
6552 /* Make sure the type is one of the supported sizes. */
6553 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6554 index = exact_log2 (index);
6555 if (index >= 0 && index <= 4)
6556 {
6557 unsigned int align = TYPE_ALIGN_UNIT (type);
6558
6559 /* __sync builtins require strict data alignment. */
6560 if (exact_log2 (align) >= index)
6561 {
6562 /* Atomic load. */
6563 if (loaded_val == stored_val
6564 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6565 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6566 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6567 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6568 return;
6569
6570 /* Atomic store. */
6571 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6572 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6573 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6574 && store_bb == single_succ (load_bb)
6575 && first_stmt (store_bb) == store
6576 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6577 stored_val, index))
6578 return;
6579
6580 /* When possible, use specialized atomic update functions. */
6581 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6582 && store_bb == single_succ (load_bb)
6583 && expand_omp_atomic_fetch_op (load_bb, addr,
6584 loaded_val, stored_val, index))
6585 return;
6586
6587 /* If we don't have specialized __sync builtins, try and implement
6588 as a compare and swap loop. */
6589 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6590 loaded_val, stored_val, index))
6591 return;
6592 }
6593 }
6594
6595 /* The ultimate fallback is wrapping the operation in a mutex. */
6596 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6597}
6598
6599/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6600 at REGION_EXIT. */
6601
6602static void
6603mark_loops_in_oacc_kernels_region (basic_block region_entry,
6604 basic_block region_exit)
6605{
6606 struct loop *outer = region_entry->loop_father;
6607 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6608
6609 /* Don't parallelize the kernels region if it contains more than one outer
6610 loop. */
6611 unsigned int nr_outer_loops = 0;
6612 struct loop *single_outer = NULL;
6613 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6614 {
6615 gcc_assert (loop_outer (loop) == outer);
6616
6617 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6618 continue;
6619
6620 if (region_exit != NULL
6621 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6622 continue;
6623
6624 nr_outer_loops++;
6625 single_outer = loop;
6626 }
6627 if (nr_outer_loops != 1)
6628 return;
6629
6630 for (struct loop *loop = single_outer->inner;
6631 loop != NULL;
6632 loop = loop->inner)
6633 if (loop->next)
6634 return;
6635
6636 /* Mark the loops in the region. */
6637 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6638 loop->in_oacc_kernels_region = true;
6639}
6640
6641/* Types used to pass grid and workgroup sizes to kernel invocation. */
6642
6643struct GTY(()) grid_launch_attributes_trees
6644{
6645 tree kernel_dim_array_type;
6646 tree kernel_lattrs_dimnum_decl;
6647 tree kernel_lattrs_grid_decl;
6648 tree kernel_lattrs_group_decl;
6649 tree kernel_launch_attributes_type;
6650};
6651
6652static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6653
6654/* Create types used to pass kernel launch attributes to target. */
6655
6656static void
6657grid_create_kernel_launch_attr_types (void)
6658{
6659 if (grid_attr_trees)
6660 return;
6661 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6662
6663 tree dim_arr_index_type
6664 = build_index_type (build_int_cst (integer_type_node, 2));
6665 grid_attr_trees->kernel_dim_array_type
6666 = build_array_type (uint32_type_node, dim_arr_index_type);
6667
6668 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6669 grid_attr_trees->kernel_lattrs_dimnum_decl
6670 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6671 uint32_type_node);
6672 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6673
6674 grid_attr_trees->kernel_lattrs_grid_decl
6675 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6676 grid_attr_trees->kernel_dim_array_type);
6677 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6678 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6679 grid_attr_trees->kernel_lattrs_group_decl
6680 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6681 grid_attr_trees->kernel_dim_array_type);
6682 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6683 = grid_attr_trees->kernel_lattrs_grid_decl;
6684 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6685 "__gomp_kernel_launch_attributes",
6686 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6687}
6688
6689/* Insert before the current statement in GSI a store of VALUE to INDEX of
6690 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6691 of type uint32_type_node. */
6692
6693static void
6694grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6695 tree fld_decl, int index, tree value)
6696{
6697 tree ref = build4 (ARRAY_REF, uint32_type_node,
6698 build3 (COMPONENT_REF,
6699 grid_attr_trees->kernel_dim_array_type,
6700 range_var, fld_decl, NULL_TREE),
6701 build_int_cst (integer_type_node, index),
6702 NULL_TREE, NULL_TREE);
6703 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6704}
6705
6706/* Return a tree representation of a pointer to a structure with grid and
6707 work-group size information. Statements filling in that information will be
6708 inserted before GSI; TGT_STMT is the target statement that carries the
6709 necessary information. */
6710
6711static tree
6712grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6713 gomp_target *tgt_stmt)
6714{
6715 grid_create_kernel_launch_attr_types ();
6716 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6717 "__kernel_launch_attrs");
6718
6719 unsigned max_dim = 0;
6720 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6721 clause;
6722 clause = OMP_CLAUSE_CHAIN (clause))
6723 {
6724 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6725 continue;
6726
6727 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6728 max_dim = MAX (dim, max_dim);
6729
6730 grid_insert_store_range_dim (gsi, lattrs,
6731 grid_attr_trees->kernel_lattrs_grid_decl,
6732 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6733 grid_insert_store_range_dim (gsi, lattrs,
6734 grid_attr_trees->kernel_lattrs_group_decl,
6735 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6736 }
6737
6738 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6739 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6740 gcc_checking_assert (max_dim <= 2);
6741 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6742 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6743 GSI_SAME_STMT);
6744 TREE_ADDRESSABLE (lattrs) = 1;
6745 return build_fold_addr_expr (lattrs);
6746}
6747
6748/* Build target argument identifier from the DEVICE identifier, value
6749 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6750
6751static tree
6752get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6753{
6754 tree t = build_int_cst (integer_type_node, device);
6755 if (subseqent_param)
6756 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6757 build_int_cst (integer_type_node,
6758 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6759 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6760 build_int_cst (integer_type_node, id));
6761 return t;
6762}
6763
6764/* Like above, but return it in a type that can be directly stored as an
6765 element of the argument array. */
6766
6767static tree
6768get_target_argument_identifier (int device, bool subseqent_param, int id)
6769{
6770 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6771 return fold_convert (ptr_type_node, t);
6772}
6773
6774/* Return a target argument consisting of DEVICE identifier, value identifier
6775 ID, and the actual VALUE. */
6776
6777static tree
6778get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6779 tree value)
6780{
6781 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6782 fold_convert (integer_type_node, value),
6783 build_int_cst (unsigned_type_node,
6784 GOMP_TARGET_ARG_VALUE_SHIFT));
6785 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6786 get_target_argument_identifier_1 (device, false, id));
6787 t = fold_convert (ptr_type_node, t);
6788 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6789}
6790
6791/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6792 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it;
6793 otherwise push an identifier (with DEVICE and ID) and the VALUE as two
6794 separate arguments. */
6795
6796static void
6797push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6798 int id, tree value, vec <tree> *args)
6799{
6800 if (tree_fits_shwi_p (value)
6801 && tree_to_shwi (value) > -(1 << 15)
6802 && tree_to_shwi (value) < (1 << 15))
6803 args->quick_push (get_target_argument_value (gsi, device, id, value));
6804 else
6805 {
6806 args->quick_push (get_target_argument_identifier (device, true, id));
6807 value = fold_convert (ptr_type_node, value);
6808 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6809 GSI_SAME_STMT);
6810 args->quick_push (value);
6811 }
6812}
6813
6814/* Create an array of arguments that is then passed to GOMP_target. */
6815
6816static tree
6817get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6818{
6819 auto_vec <tree, 6> args;
6820 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6821 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6822 if (c)
6823 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6824 else
6825 t = integer_minus_one_node;
6826 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6827 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6828
6829 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6830 if (c)
6831 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6832 else
6833 t = integer_minus_one_node;
6834 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6835 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6836 &args);
6837
6838 /* Add HSA-specific grid sizes, if available. */
6839 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6840 OMP_CLAUSE__GRIDDIM_))
6841 {
6842 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6843 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
6844 args.quick_push (t);
6845 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6846 }
6847
6848 /* Produce more, perhaps device specific, arguments here. */
6849
6850 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6851 args.length () + 1),
6852 ".omp_target_args");
6853 for (unsigned i = 0; i < args.length (); i++)
6854 {
6855 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6856 build_int_cst (integer_type_node, i),
6857 NULL_TREE, NULL_TREE);
6858 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6859 GSI_SAME_STMT);
6860 }
6861 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6862 build_int_cst (integer_type_node, args.length ()),
6863 NULL_TREE, NULL_TREE);
6864 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6865 GSI_SAME_STMT);
6866 TREE_ADDRESSABLE (argarray) = 1;
6867 return build_fold_addr_expr (argarray);
6868}
6869
6870/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
6871
6872static void
6873expand_omp_target (struct omp_region *region)
6874{
6875 basic_block entry_bb, exit_bb, new_bb;
6876 struct function *child_cfun;
6877 tree child_fn, block, t;
6878 gimple_stmt_iterator gsi;
6879 gomp_target *entry_stmt;
6880 gimple *stmt;
6881 edge e;
6882 bool offloaded, data_region;
6883
6884 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6885 new_bb = region->entry;
6886
6887 offloaded = is_gimple_omp_offloaded (entry_stmt);
6888 switch (gimple_omp_target_kind (entry_stmt))
6889 {
6890 case GF_OMP_TARGET_KIND_REGION:
6891 case GF_OMP_TARGET_KIND_UPDATE:
6892 case GF_OMP_TARGET_KIND_ENTER_DATA:
6893 case GF_OMP_TARGET_KIND_EXIT_DATA:
6894 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6895 case GF_OMP_TARGET_KIND_OACC_KERNELS:
6896 case GF_OMP_TARGET_KIND_OACC_UPDATE:
6897 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6898 case GF_OMP_TARGET_KIND_OACC_DECLARE:
6899 data_region = false;
6900 break;
6901 case GF_OMP_TARGET_KIND_DATA:
6902 case GF_OMP_TARGET_KIND_OACC_DATA:
6903 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6904 data_region = true;
6905 break;
6906 default:
6907 gcc_unreachable ();
6908 }
6909
6910 child_fn = NULL_TREE;
6911 child_cfun = NULL;
6912 if (offloaded)
6913 {
6914 child_fn = gimple_omp_target_child_fn (entry_stmt);
6915 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6916 }
6917
6918 /* Supported by expand_omp_taskreg, but not here. */
6919 if (child_cfun != NULL)
6920 gcc_checking_assert (!child_cfun->cfg);
6921 gcc_checking_assert (!gimple_in_ssa_p (cfun));
6922
6923 entry_bb = region->entry;
6924 exit_bb = region->exit;
6925
6926 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6927 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6928
6929 if (offloaded)
6930 {
6931 unsigned srcidx, dstidx, num;
6932
6933 /* If the offloading region needs data sent from the parent
6934 function, then the very first statement (except possible
6935 tree profile counter updates) of the offloading body
6936 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6937 &.OMP_DATA_O is passed as an argument to the child function,
6938 we need to replace it with the argument as seen by the child
6939 function.
6940
6941 In most cases, this will end up being the identity assignment
6942 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6943 a function call that has been inlined, the original PARM_DECL
6944 .OMP_DATA_I may have been converted into a different local
6945 variable, in which case we need to keep the assignment. */
6946 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6947 if (data_arg)
6948 {
6949 basic_block entry_succ_bb = single_succ (entry_bb);
6950 gimple_stmt_iterator gsi;
6951 tree arg;
6952 gimple *tgtcopy_stmt = NULL;
6953 tree sender = TREE_VEC_ELT (data_arg, 0);
6954
6955 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6956 {
6957 gcc_assert (!gsi_end_p (gsi));
6958 stmt = gsi_stmt (gsi);
6959 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6960 continue;
6961
6962 if (gimple_num_ops (stmt) == 2)
6963 {
6964 tree arg = gimple_assign_rhs1 (stmt);
6965
6966 /* We're ignoring the subcode because we're
6967 effectively doing a STRIP_NOPS. */
6968
6969 if (TREE_CODE (arg) == ADDR_EXPR
6970 && TREE_OPERAND (arg, 0) == sender)
6971 {
6972 tgtcopy_stmt = stmt;
6973 break;
6974 }
6975 }
6976 }
6977
6978 gcc_assert (tgtcopy_stmt != NULL);
6979 arg = DECL_ARGUMENTS (child_fn);
6980
6981 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
6982 gsi_remove (&gsi, true);
6983 }
6984
6985 /* Declare local variables needed in CHILD_CFUN. */
6986 block = DECL_INITIAL (child_fn);
6987 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
6988 /* The gimplifier could record temporaries in the offloading block
6989 rather than in the containing function's local_decls chain,
6990 which would mean cgraph missed finalizing them. Do it now. */
6991 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
6992 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
6993 varpool_node::finalize_decl (t);
6994 DECL_SAVED_TREE (child_fn) = NULL;
6995 /* We'll create a CFG for child_fn, so no gimple body is needed. */
6996 gimple_set_body (child_fn, NULL);
6997 TREE_USED (block) = 1;
6998
6999 /* Reset DECL_CONTEXT on function arguments. */
7000 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7001 DECL_CONTEXT (t) = child_fn;
7002
7003 /* Split ENTRY_BB at GIMPLE_*,
7004 so that it can be moved to the child function. */
7005 gsi = gsi_last_bb (entry_bb);
7006 stmt = gsi_stmt (gsi);
7007 gcc_assert (stmt
7008 && gimple_code (stmt) == gimple_code (entry_stmt));
7009 e = split_block (entry_bb, stmt);
7010 gsi_remove (&gsi, true);
7011 entry_bb = e->dest;
7012 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7013
7014 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7015 if (exit_bb)
7016 {
7017 gsi = gsi_last_bb (exit_bb);
7018 gcc_assert (!gsi_end_p (gsi)
7019 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7020 stmt = gimple_build_return (NULL);
7021 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7022 gsi_remove (&gsi, true);
7023 }
7024
7025 /* Make sure to generate early debug for the function before
7026 outlining anything. */
7027 if (! gimple_in_ssa_p (cfun))
7028 (*debug_hooks->early_global_decl) (cfun->decl);
7029
7030 /* Move the offloading region into CHILD_CFUN. */
7031
7032 block = gimple_block (entry_stmt);
7033
7034 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7035 if (exit_bb)
7036 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7037 /* When the OMP expansion process cannot guarantee an up-to-date
7038 loop tree, arrange for the child function to fix up loops. */
7039 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7040 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7041
7042 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7043 num = vec_safe_length (child_cfun->local_decls);
7044 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7045 {
7046 t = (*child_cfun->local_decls)[srcidx];
7047 if (DECL_CONTEXT (t) == cfun->decl)
7048 continue;
7049 if (srcidx != dstidx)
7050 (*child_cfun->local_decls)[dstidx] = t;
7051 dstidx++;
7052 }
7053 if (dstidx != num)
7054 vec_safe_truncate (child_cfun->local_decls, dstidx);
7055
7056 /* Inform the callgraph about the new function. */
7057 child_cfun->curr_properties = cfun->curr_properties;
7058 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7059 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7060 cgraph_node *node = cgraph_node::get_create (child_fn);
7061 node->parallelized_function = 1;
7062 cgraph_node::add_new_function (child_fn, true);
7063
7064 /* Add the new function to the offload table. */
7065 if (ENABLE_OFFLOADING)
7066 vec_safe_push (offload_funcs, child_fn);
7067
7068 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7069 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7070
7071 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7072 fixed in a following pass. */
7073 push_cfun (child_cfun);
7074 if (need_asm)
7075 assign_assembler_name_if_needed (child_fn);
7076 cgraph_edge::rebuild_edges ();
7077
7078 /* Some EH regions might become dead, see PR34608. If
7079 pass_cleanup_cfg isn't the first pass to happen with the
7080 new child, these dead EH edges might cause problems.
7081 Clean them up now. */
7082 if (flag_exceptions)
7083 {
7084 basic_block bb;
7085 bool changed = false;
7086
7087 FOR_EACH_BB_FN (bb, cfun)
7088 changed |= gimple_purge_dead_eh_edges (bb);
7089 if (changed)
7090 cleanup_tree_cfg ();
7091 }
7092 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7093 verify_loop_structure ();
7094 pop_cfun ();
7095
7096 if (dump_file && !gimple_in_ssa_p (cfun))
7097 {
7098 omp_any_child_fn_dumped = true;
7099 dump_function_header (dump_file, child_fn, dump_flags);
7100 dump_function_to_file (child_fn, dump_file, dump_flags);
7101 }
7102 }
7103
7104 /* Emit a library call to launch the offloading region, or do data
7105 transfers. */
7106 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7107 enum built_in_function start_ix;
7108 location_t clause_loc;
7109 unsigned int flags_i = 0;
7110 bool oacc_kernels_p = false;
7111
7112 switch (gimple_omp_target_kind (entry_stmt))
7113 {
7114 case GF_OMP_TARGET_KIND_REGION:
7115 start_ix = BUILT_IN_GOMP_TARGET;
7116 break;
7117 case GF_OMP_TARGET_KIND_DATA:
7118 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7119 break;
7120 case GF_OMP_TARGET_KIND_UPDATE:
7121 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7122 break;
7123 case GF_OMP_TARGET_KIND_ENTER_DATA:
7124 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7125 break;
7126 case GF_OMP_TARGET_KIND_EXIT_DATA:
7127 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7128 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7129 break;
7130 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7131 oacc_kernels_p = true;
7132 /* FALLTHROUGH */
7133 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7134 start_ix = BUILT_IN_GOACC_PARALLEL;
7135 break;
7136 case GF_OMP_TARGET_KIND_OACC_DATA:
7137 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7138 start_ix = BUILT_IN_GOACC_DATA_START;
7139 break;
7140 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7141 start_ix = BUILT_IN_GOACC_UPDATE;
7142 break;
7143 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7144 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7145 break;
7146 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7147 start_ix = BUILT_IN_GOACC_DECLARE;
7148 break;
7149 default:
7150 gcc_unreachable ();
7151 }
7152
7153 clauses = gimple_omp_target_clauses (entry_stmt);
7154
7155 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
7156 library choose) and there is no conditional. */
7157 cond = NULL_TREE;
7158 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7159
7160 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7161 if (c)
7162 cond = OMP_CLAUSE_IF_EXPR (c);
7163
7164 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7165 if (c)
7166 {
7167 /* Even if we pass it to all library function calls, it is currently only
7168 defined/used for the OpenMP target ones. */
7169 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7170 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7171 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7172 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7173
7174 device = OMP_CLAUSE_DEVICE_ID (c);
7175 clause_loc = OMP_CLAUSE_LOCATION (c);
7176 }
7177 else
7178 clause_loc = gimple_location (entry_stmt);
7179
7180 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7181 if (c)
7182 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7183
7184 /* Ensure 'device' is of the correct type. */
7185 device = fold_convert_loc (clause_loc, integer_type_node, device);
7186
7187 /* If we found the clause 'if (cond)', build
7188 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7189 if (cond)
7190 {
7191 cond = gimple_boolify (cond);
7192
7193 basic_block cond_bb, then_bb, else_bb;
7194 edge e;
7195 tree tmp_var;
7196
7197 tmp_var = create_tmp_var (TREE_TYPE (device));
7198 if (offloaded)
7199 e = split_block_after_labels (new_bb);
7200 else
7201 {
7202 gsi = gsi_last_bb (new_bb);
7203 gsi_prev (&gsi);
7204 e = split_block (new_bb, gsi_stmt (gsi));
7205 }
7206 cond_bb = e->src;
7207 new_bb = e->dest;
7208 remove_edge (e);
7209
7210 then_bb = create_empty_bb (cond_bb);
7211 else_bb = create_empty_bb (then_bb);
7212 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7213 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7214
7215 stmt = gimple_build_cond_empty (cond);
7216 gsi = gsi_last_bb (cond_bb);
7217 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7218
7219 gsi = gsi_start_bb (then_bb);
7220 stmt = gimple_build_assign (tmp_var, device);
7221 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7222
7223 gsi = gsi_start_bb (else_bb);
7224 stmt = gimple_build_assign (tmp_var,
7225 build_int_cst (integer_type_node,
7226 GOMP_DEVICE_HOST_FALLBACK));
7227 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7228
7229 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7230 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7231 add_bb_to_loop (then_bb, cond_bb->loop_father);
7232 add_bb_to_loop (else_bb, cond_bb->loop_father);
7233 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7234 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7235
7236 device = tmp_var;
7237 gsi = gsi_last_bb (new_bb);
7238 }
7239 else
7240 {
7241 gsi = gsi_last_bb (new_bb);
7242 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7243 true, GSI_SAME_STMT);
7244 }
7245
7246 t = gimple_omp_target_data_arg (entry_stmt);
7247 if (t == NULL)
7248 {
7249 t1 = size_zero_node;
7250 t2 = build_zero_cst (ptr_type_node);
7251 t3 = t2;
7252 t4 = t2;
7253 }
7254 else
7255 {
7256 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7257 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7258 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7259 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7260 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7261 }
7262
7263 gimple *g;
7264 bool tagging = false;
7265 /* The maximum number of arguments used by any start_ix, not counting varargs. */
7266 auto_vec<tree, 11> args;
7267 args.quick_push (device);
7268 if (offloaded)
7269 args.quick_push (build_fold_addr_expr (child_fn));
7270 args.quick_push (t1);
7271 args.quick_push (t2);
7272 args.quick_push (t3);
7273 args.quick_push (t4);
7274 switch (start_ix)
7275 {
7276 case BUILT_IN_GOACC_DATA_START:
7277 case BUILT_IN_GOACC_DECLARE:
7278 case BUILT_IN_GOMP_TARGET_DATA:
7279 break;
7280 case BUILT_IN_GOMP_TARGET:
7281 case BUILT_IN_GOMP_TARGET_UPDATE:
7282 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7283 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7284 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7285 if (c)
7286 depend = OMP_CLAUSE_DECL (c);
7287 else
7288 depend = build_int_cst (ptr_type_node, 0);
7289 args.quick_push (depend);
7290 if (start_ix == BUILT_IN_GOMP_TARGET)
7291 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7292 break;
7293 case BUILT_IN_GOACC_PARALLEL:
7294 {
7295 oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
7296 tagging = true;
7297 }
7298 /* FALLTHRU */
7299 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7300 case BUILT_IN_GOACC_UPDATE:
7301 {
7302 tree t_async = NULL_TREE;
7303
7304 /* If present, use the value specified by the respective
7305 clause, making sure it is of the correct type. */
7306 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7307 if (c)
7308 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7309 integer_type_node,
7310 OMP_CLAUSE_ASYNC_EXPR (c));
7311 else if (!tagging)
7312 /* Default values for t_async. */
7313 t_async = fold_convert_loc (gimple_location (entry_stmt),
7314 integer_type_node,
7315 build_int_cst (integer_type_node,
7316 GOMP_ASYNC_SYNC));
7317 if (tagging && t_async)
7318 {
7319 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7320
7321 if (TREE_CODE (t_async) == INTEGER_CST)
7322 {
7323 /* See if we can pack the async arg into the tag's
7324 operand. */
7325 i_async = TREE_INT_CST_LOW (t_async);
7326 if (i_async < GOMP_LAUNCH_OP_MAX)
7327 t_async = NULL_TREE;
7328 else
7329 i_async = GOMP_LAUNCH_OP_MAX;
7330 }
7331 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7332 i_async));
7333 }
7334 if (t_async)
7335 args.safe_push (t_async);
7336
7337 /* Save the argument index, and ... */
7338 unsigned t_wait_idx = args.length ();
7339 unsigned num_waits = 0;
7340 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7341 if (!tagging || c)
7342 /* ... push a placeholder. */
7343 args.safe_push (integer_zero_node);
7344
7345 for (; c; c = OMP_CLAUSE_CHAIN (c))
7346 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7347 {
7348 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7349 integer_type_node,
7350 OMP_CLAUSE_WAIT_EXPR (c)));
7351 num_waits++;
7352 }
7353
7354 if (!tagging || num_waits)
7355 {
7356 tree len;
7357
7358 /* Now that we know the number, update the placeholder. */
7359 if (tagging)
7360 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7361 else
7362 len = build_int_cst (integer_type_node, num_waits);
7363 len = fold_convert_loc (gimple_location (entry_stmt),
7364 unsigned_type_node, len);
7365 args[t_wait_idx] = len;
7366 }
7367 }
7368 break;
7369 default:
7370 gcc_unreachable ();
7371 }
7372 if (tagging)
7373 /* Push terminal marker - zero. */
7374 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7375
7376 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7377 gimple_set_location (g, gimple_location (entry_stmt));
7378 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7379 if (!offloaded)
7380 {
7381 g = gsi_stmt (gsi);
7382 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7383 gsi_remove (&gsi, true);
7384 }
7385 if (data_region && region->exit)
7386 {
7387 gsi = gsi_last_bb (region->exit);
7388 g = gsi_stmt (gsi);
7389 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7390 gsi_remove (&gsi, true);
7391 }
7392}
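/* Editorial sketch, not part of the original source: for an offloaded
   '#pragma omp target' region, the code above ends up emitting a call along
   the lines of (argument names are illustrative):

       GOMP_target_ext (device, child_fn.0, map_count,
                        &.omp_data_arr, &.omp_data_sizes, &.omp_data_kinds,
                        flags, depend_addr, &.omp_target_args);

   i.e. the builtin selected in START_IX, with the device id, the address of
   the outlined child function, the map count and the three mapping arrays
   (T1..T4 above), plus the extra flag, depend and argument-array operands
   pushed for BUILT_IN_GOMP_TARGET. */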
7393
7394/* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
7395 the iteration variable derived from the thread number. INTRA_GROUP means
7396 this is an expansion of a loop iterating over work-items within a separate
7397 iteration over groups. */
7398
7399static void
7400grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7401{
7402 gimple_stmt_iterator gsi;
7403 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7404 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7405 == GF_OMP_FOR_KIND_GRID_LOOP);
7406 size_t collapse = gimple_omp_for_collapse (for_stmt);
7407 struct omp_for_data_loop *loops
7408 = XALLOCAVEC (struct omp_for_data_loop,
7409 gimple_omp_for_collapse (for_stmt));
7410 struct omp_for_data fd;
7411
7412 remove_edge (BRANCH_EDGE (kfor->entry));
7413 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7414
7415 gcc_assert (kfor->cont);
7416 omp_extract_for_data (for_stmt, &fd, loops);
7417
7418 gsi = gsi_start_bb (body_bb);
7419
7420 for (size_t dim = 0; dim < collapse; dim++)
7421 {
7422 tree type, itype;
7423 itype = type = TREE_TYPE (fd.loops[dim].v);
7424 if (POINTER_TYPE_P (type))
7425 itype = signed_type_for (type);
7426
7427 tree n1 = fd.loops[dim].n1;
7428 tree step = fd.loops[dim].step;
7429 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7430 true, NULL_TREE, true, GSI_SAME_STMT);
7431 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7432 true, NULL_TREE, true, GSI_SAME_STMT);
7433 tree threadid;
7434 if (gimple_omp_for_grid_group_iter (for_stmt))
7435 {
7436 gcc_checking_assert (!intra_group);
7437 threadid = build_call_expr (builtin_decl_explicit
7438 (BUILT_IN_HSA_WORKGROUPID), 1,
7439 build_int_cstu (unsigned_type_node, dim));
7440 }
7441 else if (intra_group)
7442 threadid = build_call_expr (builtin_decl_explicit
7443 (BUILT_IN_HSA_WORKITEMID), 1,
7444 build_int_cstu (unsigned_type_node, dim));
7445 else
7446 threadid = build_call_expr (builtin_decl_explicit
7447 (BUILT_IN_HSA_WORKITEMABSID), 1,
7448 build_int_cstu (unsigned_type_node, dim));
7449 threadid = fold_convert (itype, threadid);
7450 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7451 true, GSI_SAME_STMT);
7452
7453 tree startvar = fd.loops[dim].v;
7454 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7455 if (POINTER_TYPE_P (type))
7456 t = fold_build_pointer_plus (n1, t);
7457 else
7458 t = fold_build2 (PLUS_EXPR, type, t, n1);
7459 t = fold_convert (type, t);
7460 t = force_gimple_operand_gsi (&gsi, t,
7461 DECL_P (startvar)
7462 && TREE_ADDRESSABLE (startvar),
7463 NULL_TREE, true, GSI_SAME_STMT);
7464 gassign *assign_stmt = gimple_build_assign (startvar, t);
7465 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7466 }
7467 /* Remove the omp for statement. */
7468 gsi = gsi_last_bb (kfor->entry);
7469 gsi_remove (&gsi, true);
7470
7471 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7472 gsi = gsi_last_bb (kfor->cont);
7473 gcc_assert (!gsi_end_p (gsi)
7474 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7475 gsi_remove (&gsi, true);
7476
7477 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7478 gsi = gsi_last_bb (kfor->exit);
7479 gcc_assert (!gsi_end_p (gsi)
7480 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7481 if (intra_group)
7482 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7483 gsi_remove (&gsi, true);
7484
7485 /* Fixup the much simpler CFG. */
7486 remove_edge (find_edge (kfor->cont, body_bb));
7487
7488 if (kfor->cont != body_bb)
7489 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7490 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7491}
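/* Editorial sketch, not part of the original source: conceptually, the
   function above turns a gridified loop such as

       for (i = n1; i < n2; i += step)
         body (i);

   into the straight-line kernel body

       i = n1 + hsa_workitem_absid (dim) * step;
       body (i);

   (or the work-item/work-group id builtins for intra-group and group
   iteration, respectively), relying on the HSA run time to launch one
   work-item per logical iteration; the original OMP_FOR, OMP_CONTINUE and
   OMP_RETURN statements and the loop-back edge are removed. */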
7492
7493/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7494 argument_decls. */
7495
7496struct grid_arg_decl_map
7497{
7498 tree old_arg;
7499 tree new_arg;
7500};
7501
7502/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7503 pertaining to the kernel function. */
7504
7505static tree
7506grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7507{
7508 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7509 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7510 tree t = *tp;
7511
7512 if (t == adm->old_arg)
7513 *tp = adm->new_arg;
7514 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7515 return NULL_TREE;
7516}
7517
7518/* If the TARGET region contains a kernel body for loop, remove that region
7519 from the TARGET and expand it in HSA gridified kernel fashion. */
7520
7521static void
7522grid_expand_target_grid_body (struct omp_region *target)
7523{
7524 if (!hsa_gen_requested_p ())
7525 return;
7526
7527 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7528 struct omp_region **pp;
7529
7530 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7531 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7532 break;
7533
7534 struct omp_region *gpukernel = *pp;
7535
7536 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7537 if (!gpukernel)
7538 {
7539 /* HSA cannot handle OACC stuff. */
7540 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7541 return;
7542 gcc_checking_assert (orig_child_fndecl);
7543 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7544 OMP_CLAUSE__GRIDDIM_));
7545 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7546
7547 hsa_register_kernel (n);
7548 return;
7549 }
7550
7551 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7552 OMP_CLAUSE__GRIDDIM_));
7553 tree inside_block
7554 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7555 *pp = gpukernel->next;
7556 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7557 if ((*pp)->type == GIMPLE_OMP_FOR)
7558 break;
7559
7560 struct omp_region *kfor = *pp;
7561 gcc_assert (kfor);
7562 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7563 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7564 *pp = kfor->next;
7565 if (kfor->inner)
7566 {
7567 if (gimple_omp_for_grid_group_iter (for_stmt))
7568 {
7569 struct omp_region **next_pp;
7570 for (pp = &kfor->inner; *pp; pp = next_pp)
7571 {
7572 next_pp = &(*pp)->next;
7573 if ((*pp)->type != GIMPLE_OMP_FOR)
7574 continue;
7575 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7576 gcc_assert (gimple_omp_for_kind (inner)
7577 == GF_OMP_FOR_KIND_GRID_LOOP);
7578 grid_expand_omp_for_loop (*pp, true);
7579 *pp = (*pp)->next;
7580 next_pp = pp;
7581 }
7582 }
7583 expand_omp (kfor->inner);
7584 }
7585 if (gpukernel->inner)
7586 expand_omp (gpukernel->inner);
7587
7588 tree kern_fndecl = copy_node (orig_child_fndecl);
7589 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7590 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7591 tree tgtblock = gimple_block (tgt_stmt);
7592 tree fniniblock = make_node (BLOCK);
7593 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7594 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7595 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7596 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7597 DECL_INITIAL (kern_fndecl) = fniniblock;
7598 push_struct_function (kern_fndecl);
7599 cfun->function_end_locus = gimple_location (tgt_stmt);
7600 init_tree_ssa (cfun);
7601 pop_cfun ();
7602
7603 /* Make sure to generate early debug for the function before
7604 outlining anything. */
7605 if (! gimple_in_ssa_p (cfun))
7606 (*debug_hooks->early_global_decl) (cfun->decl);
7607
7608 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7609 gcc_assert (!DECL_CHAIN (old_parm_decl));
7610 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7611 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7612 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7613 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7614 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7615 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7616 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7617 kern_cfun->curr_properties = cfun->curr_properties;
7618
7619 grid_expand_omp_for_loop (kfor, false);
7620
7621 /* Remove the omp for statement. */
7622 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7623 gsi_remove (&gsi, true);
7624 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7625 return. */
7626 gsi = gsi_last_bb (gpukernel->exit);
7627 gcc_assert (!gsi_end_p (gsi)
7628 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7629 gimple *ret_stmt = gimple_build_return (NULL);
7630 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7631 gsi_remove (&gsi, true);
7632
7633 /* Statements in the first BB in the target construct have been produced by
7634 target lowering and must be copied inside the GPUKERNEL, with the two
7635 exceptions of the first OMP statement and the OMP_DATA assignment
7636 statement. */
7637 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7638 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7639 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7640 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7641 !gsi_end_p (tsi); gsi_next (&tsi))
7642 {
7643 gimple *stmt = gsi_stmt (tsi);
7644 if (is_gimple_omp (stmt))
7645 break;
7646 if (sender
7647 && is_gimple_assign (stmt)
7648 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7649 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7650 continue;
7651 gimple *copy = gimple_copy (stmt);
7652 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7653 gimple_set_block (copy, fniniblock);
7654 }
7655
7656 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7657 gpukernel->exit, inside_block);
7658
7659 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7660 kcn->mark_force_output ();
7661 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7662
7663 hsa_register_kernel (kcn, orig_child);
7664
7665 cgraph_node::add_new_function (kern_fndecl, true);
7666 push_cfun (kern_cfun);
7667 cgraph_edge::rebuild_edges ();
7668
7669 /* Re-map any mention of the PARM_DECL of the original function to the
7670 PARM_DECL of the new one.
7671
7672 TODO: It would be great if lowering produced references into the GPU
7673 kernel decl straight away and we did not have to do this. */
7674 struct grid_arg_decl_map adm;
7675 adm.old_arg = old_parm_decl;
7676 adm.new_arg = new_parm_decl;
7677 basic_block bb;
7678 FOR_EACH_BB_FN (bb, kern_cfun)
7679 {
7680 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7681 {
7682 gimple *stmt = gsi_stmt (gsi);
7683 struct walk_stmt_info wi;
7684 memset (&wi, 0, sizeof (wi));
7685 wi.info = &adm;
7686 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7687 }
7688 }
7689 pop_cfun ();
7690
7691 return;
7692}
7693
7694/* Expand the parallel region tree rooted at REGION. Expansion
7695 proceeds in depth-first order. Innermost regions are expanded
7696 first. This way, parallel regions that require a new function to
7697 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7698 internal dependencies in their body. */
7699
7700static void
7701expand_omp (struct omp_region *region)
7702{
7703 omp_any_child_fn_dumped = false;
7704 while (region)
7705 {
7706 location_t saved_location;
7707 gimple *inner_stmt = NULL;
7708
7709 /* First, determine whether this is a combined parallel+workshare
7710 region. */
7711 if (region->type == GIMPLE_OMP_PARALLEL)
7712 determine_parallel_type (region);
7713 else if (region->type == GIMPLE_OMP_TARGET)
7714 grid_expand_target_grid_body (region);
7715
7716 if (region->type == GIMPLE_OMP_FOR
7717 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7718 inner_stmt = last_stmt (region->inner->entry);
7719
7720 if (region->inner)
7721 expand_omp (region->inner);
7722
7723 saved_location = input_location;
7724 if (gimple_has_location (last_stmt (region->entry)))
7725 input_location = gimple_location (last_stmt (region->entry));
7726
7727 switch (region->type)
7728 {
7729 case GIMPLE_OMP_PARALLEL:
7730 case GIMPLE_OMP_TASK:
7731 expand_omp_taskreg (region);
7732 break;
7733
7734 case GIMPLE_OMP_FOR:
7735 expand_omp_for (region, inner_stmt);
7736 break;
7737
7738 case GIMPLE_OMP_SECTIONS:
7739 expand_omp_sections (region);
7740 break;
7741
7742 case GIMPLE_OMP_SECTION:
7743 /* Individual omp sections are handled together with their
7744 parent GIMPLE_OMP_SECTIONS region. */
7745 break;
7746
7747 case GIMPLE_OMP_SINGLE:
7748 expand_omp_single (region);
7749 break;
7750
7751 case GIMPLE_OMP_ORDERED:
7752 {
7753 gomp_ordered *ord_stmt
7754 = as_a <gomp_ordered *> (last_stmt (region->entry));
7755 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7756 OMP_CLAUSE_DEPEND))
7757 {
7758 /* We'll expand these when expanding corresponding
7759 worksharing region with ordered(n) clause. */
7760 gcc_assert (region->outer
7761 && region->outer->type == GIMPLE_OMP_FOR);
7762 region->ord_stmt = ord_stmt;
7763 break;
7764 }
7765 }
7766 /* FALLTHRU */
7767 case GIMPLE_OMP_MASTER:
7768 case GIMPLE_OMP_TASKGROUP:
7769 case GIMPLE_OMP_CRITICAL:
7770 case GIMPLE_OMP_TEAMS:
7771 expand_omp_synch (region);
7772 break;
7773
7774 case GIMPLE_OMP_ATOMIC_LOAD:
7775 expand_omp_atomic (region);
7776 break;
7777
7778 case GIMPLE_OMP_TARGET:
7779 expand_omp_target (region);
7780 break;
7781
7782 default:
7783 gcc_unreachable ();
7784 }
7785
7786 input_location = saved_location;
7787 region = region->next;
7788 }
7789 if (omp_any_child_fn_dumped)
7790 {
7791 if (dump_file)
7792 dump_function_header (dump_file, current_function_decl, dump_flags);
7793 omp_any_child_fn_dumped = false;
7794 }
7795}
7796
7797/* Helper for build_omp_regions. Scan the dominator tree starting at
7798 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7799 true, the function ends once a single tree is built (otherwise, a whole
7800 forest of OMP constructs may be built). */
7801
7802static void
7803build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7804 bool single_tree)
7805{
7806 gimple_stmt_iterator gsi;
7807 gimple *stmt;
7808 basic_block son;
7809
7810 gsi = gsi_last_bb (bb);
7811 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7812 {
7813 struct omp_region *region;
7814 enum gimple_code code;
7815
7816 stmt = gsi_stmt (gsi);
7817 code = gimple_code (stmt);
7818 if (code == GIMPLE_OMP_RETURN)
7819 {
7820 /* STMT is the return point out of region PARENT. Mark it
7821 as the exit point and make PARENT the immediately
7822 enclosing region. */
7823 gcc_assert (parent);
7824 region = parent;
7825 region->exit = bb;
7826 parent = parent->outer;
7827 }
7828 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7829 {
7830 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7831 GIMPLE_OMP_RETURN, but matches with
7832 GIMPLE_OMP_ATOMIC_LOAD. */
7833 gcc_assert (parent);
7834 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7835 region = parent;
7836 region->exit = bb;
7837 parent = parent->outer;
7838 }
7839 else if (code == GIMPLE_OMP_CONTINUE)
7840 {
7841 gcc_assert (parent);
7842 parent->cont = bb;
7843 }
7844 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7845 {
7846 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7847 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7848 }
7849 else
7850 {
7851 region = new_omp_region (bb, code, parent);
7852 /* Otherwise... */
7853 if (code == GIMPLE_OMP_TARGET)
7854 {
7855 switch (gimple_omp_target_kind (stmt))
7856 {
7857 case GF_OMP_TARGET_KIND_REGION:
7858 case GF_OMP_TARGET_KIND_DATA:
7859 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7860 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7861 case GF_OMP_TARGET_KIND_OACC_DATA:
7862 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7863 break;
7864 case GF_OMP_TARGET_KIND_UPDATE:
7865 case GF_OMP_TARGET_KIND_ENTER_DATA:
7866 case GF_OMP_TARGET_KIND_EXIT_DATA:
7867 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7868 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7869 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7870 /* ..., other than for those stand-alone directives... */
7871 region = NULL;
7872 break;
7873 default:
7874 gcc_unreachable ();
7875 }
7876 }
7877 else if (code == GIMPLE_OMP_ORDERED
7878 && omp_find_clause (gimple_omp_ordered_clauses
7879 (as_a <gomp_ordered *> (stmt)),
7880 OMP_CLAUSE_DEPEND))
7881 /* #pragma omp ordered depend is also just a stand-alone
7882 directive. */
7883 region = NULL;
7884 /* ..., this directive becomes the parent for a new region. */
7885 if (region)
7886 parent = region;
7887 }
7888 }
7889
7890 if (single_tree && !parent)
7891 return;
7892
7893 for (son = first_dom_son (CDI_DOMINATORS, bb);
7894 son;
7895 son = next_dom_son (CDI_DOMINATORS, son))
7896 build_omp_regions_1 (son, parent, single_tree);
7897}
7898
7899/* Builds the tree of OMP regions rooted at ROOT, storing it to
7900 root_omp_region. */
7901
7902static void
7903build_omp_regions_root (basic_block root)
7904{
7905 gcc_assert (root_omp_region == NULL);
7906 build_omp_regions_1 (root, NULL, true);
7907 gcc_assert (root_omp_region != NULL);
7908}
7909
7910/* Expands omp construct (and its subconstructs) starting in HEAD. */
7911
7912void
7913omp_expand_local (basic_block head)
7914{
7915 build_omp_regions_root (head);
7916 if (dump_file && (dump_flags & TDF_DETAILS))
7917 {
7918 fprintf (dump_file, "\nOMP region tree\n\n");
7919 dump_omp_region (dump_file, root_omp_region, 0);
7920 fprintf (dump_file, "\n");
7921 }
7922
7923 remove_exit_barriers (root_omp_region);
7924 expand_omp (root_omp_region);
7925
7926 omp_free_regions ();
7927}
7928
7929/* Scan the CFG and build a tree of OMP regions, storing it in
7930 root_omp_region. */
7931
7932static void
7933build_omp_regions (void)
7934{
7935 gcc_assert (root_omp_region == NULL);
7936 calculate_dominance_info (CDI_DOMINATORS);
7937 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7938}
7939
7940/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7941
7942static unsigned int
7943execute_expand_omp (void)
7944{
7945 build_omp_regions ();
7946
7947 if (!root_omp_region)
7948 return 0;
7949
7950 if (dump_file)
7951 {
7952 fprintf (dump_file, "\nOMP region tree\n\n");
7953 dump_omp_region (dump_file, root_omp_region, 0);
7954 fprintf (dump_file, "\n");
7955 }
7956
7957 remove_exit_barriers (root_omp_region);
7958
7959 expand_omp (root_omp_region);
7960
7961 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7962 verify_loop_structure ();
7963 cleanup_tree_cfg ();
7964
7965 omp_free_regions ();
7966
7967 return 0;
7968}
7969
7970/* OMP expansion -- the default pass, run before creation of SSA form. */
7971
7972namespace {
7973
7974const pass_data pass_data_expand_omp =
7975{
7976 GIMPLE_PASS, /* type */
7977 "ompexp", /* name */
7978 OPTGROUP_OPENMP, /* optinfo_flags */
7979 TV_NONE, /* tv_id */
7980 PROP_gimple_any, /* properties_required */
7981 PROP_gimple_eomp, /* properties_provided */
7982 0, /* properties_destroyed */
7983 0, /* todo_flags_start */
7984 0, /* todo_flags_finish */
7985};
7986
7987class pass_expand_omp : public gimple_opt_pass
7988{
7989public:
7990 pass_expand_omp (gcc::context *ctxt)
7991 : gimple_opt_pass (pass_data_expand_omp, ctxt)
7992 {}
7993
7994 /* opt_pass methods: */
7995 virtual unsigned int execute (function *)
7996 {
7997 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
7998 || flag_openmp_simd != 0)
7999 && !seen_error ());
8000
8001 /* This pass always runs, to provide PROP_gimple_eomp.
8002 But often, there is nothing to do. */
8003 if (!gate)
8004 return 0;
8005
8006 return execute_expand_omp ();
8007 }
8008
8009}; // class pass_expand_omp
8010
8011} // anon namespace
8012
8013gimple_opt_pass *
8014make_pass_expand_omp (gcc::context *ctxt)
8015{
8016 return new pass_expand_omp (ctxt);
8017}
8018
8019namespace {
8020
8021const pass_data pass_data_expand_omp_ssa =
8022{
8023 GIMPLE_PASS, /* type */
8024 "ompexpssa", /* name */
8025 OPTGROUP_OPENMP, /* optinfo_flags */
8026 TV_NONE, /* tv_id */
8027 PROP_cfg | PROP_ssa, /* properties_required */
8028 PROP_gimple_eomp, /* properties_provided */
8029 0, /* properties_destroyed */
8030 0, /* todo_flags_start */
8031 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8032};
8033
8034class pass_expand_omp_ssa : public gimple_opt_pass
8035{
8036public:
8037 pass_expand_omp_ssa (gcc::context *ctxt)
8038 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8039 {}
8040
8041 /* opt_pass methods: */
8042 virtual bool gate (function *fun)
8043 {
8044 return !(fun->curr_properties & PROP_gimple_eomp);
8045 }
8046 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8047 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8048
8049}; // class pass_expand_omp_ssa
8050
8051} // anon namespace
8052
8053gimple_opt_pass *
8054make_pass_expand_omp_ssa (gcc::context *ctxt)
8055{
8056 return new pass_expand_omp_ssa (ctxt);
8057}
8058
8059/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8060 GIMPLE_* codes. */
8061
8062bool
8063omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8064 int *region_idx)
8065{
8066 gimple *last = last_stmt (bb);
8067 enum gimple_code code = gimple_code (last);
8068 struct omp_region *cur_region = *region;
8069 bool fallthru = false;
8070
8071 switch (code)
8072 {
8073 case GIMPLE_OMP_PARALLEL:
8074 case GIMPLE_OMP_TASK:
8075 case GIMPLE_OMP_FOR:
8076 case GIMPLE_OMP_SINGLE:
8077 case GIMPLE_OMP_TEAMS:
8078 case GIMPLE_OMP_MASTER:
8079 case GIMPLE_OMP_TASKGROUP:
8080 case GIMPLE_OMP_CRITICAL:
8081 case GIMPLE_OMP_SECTION:
8082 case GIMPLE_OMP_GRID_BODY:
8083 cur_region = new_omp_region (bb, code, cur_region);
8084 fallthru = true;
8085 break;
8086
8087 case GIMPLE_OMP_ORDERED:
8088 cur_region = new_omp_region (bb, code, cur_region);
8089 fallthru = true;
8090 if (omp_find_clause (gimple_omp_ordered_clauses
8091 (as_a <gomp_ordered *> (last)),
8092 OMP_CLAUSE_DEPEND))
8093 cur_region = cur_region->outer;
8094 break;
8095
8096 case GIMPLE_OMP_TARGET:
8097 cur_region = new_omp_region (bb, code, cur_region);
8098 fallthru = true;
8099 switch (gimple_omp_target_kind (last))
8100 {
8101 case GF_OMP_TARGET_KIND_REGION:
8102 case GF_OMP_TARGET_KIND_DATA:
8103 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8104 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8105 case GF_OMP_TARGET_KIND_OACC_DATA:
8106 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8107 break;
8108 case GF_OMP_TARGET_KIND_UPDATE:
8109 case GF_OMP_TARGET_KIND_ENTER_DATA:
8110 case GF_OMP_TARGET_KIND_EXIT_DATA:
8111 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8112 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8113 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8114 cur_region = cur_region->outer;
8115 break;
8116 default:
8117 gcc_unreachable ();
8118 }
8119 break;
8120
8121 case GIMPLE_OMP_SECTIONS:
8122 cur_region = new_omp_region (bb, code, cur_region);
8123 fallthru = true;
8124 break;
8125
8126 case GIMPLE_OMP_SECTIONS_SWITCH:
8127 fallthru = false;
8128 break;
8129
8130 case GIMPLE_OMP_ATOMIC_LOAD:
8131 case GIMPLE_OMP_ATOMIC_STORE:
8132 fallthru = true;
8133 break;
8134
8135 case GIMPLE_OMP_RETURN:
8136 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8137 somewhere other than the next block. This will be
8138 created later. */
8139 cur_region->exit = bb;
8140 if (cur_region->type == GIMPLE_OMP_TASK)
8141 /* Add an edge corresponding to not scheduling the task
8142 immediately. */
8143 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8144 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8145 cur_region = cur_region->outer;
8146 break;
8147
8148 case GIMPLE_OMP_CONTINUE:
8149 cur_region->cont = bb;
8150 switch (cur_region->type)
8151 {
8152 case GIMPLE_OMP_FOR:
8153 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8154 succs edges as abnormal to prevent splitting
8155 them. */
8156 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8157 /* Make the loopback edge. */
8158 make_edge (bb, single_succ (cur_region->entry),
8159 EDGE_ABNORMAL);
8160
8161 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8162 corresponds to the case that the body of the loop
8163 is not executed at all. */
8164 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8165 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8166 fallthru = false;
8167 break;
8168
8169 case GIMPLE_OMP_SECTIONS:
8170 /* Wire up the edges into and out of the nested sections. */
8171 {
8172 basic_block switch_bb = single_succ (cur_region->entry);
8173
8174 struct omp_region *i;
8175 for (i = cur_region->inner; i ; i = i->next)
8176 {
8177 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8178 make_edge (switch_bb, i->entry, 0);
8179 make_edge (i->exit, bb, EDGE_FALLTHRU);
8180 }
8181
8182 /* Make the loopback edge to the block with
8183 GIMPLE_OMP_SECTIONS_SWITCH. */
8184 make_edge (bb, switch_bb, 0);
8185
8186 /* Make the edge from the switch to exit. */
8187 make_edge (switch_bb, bb->next_bb, 0);
8188 fallthru = false;
8189 }
8190 break;
8191
8192 case GIMPLE_OMP_TASK:
8193 fallthru = true;
8194 break;
8195
8196 default:
8197 gcc_unreachable ();
8198 }
8199 break;
8200
8201 default:
8202 gcc_unreachable ();
8203 }
8204
8205 if (*region != cur_region)
8206 {
8207 *region = cur_region;
8208 if (cur_region)
8209 *region_idx = cur_region->entry->index;
8210 else
8211 *region_idx = 0;
8212 }
8213
8214 return fallthru;
8215}
8216
8217#include "gt-omp-expand.h"