4954efd4 1/* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
 3 runtime library (libgomp) and so forth.
4
aad93da1 5Copyright (C) 2005-2017 Free Software Foundation, Inc.
4954efd4 6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "memmodel.h"
27#include "backend.h"
28#include "target.h"
29#include "rtl.h"
30#include "tree.h"
31#include "gimple.h"
32#include "cfghooks.h"
33#include "tree-pass.h"
34#include "ssa.h"
35#include "optabs.h"
36#include "cgraph.h"
37#include "pretty-print.h"
38#include "diagnostic-core.h"
39#include "fold-const.h"
40#include "stor-layout.h"
41#include "cfganal.h"
42#include "internal-fn.h"
43#include "gimplify.h"
44#include "gimple-iterator.h"
45#include "gimplify-me.h"
46#include "gimple-walk.h"
47#include "tree-cfg.h"
48#include "tree-into-ssa.h"
49#include "tree-ssa.h"
50#include "splay-tree.h"
51#include "cfgloop.h"
52#include "omp-general.h"
53#include "omp-offload.h"
54#include "tree-cfgcleanup.h"
55#include "symbol-summary.h"
56#include "cilk.h"
57#include "gomp-constants.h"
58#include "gimple-pretty-print.h"
ef2beaf2 59#include "hsa-common.h"
4954efd4 60
61
62/* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
65
66struct omp_region
67{
68 /* The enclosing region. */
69 struct omp_region *outer;
70
71 /* First child region. */
72 struct omp_region *inner;
73
74 /* Next peer region. */
75 struct omp_region *next;
76
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
79
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
82
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
85
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
90
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
93
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
96
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
99
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
102
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
106};
107
108static struct omp_region *root_omp_region;
109static bool omp_any_child_fn_dumped;
110
111static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113static gphi *find_phi_with_arg_on_edge (tree, edge);
114static void expand_omp (struct omp_region *region);
115
116/* Return true if REGION is a combined parallel+workshare region. */
117
118static inline bool
119is_combined_parallel (struct omp_region *region)
120{
121 return region->is_combined_parallel;
122}
123
124/* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
125 is the immediate dominator of PAR_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
128
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
134
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
137
138 Is lowered into:
139
7c6746c9 140 # BLOCK 2 (PAR_ENTRY_BB)
4954efd4 141 .omp_data_o.i = i;
 142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
143
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
149
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
154
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
 157 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
160
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
165
166static bool
167workshare_safe_to_combine_p (basic_block ws_entry_bb)
168{
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
171
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
174
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176
177 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
178
179 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
180 return false;
181 if (fd.iter_type != long_integer_type_node)
182 return false;
183
184 /* FIXME. We give up too easily here. If any of these arguments
185 are not constants, they will likely involve variables that have
186 been mapped into fields of .omp_data_s for sharing with the child
187 function. With appropriate data flow, it would be possible to
188 see through this. */
189 if (!is_gimple_min_invariant (fd.loop.n1)
190 || !is_gimple_min_invariant (fd.loop.n2)
191 || !is_gimple_min_invariant (fd.loop.step)
192 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
193 return false;
194
195 return true;
196}
197
198/* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
199 presence (SIMD_SCHEDULE). */
200
201static tree
202omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
203{
204 if (!simd_schedule)
205 return chunk_size;
206
207 int vf = omp_max_vf ();
208 if (vf == 1)
209 return chunk_size;
210
211 tree type = TREE_TYPE (chunk_size);
212 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
213 build_int_cst (type, vf - 1));
214 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
215 build_int_cst (type, -vf));
216}
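
/* For illustration, with a hypothetical vectorization factor VF = 8 and a
   chunk size of 13, the computation above yields (13 + 7) & -8 = 16, i.e.
   the chunk size is rounded up to the next multiple of VF so that simd
   chunks are not split in the middle of a vector (this assumes, as
   omp_max_vf is expected to guarantee, that VF is a power of two).  */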
217
218/* Collect additional arguments needed to emit a combined
219 parallel+workshare call. WS_STMT is the workshare directive being
220 expanded. */
221
222static vec<tree, va_gc> *
223get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
224{
225 tree t;
226 location_t loc = gimple_location (ws_stmt);
227 vec<tree, va_gc> *ws_args;
228
229 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
230 {
231 struct omp_for_data fd;
232 tree n1, n2;
233
234 omp_extract_for_data (for_stmt, &fd, NULL);
235 n1 = fd.loop.n1;
236 n2 = fd.loop.n2;
237
238 if (gimple_omp_for_combined_into_p (for_stmt))
239 {
240 tree innerc
241 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
242 OMP_CLAUSE__LOOPTEMP_);
243 gcc_assert (innerc);
244 n1 = OMP_CLAUSE_DECL (innerc);
245 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
246 OMP_CLAUSE__LOOPTEMP_);
247 gcc_assert (innerc);
248 n2 = OMP_CLAUSE_DECL (innerc);
249 }
250
251 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
252
253 t = fold_convert_loc (loc, long_integer_type_node, n1);
254 ws_args->quick_push (t);
255
256 t = fold_convert_loc (loc, long_integer_type_node, n2);
257 ws_args->quick_push (t);
258
259 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
260 ws_args->quick_push (t);
261
262 if (fd.chunk_size)
263 {
264 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
265 t = omp_adjust_chunk_size (t, fd.simd_schedule);
266 ws_args->quick_push (t);
267 }
268
269 return ws_args;
270 }
271 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
272 {
273 /* Number of sections is equal to the number of edges from the
274 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
275 the exit of the sections region. */
276 basic_block bb = single_succ (gimple_bb (ws_stmt));
277 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
278 vec_alloc (ws_args, 1);
279 ws_args->quick_push (t);
280 return ws_args;
281 }
282
283 gcc_unreachable ();
284}
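
/* As a rough example of the above, for a combined region such as

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++) ...

   WS_ARGS ends up holding the loop bounds, the step and the chunk size
   (here 0, n, 1 and 4, possibly via the _looptemp_ temporaries created
   during lowering), all converted to long, whereas for a combined
   parallel sections region it holds a single element: the number of
   sections.  */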
285
286/* Discover whether REGION is a combined parallel+workshare region. */
287
288static void
289determine_parallel_type (struct omp_region *region)
290{
291 basic_block par_entry_bb, par_exit_bb;
292 basic_block ws_entry_bb, ws_exit_bb;
293
294 if (region == NULL || region->inner == NULL
295 || region->exit == NULL || region->inner->exit == NULL
296 || region->inner->cont == NULL)
297 return;
298
299 /* We only support parallel+for and parallel+sections. */
300 if (region->type != GIMPLE_OMP_PARALLEL
301 || (region->inner->type != GIMPLE_OMP_FOR
302 && region->inner->type != GIMPLE_OMP_SECTIONS))
303 return;
304
305 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
306 WS_EXIT_BB -> PAR_EXIT_BB. */
307 par_entry_bb = region->entry;
308 par_exit_bb = region->exit;
309 ws_entry_bb = region->inner->entry;
310 ws_exit_bb = region->inner->exit;
311
312 if (single_succ (par_entry_bb) == ws_entry_bb
313 && single_succ (ws_exit_bb) == par_exit_bb
314 && workshare_safe_to_combine_p (ws_entry_bb)
315 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
316 || (last_and_only_stmt (ws_entry_bb)
317 && last_and_only_stmt (par_exit_bb))))
318 {
319 gimple *par_stmt = last_stmt (par_entry_bb);
320 gimple *ws_stmt = last_stmt (ws_entry_bb);
321
322 if (region->inner->type == GIMPLE_OMP_FOR)
323 {
324 /* If this is a combined parallel loop, we need to determine
325 whether or not to use the combined library calls. There
326 are two cases where we do not apply the transformation:
327 static loops and any kind of ordered loop. In the first
328 case, we already open code the loop so there is no need
329 to do anything else. In the latter case, the combined
330 parallel loop call would still need extra synchronization
331 to implement ordered semantics, so there would not be any
332 gain in using the combined call. */
333 tree clauses = gimple_omp_for_clauses (ws_stmt);
334 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
335 if (c == NULL
336 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
337 == OMP_CLAUSE_SCHEDULE_STATIC)
338 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
339 {
340 region->is_combined_parallel = false;
341 region->inner->is_combined_parallel = false;
342 return;
343 }
344 }
345
346 region->is_combined_parallel = true;
347 region->inner->is_combined_parallel = true;
348 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
349 }
350}
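
/* Under the rules above, and assuming the loop bounds pass
   workshare_safe_to_combine_p, something like

     #pragma omp parallel for schedule (dynamic)

   is marked as a combined parallel+workshare region, while

     #pragma omp parallel for schedule (static)

   or a loop with an ordered clause is not: the static loop is open coded
   anyway and the ordered loop would still need extra synchronization, so
   the combined library call would buy nothing.  */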
351
352/* Debugging dumps for parallel regions. */
353void dump_omp_region (FILE *, struct omp_region *, int);
354void debug_omp_region (struct omp_region *);
355void debug_all_omp_regions (void);
356
357/* Dump the parallel region tree rooted at REGION. */
358
359void
360dump_omp_region (FILE *file, struct omp_region *region, int indent)
361{
362 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
363 gimple_code_name[region->type]);
364
365 if (region->inner)
366 dump_omp_region (file, region->inner, indent + 4);
367
368 if (region->cont)
369 {
370 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
371 region->cont->index);
372 }
373
374 if (region->exit)
375 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
376 region->exit->index);
377 else
378 fprintf (file, "%*s[no exit marker]\n", indent, "");
379
380 if (region->next)
381 dump_omp_region (file, region->next, indent);
382}
383
384DEBUG_FUNCTION void
385debug_omp_region (struct omp_region *region)
386{
387 dump_omp_region (stderr, region, 0);
388}
389
390DEBUG_FUNCTION void
391debug_all_omp_regions (void)
392{
393 dump_omp_region (stderr, root_omp_region, 0);
394}
395
396/* Create a new parallel region starting at STMT inside region PARENT. */
397
398static struct omp_region *
399new_omp_region (basic_block bb, enum gimple_code type,
400 struct omp_region *parent)
401{
402 struct omp_region *region = XCNEW (struct omp_region);
403
404 region->outer = parent;
405 region->entry = bb;
406 region->type = type;
407
408 if (parent)
409 {
410 /* This is a nested region. Add it to the list of inner
411 regions in PARENT. */
412 region->next = parent->inner;
413 parent->inner = region;
414 }
415 else
416 {
417 /* This is a toplevel region. Add it to the list of toplevel
418 regions in ROOT_OMP_REGION. */
419 region->next = root_omp_region;
420 root_omp_region = region;
421 }
422
423 return region;
424}
425
426/* Release the memory associated with the region tree rooted at REGION. */
427
428static void
429free_omp_region_1 (struct omp_region *region)
430{
431 struct omp_region *i, *n;
432
433 for (i = region->inner; i ; i = n)
434 {
435 n = i->next;
436 free_omp_region_1 (i);
437 }
438
439 free (region);
440}
441
442/* Release the memory for the entire omp region tree. */
443
444void
445omp_free_regions (void)
446{
447 struct omp_region *r, *n;
448 for (r = root_omp_region; r ; r = n)
449 {
450 n = r->next;
451 free_omp_region_1 (r);
452 }
453 root_omp_region = NULL;
454}
455
456/* A convenience function to build an empty GIMPLE_COND with just the
457 condition. */
458
459static gcond *
460gimple_build_cond_empty (tree cond)
461{
462 enum tree_code pred_code;
463 tree lhs, rhs;
464
465 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
466 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
467}
468
469/* Return true if a parallel REGION is within a declare target function or
470 within a target region and is not a part of a gridified target. */
471
472static bool
473parallel_needs_hsa_kernel_p (struct omp_region *region)
474{
475 bool indirect = false;
476 for (region = region->outer; region; region = region->outer)
477 {
478 if (region->type == GIMPLE_OMP_PARALLEL)
479 indirect = true;
480 else if (region->type == GIMPLE_OMP_TARGET)
481 {
482 gomp_target *tgt_stmt
483 = as_a <gomp_target *> (last_stmt (region->entry));
484
485 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
486 OMP_CLAUSE__GRIDDIM_))
487 return indirect;
488 else
489 return true;
490 }
491 }
492
493 if (lookup_attribute ("omp declare target",
494 DECL_ATTRIBUTES (current_function_decl)))
495 return true;
496
497 return false;
498}
499
 500/* Build the function calls to GOMP_parallel_start etc. to actually
 501 generate the parallel operation. REGION is the parallel region
 502 being expanded. BB is the block where the code should be inserted.
 503 WS_ARGS will be set if this is a call to a combined parallel+workshare
 504 construct; it contains the list of additional arguments needed by
 505 the workshare construct. */
506
507static void
508expand_parallel_call (struct omp_region *region, basic_block bb,
509 gomp_parallel *entry_stmt,
510 vec<tree, va_gc> *ws_args)
511{
512 tree t, t1, t2, val, cond, c, clauses, flags;
513 gimple_stmt_iterator gsi;
514 gimple *stmt;
515 enum built_in_function start_ix;
516 int start_ix2;
517 location_t clause_loc;
518 vec<tree, va_gc> *args;
519
520 clauses = gimple_omp_parallel_clauses (entry_stmt);
521
522 /* Determine what flavor of GOMP_parallel we will be
523 emitting. */
524 start_ix = BUILT_IN_GOMP_PARALLEL;
525 if (is_combined_parallel (region))
526 {
527 switch (region->inner->type)
528 {
529 case GIMPLE_OMP_FOR:
530 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
531 switch (region->inner->sched_kind)
532 {
533 case OMP_CLAUSE_SCHEDULE_RUNTIME:
534 start_ix2 = 3;
535 break;
536 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
537 case OMP_CLAUSE_SCHEDULE_GUIDED:
538 if (region->inner->sched_modifiers
539 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
540 {
541 start_ix2 = 3 + region->inner->sched_kind;
542 break;
543 }
544 /* FALLTHRU */
545 default:
546 start_ix2 = region->inner->sched_kind;
547 break;
548 }
549 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
550 start_ix = (enum built_in_function) start_ix2;
551 break;
552 case GIMPLE_OMP_SECTIONS:
553 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
554 break;
555 default:
556 gcc_unreachable ();
557 }
558 }
559
560 /* By default, the value of NUM_THREADS is zero (selected at run time)
561 and there is no conditional. */
562 cond = NULL_TREE;
563 val = build_int_cst (unsigned_type_node, 0);
564 flags = build_int_cst (unsigned_type_node, 0);
565
566 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
567 if (c)
568 cond = OMP_CLAUSE_IF_EXPR (c);
569
570 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
571 if (c)
572 {
573 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
574 clause_loc = OMP_CLAUSE_LOCATION (c);
575 }
576 else
577 clause_loc = gimple_location (entry_stmt);
578
579 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
580 if (c)
581 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
582
583 /* Ensure 'val' is of the correct type. */
584 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
585
586 /* If we found the clause 'if (cond)', build either
587 (cond != 0) or (cond ? val : 1u). */
588 if (cond)
589 {
590 cond = gimple_boolify (cond);
591
592 if (integer_zerop (val))
593 val = fold_build2_loc (clause_loc,
594 EQ_EXPR, unsigned_type_node, cond,
595 build_int_cst (TREE_TYPE (cond), 0));
596 else
597 {
598 basic_block cond_bb, then_bb, else_bb;
599 edge e, e_then, e_else;
600 tree tmp_then, tmp_else, tmp_join, tmp_var;
601
602 tmp_var = create_tmp_var (TREE_TYPE (val));
603 if (gimple_in_ssa_p (cfun))
604 {
605 tmp_then = make_ssa_name (tmp_var);
606 tmp_else = make_ssa_name (tmp_var);
607 tmp_join = make_ssa_name (tmp_var);
608 }
609 else
610 {
611 tmp_then = tmp_var;
612 tmp_else = tmp_var;
613 tmp_join = tmp_var;
614 }
615
616 e = split_block_after_labels (bb);
617 cond_bb = e->src;
618 bb = e->dest;
619 remove_edge (e);
620
621 then_bb = create_empty_bb (cond_bb);
622 else_bb = create_empty_bb (then_bb);
623 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
624 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
625
626 stmt = gimple_build_cond_empty (cond);
627 gsi = gsi_start_bb (cond_bb);
628 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
629
630 gsi = gsi_start_bb (then_bb);
631 expand_omp_build_assign (&gsi, tmp_then, val, true);
632
633 gsi = gsi_start_bb (else_bb);
634 expand_omp_build_assign (&gsi, tmp_else,
635 build_int_cst (unsigned_type_node, 1),
636 true);
637
638 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
639 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
640 add_bb_to_loop (then_bb, cond_bb->loop_father);
641 add_bb_to_loop (else_bb, cond_bb->loop_father);
642 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
643 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
644
645 if (gimple_in_ssa_p (cfun))
646 {
647 gphi *phi = create_phi_node (tmp_join, bb);
648 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
649 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
650 }
651
652 val = tmp_join;
653 }
654
655 gsi = gsi_start_bb (bb);
656 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
657 false, GSI_CONTINUE_LINKING);
658 }
659
660 gsi = gsi_last_bb (bb);
661 t = gimple_omp_parallel_data_arg (entry_stmt);
662 if (t == NULL)
663 t1 = null_pointer_node;
664 else
665 t1 = build_fold_addr_expr (t);
666 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
667 t2 = build_fold_addr_expr (child_fndecl);
668
669 vec_alloc (args, 4 + vec_safe_length (ws_args));
670 args->quick_push (t2);
671 args->quick_push (t1);
672 args->quick_push (val);
673 if (ws_args)
674 args->splice (*ws_args);
675 args->quick_push (flags);
676
677 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
678 builtin_decl_explicit (start_ix), args);
679
680 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
681 false, GSI_CONTINUE_LINKING);
682
683 if (hsa_gen_requested_p ()
684 && parallel_needs_hsa_kernel_p (region))
685 {
686 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
687 hsa_register_kernel (child_cnode);
688 }
689}
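
/* Roughly, for a plain

     #pragma omp parallel num_threads (4)

   the call emitted above looks like

     GOMP_parallel (bar.omp_fn.0, &.omp_data_o, 4, 0);

   (using the child-function naming from the example near the top of this
   file), while a combined parallel+workshare region uses one of the
   GOMP_parallel_loop_* or GOMP_parallel_sections entry points with the
   WS_ARGS spliced in between the thread count and the flags.  */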
690
691/* Insert a function call whose name is FUNC_NAME with the information from
692 ENTRY_STMT into the basic_block BB. */
693
694static void
695expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
696 vec <tree, va_gc> *ws_args)
697{
698 tree t, t1, t2;
699 gimple_stmt_iterator gsi;
700 vec <tree, va_gc> *args;
701
702 gcc_assert (vec_safe_length (ws_args) == 2);
703 tree func_name = (*ws_args)[0];
704 tree grain = (*ws_args)[1];
705
706 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
707 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
708 gcc_assert (count != NULL_TREE);
709 count = OMP_CLAUSE_OPERAND (count, 0);
710
711 gsi = gsi_last_bb (bb);
712 t = gimple_omp_parallel_data_arg (entry_stmt);
713 if (t == NULL)
714 t1 = null_pointer_node;
715 else
716 t1 = build_fold_addr_expr (t);
717 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
718
719 vec_alloc (args, 4);
720 args->quick_push (t2);
721 args->quick_push (t1);
722 args->quick_push (count);
723 args->quick_push (grain);
724 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
725
726 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
727 GSI_CONTINUE_LINKING);
728}
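
/* The call built above therefore has the shape

     FUNC_NAME (child_fn, &data, count, grain);

   where FUNC_NAME and GRAIN were prepared during lowering and stashed in
   the two-element WS_ARGS vector, and COUNT is the trip count recorded in
   the OMP_CLAUSE__CILK_FOR_COUNT_ clause.  */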
729
 730/* Build the function call to GOMP_task to actually
 731 generate the task operation. BB is the block where the code should be inserted. */
732
733static void
734expand_task_call (struct omp_region *region, basic_block bb,
735 gomp_task *entry_stmt)
736{
737 tree t1, t2, t3;
738 gimple_stmt_iterator gsi;
739 location_t loc = gimple_location (entry_stmt);
740
741 tree clauses = gimple_omp_task_clauses (entry_stmt);
742
743 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
744 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
745 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
746 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
747 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
748 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
749
750 unsigned int iflags
751 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
752 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
753 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
754
755 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
756 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
757 tree num_tasks = NULL_TREE;
758 bool ull = false;
759 if (taskloop_p)
760 {
761 gimple *g = last_stmt (region->outer->entry);
762 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
763 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
764 struct omp_for_data fd;
765 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
766 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
767 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
768 OMP_CLAUSE__LOOPTEMP_);
769 startvar = OMP_CLAUSE_DECL (startvar);
770 endvar = OMP_CLAUSE_DECL (endvar);
771 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
772 if (fd.loop.cond_code == LT_EXPR)
773 iflags |= GOMP_TASK_FLAG_UP;
774 tree tclauses = gimple_omp_for_clauses (g);
775 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
776 if (num_tasks)
777 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
778 else
779 {
780 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
781 if (num_tasks)
782 {
783 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
784 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
785 }
786 else
787 num_tasks = integer_zero_node;
788 }
789 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
790 if (ifc == NULL_TREE)
791 iflags |= GOMP_TASK_FLAG_IF;
792 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
793 iflags |= GOMP_TASK_FLAG_NOGROUP;
794 ull = fd.iter_type == long_long_unsigned_type_node;
795 }
796 else if (priority)
797 iflags |= GOMP_TASK_FLAG_PRIORITY;
798
799 tree flags = build_int_cst (unsigned_type_node, iflags);
800
801 tree cond = boolean_true_node;
802 if (ifc)
803 {
804 if (taskloop_p)
805 {
806 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
807 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
808 build_int_cst (unsigned_type_node,
809 GOMP_TASK_FLAG_IF),
810 build_int_cst (unsigned_type_node, 0));
811 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
812 flags, t);
813 }
814 else
815 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
816 }
817
818 if (finalc)
819 {
820 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
821 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
822 build_int_cst (unsigned_type_node,
823 GOMP_TASK_FLAG_FINAL),
824 build_int_cst (unsigned_type_node, 0));
825 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
826 }
827 if (depend)
828 depend = OMP_CLAUSE_DECL (depend);
829 else
830 depend = build_int_cst (ptr_type_node, 0);
831 if (priority)
832 priority = fold_convert (integer_type_node,
833 OMP_CLAUSE_PRIORITY_EXPR (priority));
834 else
835 priority = integer_zero_node;
836
837 gsi = gsi_last_bb (bb);
838 tree t = gimple_omp_task_data_arg (entry_stmt);
839 if (t == NULL)
840 t2 = null_pointer_node;
841 else
842 t2 = build_fold_addr_expr_loc (loc, t);
843 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
844 t = gimple_omp_task_copy_fn (entry_stmt);
845 if (t == NULL)
846 t3 = null_pointer_node;
847 else
848 t3 = build_fold_addr_expr_loc (loc, t);
849
850 if (taskloop_p)
851 t = build_call_expr (ull
852 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
853 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
854 11, t1, t2, t3,
855 gimple_omp_task_arg_size (entry_stmt),
856 gimple_omp_task_arg_align (entry_stmt), flags,
857 num_tasks, priority, startvar, endvar, step);
858 else
859 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
860 9, t1, t2, t3,
861 gimple_omp_task_arg_size (entry_stmt),
862 gimple_omp_task_arg_align (entry_stmt), cond, flags,
863 depend, priority);
864
865 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
866 false, GSI_CONTINUE_LINKING);
867}
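
/* As a sketch, a plain

     #pragma omp task if (c)

   ends up as a call along the lines of

     GOMP_task (fn, &data, cpyfn, arg_size, arg_align, c, flags, NULL, 0);

   with the untied/mergeable bits (and, at run time, the final bit) folded
   into FLAGS, whereas a taskloop is emitted as GOMP_taskloop or
   GOMP_taskloop_ull, passing FLAGS, NUM_TASKS, PRIORITY and the
   START/END/STEP temporaries instead of the COND and DEPEND arguments.  */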
868
869/* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
870
871static tree
872vec2chain (vec<tree, va_gc> *v)
873{
874 tree chain = NULL_TREE, t;
875 unsigned ix;
876
877 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
878 {
879 DECL_CHAIN (t) = chain;
880 chain = t;
881 }
882
883 return chain;
884}
885
 886/* Remove barriers in REGION->EXIT's block. Note that this is only
 887 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
 888 is an implicit barrier, any barrier that a workshare inside the
 889 GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
 890 can now be removed. */
891
892static void
893remove_exit_barrier (struct omp_region *region)
894{
895 gimple_stmt_iterator gsi;
896 basic_block exit_bb;
897 edge_iterator ei;
898 edge e;
899 gimple *stmt;
900 int any_addressable_vars = -1;
901
902 exit_bb = region->exit;
903
904 /* If the parallel region doesn't return, we don't have REGION->EXIT
905 block at all. */
906 if (! exit_bb)
907 return;
908
909 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
910 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
911 statements that can appear in between are extremely limited -- no
912 memory operations at all. Here, we allow nothing at all, so the
913 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
914 gsi = gsi_last_bb (exit_bb);
915 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
916 gsi_prev (&gsi);
917 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
918 return;
919
920 FOR_EACH_EDGE (e, ei, exit_bb->preds)
921 {
922 gsi = gsi_last_bb (e->src);
923 if (gsi_end_p (gsi))
924 continue;
925 stmt = gsi_stmt (gsi);
926 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
927 && !gimple_omp_return_nowait_p (stmt))
928 {
929 /* OpenMP 3.0 tasks unfortunately prevent this optimization
930 in many cases. If there could be tasks queued, the barrier
931 might be needed to let the tasks run before some local
932 variable of the parallel that the task uses as shared
933 runs out of scope. The task can be spawned either
 934 from within the current function (this would be easy to check)
935 or from some function it calls and gets passed an address
936 of such a variable. */
937 if (any_addressable_vars < 0)
938 {
939 gomp_parallel *parallel_stmt
940 = as_a <gomp_parallel *> (last_stmt (region->entry));
941 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
942 tree local_decls, block, decl;
943 unsigned ix;
944
945 any_addressable_vars = 0;
946 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
947 if (TREE_ADDRESSABLE (decl))
948 {
949 any_addressable_vars = 1;
950 break;
951 }
952 for (block = gimple_block (stmt);
953 !any_addressable_vars
954 && block
955 && TREE_CODE (block) == BLOCK;
956 block = BLOCK_SUPERCONTEXT (block))
957 {
958 for (local_decls = BLOCK_VARS (block);
959 local_decls;
960 local_decls = DECL_CHAIN (local_decls))
961 if (TREE_ADDRESSABLE (local_decls))
962 {
963 any_addressable_vars = 1;
964 break;
965 }
966 if (block == gimple_block (parallel_stmt))
967 break;
968 }
969 }
970 if (!any_addressable_vars)
971 gimple_omp_return_set_nowait (stmt);
972 }
973 }
974}
975
976static void
977remove_exit_barriers (struct omp_region *region)
978{
979 if (region->type == GIMPLE_OMP_PARALLEL)
980 remove_exit_barrier (region);
981
982 if (region->inner)
983 {
984 region = region->inner;
985 remove_exit_barriers (region);
986 while (region->next)
987 {
988 region = region->next;
989 remove_exit_barriers (region);
990 }
991 }
992}
993
994/* Optimize omp_get_thread_num () and omp_get_num_threads ()
995 calls. These can't be declared as const functions, but
996 within one parallel body they are constant, so they can be
997 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
 998 which are declared const. Similarly for the task body, except
 999 that in an untied task omp_get_thread_num () can change at any task
 1000 scheduling point. */
1001
1002static void
1003optimize_omp_library_calls (gimple *entry_stmt)
1004{
1005 basic_block bb;
1006 gimple_stmt_iterator gsi;
1007 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1008 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1009 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1010 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1011 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1012 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1013 OMP_CLAUSE_UNTIED) != NULL);
1014
1015 FOR_EACH_BB_FN (bb, cfun)
1016 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1017 {
1018 gimple *call = gsi_stmt (gsi);
1019 tree decl;
1020
1021 if (is_gimple_call (call)
1022 && (decl = gimple_call_fndecl (call))
1023 && DECL_EXTERNAL (decl)
1024 && TREE_PUBLIC (decl)
1025 && DECL_INITIAL (decl) == NULL)
1026 {
1027 tree built_in;
1028
1029 if (DECL_NAME (decl) == thr_num_id)
1030 {
1031 /* In #pragma omp task untied omp_get_thread_num () can change
1032 during the execution of the task region. */
1033 if (untied_task)
1034 continue;
1035 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1036 }
1037 else if (DECL_NAME (decl) == num_thr_id)
1038 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1039 else
1040 continue;
1041
1042 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1043 || gimple_call_num_args (call) != 0)
1044 continue;
1045
1046 if (flag_exceptions && !TREE_NOTHROW (decl))
1047 continue;
1048
1049 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1050 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1051 TREE_TYPE (TREE_TYPE (built_in))))
1052 continue;
1053
1054 gimple_call_set_fndecl (call, built_in);
1055 }
1056 }
1057}
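
/* For example, inside an outlined parallel body a call

     n.1 = omp_get_num_threads ();

   is redirected to __builtin_omp_get_num_threads (), which is declared
   const and can therefore be CSEd or hoisted by later passes; the same
   rewrite of omp_get_thread_num () is skipped in untied tasks for the
   reason given above.  */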
1058
1059/* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1060 regimplified. */
1061
1062static tree
1063expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1064{
1065 tree t = *tp;
1066
1067 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1068 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1069 return t;
1070
1071 if (TREE_CODE (t) == ADDR_EXPR)
1072 recompute_tree_invariant_for_addr_expr (t);
1073
1074 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1075 return NULL_TREE;
1076}
1077
1078/* Prepend or append TO = FROM assignment before or after *GSI_P. */
1079
1080static void
1081expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1082 bool after)
1083{
1084 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1085 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1086 !after, after ? GSI_CONTINUE_LINKING
1087 : GSI_SAME_STMT);
1088 gimple *stmt = gimple_build_assign (to, from);
1089 if (after)
1090 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1091 else
1092 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1093 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1094 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1095 {
1096 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1097 gimple_regimplify_operands (stmt, &gsi);
1098 }
1099}
1100
1101/* Expand the OpenMP parallel or task directive starting at REGION. */
1102
1103static void
1104expand_omp_taskreg (struct omp_region *region)
1105{
1106 basic_block entry_bb, exit_bb, new_bb;
1107 struct function *child_cfun;
1108 tree child_fn, block, t;
1109 gimple_stmt_iterator gsi;
1110 gimple *entry_stmt, *stmt;
1111 edge e;
1112 vec<tree, va_gc> *ws_args;
1113
1114 entry_stmt = last_stmt (region->entry);
1115 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1116 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1117
1118 entry_bb = region->entry;
1119 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1120 exit_bb = region->cont;
1121 else
1122 exit_bb = region->exit;
1123
1124 bool is_cilk_for
1125 = (flag_cilkplus
1126 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1127 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1128 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1129
1130 if (is_cilk_for)
1131 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1132 and the inner statement contains the name of the built-in function
1133 and grain. */
1134 ws_args = region->inner->ws_args;
1135 else if (is_combined_parallel (region))
1136 ws_args = region->ws_args;
1137 else
1138 ws_args = NULL;
1139
1140 if (child_cfun->cfg)
1141 {
1142 /* Due to inlining, it may happen that we have already outlined
1143 the region, in which case all we need to do is make the
1144 sub-graph unreachable and emit the parallel call. */
1145 edge entry_succ_e, exit_succ_e;
1146
1147 entry_succ_e = single_succ_edge (entry_bb);
1148
1149 gsi = gsi_last_bb (entry_bb);
1150 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1151 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1152 gsi_remove (&gsi, true);
1153
1154 new_bb = entry_bb;
1155 if (exit_bb)
1156 {
1157 exit_succ_e = single_succ_edge (exit_bb);
1158 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1159 }
1160 remove_edge_and_dominated_blocks (entry_succ_e);
1161 }
1162 else
1163 {
1164 unsigned srcidx, dstidx, num;
1165
1166 /* If the parallel region needs data sent from the parent
1167 function, then the very first statement (except possible
1168 tree profile counter updates) of the parallel body
1169 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1170 &.OMP_DATA_O is passed as an argument to the child function,
1171 we need to replace it with the argument as seen by the child
1172 function.
1173
1174 In most cases, this will end up being the identity assignment
1175 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1176 a function call that has been inlined, the original PARM_DECL
1177 .OMP_DATA_I may have been converted into a different local
 1178 variable, in which case we need to keep the assignment. */
1179 if (gimple_omp_taskreg_data_arg (entry_stmt))
1180 {
1181 basic_block entry_succ_bb
1182 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1183 : FALLTHRU_EDGE (entry_bb)->dest;
1184 tree arg;
1185 gimple *parcopy_stmt = NULL;
1186
1187 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1188 {
1189 gimple *stmt;
1190
1191 gcc_assert (!gsi_end_p (gsi));
1192 stmt = gsi_stmt (gsi);
1193 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1194 continue;
1195
1196 if (gimple_num_ops (stmt) == 2)
1197 {
1198 tree arg = gimple_assign_rhs1 (stmt);
1199
 1200 /* We're ignoring the subcode because we're
1201 effectively doing a STRIP_NOPS. */
1202
1203 if (TREE_CODE (arg) == ADDR_EXPR
1204 && TREE_OPERAND (arg, 0)
7c6746c9 1205 == gimple_omp_taskreg_data_arg (entry_stmt))
4954efd4 1206 {
1207 parcopy_stmt = stmt;
1208 break;
1209 }
1210 }
1211 }
1212
1213 gcc_assert (parcopy_stmt != NULL);
1214 arg = DECL_ARGUMENTS (child_fn);
1215
1216 if (!gimple_in_ssa_p (cfun))
1217 {
1218 if (gimple_assign_lhs (parcopy_stmt) == arg)
1219 gsi_remove (&gsi, true);
1220 else
1221 {
7c6746c9 1222 /* ?? Is setting the subcode really necessary ?? */
4954efd4 1223 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1224 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1225 }
1226 }
1227 else
1228 {
1229 tree lhs = gimple_assign_lhs (parcopy_stmt);
1230 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1231 /* We'd like to set the rhs to the default def in the child_fn,
1232 but it's too early to create ssa names in the child_fn.
1233 Instead, we set the rhs to the parm. In
1234 move_sese_region_to_fn, we introduce a default def for the
 1235 parm, map the parm to its default def, and once we encounter
1236 this stmt, replace the parm with the default def. */
1237 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1238 update_stmt (parcopy_stmt);
1239 }
1240 }
1241
1242 /* Declare local variables needed in CHILD_CFUN. */
1243 block = DECL_INITIAL (child_fn);
1244 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1245 /* The gimplifier could record temporaries in parallel/task block
1246 rather than in containing function's local_decls chain,
1247 which would mean cgraph missed finalizing them. Do it now. */
1248 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1249 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1250 varpool_node::finalize_decl (t);
1251 DECL_SAVED_TREE (child_fn) = NULL;
1252 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1253 gimple_set_body (child_fn, NULL);
1254 TREE_USED (block) = 1;
1255
1256 /* Reset DECL_CONTEXT on function arguments. */
1257 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1258 DECL_CONTEXT (t) = child_fn;
1259
1260 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1261 so that it can be moved to the child function. */
1262 gsi = gsi_last_bb (entry_bb);
1263 stmt = gsi_stmt (gsi);
1264 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1265 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1266 e = split_block (entry_bb, stmt);
1267 gsi_remove (&gsi, true);
1268 entry_bb = e->dest;
1269 edge e2 = NULL;
1270 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1271 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1272 else
1273 {
1274 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1275 gcc_assert (e2->dest == region->exit);
1276 remove_edge (BRANCH_EDGE (entry_bb));
1277 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1278 gsi = gsi_last_bb (region->exit);
1279 gcc_assert (!gsi_end_p (gsi)
1280 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1281 gsi_remove (&gsi, true);
1282 }
1283
1284 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1285 if (exit_bb)
1286 {
1287 gsi = gsi_last_bb (exit_bb);
1288 gcc_assert (!gsi_end_p (gsi)
1289 && (gimple_code (gsi_stmt (gsi))
1290 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1291 stmt = gimple_build_return (NULL);
1292 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1293 gsi_remove (&gsi, true);
1294 }
1295
1296 /* Move the parallel region into CHILD_CFUN. */
1297
1298 if (gimple_in_ssa_p (cfun))
1299 {
1300 init_tree_ssa (child_cfun);
1301 init_ssa_operands (child_cfun);
1302 child_cfun->gimple_df->in_ssa_p = true;
1303 block = NULL_TREE;
1304 }
1305 else
1306 block = gimple_block (entry_stmt);
1307
1308 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1309 if (exit_bb)
1310 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1311 if (e2)
1312 {
1313 basic_block dest_bb = e2->dest;
1314 if (!exit_bb)
1315 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1316 remove_edge (e2);
1317 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1318 }
1319 /* When the OMP expansion process cannot guarantee an up-to-date
7c6746c9 1320 loop tree, arrange for the child function to fix up loops. */
4954efd4 1321 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1322 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1323
1324 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1325 num = vec_safe_length (child_cfun->local_decls);
1326 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1327 {
1328 t = (*child_cfun->local_decls)[srcidx];
1329 if (DECL_CONTEXT (t) == cfun->decl)
1330 continue;
1331 if (srcidx != dstidx)
1332 (*child_cfun->local_decls)[dstidx] = t;
1333 dstidx++;
1334 }
1335 if (dstidx != num)
1336 vec_safe_truncate (child_cfun->local_decls, dstidx);
1337
1338 /* Inform the callgraph about the new function. */
1339 child_cfun->curr_properties = cfun->curr_properties;
1340 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1341 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1342 cgraph_node *node = cgraph_node::get_create (child_fn);
1343 node->parallelized_function = 1;
1344 cgraph_node::add_new_function (child_fn, true);
1345
1346 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1347 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1348
1349 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1350 fixed in a following pass. */
1351 push_cfun (child_cfun);
1352 if (need_asm)
1353 assign_assembler_name_if_neeeded (child_fn);
1354
1355 if (optimize)
1356 optimize_omp_library_calls (entry_stmt);
1357 cgraph_edge::rebuild_edges ();
1358
1359 /* Some EH regions might become dead, see PR34608. If
1360 pass_cleanup_cfg isn't the first pass to happen with the
1361 new child, these dead EH edges might cause problems.
1362 Clean them up now. */
1363 if (flag_exceptions)
1364 {
1365 basic_block bb;
1366 bool changed = false;
1367
1368 FOR_EACH_BB_FN (bb, cfun)
1369 changed |= gimple_purge_dead_eh_edges (bb);
1370 if (changed)
1371 cleanup_tree_cfg ();
1372 }
1373 if (gimple_in_ssa_p (cfun))
1374 update_ssa (TODO_update_ssa);
1375 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1376 verify_loop_structure ();
1377 pop_cfun ();
1378
1379 if (dump_file && !gimple_in_ssa_p (cfun))
1380 {
1381 omp_any_child_fn_dumped = true;
1382 dump_function_header (dump_file, child_fn, dump_flags);
1383 dump_function_to_file (child_fn, dump_file, dump_flags);
1384 }
1385 }
1386
1387 /* Emit a library call to launch the children threads. */
1388 if (is_cilk_for)
1389 expand_cilk_for_call (new_bb,
1390 as_a <gomp_parallel *> (entry_stmt), ws_args);
1391 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1392 expand_parallel_call (region, new_bb,
1393 as_a <gomp_parallel *> (entry_stmt), ws_args);
1394 else
1395 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1396 if (gimple_in_ssa_p (cfun))
1397 update_ssa (TODO_update_ssa_only_virtuals);
1398}
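
/* To summarize the transformation above on a toy example, a function
   containing

     #pragma omp parallel shared (x)
       x++;

   is split so that the body becomes an outlined child function, roughly

     void foo._omp_fn.0 (struct .omp_data_s *.omp_data_i)
     { .omp_data_i->x++; }

   moved into its own CFG and cgraph node, while the original block keeps
   only the GOMP_parallel (or GOMP_task) launch call emitted by
   expand_parallel_call or expand_task_call.  (Function and type names
   here are only illustrative.)  */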
1399
1400/* Information about members of an OpenACC collapsed loop nest. */
1401
1402struct oacc_collapse
1403{
7c6746c9 1404 tree base; /* Base value. */
4954efd4 1405 tree iters; /* Number of steps. */
 1406 tree step; /* Step size. */
1407};
1408
1409/* Helper for expand_oacc_for. Determine collapsed loop information.
1410 Fill in COUNTS array. Emit any initialization code before GSI.
1411 Return the calculated outer loop bound of BOUND_TYPE. */
1412
1413static tree
1414expand_oacc_collapse_init (const struct omp_for_data *fd,
1415 gimple_stmt_iterator *gsi,
1416 oacc_collapse *counts, tree bound_type)
1417{
1418 tree total = build_int_cst (bound_type, 1);
1419 int ix;
1420
1421 gcc_assert (integer_onep (fd->loop.step));
1422 gcc_assert (integer_zerop (fd->loop.n1));
1423
1424 for (ix = 0; ix != fd->collapse; ix++)
1425 {
1426 const omp_for_data_loop *loop = &fd->loops[ix];
1427
1428 tree iter_type = TREE_TYPE (loop->v);
1429 tree diff_type = iter_type;
1430 tree plus_type = iter_type;
1431
1432 gcc_assert (loop->cond_code == fd->loop.cond_code);
1433
1434 if (POINTER_TYPE_P (iter_type))
1435 plus_type = sizetype;
1436 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1437 diff_type = signed_type_for (diff_type);
1438
1439 tree b = loop->n1;
1440 tree e = loop->n2;
1441 tree s = loop->step;
1442 bool up = loop->cond_code == LT_EXPR;
1443 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1444 bool negating;
1445 tree expr;
1446
1447 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1448 true, GSI_SAME_STMT);
1449 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1450 true, GSI_SAME_STMT);
1451
7c6746c9 1452 /* Convert the step, avoiding possible unsigned->signed overflow. */
4954efd4 1453 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1454 if (negating)
1455 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1456 s = fold_convert (diff_type, s);
1457 if (negating)
1458 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1459 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1460 true, GSI_SAME_STMT);
1461
7c6746c9 1462 /* Determine the range, avoiding possible unsigned->signed overflow. */
4954efd4 1463 negating = !up && TYPE_UNSIGNED (iter_type);
1464 expr = fold_build2 (MINUS_EXPR, plus_type,
1465 fold_convert (plus_type, negating ? b : e),
1466 fold_convert (plus_type, negating ? e : b));
1467 expr = fold_convert (diff_type, expr);
1468 if (negating)
1469 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1470 tree range = force_gimple_operand_gsi
1471 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1472
1473 /* Determine number of iterations. */
1474 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1475 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1476 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1477
1478 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1479 true, GSI_SAME_STMT);
1480
1481 counts[ix].base = b;
1482 counts[ix].iters = iters;
1483 counts[ix].step = s;
1484
1485 total = fold_build2 (MULT_EXPR, bound_type, total,
1486 fold_convert (bound_type, iters));
1487 }
1488
1489 return total;
1490}
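
/* A small worked example of the per-loop computation above: for a member
   loop

     for (i = 0; i < 10; i += 3)

   we get range = 10, dir = +1 and s = 3, hence
   iters = (10 - 1 + 3) / 3 = 4, matching the executed values
   i = 0, 3, 6, 9; TOTAL is the product of these per-loop counts.  */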
1491
1492/* Emit initializers for collapsed loop members. IVAR is the outer
1493 loop iteration variable, from which collapsed loop iteration values
1494 are calculated. COUNTS array has been initialized by
 1495 expand_oacc_collapse_init. */
1496
1497static void
1498expand_oacc_collapse_vars (const struct omp_for_data *fd,
1499 gimple_stmt_iterator *gsi,
1500 const oacc_collapse *counts, tree ivar)
1501{
1502 tree ivar_type = TREE_TYPE (ivar);
1503
1504 /* The most rapidly changing iteration variable is the innermost
1505 one. */
1506 for (int ix = fd->collapse; ix--;)
1507 {
1508 const omp_for_data_loop *loop = &fd->loops[ix];
1509 const oacc_collapse *collapse = &counts[ix];
1510 tree iter_type = TREE_TYPE (loop->v);
1511 tree diff_type = TREE_TYPE (collapse->step);
1512 tree plus_type = iter_type;
1513 enum tree_code plus_code = PLUS_EXPR;
1514 tree expr;
1515
1516 if (POINTER_TYPE_P (iter_type))
1517 {
1518 plus_code = POINTER_PLUS_EXPR;
1519 plus_type = sizetype;
1520 }
1521
1522 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
1523 fold_convert (ivar_type, collapse->iters));
1524 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1525 collapse->step);
1526 expr = fold_build2 (plus_code, iter_type, collapse->base,
1527 fold_convert (plus_type, expr));
1528 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1529 true, GSI_SAME_STMT);
1530 gassign *ass = gimple_build_assign (loop->v, expr);
1531 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1532
1533 if (ix)
1534 {
1535 expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
1536 fold_convert (ivar_type, collapse->iters));
1537 ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1538 true, GSI_SAME_STMT);
1539 }
1540 }
1541}
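
/* For instance, with two collapsed loops of 4 and 5 iterations, IVAR runs
   from 0 to 19 and a value such as IVAR = 13 is decoded (innermost loop
   first, as above) into

     v2 = n21 + (13 % 5) * step2;       inner index 3
     v1 = n11 + ((13 / 5) % 4) * step1; outer index 2

   i.e. a mixed-radix decomposition of the flattened iteration number,
   using hypothetical bounds N21/N11 and steps STEP2/STEP1.  */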
1542
1543/* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1544 of the combined collapse > 1 loop constructs, generate code like:
1545 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1546 if (cond3 is <)
1547 adj = STEP3 - 1;
1548 else
1549 adj = STEP3 + 1;
1550 count3 = (adj + N32 - N31) / STEP3;
1551 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1552 if (cond2 is <)
1553 adj = STEP2 - 1;
1554 else
1555 adj = STEP2 + 1;
1556 count2 = (adj + N22 - N21) / STEP2;
1557 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1558 if (cond1 is <)
1559 adj = STEP1 - 1;
1560 else
1561 adj = STEP1 + 1;
1562 count1 = (adj + N12 - N11) / STEP1;
1563 count = count1 * count2 * count3;
1564 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1565 count = 0;
1566 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1567 of the combined loop constructs, just initialize COUNTS array
1568 from the _looptemp_ clauses. */
1569
1570/* NOTE: It *could* be better to moosh all of the BBs together,
1571 creating one larger BB with all the computation and the unexpected
1572 jump at the end. I.e.
1573
1574 bool zero3, zero2, zero1, zero;
1575
1576 zero3 = N32 c3 N31;
1577 count3 = (N32 - N31) /[cl] STEP3;
1578 zero2 = N22 c2 N21;
1579 count2 = (N22 - N21) /[cl] STEP2;
1580 zero1 = N12 c1 N11;
1581 count1 = (N12 - N11) /[cl] STEP1;
1582 zero = zero3 || zero2 || zero1;
1583 count = count1 * count2 * count3;
1584 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1585
 1586 After all, we expect zero to be false, and thus we expect to have to
1587 evaluate all of the comparison expressions, so short-circuiting
1588 oughtn't be a win. Since the condition isn't protecting a
1589 denominator, we're not concerned about divide-by-zero, so we can
1590 fully evaluate count even if a numerator turned out to be wrong.
1591
1592 It seems like putting this all together would create much better
1593 scheduling opportunities, and less pressure on the chip's branch
1594 predictor. */
1595
1596static void
1597expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1598 basic_block &entry_bb, tree *counts,
1599 basic_block &zero_iter1_bb, int &first_zero_iter1,
1600 basic_block &zero_iter2_bb, int &first_zero_iter2,
1601 basic_block &l2_dom_bb)
1602{
1603 tree t, type = TREE_TYPE (fd->loop.v);
1604 edge e, ne;
1605 int i;
1606
1607 /* Collapsed loops need work for expansion into SSA form. */
1608 gcc_assert (!gimple_in_ssa_p (cfun));
1609
1610 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1611 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1612 {
1613 gcc_assert (fd->ordered == 0);
1614 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1615 isn't supposed to be handled, as the inner loop doesn't
1616 use it. */
1617 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1618 OMP_CLAUSE__LOOPTEMP_);
1619 gcc_assert (innerc);
1620 for (i = 0; i < fd->collapse; i++)
1621 {
1622 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1623 OMP_CLAUSE__LOOPTEMP_);
1624 gcc_assert (innerc);
1625 if (i)
1626 counts[i] = OMP_CLAUSE_DECL (innerc);
1627 else
1628 counts[0] = NULL_TREE;
1629 }
1630 return;
1631 }
1632
1633 for (i = fd->collapse; i < fd->ordered; i++)
1634 {
1635 tree itype = TREE_TYPE (fd->loops[i].v);
1636 counts[i] = NULL_TREE;
1637 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1638 fold_convert (itype, fd->loops[i].n1),
1639 fold_convert (itype, fd->loops[i].n2));
1640 if (t && integer_zerop (t))
1641 {
1642 for (i = fd->collapse; i < fd->ordered; i++)
1643 counts[i] = build_int_cst (type, 0);
1644 break;
1645 }
1646 }
1647 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1648 {
1649 tree itype = TREE_TYPE (fd->loops[i].v);
1650
1651 if (i >= fd->collapse && counts[i])
1652 continue;
1653 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1654 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1655 fold_convert (itype, fd->loops[i].n1),
1656 fold_convert (itype, fd->loops[i].n2)))
1657 == NULL_TREE || !integer_onep (t)))
1658 {
1659 gcond *cond_stmt;
1660 tree n1, n2;
1661 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1662 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1663 true, GSI_SAME_STMT);
1664 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1665 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1666 true, GSI_SAME_STMT);
1667 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1668 NULL_TREE, NULL_TREE);
1669 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1670 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1671 expand_omp_regimplify_p, NULL, NULL)
1672 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1673 expand_omp_regimplify_p, NULL, NULL))
1674 {
1675 *gsi = gsi_for_stmt (cond_stmt);
1676 gimple_regimplify_operands (cond_stmt, gsi);
1677 }
1678 e = split_block (entry_bb, cond_stmt);
1679 basic_block &zero_iter_bb
1680 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1681 int &first_zero_iter
1682 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1683 if (zero_iter_bb == NULL)
1684 {
1685 gassign *assign_stmt;
1686 first_zero_iter = i;
1687 zero_iter_bb = create_empty_bb (entry_bb);
1688 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1689 *gsi = gsi_after_labels (zero_iter_bb);
1690 if (i < fd->collapse)
1691 assign_stmt = gimple_build_assign (fd->loop.n2,
1692 build_zero_cst (type));
1693 else
1694 {
1695 counts[i] = create_tmp_reg (type, ".count");
1696 assign_stmt
1697 = gimple_build_assign (counts[i], build_zero_cst (type));
1698 }
1699 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1700 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1701 entry_bb);
1702 }
1703 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1704 ne->probability = REG_BR_PROB_BASE / 2000 - 1;
1705 e->flags = EDGE_TRUE_VALUE;
1706 e->probability = REG_BR_PROB_BASE - ne->probability;
1707 if (l2_dom_bb == NULL)
1708 l2_dom_bb = entry_bb;
1709 entry_bb = e->dest;
1710 *gsi = gsi_last_bb (entry_bb);
1711 }
1712
1713 if (POINTER_TYPE_P (itype))
1714 itype = signed_type_for (itype);
1715 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1716 ? -1 : 1));
1717 t = fold_build2 (PLUS_EXPR, itype,
1718 fold_convert (itype, fd->loops[i].step), t);
1719 t = fold_build2 (PLUS_EXPR, itype, t,
1720 fold_convert (itype, fd->loops[i].n2));
1721 t = fold_build2 (MINUS_EXPR, itype, t,
1722 fold_convert (itype, fd->loops[i].n1));
1723 /* ?? We could probably use CEIL_DIV_EXPR instead of
 1724 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1725 generate the same code in the end because generically we
1726 don't know that the values involved must be negative for
1727 GT?? */
1728 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1729 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1730 fold_build1 (NEGATE_EXPR, itype, t),
1731 fold_build1 (NEGATE_EXPR, itype,
1732 fold_convert (itype,
1733 fd->loops[i].step)));
1734 else
1735 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1736 fold_convert (itype, fd->loops[i].step));
1737 t = fold_convert (type, t);
1738 if (TREE_CODE (t) == INTEGER_CST)
1739 counts[i] = t;
1740 else
1741 {
1742 if (i < fd->collapse || i != first_zero_iter2)
1743 counts[i] = create_tmp_reg (type, ".count");
1744 expand_omp_build_assign (gsi, counts[i], t);
1745 }
1746 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1747 {
1748 if (i == 0)
1749 t = counts[0];
1750 else
1751 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1752 expand_omp_build_assign (gsi, fd->loop.n2, t);
1753 }
1754 }
1755}
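/* Worked example (illustration only, not part of the pass): for a
   single loop  for (V = 3; V < 10; V += 2)  the code above computes,
   with cond_code == LT_EXPR,
     adj   = STEP - 1              = 1
     count = (adj + N2 - N1) / STEP = (1 + 10 - 3) / 2 = 4
   i.e. the iterations V = 3, 5, 7 and 9.  For a GT_EXPR loop the
   adjustment is STEP + 1, and for unsigned types the division is done
   on the negated difference and step instead.  */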
1756
1757/* Helper function for expand_omp_{for_*,simd}. Generate code like:
1758 T = V;
1759 V3 = N31 + (T % count3) * STEP3;
1760 T = T / count3;
1761 V2 = N21 + (T % count2) * STEP2;
1762 T = T / count2;
1763 V1 = N11 + T * STEP1;
1764 if this loop doesn't have an inner loop construct combined with it.
1765 If it does have an inner loop construct combined with it and the
1766 iteration count isn't known constant, store values from counts array
1767 into its _looptemp_ temporaries instead. */
1768
1769static void
1770expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1771 tree *counts, gimple *inner_stmt, tree startvar)
1772{
1773 int i;
1774 if (gimple_omp_for_combined_p (fd->for_stmt))
1775 {
 1776 /* If fd->loop.n2 is constant, then no propagation of the counts
 1777 is needed; they are already constant. */
1778 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1779 return;
1780
1781 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1782 ? gimple_omp_taskreg_clauses (inner_stmt)
1783 : gimple_omp_for_clauses (inner_stmt);
1784 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1785 isn't supposed to be handled, as the inner loop doesn't
1786 use it. */
1787 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1788 gcc_assert (innerc);
1789 for (i = 0; i < fd->collapse; i++)
1790 {
1791 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1792 OMP_CLAUSE__LOOPTEMP_);
1793 gcc_assert (innerc);
1794 if (i)
1795 {
1796 tree tem = OMP_CLAUSE_DECL (innerc);
1797 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1798 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1799 false, GSI_CONTINUE_LINKING);
1800 gassign *stmt = gimple_build_assign (tem, t);
1801 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1802 }
1803 }
1804 return;
1805 }
1806
1807 tree type = TREE_TYPE (fd->loop.v);
1808 tree tem = create_tmp_reg (type, ".tem");
1809 gassign *stmt = gimple_build_assign (tem, startvar);
1810 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1811
1812 for (i = fd->collapse - 1; i >= 0; i--)
1813 {
1814 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1815 itype = vtype;
1816 if (POINTER_TYPE_P (vtype))
1817 itype = signed_type_for (vtype);
1818 if (i != 0)
1819 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1820 else
1821 t = tem;
1822 t = fold_convert (itype, t);
1823 t = fold_build2 (MULT_EXPR, itype, t,
1824 fold_convert (itype, fd->loops[i].step));
1825 if (POINTER_TYPE_P (vtype))
1826 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1827 else
1828 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1829 t = force_gimple_operand_gsi (gsi, t,
1830 DECL_P (fd->loops[i].v)
1831 && TREE_ADDRESSABLE (fd->loops[i].v),
1832 NULL_TREE, false,
1833 GSI_CONTINUE_LINKING);
1834 stmt = gimple_build_assign (fd->loops[i].v, t);
1835 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1836 if (i != 0)
1837 {
1838 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1839 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1840 false, GSI_CONTINUE_LINKING);
1841 stmt = gimple_build_assign (tem, t);
1842 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1843 }
1844 }
1845}
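/* A small sketch of the decomposition done above (illustrative values
   only): with three collapsed loops, count3 = 4, count2 = 5 and a
   logical iteration number T = 17,
     V3 = N31 + (17 % 4) * STEP3;  T = 17 / 4 = 4;
     V2 = N21 + (4 % 5) * STEP2;   T = 4 / 5 = 0;
     V1 = N11 + 0 * STEP1;
   so T enumerates the collapsed iteration space with the innermost
   loop varying fastest.  */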
1846
1847/* Helper function for expand_omp_for_*. Generate code like:
1848 L10:
1849 V3 += STEP3;
1850 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1851 L11:
1852 V3 = N31;
1853 V2 += STEP2;
1854 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1855 L12:
1856 V2 = N21;
1857 V1 += STEP1;
1858 goto BODY_BB; */
1859
1860static basic_block
1861extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1862 basic_block body_bb)
1863{
1864 basic_block last_bb, bb, collapse_bb = NULL;
1865 int i;
1866 gimple_stmt_iterator gsi;
1867 edge e;
1868 tree t;
1869 gimple *stmt;
1870
1871 last_bb = cont_bb;
1872 for (i = fd->collapse - 1; i >= 0; i--)
1873 {
1874 tree vtype = TREE_TYPE (fd->loops[i].v);
1875
1876 bb = create_empty_bb (last_bb);
1877 add_bb_to_loop (bb, last_bb->loop_father);
1878 gsi = gsi_start_bb (bb);
1879
1880 if (i < fd->collapse - 1)
1881 {
1882 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1883 e->probability = REG_BR_PROB_BASE / 8;
1884
1885 t = fd->loops[i + 1].n1;
1886 t = force_gimple_operand_gsi (&gsi, t,
1887 DECL_P (fd->loops[i + 1].v)
1888 && TREE_ADDRESSABLE (fd->loops[i
1889 + 1].v),
1890 NULL_TREE, false,
1891 GSI_CONTINUE_LINKING);
1892 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1893 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1894 }
1895 else
1896 collapse_bb = bb;
1897
1898 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1899
1900 if (POINTER_TYPE_P (vtype))
1901 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1902 else
1903 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1904 t = force_gimple_operand_gsi (&gsi, t,
1905 DECL_P (fd->loops[i].v)
1906 && TREE_ADDRESSABLE (fd->loops[i].v),
1907 NULL_TREE, false, GSI_CONTINUE_LINKING);
1908 stmt = gimple_build_assign (fd->loops[i].v, t);
1909 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1910
1911 if (i > 0)
1912 {
1913 t = fd->loops[i].n2;
1914 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1915 false, GSI_CONTINUE_LINKING);
1916 tree v = fd->loops[i].v;
1917 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1918 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1919 false, GSI_CONTINUE_LINKING);
1920 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1921 stmt = gimple_build_cond_empty (t);
1922 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1923 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1924 e->probability = REG_BR_PROB_BASE * 7 / 8;
1925 }
1926 else
1927 make_edge (bb, body_bb, EDGE_FALLTHRU);
1928 last_bb = bb;
1929 }
1930
1931 return collapse_bb;
1932}
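/* For intuition only: with two collapsed loops i = 0..2 and j = 0..1
   the blocks created above behave like
     L10:  j += 1;  if (j < 2) goto BODY_BB;
     L11:  j = 0;   i += 1;    goto BODY_BB;
   i.e. the innermost variable is bumped first and, when it wraps, is
   reset to its lower bound while the next outer variable advances.  */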
1933
1934/* Expand #pragma omp ordered depend(source). */
1935
1936static void
1937expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1938 tree *counts, location_t loc)
1939{
1940 enum built_in_function source_ix
1941 = fd->iter_type == long_integer_type_node
1942 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1943 gimple *g
1944 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1945 build_fold_addr_expr (counts[fd->ordered]));
1946 gimple_set_location (g, loc);
1947 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1948}
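/* Sketch (assuming the long iter_type variant of the libgomp API): a
   #pragma omp ordered depend(source) inside the doacross loop becomes
   a single call along the lines of
     GOMP_doacross_post (&.orditera[0]);
   where .orditera is the counts[fd->ordered] array that the expansion
   keeps updated with the current values of the ordered iteration
   counters.  */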
1949
1950/* Expand a single depend from #pragma omp ordered depend(sink:...). */
1951
1952static void
1953expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1954 tree *counts, tree c, location_t loc)
1955{
1956 auto_vec<tree, 10> args;
1957 enum built_in_function sink_ix
1958 = fd->iter_type == long_integer_type_node
1959 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
1960 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
1961 int i;
1962 gimple_stmt_iterator gsi2 = *gsi;
1963 bool warned_step = false;
1964
1965 for (i = 0; i < fd->ordered; i++)
1966 {
1967 tree step = NULL_TREE;
1968 off = TREE_PURPOSE (deps);
1969 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
1970 {
1971 step = TREE_OPERAND (off, 1);
1972 off = TREE_OPERAND (off, 0);
1973 }
1974 if (!integer_zerop (off))
1975 {
1976 gcc_assert (fd->loops[i].cond_code == LT_EXPR
1977 || fd->loops[i].cond_code == GT_EXPR);
1978 bool forward = fd->loops[i].cond_code == LT_EXPR;
1979 if (step)
1980 {
 1981 /* Non-simple Fortran DO loops.  If step is variable,
 1982 we don't know even the direction at compile time, so we
 1983 can't warn.  */
1984 if (TREE_CODE (step) != INTEGER_CST)
1985 break;
1986 forward = tree_int_cst_sgn (step) != -1;
1987 }
1988 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
1989 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
1990 "lexically later iteration");
1991 break;
1992 }
1993 deps = TREE_CHAIN (deps);
1994 }
1995 /* If all offsets corresponding to the collapsed loops are zero,
1996 this depend clause can be ignored. FIXME: but there is still a
1997 flush needed. We need to emit one __sync_synchronize () for it
1998 though (perhaps conditionally)? Solve this together with the
1999 conservative dependence folding optimization.
2000 if (i >= fd->collapse)
2001 return; */
2002
2003 deps = OMP_CLAUSE_DECL (c);
2004 gsi_prev (&gsi2);
2005 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2006 edge e2 = split_block_after_labels (e1->dest);
2007
2008 gsi2 = gsi_after_labels (e1->dest);
2009 *gsi = gsi_last_bb (e1->src);
2010 for (i = 0; i < fd->ordered; i++)
2011 {
2012 tree itype = TREE_TYPE (fd->loops[i].v);
2013 tree step = NULL_TREE;
2014 tree orig_off = NULL_TREE;
2015 if (POINTER_TYPE_P (itype))
2016 itype = sizetype;
2017 if (i)
2018 deps = TREE_CHAIN (deps);
2019 off = TREE_PURPOSE (deps);
2020 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2021 {
2022 step = TREE_OPERAND (off, 1);
2023 off = TREE_OPERAND (off, 0);
2024 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2025 && integer_onep (fd->loops[i].step)
2026 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2027 }
2028 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2029 if (step)
2030 {
2031 off = fold_convert_loc (loc, itype, off);
2032 orig_off = off;
2033 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2034 }
2035
2036 if (integer_zerop (off))
2037 t = boolean_true_node;
2038 else
2039 {
2040 tree a;
2041 tree co = fold_convert_loc (loc, itype, off);
2042 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2043 {
2044 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2045 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2046 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2047 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2048 co);
2049 }
2050 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2051 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2052 fd->loops[i].v, co);
2053 else
2054 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2055 fd->loops[i].v, co);
2056 if (step)
2057 {
2058 tree t1, t2;
2059 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2060 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2061 fd->loops[i].n1);
2062 else
2063 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2064 fd->loops[i].n2);
2065 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2066 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2067 fd->loops[i].n2);
2068 else
2069 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2070 fd->loops[i].n1);
2071 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2072 step, build_int_cst (TREE_TYPE (step), 0));
2073 if (TREE_CODE (step) != INTEGER_CST)
2074 {
2075 t1 = unshare_expr (t1);
2076 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2077 false, GSI_CONTINUE_LINKING);
2078 t2 = unshare_expr (t2);
2079 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2080 false, GSI_CONTINUE_LINKING);
2081 }
2082 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2083 t, t2, t1);
2084 }
2085 else if (fd->loops[i].cond_code == LT_EXPR)
2086 {
2087 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2088 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2089 fd->loops[i].n1);
2090 else
2091 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2092 fd->loops[i].n2);
2093 }
2094 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2095 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2096 fd->loops[i].n2);
2097 else
2098 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2099 fd->loops[i].n1);
2100 }
2101 if (cond)
2102 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2103 else
2104 cond = t;
2105
2106 off = fold_convert_loc (loc, itype, off);
2107
2108 if (step
2109 || (fd->loops[i].cond_code == LT_EXPR
2110 ? !integer_onep (fd->loops[i].step)
2111 : !integer_minus_onep (fd->loops[i].step)))
2112 {
2113 if (step == NULL_TREE
2114 && TYPE_UNSIGNED (itype)
2115 && fd->loops[i].cond_code == GT_EXPR)
2116 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2117 fold_build1_loc (loc, NEGATE_EXPR, itype,
2118 s));
2119 else
2120 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2121 orig_off ? orig_off : off, s);
2122 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2123 build_int_cst (itype, 0));
2124 if (integer_zerop (t) && !warned_step)
2125 {
2126 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2127 "in the iteration space");
2128 warned_step = true;
2129 }
2130 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2131 cond, t);
2132 }
2133
2134 if (i <= fd->collapse - 1 && fd->collapse > 1)
2135 t = fd->loop.v;
2136 else if (counts[i])
2137 t = counts[i];
2138 else
2139 {
2140 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2141 fd->loops[i].v, fd->loops[i].n1);
2142 t = fold_convert_loc (loc, fd->iter_type, t);
2143 }
2144 if (step)
2145 /* We have divided off by step already earlier. */;
2146 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2147 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2148 fold_build1_loc (loc, NEGATE_EXPR, itype,
2149 s));
2150 else
2151 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2152 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2153 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2154 off = fold_convert_loc (loc, fd->iter_type, off);
2155 if (i <= fd->collapse - 1 && fd->collapse > 1)
2156 {
2157 if (i)
2158 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2159 off);
2160 if (i < fd->collapse - 1)
2161 {
2162 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2163 counts[i]);
2164 continue;
2165 }
2166 }
2167 off = unshare_expr (off);
2168 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2169 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2170 true, GSI_SAME_STMT);
2171 args.safe_push (t);
2172 }
2173 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2174 gimple_set_location (g, loc);
2175 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2176
2177 cond = unshare_expr (cond);
2178 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2179 GSI_CONTINUE_LINKING);
2180 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2181 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2182 e3->probability = REG_BR_PROB_BASE / 8;
2183 e1->probability = REG_BR_PROB_BASE - e3->probability;
2184 e1->flags = EDGE_TRUE_VALUE;
2185 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2186
2187 *gsi = gsi_after_labels (e2->dest);
2188}
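/* Illustration only (the names lb1/lb2 are made up here): for
     #pragma omp for ordered(2) ...
     #pragma omp ordered depend(sink: i - 1, j)
   the code above normalizes each offset into the iteration-count
   space, guards against waiting on iterations outside the iteration
   space, and emits roughly
     if (i - 1 >= lb1 && ...)
       GOMP_doacross_wait (i - 1 - lb1, j - lb2);
   dividing the offsets by the loop steps when those are not 1/-1.  */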
2189
2190/* Expand all #pragma omp ordered depend(source) and
2191 #pragma omp ordered depend(sink:...) constructs in the current
2192 #pragma omp for ordered(n) region. */
2193
2194static void
2195expand_omp_ordered_source_sink (struct omp_region *region,
2196 struct omp_for_data *fd, tree *counts,
2197 basic_block cont_bb)
2198{
2199 struct omp_region *inner;
2200 int i;
2201 for (i = fd->collapse - 1; i < fd->ordered; i++)
2202 if (i == fd->collapse - 1 && fd->collapse > 1)
2203 counts[i] = NULL_TREE;
2204 else if (i >= fd->collapse && !cont_bb)
2205 counts[i] = build_zero_cst (fd->iter_type);
2206 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2207 && integer_onep (fd->loops[i].step))
2208 counts[i] = NULL_TREE;
2209 else
2210 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2211 tree atype
2212 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2213 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2214 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2215
2216 for (inner = region->inner; inner; inner = inner->next)
2217 if (inner->type == GIMPLE_OMP_ORDERED)
2218 {
2219 gomp_ordered *ord_stmt = inner->ord_stmt;
2220 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2221 location_t loc = gimple_location (ord_stmt);
2222 tree c;
2223 for (c = gimple_omp_ordered_clauses (ord_stmt);
2224 c; c = OMP_CLAUSE_CHAIN (c))
2225 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2226 break;
2227 if (c)
2228 expand_omp_ordered_source (&gsi, fd, counts, loc);
2229 for (c = gimple_omp_ordered_clauses (ord_stmt);
2230 c; c = OMP_CLAUSE_CHAIN (c))
2231 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2232 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2233 gsi_remove (&gsi, true);
2234 }
2235}
2236
2237/* Wrap the body into fd->ordered - fd->collapse loops that aren't
2238 collapsed. */
2239
2240static basic_block
2241expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2242 basic_block cont_bb, basic_block body_bb,
2243 bool ordered_lastprivate)
2244{
2245 if (fd->ordered == fd->collapse)
2246 return cont_bb;
2247
2248 if (!cont_bb)
2249 {
2250 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2251 for (int i = fd->collapse; i < fd->ordered; i++)
2252 {
2253 tree type = TREE_TYPE (fd->loops[i].v);
2254 tree n1 = fold_convert (type, fd->loops[i].n1);
2255 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2256 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2257 size_int (i - fd->collapse + 1),
2258 NULL_TREE, NULL_TREE);
2259 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2260 }
2261 return NULL;
2262 }
2263
2264 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2265 {
2266 tree t, type = TREE_TYPE (fd->loops[i].v);
2267 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2268 expand_omp_build_assign (&gsi, fd->loops[i].v,
2269 fold_convert (type, fd->loops[i].n1));
2270 if (counts[i])
2271 expand_omp_build_assign (&gsi, counts[i],
2272 build_zero_cst (fd->iter_type));
2273 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2274 size_int (i - fd->collapse + 1),
2275 NULL_TREE, NULL_TREE);
2276 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2277 if (!gsi_end_p (gsi))
2278 gsi_prev (&gsi);
2279 else
2280 gsi = gsi_last_bb (body_bb);
2281 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2282 basic_block new_body = e1->dest;
2283 if (body_bb == cont_bb)
2284 cont_bb = new_body;
2285 edge e2 = NULL;
2286 basic_block new_header;
2287 if (EDGE_COUNT (cont_bb->preds) > 0)
2288 {
2289 gsi = gsi_last_bb (cont_bb);
2290 if (POINTER_TYPE_P (type))
2291 t = fold_build_pointer_plus (fd->loops[i].v,
2292 fold_convert (sizetype,
2293 fd->loops[i].step));
2294 else
2295 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2296 fold_convert (type, fd->loops[i].step));
2297 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2298 if (counts[i])
2299 {
2300 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2301 build_int_cst (fd->iter_type, 1));
2302 expand_omp_build_assign (&gsi, counts[i], t);
2303 t = counts[i];
2304 }
2305 else
2306 {
2307 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2308 fd->loops[i].v, fd->loops[i].n1);
2309 t = fold_convert (fd->iter_type, t);
2310 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2311 true, GSI_SAME_STMT);
2312 }
2313 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2314 size_int (i - fd->collapse + 1),
2315 NULL_TREE, NULL_TREE);
2316 expand_omp_build_assign (&gsi, aref, t);
2317 gsi_prev (&gsi);
2318 e2 = split_block (cont_bb, gsi_stmt (gsi));
2319 new_header = e2->dest;
2320 }
2321 else
2322 new_header = cont_bb;
2323 gsi = gsi_after_labels (new_header);
2324 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2325 true, GSI_SAME_STMT);
2326 tree n2
2327 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2328 true, NULL_TREE, true, GSI_SAME_STMT);
2329 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2330 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2331 edge e3 = split_block (new_header, gsi_stmt (gsi));
2332 cont_bb = e3->dest;
2333 remove_edge (e1);
2334 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2335 e3->flags = EDGE_FALSE_VALUE;
2336 e3->probability = REG_BR_PROB_BASE / 8;
2337 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2338 e1->probability = REG_BR_PROB_BASE - e3->probability;
2339
2340 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2341 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2342
2343 if (e2)
2344 {
2345 struct loop *loop = alloc_loop ();
2346 loop->header = new_header;
2347 loop->latch = e2->src;
2348 add_loop (loop, body_bb->loop_father);
2349 }
2350 }
2351
2352 /* If there are any lastprivate clauses and it is possible some loops
2353 might have zero iterations, ensure all the decls are initialized,
2354 otherwise we could crash evaluating C++ class iterators with lastprivate
2355 clauses. */
2356 bool need_inits = false;
2357 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2358 if (need_inits)
2359 {
2360 tree type = TREE_TYPE (fd->loops[i].v);
2361 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2362 expand_omp_build_assign (&gsi, fd->loops[i].v,
2363 fold_convert (type, fd->loops[i].n1));
2364 }
2365 else
2366 {
2367 tree type = TREE_TYPE (fd->loops[i].v);
2368 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2369 boolean_type_node,
2370 fold_convert (type, fd->loops[i].n1),
2371 fold_convert (type, fd->loops[i].n2));
2372 if (!integer_onep (this_cond))
2373 need_inits = true;
2374 }
2375
2376 return cont_bb;
2377}
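/* Rough sketch of the effect of the function above for ordered(2)
   without collapse: the worksharing machinery hands out iterations of
   the outermost loop only, while the body is re-wrapped as
     for (V2 = N21; V2 cond2 N22; V2 += STEP2)
       BODY;
   with the loop latch also storing V2's current iteration number into
   the .orditera array, so the doacross runtime calls can observe the
   inner iteration counter.  */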
2378
2379/* A subroutine of expand_omp_for. Generate code for a parallel
2380 loop with any schedule. Given parameters:
2381
2382 for (V = N1; V cond N2; V += STEP) BODY;
2383
2384 where COND is "<" or ">", we generate pseudocode
2385
2386 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2387 if (more) goto L0; else goto L3;
2388 L0:
2389 V = istart0;
2390 iend = iend0;
2391 L1:
2392 BODY;
2393 V += STEP;
2394 if (V cond iend) goto L1; else goto L2;
2395 L2:
2396 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2397 L3:
2398
2399 If this is a combined omp parallel loop, instead of the call to
2400 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2401 If this is gimple_omp_for_combined_p loop, then instead of assigning
2402 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2403 inner GIMPLE_OMP_FOR and V += STEP; and
2404 if (V cond iend) goto L1; else goto L2; are removed.
2405
2406 For collapsed loops, given parameters:
2407 collapse(3)
2408 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2409 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2410 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2411 BODY;
2412
2413 we generate pseudocode
2414
2415 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2416 if (cond3 is <)
2417 adj = STEP3 - 1;
2418 else
2419 adj = STEP3 + 1;
2420 count3 = (adj + N32 - N31) / STEP3;
2421 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2422 if (cond2 is <)
2423 adj = STEP2 - 1;
2424 else
2425 adj = STEP2 + 1;
2426 count2 = (adj + N22 - N21) / STEP2;
2427 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2428 if (cond1 is <)
2429 adj = STEP1 - 1;
2430 else
2431 adj = STEP1 + 1;
2432 count1 = (adj + N12 - N11) / STEP1;
2433 count = count1 * count2 * count3;
2434 goto Z1;
2435 Z0:
2436 count = 0;
2437 Z1:
2438 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2439 if (more) goto L0; else goto L3;
2440 L0:
2441 V = istart0;
2442 T = V;
2443 V3 = N31 + (T % count3) * STEP3;
2444 T = T / count3;
2445 V2 = N21 + (T % count2) * STEP2;
2446 T = T / count2;
2447 V1 = N11 + T * STEP1;
2448 iend = iend0;
2449 L1:
2450 BODY;
2451 V += 1;
2452 if (V < iend) goto L10; else goto L2;
2453 L10:
2454 V3 += STEP3;
2455 if (V3 cond3 N32) goto L1; else goto L11;
2456 L11:
2457 V3 = N31;
2458 V2 += STEP2;
2459 if (V2 cond2 N22) goto L1; else goto L12;
2460 L12:
2461 V2 = N21;
2462 V1 += STEP1;
2463 goto L1;
2464 L2:
2465 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2466 L3:
2467
2468 */
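/* As a concrete illustration (a sketch, not the literal output), for
     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++) BODY;
   start_fn/next_fn are the dynamic variants and the control flow
   built below corresponds to C along the lines of
     if (GOMP_loop_dynamic_start (0, n, 1, 4, &istart0, &iend0))
       do
	 for (i = istart0; i < iend0; i++) BODY;
       while (GOMP_loop_dynamic_next (&istart0, &iend0));
     GOMP_loop_end ();
   (or GOMP_loop_end_nowait when the nowait clause is present).  */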
2469
2470static void
2471expand_omp_for_generic (struct omp_region *region,
2472 struct omp_for_data *fd,
2473 enum built_in_function start_fn,
2474 enum built_in_function next_fn,
2475 gimple *inner_stmt)
2476{
2477 tree type, istart0, iend0, iend;
2478 tree t, vmain, vback, bias = NULL_TREE;
2479 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2480 basic_block l2_bb = NULL, l3_bb = NULL;
2481 gimple_stmt_iterator gsi;
2482 gassign *assign_stmt;
2483 bool in_combined_parallel = is_combined_parallel (region);
2484 bool broken_loop = region->cont == NULL;
2485 edge e, ne;
2486 tree *counts = NULL;
2487 int i;
2488 bool ordered_lastprivate = false;
2489
2490 gcc_assert (!broken_loop || !in_combined_parallel);
2491 gcc_assert (fd->iter_type == long_integer_type_node
2492 || !in_combined_parallel);
2493
2494 entry_bb = region->entry;
2495 cont_bb = region->cont;
2496 collapse_bb = NULL;
2497 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2498 gcc_assert (broken_loop
2499 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2500 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2501 l1_bb = single_succ (l0_bb);
2502 if (!broken_loop)
2503 {
2504 l2_bb = create_empty_bb (cont_bb);
2505 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2506 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2507 == l1_bb));
2508 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2509 }
2510 else
2511 l2_bb = NULL;
2512 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2513 exit_bb = region->exit;
2514
2515 gsi = gsi_last_bb (entry_bb);
2516
2517 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2518 if (fd->ordered
2519 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2520 OMP_CLAUSE_LASTPRIVATE))
 2521 ordered_lastprivate = true;
2522 if (fd->collapse > 1 || fd->ordered)
2523 {
2524 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2525 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2526
2527 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2528 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2529 zero_iter1_bb, first_zero_iter1,
2530 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2531
2532 if (zero_iter1_bb)
2533 {
2534 /* Some counts[i] vars might be uninitialized if
2535 some loop has zero iterations. But the body shouldn't
2536 be executed in that case, so just avoid uninit warnings. */
2537 for (i = first_zero_iter1;
2538 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2539 if (SSA_VAR_P (counts[i]))
2540 TREE_NO_WARNING (counts[i]) = 1;
2541 gsi_prev (&gsi);
2542 e = split_block (entry_bb, gsi_stmt (gsi));
2543 entry_bb = e->dest;
2544 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2545 gsi = gsi_last_bb (entry_bb);
2546 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2547 get_immediate_dominator (CDI_DOMINATORS,
2548 zero_iter1_bb));
2549 }
2550 if (zero_iter2_bb)
2551 {
2552 /* Some counts[i] vars might be uninitialized if
2553 some loop has zero iterations. But the body shouldn't
2554 be executed in that case, so just avoid uninit warnings. */
2555 for (i = first_zero_iter2; i < fd->ordered; i++)
2556 if (SSA_VAR_P (counts[i]))
2557 TREE_NO_WARNING (counts[i]) = 1;
2558 if (zero_iter1_bb)
2559 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2560 else
2561 {
2562 gsi_prev (&gsi);
2563 e = split_block (entry_bb, gsi_stmt (gsi));
2564 entry_bb = e->dest;
2565 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2566 gsi = gsi_last_bb (entry_bb);
2567 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2568 get_immediate_dominator
2569 (CDI_DOMINATORS, zero_iter2_bb));
2570 }
2571 }
2572 if (fd->collapse == 1)
2573 {
2574 counts[0] = fd->loop.n2;
2575 fd->loop = fd->loops[0];
2576 }
2577 }
2578
2579 type = TREE_TYPE (fd->loop.v);
2580 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2581 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2582 TREE_ADDRESSABLE (istart0) = 1;
2583 TREE_ADDRESSABLE (iend0) = 1;
2584
2585 /* See if we need to bias by LLONG_MIN. */
2586 if (fd->iter_type == long_long_unsigned_type_node
2587 && TREE_CODE (type) == INTEGER_TYPE
2588 && !TYPE_UNSIGNED (type)
2589 && fd->ordered == 0)
2590 {
2591 tree n1, n2;
2592
2593 if (fd->loop.cond_code == LT_EXPR)
2594 {
2595 n1 = fd->loop.n1;
2596 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2597 }
2598 else
2599 {
2600 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2601 n2 = fd->loop.n1;
2602 }
2603 if (TREE_CODE (n1) != INTEGER_CST
2604 || TREE_CODE (n2) != INTEGER_CST
2605 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2606 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2607 }
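  /* Note for readers (illustrative): with a signed long long iterator
     but an unsigned long long iter_type, negative iteration bounds
     would wrap when passed to the GOMP_loop_ull_* entry points.
     Adding TYPE_MIN_VALUE (LLONG_MIN reinterpreted as 2^63) maps the
     signed range monotonically onto the unsigned range; the bias is
     subtracted again below when istart0/iend0 are converted back to
     the loop variable's type.  */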
2608
2609 gimple_stmt_iterator gsif = gsi;
2610 gsi_prev (&gsif);
2611
2612 tree arr = NULL_TREE;
2613 if (in_combined_parallel)
2614 {
2615 gcc_assert (fd->ordered == 0);
2616 /* In a combined parallel loop, emit a call to
2617 GOMP_loop_foo_next. */
2618 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2619 build_fold_addr_expr (istart0),
2620 build_fold_addr_expr (iend0));
2621 }
2622 else
2623 {
2624 tree t0, t1, t2, t3, t4;
2625 /* If this is not a combined parallel loop, emit a call to
2626 GOMP_loop_foo_start in ENTRY_BB. */
2627 t4 = build_fold_addr_expr (iend0);
2628 t3 = build_fold_addr_expr (istart0);
2629 if (fd->ordered)
2630 {
2631 t0 = build_int_cst (unsigned_type_node,
2632 fd->ordered - fd->collapse + 1);
2633 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2634 fd->ordered
2635 - fd->collapse + 1),
2636 ".omp_counts");
2637 DECL_NAMELESS (arr) = 1;
2638 TREE_ADDRESSABLE (arr) = 1;
2639 TREE_STATIC (arr) = 1;
2640 vec<constructor_elt, va_gc> *v;
2641 vec_alloc (v, fd->ordered - fd->collapse + 1);
2642 int idx;
2643
2644 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2645 {
2646 tree c;
2647 if (idx == 0 && fd->collapse > 1)
2648 c = fd->loop.n2;
2649 else
2650 c = counts[idx + fd->collapse - 1];
2651 tree purpose = size_int (idx);
2652 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2653 if (TREE_CODE (c) != INTEGER_CST)
2654 TREE_STATIC (arr) = 0;
2655 }
2656
2657 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2658 if (!TREE_STATIC (arr))
2659 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2660 void_type_node, arr),
2661 true, NULL_TREE, true, GSI_SAME_STMT);
2662 t1 = build_fold_addr_expr (arr);
2663 t2 = NULL_TREE;
2664 }
2665 else
2666 {
2667 t2 = fold_convert (fd->iter_type, fd->loop.step);
2668 t1 = fd->loop.n2;
2669 t0 = fd->loop.n1;
2670 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2671 {
2672 tree innerc
2673 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2674 OMP_CLAUSE__LOOPTEMP_);
2675 gcc_assert (innerc);
2676 t0 = OMP_CLAUSE_DECL (innerc);
2677 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2678 OMP_CLAUSE__LOOPTEMP_);
2679 gcc_assert (innerc);
2680 t1 = OMP_CLAUSE_DECL (innerc);
2681 }
2682 if (POINTER_TYPE_P (TREE_TYPE (t0))
2683 && TYPE_PRECISION (TREE_TYPE (t0))
2684 != TYPE_PRECISION (fd->iter_type))
2685 {
2686 /* Avoid casting pointers to integer of a different size. */
2687 tree itype = signed_type_for (type);
2688 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2689 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2690 }
2691 else
2692 {
2693 t1 = fold_convert (fd->iter_type, t1);
2694 t0 = fold_convert (fd->iter_type, t0);
2695 }
2696 if (bias)
2697 {
2698 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2699 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2700 }
2701 }
2702 if (fd->iter_type == long_integer_type_node || fd->ordered)
2703 {
2704 if (fd->chunk_size)
2705 {
2706 t = fold_convert (fd->iter_type, fd->chunk_size);
2707 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2708 if (fd->ordered)
2709 t = build_call_expr (builtin_decl_explicit (start_fn),
2710 5, t0, t1, t, t3, t4);
2711 else
2712 t = build_call_expr (builtin_decl_explicit (start_fn),
2713 6, t0, t1, t2, t, t3, t4);
2714 }
2715 else if (fd->ordered)
2716 t = build_call_expr (builtin_decl_explicit (start_fn),
2717 4, t0, t1, t3, t4);
2718 else
2719 t = build_call_expr (builtin_decl_explicit (start_fn),
2720 5, t0, t1, t2, t3, t4);
2721 }
2722 else
2723 {
2724 tree t5;
2725 tree c_bool_type;
2726 tree bfn_decl;
2727
 2728 /* The GOMP_loop_ull_*start functions have an additional boolean
 2729 argument, true for < loops and false for > loops.
 2730 In Fortran, the C bool type can be different from
 2731 boolean_type_node. */
2732 bfn_decl = builtin_decl_explicit (start_fn);
2733 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2734 t5 = build_int_cst (c_bool_type,
2735 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2736 if (fd->chunk_size)
2737 {
2738 tree bfn_decl = builtin_decl_explicit (start_fn);
2739 t = fold_convert (fd->iter_type, fd->chunk_size);
2740 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2741 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2742 }
2743 else
2744 t = build_call_expr (builtin_decl_explicit (start_fn),
2745 6, t5, t0, t1, t2, t3, t4);
2746 }
2747 }
2748 if (TREE_TYPE (t) != boolean_type_node)
2749 t = fold_build2 (NE_EXPR, boolean_type_node,
2750 t, build_int_cst (TREE_TYPE (t), 0));
2751 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
7c6746c9 2752 true, GSI_SAME_STMT);
4954efd4 2753 if (arr && !TREE_STATIC (arr))
2754 {
2755 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2756 TREE_THIS_VOLATILE (clobber) = 1;
2757 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2758 GSI_SAME_STMT);
2759 }
2760 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2761
2762 /* Remove the GIMPLE_OMP_FOR statement. */
2763 gsi_remove (&gsi, true);
2764
2765 if (gsi_end_p (gsif))
2766 gsif = gsi_after_labels (gsi_bb (gsif));
2767 gsi_next (&gsif);
2768
2769 /* Iteration setup for sequential loop goes in L0_BB. */
2770 tree startvar = fd->loop.v;
2771 tree endvar = NULL_TREE;
2772
2773 if (gimple_omp_for_combined_p (fd->for_stmt))
2774 {
2775 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2776 && gimple_omp_for_kind (inner_stmt)
2777 == GF_OMP_FOR_KIND_SIMD);
2778 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2779 OMP_CLAUSE__LOOPTEMP_);
2780 gcc_assert (innerc);
2781 startvar = OMP_CLAUSE_DECL (innerc);
2782 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2783 OMP_CLAUSE__LOOPTEMP_);
2784 gcc_assert (innerc);
2785 endvar = OMP_CLAUSE_DECL (innerc);
2786 }
2787
2788 gsi = gsi_start_bb (l0_bb);
2789 t = istart0;
2790 if (fd->ordered && fd->collapse == 1)
2791 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2792 fold_convert (fd->iter_type, fd->loop.step));
2793 else if (bias)
2794 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2795 if (fd->ordered && fd->collapse == 1)
2796 {
2797 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2798 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2799 fd->loop.n1, fold_convert (sizetype, t));
2800 else
2801 {
2802 t = fold_convert (TREE_TYPE (startvar), t);
2803 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2804 fd->loop.n1, t);
2805 }
2806 }
2807 else
2808 {
2809 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2810 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2811 t = fold_convert (TREE_TYPE (startvar), t);
2812 }
2813 t = force_gimple_operand_gsi (&gsi, t,
2814 DECL_P (startvar)
2815 && TREE_ADDRESSABLE (startvar),
2816 NULL_TREE, false, GSI_CONTINUE_LINKING);
2817 assign_stmt = gimple_build_assign (startvar, t);
2818 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2819
2820 t = iend0;
2821 if (fd->ordered && fd->collapse == 1)
2822 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2823 fold_convert (fd->iter_type, fd->loop.step));
2824 else if (bias)
2825 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2826 if (fd->ordered && fd->collapse == 1)
2827 {
2828 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2829 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2830 fd->loop.n1, fold_convert (sizetype, t));
2831 else
2832 {
2833 t = fold_convert (TREE_TYPE (startvar), t);
2834 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2835 fd->loop.n1, t);
2836 }
2837 }
2838 else
2839 {
2840 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2841 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2842 t = fold_convert (TREE_TYPE (startvar), t);
2843 }
2844 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2845 false, GSI_CONTINUE_LINKING);
2846 if (endvar)
2847 {
2848 assign_stmt = gimple_build_assign (endvar, iend);
2849 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2850 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2851 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2852 else
2853 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2854 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2855 }
2856 /* Handle linear clause adjustments. */
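  /* For intuition only: given e.g. linear(x:2), each logical iteration
     advances x by 2, so the chunk that starts at logical iteration
     number ITERCNT must start with x equal to its pre-loop value plus
     ITERCNT * 2; the walk below emits exactly that initialization for
     every linear clause that needs the copy-in.  */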
2857 tree itercnt = NULL_TREE;
2858 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2859 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2860 c; c = OMP_CLAUSE_CHAIN (c))
2861 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2862 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2863 {
2864 tree d = OMP_CLAUSE_DECL (c);
2865 bool is_ref = omp_is_reference (d);
2866 tree t = d, a, dest;
2867 if (is_ref)
2868 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2869 tree type = TREE_TYPE (t);
2870 if (POINTER_TYPE_P (type))
2871 type = sizetype;
2872 dest = unshare_expr (t);
2873 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2874 expand_omp_build_assign (&gsif, v, t);
2875 if (itercnt == NULL_TREE)
2876 {
2877 itercnt = startvar;
2878 tree n1 = fd->loop.n1;
2879 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2880 {
2881 itercnt
2882 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2883 itercnt);
2884 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2885 }
2886 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2887 itercnt, n1);
2888 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2889 itercnt, fd->loop.step);
2890 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2891 NULL_TREE, false,
2892 GSI_CONTINUE_LINKING);
2893 }
2894 a = fold_build2 (MULT_EXPR, type,
2895 fold_convert (type, itercnt),
2896 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2897 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2898 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2899 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2900 false, GSI_CONTINUE_LINKING);
2901 assign_stmt = gimple_build_assign (dest, t);
2902 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2903 }
2904 if (fd->collapse > 1)
2905 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2906
2907 if (fd->ordered)
2908 {
 2909 /* Until now, the counts array contained the number of iterations
 2910 (or a variable holding it) for the ith loop.  From now on, we need
 2911 those counts only for the collapsed loops, and only for the 2nd
 2912 through the last collapsed one.  Move them one element earlier;
 2913 we'll use counts[fd->collapse - 1] for the first source/sink
 2914 iteration counter and so on, and counts[fd->ordered]
 2915 as the array holding the current counter values for
 2916 depend(source). */
2917 if (fd->collapse > 1)
2918 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2919 if (broken_loop)
2920 {
2921 int i;
2922 for (i = fd->collapse; i < fd->ordered; i++)
2923 {
2924 tree type = TREE_TYPE (fd->loops[i].v);
2925 tree this_cond
2926 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2927 fold_convert (type, fd->loops[i].n1),
2928 fold_convert (type, fd->loops[i].n2));
2929 if (!integer_onep (this_cond))
2930 break;
2931 }
2932 if (i < fd->ordered)
2933 {
2934 cont_bb
2935 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2936 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2937 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2938 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2939 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2940 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2941 make_edge (cont_bb, l1_bb, 0);
2942 l2_bb = create_empty_bb (cont_bb);
2943 broken_loop = false;
2944 }
2945 }
2946 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2947 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2948 ordered_lastprivate);
2949 if (counts[fd->collapse - 1])
2950 {
2951 gcc_assert (fd->collapse == 1);
2952 gsi = gsi_last_bb (l0_bb);
2953 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2954 istart0, true);
2955 gsi = gsi_last_bb (cont_bb);
2956 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2957 build_int_cst (fd->iter_type, 1));
2958 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2959 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2960 size_zero_node, NULL_TREE, NULL_TREE);
2961 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
2962 t = counts[fd->collapse - 1];
2963 }
2964 else if (fd->collapse > 1)
2965 t = fd->loop.v;
2966 else
2967 {
2968 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
2969 fd->loops[0].v, fd->loops[0].n1);
2970 t = fold_convert (fd->iter_type, t);
2971 }
2972 gsi = gsi_last_bb (l0_bb);
2973 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2974 size_zero_node, NULL_TREE, NULL_TREE);
2975 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2976 false, GSI_CONTINUE_LINKING);
2977 expand_omp_build_assign (&gsi, aref, t, true);
2978 }
2979
2980 if (!broken_loop)
2981 {
2982 /* Code to control the increment and predicate for the sequential
2983 loop goes in the CONT_BB. */
2984 gsi = gsi_last_bb (cont_bb);
2985 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
2986 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
2987 vmain = gimple_omp_continue_control_use (cont_stmt);
2988 vback = gimple_omp_continue_control_def (cont_stmt);
2989
2990 if (!gimple_omp_for_combined_p (fd->for_stmt))
2991 {
2992 if (POINTER_TYPE_P (type))
2993 t = fold_build_pointer_plus (vmain, fd->loop.step);
2994 else
2995 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
2996 t = force_gimple_operand_gsi (&gsi, t,
2997 DECL_P (vback)
2998 && TREE_ADDRESSABLE (vback),
2999 NULL_TREE, true, GSI_SAME_STMT);
3000 assign_stmt = gimple_build_assign (vback, t);
3001 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3002
3003 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3004 {
3005 if (fd->collapse > 1)
3006 t = fd->loop.v;
3007 else
3008 {
3009 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3010 fd->loops[0].v, fd->loops[0].n1);
3011 t = fold_convert (fd->iter_type, t);
3012 }
3013 tree aref = build4 (ARRAY_REF, fd->iter_type,
3014 counts[fd->ordered], size_zero_node,
3015 NULL_TREE, NULL_TREE);
3016 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3017 true, GSI_SAME_STMT);
3018 expand_omp_build_assign (&gsi, aref, t);
3019 }
3020
3021 t = build2 (fd->loop.cond_code, boolean_type_node,
3022 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3023 iend);
3024 gcond *cond_stmt = gimple_build_cond_empty (t);
3025 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3026 }
3027
3028 /* Remove GIMPLE_OMP_CONTINUE. */
3029 gsi_remove (&gsi, true);
3030
3031 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3032 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3033
3034 /* Emit code to get the next parallel iteration in L2_BB. */
3035 gsi = gsi_start_bb (l2_bb);
3036
3037 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3038 build_fold_addr_expr (istart0),
3039 build_fold_addr_expr (iend0));
3040 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3041 false, GSI_CONTINUE_LINKING);
3042 if (TREE_TYPE (t) != boolean_type_node)
3043 t = fold_build2 (NE_EXPR, boolean_type_node,
3044 t, build_int_cst (TREE_TYPE (t), 0));
3045 gcond *cond_stmt = gimple_build_cond_empty (t);
3046 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3047 }
3048
3049 /* Add the loop cleanup function. */
3050 gsi = gsi_last_bb (exit_bb);
3051 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3052 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3053 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3054 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3055 else
3056 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3057 gcall *call_stmt = gimple_build_call (t, 0);
3058 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3059 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3060 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3061 if (fd->ordered)
3062 {
3063 tree arr = counts[fd->ordered];
3064 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3065 TREE_THIS_VOLATILE (clobber) = 1;
3066 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3067 GSI_SAME_STMT);
3068 }
3069 gsi_remove (&gsi, true);
3070
3071 /* Connect the new blocks. */
3072 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3073 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3074
3075 if (!broken_loop)
3076 {
3077 gimple_seq phis;
3078
3079 e = find_edge (cont_bb, l3_bb);
3080 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3081
3082 phis = phi_nodes (l3_bb);
3083 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3084 {
3085 gimple *phi = gsi_stmt (gsi);
3086 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3087 PHI_ARG_DEF_FROM_EDGE (phi, e));
3088 }
3089 remove_edge (e);
3090
3091 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3092 e = find_edge (cont_bb, l1_bb);
3093 if (e == NULL)
3094 {
3095 e = BRANCH_EDGE (cont_bb);
3096 gcc_assert (single_succ (e->dest) == l1_bb);
3097 }
3098 if (gimple_omp_for_combined_p (fd->for_stmt))
3099 {
3100 remove_edge (e);
3101 e = NULL;
3102 }
3103 else if (fd->collapse > 1)
3104 {
3105 remove_edge (e);
3106 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3107 }
3108 else
3109 e->flags = EDGE_TRUE_VALUE;
3110 if (e)
3111 {
3112 e->probability = REG_BR_PROB_BASE * 7 / 8;
3113 find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
3114 }
3115 else
3116 {
3117 e = find_edge (cont_bb, l2_bb);
3118 e->flags = EDGE_FALLTHRU;
3119 }
3120 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3121
3122 if (gimple_in_ssa_p (cfun))
3123 {
3124 /* Add phis to the outer loop that connect to the phis in the inner,
3125 original loop, and move the loop entry value of the inner phi to
3126 the loop entry value of the outer phi. */
3127 gphi_iterator psi;
3128 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3129 {
3130 source_location locus;
3131 gphi *nphi;
3132 gphi *exit_phi = psi.phi ();
3133
3134 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3135 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3136
3137 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3138 edge latch_to_l1 = find_edge (latch, l1_bb);
3139 gphi *inner_phi
3140 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3141
3142 tree t = gimple_phi_result (exit_phi);
3143 tree new_res = copy_ssa_name (t, NULL);
3144 nphi = create_phi_node (new_res, l0_bb);
3145
3146 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3147 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3148 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3149 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3150 add_phi_arg (nphi, t, entry_to_l0, locus);
3151
3152 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3153 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3154
3155 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
 3156 }
3157 }
3158
3159 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3160 recompute_dominator (CDI_DOMINATORS, l2_bb));
3161 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3162 recompute_dominator (CDI_DOMINATORS, l3_bb));
3163 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3164 recompute_dominator (CDI_DOMINATORS, l0_bb));
3165 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3166 recompute_dominator (CDI_DOMINATORS, l1_bb));
3167
3168 /* We enter expand_omp_for_generic with a loop. This original loop may
3169 have its own loop struct, or it may be part of an outer loop struct
3170 (which may be the fake loop). */
3171 struct loop *outer_loop = entry_bb->loop_father;
3172 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3173
3174 add_bb_to_loop (l2_bb, outer_loop);
3175
3176 /* We've added a new loop around the original loop. Allocate the
3177 corresponding loop struct. */
3178 struct loop *new_loop = alloc_loop ();
3179 new_loop->header = l0_bb;
3180 new_loop->latch = l2_bb;
3181 add_loop (new_loop, outer_loop);
3182
3183 /* Allocate a loop structure for the original loop unless we already
3184 had one. */
3185 if (!orig_loop_has_loop_struct
3186 && !gimple_omp_for_combined_p (fd->for_stmt))
3187 {
3188 struct loop *orig_loop = alloc_loop ();
3189 orig_loop->header = l1_bb;
3190 /* The loop may have multiple latches. */
3191 add_loop (orig_loop, new_loop);
3192 }
3193 }
3194}
3195
3196/* A subroutine of expand_omp_for. Generate code for a parallel
3197 loop with static schedule and no specified chunk size. Given
3198 parameters:
3199
3200 for (V = N1; V cond N2; V += STEP) BODY;
3201
3202 where COND is "<" or ">", we generate pseudocode
3203
3204 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3205 if (cond is <)
3206 adj = STEP - 1;
3207 else
3208 adj = STEP + 1;
3209 if ((__typeof (V)) -1 > 0 && cond is >)
3210 n = -(adj + N2 - N1) / -STEP;
3211 else
3212 n = (adj + N2 - N1) / STEP;
3213 q = n / nthreads;
3214 tt = n % nthreads;
3215 if (threadid < tt) goto L3; else goto L4;
3216 L3:
3217 tt = 0;
3218 q = q + 1;
3219 L4:
3220 s0 = q * threadid + tt;
3221 e0 = s0 + q;
3222 V = s0 * STEP + N1;
3223 if (s0 >= e0) goto L2; else goto L0;
3224 L0:
3225 e = e0 * STEP + N1;
3226 L1:
3227 BODY;
3228 V += STEP;
3229 if (V cond e) goto L1;
3230 L2:
3231*/
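/* Worked example (illustration only): n = 10 iterations on
   nthreads = 4 gives q = 2 and tt = 2; after the threadid < tt
   adjustment, threads 0 and 1 run 3 iterations each (s0..e0 = 0..3
   and 3..6) while threads 2 and 3 run 2 each (6..8 and 8..10), so the
   remainder goes to the lowest thread ids.  */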
3232
3233static void
3234expand_omp_for_static_nochunk (struct omp_region *region,
3235 struct omp_for_data *fd,
3236 gimple *inner_stmt)
3237{
3238 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3239 tree type, itype, vmain, vback;
3240 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3241 basic_block body_bb, cont_bb, collapse_bb = NULL;
3242 basic_block fin_bb;
3243 gimple_stmt_iterator gsi;
3244 edge ep;
3245 bool broken_loop = region->cont == NULL;
3246 tree *counts = NULL;
3247 tree n1, n2, step;
3248
3249 itype = type = TREE_TYPE (fd->loop.v);
3250 if (POINTER_TYPE_P (type))
3251 itype = signed_type_for (type);
3252
3253 entry_bb = region->entry;
3254 cont_bb = region->cont;
3255 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3256 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3257 gcc_assert (broken_loop
3258 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3259 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3260 body_bb = single_succ (seq_start_bb);
3261 if (!broken_loop)
3262 {
3263 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3264 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3265 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3266 }
3267 exit_bb = region->exit;
3268
3269 /* Iteration space partitioning goes in ENTRY_BB. */
3270 gsi = gsi_last_bb (entry_bb);
3271 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3272
3273 if (fd->collapse > 1)
3274 {
3275 int first_zero_iter = -1, dummy = -1;
3276 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3277
3278 counts = XALLOCAVEC (tree, fd->collapse);
3279 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3280 fin_bb, first_zero_iter,
3281 dummy_bb, dummy, l2_dom_bb);
3282 t = NULL_TREE;
3283 }
3284 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3285 t = integer_one_node;
3286 else
3287 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3288 fold_convert (type, fd->loop.n1),
3289 fold_convert (type, fd->loop.n2));
3290 if (fd->collapse == 1
3291 && TYPE_UNSIGNED (type)
3292 && (t == NULL_TREE || !integer_onep (t)))
3293 {
3294 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3295 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3296 true, GSI_SAME_STMT);
3297 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3298 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3299 true, GSI_SAME_STMT);
3300 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3301 NULL_TREE, NULL_TREE);
3302 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3303 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3304 expand_omp_regimplify_p, NULL, NULL)
3305 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3306 expand_omp_regimplify_p, NULL, NULL))
3307 {
3308 gsi = gsi_for_stmt (cond_stmt);
3309 gimple_regimplify_operands (cond_stmt, &gsi);
3310 }
3311 ep = split_block (entry_bb, cond_stmt);
3312 ep->flags = EDGE_TRUE_VALUE;
3313 entry_bb = ep->dest;
3314 ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3315 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3316 ep->probability = REG_BR_PROB_BASE / 2000 - 1;
3317 if (gimple_in_ssa_p (cfun))
3318 {
3319 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3320 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3321 !gsi_end_p (gpi); gsi_next (&gpi))
3322 {
3323 gphi *phi = gpi.phi ();
3324 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3325 ep, UNKNOWN_LOCATION);
3326 }
3327 }
3328 gsi = gsi_last_bb (entry_bb);
3329 }
3330
3331 switch (gimple_omp_for_kind (fd->for_stmt))
3332 {
3333 case GF_OMP_FOR_KIND_FOR:
3334 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3335 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3336 break;
3337 case GF_OMP_FOR_KIND_DISTRIBUTE:
3338 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3339 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3340 break;
3341 default:
3342 gcc_unreachable ();
3343 }
3344 nthreads = build_call_expr (nthreads, 0);
3345 nthreads = fold_convert (itype, nthreads);
3346 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3347 true, GSI_SAME_STMT);
3348 threadid = build_call_expr (threadid, 0);
3349 threadid = fold_convert (itype, threadid);
3350 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3351 true, GSI_SAME_STMT);
3352
3353 n1 = fd->loop.n1;
3354 n2 = fd->loop.n2;
3355 step = fd->loop.step;
3356 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3357 {
3358 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3359 OMP_CLAUSE__LOOPTEMP_);
3360 gcc_assert (innerc);
3361 n1 = OMP_CLAUSE_DECL (innerc);
3362 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3363 OMP_CLAUSE__LOOPTEMP_);
3364 gcc_assert (innerc);
3365 n2 = OMP_CLAUSE_DECL (innerc);
3366 }
3367 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3368 true, NULL_TREE, true, GSI_SAME_STMT);
3369 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3370 true, NULL_TREE, true, GSI_SAME_STMT);
3371 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3372 true, NULL_TREE, true, GSI_SAME_STMT);
3373
3374 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3375 t = fold_build2 (PLUS_EXPR, itype, step, t);
3376 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3377 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3378 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3379 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3380 fold_build1 (NEGATE_EXPR, itype, t),
3381 fold_build1 (NEGATE_EXPR, itype, step));
3382 else
3383 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3384 t = fold_convert (itype, t);
3385 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3386
3387 q = create_tmp_reg (itype, "q");
3388 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3389 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3390 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3391
3392 tt = create_tmp_reg (itype, "tt");
3393 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3394 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3395 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3396
3397 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3398 gcond *cond_stmt = gimple_build_cond_empty (t);
3399 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3400
3401 second_bb = split_block (entry_bb, cond_stmt)->dest;
3402 gsi = gsi_last_bb (second_bb);
3403 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3404
3405 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3406 GSI_SAME_STMT);
3407 gassign *assign_stmt
3408 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3409 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3410
3411 third_bb = split_block (second_bb, assign_stmt)->dest;
3412 gsi = gsi_last_bb (third_bb);
3413 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3414
3415 t = build2 (MULT_EXPR, itype, q, threadid);
3416 t = build2 (PLUS_EXPR, itype, t, tt);
3417 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3418
3419 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3420 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3421
3422 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3423 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3424
3425 /* Remove the GIMPLE_OMP_FOR statement. */
3426 gsi_remove (&gsi, true);
3427
3428 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3429 gsi = gsi_start_bb (seq_start_bb);
3430
3431 tree startvar = fd->loop.v;
3432 tree endvar = NULL_TREE;
3433
3434 if (gimple_omp_for_combined_p (fd->for_stmt))
3435 {
3436 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3437 ? gimple_omp_parallel_clauses (inner_stmt)
3438 : gimple_omp_for_clauses (inner_stmt);
3439 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3440 gcc_assert (innerc);
3441 startvar = OMP_CLAUSE_DECL (innerc);
3442 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3443 OMP_CLAUSE__LOOPTEMP_);
3444 gcc_assert (innerc);
3445 endvar = OMP_CLAUSE_DECL (innerc);
3446 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3447 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3448 {
3449 int i;
3450 for (i = 1; i < fd->collapse; i++)
3451 {
3452 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3453 OMP_CLAUSE__LOOPTEMP_);
3454 gcc_assert (innerc);
3455 }
3456 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3457 OMP_CLAUSE__LOOPTEMP_);
3458 if (innerc)
3459 {
3460 /* If needed (distribute parallel for with lastprivate),
3461 propagate down the total number of iterations. */
3462 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3463 fd->loop.n2);
3464 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3465 GSI_CONTINUE_LINKING);
3466 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3467 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3468 }
3469 }
3470 }
3471 t = fold_convert (itype, s0);
3472 t = fold_build2 (MULT_EXPR, itype, t, step);
3473 if (POINTER_TYPE_P (type))
3474 t = fold_build_pointer_plus (n1, t);
3475 else
3476 t = fold_build2 (PLUS_EXPR, type, t, n1);
3477 t = fold_convert (TREE_TYPE (startvar), t);
3478 t = force_gimple_operand_gsi (&gsi, t,
3479 DECL_P (startvar)
3480 && TREE_ADDRESSABLE (startvar),
3481 NULL_TREE, false, GSI_CONTINUE_LINKING);
3482 assign_stmt = gimple_build_assign (startvar, t);
3483 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3484
3485 t = fold_convert (itype, e0);
3486 t = fold_build2 (MULT_EXPR, itype, t, step);
3487 if (POINTER_TYPE_P (type))
3488 t = fold_build_pointer_plus (n1, t);
3489 else
3490 t = fold_build2 (PLUS_EXPR, type, t, n1);
3491 t = fold_convert (TREE_TYPE (startvar), t);
3492 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3493 false, GSI_CONTINUE_LINKING);
3494 if (endvar)
3495 {
3496 assign_stmt = gimple_build_assign (endvar, e);
3497 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3498 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3499 assign_stmt = gimple_build_assign (fd->loop.v, e);
3500 else
3501 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3502 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3503 }
3504 /* Handle linear clause adjustments. */
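 /* Illustration only (not emitted code, clause chosen for exposition):
    for a clause such as linear(x:3), each thread's private copy of x
    must start at the original value plus ITERCNT * 3, where ITERCNT is
    the number of logical iterations that precede this thread's chunk;
    the loop below builds that per-thread adjustment.  */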
3505 tree itercnt = NULL_TREE;
3506 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3507 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3508 c; c = OMP_CLAUSE_CHAIN (c))
3509 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3510 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3511 {
3512 tree d = OMP_CLAUSE_DECL (c);
3513 bool is_ref = omp_is_reference (d);
3514 tree t = d, a, dest;
3515 if (is_ref)
3516 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3517 if (itercnt == NULL_TREE)
3518 {
3519 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3520 {
3521 itercnt = fold_build2 (MINUS_EXPR, itype,
3522 fold_convert (itype, n1),
3523 fold_convert (itype, fd->loop.n1));
3524 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3525 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3526 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3527 NULL_TREE, false,
3528 GSI_CONTINUE_LINKING);
3529 }
3530 else
3531 itercnt = s0;
3532 }
3533 tree type = TREE_TYPE (t);
3534 if (POINTER_TYPE_P (type))
3535 type = sizetype;
3536 a = fold_build2 (MULT_EXPR, type,
3537 fold_convert (type, itercnt),
3538 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3539 dest = unshare_expr (t);
3540 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3541 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3542 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3543 false, GSI_CONTINUE_LINKING);
3544 assign_stmt = gimple_build_assign (dest, t);
3545 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3546 }
3547 if (fd->collapse > 1)
3548 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3549
3550 if (!broken_loop)
3551 {
3552 /* The code controlling the sequential loop replaces the
3553 GIMPLE_OMP_CONTINUE. */
3554 gsi = gsi_last_bb (cont_bb);
3555 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3556 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3557 vmain = gimple_omp_continue_control_use (cont_stmt);
3558 vback = gimple_omp_continue_control_def (cont_stmt);
3559
3560 if (!gimple_omp_for_combined_p (fd->for_stmt))
3561 {
3562 if (POINTER_TYPE_P (type))
3563 t = fold_build_pointer_plus (vmain, step);
3564 else
3565 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3566 t = force_gimple_operand_gsi (&gsi, t,
3567 DECL_P (vback)
3568 && TREE_ADDRESSABLE (vback),
3569 NULL_TREE, true, GSI_SAME_STMT);
3570 assign_stmt = gimple_build_assign (vback, t);
3571 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3572
3573 t = build2 (fd->loop.cond_code, boolean_type_node,
3574 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3575 ? t : vback, e);
3576 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3577 }
3578
3579 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3580 gsi_remove (&gsi, true);
3581
3582 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3583 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3584 }
3585
3586 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3587 gsi = gsi_last_bb (exit_bb);
3588 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3589 {
3590 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3591 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3592 }
3593 gsi_remove (&gsi, true);
3594
3595 /* Connect all the blocks. */
3596 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3597 ep->probability = REG_BR_PROB_BASE / 4 * 3;
3598 ep = find_edge (entry_bb, second_bb);
3599 ep->flags = EDGE_TRUE_VALUE;
3600 ep->probability = REG_BR_PROB_BASE / 4;
3601 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3602 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3603
3604 if (!broken_loop)
3605 {
3606 ep = find_edge (cont_bb, body_bb);
3607 if (ep == NULL)
3608 {
3609 ep = BRANCH_EDGE (cont_bb);
3610 gcc_assert (single_succ (ep->dest) == body_bb);
3611 }
3612 if (gimple_omp_for_combined_p (fd->for_stmt))
3613 {
3614 remove_edge (ep);
3615 ep = NULL;
3616 }
3617 else if (fd->collapse > 1)
3618 {
3619 remove_edge (ep);
3620 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3621 }
3622 else
3623 ep->flags = EDGE_TRUE_VALUE;
3624 find_edge (cont_bb, fin_bb)->flags
3625 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3626 }
3627
3628 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3629 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3630 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3631
3632 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3633 recompute_dominator (CDI_DOMINATORS, body_bb));
3634 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3635 recompute_dominator (CDI_DOMINATORS, fin_bb));
3636
3637 struct loop *loop = body_bb->loop_father;
3638 if (loop != entry_bb->loop_father)
3639 {
3640 gcc_assert (broken_loop || loop->header == body_bb);
3641 gcc_assert (broken_loop
3642 || loop->latch == region->cont
3643 || single_pred (loop->latch) == region->cont);
3644 return;
3645 }
3646
3647 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3648 {
3649 loop = alloc_loop ();
3650 loop->header = body_bb;
3651 if (collapse_bb == NULL)
3652 loop->latch = cont_bb;
3653 add_loop (loop, body_bb->loop_father);
3654 }
3655}
3656
3657/* Return phi in E->DEST with ARG on edge E. */
3658
3659static gphi *
3660find_phi_with_arg_on_edge (tree arg, edge e)
3661{
3662 basic_block bb = e->dest;
3663
3664 for (gphi_iterator gpi = gsi_start_phis (bb);
3665 !gsi_end_p (gpi);
3666 gsi_next (&gpi))
3667 {
3668 gphi *phi = gpi.phi ();
3669 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3670 return phi;
3671 }
3672
3673 return NULL;
3674}
3675
3676/* A subroutine of expand_omp_for. Generate code for a parallel
3677 loop with static schedule and a specified chunk size. Given
3678 parameters:
3679
3680 for (V = N1; V cond N2; V += STEP) BODY;
3681
3682 where COND is "<" or ">", we generate pseudocode
3683
3684 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3685 if (cond is <)
3686 adj = STEP - 1;
3687 else
3688 adj = STEP + 1;
3689 if ((__typeof (V)) -1 > 0 && cond is >)
3690 n = -(adj + N2 - N1) / -STEP;
3691 else
3692 n = (adj + N2 - N1) / STEP;
3693 trip = 0;
3694 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3695 here so that V is defined
3696 if the loop is not entered
3697 L0:
3698 s0 = (trip * nthreads + threadid) * CHUNK;
7c6746c9 3699 e0 = min (s0 + CHUNK, n);
4954efd4 3700 if (s0 < n) goto L1; else goto L4;
3701 L1:
3702 V = s0 * STEP + N1;
3703 e = e0 * STEP + N1;
3704 L2:
3705 BODY;
3706 V += STEP;
3707 if (V cond e) goto L2; else goto L3;
3708 L3:
3709 trip += 1;
3710 goto L0;
3711 L4:
3712*/
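
/* As a concrete illustration (numbers assumed for exposition, not part
   of the generated code): with nthreads = 4, threadid = 1, CHUNK = 2
   and n = 10 iterations, the first trip gives s0 = (0 * 4 + 1) * 2 = 2
   and e0 = min (2 + 2, 10) = 4, so this thread runs iterations [2, 4);
   on its next trip s0 = (1 * 4 + 1) * 2 = 10, the test s0 < n fails
   and control reaches L4.  */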
3713
3714static void
3715expand_omp_for_static_chunk (struct omp_region *region,
3716 struct omp_for_data *fd, gimple *inner_stmt)
3717{
3718 tree n, s0, e0, e, t;
3719 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3720 tree type, itype, vmain, vback, vextra;
3721 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3722 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3723 gimple_stmt_iterator gsi;
3724 edge se;
3725 bool broken_loop = region->cont == NULL;
3726 tree *counts = NULL;
3727 tree n1, n2, step;
3728
3729 itype = type = TREE_TYPE (fd->loop.v);
3730 if (POINTER_TYPE_P (type))
3731 itype = signed_type_for (type);
3732
3733 entry_bb = region->entry;
3734 se = split_block (entry_bb, last_stmt (entry_bb));
3735 entry_bb = se->src;
3736 iter_part_bb = se->dest;
3737 cont_bb = region->cont;
3738 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3739 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3740 gcc_assert (broken_loop
3741 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3742 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3743 body_bb = single_succ (seq_start_bb);
3744 if (!broken_loop)
3745 {
3746 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3747 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3748 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3749 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3750 }
3751 exit_bb = region->exit;
3752
3753 /* Trip and adjustment setup goes in ENTRY_BB. */
3754 gsi = gsi_last_bb (entry_bb);
3755 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3756
3757 if (fd->collapse > 1)
3758 {
3759 int first_zero_iter = -1, dummy = -1;
3760 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3761
3762 counts = XALLOCAVEC (tree, fd->collapse);
3763 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3764 fin_bb, first_zero_iter,
3765 dummy_bb, dummy, l2_dom_bb);
3766 t = NULL_TREE;
3767 }
3768 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3769 t = integer_one_node;
3770 else
3771 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3772 fold_convert (type, fd->loop.n1),
3773 fold_convert (type, fd->loop.n2));
3774 if (fd->collapse == 1
3775 && TYPE_UNSIGNED (type)
3776 && (t == NULL_TREE || !integer_onep (t)))
3777 {
3778 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3779 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3780 true, GSI_SAME_STMT);
3781 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3782 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3783 true, GSI_SAME_STMT);
3784 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3785 NULL_TREE, NULL_TREE);
3786 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3787 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3788 expand_omp_regimplify_p, NULL, NULL)
3789 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3790 expand_omp_regimplify_p, NULL, NULL))
3791 {
3792 gsi = gsi_for_stmt (cond_stmt);
3793 gimple_regimplify_operands (cond_stmt, &gsi);
3794 }
3795 se = split_block (entry_bb, cond_stmt);
3796 se->flags = EDGE_TRUE_VALUE;
3797 entry_bb = se->dest;
3798 se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3799 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3800 se->probability = REG_BR_PROB_BASE / 2000 - 1;
3801 if (gimple_in_ssa_p (cfun))
3802 {
3803 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3804 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3805 !gsi_end_p (gpi); gsi_next (&gpi))
3806 {
3807 gphi *phi = gpi.phi ();
3808 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3809 se, UNKNOWN_LOCATION);
3810 }
3811 }
3812 gsi = gsi_last_bb (entry_bb);
3813 }
3814
3815 switch (gimple_omp_for_kind (fd->for_stmt))
3816 {
3817 case GF_OMP_FOR_KIND_FOR:
3818 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3819 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3820 break;
3821 case GF_OMP_FOR_KIND_DISTRIBUTE:
3822 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3823 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3824 break;
3825 default:
3826 gcc_unreachable ();
3827 }
3828 nthreads = build_call_expr (nthreads, 0);
3829 nthreads = fold_convert (itype, nthreads);
3830 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3831 true, GSI_SAME_STMT);
3832 threadid = build_call_expr (threadid, 0);
3833 threadid = fold_convert (itype, threadid);
3834 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3835 true, GSI_SAME_STMT);
3836
3837 n1 = fd->loop.n1;
3838 n2 = fd->loop.n2;
3839 step = fd->loop.step;
3840 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3841 {
3842 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3843 OMP_CLAUSE__LOOPTEMP_);
3844 gcc_assert (innerc);
3845 n1 = OMP_CLAUSE_DECL (innerc);
3846 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3847 OMP_CLAUSE__LOOPTEMP_);
3848 gcc_assert (innerc);
3849 n2 = OMP_CLAUSE_DECL (innerc);
3850 }
3851 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3852 true, NULL_TREE, true, GSI_SAME_STMT);
3853 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3854 true, NULL_TREE, true, GSI_SAME_STMT);
3855 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3856 true, NULL_TREE, true, GSI_SAME_STMT);
3857 tree chunk_size = fold_convert (itype, fd->chunk_size);
3858 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3859 chunk_size
3860 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3861 GSI_SAME_STMT);
3862
3863 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3864 t = fold_build2 (PLUS_EXPR, itype, step, t);
3865 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3866 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3867 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3868 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3869 fold_build1 (NEGATE_EXPR, itype, t),
3870 fold_build1 (NEGATE_EXPR, itype, step));
3871 else
3872 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3873 t = fold_convert (itype, t);
3874 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3875 true, GSI_SAME_STMT);
3876
3877 trip_var = create_tmp_reg (itype, ".trip");
3878 if (gimple_in_ssa_p (cfun))
3879 {
3880 trip_init = make_ssa_name (trip_var);
3881 trip_main = make_ssa_name (trip_var);
3882 trip_back = make_ssa_name (trip_var);
3883 }
3884 else
3885 {
3886 trip_init = trip_var;
3887 trip_main = trip_var;
3888 trip_back = trip_var;
3889 }
3890
3891 gassign *assign_stmt
3892 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3893 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3894
3895 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3896 t = fold_build2 (MULT_EXPR, itype, t, step);
3897 if (POINTER_TYPE_P (type))
3898 t = fold_build_pointer_plus (n1, t);
3899 else
3900 t = fold_build2 (PLUS_EXPR, type, t, n1);
3901 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3902 true, GSI_SAME_STMT);
3903
3904 /* Remove the GIMPLE_OMP_FOR. */
3905 gsi_remove (&gsi, true);
3906
3907 gimple_stmt_iterator gsif = gsi;
3908
3909 /* Iteration space partitioning goes in ITER_PART_BB. */
3910 gsi = gsi_last_bb (iter_part_bb);
3911
3912 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3913 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3914 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3915 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3916 false, GSI_CONTINUE_LINKING);
3917
3918 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3919 t = fold_build2 (MIN_EXPR, itype, t, n);
3920 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3921 false, GSI_CONTINUE_LINKING);
3922
3923 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3924 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3925
3926 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3927 gsi = gsi_start_bb (seq_start_bb);
3928
3929 tree startvar = fd->loop.v;
3930 tree endvar = NULL_TREE;
3931
3932 if (gimple_omp_for_combined_p (fd->for_stmt))
3933 {
3934 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3935 ? gimple_omp_parallel_clauses (inner_stmt)
3936 : gimple_omp_for_clauses (inner_stmt);
3937 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3938 gcc_assert (innerc);
3939 startvar = OMP_CLAUSE_DECL (innerc);
3940 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3941 OMP_CLAUSE__LOOPTEMP_);
3942 gcc_assert (innerc);
3943 endvar = OMP_CLAUSE_DECL (innerc);
3944 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3945 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3946 {
3947 int i;
3948 for (i = 1; i < fd->collapse; i++)
3949 {
3950 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3951 OMP_CLAUSE__LOOPTEMP_);
3952 gcc_assert (innerc);
3953 }
3954 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3955 OMP_CLAUSE__LOOPTEMP_);
3956 if (innerc)
3957 {
3958 /* If needed (distribute parallel for with lastprivate),
3959 propagate down the total number of iterations. */
3960 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3961 fd->loop.n2);
3962 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3963 GSI_CONTINUE_LINKING);
3964 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3965 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3966 }
3967 }
3968 }
3969
3970 t = fold_convert (itype, s0);
3971 t = fold_build2 (MULT_EXPR, itype, t, step);
3972 if (POINTER_TYPE_P (type))
3973 t = fold_build_pointer_plus (n1, t);
3974 else
3975 t = fold_build2 (PLUS_EXPR, type, t, n1);
3976 t = fold_convert (TREE_TYPE (startvar), t);
3977 t = force_gimple_operand_gsi (&gsi, t,
3978 DECL_P (startvar)
3979 && TREE_ADDRESSABLE (startvar),
3980 NULL_TREE, false, GSI_CONTINUE_LINKING);
3981 assign_stmt = gimple_build_assign (startvar, t);
3982 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3983
3984 t = fold_convert (itype, e0);
3985 t = fold_build2 (MULT_EXPR, itype, t, step);
3986 if (POINTER_TYPE_P (type))
3987 t = fold_build_pointer_plus (n1, t);
3988 else
3989 t = fold_build2 (PLUS_EXPR, type, t, n1);
3990 t = fold_convert (TREE_TYPE (startvar), t);
3991 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3992 false, GSI_CONTINUE_LINKING);
3993 if (endvar)
3994 {
3995 assign_stmt = gimple_build_assign (endvar, e);
3996 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3997 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3998 assign_stmt = gimple_build_assign (fd->loop.v, e);
3999 else
4000 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4001 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4002 }
4003 /* Handle linear clause adjustments. */
4004 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4005 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4006 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4007 c; c = OMP_CLAUSE_CHAIN (c))
4008 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4009 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4010 {
4011 tree d = OMP_CLAUSE_DECL (c);
4012 bool is_ref = omp_is_reference (d);
4013 tree t = d, a, dest;
4014 if (is_ref)
4015 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4016 tree type = TREE_TYPE (t);
4017 if (POINTER_TYPE_P (type))
4018 type = sizetype;
4019 dest = unshare_expr (t);
4020 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4021 expand_omp_build_assign (&gsif, v, t);
4022 if (itercnt == NULL_TREE)
4023 {
4024 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4025 {
4026 itercntbias
4027 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4028 fold_convert (itype, fd->loop.n1));
4029 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4030 itercntbias, step);
4031 itercntbias
4032 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4033 NULL_TREE, true,
4034 GSI_SAME_STMT);
4035 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4036 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4037 NULL_TREE, false,
4038 GSI_CONTINUE_LINKING);
4039 }
4040 else
4041 itercnt = s0;
4042 }
4043 a = fold_build2 (MULT_EXPR, type,
4044 fold_convert (type, itercnt),
4045 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4046 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4047 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4048 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4049 false, GSI_CONTINUE_LINKING);
4050 assign_stmt = gimple_build_assign (dest, t);
4051 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4052 }
4053 if (fd->collapse > 1)
4054 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4055
4056 if (!broken_loop)
4057 {
4058 /* The code controlling the sequential loop goes in CONT_BB,
4059 replacing the GIMPLE_OMP_CONTINUE. */
4060 gsi = gsi_last_bb (cont_bb);
4061 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4062 vmain = gimple_omp_continue_control_use (cont_stmt);
4063 vback = gimple_omp_continue_control_def (cont_stmt);
4064
4065 if (!gimple_omp_for_combined_p (fd->for_stmt))
4066 {
4067 if (POINTER_TYPE_P (type))
4068 t = fold_build_pointer_plus (vmain, step);
4069 else
4070 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4071 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4072 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4073 true, GSI_SAME_STMT);
4074 assign_stmt = gimple_build_assign (vback, t);
4075 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4076
4077 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4078 t = build2 (EQ_EXPR, boolean_type_node,
4079 build_int_cst (itype, 0),
4080 build_int_cst (itype, 1));
4081 else
4082 t = build2 (fd->loop.cond_code, boolean_type_node,
4083 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4084 ? t : vback, e);
4085 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4086 }
4087
4088 /* Remove GIMPLE_OMP_CONTINUE. */
4089 gsi_remove (&gsi, true);
4090
4091 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4092 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4093
4094 /* Trip update code goes into TRIP_UPDATE_BB. */
4095 gsi = gsi_start_bb (trip_update_bb);
4096
4097 t = build_int_cst (itype, 1);
4098 t = build2 (PLUS_EXPR, itype, trip_main, t);
4099 assign_stmt = gimple_build_assign (trip_back, t);
4100 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4101 }
4102
4103 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4104 gsi = gsi_last_bb (exit_bb);
4105 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4106 {
4107 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4108 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4109 }
4110 gsi_remove (&gsi, true);
4111
4112 /* Connect the new blocks. */
4113 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4114 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4115
4116 if (!broken_loop)
4117 {
4118 se = find_edge (cont_bb, body_bb);
4119 if (se == NULL)
4120 {
4121 se = BRANCH_EDGE (cont_bb);
4122 gcc_assert (single_succ (se->dest) == body_bb);
4123 }
4124 if (gimple_omp_for_combined_p (fd->for_stmt))
4125 {
4126 remove_edge (se);
4127 se = NULL;
4128 }
4129 else if (fd->collapse > 1)
4130 {
4131 remove_edge (se);
4132 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4133 }
4134 else
4135 se->flags = EDGE_TRUE_VALUE;
4136 find_edge (cont_bb, trip_update_bb)->flags
4137 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4138
7c6746c9 4139 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4140 iter_part_bb);
4954efd4 4141 }
4142
4143 if (gimple_in_ssa_p (cfun))
4144 {
4145 gphi_iterator psi;
4146 gphi *phi;
4147 edge re, ene;
4148 edge_var_map *vm;
4149 size_t i;
4150
4151 gcc_assert (fd->collapse == 1 && !broken_loop);
4152
4153 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4154 remove arguments of the phi nodes in fin_bb. We need to create
4155 appropriate phi nodes in iter_part_bb instead. */
4156 se = find_edge (iter_part_bb, fin_bb);
4157 re = single_succ_edge (trip_update_bb);
4158 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4159 ene = single_succ_edge (entry_bb);
4160
4161 psi = gsi_start_phis (fin_bb);
4162 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4163 gsi_next (&psi), ++i)
4164 {
4165 gphi *nphi;
4166 source_location locus;
4167
4168 phi = psi.phi ();
4169 t = gimple_phi_result (phi);
4170 gcc_assert (t == redirect_edge_var_map_result (vm));
4171
4172 if (!single_pred_p (fin_bb))
4173 t = copy_ssa_name (t, phi);
4174
4175 nphi = create_phi_node (t, iter_part_bb);
4176
4177 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4178 locus = gimple_phi_arg_location_from_edge (phi, se);
4179
4180 /* A special case -- fd->loop.v is not yet computed in
4181 iter_part_bb; we need to use vextra instead. */
4182 if (t == fd->loop.v)
4183 t = vextra;
4184 add_phi_arg (nphi, t, ene, locus);
4185 locus = redirect_edge_var_map_location (vm);
4186 tree back_arg = redirect_edge_var_map_def (vm);
4187 add_phi_arg (nphi, back_arg, re, locus);
4188 edge ce = find_edge (cont_bb, body_bb);
4189 if (ce == NULL)
4190 {
4191 ce = BRANCH_EDGE (cont_bb);
4192 gcc_assert (single_succ (ce->dest) == body_bb);
4193 ce = single_succ_edge (ce->dest);
4194 }
4195 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4196 gcc_assert (inner_loop_phi != NULL);
4197 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4198 find_edge (seq_start_bb, body_bb), locus);
4199
4200 if (!single_pred_p (fin_bb))
4201 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4202 }
4203 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4204 redirect_edge_var_map_clear (re);
4205 if (single_pred_p (fin_bb))
4206 while (1)
4207 {
4208 psi = gsi_start_phis (fin_bb);
4209 if (gsi_end_p (psi))
4210 break;
4211 remove_phi_node (&psi, false);
4212 }
4213
4214 /* Make phi node for trip. */
4215 phi = create_phi_node (trip_main, iter_part_bb);
4216 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4217 UNKNOWN_LOCATION);
4218 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4219 UNKNOWN_LOCATION);
4220 }
4221
4222 if (!broken_loop)
4223 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4224 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4225 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4226 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4227 recompute_dominator (CDI_DOMINATORS, fin_bb));
4228 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4229 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4230 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4231 recompute_dominator (CDI_DOMINATORS, body_bb));
4232
4233 if (!broken_loop)
4234 {
4235 struct loop *loop = body_bb->loop_father;
4236 struct loop *trip_loop = alloc_loop ();
4237 trip_loop->header = iter_part_bb;
4238 trip_loop->latch = trip_update_bb;
4239 add_loop (trip_loop, iter_part_bb->loop_father);
4240
4241 if (loop != entry_bb->loop_father)
4242 {
4243 gcc_assert (loop->header == body_bb);
4244 gcc_assert (loop->latch == region->cont
4245 || single_pred (loop->latch) == region->cont);
4246 trip_loop->inner = loop;
4247 return;
4248 }
4249
4250 if (!gimple_omp_for_combined_p (fd->for_stmt))
4251 {
4252 loop = alloc_loop ();
4253 loop->header = body_bb;
4254 if (collapse_bb == NULL)
4255 loop->latch = cont_bb;
4256 add_loop (loop, trip_loop);
4257 }
4258 }
4259}
4260
4261/* A subroutine of expand_omp_for. Generate code for _Cilk_for loop.
4262 Given parameters:
4263 for (V = N1; V cond N2; V += STEP) BODY;
4264
4265 where COND is "<" or ">" or "!=", we generate pseudocode
4266
4267 for (ind_var = low; ind_var < high; ind_var++)
4268 {
4269 V = n1 + (ind_var * STEP)
4270
4271 <BODY>
4272 }
4273
4274 In the above pseudocode, low and high are function parameters of the
4275 child function. In the function below, we insert a temporary
4276 variable set from calls to two OMP functions that cannot otherwise
4277 appear in the body of _Cilk_for (since OMP_FOR cannot be mixed
4278 with _Cilk_for). These calls are replaced with low and high
4279 by the function that handles taskreg. */
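
/* For illustration only (an assumed source-level example, not code this
   function emits): a loop such as

     _Cilk_for (int i = 0; i < n; i++)
       a[i] = b[i];

   is outlined so that the child function receives __low and __high
   parameters from the Cilk runtime and executes the pseudocode above
   for ind_var in [__low, __high); the runtime entry point used is
   __libcilkrts_cilk_for_32 or __libcilkrts_cilk_for_64 (see the end of
   this function).  */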
4280
4281
4282static void
4283expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4284{
4285 bool broken_loop = region->cont == NULL;
4286 basic_block entry_bb = region->entry;
4287 basic_block cont_bb = region->cont;
4288
4289 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4290 gcc_assert (broken_loop
4291 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4292 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4293 basic_block l1_bb, l2_bb;
4294
4295 if (!broken_loop)
4296 {
4297 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4298 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4299 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4300 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4301 }
4302 else
4303 {
4304 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4305 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4306 l2_bb = single_succ (l1_bb);
4307 }
4308 basic_block exit_bb = region->exit;
4309 basic_block l2_dom_bb = NULL;
4310
4311 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4312
4313 /* Below statements until the "tree high_val = ..." are pseudo statements
4314 used to pass information to be used by expand_omp_taskreg.
4315 low_val and high_val will be replaced by the __low and __high
4316 parameter from the child function.
4317
4318 The call_exprs part is a place-holder; it is mainly used
4319 to tell the top-level part that this is
4320 where low and high should be put (reasoning given in the header
4321 comment). */
4322
7c6746c9 4323 gomp_parallel *par_stmt
4324 = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4325 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4954efd4 4326 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4327 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4328 {
4329 if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
4330 high_val = t;
4331 else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
4332 low_val = t;
4333 }
4334 gcc_assert (low_val && high_val);
4335
4336 tree type = TREE_TYPE (low_val);
4337 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4338 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4339
4340 /* Not needed in SSA form right now. */
4341 gcc_assert (!gimple_in_ssa_p (cfun));
4342 if (l2_dom_bb == NULL)
4343 l2_dom_bb = l1_bb;
4344
4345 tree n1 = low_val;
4346 tree n2 = high_val;
4347
4348 gimple *stmt = gimple_build_assign (ind_var, n1);
4349
4350 /* Replace the GIMPLE_OMP_FOR statement. */
4351 gsi_replace (&gsi, stmt, true);
4352
4353 if (!broken_loop)
4354 {
4355 /* Code to control the increment goes in the CONT_BB. */
4356 gsi = gsi_last_bb (cont_bb);
4357 stmt = gsi_stmt (gsi);
4358 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4359 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4360 build_one_cst (type));
4361
4362 /* Replace GIMPLE_OMP_CONTINUE. */
4363 gsi_replace (&gsi, stmt, true);
4364 }
4365
4366 /* Emit the condition in L1_BB. */
4367 gsi = gsi_after_labels (l1_bb);
4368 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4369 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4370 fd->loop.step);
4371 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4372 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4373 fd->loop.n1, fold_convert (sizetype, t));
4374 else
4375 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4376 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4377 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4378 expand_omp_build_assign (&gsi, fd->loop.v, t);
4379
4380 /* The condition is always '<' since the runtime will fill in the low
4381 and high values. */
4382 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4383 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4384
4385 /* Remove GIMPLE_OMP_RETURN. */
4386 gsi = gsi_last_bb (exit_bb);
4387 gsi_remove (&gsi, true);
4388
4389 /* Connect the new blocks. */
4390 remove_edge (FALLTHRU_EDGE (entry_bb));
4391
4392 edge e, ne;
4393 if (!broken_loop)
4394 {
4395 remove_edge (BRANCH_EDGE (entry_bb));
4396 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4397
4398 e = BRANCH_EDGE (l1_bb);
4399 ne = FALLTHRU_EDGE (l1_bb);
4400 e->flags = EDGE_TRUE_VALUE;
4401 }
4402 else
4403 {
4404 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4405
4406 ne = single_succ_edge (l1_bb);
4407 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4408
4409 }
4410 ne->flags = EDGE_FALSE_VALUE;
4411 e->probability = REG_BR_PROB_BASE * 7 / 8;
4412 ne->probability = REG_BR_PROB_BASE / 8;
4413
4414 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4415 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4416 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4417
4418 if (!broken_loop)
4419 {
4420 struct loop *loop = alloc_loop ();
4421 loop->header = l1_bb;
4422 loop->latch = cont_bb;
4423 add_loop (loop, l1_bb->loop_father);
4424 loop->safelen = INT_MAX;
4425 }
4426
4427 /* Pick the correct library function based on the precision of the
4428 induction variable type. */
4429 tree lib_fun = NULL_TREE;
4430 if (TYPE_PRECISION (type) == 32)
4431 lib_fun = cilk_for_32_fndecl;
4432 else if (TYPE_PRECISION (type) == 64)
4433 lib_fun = cilk_for_64_fndecl;
4434 else
4435 gcc_unreachable ();
4436
4437 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4438
4439 /* WS_ARGS contains the library function flavor to call
4440 (__libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32), and the
4441 user-defined grain value. If the user does not define one, then zero
4442 is passed in by the parser. */
4443 vec_alloc (region->ws_args, 2);
4444 region->ws_args->quick_push (lib_fun);
4445 region->ws_args->quick_push (fd->chunk_size);
4446}
4447
4448/* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4449 loop. Given parameters:
4450
4451 for (V = N1; V cond N2; V += STEP) BODY;
4452
4453 where COND is "<" or ">", we generate pseudocode
4454
4455 V = N1;
4456 goto L1;
4457 L0:
4458 BODY;
4459 V += STEP;
4460 L1:
4461 if (V cond N2) goto L0; else goto L2;
4462 L2:
4463
4464 For collapsed loops, given parameters:
4465 collapse(3)
4466 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4467 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4468 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4469 BODY;
4470
4471 we generate pseudocode
4472
4473 if (cond3 is <)
4474 adj = STEP3 - 1;
4475 else
4476 adj = STEP3 + 1;
4477 count3 = (adj + N32 - N31) / STEP3;
4478 if (cond2 is <)
4479 adj = STEP2 - 1;
4480 else
4481 adj = STEP2 + 1;
4482 count2 = (adj + N22 - N21) / STEP2;
4483 if (cond1 is <)
4484 adj = STEP1 - 1;
4485 else
4486 adj = STEP1 + 1;
4487 count1 = (adj + N12 - N11) / STEP1;
4488 count = count1 * count2 * count3;
4489 V = 0;
4490 V1 = N11;
4491 V2 = N21;
4492 V3 = N31;
4493 goto L1;
4494 L0:
4495 BODY;
4496 V += 1;
4497 V3 += STEP3;
4498 V2 += (V3 cond3 N32) ? 0 : STEP2;
4499 V3 = (V3 cond3 N32) ? V3 : N31;
4500 V1 += (V2 cond2 N22) ? 0 : STEP1;
4501 V2 = (V2 cond2 N22) ? V2 : N21;
4502 L1:
4503 if (V < count) goto L0; else goto L2;
4504 L2:
4505
4506 */
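
/* Worked illustration (assumed bounds, for exposition only): a
   collapse(2) analogue of the above with N11 = 0, N12 = 2, STEP1 = 1
   and N21 = 0, N22 = 3, STEP2 = 1 gives count1 = 2, count2 = 3 and
   count = 6, so the single counter V runs 0 .. 5 while the per-loop
   updates make (V1, V2) visit (0,0) (0,1) (0,2) (1,0) (1,1) (1,2).  */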
4507
4508static void
4509expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4510{
4511 tree type, t;
4512 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4513 gimple_stmt_iterator gsi;
4514 gimple *stmt;
4515 gcond *cond_stmt;
4516 bool broken_loop = region->cont == NULL;
4517 edge e, ne;
4518 tree *counts = NULL;
4519 int i;
4520 int safelen_int = INT_MAX;
4521 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4522 OMP_CLAUSE_SAFELEN);
4523 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4524 OMP_CLAUSE__SIMDUID_);
4525 tree n1, n2;
4526
4527 if (safelen)
4528 {
4529 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4530 if (TREE_CODE (safelen) != INTEGER_CST)
4531 safelen_int = 0;
4532 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4533 safelen_int = tree_to_uhwi (safelen);
4534 if (safelen_int == 1)
4535 safelen_int = 0;
4536 }
4537 type = TREE_TYPE (fd->loop.v);
4538 entry_bb = region->entry;
4539 cont_bb = region->cont;
4540 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4541 gcc_assert (broken_loop
4542 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4543 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4544 if (!broken_loop)
4545 {
4546 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4547 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4548 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4549 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4550 }
4551 else
4552 {
4553 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4554 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4555 l2_bb = single_succ (l1_bb);
4556 }
4557 exit_bb = region->exit;
4558 l2_dom_bb = NULL;
4559
4560 gsi = gsi_last_bb (entry_bb);
4561
4562 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4563 /* Not needed in SSA form right now. */
4564 gcc_assert (!gimple_in_ssa_p (cfun));
4565 if (fd->collapse > 1)
4566 {
4567 int first_zero_iter = -1, dummy = -1;
4568 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4569
4570 counts = XALLOCAVEC (tree, fd->collapse);
4571 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4572 zero_iter_bb, first_zero_iter,
4573 dummy_bb, dummy, l2_dom_bb);
4574 }
4575 if (l2_dom_bb == NULL)
4576 l2_dom_bb = l1_bb;
4577
4578 n1 = fd->loop.n1;
4579 n2 = fd->loop.n2;
4580 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4581 {
4582 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4583 OMP_CLAUSE__LOOPTEMP_);
4584 gcc_assert (innerc);
4585 n1 = OMP_CLAUSE_DECL (innerc);
4586 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4587 OMP_CLAUSE__LOOPTEMP_);
4588 gcc_assert (innerc);
4589 n2 = OMP_CLAUSE_DECL (innerc);
4590 }
4591 tree step = fd->loop.step;
4592
4593 bool is_simt = (safelen_int > 1
4594 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4595 OMP_CLAUSE__SIMT_));
4596 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4597 if (is_simt)
4598 {
4599 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4600 simt_lane = create_tmp_var (unsigned_type_node);
4601 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4602 gimple_call_set_lhs (g, simt_lane);
4603 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4604 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4605 fold_convert (TREE_TYPE (step), simt_lane));
4606 n1 = fold_convert (type, n1);
4607 if (POINTER_TYPE_P (type))
4608 n1 = fold_build_pointer_plus (n1, offset);
4609 else
4610 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4611
4612 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4613 if (fd->collapse > 1)
4614 simt_maxlane = build_one_cst (unsigned_type_node);
4615 else if (safelen_int < omp_max_simt_vf ())
4616 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4617 tree vf
4618 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4619 unsigned_type_node, 0);
4620 if (simt_maxlane)
4621 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4622 vf = fold_convert (TREE_TYPE (step), vf);
4623 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4624 }
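 /* Illustration (values assumed): if the SIMT width reported by
    IFN_GOMP_SIMT_VF is 32, lane 3 starts at N1 + 3 * STEP and then
    advances by 32 * STEP per iteration, so the lanes interleave over
    the iteration space.  */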
4625
4626 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4627 if (fd->collapse > 1)
4628 {
4629 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4630 {
4631 gsi_prev (&gsi);
4632 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4633 gsi_next (&gsi);
4634 }
4635 else
4636 for (i = 0; i < fd->collapse; i++)
4637 {
4638 tree itype = TREE_TYPE (fd->loops[i].v);
4639 if (POINTER_TYPE_P (itype))
4640 itype = signed_type_for (itype);
4641 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4642 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4643 }
4644 }
4645
4646 /* Remove the GIMPLE_OMP_FOR statement. */
4647 gsi_remove (&gsi, true);
4648
4649 if (!broken_loop)
4650 {
4651 /* Code to control the increment goes in the CONT_BB. */
4652 gsi = gsi_last_bb (cont_bb);
4653 stmt = gsi_stmt (gsi);
4654 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4655
4656 if (POINTER_TYPE_P (type))
4657 t = fold_build_pointer_plus (fd->loop.v, step);
4658 else
4659 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4660 expand_omp_build_assign (&gsi, fd->loop.v, t);
4661
4662 if (fd->collapse > 1)
4663 {
4664 i = fd->collapse - 1;
4665 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4666 {
4667 t = fold_convert (sizetype, fd->loops[i].step);
4668 t = fold_build_pointer_plus (fd->loops[i].v, t);
4669 }
4670 else
4671 {
4672 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4673 fd->loops[i].step);
4674 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4675 fd->loops[i].v, t);
4676 }
4677 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4678
4679 for (i = fd->collapse - 1; i > 0; i--)
4680 {
4681 tree itype = TREE_TYPE (fd->loops[i].v);
4682 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4683 if (POINTER_TYPE_P (itype2))
4684 itype2 = signed_type_for (itype2);
4685 t = build3 (COND_EXPR, itype2,
4686 build2 (fd->loops[i].cond_code, boolean_type_node,
4687 fd->loops[i].v,
4688 fold_convert (itype, fd->loops[i].n2)),
4689 build_int_cst (itype2, 0),
4690 fold_convert (itype2, fd->loops[i - 1].step));
4691 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4692 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4693 else
4694 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4695 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4696
4697 t = build3 (COND_EXPR, itype,
4698 build2 (fd->loops[i].cond_code, boolean_type_node,
4699 fd->loops[i].v,
4700 fold_convert (itype, fd->loops[i].n2)),
4701 fd->loops[i].v,
4702 fold_convert (itype, fd->loops[i].n1));
4703 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4704 }
4705 }
4706
4707 /* Remove GIMPLE_OMP_CONTINUE. */
4708 gsi_remove (&gsi, true);
4709 }
4710
4711 /* Emit the condition in L1_BB. */
4712 gsi = gsi_start_bb (l1_bb);
4713
4714 t = fold_convert (type, n2);
4715 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4716 false, GSI_CONTINUE_LINKING);
4717 tree v = fd->loop.v;
4718 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4719 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4720 false, GSI_CONTINUE_LINKING);
4721 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4722 cond_stmt = gimple_build_cond_empty (t);
4723 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4724 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4725 NULL, NULL)
4726 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4727 NULL, NULL))
4728 {
4729 gsi = gsi_for_stmt (cond_stmt);
4730 gimple_regimplify_operands (cond_stmt, &gsi);
4731 }
4732
4733 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4734 if (is_simt)
4735 {
4736 gsi = gsi_start_bb (l2_bb);
4737 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4738 if (POINTER_TYPE_P (type))
4739 t = fold_build_pointer_plus (fd->loop.v, step);
4740 else
4741 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4742 expand_omp_build_assign (&gsi, fd->loop.v, t);
4743 }
4744
4745 /* Remove GIMPLE_OMP_RETURN. */
4746 gsi = gsi_last_bb (exit_bb);
4747 gsi_remove (&gsi, true);
4748
4749 /* Connect the new blocks. */
4750 remove_edge (FALLTHRU_EDGE (entry_bb));
4751
4752 if (!broken_loop)
4753 {
4754 remove_edge (BRANCH_EDGE (entry_bb));
4755 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4756
4757 e = BRANCH_EDGE (l1_bb);
4758 ne = FALLTHRU_EDGE (l1_bb);
4759 e->flags = EDGE_TRUE_VALUE;
4760 }
4761 else
4762 {
4763 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4764
4765 ne = single_succ_edge (l1_bb);
4766 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4767
4768 }
4769 ne->flags = EDGE_FALSE_VALUE;
4770 e->probability = REG_BR_PROB_BASE * 7 / 8;
4771 ne->probability = REG_BR_PROB_BASE / 8;
4772
4773 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4774 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4775
4776 if (simt_maxlane)
4777 {
4778 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4779 NULL_TREE, NULL_TREE);
4780 gsi = gsi_last_bb (entry_bb);
4781 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4782 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4783 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4784 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
4785 BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
4786 l2_dom_bb = entry_bb;
4787 }
4788 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4789
4790 if (!broken_loop)
4791 {
4792 struct loop *loop = alloc_loop ();
4793 loop->header = l1_bb;
4794 loop->latch = cont_bb;
4795 add_loop (loop, l1_bb->loop_father);
4796 loop->safelen = safelen_int;
4797 if (simduid)
4798 {
4799 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4800 cfun->has_simduid_loops = true;
4801 }
4802 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4803 the loop. */
4804 if ((flag_tree_loop_vectorize
4805 || (!global_options_set.x_flag_tree_loop_vectorize
7c6746c9 4806 && !global_options_set.x_flag_tree_vectorize))
4954efd4 4807 && flag_tree_loop_optimize
4808 && loop->safelen > 1)
4809 {
4810 loop->force_vectorize = true;
4811 cfun->has_force_vectorize_loops = true;
4812 }
4813 }
4814 else if (simduid)
4815 cfun->has_simduid_loops = true;
4816}
4817
4818/* Taskloop construct is represented after gimplification with
4819 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4820 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4821 which should just compute all the needed loop temporaries
4822 for GIMPLE_OMP_TASK. */
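
/* Sketch (for orientation only, following the shape described above):
   a #pragma omp taskloop over [N1, N2) is gimplified as

     outer GIMPLE_OMP_FOR    -- expanded here; just computes the
                                _looptemp_ start/end for the task
     GIMPLE_OMP_TASK
       inner GIMPLE_OMP_FOR  -- expanded by expand_omp_taskloop_for_inner

   so this function only materializes N1/N2 (possibly biased, see below)
   into the task's loop temporaries.  */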
4823
4824static void
4825expand_omp_taskloop_for_outer (struct omp_region *region,
4826 struct omp_for_data *fd,
4827 gimple *inner_stmt)
4828{
4829 tree type, bias = NULL_TREE;
4830 basic_block entry_bb, cont_bb, exit_bb;
4831 gimple_stmt_iterator gsi;
4832 gassign *assign_stmt;
4833 tree *counts = NULL;
4834 int i;
4835
4836 gcc_assert (inner_stmt);
4837 gcc_assert (region->cont);
4838 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4839 && gimple_omp_task_taskloop_p (inner_stmt));
4840 type = TREE_TYPE (fd->loop.v);
4841
4842 /* See if we need to bias by LLONG_MIN. */
4843 if (fd->iter_type == long_long_unsigned_type_node
4844 && TREE_CODE (type) == INTEGER_TYPE
4845 && !TYPE_UNSIGNED (type))
4846 {
4847 tree n1, n2;
4848
4849 if (fd->loop.cond_code == LT_EXPR)
4850 {
4851 n1 = fd->loop.n1;
4852 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4853 }
4854 else
4855 {
4856 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4857 n2 = fd->loop.n1;
4858 }
4859 if (TREE_CODE (n1) != INTEGER_CST
4860 || TREE_CODE (n2) != INTEGER_CST
4861 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4862 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4863 }
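 /* Note on the bias (explanatory only): when the runtime's iterator
    type is unsigned long long but the source loop variable is signed,
    adding TYPE_MIN_VALUE of the signed type maps the signed range
    monotonically onto the unsigned range, so the runtime's unsigned
    arithmetic still orders the bounds correctly.  */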
4864
4865 entry_bb = region->entry;
4866 cont_bb = region->cont;
4867 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4868 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4869 exit_bb = region->exit;
4870
4871 gsi = gsi_last_bb (entry_bb);
4872 gimple *for_stmt = gsi_stmt (gsi);
4873 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4874 if (fd->collapse > 1)
4875 {
4876 int first_zero_iter = -1, dummy = -1;
4877 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4878
4879 counts = XALLOCAVEC (tree, fd->collapse);
4880 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4881 zero_iter_bb, first_zero_iter,
4882 dummy_bb, dummy, l2_dom_bb);
4883
4884 if (zero_iter_bb)
4885 {
4886 /* Some counts[i] vars might be uninitialized if
4887 some loop has zero iterations. But the body shouldn't
4888 be executed in that case, so just avoid uninit warnings. */
4889 for (i = first_zero_iter; i < fd->collapse; i++)
4890 if (SSA_VAR_P (counts[i]))
4891 TREE_NO_WARNING (counts[i]) = 1;
4892 gsi_prev (&gsi);
4893 edge e = split_block (entry_bb, gsi_stmt (gsi));
4894 entry_bb = e->dest;
4895 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4896 gsi = gsi_last_bb (entry_bb);
4897 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4898 get_immediate_dominator (CDI_DOMINATORS,
4899 zero_iter_bb));
4900 }
4901 }
4902
4903 tree t0, t1;
4904 t1 = fd->loop.n2;
4905 t0 = fd->loop.n1;
4906 if (POINTER_TYPE_P (TREE_TYPE (t0))
4907 && TYPE_PRECISION (TREE_TYPE (t0))
4908 != TYPE_PRECISION (fd->iter_type))
4909 {
4910 /* Avoid casting pointers to integer of a different size. */
4911 tree itype = signed_type_for (type);
4912 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4913 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4914 }
4915 else
4916 {
4917 t1 = fold_convert (fd->iter_type, t1);
4918 t0 = fold_convert (fd->iter_type, t0);
4919 }
4920 if (bias)
4921 {
4922 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4923 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4924 }
4925
4926 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4927 OMP_CLAUSE__LOOPTEMP_);
4928 gcc_assert (innerc);
4929 tree startvar = OMP_CLAUSE_DECL (innerc);
4930 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4931 gcc_assert (innerc);
4932 tree endvar = OMP_CLAUSE_DECL (innerc);
4933 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4934 {
4935 gcc_assert (innerc);
4936 for (i = 1; i < fd->collapse; i++)
4937 {
4938 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4939 OMP_CLAUSE__LOOPTEMP_);
4940 gcc_assert (innerc);
4941 }
4942 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4943 OMP_CLAUSE__LOOPTEMP_);
4944 if (innerc)
4945 {
4946 /* If needed (inner taskloop has lastprivate clause), propagate
4947 down the total number of iterations. */
4948 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4949 NULL_TREE, false,
4950 GSI_CONTINUE_LINKING);
4951 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4952 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4953 }
4954 }
4955
4956 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4957 GSI_CONTINUE_LINKING);
4958 assign_stmt = gimple_build_assign (startvar, t0);
4959 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4960
4961 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4962 GSI_CONTINUE_LINKING);
4963 assign_stmt = gimple_build_assign (endvar, t1);
4964 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4965 if (fd->collapse > 1)
4966 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4967
4968 /* Remove the GIMPLE_OMP_FOR statement. */
4969 gsi = gsi_for_stmt (for_stmt);
4970 gsi_remove (&gsi, true);
4971
4972 gsi = gsi_last_bb (cont_bb);
4973 gsi_remove (&gsi, true);
4974
4975 gsi = gsi_last_bb (exit_bb);
4976 gsi_remove (&gsi, true);
4977
4978 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
4979 remove_edge (BRANCH_EDGE (entry_bb));
4980 FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
4981 remove_edge (BRANCH_EDGE (cont_bb));
4982 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4983 set_immediate_dominator (CDI_DOMINATORS, region->entry,
4984 recompute_dominator (CDI_DOMINATORS, region->entry));
4985}
4986
4987/* Taskloop construct is represented after gimplification with
4988 two GIMPLE_OMP_FOR constructs with GIMPLE_OMP_TASK sandwiched
4989 in between them. This routine expands the inner GIMPLE_OMP_FOR.
4990 GOMP_taskloop{,_ull} function arranges for each task to be given just
4991 a single range of iterations. */
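
/* For example (an assumed even split, purely illustrative -- the actual
   partitioning is chosen by the runtime from the grainsize/num_tasks
   clauses): 100 iterations handed out to 4 tasks could arrive here as
   the ranges [0, 25), [25, 50), [50, 75) and [75, 100), each delivered
   through the task's _looptemp_ pair.  */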
4992
4993static void
4994expand_omp_taskloop_for_inner (struct omp_region *region,
4995 struct omp_for_data *fd,
4996 gimple *inner_stmt)
4997{
4998 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4999 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5000 basic_block fin_bb;
5001 gimple_stmt_iterator gsi;
5002 edge ep;
5003 bool broken_loop = region->cont == NULL;
5004 tree *counts = NULL;
5005 tree n1, n2, step;
5006
5007 itype = type = TREE_TYPE (fd->loop.v);
5008 if (POINTER_TYPE_P (type))
5009 itype = signed_type_for (type);
5010
5011 /* See if we need to bias by LLONG_MIN. */
5012 if (fd->iter_type == long_long_unsigned_type_node
5013 && TREE_CODE (type) == INTEGER_TYPE
5014 && !TYPE_UNSIGNED (type))
5015 {
5016 tree n1, n2;
5017
5018 if (fd->loop.cond_code == LT_EXPR)
5019 {
5020 n1 = fd->loop.n1;
5021 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5022 }
5023 else
5024 {
5025 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5026 n2 = fd->loop.n1;
5027 }
5028 if (TREE_CODE (n1) != INTEGER_CST
5029 || TREE_CODE (n2) != INTEGER_CST
5030 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5031 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5032 }
5033
5034 entry_bb = region->entry;
5035 cont_bb = region->cont;
5036 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5037 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5038 gcc_assert (broken_loop
5039 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5040 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5041 if (!broken_loop)
5042 {
5043 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5044 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5045 }
5046 exit_bb = region->exit;
5047
5048 /* Iteration space partitioning goes in ENTRY_BB. */
5049 gsi = gsi_last_bb (entry_bb);
5050 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5051
5052 if (fd->collapse > 1)
5053 {
5054 int first_zero_iter = -1, dummy = -1;
5055 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5056
5057 counts = XALLOCAVEC (tree, fd->collapse);
5058 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5059 fin_bb, first_zero_iter,
5060 dummy_bb, dummy, l2_dom_bb);
5061 t = NULL_TREE;
5062 }
5063 else
5064 t = integer_one_node;
5065
5066 step = fd->loop.step;
5067 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5068 OMP_CLAUSE__LOOPTEMP_);
5069 gcc_assert (innerc);
5070 n1 = OMP_CLAUSE_DECL (innerc);
5071 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5072 gcc_assert (innerc);
5073 n2 = OMP_CLAUSE_DECL (innerc);
5074 if (bias)
5075 {
5076 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5077 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5078 }
5079 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5080 true, NULL_TREE, true, GSI_SAME_STMT);
5081 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5082 true, NULL_TREE, true, GSI_SAME_STMT);
5083 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5084 true, NULL_TREE, true, GSI_SAME_STMT);
5085
5086 tree startvar = fd->loop.v;
5087 tree endvar = NULL_TREE;
5088
5089 if (gimple_omp_for_combined_p (fd->for_stmt))
5090 {
5091 tree clauses = gimple_omp_for_clauses (inner_stmt);
5092 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5093 gcc_assert (innerc);
5094 startvar = OMP_CLAUSE_DECL (innerc);
5095 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5096 OMP_CLAUSE__LOOPTEMP_);
5097 gcc_assert (innerc);
5098 endvar = OMP_CLAUSE_DECL (innerc);
5099 }
5100 t = fold_convert (TREE_TYPE (startvar), n1);
5101 t = force_gimple_operand_gsi (&gsi, t,
5102 DECL_P (startvar)
5103 && TREE_ADDRESSABLE (startvar),
5104 NULL_TREE, false, GSI_CONTINUE_LINKING);
5105 gimple *assign_stmt = gimple_build_assign (startvar, t);
5106 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5107
5108 t = fold_convert (TREE_TYPE (startvar), n2);
5109 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5110 false, GSI_CONTINUE_LINKING);
5111 if (endvar)
5112 {
5113 assign_stmt = gimple_build_assign (endvar, e);
5114 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5115 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5116 assign_stmt = gimple_build_assign (fd->loop.v, e);
5117 else
5118 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5119 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5120 }
5121 if (fd->collapse > 1)
5122 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5123
5124 if (!broken_loop)
5125 {
5126 /* The code controlling the sequential loop replaces the
5127 GIMPLE_OMP_CONTINUE. */
5128 gsi = gsi_last_bb (cont_bb);
5129 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5130 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5131 vmain = gimple_omp_continue_control_use (cont_stmt);
5132 vback = gimple_omp_continue_control_def (cont_stmt);
5133
5134 if (!gimple_omp_for_combined_p (fd->for_stmt))
5135 {
5136 if (POINTER_TYPE_P (type))
5137 t = fold_build_pointer_plus (vmain, step);
5138 else
5139 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5140 t = force_gimple_operand_gsi (&gsi, t,
5141 DECL_P (vback)
5142 && TREE_ADDRESSABLE (vback),
5143 NULL_TREE, true, GSI_SAME_STMT);
5144 assign_stmt = gimple_build_assign (vback, t);
5145 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5146
5147 t = build2 (fd->loop.cond_code, boolean_type_node,
5148 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5149 ? t : vback, e);
5150 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5151 }
5152
5153 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5154 gsi_remove (&gsi, true);
5155
5156 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5157 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5158 }
5159
5160 /* Remove the GIMPLE_OMP_FOR statement. */
5161 gsi = gsi_for_stmt (fd->for_stmt);
5162 gsi_remove (&gsi, true);
5163
5164 /* Remove the GIMPLE_OMP_RETURN statement. */
5165 gsi = gsi_last_bb (exit_bb);
5166 gsi_remove (&gsi, true);
5167
5168 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5169 if (!broken_loop)
5170 remove_edge (BRANCH_EDGE (entry_bb));
5171 else
5172 {
5173 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5174 region->outer->cont = NULL;
5175 }
5176
5177 /* Connect all the blocks. */
5178 if (!broken_loop)
5179 {
5180 ep = find_edge (cont_bb, body_bb);
5181 if (gimple_omp_for_combined_p (fd->for_stmt))
5182 {
5183 remove_edge (ep);
5184 ep = NULL;
5185 }
5186 else if (fd->collapse > 1)
5187 {
5188 remove_edge (ep);
5189 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5190 }
5191 else
5192 ep->flags = EDGE_TRUE_VALUE;
5193 find_edge (cont_bb, fin_bb)->flags
5194 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5195 }
5196
5197 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5198 recompute_dominator (CDI_DOMINATORS, body_bb));
5199 if (!broken_loop)
5200 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5201 recompute_dominator (CDI_DOMINATORS, fin_bb));
5202
5203 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5204 {
5205 struct loop *loop = alloc_loop ();
5206 loop->header = body_bb;
5207 if (collapse_bb == NULL)
5208 loop->latch = cont_bb;
5209 add_loop (loop, body_bb->loop_father);
5210 }
5211}
5212
5213/* A subroutine of expand_omp_for. Generate code for an OpenACC
5214 partitioned loop. The lowering here is abstracted, in that the
5215 loop parameters are passed through internal functions, which are
5216 further lowered by oacc_device_lower, once we get to the target
5217 compiler. The loop is of the form:
5218
5219 for (V = B; V LTGT E; V += S) {BODY}
5220
5221 where LTGT is < or >. We may have a specified chunking size, CHUNK_SIZE
5222 (constant 0 for no chunking) and we will have a GWV partitioning
5223 mask, specifying dimensions over which the loop is to be
5224 partitioned (see note below). We generate code that looks like:
5225
5226 <entry_bb> [incoming FALL->body, BRANCH->exit]
5227 typedef signedintify (typeof (V)) T; // underlying signed integral type
5228 T range = E - B;
5229 T chunk_no = 0;
5230 T DIR = LTGT == '<' ? +1 : -1;
5231 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5232 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5233
5234 <head_bb> [created by splitting end of entry_bb]
5235 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5236 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5237 if (!(offset LTGT bound)) goto bottom_bb;
5238
5239 <body_bb> [incoming]
5240 V = B + offset;
5241 {BODY}
5242
5243 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5244 offset += step;
5245 if (offset LTGT bound) goto body_bb; [*]
5246
5247 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5248 chunk_no++;
5249 if (chunk_no < chunk_max) goto head_bb;
5250
5251 <exit_bb> [incoming]
5252 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5253
5254 [*] Needed if V live at end of loop
5255
5256 Note: CHUNKING & GWV mask are specified explicitly here. This is a
5257 transition, and will be specified by a more general mechanism shortly.
5258 */
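/* For illustration only: an OpenACC loop such as

     #pragma acc parallel loop gang
     for (i = 0; i < n; i++)
       x[i] += y[i];

   reaches this function with B = 0, E = n and S = 1; the
   IFN_GOACC_LOOP_{CHUNKS,STEP,OFFSET,BOUND} internal function calls emitted
   below carry the chunking and GWV partitioning information and are later
   resolved by oacc_device_lower for the concrete offload target.  */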
5259
5260static void
5261expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5262{
5263 tree v = fd->loop.v;
5264 enum tree_code cond_code = fd->loop.cond_code;
5265 enum tree_code plus_code = PLUS_EXPR;
5266
5267 tree chunk_size = integer_minus_one_node;
5268 tree gwv = integer_zero_node;
5269 tree iter_type = TREE_TYPE (v);
5270 tree diff_type = iter_type;
5271 tree plus_type = iter_type;
5272 struct oacc_collapse *counts = NULL;
5273
5274 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5275 == GF_OMP_FOR_KIND_OACC_LOOP);
5276 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5277 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5278
5279 if (POINTER_TYPE_P (iter_type))
5280 {
5281 plus_code = POINTER_PLUS_EXPR;
5282 plus_type = sizetype;
5283 }
5284 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5285 diff_type = signed_type_for (diff_type);
5286
5287 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5288 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5289 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5290 basic_block bottom_bb = NULL;
5291
5292 /* entry_bb has two successors; the branch edge goes to the exit
5293 block and the fallthrough edge to the body. */
5294 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5295 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5296
5297 /* If cont_bb is non-NULL, it has 2 successors. The branch successor is
5298 body_bb, or a block whose only successor is body_bb. Its
5299 fallthrough successor is the final block (same as the branch
5300 successor of the entry_bb). */
5301 if (cont_bb)
5302 {
5303 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5304 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5305
5306 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5307 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5308 }
5309 else
5310 gcc_assert (!gimple_in_ssa_p (cfun));
5311
5312 /* The exit block only has entry_bb and cont_bb as predecessors. */
5313 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5314
5315 tree chunk_no;
5316 tree chunk_max = NULL_TREE;
5317 tree bound, offset;
5318 tree step = create_tmp_var (diff_type, ".step");
5319 bool up = cond_code == LT_EXPR;
5320 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5321 bool chunking = !gimple_in_ssa_p (cfun);
5322 bool negating;
5323
5324 /* SSA instances. */
5325 tree offset_incr = NULL_TREE;
5326 tree offset_init = NULL_TREE;
5327
5328 gimple_stmt_iterator gsi;
5329 gassign *ass;
5330 gcall *call;
5331 gimple *stmt;
5332 tree expr;
5333 location_t loc;
5334 edge split, be, fte;
5335
5336 /* Split the end of entry_bb to create head_bb. */
5337 split = split_block (entry_bb, last_stmt (entry_bb));
5338 basic_block head_bb = split->dest;
5339 entry_bb = split->src;
5340
5341 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5342 gsi = gsi_last_bb (entry_bb);
5343 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5344 loc = gimple_location (for_stmt);
5345
5346 if (gimple_in_ssa_p (cfun))
5347 {
5348 offset_init = gimple_omp_for_index (for_stmt, 0);
5349 gcc_assert (integer_zerop (fd->loop.n1));
5350 /* The SSA parallelizer does gang parallelism. */
5351 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5352 }
5353
5354 if (fd->collapse > 1)
5355 {
5356 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5357 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5358 TREE_TYPE (fd->loop.n2));
5359
5360 if (SSA_VAR_P (fd->loop.n2))
5361 {
5362 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5363 true, GSI_SAME_STMT);
5364 ass = gimple_build_assign (fd->loop.n2, total);
5365 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5366 }
5367
5368 }
5369
5370 tree b = fd->loop.n1;
5371 tree e = fd->loop.n2;
5372 tree s = fd->loop.step;
5373
5374 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5375 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5376
7c6746c9 5377 /* Convert the step, avoiding possible unsigned->signed overflow. */
4954efd4 5378 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5379 if (negating)
5380 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5381 s = fold_convert (diff_type, s);
5382 if (negating)
5383 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5384 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5385
5386 if (!chunking)
5387 chunk_size = integer_zero_node;
5388 expr = fold_convert (diff_type, chunk_size);
5389 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5390 NULL_TREE, true, GSI_SAME_STMT);
7c6746c9 5391 /* Determine the range, avoiding possible unsigned->signed overflow. */
4954efd4 5392 negating = !up && TYPE_UNSIGNED (iter_type);
5393 expr = fold_build2 (MINUS_EXPR, plus_type,
5394 fold_convert (plus_type, negating ? b : e),
5395 fold_convert (plus_type, negating ? e : b));
5396 expr = fold_convert (diff_type, expr);
5397 if (negating)
5398 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5399 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5400 NULL_TREE, true, GSI_SAME_STMT);
5401
5402 chunk_no = build_int_cst (diff_type, 0);
5403 if (chunking)
5404 {
5405 gcc_assert (!gimple_in_ssa_p (cfun));
5406
5407 expr = chunk_no;
5408 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5409 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5410
5411 ass = gimple_build_assign (chunk_no, expr);
5412 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5413
5414 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5415 build_int_cst (integer_type_node,
5416 IFN_GOACC_LOOP_CHUNKS),
5417 dir, range, s, chunk_size, gwv);
5418 gimple_call_set_lhs (call, chunk_max);
5419 gimple_set_location (call, loc);
5420 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5421 }
5422 else
5423 chunk_size = chunk_no;
5424
5425 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5426 build_int_cst (integer_type_node,
5427 IFN_GOACC_LOOP_STEP),
5428 dir, range, s, chunk_size, gwv);
5429 gimple_call_set_lhs (call, step);
5430 gimple_set_location (call, loc);
5431 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5432
5433 /* Remove the GIMPLE_OMP_FOR. */
5434 gsi_remove (&gsi, true);
5435
7c6746c9 5436 /* Fixup edges from head_bb. */
4954efd4 5437 be = BRANCH_EDGE (head_bb);
5438 fte = FALLTHRU_EDGE (head_bb);
5439 be->flags |= EDGE_FALSE_VALUE;
5440 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5441
5442 basic_block body_bb = fte->dest;
5443
5444 if (gimple_in_ssa_p (cfun))
5445 {
5446 gsi = gsi_last_bb (cont_bb);
5447 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5448
5449 offset = gimple_omp_continue_control_use (cont_stmt);
5450 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5451 }
5452 else
5453 {
5454 offset = create_tmp_var (diff_type, ".offset");
5455 offset_init = offset_incr = offset;
5456 }
5457 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5458
5459 /* Loop offset & bound go into head_bb. */
5460 gsi = gsi_start_bb (head_bb);
5461
5462 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5463 build_int_cst (integer_type_node,
5464 IFN_GOACC_LOOP_OFFSET),
5465 dir, range, s,
5466 chunk_size, gwv, chunk_no);
5467 gimple_call_set_lhs (call, offset_init);
5468 gimple_set_location (call, loc);
5469 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5470
5471 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5472 build_int_cst (integer_type_node,
5473 IFN_GOACC_LOOP_BOUND),
5474 dir, range, s,
5475 chunk_size, gwv, offset_init);
5476 gimple_call_set_lhs (call, bound);
5477 gimple_set_location (call, loc);
5478 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5479
5480 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5481 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5482 GSI_CONTINUE_LINKING);
5483
5484 /* V assignment goes into body_bb. */
5485 if (!gimple_in_ssa_p (cfun))
5486 {
5487 gsi = gsi_start_bb (body_bb);
5488
5489 expr = build2 (plus_code, iter_type, b,
5490 fold_convert (plus_type, offset));
5491 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5492 true, GSI_SAME_STMT);
5493 ass = gimple_build_assign (v, expr);
5494 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5495 if (fd->collapse > 1)
5496 expand_oacc_collapse_vars (fd, &gsi, counts, v);
5497 }
5498
5499 /* Loop increment goes into cont_bb. If this is not a loop, we
5500 will have spawned threads as if it was, and each one will
5501 execute one iteration. The specification is not explicit about
5502 whether such constructs are ill-formed or not, and they can
5503 occur, especially when noreturn routines are involved. */
5504 if (cont_bb)
5505 {
5506 gsi = gsi_last_bb (cont_bb);
5507 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5508 loc = gimple_location (cont_stmt);
5509
5510 /* Increment offset. */
5511 if (gimple_in_ssa_p (cfun))
5512 expr = build2 (plus_code, iter_type, offset,
5513 fold_convert (plus_type, step));
5514 else
5515 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5516 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5517 true, GSI_SAME_STMT);
5518 ass = gimple_build_assign (offset_incr, expr);
5519 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5520 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5521 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5522
5523 /* Remove the GIMPLE_OMP_CONTINUE. */
5524 gsi_remove (&gsi, true);
5525
7c6746c9 5526 /* Fixup edges from cont_bb. */
4954efd4 5527 be = BRANCH_EDGE (cont_bb);
5528 fte = FALLTHRU_EDGE (cont_bb);
5529 be->flags |= EDGE_TRUE_VALUE;
5530 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5531
5532 if (chunking)
5533 {
5534 /* Split the beginning of exit_bb to make bottom_bb. We
5535 need to insert a nop at the start, because splitting is
7c6746c9 5536 after a stmt, not before. */
4954efd4 5537 gsi = gsi_start_bb (exit_bb);
5538 stmt = gimple_build_nop ();
5539 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5540 split = split_block (exit_bb, stmt);
5541 bottom_bb = split->src;
5542 exit_bb = split->dest;
5543 gsi = gsi_last_bb (bottom_bb);
5544
5545 /* Chunk increment and test goes into bottom_bb. */
5546 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5547 build_int_cst (diff_type, 1));
5548 ass = gimple_build_assign (chunk_no, expr);
5549 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5550
5551 /* Chunk test at end of bottom_bb. */
5552 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5553 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5554 GSI_CONTINUE_LINKING);
5555
7c6746c9 5556 /* Fixup edges from bottom_bb. */
4954efd4 5557 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5558 make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5559 }
5560 }
5561
5562 gsi = gsi_last_bb (exit_bb);
5563 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5564 loc = gimple_location (gsi_stmt (gsi));
5565
5566 if (!gimple_in_ssa_p (cfun))
5567 {
5568 /* Insert the final value of V, in case it is live. This is the
5569 value for the only thread that survives past the join. */
5570 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5571 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5572 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5573 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5574 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5575 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5576 true, GSI_SAME_STMT);
5577 ass = gimple_build_assign (v, expr);
5578 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5579 }
5580
7c6746c9 5581 /* Remove the OMP_RETURN. */
4954efd4 5582 gsi_remove (&gsi, true);
5583
5584 if (cont_bb)
5585 {
5586 /* We now have one or two nested loops. Update the loop
5587 structures. */
5588 struct loop *parent = entry_bb->loop_father;
5589 struct loop *body = body_bb->loop_father;
5590
5591 if (chunking)
5592 {
5593 struct loop *chunk_loop = alloc_loop ();
5594 chunk_loop->header = head_bb;
5595 chunk_loop->latch = bottom_bb;
5596 add_loop (chunk_loop, parent);
5597 parent = chunk_loop;
5598 }
5599 else if (parent != body)
5600 {
5601 gcc_assert (body->header == body_bb);
5602 gcc_assert (body->latch == cont_bb
5603 || single_pred (body->latch) == cont_bb);
5604 parent = NULL;
5605 }
5606
5607 if (parent)
5608 {
5609 struct loop *body_loop = alloc_loop ();
5610 body_loop->header = body_bb;
5611 body_loop->latch = cont_bb;
5612 add_loop (body_loop, parent);
5613 }
5614 }
5615}
5616
5617/* Expand the OMP loop defined by REGION. */
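/* For illustration only (the exact entry points are chosen by the fn_index
   computation below): a worksharing loop such as

     #pragma omp for schedule(dynamic, 4)
     for (i = 0; i < n; i++)
       work (i);

   is expanded by expand_omp_for_generic into calls to the libgomp pair
   GOMP_loop_dynamic_start / GOMP_loop_dynamic_next, while schedule(static)
   without an ordered clause is open coded by
   expand_omp_for_static_nochunk / expand_omp_for_static_chunk.  */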
5618
5619static void
5620expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5621{
5622 struct omp_for_data fd;
5623 struct omp_for_data_loop *loops;
5624
5625 loops
5626 = (struct omp_for_data_loop *)
5627 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5628 * sizeof (struct omp_for_data_loop));
5629 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5630 &fd, loops);
5631 region->sched_kind = fd.sched_kind;
5632 region->sched_modifiers = fd.sched_modifiers;
5633
5634 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5635 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5636 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5637 if (region->cont)
5638 {
5639 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5640 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5641 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5642 }
5643 else
5644 /* If there isn't a continue then this is a degenerate case where
5645 the introduction of abnormal edges during lowering will prevent
5646 original loops from being detected. Fix that up. */
5647 loops_state_set (LOOPS_NEED_FIXUP);
5648
5649 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5650 expand_omp_simd (region, &fd);
5651 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5652 expand_cilk_for (region, &fd);
5653 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5654 {
5655 gcc_assert (!inner_stmt);
5656 expand_oacc_for (region, &fd);
5657 }
5658 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5659 {
5660 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5661 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5662 else
5663 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5664 }
5665 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5666 && !fd.have_ordered)
5667 {
5668 if (fd.chunk_size == NULL)
5669 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5670 else
5671 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5672 }
5673 else
5674 {
5675 int fn_index, start_ix, next_ix;
5676
5677 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5678 == GF_OMP_FOR_KIND_FOR);
5679 if (fd.chunk_size == NULL
5680 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5681 fd.chunk_size = integer_zero_node;
5682 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5683 switch (fd.sched_kind)
5684 {
5685 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5686 fn_index = 3;
5687 break;
5688 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5689 case OMP_CLAUSE_SCHEDULE_GUIDED:
5690 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5691 && !fd.ordered
5692 && !fd.have_ordered)
5693 {
5694 fn_index = 3 + fd.sched_kind;
5695 break;
5696 }
5697 /* FALLTHRU */
5698 default:
5699 fn_index = fd.sched_kind;
5700 break;
5701 }
5702 if (!fd.ordered)
5703 fn_index += fd.have_ordered * 6;
5704 if (fd.ordered)
5705 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5706 else
5707 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5708 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5709 if (fd.iter_type == long_long_unsigned_type_node)
5710 {
5711 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5712 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5713 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5714 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5715 }
5716 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5717 (enum built_in_function) next_ix, inner_stmt);
5718 }
5719
5720 if (gimple_in_ssa_p (cfun))
5721 update_ssa (TODO_update_ssa_only_virtuals);
5722}
5723
5724/* Expand code for an OpenMP sections directive. In pseudo code, we generate
5725
5726 v = GOMP_sections_start (n);
5727 L0:
5728 switch (v)
5729 {
5730 case 0:
5731 goto L2;
5732 case 1:
5733 section 1;
5734 goto L1;
5735 case 2:
5736 ...
5737 case n:
5738 ...
5739 default:
5740 abort ();
5741 }
5742 L1:
5743 v = GOMP_sections_next ();
5744 goto L0;
5745 L2:
5746 reduction;
5747
5748 If this is a combined parallel sections region, replace the call to
5749 GOMP_sections_start with a call to GOMP_sections_next. */
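/* For illustration only: a directive such as

     #pragma omp sections
     {
       #pragma omp section
       foo ();
       #pragma omp section
       bar ();
     }

   becomes the switch-based dispatcher sketched above, with each call to
   GOMP_sections_next returning the (1-based) number of the next section to
   execute, or 0 once all sections have been handed out.  */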
5750
5751static void
5752expand_omp_sections (struct omp_region *region)
5753{
5754 tree t, u, vin = NULL, vmain, vnext, l2;
5755 unsigned len;
5756 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5757 gimple_stmt_iterator si, switch_si;
5758 gomp_sections *sections_stmt;
5759 gimple *stmt;
5760 gomp_continue *cont;
5761 edge_iterator ei;
5762 edge e;
5763 struct omp_region *inner;
5764 unsigned i, casei;
5765 bool exit_reachable = region->cont != NULL;
5766
5767 gcc_assert (region->exit != NULL);
5768 entry_bb = region->entry;
5769 l0_bb = single_succ (entry_bb);
5770 l1_bb = region->cont;
5771 l2_bb = region->exit;
5772 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5773 l2 = gimple_block_label (l2_bb);
5774 else
5775 {
5776 /* This can happen if there are reductions. */
5777 len = EDGE_COUNT (l0_bb->succs);
5778 gcc_assert (len > 0);
5779 e = EDGE_SUCC (l0_bb, len - 1);
5780 si = gsi_last_bb (e->dest);
5781 l2 = NULL_TREE;
5782 if (gsi_end_p (si)
7c6746c9 5783 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
4954efd4 5784 l2 = gimple_block_label (e->dest);
5785 else
5786 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5787 {
5788 si = gsi_last_bb (e->dest);
5789 if (gsi_end_p (si)
5790 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5791 {
5792 l2 = gimple_block_label (e->dest);
5793 break;
5794 }
5795 }
5796 }
5797 if (exit_reachable)
5798 default_bb = create_empty_bb (l1_bb->prev_bb);
5799 else
5800 default_bb = create_empty_bb (l0_bb);
5801
5802 /* We will build a switch() with enough cases for all the
5803 GIMPLE_OMP_SECTION regions, a '0' case taken when there is no more
5804 work, and a default case to abort if something goes wrong. */
5805 len = EDGE_COUNT (l0_bb->succs);
5806
5807 /* Use vec::quick_push on label_vec throughout, since we know the size
5808 in advance. */
5809 auto_vec<tree> label_vec (len);
5810
5811 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5812 GIMPLE_OMP_SECTIONS statement. */
5813 si = gsi_last_bb (entry_bb);
5814 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5815 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5816 vin = gimple_omp_sections_control (sections_stmt);
5817 if (!is_combined_parallel (region))
5818 {
5819 /* If we are not inside a combined parallel+sections region,
5820 call GOMP_sections_start. */
5821 t = build_int_cst (unsigned_type_node, len - 1);
5822 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5823 stmt = gimple_build_call (u, 1, t);
5824 }
5825 else
5826 {
5827 /* Otherwise, call GOMP_sections_next. */
5828 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5829 stmt = gimple_build_call (u, 0);
5830 }
5831 gimple_call_set_lhs (stmt, vin);
5832 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5833 gsi_remove (&si, true);
5834
5835 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5836 L0_BB. */
5837 switch_si = gsi_last_bb (l0_bb);
5838 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5839 if (exit_reachable)
5840 {
5841 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5842 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5843 vmain = gimple_omp_continue_control_use (cont);
5844 vnext = gimple_omp_continue_control_def (cont);
5845 }
5846 else
5847 {
5848 vmain = vin;
5849 vnext = NULL_TREE;
5850 }
5851
5852 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5853 label_vec.quick_push (t);
5854 i = 1;
5855
5856 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5857 for (inner = region->inner, casei = 1;
5858 inner;
5859 inner = inner->next, i++, casei++)
5860 {
5861 basic_block s_entry_bb, s_exit_bb;
5862
5863 /* Skip optional reduction region. */
5864 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5865 {
5866 --i;
5867 --casei;
5868 continue;
5869 }
5870
5871 s_entry_bb = inner->entry;
5872 s_exit_bb = inner->exit;
5873
5874 t = gimple_block_label (s_entry_bb);
5875 u = build_int_cst (unsigned_type_node, casei);
5876 u = build_case_label (u, NULL, t);
5877 label_vec.quick_push (u);
5878
5879 si = gsi_last_bb (s_entry_bb);
5880 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5881 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5882 gsi_remove (&si, true);
5883 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5884
5885 if (s_exit_bb == NULL)
5886 continue;
5887
5888 si = gsi_last_bb (s_exit_bb);
5889 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5890 gsi_remove (&si, true);
5891
5892 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5893 }
5894
5895 /* Error handling code goes in DEFAULT_BB. */
5896 t = gimple_block_label (default_bb);
5897 u = build_case_label (NULL, NULL, t);
5898 make_edge (l0_bb, default_bb, 0);
5899 add_bb_to_loop (default_bb, current_loops->tree_root);
5900
5901 stmt = gimple_build_switch (vmain, u, label_vec);
5902 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5903 gsi_remove (&switch_si, true);
5904
5905 si = gsi_start_bb (default_bb);
5906 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5907 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5908
5909 if (exit_reachable)
5910 {
5911 tree bfn_decl;
5912
5913 /* Code to get the next section goes in L1_BB. */
5914 si = gsi_last_bb (l1_bb);
5915 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5916
5917 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5918 stmt = gimple_build_call (bfn_decl, 0);
5919 gimple_call_set_lhs (stmt, vnext);
5920 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5921 gsi_remove (&si, true);
5922
5923 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5924 }
5925
5926 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
5927 si = gsi_last_bb (l2_bb);
5928 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5929 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5930 else if (gimple_omp_return_lhs (gsi_stmt (si)))
5931 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5932 else
5933 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5934 stmt = gimple_build_call (t, 0);
5935 if (gimple_omp_return_lhs (gsi_stmt (si)))
5936 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5937 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5938 gsi_remove (&si, true);
5939
5940 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5941}
5942
5943/* Expand code for an OpenMP single directive. We've already expanded
5944 much of the code; here we simply place the GOMP_barrier call. */
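/* For illustration only: for

     #pragma omp single
     init ();

   the body and the GOMP_single_start test were emitted during lowering;
   all that remains here is dropping the entry marker and, unless nowait
   was given, emitting the closing GOMP_barrier call.  */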
5945
5946static void
5947expand_omp_single (struct omp_region *region)
5948{
5949 basic_block entry_bb, exit_bb;
5950 gimple_stmt_iterator si;
5951
5952 entry_bb = region->entry;
5953 exit_bb = region->exit;
5954
5955 si = gsi_last_bb (entry_bb);
5956 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5957 gsi_remove (&si, true);
5958 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5959
5960 si = gsi_last_bb (exit_bb);
5961 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5962 {
5963 tree t = gimple_omp_return_lhs (gsi_stmt (si));
5964 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5965 }
5966 gsi_remove (&si, true);
5967 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5968}
5969
5970/* Generic expansion for OpenMP synchronization directives: master,
5971 ordered and critical. All we need to do here is remove the entry
5972 and exit markers for REGION. */
5973
5974static void
5975expand_omp_synch (struct omp_region *region)
5976{
5977 basic_block entry_bb, exit_bb;
5978 gimple_stmt_iterator si;
5979
5980 entry_bb = region->entry;
5981 exit_bb = region->exit;
5982
5983 si = gsi_last_bb (entry_bb);
5984 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5985 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5986 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
5987 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
5988 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
5989 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
5990 gsi_remove (&si, true);
5991 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5992
5993 if (exit_bb)
5994 {
5995 si = gsi_last_bb (exit_bb);
5996 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5997 gsi_remove (&si, true);
5998 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5999 }
6000}
6001
6002/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6003 operation as a normal volatile load. */
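/* For illustration only: on targets providing the builtin, an atomic read
   such as

     #pragma omp atomic read
     v = x;

   where x is a 4-byte int (so INDEX == 2) is expanded into roughly

     v = __atomic_load_4 (&x, MEMMODEL_RELAXED);

   or with MEMMODEL_SEQ_CST when the seq_cst clause is present.  */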
6004
6005static bool
6006expand_omp_atomic_load (basic_block load_bb, tree addr,
6007 tree loaded_val, int index)
6008{
6009 enum built_in_function tmpbase;
6010 gimple_stmt_iterator gsi;
6011 basic_block store_bb;
6012 location_t loc;
6013 gimple *stmt;
6014 tree decl, call, type, itype;
6015
6016 gsi = gsi_last_bb (load_bb);
6017 stmt = gsi_stmt (gsi);
6018 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6019 loc = gimple_location (stmt);
6020
6021 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6022 is smaller than word size, then expand_atomic_load assumes that the load
6023 is atomic. We could avoid the builtin entirely in this case. */
6024
6025 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6026 decl = builtin_decl_explicit (tmpbase);
6027 if (decl == NULL_TREE)
6028 return false;
6029
6030 type = TREE_TYPE (loaded_val);
6031 itype = TREE_TYPE (TREE_TYPE (decl));
6032
6033 call = build_call_expr_loc (loc, decl, 2, addr,
6034 build_int_cst (NULL,
6035 gimple_omp_atomic_seq_cst_p (stmt)
6036 ? MEMMODEL_SEQ_CST
6037 : MEMMODEL_RELAXED));
6038 if (!useless_type_conversion_p (type, itype))
6039 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6040 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6041
6042 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6043 gsi_remove (&gsi, true);
6044
6045 store_bb = single_succ (load_bb);
6046 gsi = gsi_last_bb (store_bb);
6047 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6048 gsi_remove (&gsi, true);
6049
6050 if (gimple_in_ssa_p (cfun))
6051 update_ssa (TODO_update_ssa_no_phi);
6052
6053 return true;
6054}
6055
6056/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6057 operation as a normal volatile store. */
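/* For illustration only: similarly, an atomic write

     #pragma omp atomic write
     x = expr;

   for a 4-byte x is expanded into roughly

     __atomic_store_4 (&x, expr, MEMMODEL_RELAXED);

   and becomes an __atomic_exchange_4 instead when the old value is also
   needed (capture form).  */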
6058
6059static bool
6060expand_omp_atomic_store (basic_block load_bb, tree addr,
6061 tree loaded_val, tree stored_val, int index)
6062{
6063 enum built_in_function tmpbase;
6064 gimple_stmt_iterator gsi;
6065 basic_block store_bb = single_succ (load_bb);
6066 location_t loc;
6067 gimple *stmt;
6068 tree decl, call, type, itype;
6069 machine_mode imode;
6070 bool exchange;
6071
6072 gsi = gsi_last_bb (load_bb);
6073 stmt = gsi_stmt (gsi);
6074 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6075
6076 /* If the load value is needed, then this isn't a store but an exchange. */
6077 exchange = gimple_omp_atomic_need_value_p (stmt);
6078
6079 gsi = gsi_last_bb (store_bb);
6080 stmt = gsi_stmt (gsi);
6081 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6082 loc = gimple_location (stmt);
6083
6084 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6085 is smaller than word size, then expand_atomic_store assumes that the store
6086 is atomic. We could avoid the builtin entirely in this case. */
6087
6088 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6089 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6090 decl = builtin_decl_explicit (tmpbase);
6091 if (decl == NULL_TREE)
6092 return false;
6093
6094 type = TREE_TYPE (stored_val);
6095
6096 /* Dig out the type of the function's second argument. */
6097 itype = TREE_TYPE (decl);
6098 itype = TYPE_ARG_TYPES (itype);
6099 itype = TREE_CHAIN (itype);
6100 itype = TREE_VALUE (itype);
6101 imode = TYPE_MODE (itype);
6102
6103 if (exchange && !can_atomic_exchange_p (imode, true))
6104 return false;
6105
6106 if (!useless_type_conversion_p (itype, type))
6107 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6108 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6109 build_int_cst (NULL,
6110 gimple_omp_atomic_seq_cst_p (stmt)
6111 ? MEMMODEL_SEQ_CST
6112 : MEMMODEL_RELAXED));
6113 if (exchange)
6114 {
6115 if (!useless_type_conversion_p (type, itype))
6116 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6117 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6118 }
6119
6120 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6121 gsi_remove (&gsi, true);
6122
6123 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6124 gsi = gsi_last_bb (load_bb);
6125 gsi_remove (&gsi, true);
6126
6127 if (gimple_in_ssa_p (cfun))
6128 update_ssa (TODO_update_ssa_no_phi);
6129
6130 return true;
6131}
6132
6133/* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6134 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6135 size of the data type, and thus usable to find the index of the builtin
6136 decl. Returns false if the expression is not of the proper form. */
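/* For illustration only: for a supported update such as

     #pragma omp atomic
     x += y;

   with a 4-byte integer x, the load/store pair is collapsed into roughly

     __atomic_fetch_add_4 (&x, y, MEMMODEL_RELAXED);

   The ADD_FETCH variant is used instead when the updated value is needed,
   and unsupported operations (e.g. multiplication) fall through to the
   compare-and-swap loop below.  */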
6137
6138static bool
6139expand_omp_atomic_fetch_op (basic_block load_bb,
6140 tree addr, tree loaded_val,
6141 tree stored_val, int index)
6142{
6143 enum built_in_function oldbase, newbase, tmpbase;
6144 tree decl, itype, call;
6145 tree lhs, rhs;
6146 basic_block store_bb = single_succ (load_bb);
6147 gimple_stmt_iterator gsi;
6148 gimple *stmt;
6149 location_t loc;
6150 enum tree_code code;
6151 bool need_old, need_new;
6152 machine_mode imode;
6153 bool seq_cst;
6154
6155 /* We expect to find the following sequences:
6156
6157 load_bb:
6158 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6159
6160 store_bb:
6161 val = tmp OP something; (or: something OP tmp)
6162 GIMPLE_OMP_ATOMIC_STORE (val)
6163
6164 ???FIXME: Allow a more flexible sequence.
6165 Perhaps use data flow to pick the statements.
6166
6167 */
6168
6169 gsi = gsi_after_labels (store_bb);
6170 stmt = gsi_stmt (gsi);
6171 loc = gimple_location (stmt);
6172 if (!is_gimple_assign (stmt))
6173 return false;
6174 gsi_next (&gsi);
6175 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6176 return false;
6177 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6178 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6179 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6180 gcc_checking_assert (!need_old || !need_new);
6181
6182 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6183 return false;
6184
6185 /* Check for one of the supported fetch-op operations. */
6186 code = gimple_assign_rhs_code (stmt);
6187 switch (code)
6188 {
6189 case PLUS_EXPR:
6190 case POINTER_PLUS_EXPR:
6191 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6192 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6193 break;
6194 case MINUS_EXPR:
6195 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6196 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6197 break;
6198 case BIT_AND_EXPR:
6199 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6200 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6201 break;
6202 case BIT_IOR_EXPR:
6203 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6204 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6205 break;
6206 case BIT_XOR_EXPR:
6207 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6208 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6209 break;
6210 default:
6211 return false;
6212 }
6213
6214 /* Make sure the expression is of the proper form. */
6215 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6216 rhs = gimple_assign_rhs2 (stmt);
6217 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6218 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6219 rhs = gimple_assign_rhs1 (stmt);
6220 else
6221 return false;
6222
6223 tmpbase = ((enum built_in_function)
6224 ((need_new ? newbase : oldbase) + index + 1));
6225 decl = builtin_decl_explicit (tmpbase);
6226 if (decl == NULL_TREE)
6227 return false;
6228 itype = TREE_TYPE (TREE_TYPE (decl));
6229 imode = TYPE_MODE (itype);
6230
6231 /* We could test all of the various optabs involved, but the fact of the
6232 matter is that (with the exception of i486 vs i586 and xadd) all targets
6233 that support any atomic operation optab also implement compare-and-swap.
6234 Let optabs.c take care of expanding any compare-and-swap loop. */
6235 if (!can_compare_and_swap_p (imode, true))
6236 return false;
6237
6238 gsi = gsi_last_bb (load_bb);
6239 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6240
6241 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6242 It only requires that the operation happen atomically. Thus we can
6243 use the RELAXED memory model. */
6244 call = build_call_expr_loc (loc, decl, 3, addr,
6245 fold_convert_loc (loc, itype, rhs),
6246 build_int_cst (NULL,
6247 seq_cst ? MEMMODEL_SEQ_CST
6248 : MEMMODEL_RELAXED));
6249
6250 if (need_old || need_new)
6251 {
6252 lhs = need_old ? loaded_val : stored_val;
6253 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6254 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6255 }
6256 else
6257 call = fold_convert_loc (loc, void_type_node, call);
6258 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6259 gsi_remove (&gsi, true);
6260
6261 gsi = gsi_last_bb (store_bb);
6262 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6263 gsi_remove (&gsi, true);
6264 gsi = gsi_last_bb (store_bb);
6265 stmt = gsi_stmt (gsi);
6266 gsi_remove (&gsi, true);
6267
6268 if (gimple_in_ssa_p (cfun))
6269 {
6270 release_defs (stmt);
6271 update_ssa (TODO_update_ssa_no_phi);
6272 }
6273
6274 return true;
6275}
6276
6277/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6278
6279 oldval = *addr;
6280 repeat:
7c6746c9 6281 newval = rhs; // with oldval replacing *addr in rhs
4954efd4 6282 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6283 if (oldval != newval)
6284 goto repeat;
6285
6286 INDEX is log2 of the size of the data type, and thus usable to find the
6287 index of the builtin decl. */
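/* For illustration only: an update with no fetch-op builtin, e.g.

     #pragma omp atomic
     x *= 2.0;

   for a 4-byte float x is expanded into the loop above, view-converting
   the float to a 4-byte integer so that __sync_val_compare_and_swap_4 can
   be used for the compare-and-swap step.  */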
6288
6289static bool
6290expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6291 tree addr, tree loaded_val, tree stored_val,
6292 int index)
6293{
6294 tree loadedi, storedi, initial, new_storedi, old_vali;
6295 tree type, itype, cmpxchg, iaddr;
6296 gimple_stmt_iterator si;
6297 basic_block loop_header = single_succ (load_bb);
6298 gimple *phi, *stmt;
6299 edge e;
6300 enum built_in_function fncode;
6301
6302 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6303 order to use the RELAXED memory model effectively. */
6304 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6305 + index + 1);
6306 cmpxchg = builtin_decl_explicit (fncode);
6307 if (cmpxchg == NULL_TREE)
6308 return false;
6309 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6310 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6311
6312 if (!can_compare_and_swap_p (TYPE_MODE (itype), true))
6313 return false;
6314
6315 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6316 si = gsi_last_bb (load_bb);
6317 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6318
6319 /* For floating-point values, we'll need to view-convert them to integers
6320 so that we can perform the atomic compare and swap. Simplify the
6321 following code by always setting up the "i"ntegral variables. */
6322 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6323 {
6324 tree iaddr_val;
6325
6326 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6327 true));
6328 iaddr_val
6329 = force_gimple_operand_gsi (&si,
6330 fold_convert (TREE_TYPE (iaddr), addr),
6331 false, NULL_TREE, true, GSI_SAME_STMT);
6332 stmt = gimple_build_assign (iaddr, iaddr_val);
6333 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6334 loadedi = create_tmp_var (itype);
6335 if (gimple_in_ssa_p (cfun))
6336 loadedi = make_ssa_name (loadedi);
6337 }
6338 else
6339 {
6340 iaddr = addr;
6341 loadedi = loaded_val;
6342 }
6343
6344 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6345 tree loaddecl = builtin_decl_explicit (fncode);
6346 if (loaddecl)
6347 initial
6348 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6349 build_call_expr (loaddecl, 2, iaddr,
6350 build_int_cst (NULL_TREE,
6351 MEMMODEL_RELAXED)));
6352 else
6353 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6354 build_int_cst (TREE_TYPE (iaddr), 0));
6355
6356 initial
6357 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6358 GSI_SAME_STMT);
6359
6360 /* Move the value to the LOADEDI temporary. */
6361 if (gimple_in_ssa_p (cfun))
6362 {
6363 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6364 phi = create_phi_node (loadedi, loop_header);
6365 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6366 initial);
6367 }
6368 else
6369 gsi_insert_before (&si,
6370 gimple_build_assign (loadedi, initial),
6371 GSI_SAME_STMT);
6372 if (loadedi != loaded_val)
6373 {
6374 gimple_stmt_iterator gsi2;
6375 tree x;
6376
6377 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6378 gsi2 = gsi_start_bb (loop_header);
6379 if (gimple_in_ssa_p (cfun))
6380 {
6381 gassign *stmt;
6382 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6383 true, GSI_SAME_STMT);
6384 stmt = gimple_build_assign (loaded_val, x);
6385 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6386 }
6387 else
6388 {
6389 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6390 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6391 true, GSI_SAME_STMT);
6392 }
6393 }
6394 gsi_remove (&si, true);
6395
6396 si = gsi_last_bb (store_bb);
6397 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6398
6399 if (iaddr == addr)
6400 storedi = stored_val;
6401 else
7c6746c9 6402 storedi
6403 = force_gimple_operand_gsi (&si,
6404 build1 (VIEW_CONVERT_EXPR, itype,
6405 stored_val), true, NULL_TREE, true,
6406 GSI_SAME_STMT);
4954efd4 6407
6408 /* Build the compare&swap statement. */
6409 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6410 new_storedi = force_gimple_operand_gsi (&si,
6411 fold_convert (TREE_TYPE (loadedi),
6412 new_storedi),
6413 true, NULL_TREE,
6414 true, GSI_SAME_STMT);
6415
6416 if (gimple_in_ssa_p (cfun))
6417 old_vali = loadedi;
6418 else
6419 {
6420 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6421 stmt = gimple_build_assign (old_vali, loadedi);
6422 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6423
6424 stmt = gimple_build_assign (loadedi, new_storedi);
6425 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6426 }
6427
6428 /* Note that we always perform the comparison as an integer, even for
6429 floating point. This allows the atomic operation to properly
6430 succeed even with NaNs and -0.0. */
7c6746c9 6431 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6432 stmt = gimple_build_cond_empty (ne);
4954efd4 6433 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6434
6435 /* Update cfg. */
6436 e = single_succ_edge (store_bb);
6437 e->flags &= ~EDGE_FALLTHRU;
6438 e->flags |= EDGE_FALSE_VALUE;
6439
6440 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6441
6442 /* Copy the new value to loadedi (we already did that before the condition
6443 if we are not in SSA). */
6444 if (gimple_in_ssa_p (cfun))
6445 {
6446 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6447 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6448 }
6449
6450 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6451 gsi_remove (&si, true);
6452
6453 struct loop *loop = alloc_loop ();
6454 loop->header = loop_header;
6455 loop->latch = store_bb;
6456 add_loop (loop, loop_header->loop_father);
6457
6458 if (gimple_in_ssa_p (cfun))
6459 update_ssa (TODO_update_ssa_no_phi);
6460
6461 return true;
6462}
6463
6464/* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6465
7c6746c9 6466 GOMP_atomic_start ();
6467 *addr = rhs;
6468 GOMP_atomic_end ();
4954efd4 6469
6470 The result is not globally atomic, but works so long as all parallel
6471 references are within #pragma omp atomic directives. According to
6472 responses received from omp@openmp.org, this appears to be within spec,
6473 which makes sense, since that's how several other compilers handle
6474 this situation as well.
6475 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6476 expanding. STORED_VAL is the operand of the matching
6477 GIMPLE_OMP_ATOMIC_STORE.
6478
6479 We replace
6480 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6481 loaded_val = *addr;
6482
6483 and replace
6484 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6485 *addr = stored_val;
6486*/
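/* For illustration only: when no suitably sized atomic builtin is usable
   (e.g. an oddly sized or under-aligned type), the update degenerates to

     GOMP_atomic_start ();
     loaded_val = *addr;
     ... user update ...
     *addr = stored_val;
     GOMP_atomic_end ();

   serialising all atomic regions on one global libgomp mutex.  */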
6487
6488static bool
6489expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6490 tree addr, tree loaded_val, tree stored_val)
6491{
6492 gimple_stmt_iterator si;
6493 gassign *stmt;
6494 tree t;
6495
6496 si = gsi_last_bb (load_bb);
6497 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6498
6499 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6500 t = build_call_expr (t, 0);
6501 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6502
6503 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6504 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6505 gsi_remove (&si, true);
6506
6507 si = gsi_last_bb (store_bb);
6508 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6509
6510 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6511 stored_val);
6512 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6513
6514 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6515 t = build_call_expr (t, 0);
6516 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6517 gsi_remove (&si, true);
6518
6519 if (gimple_in_ssa_p (cfun))
6520 update_ssa (TODO_update_ssa_no_phi);
6521 return true;
6522}
6523
6524/* Expand a GIMPLE_OMP_ATOMIC statement. We first try to expand it
7c6746c9 6525 using expand_omp_atomic_fetch_op. If that fails, we try
4954efd4 6526 expand_omp_atomic_pipeline, and if that fails too, the
6527 ultimate fallback is wrapping the operation in a mutex
6528 (expand_omp_atomic_mutex). REGION is the atomic region built
6529 by build_omp_regions_1(). */
6530
6531static void
6532expand_omp_atomic (struct omp_region *region)
6533{
6534 basic_block load_bb = region->entry, store_bb = region->exit;
6535 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6536 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6537 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6538 tree addr = gimple_omp_atomic_load_rhs (load);
6539 tree stored_val = gimple_omp_atomic_store_val (store);
6540 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6541 HOST_WIDE_INT index;
6542
6543 /* Make sure the type is one of the supported sizes. */
6544 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6545 index = exact_log2 (index);
6546 if (index >= 0 && index <= 4)
6547 {
6548 unsigned int align = TYPE_ALIGN_UNIT (type);
6549
6550 /* __sync builtins require strict data alignment. */
6551 if (exact_log2 (align) >= index)
6552 {
6553 /* Atomic load. */
6554 if (loaded_val == stored_val
6555 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6556 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6557 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6558 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6559 return;
6560
6561 /* Atomic store. */
6562 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6563 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6564 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6565 && store_bb == single_succ (load_bb)
6566 && first_stmt (store_bb) == store
6567 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6568 stored_val, index))
6569 return;
6570
6571 /* When possible, use specialized atomic update functions. */
6572 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6573 && store_bb == single_succ (load_bb)
6574 && expand_omp_atomic_fetch_op (load_bb, addr,
6575 loaded_val, stored_val, index))
6576 return;
6577
6578 /* If we don't have specialized __sync builtins, try and implement
6579 as a compare and swap loop. */
6580 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6581 loaded_val, stored_val, index))
6582 return;
6583 }
6584 }
6585
6586 /* The ultimate fallback is wrapping the operation in a mutex. */
6587 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6588}
6589
6590/* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6591 at REGION_EXIT. */
6592
6593static void
6594mark_loops_in_oacc_kernels_region (basic_block region_entry,
6595 basic_block region_exit)
6596{
6597 struct loop *outer = region_entry->loop_father;
6598 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6599
6600 /* Don't parallelize the kernels region if it contains more than one outer
6601 loop. */
6602 unsigned int nr_outer_loops = 0;
6603 struct loop *single_outer = NULL;
6604 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6605 {
6606 gcc_assert (loop_outer (loop) == outer);
6607
6608 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6609 continue;
6610
6611 if (region_exit != NULL
6612 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6613 continue;
6614
6615 nr_outer_loops++;
6616 single_outer = loop;
6617 }
6618 if (nr_outer_loops != 1)
6619 return;
6620
7c6746c9 6621 for (struct loop *loop = single_outer->inner;
6622 loop != NULL;
6623 loop = loop->inner)
4954efd4 6624 if (loop->next)
6625 return;
6626
6627 /* Mark the loops in the region. */
6628 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6629 loop->in_oacc_kernels_region = true;
6630}
6631
6632/* Types used to pass grid and workgroup sizes to kernel invocation. */
6633
6634struct GTY(()) grid_launch_attributes_trees
6635{
6636 tree kernel_dim_array_type;
6637 tree kernel_lattrs_dimnum_decl;
6638 tree kernel_lattrs_grid_decl;
6639 tree kernel_lattrs_group_decl;
6640 tree kernel_launch_attributes_type;
6641};
6642
6643static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6644
6645/* Create types used to pass kernel launch attributes to target. */
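/* For illustration only: the record built below corresponds roughly to

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };

   with grid_size and group_size indexed by dimension (at most three).  */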
6646
6647static void
6648grid_create_kernel_launch_attr_types (void)
6649{
6650 if (grid_attr_trees)
6651 return;
6652 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6653
6654 tree dim_arr_index_type
6655 = build_index_type (build_int_cst (integer_type_node, 2));
6656 grid_attr_trees->kernel_dim_array_type
6657 = build_array_type (uint32_type_node, dim_arr_index_type);
6658
6659 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6660 grid_attr_trees->kernel_lattrs_dimnum_decl
6661 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6662 uint32_type_node);
6663 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6664
6665 grid_attr_trees->kernel_lattrs_grid_decl
6666 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6667 grid_attr_trees->kernel_dim_array_type);
6668 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6669 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6670 grid_attr_trees->kernel_lattrs_group_decl
6671 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6672 grid_attr_trees->kernel_dim_array_type);
6673 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6674 = grid_attr_trees->kernel_lattrs_grid_decl;
6675 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6676 "__gomp_kernel_launch_attributes",
6677 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6678}
6679
6680/* Insert before the current statement in GSI a store of VALUE to INDEX of
6681 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6682 of type uint32_type_node. */
6683
6684static void
6685grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6686 tree fld_decl, int index, tree value)
6687{
6688 tree ref = build4 (ARRAY_REF, uint32_type_node,
6689 build3 (COMPONENT_REF,
6690 grid_attr_trees->kernel_dim_array_type,
6691 range_var, fld_decl, NULL_TREE),
6692 build_int_cst (integer_type_node, index),
6693 NULL_TREE, NULL_TREE);
6694 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6695}
6696
6697/* Return a tree representation of a pointer to a structure with grid and
6698 work-group size information. Statements filling that information will be
6699 inserted before GSI. TGT_STMT is the target statement which has the
6700 necessary information in it. */
6701
6702static tree
6703grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6704 gomp_target *tgt_stmt)
6705{
6706 grid_create_kernel_launch_attr_types ();
6707 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6708 "__kernel_launch_attrs");
6709
6710 unsigned max_dim = 0;
6711 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6712 clause;
6713 clause = OMP_CLAUSE_CHAIN (clause))
6714 {
6715 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6716 continue;
6717
6718 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6719 max_dim = MAX (dim, max_dim);
6720
6721 grid_insert_store_range_dim (gsi, lattrs,
6722 grid_attr_trees->kernel_lattrs_grid_decl,
6723 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6724 grid_insert_store_range_dim (gsi, lattrs,
6725 grid_attr_trees->kernel_lattrs_group_decl,
6726 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6727 }
6728
6729 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6730 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6731 gcc_checking_assert (max_dim <= 2);
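  /* Dimension indices are zero-based, so the number of dimensions is the
     highest index seen plus one.  */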
6732 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6733 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6734 GSI_SAME_STMT);
6735 TREE_ADDRESSABLE (lattrs) = 1;
6736 return build_fold_addr_expr (lattrs);
6737}
6738
6739/* Build target argument identifier from the DEVICE identifier, value
6740 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6741
6742static tree
 6743get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
6744{
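  /* The identifier is simply the bitwise OR of the DEVICE selector, the
     GOMP_TARGET_ARG_SUBSEQUENT_PARAM flag (when the value follows as a
     separate argument) and the value identifier ID.  */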
6745 tree t = build_int_cst (integer_type_node, device);
 6746  if (subsequent_param)
6747 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6748 build_int_cst (integer_type_node,
6749 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6750 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6751 build_int_cst (integer_type_node, id));
6752 return t;
6753}
6754
 6755/* Like above, but return it in a type that can be directly stored as an element
6756 of the argument array. */
6757
6758static tree
 6759get_target_argument_identifier (int device, bool subsequent_param, int id)
6760{
 6761  tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
6762 return fold_convert (ptr_type_node, t);
6763}
6764
6765/* Return a target argument consisting of DEVICE identifier, value identifier
6766 ID, and the actual VALUE. */
6767
6768static tree
6769get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6770 tree value)
6771{
6772 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6773 fold_convert (integer_type_node, value),
6774 build_int_cst (unsigned_type_node,
6775 GOMP_TARGET_ARG_VALUE_SHIFT));
6776 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6777 get_target_argument_identifier_1 (device, false, id));
6778 t = fold_convert (ptr_type_node, t);
6779 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6780}
6781
6782/* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
 6783   push one argument to ARGS with the DEVICE, ID and VALUE embedded in it;
 6784   otherwise push an identifier (with DEVICE and ID) and the VALUE as two
 6785   separate arguments. */
6786
6787static void
6788push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6789 int id, tree value, vec <tree> *args)
6790{
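  /* A value strictly between -2^15 and 2^15 can be packed into the same
     argument word as its identifier; anything else is passed as a separate
     argument following an identifier marked with
     GOMP_TARGET_ARG_SUBSEQUENT_PARAM.  */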
6791 if (tree_fits_shwi_p (value)
6792 && tree_to_shwi (value) > -(1 << 15)
6793 && tree_to_shwi (value) < (1 << 15))
6794 args->quick_push (get_target_argument_value (gsi, device, id, value));
6795 else
6796 {
6797 args->quick_push (get_target_argument_identifier (device, true, id));
6798 value = fold_convert (ptr_type_node, value);
6799 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6800 GSI_SAME_STMT);
6801 args->quick_push (value);
6802 }
6803}
6804
7c6746c9 6805/* Create an array of arguments that is then passed to GOMP_target. */
4954efd4 6806
6807static tree
6808get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6809{
6810 auto_vec <tree, 6> args;
6811 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6812 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6813 if (c)
6814 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6815 else
6816 t = integer_minus_one_node;
6817 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6818 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6819
6820 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6821 if (c)
6822 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6823 else
6824 t = integer_minus_one_node;
6825 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6826 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6827 &args);
6828
6829 /* Add HSA-specific grid sizes, if available. */
6830 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6831 OMP_CLAUSE__GRIDDIM_))
6832 {
7c6746c9 6833 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
6834 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
4954efd4 6835 args.quick_push (t);
6836 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6837 }
6838
6839 /* Produce more, perhaps device specific, arguments here. */
6840
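  /* Copy the collected arguments into a NULL-terminated array of
     pointer-sized elements; its address is what gets passed to the
     runtime.  */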
6841 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6842 args.length () + 1),
6843 ".omp_target_args");
6844 for (unsigned i = 0; i < args.length (); i++)
6845 {
6846 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6847 build_int_cst (integer_type_node, i),
6848 NULL_TREE, NULL_TREE);
6849 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6850 GSI_SAME_STMT);
6851 }
6852 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6853 build_int_cst (integer_type_node, args.length ()),
6854 NULL_TREE, NULL_TREE);
6855 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6856 GSI_SAME_STMT);
6857 TREE_ADDRESSABLE (argarray) = 1;
6858 return build_fold_addr_expr (argarray);
6859}
6860
6861/* Expand the GIMPLE_OMP_TARGET starting at REGION. */
6862
6863static void
6864expand_omp_target (struct omp_region *region)
6865{
6866 basic_block entry_bb, exit_bb, new_bb;
6867 struct function *child_cfun;
6868 tree child_fn, block, t;
6869 gimple_stmt_iterator gsi;
6870 gomp_target *entry_stmt;
6871 gimple *stmt;
6872 edge e;
6873 bool offloaded, data_region;
6874
6875 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6876 new_bb = region->entry;
6877
6878 offloaded = is_gimple_omp_offloaded (entry_stmt);
6879 switch (gimple_omp_target_kind (entry_stmt))
6880 {
6881 case GF_OMP_TARGET_KIND_REGION:
6882 case GF_OMP_TARGET_KIND_UPDATE:
6883 case GF_OMP_TARGET_KIND_ENTER_DATA:
6884 case GF_OMP_TARGET_KIND_EXIT_DATA:
6885 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6886 case GF_OMP_TARGET_KIND_OACC_KERNELS:
6887 case GF_OMP_TARGET_KIND_OACC_UPDATE:
6888 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6889 case GF_OMP_TARGET_KIND_OACC_DECLARE:
6890 data_region = false;
6891 break;
6892 case GF_OMP_TARGET_KIND_DATA:
6893 case GF_OMP_TARGET_KIND_OACC_DATA:
6894 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6895 data_region = true;
6896 break;
6897 default:
6898 gcc_unreachable ();
6899 }
6900
6901 child_fn = NULL_TREE;
6902 child_cfun = NULL;
6903 if (offloaded)
6904 {
6905 child_fn = gimple_omp_target_child_fn (entry_stmt);
6906 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6907 }
6908
6909 /* Supported by expand_omp_taskreg, but not here. */
6910 if (child_cfun != NULL)
6911 gcc_checking_assert (!child_cfun->cfg);
6912 gcc_checking_assert (!gimple_in_ssa_p (cfun));
6913
6914 entry_bb = region->entry;
6915 exit_bb = region->exit;
6916
6917 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6918 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6919
6920 if (offloaded)
6921 {
6922 unsigned srcidx, dstidx, num;
6923
6924 /* If the offloading region needs data sent from the parent
6925 function, then the very first statement (except possible
6926 tree profile counter updates) of the offloading body
6927 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6928 &.OMP_DATA_O is passed as an argument to the child function,
6929 we need to replace it with the argument as seen by the child
6930 function.
6931
6932 In most cases, this will end up being the identity assignment
6933 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6934 a function call that has been inlined, the original PARM_DECL
6935 .OMP_DATA_I may have been converted into a different local
 6936	 variable, in which case we need to keep the assignment. */
6937 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6938 if (data_arg)
6939 {
6940 basic_block entry_succ_bb = single_succ (entry_bb);
6941 gimple_stmt_iterator gsi;
6942 tree arg;
6943 gimple *tgtcopy_stmt = NULL;
6944 tree sender = TREE_VEC_ELT (data_arg, 0);
6945
6946 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6947 {
6948 gcc_assert (!gsi_end_p (gsi));
6949 stmt = gsi_stmt (gsi);
6950 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6951 continue;
6952
6953 if (gimple_num_ops (stmt) == 2)
6954 {
6955 tree arg = gimple_assign_rhs1 (stmt);
6956
6957 /* We're ignoring the subcode because we're
6958 effectively doing a STRIP_NOPS. */
6959
6960 if (TREE_CODE (arg) == ADDR_EXPR
6961 && TREE_OPERAND (arg, 0) == sender)
6962 {
6963 tgtcopy_stmt = stmt;
6964 break;
6965 }
6966 }
6967 }
6968
6969 gcc_assert (tgtcopy_stmt != NULL);
6970 arg = DECL_ARGUMENTS (child_fn);
6971
6972 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
6973 gsi_remove (&gsi, true);
6974 }
6975
6976 /* Declare local variables needed in CHILD_CFUN. */
6977 block = DECL_INITIAL (child_fn);
6978 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
6979 /* The gimplifier could record temporaries in the offloading block
 6980	 rather than in the containing function's local_decls chain,
6981 which would mean cgraph missed finalizing them. Do it now. */
6982 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
6983 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
6984 varpool_node::finalize_decl (t);
6985 DECL_SAVED_TREE (child_fn) = NULL;
6986 /* We'll create a CFG for child_fn, so no gimple body is needed. */
6987 gimple_set_body (child_fn, NULL);
6988 TREE_USED (block) = 1;
6989
6990 /* Reset DECL_CONTEXT on function arguments. */
6991 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
6992 DECL_CONTEXT (t) = child_fn;
6993
6994 /* Split ENTRY_BB at GIMPLE_*,
6995 so that it can be moved to the child function. */
6996 gsi = gsi_last_bb (entry_bb);
6997 stmt = gsi_stmt (gsi);
6998 gcc_assert (stmt
6999 && gimple_code (stmt) == gimple_code (entry_stmt));
7000 e = split_block (entry_bb, stmt);
7001 gsi_remove (&gsi, true);
7002 entry_bb = e->dest;
7003 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7004
7005 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7006 if (exit_bb)
7007 {
7008 gsi = gsi_last_bb (exit_bb);
7009 gcc_assert (!gsi_end_p (gsi)
7010 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7011 stmt = gimple_build_return (NULL);
7012 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7013 gsi_remove (&gsi, true);
7014 }
7015
7016 /* Move the offloading region into CHILD_CFUN. */
7017
7018 block = gimple_block (entry_stmt);
7019
7020 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7021 if (exit_bb)
7022 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7023 /* When the OMP expansion process cannot guarantee an up-to-date
 7024	 loop tree, arrange for the child function to fix up loops. */
7025 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7026 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7027
7028 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7029 num = vec_safe_length (child_cfun->local_decls);
7030 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7031 {
7032 t = (*child_cfun->local_decls)[srcidx];
7033 if (DECL_CONTEXT (t) == cfun->decl)
7034 continue;
7035 if (srcidx != dstidx)
7036 (*child_cfun->local_decls)[dstidx] = t;
7037 dstidx++;
7038 }
7039 if (dstidx != num)
7040 vec_safe_truncate (child_cfun->local_decls, dstidx);
7041
7042 /* Inform the callgraph about the new function. */
7043 child_cfun->curr_properties = cfun->curr_properties;
7044 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7045 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7046 cgraph_node *node = cgraph_node::get_create (child_fn);
7047 node->parallelized_function = 1;
7048 cgraph_node::add_new_function (child_fn, true);
7049
7050 /* Add the new function to the offload table. */
7051 if (ENABLE_OFFLOADING)
7052 vec_safe_push (offload_funcs, child_fn);
7053
7054 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7055 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7056
7057 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7058 fixed in a following pass. */
7059 push_cfun (child_cfun);
7060 if (need_asm)
7061 assign_assembler_name_if_neeeded (child_fn);
7062 cgraph_edge::rebuild_edges ();
7063
7064 /* Some EH regions might become dead, see PR34608. If
7065 pass_cleanup_cfg isn't the first pass to happen with the
7066 new child, these dead EH edges might cause problems.
7067 Clean them up now. */
7068 if (flag_exceptions)
7069 {
7070 basic_block bb;
7071 bool changed = false;
7072
7073 FOR_EACH_BB_FN (bb, cfun)
7074 changed |= gimple_purge_dead_eh_edges (bb);
7075 if (changed)
7076 cleanup_tree_cfg ();
7077 }
7078 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7079 verify_loop_structure ();
7080 pop_cfun ();
7081
7082 if (dump_file && !gimple_in_ssa_p (cfun))
7083 {
7084 omp_any_child_fn_dumped = true;
7085 dump_function_header (dump_file, child_fn, dump_flags);
7086 dump_function_to_file (child_fn, dump_file, dump_flags);
7087 }
7088 }
7089
7090 /* Emit a library call to launch the offloading region, or do data
7091 transfers. */
7092 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7093 enum built_in_function start_ix;
7094 location_t clause_loc;
7095 unsigned int flags_i = 0;
7096 bool oacc_kernels_p = false;
7097
7098 switch (gimple_omp_target_kind (entry_stmt))
7099 {
7100 case GF_OMP_TARGET_KIND_REGION:
7101 start_ix = BUILT_IN_GOMP_TARGET;
7102 break;
7103 case GF_OMP_TARGET_KIND_DATA:
7104 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7105 break;
7106 case GF_OMP_TARGET_KIND_UPDATE:
7107 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7108 break;
7109 case GF_OMP_TARGET_KIND_ENTER_DATA:
7110 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7111 break;
7112 case GF_OMP_TARGET_KIND_EXIT_DATA:
7113 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7114 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7115 break;
7116 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7117 oacc_kernels_p = true;
7118 /* FALLTHROUGH */
7119 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7120 start_ix = BUILT_IN_GOACC_PARALLEL;
7121 break;
7122 case GF_OMP_TARGET_KIND_OACC_DATA:
7123 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7124 start_ix = BUILT_IN_GOACC_DATA_START;
7125 break;
7126 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7127 start_ix = BUILT_IN_GOACC_UPDATE;
7128 break;
7129 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7130 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7131 break;
7132 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7133 start_ix = BUILT_IN_GOACC_DECLARE;
7134 break;
7135 default:
7136 gcc_unreachable ();
7137 }
7138
7139 clauses = gimple_omp_target_clauses (entry_stmt);
7140
 7141  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
7142 library choose) and there is no conditional. */
7143 cond = NULL_TREE;
7144 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7145
7146 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7147 if (c)
7148 cond = OMP_CLAUSE_IF_EXPR (c);
7149
7150 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7151 if (c)
7152 {
7153 /* Even if we pass it to all library function calls, it is currently only
7154 defined/used for the OpenMP target ones. */
7155 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7156 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7157 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7158 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7159
7160 device = OMP_CLAUSE_DEVICE_ID (c);
7161 clause_loc = OMP_CLAUSE_LOCATION (c);
7162 }
7163 else
7164 clause_loc = gimple_location (entry_stmt);
7165
7166 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7167 if (c)
7168 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7169
7170 /* Ensure 'device' is of the correct type. */
7171 device = fold_convert_loc (clause_loc, integer_type_node, device);
7172
7173 /* If we found the clause 'if (cond)', build
7174 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7175 if (cond)
7176 {
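      /* Split the block and build a small diamond of basic blocks that
         assigns either DEVICE or GOMP_DEVICE_HOST_FALLBACK to a temporary;
         that temporary then becomes the device argument of the runtime
         call.  */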
7177 cond = gimple_boolify (cond);
7178
7179 basic_block cond_bb, then_bb, else_bb;
7180 edge e;
7181 tree tmp_var;
7182
7183 tmp_var = create_tmp_var (TREE_TYPE (device));
7184 if (offloaded)
7185 e = split_block_after_labels (new_bb);
7186 else
7187 {
7188 gsi = gsi_last_bb (new_bb);
7189 gsi_prev (&gsi);
7190 e = split_block (new_bb, gsi_stmt (gsi));
7191 }
7192 cond_bb = e->src;
7193 new_bb = e->dest;
7194 remove_edge (e);
7195
7196 then_bb = create_empty_bb (cond_bb);
7197 else_bb = create_empty_bb (then_bb);
7198 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7199 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7200
7201 stmt = gimple_build_cond_empty (cond);
7202 gsi = gsi_last_bb (cond_bb);
7203 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7204
7205 gsi = gsi_start_bb (then_bb);
7206 stmt = gimple_build_assign (tmp_var, device);
7207 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7208
7209 gsi = gsi_start_bb (else_bb);
7210 stmt = gimple_build_assign (tmp_var,
7211 build_int_cst (integer_type_node,
7212 GOMP_DEVICE_HOST_FALLBACK));
7213 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7214
7215 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7216 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7217 add_bb_to_loop (then_bb, cond_bb->loop_father);
7218 add_bb_to_loop (else_bb, cond_bb->loop_father);
7219 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7220 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7221
7222 device = tmp_var;
7223 gsi = gsi_last_bb (new_bb);
7224 }
7225 else
7226 {
7227 gsi = gsi_last_bb (new_bb);
7228 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7229 true, GSI_SAME_STMT);
7230 }
7231
7232 t = gimple_omp_target_data_arg (entry_stmt);
7233 if (t == NULL)
7234 {
7235 t1 = size_zero_node;
7236 t2 = build_zero_cst (ptr_type_node);
7237 t3 = t2;
7238 t4 = t2;
7239 }
7240 else
7241 {
7242 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7243 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7244 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7245 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7246 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7247 }
7248
7249 gimple *g;
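  /* TAGGING is set for GOACC_PARALLEL launches, whose trailing arguments
     are encoded as GOMP_LAUNCH_* tags built by oacc_launch_pack rather
     than passed as plain values.  */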
7250 bool tagging = false;
7251 /* The maximum number used by any start_ix, without varargs. */
7252 auto_vec<tree, 11> args;
7253 args.quick_push (device);
7254 if (offloaded)
7255 args.quick_push (build_fold_addr_expr (child_fn));
7256 args.quick_push (t1);
7257 args.quick_push (t2);
7258 args.quick_push (t3);
7259 args.quick_push (t4);
7260 switch (start_ix)
7261 {
7262 case BUILT_IN_GOACC_DATA_START:
7263 case BUILT_IN_GOACC_DECLARE:
7264 case BUILT_IN_GOMP_TARGET_DATA:
7265 break;
7266 case BUILT_IN_GOMP_TARGET:
7267 case BUILT_IN_GOMP_TARGET_UPDATE:
7268 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7269 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7270 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7271 if (c)
7272 depend = OMP_CLAUSE_DECL (c);
7273 else
7274 depend = build_int_cst (ptr_type_node, 0);
7275 args.quick_push (depend);
7276 if (start_ix == BUILT_IN_GOMP_TARGET)
7277 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7278 break;
7279 case BUILT_IN_GOACC_PARALLEL:
7280 {
7281 oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
7282 tagging = true;
7283 }
7284 /* FALLTHRU */
7285 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7286 case BUILT_IN_GOACC_UPDATE:
7287 {
7288 tree t_async = NULL_TREE;
7289
7290 /* If present, use the value specified by the respective
 7291	 clause, making sure it is of the correct type. */
7292 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7293 if (c)
7294 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7295 integer_type_node,
7296 OMP_CLAUSE_ASYNC_EXPR (c));
7297 else if (!tagging)
7298 /* Default values for t_async. */
7299 t_async = fold_convert_loc (gimple_location (entry_stmt),
7300 integer_type_node,
7301 build_int_cst (integer_type_node,
7302 GOMP_ASYNC_SYNC));
7303 if (tagging && t_async)
7304 {
7305 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7306
7307 if (TREE_CODE (t_async) == INTEGER_CST)
7308 {
 7309		  /* See if we can pack the async arg into the tag's
7310 operand. */
7311 i_async = TREE_INT_CST_LOW (t_async);
7312 if (i_async < GOMP_LAUNCH_OP_MAX)
7313 t_async = NULL_TREE;
7314 else
7315 i_async = GOMP_LAUNCH_OP_MAX;
7316 }
7317 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7318 i_async));
7319 }
7320 if (t_async)
7321 args.safe_push (t_async);
7322
7323 /* Save the argument index, and ... */
7324 unsigned t_wait_idx = args.length ();
7325 unsigned num_waits = 0;
7326 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7327 if (!tagging || c)
7328 /* ... push a placeholder. */
7329 args.safe_push (integer_zero_node);
7330
7331 for (; c; c = OMP_CLAUSE_CHAIN (c))
7332 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7333 {
7334 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7335 integer_type_node,
7336 OMP_CLAUSE_WAIT_EXPR (c)));
7337 num_waits++;
7338 }
7339
7340 if (!tagging || num_waits)
7341 {
7342 tree len;
7343
7344 /* Now that we know the number, update the placeholder. */
7345 if (tagging)
7346 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7347 else
7348 len = build_int_cst (integer_type_node, num_waits);
7349 len = fold_convert_loc (gimple_location (entry_stmt),
7350 unsigned_type_node, len);
7351 args[t_wait_idx] = len;
7352 }
7353 }
7354 break;
7355 default:
7356 gcc_unreachable ();
7357 }
7358 if (tagging)
7359 /* Push terminal marker - zero. */
7360 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7361
7362 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7363 gimple_set_location (g, gimple_location (entry_stmt));
7364 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7365 if (!offloaded)
7366 {
7367 g = gsi_stmt (gsi);
7368 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7369 gsi_remove (&gsi, true);
7370 }
7371 if (data_region && region->exit)
7372 {
7373 gsi = gsi_last_bb (region->exit);
7374 g = gsi_stmt (gsi);
7375 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7376 gsi_remove (&gsi, true);
7377 }
7378}
7379
 7380/* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
 7381   an iteration variable derived from the thread number.  INTRA_GROUP means this
7382 is an expansion of a loop iterating over work-items within a separate
7c6746c9 7383 iteration over groups. */
4954efd4 7384
7385static void
7386grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7387{
7388 gimple_stmt_iterator gsi;
7389 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7390 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7391 == GF_OMP_FOR_KIND_GRID_LOOP);
7392 size_t collapse = gimple_omp_for_collapse (for_stmt);
7393 struct omp_for_data_loop *loops
7394 = XALLOCAVEC (struct omp_for_data_loop,
7c6746c9 7395 gimple_omp_for_collapse (for_stmt));
4954efd4 7396 struct omp_for_data fd;
7397
7398 remove_edge (BRANCH_EDGE (kfor->entry));
7399 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7400
7401 gcc_assert (kfor->cont);
7402 omp_extract_for_data (for_stmt, &fd, loops);
7403
7404 gsi = gsi_start_bb (body_bb);
7405
7406 for (size_t dim = 0; dim < collapse; dim++)
7407 {
7408 tree type, itype;
7409 itype = type = TREE_TYPE (fd.loops[dim].v);
7410 if (POINTER_TYPE_P (type))
7411 itype = signed_type_for (type);
7412
7413 tree n1 = fd.loops[dim].n1;
7414 tree step = fd.loops[dim].step;
7415 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7416 true, NULL_TREE, true, GSI_SAME_STMT);
7417 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7418 true, NULL_TREE, true, GSI_SAME_STMT);
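      /* Pick the HSA dispatch built-in that yields this dimension's index:
         the work-group id for the group-level loop, the work-item id within
         the group for an intra-group loop, and the absolute work-item id
         otherwise.  */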
7419 tree threadid;
7420 if (gimple_omp_for_grid_group_iter (for_stmt))
7421 {
7422 gcc_checking_assert (!intra_group);
7423 threadid = build_call_expr (builtin_decl_explicit
7424 (BUILT_IN_HSA_WORKGROUPID), 1,
7425 build_int_cstu (unsigned_type_node, dim));
7426 }
7427 else if (intra_group)
7428 threadid = build_call_expr (builtin_decl_explicit
7429 (BUILT_IN_HSA_WORKITEMID), 1,
7430 build_int_cstu (unsigned_type_node, dim));
7431 else
7432 threadid = build_call_expr (builtin_decl_explicit
7433 (BUILT_IN_HSA_WORKITEMABSID), 1,
7434 build_int_cstu (unsigned_type_node, dim));
7435 threadid = fold_convert (itype, threadid);
7436 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7437 true, GSI_SAME_STMT);
7438
7439 tree startvar = fd.loops[dim].v;
7440 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7441 if (POINTER_TYPE_P (type))
7442 t = fold_build_pointer_plus (n1, t);
7443 else
7444 t = fold_build2 (PLUS_EXPR, type, t, n1);
7445 t = fold_convert (type, t);
7446 t = force_gimple_operand_gsi (&gsi, t,
7447 DECL_P (startvar)
7448 && TREE_ADDRESSABLE (startvar),
7449 NULL_TREE, true, GSI_SAME_STMT);
7450 gassign *assign_stmt = gimple_build_assign (startvar, t);
7451 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7452 }
7c6746c9 7453 /* Remove the omp for statement. */
4954efd4 7454 gsi = gsi_last_bb (kfor->entry);
7455 gsi_remove (&gsi, true);
7456
7457 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7458 gsi = gsi_last_bb (kfor->cont);
7459 gcc_assert (!gsi_end_p (gsi)
7460 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7461 gsi_remove (&gsi, true);
7462
7463 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7464 gsi = gsi_last_bb (kfor->exit);
7465 gcc_assert (!gsi_end_p (gsi)
7466 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7467 if (intra_group)
7468 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7469 gsi_remove (&gsi, true);
7470
7471 /* Fixup the much simpler CFG. */
7472 remove_edge (find_edge (kfor->cont, body_bb));
7473
7474 if (kfor->cont != body_bb)
7475 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7476 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7477}
7478
7479/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7480 argument_decls. */
7481
7482struct grid_arg_decl_map
7483{
7484 tree old_arg;
7485 tree new_arg;
7486};
7487
7488/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
 7489   pertaining to the kernel function. */
7490
7491static tree
7492grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7493{
7494 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7495 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7496 tree t = *tp;
7497
7498 if (t == adm->old_arg)
7499 *tp = adm->new_arg;
7500 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7501 return NULL_TREE;
7502}
7503
 7504/* If the TARGET region contains a gridified kernel body, remove its region from the
7c6746c9 7505   TARGET and expand it in HSA gridified kernel fashion. */
4954efd4 7506
7507static void
7508grid_expand_target_grid_body (struct omp_region *target)
7509{
7510 if (!hsa_gen_requested_p ())
7511 return;
7512
7513 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7514 struct omp_region **pp;
7515
7516 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7517 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7518 break;
7519
7520 struct omp_region *gpukernel = *pp;
7521
7522 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7523 if (!gpukernel)
7524 {
7525 /* HSA cannot handle OACC stuff. */
7526 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7527 return;
7528 gcc_checking_assert (orig_child_fndecl);
7529 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7530 OMP_CLAUSE__GRIDDIM_));
7531 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7532
7533 hsa_register_kernel (n);
7534 return;
7535 }
7536
7537 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7538 OMP_CLAUSE__GRIDDIM_));
7c6746c9 7539 tree inside_block
7540 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
4954efd4 7541 *pp = gpukernel->next;
7542 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7543 if ((*pp)->type == GIMPLE_OMP_FOR)
7544 break;
7545
7546 struct omp_region *kfor = *pp;
7547 gcc_assert (kfor);
7548 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7549 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7550 *pp = kfor->next;
7551 if (kfor->inner)
7552 {
7553 if (gimple_omp_for_grid_group_iter (for_stmt))
7554 {
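	  /* The outer gridified loop iterates over work groups; expand each
	     nested gridified loop as an intra-group loop over work-items and
	     unlink its region from the tree.  */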
7555 struct omp_region **next_pp;
7556 for (pp = &kfor->inner; *pp; pp = next_pp)
7557 {
7558 next_pp = &(*pp)->next;
7559 if ((*pp)->type != GIMPLE_OMP_FOR)
7560 continue;
7561 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7562 gcc_assert (gimple_omp_for_kind (inner)
7563 == GF_OMP_FOR_KIND_GRID_LOOP);
7564 grid_expand_omp_for_loop (*pp, true);
7565 *pp = (*pp)->next;
7566 next_pp = pp;
7567 }
7568 }
7569 expand_omp (kfor->inner);
7570 }
7571 if (gpukernel->inner)
7572 expand_omp (gpukernel->inner);
7573
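  /* Create the kernel function declaration as a copy of the original child
     function, giving it its own argument and result decls and an initial
     BLOCK whose origin is the block of the target construct.  */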
7574 tree kern_fndecl = copy_node (orig_child_fndecl);
7575 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7576 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7577 tree tgtblock = gimple_block (tgt_stmt);
7578 tree fniniblock = make_node (BLOCK);
7579 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7580 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7581 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7582 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7583 DECL_INITIAL (kern_fndecl) = fniniblock;
7584 push_struct_function (kern_fndecl);
7585 cfun->function_end_locus = gimple_location (tgt_stmt);
7586 init_tree_ssa (cfun);
7587 pop_cfun ();
7588
7589 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7590 gcc_assert (!DECL_CHAIN (old_parm_decl));
7591 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7592 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7593 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7594 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7595 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7596 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7597 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7598 kern_cfun->curr_properties = cfun->curr_properties;
7599
7600 grid_expand_omp_for_loop (kfor, false);
7601
7c6746c9 7602 /* Remove the omp for statement. */
4954efd4 7603 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7604 gsi_remove (&gsi, true);
7605 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7606 return. */
7607 gsi = gsi_last_bb (gpukernel->exit);
7608 gcc_assert (!gsi_end_p (gsi)
7609 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7610 gimple *ret_stmt = gimple_build_return (NULL);
7611 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7612 gsi_remove (&gsi, true);
7613
7614 /* Statements in the first BB in the target construct have been produced by
7615 target lowering and must be copied inside the GPUKERNEL, with the two
7616 exceptions of the first OMP statement and the OMP_DATA assignment
7617 statement. */
7618 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7619 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7620 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7621 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7622 !gsi_end_p (tsi); gsi_next (&tsi))
7623 {
7624 gimple *stmt = gsi_stmt (tsi);
7625 if (is_gimple_omp (stmt))
7626 break;
7627 if (sender
7628 && is_gimple_assign (stmt)
7629 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7630 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7631 continue;
7632 gimple *copy = gimple_copy (stmt);
7633 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7634 gimple_set_block (copy, fniniblock);
7635 }
7636
7637 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7638 gpukernel->exit, inside_block);
7639
7640 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7641 kcn->mark_force_output ();
7642 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7643
7644 hsa_register_kernel (kcn, orig_child);
7645
7646 cgraph_node::add_new_function (kern_fndecl, true);
7647 push_cfun (kern_cfun);
7648 cgraph_edge::rebuild_edges ();
7649
7650 /* Re-map any mention of the PARM_DECL of the original function to the
7651 PARM_DECL of the new one.
7652
7653 TODO: It would be great if lowering produced references into the GPU
7654 kernel decl straight away and we did not have to do this. */
7655 struct grid_arg_decl_map adm;
7656 adm.old_arg = old_parm_decl;
7657 adm.new_arg = new_parm_decl;
7658 basic_block bb;
7659 FOR_EACH_BB_FN (bb, kern_cfun)
7660 {
7661 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7662 {
7663 gimple *stmt = gsi_stmt (gsi);
7664 struct walk_stmt_info wi;
7665 memset (&wi, 0, sizeof (wi));
7666 wi.info = &adm;
7667 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7668 }
7669 }
7670 pop_cfun ();
7671
7672 return;
7673}
7674
7675/* Expand the parallel region tree rooted at REGION. Expansion
7676 proceeds in depth-first order. Innermost regions are expanded
7677 first. This way, parallel regions that require a new function to
7678 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7679 internal dependencies in their body. */
7680
7681static void
7682expand_omp (struct omp_region *region)
7683{
7684 omp_any_child_fn_dumped = false;
7685 while (region)
7686 {
7687 location_t saved_location;
7688 gimple *inner_stmt = NULL;
7689
7690 /* First, determine whether this is a combined parallel+workshare
7c6746c9 7691 region. */
4954efd4 7692 if (region->type == GIMPLE_OMP_PARALLEL)
7693 determine_parallel_type (region);
7694 else if (region->type == GIMPLE_OMP_TARGET)
7695 grid_expand_target_grid_body (region);
7696
7697 if (region->type == GIMPLE_OMP_FOR
7698 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7699 inner_stmt = last_stmt (region->inner->entry);
7700
7701 if (region->inner)
7702 expand_omp (region->inner);
7703
7704 saved_location = input_location;
7705 if (gimple_has_location (last_stmt (region->entry)))
7706 input_location = gimple_location (last_stmt (region->entry));
7707
7708 switch (region->type)
7709 {
7710 case GIMPLE_OMP_PARALLEL:
7711 case GIMPLE_OMP_TASK:
7712 expand_omp_taskreg (region);
7713 break;
7714
7715 case GIMPLE_OMP_FOR:
7716 expand_omp_for (region, inner_stmt);
7717 break;
7718
7719 case GIMPLE_OMP_SECTIONS:
7720 expand_omp_sections (region);
7721 break;
7722
7723 case GIMPLE_OMP_SECTION:
7724 /* Individual omp sections are handled together with their
7725 parent GIMPLE_OMP_SECTIONS region. */
7726 break;
7727
7728 case GIMPLE_OMP_SINGLE:
7729 expand_omp_single (region);
7730 break;
7731
7732 case GIMPLE_OMP_ORDERED:
7733 {
7734 gomp_ordered *ord_stmt
7735 = as_a <gomp_ordered *> (last_stmt (region->entry));
7736 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7737 OMP_CLAUSE_DEPEND))
7738 {
 7739	      /* We'll expand these when expanding the corresponding
 7740		 worksharing region with an ordered(n) clause. */
7741 gcc_assert (region->outer
7742 && region->outer->type == GIMPLE_OMP_FOR);
7743 region->ord_stmt = ord_stmt;
7744 break;
7745 }
7746 }
7747 /* FALLTHRU */
7748 case GIMPLE_OMP_MASTER:
7749 case GIMPLE_OMP_TASKGROUP:
7750 case GIMPLE_OMP_CRITICAL:
7751 case GIMPLE_OMP_TEAMS:
7752 expand_omp_synch (region);
7753 break;
7754
7755 case GIMPLE_OMP_ATOMIC_LOAD:
7756 expand_omp_atomic (region);
7757 break;
7758
7759 case GIMPLE_OMP_TARGET:
7760 expand_omp_target (region);
7761 break;
7762
7763 default:
7764 gcc_unreachable ();
7765 }
7766
7767 input_location = saved_location;
7768 region = region->next;
7769 }
7770 if (omp_any_child_fn_dumped)
7771 {
7772 if (dump_file)
7773 dump_function_header (dump_file, current_function_decl, dump_flags);
7774 omp_any_child_fn_dumped = false;
7775 }
7776}
7777
7778/* Helper for build_omp_regions. Scan the dominator tree starting at
7779 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
 7780   true, the function ends once a single tree is built (otherwise, a whole
7781 forest of OMP constructs may be built). */
7782
7783static void
7784build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7785 bool single_tree)
7786{
7787 gimple_stmt_iterator gsi;
7788 gimple *stmt;
7789 basic_block son;
7790
7791 gsi = gsi_last_bb (bb);
7792 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7793 {
7794 struct omp_region *region;
7795 enum gimple_code code;
7796
7797 stmt = gsi_stmt (gsi);
7798 code = gimple_code (stmt);
7799 if (code == GIMPLE_OMP_RETURN)
7800 {
7801 /* STMT is the return point out of region PARENT. Mark it
7802 as the exit point and make PARENT the immediately
7803 enclosing region. */
7804 gcc_assert (parent);
7805 region = parent;
7806 region->exit = bb;
7807 parent = parent->outer;
7808 }
7809 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7810 {
 7811	  /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7812 GIMPLE_OMP_RETURN, but matches with
7813 GIMPLE_OMP_ATOMIC_LOAD. */
7814 gcc_assert (parent);
7815 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7816 region = parent;
7817 region->exit = bb;
7818 parent = parent->outer;
7819 }
7820 else if (code == GIMPLE_OMP_CONTINUE)
7821 {
7822 gcc_assert (parent);
7823 parent->cont = bb;
7824 }
7825 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7826 {
7827 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7828 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7829 }
7830 else
7831 {
7832 region = new_omp_region (bb, code, parent);
7833 /* Otherwise... */
7834 if (code == GIMPLE_OMP_TARGET)
7835 {
7836 switch (gimple_omp_target_kind (stmt))
7837 {
7838 case GF_OMP_TARGET_KIND_REGION:
7839 case GF_OMP_TARGET_KIND_DATA:
7840 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7841 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7842 case GF_OMP_TARGET_KIND_OACC_DATA:
7843 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7844 break;
7845 case GF_OMP_TARGET_KIND_UPDATE:
7846 case GF_OMP_TARGET_KIND_ENTER_DATA:
7847 case GF_OMP_TARGET_KIND_EXIT_DATA:
7848 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7849 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7850 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7851 /* ..., other than for those stand-alone directives... */
7852 region = NULL;
7853 break;
7854 default:
7855 gcc_unreachable ();
7856 }
7857 }
7858 else if (code == GIMPLE_OMP_ORDERED
7859 && omp_find_clause (gimple_omp_ordered_clauses
7860 (as_a <gomp_ordered *> (stmt)),
7861 OMP_CLAUSE_DEPEND))
7862 /* #pragma omp ordered depend is also just a stand-alone
7863 directive. */
7864 region = NULL;
7865 /* ..., this directive becomes the parent for a new region. */
7866 if (region)
7867 parent = region;
7868 }
7869 }
7870
7871 if (single_tree && !parent)
7872 return;
7873
7874 for (son = first_dom_son (CDI_DOMINATORS, bb);
7875 son;
7876 son = next_dom_son (CDI_DOMINATORS, son))
7877 build_omp_regions_1 (son, parent, single_tree);
7878}
7879
7880/* Builds the tree of OMP regions rooted at ROOT, storing it to
7881 root_omp_region. */
7882
7883static void
7884build_omp_regions_root (basic_block root)
7885{
7886 gcc_assert (root_omp_region == NULL);
7887 build_omp_regions_1 (root, NULL, true);
7888 gcc_assert (root_omp_region != NULL);
7889}
7890
7891/* Expands omp construct (and its subconstructs) starting in HEAD. */
7892
7893void
7894omp_expand_local (basic_block head)
7895{
7896 build_omp_regions_root (head);
7897 if (dump_file && (dump_flags & TDF_DETAILS))
7898 {
7899 fprintf (dump_file, "\nOMP region tree\n\n");
7900 dump_omp_region (dump_file, root_omp_region, 0);
7901 fprintf (dump_file, "\n");
7902 }
7903
7904 remove_exit_barriers (root_omp_region);
7905 expand_omp (root_omp_region);
7906
7907 omp_free_regions ();
7908}
7909
 7910/* Scan the CFG and build a tree of OMP regions, storing the root of
 7911   the OMP region tree in root_omp_region. */
7912
7913static void
7914build_omp_regions (void)
7915{
7916 gcc_assert (root_omp_region == NULL);
7917 calculate_dominance_info (CDI_DOMINATORS);
7918 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7919}
7920
7921/* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7922
7923static unsigned int
7924execute_expand_omp (void)
7925{
7926 build_omp_regions ();
7927
7928 if (!root_omp_region)
7929 return 0;
7930
7931 if (dump_file)
7932 {
7933 fprintf (dump_file, "\nOMP region tree\n\n");
7934 dump_omp_region (dump_file, root_omp_region, 0);
7935 fprintf (dump_file, "\n");
7936 }
7937
7938 remove_exit_barriers (root_omp_region);
7939
7940 expand_omp (root_omp_region);
7941
7942 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7943 verify_loop_structure ();
7944 cleanup_tree_cfg ();
7945
7946 omp_free_regions ();
7947
7948 return 0;
7949}
7950
7951/* OMP expansion -- the default pass, run before creation of SSA form. */
7952
7953namespace {
7954
7955const pass_data pass_data_expand_omp =
7956{
7957 GIMPLE_PASS, /* type */
7958 "ompexp", /* name */
7959 OPTGROUP_OPENMP, /* optinfo_flags */
7960 TV_NONE, /* tv_id */
7961 PROP_gimple_any, /* properties_required */
7962 PROP_gimple_eomp, /* properties_provided */
7963 0, /* properties_destroyed */
7964 0, /* todo_flags_start */
7965 0, /* todo_flags_finish */
7966};
7967
7968class pass_expand_omp : public gimple_opt_pass
7969{
7970public:
7971 pass_expand_omp (gcc::context *ctxt)
7972 : gimple_opt_pass (pass_data_expand_omp, ctxt)
7973 {}
7974
7975 /* opt_pass methods: */
7976 virtual unsigned int execute (function *)
7977 {
7978 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
7979 || flag_openmp_simd != 0)
7980 && !seen_error ());
7981
7982 /* This pass always runs, to provide PROP_gimple_eomp.
7983 But often, there is nothing to do. */
7984 if (!gate)
7985 return 0;
7986
7987 return execute_expand_omp ();
7988 }
7989
7990}; // class pass_expand_omp
7991
7992} // anon namespace
7993
7994gimple_opt_pass *
7995make_pass_expand_omp (gcc::context *ctxt)
7996{
7997 return new pass_expand_omp (ctxt);
7998}
7999
8000namespace {
8001
8002const pass_data pass_data_expand_omp_ssa =
8003{
8004 GIMPLE_PASS, /* type */
8005 "ompexpssa", /* name */
8006 OPTGROUP_OPENMP, /* optinfo_flags */
8007 TV_NONE, /* tv_id */
8008 PROP_cfg | PROP_ssa, /* properties_required */
8009 PROP_gimple_eomp, /* properties_provided */
8010 0, /* properties_destroyed */
8011 0, /* todo_flags_start */
8012 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8013};
8014
8015class pass_expand_omp_ssa : public gimple_opt_pass
8016{
8017public:
8018 pass_expand_omp_ssa (gcc::context *ctxt)
8019 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8020 {}
8021
8022 /* opt_pass methods: */
8023 virtual bool gate (function *fun)
8024 {
8025 return !(fun->curr_properties & PROP_gimple_eomp);
8026 }
8027 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8028 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8029
8030}; // class pass_expand_omp_ssa
8031
8032} // anon namespace
8033
8034gimple_opt_pass *
8035make_pass_expand_omp_ssa (gcc::context *ctxt)
8036{
8037 return new pass_expand_omp_ssa (ctxt);
8038}
8039
8040/* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8041 GIMPLE_* codes. */
8042
8043bool
8044omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8045 int *region_idx)
8046{
8047 gimple *last = last_stmt (bb);
8048 enum gimple_code code = gimple_code (last);
8049 struct omp_region *cur_region = *region;
8050 bool fallthru = false;
8051
8052 switch (code)
8053 {
8054 case GIMPLE_OMP_PARALLEL:
8055 case GIMPLE_OMP_TASK:
8056 case GIMPLE_OMP_FOR:
8057 case GIMPLE_OMP_SINGLE:
8058 case GIMPLE_OMP_TEAMS:
8059 case GIMPLE_OMP_MASTER:
8060 case GIMPLE_OMP_TASKGROUP:
8061 case GIMPLE_OMP_CRITICAL:
8062 case GIMPLE_OMP_SECTION:
8063 case GIMPLE_OMP_GRID_BODY:
8064 cur_region = new_omp_region (bb, code, cur_region);
8065 fallthru = true;
8066 break;
8067
8068 case GIMPLE_OMP_ORDERED:
8069 cur_region = new_omp_region (bb, code, cur_region);
8070 fallthru = true;
8071 if (omp_find_clause (gimple_omp_ordered_clauses
8072 (as_a <gomp_ordered *> (last)),
8073 OMP_CLAUSE_DEPEND))
8074 cur_region = cur_region->outer;
8075 break;
8076
8077 case GIMPLE_OMP_TARGET:
8078 cur_region = new_omp_region (bb, code, cur_region);
8079 fallthru = true;
8080 switch (gimple_omp_target_kind (last))
8081 {
8082 case GF_OMP_TARGET_KIND_REGION:
8083 case GF_OMP_TARGET_KIND_DATA:
8084 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8085 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8086 case GF_OMP_TARGET_KIND_OACC_DATA:
8087 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8088 break;
8089 case GF_OMP_TARGET_KIND_UPDATE:
8090 case GF_OMP_TARGET_KIND_ENTER_DATA:
8091 case GF_OMP_TARGET_KIND_EXIT_DATA:
8092 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8093 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8094 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8095 cur_region = cur_region->outer;
8096 break;
8097 default:
8098 gcc_unreachable ();
8099 }
8100 break;
8101
8102 case GIMPLE_OMP_SECTIONS:
8103 cur_region = new_omp_region (bb, code, cur_region);
8104 fallthru = true;
8105 break;
8106
8107 case GIMPLE_OMP_SECTIONS_SWITCH:
8108 fallthru = false;
8109 break;
8110
8111 case GIMPLE_OMP_ATOMIC_LOAD:
8112 case GIMPLE_OMP_ATOMIC_STORE:
8113 fallthru = true;
8114 break;
8115
8116 case GIMPLE_OMP_RETURN:
8117 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8118 somewhere other than the next block. This will be
8119 created later. */
8120 cur_region->exit = bb;
8121 if (cur_region->type == GIMPLE_OMP_TASK)
8122 /* Add an edge corresponding to not scheduling the task
8123 immediately. */
8124 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8125 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8126 cur_region = cur_region->outer;
8127 break;
8128
8129 case GIMPLE_OMP_CONTINUE:
8130 cur_region->cont = bb;
8131 switch (cur_region->type)
8132 {
8133 case GIMPLE_OMP_FOR:
8134 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
 8135	       succ edges as abnormal to prevent splitting
8136 them. */
8137 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8138 /* Make the loopback edge. */
8139 make_edge (bb, single_succ (cur_region->entry),
8140 EDGE_ABNORMAL);
8141
8142 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8143 corresponds to the case that the body of the loop
8144 is not executed at all. */
8145 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8146 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8147 fallthru = false;
8148 break;
8149
8150 case GIMPLE_OMP_SECTIONS:
8151 /* Wire up the edges into and out of the nested sections. */
8152 {
8153 basic_block switch_bb = single_succ (cur_region->entry);
8154
8155 struct omp_region *i;
8156 for (i = cur_region->inner; i ; i = i->next)
8157 {
8158 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8159 make_edge (switch_bb, i->entry, 0);
8160 make_edge (i->exit, bb, EDGE_FALLTHRU);
8161 }
8162
8163 /* Make the loopback edge to the block with
8164 GIMPLE_OMP_SECTIONS_SWITCH. */
8165 make_edge (bb, switch_bb, 0);
8166
8167 /* Make the edge from the switch to exit. */
8168 make_edge (switch_bb, bb->next_bb, 0);
8169 fallthru = false;
8170 }
8171 break;
8172
8173 case GIMPLE_OMP_TASK:
8174 fallthru = true;
8175 break;
8176
8177 default:
8178 gcc_unreachable ();
8179 }
8180 break;
8181
8182 default:
8183 gcc_unreachable ();
8184 }
8185
8186 if (*region != cur_region)
8187 {
8188 *region = cur_region;
8189 if (cur_region)
8190 *region_idx = cur_region->entry->index;
8191 else
8192 *region_idx = 0;
8193 }
8194
8195 return fallthru;
8196}
8197
8198#include "gt-omp-expand.h"