1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3    runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2017 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "cilk.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa-common.h"
60 #include "debug.h"
61 #include "stringpool.h"
62 #include "attribs.h"
63
64 /* OMP region information. Every parallel and workshare
65 directive is enclosed between two markers, the OMP_* directive
66 and a corresponding GIMPLE_OMP_RETURN statement. */
67
68 struct omp_region
69 {
70 /* The enclosing region. */
71 struct omp_region *outer;
72
73 /* First child region. */
74 struct omp_region *inner;
75
76 /* Next peer region. */
77 struct omp_region *next;
78
79 /* Block containing the omp directive as its last stmt. */
80 basic_block entry;
81
82 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
83 basic_block exit;
84
85 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
86 basic_block cont;
87
88 /* If this is a combined parallel+workshare region, this is a list
89 of additional arguments needed by the combined parallel+workshare
90 library call. */
91 vec<tree, va_gc> *ws_args;
92
93 /* The code for the omp directive of this region. */
94 enum gimple_code type;
95
96 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
97 enum omp_clause_schedule_kind sched_kind;
98
99 /* Schedule modifiers. */
100 unsigned char sched_modifiers;
101
102 /* True if this is a combined parallel+workshare region. */
103 bool is_combined_parallel;
104
105 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
106 a depend clause. */
107 gomp_ordered *ord_stmt;
108 };
109
110 static struct omp_region *root_omp_region;
111 static bool omp_any_child_fn_dumped;
112
113 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
114 bool = false);
115 static gphi *find_phi_with_arg_on_edge (tree, edge);
116 static void expand_omp (struct omp_region *region);
117
118 /* Return true if REGION is a combined parallel+workshare region. */
119
120 static inline bool
121 is_combined_parallel (struct omp_region *region)
122 {
123 return region->is_combined_parallel;
124 }
125
126 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that WS_ENTRY_BB
127 is the immediate dominator of PAR_ENTRY_BB, return true if there
128 are no data dependencies that would prevent expanding the parallel
129 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
130
131 When expanding a combined parallel+workshare region, the call to
132 the child function may need additional arguments in the case of
133 GIMPLE_OMP_FOR regions. In some cases, these arguments are
134 computed out of variables passed in from the parent to the child
135 via 'struct .omp_data_s'. For instance:
136
137 #pragma omp parallel for schedule (guided, i * 4)
138 for (j ...)
139
140 Is lowered into:
141
142 # BLOCK 2 (PAR_ENTRY_BB)
143 .omp_data_o.i = i;
144 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)
145
146 # BLOCK 3 (WS_ENTRY_BB)
147 .omp_data_i = &.omp_data_o;
148 D.1667 = .omp_data_i->i;
149 D.1598 = D.1667 * 4;
150 #pragma omp for schedule (guided, D.1598)
151
152 When we outline the parallel region, the call to the child function
153 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
154 that value is computed *after* the call site. So, in principle we
155 cannot do the transformation.
156
157 To see whether the code in WS_ENTRY_BB blocks the combined
158 parallel+workshare call, we collect all the variables used in the
159    GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
160 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
161 call.
162
163 FIXME. If we had the SSA form built at this point, we could merely
164 hoist the code in block 3 into block 2 and be done with it. But at
165 this point we don't have dataflow information and though we could
166 hack something up here, it is really not worth the aggravation. */
167
168 static bool
169 workshare_safe_to_combine_p (basic_block ws_entry_bb)
170 {
171 struct omp_for_data fd;
172 gimple *ws_stmt = last_stmt (ws_entry_bb);
173
174 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
175 return true;
176
177 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
178
179 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
180
181 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
182 return false;
183 if (fd.iter_type != long_integer_type_node)
184 return false;
185
186 /* FIXME. We give up too easily here. If any of these arguments
187 are not constants, they will likely involve variables that have
188 been mapped into fields of .omp_data_s for sharing with the child
189 function. With appropriate data flow, it would be possible to
190 see through this. */
191 if (!is_gimple_min_invariant (fd.loop.n1)
192 || !is_gimple_min_invariant (fd.loop.n2)
193 || !is_gimple_min_invariant (fd.loop.step)
194 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
195 return false;
196
197 return true;
198 }
199
200 /* Adjust CHUNK_SIZE from SCHEDULE clause, depending on simd modifier
201 presence (SIMD_SCHEDULE). */
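/* A worked example (assuming, for illustration, that omp_max_vf () returns 8):
   a chunk size of 13 becomes (13 + 7) & -8 == 16, i.e. it is rounded up to
   the next multiple of the vectorization factor so chunks stay aligned to the
   simd width.  */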
202
203 static tree
204 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
205 {
206 if (!simd_schedule)
207 return chunk_size;
208
209 int vf = omp_max_vf ();
210 if (vf == 1)
211 return chunk_size;
212
213 tree type = TREE_TYPE (chunk_size);
214 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
215 build_int_cst (type, vf - 1));
216 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
217 build_int_cst (type, -vf));
218 }
219
220 /* Collect additional arguments needed to emit a combined
221 parallel+workshare call. WS_STMT is the workshare directive being
222 expanded. */
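/* A rough sketch with illustrative values (not taken from any particular
   test case): for

     #pragma omp parallel for schedule (dynamic, 4)
     for (i = 0; i < n; i++) ...

   the returned vector would hold { (long) 0, (long) n, (long) 1, (long) 4 },
   whereas for a sections region it holds only the number of sections.  */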
223
224 static vec<tree, va_gc> *
225 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
226 {
227 tree t;
228 location_t loc = gimple_location (ws_stmt);
229 vec<tree, va_gc> *ws_args;
230
231 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
232 {
233 struct omp_for_data fd;
234 tree n1, n2;
235
236 omp_extract_for_data (for_stmt, &fd, NULL);
237 n1 = fd.loop.n1;
238 n2 = fd.loop.n2;
239
240 if (gimple_omp_for_combined_into_p (for_stmt))
241 {
242 tree innerc
243 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
244 OMP_CLAUSE__LOOPTEMP_);
245 gcc_assert (innerc);
246 n1 = OMP_CLAUSE_DECL (innerc);
247 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
248 OMP_CLAUSE__LOOPTEMP_);
249 gcc_assert (innerc);
250 n2 = OMP_CLAUSE_DECL (innerc);
251 }
252
253 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
254
255 t = fold_convert_loc (loc, long_integer_type_node, n1);
256 ws_args->quick_push (t);
257
258 t = fold_convert_loc (loc, long_integer_type_node, n2);
259 ws_args->quick_push (t);
260
261 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
262 ws_args->quick_push (t);
263
264 if (fd.chunk_size)
265 {
266 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
267 t = omp_adjust_chunk_size (t, fd.simd_schedule);
268 ws_args->quick_push (t);
269 }
270
271 return ws_args;
272 }
273 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
274 {
275 /* Number of sections is equal to the number of edges from the
276 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
277 the exit of the sections region. */
278 basic_block bb = single_succ (gimple_bb (ws_stmt));
279 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
280 vec_alloc (ws_args, 1);
281 ws_args->quick_push (t);
282 return ws_args;
283 }
284
285 gcc_unreachable ();
286 }
287
288 /* Discover whether REGION is a combined parallel+workshare region. */
289
290 static void
291 determine_parallel_type (struct omp_region *region)
292 {
293 basic_block par_entry_bb, par_exit_bb;
294 basic_block ws_entry_bb, ws_exit_bb;
295
296 if (region == NULL || region->inner == NULL
297 || region->exit == NULL || region->inner->exit == NULL
298 || region->inner->cont == NULL)
299 return;
300
301 /* We only support parallel+for and parallel+sections. */
302 if (region->type != GIMPLE_OMP_PARALLEL
303 || (region->inner->type != GIMPLE_OMP_FOR
304 && region->inner->type != GIMPLE_OMP_SECTIONS))
305 return;
306
307 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
308 WS_EXIT_BB -> PAR_EXIT_BB. */
309 par_entry_bb = region->entry;
310 par_exit_bb = region->exit;
311 ws_entry_bb = region->inner->entry;
312 ws_exit_bb = region->inner->exit;
313
314 if (single_succ (par_entry_bb) == ws_entry_bb
315 && single_succ (ws_exit_bb) == par_exit_bb
316 && workshare_safe_to_combine_p (ws_entry_bb)
317 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
318 || (last_and_only_stmt (ws_entry_bb)
319 && last_and_only_stmt (par_exit_bb))))
320 {
321 gimple *par_stmt = last_stmt (par_entry_bb);
322 gimple *ws_stmt = last_stmt (ws_entry_bb);
323
324 if (region->inner->type == GIMPLE_OMP_FOR)
325 {
326 /* If this is a combined parallel loop, we need to determine
327 whether or not to use the combined library calls. There
328 are two cases where we do not apply the transformation:
329 static loops and any kind of ordered loop. In the first
330 case, we already open code the loop so there is no need
331 to do anything else. In the latter case, the combined
332 parallel loop call would still need extra synchronization
333 to implement ordered semantics, so there would not be any
334 gain in using the combined call. */
335 tree clauses = gimple_omp_for_clauses (ws_stmt);
336 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
337 if (c == NULL
338 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
339 == OMP_CLAUSE_SCHEDULE_STATIC)
340 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
341 {
342 region->is_combined_parallel = false;
343 region->inner->is_combined_parallel = false;
344 return;
345 }
346 }
347
348 region->is_combined_parallel = true;
349 region->inner->is_combined_parallel = true;
350 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
351 }
352 }
353
354 /* Debugging dumps for parallel regions. */
355 void dump_omp_region (FILE *, struct omp_region *, int);
356 void debug_omp_region (struct omp_region *);
357 void debug_all_omp_regions (void);
358
359 /* Dump the parallel region tree rooted at REGION. */
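/* A minimal sketch of the output (block numbers are made up): a parallel
   region wrapping a for region prints roughly

     bb 2: gimple_omp_parallel
         bb 4: gimple_omp_for
         bb 6: GIMPLE_OMP_CONTINUE
         bb 7: GIMPLE_OMP_RETURN
     bb 8: GIMPLE_OMP_RETURN  */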
360
361 void
362 dump_omp_region (FILE *file, struct omp_region *region, int indent)
363 {
364 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
365 gimple_code_name[region->type]);
366
367 if (region->inner)
368 dump_omp_region (file, region->inner, indent + 4);
369
370 if (region->cont)
371 {
372 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
373 region->cont->index);
374 }
375
376 if (region->exit)
377 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
378 region->exit->index);
379 else
380 fprintf (file, "%*s[no exit marker]\n", indent, "");
381
382 if (region->next)
383 dump_omp_region (file, region->next, indent);
384 }
385
386 DEBUG_FUNCTION void
387 debug_omp_region (struct omp_region *region)
388 {
389 dump_omp_region (stderr, region, 0);
390 }
391
392 DEBUG_FUNCTION void
393 debug_all_omp_regions (void)
394 {
395 dump_omp_region (stderr, root_omp_region, 0);
396 }
397
398 /* Create a new parallel region starting at STMT inside region PARENT. */
399
400 static struct omp_region *
401 new_omp_region (basic_block bb, enum gimple_code type,
402 struct omp_region *parent)
403 {
404 struct omp_region *region = XCNEW (struct omp_region);
405
406 region->outer = parent;
407 region->entry = bb;
408 region->type = type;
409
410 if (parent)
411 {
412 /* This is a nested region. Add it to the list of inner
413 regions in PARENT. */
414 region->next = parent->inner;
415 parent->inner = region;
416 }
417 else
418 {
419 /* This is a toplevel region. Add it to the list of toplevel
420 regions in ROOT_OMP_REGION. */
421 region->next = root_omp_region;
422 root_omp_region = region;
423 }
424
425 return region;
426 }
427
428 /* Release the memory associated with the region tree rooted at REGION. */
429
430 static void
431 free_omp_region_1 (struct omp_region *region)
432 {
433 struct omp_region *i, *n;
434
435 for (i = region->inner; i ; i = n)
436 {
437 n = i->next;
438 free_omp_region_1 (i);
439 }
440
441 free (region);
442 }
443
444 /* Release the memory for the entire omp region tree. */
445
446 void
447 omp_free_regions (void)
448 {
449 struct omp_region *r, *n;
450 for (r = root_omp_region; r ; r = n)
451 {
452 n = r->next;
453 free_omp_region_1 (r);
454 }
455 root_omp_region = NULL;
456 }
457
458 /* A convenience function to build an empty GIMPLE_COND with just the
459 condition. */
460
461 static gcond *
462 gimple_build_cond_empty (tree cond)
463 {
464 enum tree_code pred_code;
465 tree lhs, rhs;
466
467 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
468 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
469 }
470
471 /* Return true if a parallel REGION is within a declare target function or
472 within a target region and is not a part of a gridified target. */
473
474 static bool
475 parallel_needs_hsa_kernel_p (struct omp_region *region)
476 {
477 bool indirect = false;
478 for (region = region->outer; region; region = region->outer)
479 {
480 if (region->type == GIMPLE_OMP_PARALLEL)
481 indirect = true;
482 else if (region->type == GIMPLE_OMP_TARGET)
483 {
484 gomp_target *tgt_stmt
485 = as_a <gomp_target *> (last_stmt (region->entry));
486
487 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
488 OMP_CLAUSE__GRIDDIM_))
489 return indirect;
490 else
491 return true;
492 }
493 }
494
495 if (lookup_attribute ("omp declare target",
496 DECL_ATTRIBUTES (current_function_decl)))
497 return true;
498
499 return false;
500 }
501
502 /* Build the function calls to GOMP_parallel_start etc to actually
503 generate the parallel operation. REGION is the parallel region
504 being expanded. BB is the block where to insert the code. WS_ARGS
505 will be set if this is a call to a combined parallel+workshare
506    construct; it contains the list of additional arguments needed by
507 the workshare construct. */
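/* A sketch of the emitted call, assuming libgomp's entry points: in the
   plain case this is roughly

     GOMP_parallel (child_fn, &.omp_data_o, num_threads, flags);

   while for a combined parallel+loop region the WS_ARGS (bounds, step and
   optional chunk size) are spliced in before FLAGS, e.g.

     GOMP_parallel_loop_dynamic (child_fn, &.omp_data_o, num_threads,
                                 n1, n2, step, chunk, flags);  */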
508
509 static void
510 expand_parallel_call (struct omp_region *region, basic_block bb,
511 gomp_parallel *entry_stmt,
512 vec<tree, va_gc> *ws_args)
513 {
514 tree t, t1, t2, val, cond, c, clauses, flags;
515 gimple_stmt_iterator gsi;
516 gimple *stmt;
517 enum built_in_function start_ix;
518 int start_ix2;
519 location_t clause_loc;
520 vec<tree, va_gc> *args;
521
522 clauses = gimple_omp_parallel_clauses (entry_stmt);
523
524 /* Determine what flavor of GOMP_parallel we will be
525 emitting. */
526 start_ix = BUILT_IN_GOMP_PARALLEL;
527 if (is_combined_parallel (region))
528 {
529 switch (region->inner->type)
530 {
531 case GIMPLE_OMP_FOR:
532 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
533 switch (region->inner->sched_kind)
534 {
535 case OMP_CLAUSE_SCHEDULE_RUNTIME:
536 start_ix2 = 3;
537 break;
538 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
539 case OMP_CLAUSE_SCHEDULE_GUIDED:
540 if (region->inner->sched_modifiers
541 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
542 {
543 start_ix2 = 3 + region->inner->sched_kind;
544 break;
545 }
546 /* FALLTHRU */
547 default:
548 start_ix2 = region->inner->sched_kind;
549 break;
550 }
551 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
552 start_ix = (enum built_in_function) start_ix2;
553 break;
554 case GIMPLE_OMP_SECTIONS:
555 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
556 break;
557 default:
558 gcc_unreachable ();
559 }
560 }
561
562 /* By default, the value of NUM_THREADS is zero (selected at run time)
563 and there is no conditional. */
564 cond = NULL_TREE;
565 val = build_int_cst (unsigned_type_node, 0);
566 flags = build_int_cst (unsigned_type_node, 0);
567
568 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
569 if (c)
570 cond = OMP_CLAUSE_IF_EXPR (c);
571
572 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
573 if (c)
574 {
575 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
576 clause_loc = OMP_CLAUSE_LOCATION (c);
577 }
578 else
579 clause_loc = gimple_location (entry_stmt);
580
581 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
582 if (c)
583 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
584
585 /* Ensure 'val' is of the correct type. */
586 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
587
588 /* If we found the clause 'if (cond)', build either
589 (cond != 0) or (cond ? val : 1u). */
590 if (cond)
591 {
592 cond = gimple_boolify (cond);
593
594 if (integer_zerop (val))
595 val = fold_build2_loc (clause_loc,
596 EQ_EXPR, unsigned_type_node, cond,
597 build_int_cst (TREE_TYPE (cond), 0));
598 else
599 {
600 basic_block cond_bb, then_bb, else_bb;
601 edge e, e_then, e_else;
602 tree tmp_then, tmp_else, tmp_join, tmp_var;
603
604 tmp_var = create_tmp_var (TREE_TYPE (val));
605 if (gimple_in_ssa_p (cfun))
606 {
607 tmp_then = make_ssa_name (tmp_var);
608 tmp_else = make_ssa_name (tmp_var);
609 tmp_join = make_ssa_name (tmp_var);
610 }
611 else
612 {
613 tmp_then = tmp_var;
614 tmp_else = tmp_var;
615 tmp_join = tmp_var;
616 }
617
618 e = split_block_after_labels (bb);
619 cond_bb = e->src;
620 bb = e->dest;
621 remove_edge (e);
622
623 then_bb = create_empty_bb (cond_bb);
624 else_bb = create_empty_bb (then_bb);
625 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
626 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
627
628 stmt = gimple_build_cond_empty (cond);
629 gsi = gsi_start_bb (cond_bb);
630 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
631
632 gsi = gsi_start_bb (then_bb);
633 expand_omp_build_assign (&gsi, tmp_then, val, true);
634
635 gsi = gsi_start_bb (else_bb);
636 expand_omp_build_assign (&gsi, tmp_else,
637 build_int_cst (unsigned_type_node, 1),
638 true);
639
640 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
641 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
642 add_bb_to_loop (then_bb, cond_bb->loop_father);
643 add_bb_to_loop (else_bb, cond_bb->loop_father);
644 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
645 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
646
647 if (gimple_in_ssa_p (cfun))
648 {
649 gphi *phi = create_phi_node (tmp_join, bb);
650 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
651 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
652 }
653
654 val = tmp_join;
655 }
656
657 gsi = gsi_start_bb (bb);
658 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
659 false, GSI_CONTINUE_LINKING);
660 }
661
662 gsi = gsi_last_bb (bb);
663 t = gimple_omp_parallel_data_arg (entry_stmt);
664 if (t == NULL)
665 t1 = null_pointer_node;
666 else
667 t1 = build_fold_addr_expr (t);
668 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
669 t2 = build_fold_addr_expr (child_fndecl);
670
671 vec_alloc (args, 4 + vec_safe_length (ws_args));
672 args->quick_push (t2);
673 args->quick_push (t1);
674 args->quick_push (val);
675 if (ws_args)
676 args->splice (*ws_args);
677 args->quick_push (flags);
678
679 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
680 builtin_decl_explicit (start_ix), args);
681
682 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
683 false, GSI_CONTINUE_LINKING);
684
685 if (hsa_gen_requested_p ()
686 && parallel_needs_hsa_kernel_p (region))
687 {
688 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
689 hsa_register_kernel (child_cnode);
690 }
691 }
692
693 /* Insert a function call whose name is FUNC_NAME with the information from
694 ENTRY_STMT into the basic_block BB. */
695
696 static void
697 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
698 vec <tree, va_gc> *ws_args)
699 {
700 tree t, t1, t2;
701 gimple_stmt_iterator gsi;
702 vec <tree, va_gc> *args;
703
704 gcc_assert (vec_safe_length (ws_args) == 2);
705 tree func_name = (*ws_args)[0];
706 tree grain = (*ws_args)[1];
707
708 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
709 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
710 gcc_assert (count != NULL_TREE);
711 count = OMP_CLAUSE_OPERAND (count, 0);
712
713 gsi = gsi_last_bb (bb);
714 t = gimple_omp_parallel_data_arg (entry_stmt);
715 if (t == NULL)
716 t1 = null_pointer_node;
717 else
718 t1 = build_fold_addr_expr (t);
719 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
720
721 vec_alloc (args, 4);
722 args->quick_push (t2);
723 args->quick_push (t1);
724 args->quick_push (count);
725 args->quick_push (grain);
726 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
727
728 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
729 GSI_CONTINUE_LINKING);
730 }
731
732 /* Build the function call to GOMP_task to actually
733 generate the task operation. BB is the block where to insert the code. */
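/* A sketch of the emitted call, mirroring the argument order built below:

     GOMP_task (child_fn, &.omp_data_o, copy_fn, arg_size, arg_align,
                if_cond, flags, depend, priority);

   for taskloop regions GOMP_taskloop{,_ull} is used instead, taking flags,
   num_tasks, priority and the start/end/step triple.  */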
734
735 static void
736 expand_task_call (struct omp_region *region, basic_block bb,
737 gomp_task *entry_stmt)
738 {
739 tree t1, t2, t3;
740 gimple_stmt_iterator gsi;
741 location_t loc = gimple_location (entry_stmt);
742
743 tree clauses = gimple_omp_task_clauses (entry_stmt);
744
745 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
746 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
747 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
748 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
749 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
750 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
751
752 unsigned int iflags
753 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
754 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
755 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
756
757 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
758 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
759 tree num_tasks = NULL_TREE;
760 bool ull = false;
761 if (taskloop_p)
762 {
763 gimple *g = last_stmt (region->outer->entry);
764 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
765 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
766 struct omp_for_data fd;
767 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
768 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
769 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
770 OMP_CLAUSE__LOOPTEMP_);
771 startvar = OMP_CLAUSE_DECL (startvar);
772 endvar = OMP_CLAUSE_DECL (endvar);
773 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
774 if (fd.loop.cond_code == LT_EXPR)
775 iflags |= GOMP_TASK_FLAG_UP;
776 tree tclauses = gimple_omp_for_clauses (g);
777 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
778 if (num_tasks)
779 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
780 else
781 {
782 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
783 if (num_tasks)
784 {
785 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
786 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
787 }
788 else
789 num_tasks = integer_zero_node;
790 }
791 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
792 if (ifc == NULL_TREE)
793 iflags |= GOMP_TASK_FLAG_IF;
794 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
795 iflags |= GOMP_TASK_FLAG_NOGROUP;
796 ull = fd.iter_type == long_long_unsigned_type_node;
797 }
798 else if (priority)
799 iflags |= GOMP_TASK_FLAG_PRIORITY;
800
801 tree flags = build_int_cst (unsigned_type_node, iflags);
802
803 tree cond = boolean_true_node;
804 if (ifc)
805 {
806 if (taskloop_p)
807 {
808 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
809 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
810 build_int_cst (unsigned_type_node,
811 GOMP_TASK_FLAG_IF),
812 build_int_cst (unsigned_type_node, 0));
813 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
814 flags, t);
815 }
816 else
817 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
818 }
819
820 if (finalc)
821 {
822 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
823 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
824 build_int_cst (unsigned_type_node,
825 GOMP_TASK_FLAG_FINAL),
826 build_int_cst (unsigned_type_node, 0));
827 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
828 }
829 if (depend)
830 depend = OMP_CLAUSE_DECL (depend);
831 else
832 depend = build_int_cst (ptr_type_node, 0);
833 if (priority)
834 priority = fold_convert (integer_type_node,
835 OMP_CLAUSE_PRIORITY_EXPR (priority));
836 else
837 priority = integer_zero_node;
838
839 gsi = gsi_last_bb (bb);
840 tree t = gimple_omp_task_data_arg (entry_stmt);
841 if (t == NULL)
842 t2 = null_pointer_node;
843 else
844 t2 = build_fold_addr_expr_loc (loc, t);
845 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
846 t = gimple_omp_task_copy_fn (entry_stmt);
847 if (t == NULL)
848 t3 = null_pointer_node;
849 else
850 t3 = build_fold_addr_expr_loc (loc, t);
851
852 if (taskloop_p)
853 t = build_call_expr (ull
854 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
855 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
856 11, t1, t2, t3,
857 gimple_omp_task_arg_size (entry_stmt),
858 gimple_omp_task_arg_align (entry_stmt), flags,
859 num_tasks, priority, startvar, endvar, step);
860 else
861 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
862 9, t1, t2, t3,
863 gimple_omp_task_arg_size (entry_stmt),
864 gimple_omp_task_arg_align (entry_stmt), cond, flags,
865 depend, priority);
866
867 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
868 false, GSI_CONTINUE_LINKING);
869 }
870
871 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
872
873 static tree
874 vec2chain (vec<tree, va_gc> *v)
875 {
876 tree chain = NULL_TREE, t;
877 unsigned ix;
878
879 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
880 {
881 DECL_CHAIN (t) = chain;
882 chain = t;
883 }
884
885 return chain;
886 }
887
888 /* Remove barriers in REGION->EXIT's block. Note that this is only
889 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
890    is an implicit barrier, any barrier that a workshare inside the
891    GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
892    can now be removed.  */
893
894 static void
895 remove_exit_barrier (struct omp_region *region)
896 {
897 gimple_stmt_iterator gsi;
898 basic_block exit_bb;
899 edge_iterator ei;
900 edge e;
901 gimple *stmt;
902 int any_addressable_vars = -1;
903
904 exit_bb = region->exit;
905
906 /* If the parallel region doesn't return, we don't have REGION->EXIT
907 block at all. */
908 if (! exit_bb)
909 return;
910
911 /* The last insn in the block will be the parallel's GIMPLE_OMP_RETURN. The
912 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
913 statements that can appear in between are extremely limited -- no
914 memory operations at all. Here, we allow nothing at all, so the
915 only thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
916 gsi = gsi_last_bb (exit_bb);
917 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
918 gsi_prev (&gsi);
919 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
920 return;
921
922 FOR_EACH_EDGE (e, ei, exit_bb->preds)
923 {
924 gsi = gsi_last_bb (e->src);
925 if (gsi_end_p (gsi))
926 continue;
927 stmt = gsi_stmt (gsi);
928 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
929 && !gimple_omp_return_nowait_p (stmt))
930 {
931 /* OpenMP 3.0 tasks unfortunately prevent this optimization
932 in many cases. If there could be tasks queued, the barrier
933 might be needed to let the tasks run before some local
934 variable of the parallel that the task uses as shared
935 runs out of scope. The task can be spawned either
936    from within the current function (this would be easy to check)
937 or from some function it calls and gets passed an address
938 of such a variable. */
939 if (any_addressable_vars < 0)
940 {
941 gomp_parallel *parallel_stmt
942 = as_a <gomp_parallel *> (last_stmt (region->entry));
943 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
944 tree local_decls, block, decl;
945 unsigned ix;
946
947 any_addressable_vars = 0;
948 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
949 if (TREE_ADDRESSABLE (decl))
950 {
951 any_addressable_vars = 1;
952 break;
953 }
954 for (block = gimple_block (stmt);
955 !any_addressable_vars
956 && block
957 && TREE_CODE (block) == BLOCK;
958 block = BLOCK_SUPERCONTEXT (block))
959 {
960 for (local_decls = BLOCK_VARS (block);
961 local_decls;
962 local_decls = DECL_CHAIN (local_decls))
963 if (TREE_ADDRESSABLE (local_decls))
964 {
965 any_addressable_vars = 1;
966 break;
967 }
968 if (block == gimple_block (parallel_stmt))
969 break;
970 }
971 }
972 if (!any_addressable_vars)
973 gimple_omp_return_set_nowait (stmt);
974 }
975 }
976 }
977
978 static void
979 remove_exit_barriers (struct omp_region *region)
980 {
981 if (region->type == GIMPLE_OMP_PARALLEL)
982 remove_exit_barrier (region);
983
984 if (region->inner)
985 {
986 region = region->inner;
987 remove_exit_barriers (region);
988 while (region->next)
989 {
990 region = region->next;
991 remove_exit_barriers (region);
992 }
993 }
994 }
995
996 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
997 calls. These can't be declared as const functions, but
998 within one parallel body they are constant, so they can be
999 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
1000 which are declared const. Similarly for task body, except
1001 that in untied task omp_get_thread_num () can change at any task
1002 scheduling point. */
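/* For example, a parallel body that calls omp_get_thread_num () in several
   places has each call's fndecl redirected to the const
   __builtin_omp_get_thread_num, so later passes are free to reuse a single
   result instead of repeating the library call.  */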
1003
1004 static void
1005 optimize_omp_library_calls (gimple *entry_stmt)
1006 {
1007 basic_block bb;
1008 gimple_stmt_iterator gsi;
1009 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1010 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1011 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1012 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1013 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1014 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1015 OMP_CLAUSE_UNTIED) != NULL);
1016
1017 FOR_EACH_BB_FN (bb, cfun)
1018 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1019 {
1020 gimple *call = gsi_stmt (gsi);
1021 tree decl;
1022
1023 if (is_gimple_call (call)
1024 && (decl = gimple_call_fndecl (call))
1025 && DECL_EXTERNAL (decl)
1026 && TREE_PUBLIC (decl)
1027 && DECL_INITIAL (decl) == NULL)
1028 {
1029 tree built_in;
1030
1031 if (DECL_NAME (decl) == thr_num_id)
1032 {
1033 /* In #pragma omp task untied omp_get_thread_num () can change
1034 during the execution of the task region. */
1035 if (untied_task)
1036 continue;
1037 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1038 }
1039 else if (DECL_NAME (decl) == num_thr_id)
1040 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1041 else
1042 continue;
1043
1044 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1045 || gimple_call_num_args (call) != 0)
1046 continue;
1047
1048 if (flag_exceptions && !TREE_NOTHROW (decl))
1049 continue;
1050
1051 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1052 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1053 TREE_TYPE (TREE_TYPE (built_in))))
1054 continue;
1055
1056 gimple_call_set_fndecl (call, built_in);
1057 }
1058 }
1059 }
1060
1061 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1062 regimplified. */
1063
1064 static tree
1065 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1066 {
1067 tree t = *tp;
1068
1069 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1070 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1071 return t;
1072
1073 if (TREE_CODE (t) == ADDR_EXPR)
1074 recompute_tree_invariant_for_addr_expr (t);
1075
1076 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1077 return NULL_TREE;
1078 }
1079
1080 /* Prepend or append TO = FROM assignment before or after *GSI_P. */
1081
1082 static void
1083 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1084 bool after)
1085 {
1086 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1087 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1088 !after, after ? GSI_CONTINUE_LINKING
1089 : GSI_SAME_STMT);
1090 gimple *stmt = gimple_build_assign (to, from);
1091 if (after)
1092 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1093 else
1094 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1095 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1096 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1097 {
1098 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1099 gimple_regimplify_operands (stmt, &gsi);
1100 }
1101 }
1102
1103 /* Expand the OpenMP parallel or task directive starting at REGION. */
1104
1105 static void
1106 expand_omp_taskreg (struct omp_region *region)
1107 {
1108 basic_block entry_bb, exit_bb, new_bb;
1109 struct function *child_cfun;
1110 tree child_fn, block, t;
1111 gimple_stmt_iterator gsi;
1112 gimple *entry_stmt, *stmt;
1113 edge e;
1114 vec<tree, va_gc> *ws_args;
1115
1116 entry_stmt = last_stmt (region->entry);
1117 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1118 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1119
1120 entry_bb = region->entry;
1121 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1122 exit_bb = region->cont;
1123 else
1124 exit_bb = region->exit;
1125
1126 bool is_cilk_for
1127 = (flag_cilkplus
1128 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1129 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1130 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1131
1132 if (is_cilk_for)
1133 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1134 and the inner statement contains the name of the built-in function
1135 and grain. */
1136 ws_args = region->inner->ws_args;
1137 else if (is_combined_parallel (region))
1138 ws_args = region->ws_args;
1139 else
1140 ws_args = NULL;
1141
1142 if (child_cfun->cfg)
1143 {
1144 /* Due to inlining, it may happen that we have already outlined
1145 the region, in which case all we need to do is make the
1146 sub-graph unreachable and emit the parallel call. */
1147 edge entry_succ_e, exit_succ_e;
1148
1149 entry_succ_e = single_succ_edge (entry_bb);
1150
1151 gsi = gsi_last_bb (entry_bb);
1152 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1153 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1154 gsi_remove (&gsi, true);
1155
1156 new_bb = entry_bb;
1157 if (exit_bb)
1158 {
1159 exit_succ_e = single_succ_edge (exit_bb);
1160 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1161 }
1162 remove_edge_and_dominated_blocks (entry_succ_e);
1163 }
1164 else
1165 {
1166 unsigned srcidx, dstidx, num;
1167
1168 /* If the parallel region needs data sent from the parent
1169 function, then the very first statement (except possible
1170 tree profile counter updates) of the parallel body
1171 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1172 &.OMP_DATA_O is passed as an argument to the child function,
1173 we need to replace it with the argument as seen by the child
1174 function.
1175
1176 In most cases, this will end up being the identity assignment
1177 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1178 a function call that has been inlined, the original PARM_DECL
1179 .OMP_DATA_I may have been converted into a different local
1180 variable. In which case, we need to keep the assignment. */
1181 if (gimple_omp_taskreg_data_arg (entry_stmt))
1182 {
1183 basic_block entry_succ_bb
1184 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1185 : FALLTHRU_EDGE (entry_bb)->dest;
1186 tree arg;
1187 gimple *parcopy_stmt = NULL;
1188
1189 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1190 {
1191 gimple *stmt;
1192
1193 gcc_assert (!gsi_end_p (gsi));
1194 stmt = gsi_stmt (gsi);
1195 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1196 continue;
1197
1198 if (gimple_num_ops (stmt) == 2)
1199 {
1200 tree arg = gimple_assign_rhs1 (stmt);
1201
1202                /* We're ignoring the subcode because we're
1203 effectively doing a STRIP_NOPS. */
1204
1205 if (TREE_CODE (arg) == ADDR_EXPR
1206 && TREE_OPERAND (arg, 0)
1207 == gimple_omp_taskreg_data_arg (entry_stmt))
1208 {
1209 parcopy_stmt = stmt;
1210 break;
1211 }
1212 }
1213 }
1214
1215 gcc_assert (parcopy_stmt != NULL);
1216 arg = DECL_ARGUMENTS (child_fn);
1217
1218 if (!gimple_in_ssa_p (cfun))
1219 {
1220 if (gimple_assign_lhs (parcopy_stmt) == arg)
1221 gsi_remove (&gsi, true);
1222 else
1223 {
1224 /* ?? Is setting the subcode really necessary ?? */
1225 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1226 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1227 }
1228 }
1229 else
1230 {
1231 tree lhs = gimple_assign_lhs (parcopy_stmt);
1232 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1233 /* We'd like to set the rhs to the default def in the child_fn,
1234 but it's too early to create ssa names in the child_fn.
1235 Instead, we set the rhs to the parm. In
1236 move_sese_region_to_fn, we introduce a default def for the
1237             parm, map the parm to its default def, and once we encounter
1238 this stmt, replace the parm with the default def. */
1239 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1240 update_stmt (parcopy_stmt);
1241 }
1242 }
1243
1244 /* Declare local variables needed in CHILD_CFUN. */
1245 block = DECL_INITIAL (child_fn);
1246 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1247 /* The gimplifier could record temporaries in parallel/task block
1248 rather than in containing function's local_decls chain,
1249 which would mean cgraph missed finalizing them. Do it now. */
1250 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1251 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1252 varpool_node::finalize_decl (t);
1253 DECL_SAVED_TREE (child_fn) = NULL;
1254 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1255 gimple_set_body (child_fn, NULL);
1256 TREE_USED (block) = 1;
1257
1258 /* Reset DECL_CONTEXT on function arguments. */
1259 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1260 DECL_CONTEXT (t) = child_fn;
1261
1262 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1263 so that it can be moved to the child function. */
1264 gsi = gsi_last_bb (entry_bb);
1265 stmt = gsi_stmt (gsi);
1266 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1267 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1268 e = split_block (entry_bb, stmt);
1269 gsi_remove (&gsi, true);
1270 entry_bb = e->dest;
1271 edge e2 = NULL;
1272 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1273 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1274 else
1275 {
1276 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1277 gcc_assert (e2->dest == region->exit);
1278 remove_edge (BRANCH_EDGE (entry_bb));
1279 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1280 gsi = gsi_last_bb (region->exit);
1281 gcc_assert (!gsi_end_p (gsi)
1282 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1283 gsi_remove (&gsi, true);
1284 }
1285
1286 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1287 if (exit_bb)
1288 {
1289 gsi = gsi_last_bb (exit_bb);
1290 gcc_assert (!gsi_end_p (gsi)
1291 && (gimple_code (gsi_stmt (gsi))
1292 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1293 stmt = gimple_build_return (NULL);
1294 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1295 gsi_remove (&gsi, true);
1296 }
1297
1298 /* Move the parallel region into CHILD_CFUN. */
1299
1300 if (gimple_in_ssa_p (cfun))
1301 {
1302 init_tree_ssa (child_cfun);
1303 init_ssa_operands (child_cfun);
1304 child_cfun->gimple_df->in_ssa_p = true;
1305 block = NULL_TREE;
1306 }
1307 else
1308 block = gimple_block (entry_stmt);
1309
1310 /* Make sure to generate early debug for the function before
1311 outlining anything. */
1312 if (! gimple_in_ssa_p (cfun))
1313 (*debug_hooks->early_global_decl) (cfun->decl);
1314
1315 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1316 if (exit_bb)
1317 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1318 if (e2)
1319 {
1320 basic_block dest_bb = e2->dest;
1321 if (!exit_bb)
1322 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1323 remove_edge (e2);
1324 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1325 }
1326 /* When the OMP expansion process cannot guarantee an up-to-date
1327         loop tree, arrange for the child function to fix up loops.  */
1328 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1329 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1330
1331 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1332 num = vec_safe_length (child_cfun->local_decls);
1333 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1334 {
1335 t = (*child_cfun->local_decls)[srcidx];
1336 if (DECL_CONTEXT (t) == cfun->decl)
1337 continue;
1338 if (srcidx != dstidx)
1339 (*child_cfun->local_decls)[dstidx] = t;
1340 dstidx++;
1341 }
1342 if (dstidx != num)
1343 vec_safe_truncate (child_cfun->local_decls, dstidx);
1344
1345 /* Inform the callgraph about the new function. */
1346 child_cfun->curr_properties = cfun->curr_properties;
1347 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1348 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1349 cgraph_node *node = cgraph_node::get_create (child_fn);
1350 node->parallelized_function = 1;
1351 cgraph_node::add_new_function (child_fn, true);
1352
1353 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1354 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1355
1356 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1357 fixed in a following pass. */
1358 push_cfun (child_cfun);
1359 if (need_asm)
1360 assign_assembler_name_if_needed (child_fn);
1361
1362 if (optimize)
1363 optimize_omp_library_calls (entry_stmt);
1364 cgraph_edge::rebuild_edges ();
1365
1366 /* Some EH regions might become dead, see PR34608. If
1367 pass_cleanup_cfg isn't the first pass to happen with the
1368 new child, these dead EH edges might cause problems.
1369 Clean them up now. */
1370 if (flag_exceptions)
1371 {
1372 basic_block bb;
1373 bool changed = false;
1374
1375 FOR_EACH_BB_FN (bb, cfun)
1376 changed |= gimple_purge_dead_eh_edges (bb);
1377 if (changed)
1378 cleanup_tree_cfg ();
1379 }
1380 if (gimple_in_ssa_p (cfun))
1381 update_ssa (TODO_update_ssa);
1382 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1383 verify_loop_structure ();
1384 pop_cfun ();
1385
1386 if (dump_file && !gimple_in_ssa_p (cfun))
1387 {
1388 omp_any_child_fn_dumped = true;
1389 dump_function_header (dump_file, child_fn, dump_flags);
1390 dump_function_to_file (child_fn, dump_file, dump_flags);
1391 }
1392 }
1393
1394 /* Emit a library call to launch the children threads. */
1395 if (is_cilk_for)
1396 expand_cilk_for_call (new_bb,
1397 as_a <gomp_parallel *> (entry_stmt), ws_args);
1398 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1399 expand_parallel_call (region, new_bb,
1400 as_a <gomp_parallel *> (entry_stmt), ws_args);
1401 else
1402 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1403 if (gimple_in_ssa_p (cfun))
1404 update_ssa (TODO_update_ssa_only_virtuals);
1405 }
1406
1407 /* Information about members of an OpenACC collapsed loop nest. */
1408
1409 struct oacc_collapse
1410 {
1411 tree base; /* Base value. */
1412 tree iters; /* Number of steps. */
1413 tree step; /* Step size. */
1414 tree tile; /* Tile increment (if tiled). */
1415 tree outer; /* Tile iterator var. */
1416 };
1417
1418 /* Helper for expand_oacc_for. Determine collapsed loop information.
1419 Fill in COUNTS array. Emit any initialization code before GSI.
1420 Return the calculated outer loop bound of BOUND_TYPE. */
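/* A worked example of the per-loop iteration count computed below, assuming
   an upward loop "for (v = 0; v < 10; v += 3)": the range is 10 and
   iters = (range - 1 + step) / step = (10 - 1 + 3) / 3 = 4, i.e. v takes
   the values 0, 3, 6 and 9.  */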
1421
1422 static tree
1423 expand_oacc_collapse_init (const struct omp_for_data *fd,
1424 gimple_stmt_iterator *gsi,
1425 oacc_collapse *counts, tree bound_type,
1426 location_t loc)
1427 {
1428 tree tiling = fd->tiling;
1429 tree total = build_int_cst (bound_type, 1);
1430 int ix;
1431
1432 gcc_assert (integer_onep (fd->loop.step));
1433 gcc_assert (integer_zerop (fd->loop.n1));
1434
1435 /* When tiling, the first operand of the tile clause applies to the
1436 innermost loop, and we work outwards from there. Seems
1437 backwards, but whatever. */
1438 for (ix = fd->collapse; ix--;)
1439 {
1440 const omp_for_data_loop *loop = &fd->loops[ix];
1441
1442 tree iter_type = TREE_TYPE (loop->v);
1443 tree diff_type = iter_type;
1444 tree plus_type = iter_type;
1445
1446 gcc_assert (loop->cond_code == fd->loop.cond_code);
1447
1448 if (POINTER_TYPE_P (iter_type))
1449 plus_type = sizetype;
1450 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1451 diff_type = signed_type_for (diff_type);
1452
1453 if (tiling)
1454 {
1455 tree num = build_int_cst (integer_type_node, fd->collapse);
1456 tree loop_no = build_int_cst (integer_type_node, ix);
1457 tree tile = TREE_VALUE (tiling);
1458 gcall *call
1459 = gimple_build_call_internal (IFN_GOACC_TILE, 5, num, loop_no, tile,
1460 /* gwv-outer=*/integer_zero_node,
1461 /* gwv-inner=*/integer_zero_node);
1462
1463 counts[ix].outer = create_tmp_var (iter_type, ".outer");
1464 counts[ix].tile = create_tmp_var (diff_type, ".tile");
1465 gimple_call_set_lhs (call, counts[ix].tile);
1466 gimple_set_location (call, loc);
1467 gsi_insert_before (gsi, call, GSI_SAME_STMT);
1468
1469 tiling = TREE_CHAIN (tiling);
1470 }
1471 else
1472 {
1473 counts[ix].tile = NULL;
1474 counts[ix].outer = loop->v;
1475 }
1476
1477 tree b = loop->n1;
1478 tree e = loop->n2;
1479 tree s = loop->step;
1480 bool up = loop->cond_code == LT_EXPR;
1481 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1482 bool negating;
1483 tree expr;
1484
1485 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1486 true, GSI_SAME_STMT);
1487 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1488 true, GSI_SAME_STMT);
1489
1490 /* Convert the step, avoiding possible unsigned->signed overflow. */
1491 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1492 if (negating)
1493 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1494 s = fold_convert (diff_type, s);
1495 if (negating)
1496 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1497 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1498 true, GSI_SAME_STMT);
1499
1500 /* Determine the range, avoiding possible unsigned->signed overflow. */
1501 negating = !up && TYPE_UNSIGNED (iter_type);
1502 expr = fold_build2 (MINUS_EXPR, plus_type,
1503 fold_convert (plus_type, negating ? b : e),
1504 fold_convert (plus_type, negating ? e : b));
1505 expr = fold_convert (diff_type, expr);
1506 if (negating)
1507 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1508 tree range = force_gimple_operand_gsi
1509 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1510
1511 /* Determine number of iterations. */
1512 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1513 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1514 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1515
1516 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1517 true, GSI_SAME_STMT);
1518
1519 counts[ix].base = b;
1520 counts[ix].iters = iters;
1521 counts[ix].step = s;
1522
1523 total = fold_build2 (MULT_EXPR, bound_type, total,
1524 fold_convert (bound_type, iters));
1525 }
1526
1527 return total;
1528 }
1529
1530 /* Emit initializers for collapsed loop members. INNER is true if
1531 this is for the element loop of a TILE. IVAR is the outer
1532 loop iteration variable, from which collapsed loop iteration values
1533 are calculated. COUNTS array has been initialized by
1534    expand_oacc_collapse_init.  */
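/* For instance, with two collapsed loops where the inner loop runs 5
   iterations, an outer iteration value IVAR of 7 decomposes (innermost loop
   first) into inner index 7 % 5 == 2 and outer index 7 / 5 == 1, each of
   which is then scaled by its step and added to its base value.  */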
1535
1536 static void
1537 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
1538 gimple_stmt_iterator *gsi,
1539 const oacc_collapse *counts, tree ivar)
1540 {
1541 tree ivar_type = TREE_TYPE (ivar);
1542
1543 /* The most rapidly changing iteration variable is the innermost
1544 one. */
1545 for (int ix = fd->collapse; ix--;)
1546 {
1547 const omp_for_data_loop *loop = &fd->loops[ix];
1548 const oacc_collapse *collapse = &counts[ix];
1549 tree v = inner ? loop->v : collapse->outer;
1550 tree iter_type = TREE_TYPE (v);
1551 tree diff_type = TREE_TYPE (collapse->step);
1552 tree plus_type = iter_type;
1553 enum tree_code plus_code = PLUS_EXPR;
1554 tree expr;
1555
1556 if (POINTER_TYPE_P (iter_type))
1557 {
1558 plus_code = POINTER_PLUS_EXPR;
1559 plus_type = sizetype;
1560 }
1561
1562 expr = ivar;
1563 if (ix)
1564 {
1565 tree mod = fold_convert (ivar_type, collapse->iters);
1566 ivar = fold_build2 (TRUNC_DIV_EXPR, ivar_type, expr, mod);
1567 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, expr, mod);
1568 ivar = force_gimple_operand_gsi (gsi, ivar, true, NULL_TREE,
1569 true, GSI_SAME_STMT);
1570 }
1571
1572 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1573 collapse->step);
1574 expr = fold_build2 (plus_code, iter_type,
1575 inner ? collapse->outer : collapse->base,
1576 fold_convert (plus_type, expr));
1577 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1578 true, GSI_SAME_STMT);
1579 gassign *ass = gimple_build_assign (v, expr);
1580 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1581 }
1582 }
1583
1584 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1585 of the combined collapse > 1 loop constructs, generate code like:
1586 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1587 if (cond3 is <)
1588 adj = STEP3 - 1;
1589 else
1590 adj = STEP3 + 1;
1591 count3 = (adj + N32 - N31) / STEP3;
1592 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1593 if (cond2 is <)
1594 adj = STEP2 - 1;
1595 else
1596 adj = STEP2 + 1;
1597 count2 = (adj + N22 - N21) / STEP2;
1598 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1599 if (cond1 is <)
1600 adj = STEP1 - 1;
1601 else
1602 adj = STEP1 + 1;
1603 count1 = (adj + N12 - N11) / STEP1;
1604 count = count1 * count2 * count3;
1605 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1606 count = 0;
1607 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1608 of the combined loop constructs, just initialize COUNTS array
1609 from the _looptemp_ clauses. */
1610
1611 /* NOTE: It *could* be better to moosh all of the BBs together,
1612 creating one larger BB with all the computation and the unexpected
1613 jump at the end. I.e.
1614
1615 bool zero3, zero2, zero1, zero;
1616
1617 zero3 = N32 c3 N31;
1618 count3 = (N32 - N31) /[cl] STEP3;
1619 zero2 = N22 c2 N21;
1620 count2 = (N22 - N21) /[cl] STEP2;
1621 zero1 = N12 c1 N11;
1622 count1 = (N12 - N11) /[cl] STEP1;
1623 zero = zero3 || zero2 || zero1;
1624 count = count1 * count2 * count3;
1625 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1626
1627 After all, we expect the zero=false, and thus we expect to have to
1628 evaluate all of the comparison expressions, so short-circuiting
1629 oughtn't be a win. Since the condition isn't protecting a
1630 denominator, we're not concerned about divide-by-zero, so we can
1631 fully evaluate count even if a numerator turned out to be wrong.
1632
1633 It seems like putting this all together would create much better
1634 scheduling opportunities, and less pressure on the chip's branch
1635 predictor. */
1636
1637 static void
1638 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1639 basic_block &entry_bb, tree *counts,
1640 basic_block &zero_iter1_bb, int &first_zero_iter1,
1641 basic_block &zero_iter2_bb, int &first_zero_iter2,
1642 basic_block &l2_dom_bb)
1643 {
1644 tree t, type = TREE_TYPE (fd->loop.v);
1645 edge e, ne;
1646 int i;
1647
1648 /* Collapsed loops need work for expansion into SSA form. */
1649 gcc_assert (!gimple_in_ssa_p (cfun));
1650
1651 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1652 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1653 {
1654 gcc_assert (fd->ordered == 0);
1655 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1656 isn't supposed to be handled, as the inner loop doesn't
1657 use it. */
1658 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1659 OMP_CLAUSE__LOOPTEMP_);
1660 gcc_assert (innerc);
1661 for (i = 0; i < fd->collapse; i++)
1662 {
1663 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1664 OMP_CLAUSE__LOOPTEMP_);
1665 gcc_assert (innerc);
1666 if (i)
1667 counts[i] = OMP_CLAUSE_DECL (innerc);
1668 else
1669 counts[0] = NULL_TREE;
1670 }
1671 return;
1672 }
1673
1674 for (i = fd->collapse; i < fd->ordered; i++)
1675 {
1676 tree itype = TREE_TYPE (fd->loops[i].v);
1677 counts[i] = NULL_TREE;
1678 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1679 fold_convert (itype, fd->loops[i].n1),
1680 fold_convert (itype, fd->loops[i].n2));
1681 if (t && integer_zerop (t))
1682 {
1683 for (i = fd->collapse; i < fd->ordered; i++)
1684 counts[i] = build_int_cst (type, 0);
1685 break;
1686 }
1687 }
1688 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1689 {
1690 tree itype = TREE_TYPE (fd->loops[i].v);
1691
1692 if (i >= fd->collapse && counts[i])
1693 continue;
1694 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1695 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1696 fold_convert (itype, fd->loops[i].n1),
1697 fold_convert (itype, fd->loops[i].n2)))
1698 == NULL_TREE || !integer_onep (t)))
1699 {
1700 gcond *cond_stmt;
1701 tree n1, n2;
1702 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1703 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1704 true, GSI_SAME_STMT);
1705 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1706 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1707 true, GSI_SAME_STMT);
1708 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1709 NULL_TREE, NULL_TREE);
1710 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1711 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1712 expand_omp_regimplify_p, NULL, NULL)
1713 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1714 expand_omp_regimplify_p, NULL, NULL))
1715 {
1716 *gsi = gsi_for_stmt (cond_stmt);
1717 gimple_regimplify_operands (cond_stmt, gsi);
1718 }
1719 e = split_block (entry_bb, cond_stmt);
1720 basic_block &zero_iter_bb
1721 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1722 int &first_zero_iter
1723 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1724 if (zero_iter_bb == NULL)
1725 {
1726 gassign *assign_stmt;
1727 first_zero_iter = i;
1728 zero_iter_bb = create_empty_bb (entry_bb);
1729 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1730 *gsi = gsi_after_labels (zero_iter_bb);
1731 if (i < fd->collapse)
1732 assign_stmt = gimple_build_assign (fd->loop.n2,
1733 build_zero_cst (type));
1734 else
1735 {
1736 counts[i] = create_tmp_reg (type, ".count");
1737 assign_stmt
1738 = gimple_build_assign (counts[i], build_zero_cst (type));
1739 }
1740 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1741 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1742 entry_bb);
1743 }
1744 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1745 ne->probability = profile_probability::very_unlikely ();
1746 e->flags = EDGE_TRUE_VALUE;
1747 e->probability = ne->probability.invert ();
1748 if (l2_dom_bb == NULL)
1749 l2_dom_bb = entry_bb;
1750 entry_bb = e->dest;
1751 *gsi = gsi_last_bb (entry_bb);
1752 }
1753
1754 if (POINTER_TYPE_P (itype))
1755 itype = signed_type_for (itype);
1756 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1757 ? -1 : 1));
1758 t = fold_build2 (PLUS_EXPR, itype,
1759 fold_convert (itype, fd->loops[i].step), t);
1760 t = fold_build2 (PLUS_EXPR, itype, t,
1761 fold_convert (itype, fd->loops[i].n2));
1762 t = fold_build2 (MINUS_EXPR, itype, t,
1763 fold_convert (itype, fd->loops[i].n1));
1764 /* ?? We could probably use CEIL_DIV_EXPR instead of
1765 TRUNC_DIV_EXPR and adjust by hand.  Then again, we might not
1766 be able to generate the same code in the end, because generically
1767 we don't know that the values involved must be negative for
1768 GT. ?? */
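 /* For illustration only (not tied to any particular testcase): for a
 loop like "for (i = 0; i < n; i += 4)" with cond_code LT_EXPR the
 computation below amounts to t = ((4 - 1) + n - 0) / 4, i.e. a
 truncating division that yields ceil (n / 4) iterations.  For a
 GT_EXPR loop on an unsigned type both the numerator and the step are
 negated first, so the truncating division still rounds in the proper
 direction.  */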
1769 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1770 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1771 fold_build1 (NEGATE_EXPR, itype, t),
1772 fold_build1 (NEGATE_EXPR, itype,
1773 fold_convert (itype,
1774 fd->loops[i].step)));
1775 else
1776 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1777 fold_convert (itype, fd->loops[i].step));
1778 t = fold_convert (type, t);
1779 if (TREE_CODE (t) == INTEGER_CST)
1780 counts[i] = t;
1781 else
1782 {
1783 if (i < fd->collapse || i != first_zero_iter2)
1784 counts[i] = create_tmp_reg (type, ".count");
1785 expand_omp_build_assign (gsi, counts[i], t);
1786 }
1787 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1788 {
1789 if (i == 0)
1790 t = counts[0];
1791 else
1792 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1793 expand_omp_build_assign (gsi, fd->loop.n2, t);
1794 }
1795 }
1796 }
1797
1798 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1799 T = V;
1800 V3 = N31 + (T % count3) * STEP3;
1801 T = T / count3;
1802 V2 = N21 + (T % count2) * STEP2;
1803 T = T / count2;
1804 V1 = N11 + T * STEP1;
1805 if this loop doesn't have an inner loop construct combined with it.
1806 If it does have an inner loop construct combined with it and the
1807 iteration count isn't known constant, store values from counts array
1808 into its _looptemp_ temporaries instead. */
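 /* A small worked instance of the mapping above (purely illustrative):
 with collapse(2), count2 == 5 and logical iteration number T, the
 emitted statements amount to
 V2 = N21 + (T % 5) * STEP2;
 V1 = N11 + (T / 5) * STEP1;
 i.e. the single logical iteration number is decomposed back into the
 individual loop variables, innermost first.  */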
1809
1810 static void
1811 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1812 tree *counts, gimple *inner_stmt, tree startvar)
1813 {
1814 int i;
1815 if (gimple_omp_for_combined_p (fd->for_stmt))
1816 {
1817 /* If fd->loop.n2 is constant, then no propagation of the counts
1818 is needed, they are constant. */
1819 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1820 return;
1821
1822 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1823 ? gimple_omp_taskreg_clauses (inner_stmt)
1824 : gimple_omp_for_clauses (inner_stmt);
1825 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1826 isn't supposed to be handled, as the inner loop doesn't
1827 use it. */
1828 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1829 gcc_assert (innerc);
1830 for (i = 0; i < fd->collapse; i++)
1831 {
1832 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1833 OMP_CLAUSE__LOOPTEMP_);
1834 gcc_assert (innerc);
1835 if (i)
1836 {
1837 tree tem = OMP_CLAUSE_DECL (innerc);
1838 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1839 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1840 false, GSI_CONTINUE_LINKING);
1841 gassign *stmt = gimple_build_assign (tem, t);
1842 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1843 }
1844 }
1845 return;
1846 }
1847
1848 tree type = TREE_TYPE (fd->loop.v);
1849 tree tem = create_tmp_reg (type, ".tem");
1850 gassign *stmt = gimple_build_assign (tem, startvar);
1851 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1852
1853 for (i = fd->collapse - 1; i >= 0; i--)
1854 {
1855 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1856 itype = vtype;
1857 if (POINTER_TYPE_P (vtype))
1858 itype = signed_type_for (vtype);
1859 if (i != 0)
1860 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1861 else
1862 t = tem;
1863 t = fold_convert (itype, t);
1864 t = fold_build2 (MULT_EXPR, itype, t,
1865 fold_convert (itype, fd->loops[i].step));
1866 if (POINTER_TYPE_P (vtype))
1867 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1868 else
1869 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1870 t = force_gimple_operand_gsi (gsi, t,
1871 DECL_P (fd->loops[i].v)
1872 && TREE_ADDRESSABLE (fd->loops[i].v),
1873 NULL_TREE, false,
1874 GSI_CONTINUE_LINKING);
1875 stmt = gimple_build_assign (fd->loops[i].v, t);
1876 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1877 if (i != 0)
1878 {
1879 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1880 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1881 false, GSI_CONTINUE_LINKING);
1882 stmt = gimple_build_assign (tem, t);
1883 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1884 }
1885 }
1886 }
1887
1888 /* Helper function for expand_omp_for_*. Generate code like:
1889 L10:
1890 V3 += STEP3;
1891 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1892 L11:
1893 V3 = N31;
1894 V2 += STEP2;
1895 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1896 L12:
1897 V2 = N21;
1898 V1 += STEP1;
1899 goto BODY_BB; */
1900
1901 static basic_block
1902 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1903 basic_block body_bb)
1904 {
1905 basic_block last_bb, bb, collapse_bb = NULL;
1906 int i;
1907 gimple_stmt_iterator gsi;
1908 edge e;
1909 tree t;
1910 gimple *stmt;
1911
1912 last_bb = cont_bb;
1913 for (i = fd->collapse - 1; i >= 0; i--)
1914 {
1915 tree vtype = TREE_TYPE (fd->loops[i].v);
1916
1917 bb = create_empty_bb (last_bb);
1918 add_bb_to_loop (bb, last_bb->loop_father);
1919 gsi = gsi_start_bb (bb);
1920
1921 if (i < fd->collapse - 1)
1922 {
1923 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1924 e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
1925
1926 t = fd->loops[i + 1].n1;
1927 t = force_gimple_operand_gsi (&gsi, t,
1928 DECL_P (fd->loops[i + 1].v)
1929 && TREE_ADDRESSABLE (fd->loops[i
1930 + 1].v),
1931 NULL_TREE, false,
1932 GSI_CONTINUE_LINKING);
1933 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1934 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1935 }
1936 else
1937 collapse_bb = bb;
1938
1939 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1940
1941 if (POINTER_TYPE_P (vtype))
1942 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1943 else
1944 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1945 t = force_gimple_operand_gsi (&gsi, t,
1946 DECL_P (fd->loops[i].v)
1947 && TREE_ADDRESSABLE (fd->loops[i].v),
1948 NULL_TREE, false, GSI_CONTINUE_LINKING);
1949 stmt = gimple_build_assign (fd->loops[i].v, t);
1950 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1951
1952 if (i > 0)
1953 {
1954 t = fd->loops[i].n2;
1955 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1956 false, GSI_CONTINUE_LINKING);
1957 tree v = fd->loops[i].v;
1958 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1959 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1960 false, GSI_CONTINUE_LINKING);
1961 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1962 stmt = gimple_build_cond_empty (t);
1963 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1964 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1965 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
1966 }
1967 else
1968 make_edge (bb, body_bb, EDGE_FALLTHRU);
1969 last_bb = bb;
1970 }
1971
1972 return collapse_bb;
1973 }
1974
1975 /* Expand #pragma omp ordered depend(source). */
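 /* Sketch of the transformation (illustrative only): inside a
 "#pragma omp for ordered(2)" loop nest, the directive
 #pragma omp ordered depend(source)
 becomes a call
 GOMP_doacross_post (&.orditera);
 (or GOMP_doacross_ull_post when the iteration type is unsigned long
 long), where .orditera is the array of current iteration counters set
 up by expand_omp_ordered_source_sink below.  */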
1976
1977 static void
1978 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1979 tree *counts, location_t loc)
1980 {
1981 enum built_in_function source_ix
1982 = fd->iter_type == long_integer_type_node
1983 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1984 gimple *g
1985 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1986 build_fold_addr_expr (counts[fd->ordered]));
1987 gimple_set_location (g, loc);
1988 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1989 }
1990
1991 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
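 /* Rough sketch (illustrative only): for
 #pragma omp ordered depend(sink: i - 1, j)
 this emits, guarded by a runtime check that the sink iteration really
 lies inside the iteration space,
 GOMP_doacross_wait (iter_of (i - 1), iter_of (j));
 where iter_of () is just notation here for the 0-based iteration count
 corresponding to the given loop variable value (offsets are divided by
 the loop steps and collapsed loops are folded into one counter).  */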
1992
1993 static void
1994 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1995 tree *counts, tree c, location_t loc)
1996 {
1997 auto_vec<tree, 10> args;
1998 enum built_in_function sink_ix
1999 = fd->iter_type == long_integer_type_node
2000 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
2001 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
2002 int i;
2003 gimple_stmt_iterator gsi2 = *gsi;
2004 bool warned_step = false;
2005
2006 for (i = 0; i < fd->ordered; i++)
2007 {
2008 tree step = NULL_TREE;
2009 off = TREE_PURPOSE (deps);
2010 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2011 {
2012 step = TREE_OPERAND (off, 1);
2013 off = TREE_OPERAND (off, 0);
2014 }
2015 if (!integer_zerop (off))
2016 {
2017 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2018 || fd->loops[i].cond_code == GT_EXPR);
2019 bool forward = fd->loops[i].cond_code == LT_EXPR;
2020 if (step)
2021 {
2022 /* Non-simple Fortran DO loops.  If step is variable,
2023 we don't even know the direction at compile time, so we
2024 can't warn.  */
2025 if (TREE_CODE (step) != INTEGER_CST)
2026 break;
2027 forward = tree_int_cst_sgn (step) != -1;
2028 }
2029 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2030 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
2031 "lexically later iteration");
2032 break;
2033 }
2034 deps = TREE_CHAIN (deps);
2035 }
2036 /* If all offsets corresponding to the collapsed loops are zero,
2037 this depend clause can be ignored. FIXME: but there is still a
2038 flush needed. We need to emit one __sync_synchronize () for it
2039 though (perhaps conditionally)? Solve this together with the
2040 conservative dependence folding optimization.
2041 if (i >= fd->collapse)
2042 return; */
2043
2044 deps = OMP_CLAUSE_DECL (c);
2045 gsi_prev (&gsi2);
2046 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2047 edge e2 = split_block_after_labels (e1->dest);
2048
2049 gsi2 = gsi_after_labels (e1->dest);
2050 *gsi = gsi_last_bb (e1->src);
2051 for (i = 0; i < fd->ordered; i++)
2052 {
2053 tree itype = TREE_TYPE (fd->loops[i].v);
2054 tree step = NULL_TREE;
2055 tree orig_off = NULL_TREE;
2056 if (POINTER_TYPE_P (itype))
2057 itype = sizetype;
2058 if (i)
2059 deps = TREE_CHAIN (deps);
2060 off = TREE_PURPOSE (deps);
2061 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2062 {
2063 step = TREE_OPERAND (off, 1);
2064 off = TREE_OPERAND (off, 0);
2065 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2066 && integer_onep (fd->loops[i].step)
2067 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2068 }
2069 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2070 if (step)
2071 {
2072 off = fold_convert_loc (loc, itype, off);
2073 orig_off = off;
2074 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2075 }
2076
2077 if (integer_zerop (off))
2078 t = boolean_true_node;
2079 else
2080 {
2081 tree a;
2082 tree co = fold_convert_loc (loc, itype, off);
2083 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2084 {
2085 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2086 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2087 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2088 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2089 co);
2090 }
2091 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2092 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2093 fd->loops[i].v, co);
2094 else
2095 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2096 fd->loops[i].v, co);
2097 if (step)
2098 {
2099 tree t1, t2;
2100 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2101 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2102 fd->loops[i].n1);
2103 else
2104 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2105 fd->loops[i].n2);
2106 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2107 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2108 fd->loops[i].n2);
2109 else
2110 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2111 fd->loops[i].n1);
2112 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2113 step, build_int_cst (TREE_TYPE (step), 0));
2114 if (TREE_CODE (step) != INTEGER_CST)
2115 {
2116 t1 = unshare_expr (t1);
2117 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2118 false, GSI_CONTINUE_LINKING);
2119 t2 = unshare_expr (t2);
2120 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2121 false, GSI_CONTINUE_LINKING);
2122 }
2123 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2124 t, t2, t1);
2125 }
2126 else if (fd->loops[i].cond_code == LT_EXPR)
2127 {
2128 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2129 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2130 fd->loops[i].n1);
2131 else
2132 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2133 fd->loops[i].n2);
2134 }
2135 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2136 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2137 fd->loops[i].n2);
2138 else
2139 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2140 fd->loops[i].n1);
2141 }
2142 if (cond)
2143 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2144 else
2145 cond = t;
2146
2147 off = fold_convert_loc (loc, itype, off);
2148
2149 if (step
2150 || (fd->loops[i].cond_code == LT_EXPR
2151 ? !integer_onep (fd->loops[i].step)
2152 : !integer_minus_onep (fd->loops[i].step)))
2153 {
2154 if (step == NULL_TREE
2155 && TYPE_UNSIGNED (itype)
2156 && fd->loops[i].cond_code == GT_EXPR)
2157 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2158 fold_build1_loc (loc, NEGATE_EXPR, itype,
2159 s));
2160 else
2161 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2162 orig_off ? orig_off : off, s);
2163 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2164 build_int_cst (itype, 0));
2165 if (integer_zerop (t) && !warned_step)
2166 {
2167 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2168 "in the iteration space");
2169 warned_step = true;
2170 }
2171 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2172 cond, t);
2173 }
2174
2175 if (i <= fd->collapse - 1 && fd->collapse > 1)
2176 t = fd->loop.v;
2177 else if (counts[i])
2178 t = counts[i];
2179 else
2180 {
2181 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2182 fd->loops[i].v, fd->loops[i].n1);
2183 t = fold_convert_loc (loc, fd->iter_type, t);
2184 }
2185 if (step)
2186 /* We have divided off by step already earlier. */;
2187 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2188 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2189 fold_build1_loc (loc, NEGATE_EXPR, itype,
2190 s));
2191 else
2192 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2193 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2194 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2195 off = fold_convert_loc (loc, fd->iter_type, off);
2196 if (i <= fd->collapse - 1 && fd->collapse > 1)
2197 {
2198 if (i)
2199 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2200 off);
2201 if (i < fd->collapse - 1)
2202 {
2203 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2204 counts[i]);
2205 continue;
2206 }
2207 }
2208 off = unshare_expr (off);
2209 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2210 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2211 true, GSI_SAME_STMT);
2212 args.safe_push (t);
2213 }
2214 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2215 gimple_set_location (g, loc);
2216 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2217
2218 cond = unshare_expr (cond);
2219 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2220 GSI_CONTINUE_LINKING);
2221 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2222 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2223 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2224 e1->probability = e3->probability.invert ();
2225 e1->flags = EDGE_TRUE_VALUE;
2226 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2227
2228 *gsi = gsi_after_labels (e2->dest);
2229 }
2230
2231 /* Expand all #pragma omp ordered depend(source) and
2232 #pragma omp ordered depend(sink:...) constructs in the current
2233 #pragma omp for ordered(n) region. */
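 /* Layout note summarising the code below (no new behaviour): counts[i]
 for i from fd->collapse - 1 to fd->ordered - 1 holds the iteration
 counter for the corresponding ordered loop (NULL_TREE when the loop
 variable itself can be used), and counts[fd->ordered] is an
 addressable array ".orditera" with fd->ordered - fd->collapse + 1
 elements holding the current counter values passed to the doacross
 runtime calls.  */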
2234
2235 static void
2236 expand_omp_ordered_source_sink (struct omp_region *region,
2237 struct omp_for_data *fd, tree *counts,
2238 basic_block cont_bb)
2239 {
2240 struct omp_region *inner;
2241 int i;
2242 for (i = fd->collapse - 1; i < fd->ordered; i++)
2243 if (i == fd->collapse - 1 && fd->collapse > 1)
2244 counts[i] = NULL_TREE;
2245 else if (i >= fd->collapse && !cont_bb)
2246 counts[i] = build_zero_cst (fd->iter_type);
2247 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2248 && integer_onep (fd->loops[i].step))
2249 counts[i] = NULL_TREE;
2250 else
2251 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2252 tree atype
2253 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2254 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2255 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2256
2257 for (inner = region->inner; inner; inner = inner->next)
2258 if (inner->type == GIMPLE_OMP_ORDERED)
2259 {
2260 gomp_ordered *ord_stmt = inner->ord_stmt;
2261 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2262 location_t loc = gimple_location (ord_stmt);
2263 tree c;
2264 for (c = gimple_omp_ordered_clauses (ord_stmt);
2265 c; c = OMP_CLAUSE_CHAIN (c))
2266 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2267 break;
2268 if (c)
2269 expand_omp_ordered_source (&gsi, fd, counts, loc);
2270 for (c = gimple_omp_ordered_clauses (ord_stmt);
2271 c; c = OMP_CLAUSE_CHAIN (c))
2272 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2273 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2274 gsi_remove (&gsi, true);
2275 }
2276 }
2277
2278 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2279 collapsed. */
2280
2281 static basic_block
2282 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2283 basic_block cont_bb, basic_block body_bb,
2284 bool ordered_lastprivate)
2285 {
2286 if (fd->ordered == fd->collapse)
2287 return cont_bb;
2288
2289 if (!cont_bb)
2290 {
2291 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2292 for (int i = fd->collapse; i < fd->ordered; i++)
2293 {
2294 tree type = TREE_TYPE (fd->loops[i].v);
2295 tree n1 = fold_convert (type, fd->loops[i].n1);
2296 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2297 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2298 size_int (i - fd->collapse + 1),
2299 NULL_TREE, NULL_TREE);
2300 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2301 }
2302 return NULL;
2303 }
2304
2305 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2306 {
2307 tree t, type = TREE_TYPE (fd->loops[i].v);
2308 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2309 expand_omp_build_assign (&gsi, fd->loops[i].v,
2310 fold_convert (type, fd->loops[i].n1));
2311 if (counts[i])
2312 expand_omp_build_assign (&gsi, counts[i],
2313 build_zero_cst (fd->iter_type));
2314 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2315 size_int (i - fd->collapse + 1),
2316 NULL_TREE, NULL_TREE);
2317 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2318 if (!gsi_end_p (gsi))
2319 gsi_prev (&gsi);
2320 else
2321 gsi = gsi_last_bb (body_bb);
2322 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2323 basic_block new_body = e1->dest;
2324 if (body_bb == cont_bb)
2325 cont_bb = new_body;
2326 edge e2 = NULL;
2327 basic_block new_header;
2328 if (EDGE_COUNT (cont_bb->preds) > 0)
2329 {
2330 gsi = gsi_last_bb (cont_bb);
2331 if (POINTER_TYPE_P (type))
2332 t = fold_build_pointer_plus (fd->loops[i].v,
2333 fold_convert (sizetype,
2334 fd->loops[i].step));
2335 else
2336 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2337 fold_convert (type, fd->loops[i].step));
2338 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2339 if (counts[i])
2340 {
2341 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2342 build_int_cst (fd->iter_type, 1));
2343 expand_omp_build_assign (&gsi, counts[i], t);
2344 t = counts[i];
2345 }
2346 else
2347 {
2348 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2349 fd->loops[i].v, fd->loops[i].n1);
2350 t = fold_convert (fd->iter_type, t);
2351 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2352 true, GSI_SAME_STMT);
2353 }
2354 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2355 size_int (i - fd->collapse + 1),
2356 NULL_TREE, NULL_TREE);
2357 expand_omp_build_assign (&gsi, aref, t);
2358 gsi_prev (&gsi);
2359 e2 = split_block (cont_bb, gsi_stmt (gsi));
2360 new_header = e2->dest;
2361 }
2362 else
2363 new_header = cont_bb;
2364 gsi = gsi_after_labels (new_header);
2365 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2366 true, GSI_SAME_STMT);
2367 tree n2
2368 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2369 true, NULL_TREE, true, GSI_SAME_STMT);
2370 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2371 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2372 edge e3 = split_block (new_header, gsi_stmt (gsi));
2373 cont_bb = e3->dest;
2374 remove_edge (e1);
2375 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2376 e3->flags = EDGE_FALSE_VALUE;
2377 e3->probability = profile_probability::guessed_always ().apply_scale (1, 8);
2378 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2379 e1->probability = e3->probability.invert ();
2380
2381 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2382 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2383
2384 if (e2)
2385 {
2386 struct loop *loop = alloc_loop ();
2387 loop->header = new_header;
2388 loop->latch = e2->src;
2389 add_loop (loop, body_bb->loop_father);
2390 }
2391 }
2392
2393 /* If there are any lastprivate clauses and it is possible some loops
2394 might have zero iterations, ensure all the decls are initialized,
2395 otherwise we could crash evaluating C++ class iterators with lastprivate
2396 clauses. */
2397 bool need_inits = false;
2398 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2399 if (need_inits)
2400 {
2401 tree type = TREE_TYPE (fd->loops[i].v);
2402 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2403 expand_omp_build_assign (&gsi, fd->loops[i].v,
2404 fold_convert (type, fd->loops[i].n1));
2405 }
2406 else
2407 {
2408 tree type = TREE_TYPE (fd->loops[i].v);
2409 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2410 boolean_type_node,
2411 fold_convert (type, fd->loops[i].n1),
2412 fold_convert (type, fd->loops[i].n2));
2413 if (!integer_onep (this_cond))
2414 need_inits = true;
2415 }
2416
2417 return cont_bb;
2418 }
2419
2420 /* A subroutine of expand_omp_for. Generate code for a parallel
2421 loop with any schedule. Given parameters:
2422
2423 for (V = N1; V cond N2; V += STEP) BODY;
2424
2425 where COND is "<" or ">", we generate pseudocode
2426
2427 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2428 if (more) goto L0; else goto L3;
2429 L0:
2430 V = istart0;
2431 iend = iend0;
2432 L1:
2433 BODY;
2434 V += STEP;
2435 if (V cond iend) goto L1; else goto L2;
2436 L2:
2437 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2438 L3:
2439
2440 If this is a combined omp parallel loop, instead of the call to
2441 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2442 If this is gimple_omp_for_combined_p loop, then instead of assigning
2443 V and iend in L0 we assign the first two _looptemp_ clause decls of the
2444 inner GIMPLE_OMP_FOR and V += STEP; and
2445 if (V cond iend) goto L1; else goto L2; are removed.
2446
2447 For collapsed loops, given parameters:
2448 collapse(3)
2449 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2450 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2451 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2452 BODY;
2453
2454 we generate pseudocode
2455
2456 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2457 if (cond3 is <)
2458 adj = STEP3 - 1;
2459 else
2460 adj = STEP3 + 1;
2461 count3 = (adj + N32 - N31) / STEP3;
2462 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2463 if (cond2 is <)
2464 adj = STEP2 - 1;
2465 else
2466 adj = STEP2 + 1;
2467 count2 = (adj + N22 - N21) / STEP2;
2468 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2469 if (cond1 is <)
2470 adj = STEP1 - 1;
2471 else
2472 adj = STEP1 + 1;
2473 count1 = (adj + N12 - N11) / STEP1;
2474 count = count1 * count2 * count3;
2475 goto Z1;
2476 Z0:
2477 count = 0;
2478 Z1:
2479 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2480 if (more) goto L0; else goto L3;
2481 L0:
2482 V = istart0;
2483 T = V;
2484 V3 = N31 + (T % count3) * STEP3;
2485 T = T / count3;
2486 V2 = N21 + (T % count2) * STEP2;
2487 T = T / count2;
2488 V1 = N11 + T * STEP1;
2489 iend = iend0;
2490 L1:
2491 BODY;
2492 V += 1;
2493 if (V < iend) goto L10; else goto L2;
2494 L10:
2495 V3 += STEP3;
2496 if (V3 cond3 N32) goto L1; else goto L11;
2497 L11:
2498 V3 = N31;
2499 V2 += STEP2;
2500 if (V2 cond2 N22) goto L1; else goto L12;
2501 L12:
2502 V2 = N21;
2503 V1 += STEP1;
2504 goto L1;
2505 L2:
2506 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2507 L3:
2508
2509 */
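 /* For reference (a sketch; the actual builtins are picked by the
 caller, expand_omp_for): for e.g. schedule(dynamic) with a "long"
 iteration type, START_FN and NEXT_FN correspond to the libgomp entry
 points GOMP_loop_dynamic_start and GOMP_loop_dynamic_next, with _ull_
 variants used for unsigned long long iteration types.  */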
2510
2511 static void
2512 expand_omp_for_generic (struct omp_region *region,
2513 struct omp_for_data *fd,
2514 enum built_in_function start_fn,
2515 enum built_in_function next_fn,
2516 gimple *inner_stmt)
2517 {
2518 tree type, istart0, iend0, iend;
2519 tree t, vmain, vback, bias = NULL_TREE;
2520 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2521 basic_block l2_bb = NULL, l3_bb = NULL;
2522 gimple_stmt_iterator gsi;
2523 gassign *assign_stmt;
2524 bool in_combined_parallel = is_combined_parallel (region);
2525 bool broken_loop = region->cont == NULL;
2526 edge e, ne;
2527 tree *counts = NULL;
2528 int i;
2529 bool ordered_lastprivate = false;
2530
2531 gcc_assert (!broken_loop || !in_combined_parallel);
2532 gcc_assert (fd->iter_type == long_integer_type_node
2533 || !in_combined_parallel);
2534
2535 entry_bb = region->entry;
2536 cont_bb = region->cont;
2537 collapse_bb = NULL;
2538 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2539 gcc_assert (broken_loop
2540 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2541 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2542 l1_bb = single_succ (l0_bb);
2543 if (!broken_loop)
2544 {
2545 l2_bb = create_empty_bb (cont_bb);
2546 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2547 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2548 == l1_bb));
2549 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2550 }
2551 else
2552 l2_bb = NULL;
2553 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2554 exit_bb = region->exit;
2555
2556 gsi = gsi_last_bb (entry_bb);
2557
2558 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2559 if (fd->ordered
2560 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2561 OMP_CLAUSE_LASTPRIVATE))
2562 ordered_lastprivate = true;
2563 if (fd->collapse > 1 || fd->ordered)
2564 {
2565 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2566 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2567
2568 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2569 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2570 zero_iter1_bb, first_zero_iter1,
2571 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2572
2573 if (zero_iter1_bb)
2574 {
2575 /* Some counts[i] vars might be uninitialized if
2576 some loop has zero iterations. But the body shouldn't
2577 be executed in that case, so just avoid uninit warnings. */
2578 for (i = first_zero_iter1;
2579 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2580 if (SSA_VAR_P (counts[i]))
2581 TREE_NO_WARNING (counts[i]) = 1;
2582 gsi_prev (&gsi);
2583 e = split_block (entry_bb, gsi_stmt (gsi));
2584 entry_bb = e->dest;
2585 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2586 gsi = gsi_last_bb (entry_bb);
2587 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2588 get_immediate_dominator (CDI_DOMINATORS,
2589 zero_iter1_bb));
2590 }
2591 if (zero_iter2_bb)
2592 {
2593 /* Some counts[i] vars might be uninitialized if
2594 some loop has zero iterations. But the body shouldn't
2595 be executed in that case, so just avoid uninit warnings. */
2596 for (i = first_zero_iter2; i < fd->ordered; i++)
2597 if (SSA_VAR_P (counts[i]))
2598 TREE_NO_WARNING (counts[i]) = 1;
2599 if (zero_iter1_bb)
2600 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2601 else
2602 {
2603 gsi_prev (&gsi);
2604 e = split_block (entry_bb, gsi_stmt (gsi));
2605 entry_bb = e->dest;
2606 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2607 gsi = gsi_last_bb (entry_bb);
2608 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2609 get_immediate_dominator
2610 (CDI_DOMINATORS, zero_iter2_bb));
2611 }
2612 }
2613 if (fd->collapse == 1)
2614 {
2615 counts[0] = fd->loop.n2;
2616 fd->loop = fd->loops[0];
2617 }
2618 }
2619
2620 type = TREE_TYPE (fd->loop.v);
2621 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2622 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2623 TREE_ADDRESSABLE (istart0) = 1;
2624 TREE_ADDRESSABLE (iend0) = 1;
2625
2626 /* See if we need to bias by LLONG_MIN. */
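 /* Informal summary: when the runtime iteration type is unsigned long
 long but the loop variable is signed and its range may straddle zero,
 the start/end values are shifted by TYPE_MIN_VALUE of the signed type
 before calling the GOMP_loop_ull_* functions, and the same bias is
 subtracted again below when istart0/iend0 are converted back to the
 loop variable's type.  */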
2627 if (fd->iter_type == long_long_unsigned_type_node
2628 && TREE_CODE (type) == INTEGER_TYPE
2629 && !TYPE_UNSIGNED (type)
2630 && fd->ordered == 0)
2631 {
2632 tree n1, n2;
2633
2634 if (fd->loop.cond_code == LT_EXPR)
2635 {
2636 n1 = fd->loop.n1;
2637 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2638 }
2639 else
2640 {
2641 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2642 n2 = fd->loop.n1;
2643 }
2644 if (TREE_CODE (n1) != INTEGER_CST
2645 || TREE_CODE (n2) != INTEGER_CST
2646 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2647 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2648 }
2649
2650 gimple_stmt_iterator gsif = gsi;
2651 gsi_prev (&gsif);
2652
2653 tree arr = NULL_TREE;
2654 if (in_combined_parallel)
2655 {
2656 gcc_assert (fd->ordered == 0);
2657 /* In a combined parallel loop, emit a call to
2658 GOMP_loop_foo_next. */
2659 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2660 build_fold_addr_expr (istart0),
2661 build_fold_addr_expr (iend0));
2662 }
2663 else
2664 {
2665 tree t0, t1, t2, t3, t4;
2666 /* If this is not a combined parallel loop, emit a call to
2667 GOMP_loop_foo_start in ENTRY_BB. */
2668 t4 = build_fold_addr_expr (iend0);
2669 t3 = build_fold_addr_expr (istart0);
2670 if (fd->ordered)
2671 {
2672 t0 = build_int_cst (unsigned_type_node,
2673 fd->ordered - fd->collapse + 1);
2674 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2675 fd->ordered
2676 - fd->collapse + 1),
2677 ".omp_counts");
2678 DECL_NAMELESS (arr) = 1;
2679 TREE_ADDRESSABLE (arr) = 1;
2680 TREE_STATIC (arr) = 1;
2681 vec<constructor_elt, va_gc> *v;
2682 vec_alloc (v, fd->ordered - fd->collapse + 1);
2683 int idx;
2684
2685 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2686 {
2687 tree c;
2688 if (idx == 0 && fd->collapse > 1)
2689 c = fd->loop.n2;
2690 else
2691 c = counts[idx + fd->collapse - 1];
2692 tree purpose = size_int (idx);
2693 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2694 if (TREE_CODE (c) != INTEGER_CST)
2695 TREE_STATIC (arr) = 0;
2696 }
2697
2698 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2699 if (!TREE_STATIC (arr))
2700 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2701 void_type_node, arr),
2702 true, NULL_TREE, true, GSI_SAME_STMT);
2703 t1 = build_fold_addr_expr (arr);
2704 t2 = NULL_TREE;
2705 }
2706 else
2707 {
2708 t2 = fold_convert (fd->iter_type, fd->loop.step);
2709 t1 = fd->loop.n2;
2710 t0 = fd->loop.n1;
2711 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2712 {
2713 tree innerc
2714 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2715 OMP_CLAUSE__LOOPTEMP_);
2716 gcc_assert (innerc);
2717 t0 = OMP_CLAUSE_DECL (innerc);
2718 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2719 OMP_CLAUSE__LOOPTEMP_);
2720 gcc_assert (innerc);
2721 t1 = OMP_CLAUSE_DECL (innerc);
2722 }
2723 if (POINTER_TYPE_P (TREE_TYPE (t0))
2724 && TYPE_PRECISION (TREE_TYPE (t0))
2725 != TYPE_PRECISION (fd->iter_type))
2726 {
2727 /* Avoid casting pointers to an integer of a different size. */
2728 tree itype = signed_type_for (type);
2729 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2730 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2731 }
2732 else
2733 {
2734 t1 = fold_convert (fd->iter_type, t1);
2735 t0 = fold_convert (fd->iter_type, t0);
2736 }
2737 if (bias)
2738 {
2739 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2740 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2741 }
2742 }
2743 if (fd->iter_type == long_integer_type_node || fd->ordered)
2744 {
2745 if (fd->chunk_size)
2746 {
2747 t = fold_convert (fd->iter_type, fd->chunk_size);
2748 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2749 if (fd->ordered)
2750 t = build_call_expr (builtin_decl_explicit (start_fn),
2751 5, t0, t1, t, t3, t4);
2752 else
2753 t = build_call_expr (builtin_decl_explicit (start_fn),
2754 6, t0, t1, t2, t, t3, t4);
2755 }
2756 else if (fd->ordered)
2757 t = build_call_expr (builtin_decl_explicit (start_fn),
2758 4, t0, t1, t3, t4);
2759 else
2760 t = build_call_expr (builtin_decl_explicit (start_fn),
2761 5, t0, t1, t2, t3, t4);
2762 }
2763 else
2764 {
2765 tree t5;
2766 tree c_bool_type;
2767 tree bfn_decl;
2768
2769 /* The GOMP_loop_ull_*start functions have an additional boolean
2770 argument, true for < loops and false for > loops.
2771 In Fortran, the C bool type can be different from
2772 boolean_type_node. */
2773 bfn_decl = builtin_decl_explicit (start_fn);
2774 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2775 t5 = build_int_cst (c_bool_type,
2776 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2777 if (fd->chunk_size)
2778 {
2779 tree bfn_decl = builtin_decl_explicit (start_fn);
2780 t = fold_convert (fd->iter_type, fd->chunk_size);
2781 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2782 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2783 }
2784 else
2785 t = build_call_expr (builtin_decl_explicit (start_fn),
2786 6, t5, t0, t1, t2, t3, t4);
2787 }
2788 }
2789 if (TREE_TYPE (t) != boolean_type_node)
2790 t = fold_build2 (NE_EXPR, boolean_type_node,
2791 t, build_int_cst (TREE_TYPE (t), 0));
2792 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2793 true, GSI_SAME_STMT);
2794 if (arr && !TREE_STATIC (arr))
2795 {
2796 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2797 TREE_THIS_VOLATILE (clobber) = 1;
2798 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2799 GSI_SAME_STMT);
2800 }
2801 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2802
2803 /* Remove the GIMPLE_OMP_FOR statement. */
2804 gsi_remove (&gsi, true);
2805
2806 if (gsi_end_p (gsif))
2807 gsif = gsi_after_labels (gsi_bb (gsif));
2808 gsi_next (&gsif);
2809
2810 /* Iteration setup for sequential loop goes in L0_BB. */
2811 tree startvar = fd->loop.v;
2812 tree endvar = NULL_TREE;
2813
2814 if (gimple_omp_for_combined_p (fd->for_stmt))
2815 {
2816 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2817 && gimple_omp_for_kind (inner_stmt)
2818 == GF_OMP_FOR_KIND_SIMD);
2819 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2820 OMP_CLAUSE__LOOPTEMP_);
2821 gcc_assert (innerc);
2822 startvar = OMP_CLAUSE_DECL (innerc);
2823 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2824 OMP_CLAUSE__LOOPTEMP_);
2825 gcc_assert (innerc);
2826 endvar = OMP_CLAUSE_DECL (innerc);
2827 }
2828
2829 gsi = gsi_start_bb (l0_bb);
2830 t = istart0;
2831 if (fd->ordered && fd->collapse == 1)
2832 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2833 fold_convert (fd->iter_type, fd->loop.step));
2834 else if (bias)
2835 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2836 if (fd->ordered && fd->collapse == 1)
2837 {
2838 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2839 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2840 fd->loop.n1, fold_convert (sizetype, t));
2841 else
2842 {
2843 t = fold_convert (TREE_TYPE (startvar), t);
2844 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2845 fd->loop.n1, t);
2846 }
2847 }
2848 else
2849 {
2850 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2851 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2852 t = fold_convert (TREE_TYPE (startvar), t);
2853 }
2854 t = force_gimple_operand_gsi (&gsi, t,
2855 DECL_P (startvar)
2856 && TREE_ADDRESSABLE (startvar),
2857 NULL_TREE, false, GSI_CONTINUE_LINKING);
2858 assign_stmt = gimple_build_assign (startvar, t);
2859 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2860
2861 t = iend0;
2862 if (fd->ordered && fd->collapse == 1)
2863 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2864 fold_convert (fd->iter_type, fd->loop.step));
2865 else if (bias)
2866 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2867 if (fd->ordered && fd->collapse == 1)
2868 {
2869 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2870 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2871 fd->loop.n1, fold_convert (sizetype, t));
2872 else
2873 {
2874 t = fold_convert (TREE_TYPE (startvar), t);
2875 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2876 fd->loop.n1, t);
2877 }
2878 }
2879 else
2880 {
2881 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2882 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2883 t = fold_convert (TREE_TYPE (startvar), t);
2884 }
2885 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2886 false, GSI_CONTINUE_LINKING);
2887 if (endvar)
2888 {
2889 assign_stmt = gimple_build_assign (endvar, iend);
2890 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2891 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2892 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2893 else
2894 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2895 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2896 }
2897 /* Handle linear clause adjustments. */
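 /* Illustrative only: for "#pragma omp for linear(x:2)" the loop below
 saves the pre-loop value of x, computes how many logical iterations
 precede this thread's chunk ((startvar - n1) / step) and assigns
 x = saved_x + iterations * 2 at the start of the chunk, so each
 thread starts from the value x would have had sequentially.  */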
2898 tree itercnt = NULL_TREE;
2899 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2900 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2901 c; c = OMP_CLAUSE_CHAIN (c))
2902 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2903 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2904 {
2905 tree d = OMP_CLAUSE_DECL (c);
2906 bool is_ref = omp_is_reference (d);
2907 tree t = d, a, dest;
2908 if (is_ref)
2909 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2910 tree type = TREE_TYPE (t);
2911 if (POINTER_TYPE_P (type))
2912 type = sizetype;
2913 dest = unshare_expr (t);
2914 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2915 expand_omp_build_assign (&gsif, v, t);
2916 if (itercnt == NULL_TREE)
2917 {
2918 itercnt = startvar;
2919 tree n1 = fd->loop.n1;
2920 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2921 {
2922 itercnt
2923 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2924 itercnt);
2925 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2926 }
2927 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2928 itercnt, n1);
2929 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2930 itercnt, fd->loop.step);
2931 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2932 NULL_TREE, false,
2933 GSI_CONTINUE_LINKING);
2934 }
2935 a = fold_build2 (MULT_EXPR, type,
2936 fold_convert (type, itercnt),
2937 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2938 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2939 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2940 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2941 false, GSI_CONTINUE_LINKING);
2942 assign_stmt = gimple_build_assign (dest, t);
2943 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2944 }
2945 if (fd->collapse > 1)
2946 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2947
2948 if (fd->ordered)
2949 {
2950 /* Until now, the counts array contained the number of iterations
2951 (or the variable holding it) for the ith loop.  From now on, we
2952 need those counts only for the collapsed loops, and only for the
2953 2nd through the last collapsed one.  Move them one element
2954 earlier; we'll use counts[fd->collapse - 1] for the first
2955 source/sink iteration counter and so on, and counts[fd->ordered]
2956 as the array holding the current counter values for
2957 depend(source). */
2958 if (fd->collapse > 1)
2959 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2960 if (broken_loop)
2961 {
2962 int i;
2963 for (i = fd->collapse; i < fd->ordered; i++)
2964 {
2965 tree type = TREE_TYPE (fd->loops[i].v);
2966 tree this_cond
2967 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2968 fold_convert (type, fd->loops[i].n1),
2969 fold_convert (type, fd->loops[i].n2));
2970 if (!integer_onep (this_cond))
2971 break;
2972 }
2973 if (i < fd->ordered)
2974 {
2975 cont_bb
2976 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2977 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2978 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2979 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2980 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2981 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2982 make_edge (cont_bb, l1_bb, 0);
2983 l2_bb = create_empty_bb (cont_bb);
2984 broken_loop = false;
2985 }
2986 }
2987 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2988 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2989 ordered_lastprivate);
2990 if (counts[fd->collapse - 1])
2991 {
2992 gcc_assert (fd->collapse == 1);
2993 gsi = gsi_last_bb (l0_bb);
2994 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2995 istart0, true);
2996 gsi = gsi_last_bb (cont_bb);
2997 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2998 build_int_cst (fd->iter_type, 1));
2999 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
3000 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3001 size_zero_node, NULL_TREE, NULL_TREE);
3002 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
3003 t = counts[fd->collapse - 1];
3004 }
3005 else if (fd->collapse > 1)
3006 t = fd->loop.v;
3007 else
3008 {
3009 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3010 fd->loops[0].v, fd->loops[0].n1);
3011 t = fold_convert (fd->iter_type, t);
3012 }
3013 gsi = gsi_last_bb (l0_bb);
3014 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
3015 size_zero_node, NULL_TREE, NULL_TREE);
3016 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3017 false, GSI_CONTINUE_LINKING);
3018 expand_omp_build_assign (&gsi, aref, t, true);
3019 }
3020
3021 if (!broken_loop)
3022 {
3023 /* Code to control the increment and predicate for the sequential
3024 loop goes in the CONT_BB. */
3025 gsi = gsi_last_bb (cont_bb);
3026 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3027 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3028 vmain = gimple_omp_continue_control_use (cont_stmt);
3029 vback = gimple_omp_continue_control_def (cont_stmt);
3030
3031 if (!gimple_omp_for_combined_p (fd->for_stmt))
3032 {
3033 if (POINTER_TYPE_P (type))
3034 t = fold_build_pointer_plus (vmain, fd->loop.step);
3035 else
3036 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
3037 t = force_gimple_operand_gsi (&gsi, t,
3038 DECL_P (vback)
3039 && TREE_ADDRESSABLE (vback),
3040 NULL_TREE, true, GSI_SAME_STMT);
3041 assign_stmt = gimple_build_assign (vback, t);
3042 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3043
3044 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3045 {
3046 if (fd->collapse > 1)
3047 t = fd->loop.v;
3048 else
3049 {
3050 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3051 fd->loops[0].v, fd->loops[0].n1);
3052 t = fold_convert (fd->iter_type, t);
3053 }
3054 tree aref = build4 (ARRAY_REF, fd->iter_type,
3055 counts[fd->ordered], size_zero_node,
3056 NULL_TREE, NULL_TREE);
3057 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3058 true, GSI_SAME_STMT);
3059 expand_omp_build_assign (&gsi, aref, t);
3060 }
3061
3062 t = build2 (fd->loop.cond_code, boolean_type_node,
3063 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3064 iend);
3065 gcond *cond_stmt = gimple_build_cond_empty (t);
3066 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3067 }
3068
3069 /* Remove GIMPLE_OMP_CONTINUE. */
3070 gsi_remove (&gsi, true);
3071
3072 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3073 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3074
3075 /* Emit code to get the next parallel iteration in L2_BB. */
3076 gsi = gsi_start_bb (l2_bb);
3077
3078 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3079 build_fold_addr_expr (istart0),
3080 build_fold_addr_expr (iend0));
3081 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3082 false, GSI_CONTINUE_LINKING);
3083 if (TREE_TYPE (t) != boolean_type_node)
3084 t = fold_build2 (NE_EXPR, boolean_type_node,
3085 t, build_int_cst (TREE_TYPE (t), 0));
3086 gcond *cond_stmt = gimple_build_cond_empty (t);
3087 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3088 }
3089
3090 /* Add the loop cleanup function. */
3091 gsi = gsi_last_bb (exit_bb);
3092 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3093 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3094 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3095 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3096 else
3097 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3098 gcall *call_stmt = gimple_build_call (t, 0);
3099 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3100 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3101 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3102 if (fd->ordered)
3103 {
3104 tree arr = counts[fd->ordered];
3105 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3106 TREE_THIS_VOLATILE (clobber) = 1;
3107 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3108 GSI_SAME_STMT);
3109 }
3110 gsi_remove (&gsi, true);
3111
3112 /* Connect the new blocks. */
3113 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3114 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3115
3116 if (!broken_loop)
3117 {
3118 gimple_seq phis;
3119
3120 e = find_edge (cont_bb, l3_bb);
3121 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3122
3123 phis = phi_nodes (l3_bb);
3124 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3125 {
3126 gimple *phi = gsi_stmt (gsi);
3127 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3128 PHI_ARG_DEF_FROM_EDGE (phi, e));
3129 }
3130 remove_edge (e);
3131
3132 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3133 e = find_edge (cont_bb, l1_bb);
3134 if (e == NULL)
3135 {
3136 e = BRANCH_EDGE (cont_bb);
3137 gcc_assert (single_succ (e->dest) == l1_bb);
3138 }
3139 if (gimple_omp_for_combined_p (fd->for_stmt))
3140 {
3141 remove_edge (e);
3142 e = NULL;
3143 }
3144 else if (fd->collapse > 1)
3145 {
3146 remove_edge (e);
3147 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3148 }
3149 else
3150 e->flags = EDGE_TRUE_VALUE;
3151 if (e)
3152 {
3153 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
3154 find_edge (cont_bb, l2_bb)->probability = e->probability.invert ();
3155 }
3156 else
3157 {
3158 e = find_edge (cont_bb, l2_bb);
3159 e->flags = EDGE_FALLTHRU;
3160 }
3161 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3162
3163 if (gimple_in_ssa_p (cfun))
3164 {
3165 /* Add phis to the outer loop that connect to the phis in the inner,
3166 original loop, and move the loop entry value of the inner phi to
3167 the loop entry value of the outer phi. */
3168 gphi_iterator psi;
3169 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3170 {
3171 source_location locus;
3172 gphi *nphi;
3173 gphi *exit_phi = psi.phi ();
3174
3175 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3176 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3177
3178 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3179 edge latch_to_l1 = find_edge (latch, l1_bb);
3180 gphi *inner_phi
3181 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3182
3183 tree t = gimple_phi_result (exit_phi);
3184 tree new_res = copy_ssa_name (t, NULL);
3185 nphi = create_phi_node (new_res, l0_bb);
3186
3187 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3188 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3189 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3190 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3191 add_phi_arg (nphi, t, entry_to_l0, locus);
3192
3193 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3194 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3195
3196 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3197 };
3198 }
3199
3200 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3201 recompute_dominator (CDI_DOMINATORS, l2_bb));
3202 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3203 recompute_dominator (CDI_DOMINATORS, l3_bb));
3204 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3205 recompute_dominator (CDI_DOMINATORS, l0_bb));
3206 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3207 recompute_dominator (CDI_DOMINATORS, l1_bb));
3208
3209 /* We enter expand_omp_for_generic with a loop. This original loop may
3210 have its own loop struct, or it may be part of an outer loop struct
3211 (which may be the fake loop). */
3212 struct loop *outer_loop = entry_bb->loop_father;
3213 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3214
3215 add_bb_to_loop (l2_bb, outer_loop);
3216
3217 /* We've added a new loop around the original loop. Allocate the
3218 corresponding loop struct. */
3219 struct loop *new_loop = alloc_loop ();
3220 new_loop->header = l0_bb;
3221 new_loop->latch = l2_bb;
3222 add_loop (new_loop, outer_loop);
3223
3224 /* Allocate a loop structure for the original loop unless we already
3225 had one. */
3226 if (!orig_loop_has_loop_struct
3227 && !gimple_omp_for_combined_p (fd->for_stmt))
3228 {
3229 struct loop *orig_loop = alloc_loop ();
3230 orig_loop->header = l1_bb;
3231 /* The loop may have multiple latches. */
3232 add_loop (orig_loop, new_loop);
3233 }
3234 }
3235 }
3236
3237 /* A subroutine of expand_omp_for. Generate code for a parallel
3238 loop with static schedule and no specified chunk size. Given
3239 parameters:
3240
3241 for (V = N1; V cond N2; V += STEP) BODY;
3242
3243 where COND is "<" or ">", we generate pseudocode
3244
3245 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3246 if (cond is <)
3247 adj = STEP - 1;
3248 else
3249 adj = STEP + 1;
3250 if ((__typeof (V)) -1 > 0 && cond is >)
3251 n = -(adj + N2 - N1) / -STEP;
3252 else
3253 n = (adj + N2 - N1) / STEP;
3254 q = n / nthreads;
3255 tt = n % nthreads;
3256 if (threadid < tt) goto L3; else goto L4;
3257 L3:
3258 tt = 0;
3259 q = q + 1;
3260 L4:
3261 s0 = q * threadid + tt;
3262 e0 = s0 + q;
3263 V = s0 * STEP + N1;
3264 if (s0 >= e0) goto L2; else goto L0;
3265 L0:
3266 e = e0 * STEP + N1;
3267 L1:
3268 BODY;
3269 V += STEP;
3270 if (V cond e) goto L1;
3271 L2:
3272 */
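 /* Worked example of the partitioning above (illustrative numbers):
 with n = 103 iterations and nthreads = 4, q = 25 and tt = 3, so
 threads 0..2 each take q + 1 = 26 iterations (s0/e0 of 0/26, 26/52
 and 52/78) and thread 3 takes the remaining 25 (s0/e0 of 78/103);
 every thread gets one contiguous block and the block sizes differ by
 at most one.  */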
3273
3274 static void
3275 expand_omp_for_static_nochunk (struct omp_region *region,
3276 struct omp_for_data *fd,
3277 gimple *inner_stmt)
3278 {
3279 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3280 tree type, itype, vmain, vback;
3281 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3282 basic_block body_bb, cont_bb, collapse_bb = NULL;
3283 basic_block fin_bb;
3284 gimple_stmt_iterator gsi;
3285 edge ep;
3286 bool broken_loop = region->cont == NULL;
3287 tree *counts = NULL;
3288 tree n1, n2, step;
3289
3290 itype = type = TREE_TYPE (fd->loop.v);
3291 if (POINTER_TYPE_P (type))
3292 itype = signed_type_for (type);
3293
3294 entry_bb = region->entry;
3295 cont_bb = region->cont;
3296 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3297 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3298 gcc_assert (broken_loop
3299 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3300 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3301 body_bb = single_succ (seq_start_bb);
3302 if (!broken_loop)
3303 {
3304 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3305 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3306 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3307 }
3308 exit_bb = region->exit;
3309
3310 /* Iteration space partitioning goes in ENTRY_BB. */
3311 gsi = gsi_last_bb (entry_bb);
3312 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3313
3314 if (fd->collapse > 1)
3315 {
3316 int first_zero_iter = -1, dummy = -1;
3317 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3318
3319 counts = XALLOCAVEC (tree, fd->collapse);
3320 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3321 fin_bb, first_zero_iter,
3322 dummy_bb, dummy, l2_dom_bb);
3323 t = NULL_TREE;
3324 }
3325 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3326 t = integer_one_node;
3327 else
3328 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3329 fold_convert (type, fd->loop.n1),
3330 fold_convert (type, fd->loop.n2));
3331 if (fd->collapse == 1
3332 && TYPE_UNSIGNED (type)
3333 && (t == NULL_TREE || !integer_onep (t)))
3334 {
3335 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3336 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3337 true, GSI_SAME_STMT);
3338 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3339 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3340 true, GSI_SAME_STMT);
3341 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3342 NULL_TREE, NULL_TREE);
3343 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3344 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3345 expand_omp_regimplify_p, NULL, NULL)
3346 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3347 expand_omp_regimplify_p, NULL, NULL))
3348 {
3349 gsi = gsi_for_stmt (cond_stmt);
3350 gimple_regimplify_operands (cond_stmt, &gsi);
3351 }
3352 ep = split_block (entry_bb, cond_stmt);
3353 ep->flags = EDGE_TRUE_VALUE;
3354 entry_bb = ep->dest;
3355 ep->probability = profile_probability::very_likely ();
3356 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3357 ep->probability = profile_probability::very_unlikely ();
3358 if (gimple_in_ssa_p (cfun))
3359 {
3360 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3361 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3362 !gsi_end_p (gpi); gsi_next (&gpi))
3363 {
3364 gphi *phi = gpi.phi ();
3365 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3366 ep, UNKNOWN_LOCATION);
3367 }
3368 }
3369 gsi = gsi_last_bb (entry_bb);
3370 }
3371
3372 switch (gimple_omp_for_kind (fd->for_stmt))
3373 {
3374 case GF_OMP_FOR_KIND_FOR:
3375 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3376 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3377 break;
3378 case GF_OMP_FOR_KIND_DISTRIBUTE:
3379 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3380 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3381 break;
3382 default:
3383 gcc_unreachable ();
3384 }
3385 nthreads = build_call_expr (nthreads, 0);
3386 nthreads = fold_convert (itype, nthreads);
3387 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3388 true, GSI_SAME_STMT);
3389 threadid = build_call_expr (threadid, 0);
3390 threadid = fold_convert (itype, threadid);
3391 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3392 true, GSI_SAME_STMT);
3393
3394 n1 = fd->loop.n1;
3395 n2 = fd->loop.n2;
3396 step = fd->loop.step;
3397 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3398 {
3399 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3400 OMP_CLAUSE__LOOPTEMP_);
3401 gcc_assert (innerc);
3402 n1 = OMP_CLAUSE_DECL (innerc);
3403 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3404 OMP_CLAUSE__LOOPTEMP_);
3405 gcc_assert (innerc);
3406 n2 = OMP_CLAUSE_DECL (innerc);
3407 }
3408 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3409 true, NULL_TREE, true, GSI_SAME_STMT);
3410 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3411 true, NULL_TREE, true, GSI_SAME_STMT);
3412 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3413 true, NULL_TREE, true, GSI_SAME_STMT);
3414
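     /* Compute the total number of iterations N for the whole loop:
        N = (STEP + adj + N2 - N1) / STEP, where adj is -1 for "<" loops
        and +1 for ">" loops; for unsigned types with a ">" condition both
        operands of the division are negated so the quotient stays well
        defined.  */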
3415 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3416 t = fold_build2 (PLUS_EXPR, itype, step, t);
3417 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3418 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3419 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3420 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3421 fold_build1 (NEGATE_EXPR, itype, t),
3422 fold_build1 (NEGATE_EXPR, itype, step));
3423 else
3424 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3425 t = fold_convert (itype, t);
3426 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3427
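     /* Split the N iterations into NTHREADS contiguous blocks:
        Q = N / NTHREADS and TT = N % NTHREADS, so the first TT threads get
        Q + 1 iterations each and the remaining threads get Q.  E.g. (purely
        illustrative) N == 10, NTHREADS == 4 gives the ranges
        [0,3) [3,6) [6,8) [8,10).  */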
3428 q = create_tmp_reg (itype, "q");
3429 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3430 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3431 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3432
3433 tt = create_tmp_reg (itype, "tt");
3434 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3435 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3436 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3437
3438 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3439 gcond *cond_stmt = gimple_build_cond_empty (t);
3440 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3441
3442 second_bb = split_block (entry_bb, cond_stmt)->dest;
3443 gsi = gsi_last_bb (second_bb);
3444 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3445
3446 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3447 GSI_SAME_STMT);
3448 gassign *assign_stmt
3449 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3450 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3451
3452 third_bb = split_block (second_bb, assign_stmt)->dest;
3453 gsi = gsi_last_bb (third_bb);
3454 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3455
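     /* THIRD_BB computes this thread's half-open range [S0, E0) of logical
        iterations; if the range is empty, control branches straight to
        FIN_BB.  */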
3456 t = build2 (MULT_EXPR, itype, q, threadid);
3457 t = build2 (PLUS_EXPR, itype, t, tt);
3458 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3459
3460 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3461 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3462
3463 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3464 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3465
3466 /* Remove the GIMPLE_OMP_FOR statement. */
3467 gsi_remove (&gsi, true);
3468
3469 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3470 gsi = gsi_start_bb (seq_start_bb);
3471
3472 tree startvar = fd->loop.v;
3473 tree endvar = NULL_TREE;
3474
3475 if (gimple_omp_for_combined_p (fd->for_stmt))
3476 {
3477 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3478 ? gimple_omp_parallel_clauses (inner_stmt)
3479 : gimple_omp_for_clauses (inner_stmt);
3480 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3481 gcc_assert (innerc);
3482 startvar = OMP_CLAUSE_DECL (innerc);
3483 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3484 OMP_CLAUSE__LOOPTEMP_);
3485 gcc_assert (innerc);
3486 endvar = OMP_CLAUSE_DECL (innerc);
3487 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3488 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3489 {
3490 int i;
3491 for (i = 1; i < fd->collapse; i++)
3492 {
3493 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3494 OMP_CLAUSE__LOOPTEMP_);
3495 gcc_assert (innerc);
3496 }
3497 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3498 OMP_CLAUSE__LOOPTEMP_);
3499 if (innerc)
3500 {
3501 /* If needed (distribute parallel for with lastprivate),
3502 propagate down the total number of iterations. */
3503 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3504 fd->loop.n2);
3505 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3506 GSI_CONTINUE_LINKING);
3507 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3508 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3509 }
3510 }
3511 }
3512 t = fold_convert (itype, s0);
3513 t = fold_build2 (MULT_EXPR, itype, t, step);
3514 if (POINTER_TYPE_P (type))
3515 t = fold_build_pointer_plus (n1, t);
3516 else
3517 t = fold_build2 (PLUS_EXPR, type, t, n1);
3518 t = fold_convert (TREE_TYPE (startvar), t);
3519 t = force_gimple_operand_gsi (&gsi, t,
3520 DECL_P (startvar)
3521 && TREE_ADDRESSABLE (startvar),
3522 NULL_TREE, false, GSI_CONTINUE_LINKING);
3523 assign_stmt = gimple_build_assign (startvar, t);
3524 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3525
3526 t = fold_convert (itype, e0);
3527 t = fold_build2 (MULT_EXPR, itype, t, step);
3528 if (POINTER_TYPE_P (type))
3529 t = fold_build_pointer_plus (n1, t);
3530 else
3531 t = fold_build2 (PLUS_EXPR, type, t, n1);
3532 t = fold_convert (TREE_TYPE (startvar), t);
3533 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3534 false, GSI_CONTINUE_LINKING);
3535 if (endvar)
3536 {
3537 assign_stmt = gimple_build_assign (endvar, e);
3538 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3539 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3540 assign_stmt = gimple_build_assign (fd->loop.v, e);
3541 else
3542 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3543 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3544 }
3545 /* Handle linear clause adjustments. */
3546 tree itercnt = NULL_TREE;
3547 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3548 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3549 c; c = OMP_CLAUSE_CHAIN (c))
3550 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3551 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3552 {
3553 tree d = OMP_CLAUSE_DECL (c);
3554 bool is_ref = omp_is_reference (d);
3555 tree t = d, a, dest;
3556 if (is_ref)
3557 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3558 if (itercnt == NULL_TREE)
3559 {
3560 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3561 {
3562 itercnt = fold_build2 (MINUS_EXPR, itype,
3563 fold_convert (itype, n1),
3564 fold_convert (itype, fd->loop.n1));
3565 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3566 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3567 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3568 NULL_TREE, false,
3569 GSI_CONTINUE_LINKING);
3570 }
3571 else
3572 itercnt = s0;
3573 }
3574 tree type = TREE_TYPE (t);
3575 if (POINTER_TYPE_P (type))
3576 type = sizetype;
3577 a = fold_build2 (MULT_EXPR, type,
3578 fold_convert (type, itercnt),
3579 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3580 dest = unshare_expr (t);
3581 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3582 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3583 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3584 false, GSI_CONTINUE_LINKING);
3585 assign_stmt = gimple_build_assign (dest, t);
3586 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3587 }
3588 if (fd->collapse > 1)
3589 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3590
3591 if (!broken_loop)
3592 {
3593 /* The code controlling the sequential loop replaces the
3594 GIMPLE_OMP_CONTINUE. */
3595 gsi = gsi_last_bb (cont_bb);
3596 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3597 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3598 vmain = gimple_omp_continue_control_use (cont_stmt);
3599 vback = gimple_omp_continue_control_def (cont_stmt);
3600
3601 if (!gimple_omp_for_combined_p (fd->for_stmt))
3602 {
3603 if (POINTER_TYPE_P (type))
3604 t = fold_build_pointer_plus (vmain, step);
3605 else
3606 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3607 t = force_gimple_operand_gsi (&gsi, t,
3608 DECL_P (vback)
3609 && TREE_ADDRESSABLE (vback),
3610 NULL_TREE, true, GSI_SAME_STMT);
3611 assign_stmt = gimple_build_assign (vback, t);
3612 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3613
3614 t = build2 (fd->loop.cond_code, boolean_type_node,
3615 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3616 ? t : vback, e);
3617 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3618 }
3619
3620 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3621 gsi_remove (&gsi, true);
3622
3623 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3624 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3625 }
3626
3627 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3628 gsi = gsi_last_bb (exit_bb);
3629 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3630 {
3631 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3632 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3633 }
3634 gsi_remove (&gsi, true);
3635
3636 /* Connect all the blocks. */
3637 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3638 ep->probability = profile_probability::guessed_always ().apply_scale (3, 4);
3639 ep = find_edge (entry_bb, second_bb);
3640 ep->flags = EDGE_TRUE_VALUE;
3641 ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
3642 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3643 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3644
3645 if (!broken_loop)
3646 {
3647 ep = find_edge (cont_bb, body_bb);
3648 if (ep == NULL)
3649 {
3650 ep = BRANCH_EDGE (cont_bb);
3651 gcc_assert (single_succ (ep->dest) == body_bb);
3652 }
3653 if (gimple_omp_for_combined_p (fd->for_stmt))
3654 {
3655 remove_edge (ep);
3656 ep = NULL;
3657 }
3658 else if (fd->collapse > 1)
3659 {
3660 remove_edge (ep);
3661 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3662 }
3663 else
3664 ep->flags = EDGE_TRUE_VALUE;
3665 find_edge (cont_bb, fin_bb)->flags
3666 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3667 }
3668
3669 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3670 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3671 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3672
3673 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3674 recompute_dominator (CDI_DOMINATORS, body_bb));
3675 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3676 recompute_dominator (CDI_DOMINATORS, fin_bb));
3677
3678 struct loop *loop = body_bb->loop_father;
3679 if (loop != entry_bb->loop_father)
3680 {
3681 gcc_assert (broken_loop || loop->header == body_bb);
3682 gcc_assert (broken_loop
3683 || loop->latch == region->cont
3684 || single_pred (loop->latch) == region->cont);
3685 return;
3686 }
3687
3688 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3689 {
3690 loop = alloc_loop ();
3691 loop->header = body_bb;
3692 if (collapse_bb == NULL)
3693 loop->latch = cont_bb;
3694 add_loop (loop, body_bb->loop_father);
3695 }
3696 }
3697
3698 /* Return the phi in E->DEST whose argument on edge E is ARG, or NULL. */
3699
3700 static gphi *
3701 find_phi_with_arg_on_edge (tree arg, edge e)
3702 {
3703 basic_block bb = e->dest;
3704
3705 for (gphi_iterator gpi = gsi_start_phis (bb);
3706 !gsi_end_p (gpi);
3707 gsi_next (&gpi))
3708 {
3709 gphi *phi = gpi.phi ();
3710 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3711 return phi;
3712 }
3713
3714 return NULL;
3715 }
3716
3717 /* A subroutine of expand_omp_for. Generate code for a parallel
3718 loop with static schedule and a specified chunk size. Given
3719 parameters:
3720
3721 for (V = N1; V cond N2; V += STEP) BODY;
3722
3723 where COND is "<" or ">", we generate pseudocode
3724
3725 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3726 if (cond is <)
3727 adj = STEP - 1;
3728 else
3729 adj = STEP + 1;
3730 if ((__typeof (V)) -1 > 0 && cond is >)
3731 n = -(adj + N2 - N1) / -STEP;
3732 else
3733 n = (adj + N2 - N1) / STEP;
3734 trip = 0;
3735 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3736 here so that V is defined
3737 if the loop is not entered
3738 L0:
3739 s0 = (trip * nthreads + threadid) * CHUNK;
3740 e0 = min (s0 + CHUNK, n);
3741 if (s0 < n) goto L1; else goto L4;
3742 L1:
3743 V = s0 * STEP + N1;
3744 e = e0 * STEP + N1;
3745 L2:
3746 BODY;
3747 V += STEP;
3748 if (V cond e) goto L2; else goto L3;
3749 L3:
3750 trip += 1;
3751 goto L0;
3752 L4:
3753 */
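/* For example (purely illustrative), a loop such as

     #pragma omp for schedule(static, 2)
     for (i = 0; i < n; i++)
       body (i);

   follows the pseudocode above with CHUNK == 2; with nthreads == 4 the
   thread with threadid == 1 executes iterations [2,4) on trip 0, [10,12)
   on trip 1, [18,20) on trip 2, and so on while s0 < n.  */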
3754
3755 static void
3756 expand_omp_for_static_chunk (struct omp_region *region,
3757 struct omp_for_data *fd, gimple *inner_stmt)
3758 {
3759 tree n, s0, e0, e, t;
3760 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3761 tree type, itype, vmain, vback, vextra;
3762 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3763 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3764 gimple_stmt_iterator gsi;
3765 edge se;
3766 bool broken_loop = region->cont == NULL;
3767 tree *counts = NULL;
3768 tree n1, n2, step;
3769
3770 itype = type = TREE_TYPE (fd->loop.v);
3771 if (POINTER_TYPE_P (type))
3772 itype = signed_type_for (type);
3773
3774 entry_bb = region->entry;
3775 se = split_block (entry_bb, last_stmt (entry_bb));
3776 entry_bb = se->src;
3777 iter_part_bb = se->dest;
3778 cont_bb = region->cont;
3779 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3780 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3781 gcc_assert (broken_loop
3782 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3783 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3784 body_bb = single_succ (seq_start_bb);
3785 if (!broken_loop)
3786 {
3787 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3788 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3789 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3790 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3791 }
3792 exit_bb = region->exit;
3793
3794 /* Trip and adjustment setup goes in ENTRY_BB. */
3795 gsi = gsi_last_bb (entry_bb);
3796 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3797
3798 if (fd->collapse > 1)
3799 {
3800 int first_zero_iter = -1, dummy = -1;
3801 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3802
3803 counts = XALLOCAVEC (tree, fd->collapse);
3804 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3805 fin_bb, first_zero_iter,
3806 dummy_bb, dummy, l2_dom_bb);
3807 t = NULL_TREE;
3808 }
3809 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3810 t = integer_one_node;
3811 else
3812 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3813 fold_convert (type, fd->loop.n1),
3814 fold_convert (type, fd->loop.n2));
3815 if (fd->collapse == 1
3816 && TYPE_UNSIGNED (type)
3817 && (t == NULL_TREE || !integer_onep (t)))
3818 {
3819 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3820 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3821 true, GSI_SAME_STMT);
3822 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3823 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3824 true, GSI_SAME_STMT);
3825 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3826 NULL_TREE, NULL_TREE);
3827 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3828 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3829 expand_omp_regimplify_p, NULL, NULL)
3830 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3831 expand_omp_regimplify_p, NULL, NULL))
3832 {
3833 gsi = gsi_for_stmt (cond_stmt);
3834 gimple_regimplify_operands (cond_stmt, &gsi);
3835 }
3836 se = split_block (entry_bb, cond_stmt);
3837 se->flags = EDGE_TRUE_VALUE;
3838 entry_bb = se->dest;
3839 se->probability = profile_probability::very_likely ();
3840 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3841 se->probability = profile_probability::very_unlikely ();
3842 if (gimple_in_ssa_p (cfun))
3843 {
3844 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3845 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3846 !gsi_end_p (gpi); gsi_next (&gpi))
3847 {
3848 gphi *phi = gpi.phi ();
3849 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3850 se, UNKNOWN_LOCATION);
3851 }
3852 }
3853 gsi = gsi_last_bb (entry_bb);
3854 }
3855
3856 switch (gimple_omp_for_kind (fd->for_stmt))
3857 {
3858 case GF_OMP_FOR_KIND_FOR:
3859 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3860 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3861 break;
3862 case GF_OMP_FOR_KIND_DISTRIBUTE:
3863 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3864 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3865 break;
3866 default:
3867 gcc_unreachable ();
3868 }
3869 nthreads = build_call_expr (nthreads, 0);
3870 nthreads = fold_convert (itype, nthreads);
3871 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3872 true, GSI_SAME_STMT);
3873 threadid = build_call_expr (threadid, 0);
3874 threadid = fold_convert (itype, threadid);
3875 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3876 true, GSI_SAME_STMT);
3877
3878 n1 = fd->loop.n1;
3879 n2 = fd->loop.n2;
3880 step = fd->loop.step;
3881 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3882 {
3883 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3884 OMP_CLAUSE__LOOPTEMP_);
3885 gcc_assert (innerc);
3886 n1 = OMP_CLAUSE_DECL (innerc);
3887 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3888 OMP_CLAUSE__LOOPTEMP_);
3889 gcc_assert (innerc);
3890 n2 = OMP_CLAUSE_DECL (innerc);
3891 }
3892 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3893 true, NULL_TREE, true, GSI_SAME_STMT);
3894 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3895 true, NULL_TREE, true, GSI_SAME_STMT);
3896 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3897 true, NULL_TREE, true, GSI_SAME_STMT);
3898 tree chunk_size = fold_convert (itype, fd->chunk_size);
3899 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3900 chunk_size
3901 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3902 GSI_SAME_STMT);
3903
3904 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3905 t = fold_build2 (PLUS_EXPR, itype, step, t);
3906 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3907 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3908 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3909 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3910 fold_build1 (NEGATE_EXPR, itype, t),
3911 fold_build1 (NEGATE_EXPR, itype, step));
3912 else
3913 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3914 t = fold_convert (itype, t);
3915 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3916 true, GSI_SAME_STMT);
3917
3918 trip_var = create_tmp_reg (itype, ".trip");
3919 if (gimple_in_ssa_p (cfun))
3920 {
3921 trip_init = make_ssa_name (trip_var);
3922 trip_main = make_ssa_name (trip_var);
3923 trip_back = make_ssa_name (trip_var);
3924 }
3925 else
3926 {
3927 trip_init = trip_var;
3928 trip_main = trip_var;
3929 trip_back = trip_var;
3930 }
3931
3932 gassign *assign_stmt
3933 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3934 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3935
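     /* VEXTRA = N1 + threadid * CHUNK * STEP gives the loop variable a
        well-defined value even if this thread never enters the loop body
        (see the pseudocode above).  */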
3936 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3937 t = fold_build2 (MULT_EXPR, itype, t, step);
3938 if (POINTER_TYPE_P (type))
3939 t = fold_build_pointer_plus (n1, t);
3940 else
3941 t = fold_build2 (PLUS_EXPR, type, t, n1);
3942 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3943 true, GSI_SAME_STMT);
3944
3945 /* Remove the GIMPLE_OMP_FOR. */
3946 gsi_remove (&gsi, true);
3947
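     /* Remember this position in ENTRY_BB; the linear clause handling
        further down uses GSIF to insert loop-invariant setup code here.  */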
3948 gimple_stmt_iterator gsif = gsi;
3949
3950 /* Iteration space partitioning goes in ITER_PART_BB. */
3951 gsi = gsi_last_bb (iter_part_bb);
3952
3953 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3954 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3955 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3956 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3957 false, GSI_CONTINUE_LINKING);
3958
3959 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3960 t = fold_build2 (MIN_EXPR, itype, t, n);
3961 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3962 false, GSI_CONTINUE_LINKING);
3963
3964 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3965 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3966
3967 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3968 gsi = gsi_start_bb (seq_start_bb);
3969
3970 tree startvar = fd->loop.v;
3971 tree endvar = NULL_TREE;
3972
3973 if (gimple_omp_for_combined_p (fd->for_stmt))
3974 {
3975 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3976 ? gimple_omp_parallel_clauses (inner_stmt)
3977 : gimple_omp_for_clauses (inner_stmt);
3978 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3979 gcc_assert (innerc);
3980 startvar = OMP_CLAUSE_DECL (innerc);
3981 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3982 OMP_CLAUSE__LOOPTEMP_);
3983 gcc_assert (innerc);
3984 endvar = OMP_CLAUSE_DECL (innerc);
3985 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3986 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3987 {
3988 int i;
3989 for (i = 1; i < fd->collapse; i++)
3990 {
3991 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3992 OMP_CLAUSE__LOOPTEMP_);
3993 gcc_assert (innerc);
3994 }
3995 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3996 OMP_CLAUSE__LOOPTEMP_);
3997 if (innerc)
3998 {
3999 /* If needed (distribute parallel for with lastprivate),
4000 propagate down the total number of iterations. */
4001 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
4002 fd->loop.n2);
4003 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
4004 GSI_CONTINUE_LINKING);
4005 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4006 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4007 }
4008 }
4009 }
4010
4011 t = fold_convert (itype, s0);
4012 t = fold_build2 (MULT_EXPR, itype, t, step);
4013 if (POINTER_TYPE_P (type))
4014 t = fold_build_pointer_plus (n1, t);
4015 else
4016 t = fold_build2 (PLUS_EXPR, type, t, n1);
4017 t = fold_convert (TREE_TYPE (startvar), t);
4018 t = force_gimple_operand_gsi (&gsi, t,
4019 DECL_P (startvar)
4020 && TREE_ADDRESSABLE (startvar),
4021 NULL_TREE, false, GSI_CONTINUE_LINKING);
4022 assign_stmt = gimple_build_assign (startvar, t);
4023 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4024
4025 t = fold_convert (itype, e0);
4026 t = fold_build2 (MULT_EXPR, itype, t, step);
4027 if (POINTER_TYPE_P (type))
4028 t = fold_build_pointer_plus (n1, t);
4029 else
4030 t = fold_build2 (PLUS_EXPR, type, t, n1);
4031 t = fold_convert (TREE_TYPE (startvar), t);
4032 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4033 false, GSI_CONTINUE_LINKING);
4034 if (endvar)
4035 {
4036 assign_stmt = gimple_build_assign (endvar, e);
4037 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4038 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
4039 assign_stmt = gimple_build_assign (fd->loop.v, e);
4040 else
4041 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4042 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4043 }
4044 /* Handle linear clause adjustments. */
4045 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4046 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4047 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4048 c; c = OMP_CLAUSE_CHAIN (c))
4049 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4050 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4051 {
4052 tree d = OMP_CLAUSE_DECL (c);
4053 bool is_ref = omp_is_reference (d);
4054 tree t = d, a, dest;
4055 if (is_ref)
4056 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4057 tree type = TREE_TYPE (t);
4058 if (POINTER_TYPE_P (type))
4059 type = sizetype;
4060 dest = unshare_expr (t);
4061 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4062 expand_omp_build_assign (&gsif, v, t);
4063 if (itercnt == NULL_TREE)
4064 {
4065 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4066 {
4067 itercntbias
4068 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4069 fold_convert (itype, fd->loop.n1));
4070 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4071 itercntbias, step);
4072 itercntbias
4073 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4074 NULL_TREE, true,
4075 GSI_SAME_STMT);
4076 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4077 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4078 NULL_TREE, false,
4079 GSI_CONTINUE_LINKING);
4080 }
4081 else
4082 itercnt = s0;
4083 }
4084 a = fold_build2 (MULT_EXPR, type,
4085 fold_convert (type, itercnt),
4086 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4087 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4088 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4089 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4090 false, GSI_CONTINUE_LINKING);
4091 assign_stmt = gimple_build_assign (dest, t);
4092 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4093 }
4094 if (fd->collapse > 1)
4095 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4096
4097 if (!broken_loop)
4098 {
4099 /* The code controlling the sequential loop goes in CONT_BB,
4100 replacing the GIMPLE_OMP_CONTINUE. */
4101 gsi = gsi_last_bb (cont_bb);
4102 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4103 vmain = gimple_omp_continue_control_use (cont_stmt);
4104 vback = gimple_omp_continue_control_def (cont_stmt);
4105
4106 if (!gimple_omp_for_combined_p (fd->for_stmt))
4107 {
4108 if (POINTER_TYPE_P (type))
4109 t = fold_build_pointer_plus (vmain, step);
4110 else
4111 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4112 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4113 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4114 true, GSI_SAME_STMT);
4115 assign_stmt = gimple_build_assign (vback, t);
4116 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4117
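          /* With a chunk size of 1 every chunk contains exactly one
             iteration, so the sequential loop must never branch back;
             emit a constant-false condition in that case.  */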
4118 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4119 t = build2 (EQ_EXPR, boolean_type_node,
4120 build_int_cst (itype, 0),
4121 build_int_cst (itype, 1));
4122 else
4123 t = build2 (fd->loop.cond_code, boolean_type_node,
4124 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4125 ? t : vback, e);
4126 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4127 }
4128
4129 /* Remove GIMPLE_OMP_CONTINUE. */
4130 gsi_remove (&gsi, true);
4131
4132 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4133 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4134
4135 /* Trip update code goes into TRIP_UPDATE_BB. */
4136 gsi = gsi_start_bb (trip_update_bb);
4137
4138 t = build_int_cst (itype, 1);
4139 t = build2 (PLUS_EXPR, itype, trip_main, t);
4140 assign_stmt = gimple_build_assign (trip_back, t);
4141 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4142 }
4143
4144 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4145 gsi = gsi_last_bb (exit_bb);
4146 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4147 {
4148 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4149 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4150 }
4151 gsi_remove (&gsi, true);
4152
4153 /* Connect the new blocks. */
4154 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4155 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4156
4157 if (!broken_loop)
4158 {
4159 se = find_edge (cont_bb, body_bb);
4160 if (se == NULL)
4161 {
4162 se = BRANCH_EDGE (cont_bb);
4163 gcc_assert (single_succ (se->dest) == body_bb);
4164 }
4165 if (gimple_omp_for_combined_p (fd->for_stmt))
4166 {
4167 remove_edge (se);
4168 se = NULL;
4169 }
4170 else if (fd->collapse > 1)
4171 {
4172 remove_edge (se);
4173 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4174 }
4175 else
4176 se->flags = EDGE_TRUE_VALUE;
4177 find_edge (cont_bb, trip_update_bb)->flags
4178 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4179
4180 redirect_edge_and_branch (single_succ_edge (trip_update_bb),
4181 iter_part_bb);
4182 }
4183
4184 if (gimple_in_ssa_p (cfun))
4185 {
4186 gphi_iterator psi;
4187 gphi *phi;
4188 edge re, ene;
4189 edge_var_map *vm;
4190 size_t i;
4191
4192 gcc_assert (fd->collapse == 1 && !broken_loop);
4193
4194 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4195 remove arguments of the phi nodes in fin_bb. We need to create
4196 appropriate phi nodes in iter_part_bb instead. */
4197 se = find_edge (iter_part_bb, fin_bb);
4198 re = single_succ_edge (trip_update_bb);
4199 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4200 ene = single_succ_edge (entry_bb);
4201
4202 psi = gsi_start_phis (fin_bb);
4203 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4204 gsi_next (&psi), ++i)
4205 {
4206 gphi *nphi;
4207 source_location locus;
4208
4209 phi = psi.phi ();
4210 if (operand_equal_p (gimple_phi_arg_def (phi, 0),
4211 redirect_edge_var_map_def (vm), 0))
4212 continue;
4213
4214 t = gimple_phi_result (phi);
4215 gcc_assert (t == redirect_edge_var_map_result (vm));
4216
4217 if (!single_pred_p (fin_bb))
4218 t = copy_ssa_name (t, phi);
4219
4220 nphi = create_phi_node (t, iter_part_bb);
4221
4222 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4223 locus = gimple_phi_arg_location_from_edge (phi, se);
4224
4225 /* A special case -- fd->loop.v is not yet computed in
4226 iter_part_bb, we need to use vextra instead. */
4227 if (t == fd->loop.v)
4228 t = vextra;
4229 add_phi_arg (nphi, t, ene, locus);
4230 locus = redirect_edge_var_map_location (vm);
4231 tree back_arg = redirect_edge_var_map_def (vm);
4232 add_phi_arg (nphi, back_arg, re, locus);
4233 edge ce = find_edge (cont_bb, body_bb);
4234 if (ce == NULL)
4235 {
4236 ce = BRANCH_EDGE (cont_bb);
4237 gcc_assert (single_succ (ce->dest) == body_bb);
4238 ce = single_succ_edge (ce->dest);
4239 }
4240 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4241 gcc_assert (inner_loop_phi != NULL);
4242 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4243 find_edge (seq_start_bb, body_bb), locus);
4244
4245 if (!single_pred_p (fin_bb))
4246 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4247 }
4248 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4249 redirect_edge_var_map_clear (re);
4250 if (single_pred_p (fin_bb))
4251 while (1)
4252 {
4253 psi = gsi_start_phis (fin_bb);
4254 if (gsi_end_p (psi))
4255 break;
4256 remove_phi_node (&psi, false);
4257 }
4258
4259 /* Make phi node for trip. */
4260 phi = create_phi_node (trip_main, iter_part_bb);
4261 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4262 UNKNOWN_LOCATION);
4263 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4264 UNKNOWN_LOCATION);
4265 }
4266
4267 if (!broken_loop)
4268 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4269 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4270 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4271 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4272 recompute_dominator (CDI_DOMINATORS, fin_bb));
4273 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4274 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4275 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4276 recompute_dominator (CDI_DOMINATORS, body_bb));
4277
4278 if (!broken_loop)
4279 {
4280 struct loop *loop = body_bb->loop_father;
4281 struct loop *trip_loop = alloc_loop ();
4282 trip_loop->header = iter_part_bb;
4283 trip_loop->latch = trip_update_bb;
4284 add_loop (trip_loop, iter_part_bb->loop_father);
4285
4286 if (loop != entry_bb->loop_father)
4287 {
4288 gcc_assert (loop->header == body_bb);
4289 gcc_assert (loop->latch == region->cont
4290 || single_pred (loop->latch) == region->cont);
4291 trip_loop->inner = loop;
4292 return;
4293 }
4294
4295 if (!gimple_omp_for_combined_p (fd->for_stmt))
4296 {
4297 loop = alloc_loop ();
4298 loop->header = body_bb;
4299 if (collapse_bb == NULL)
4300 loop->latch = cont_bb;
4301 add_loop (loop, trip_loop);
4302 }
4303 }
4304 }
4305
4306 /* A subroutine of expand_omp_for. Generate code for _Cilk_for loop.
4307 Given parameters:
4308 for (V = N1; V cond N2; V += STEP) BODY;
4309
4310 where COND is "<" or ">" or "!=", we generate pseudocode
4311
4312 for (ind_var = low; ind_var < high; ind_var++)
4313 {
4314 V = n1 + (ind_var * STEP)
4315
4316 <BODY>
4317 }
4318
4319 In the above pseudocode, low and high are function parameters of the
4320 child function. In the function below, we insert a temporary
4321 variable together with calls to two OMP functions that will not be
4322 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
4323 with _Cilk_for); these calls are later replaced with low and high
4324 by the function that handles taskreg. */
4325
4326
4327 static void
4328 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4329 {
4330 bool broken_loop = region->cont == NULL;
4331 basic_block entry_bb = region->entry;
4332 basic_block cont_bb = region->cont;
4333
4334 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4335 gcc_assert (broken_loop
4336 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4337 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4338 basic_block l1_bb, l2_bb;
4339
4340 if (!broken_loop)
4341 {
4342 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4343 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4344 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4345 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4346 }
4347 else
4348 {
4349 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4350 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4351 l2_bb = single_succ (l1_bb);
4352 }
4353 basic_block exit_bb = region->exit;
4354 basic_block l2_dom_bb = NULL;
4355
4356 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4357
4358 /* The statements below, up to the "tree high_val = ..." line, are pseudo
4359 statements used to pass information to expand_omp_taskreg.
4360 low_val and high_val will be replaced by the __low and __high
4361 parameters from the child function.
4362
4363 The call_exprs part is a place-holder; it is mainly used
4364 to tell the top-level part unambiguously that this is
4365 where we should put low and high (reasoning given in the header
4366 comment). */
4367
4368 gomp_parallel *par_stmt
4369 = as_a <gomp_parallel *> (last_stmt (region->outer->entry));
4370 tree child_fndecl = gimple_omp_parallel_child_fn (par_stmt);
4371 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4372 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4373 {
4374 if (id_equal (DECL_NAME (t), "__high"))
4375 high_val = t;
4376 else if (id_equal (DECL_NAME (t), "__low"))
4377 low_val = t;
4378 }
4379 gcc_assert (low_val && high_val);
4380
4381 tree type = TREE_TYPE (low_val);
4382 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4383 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4384
4385 /* Not needed in SSA form right now. */
4386 gcc_assert (!gimple_in_ssa_p (cfun));
4387 if (l2_dom_bb == NULL)
4388 l2_dom_bb = l1_bb;
4389
4390 tree n1 = low_val;
4391 tree n2 = high_val;
4392
4393 gimple *stmt = gimple_build_assign (ind_var, n1);
4394
4395 /* Replace the GIMPLE_OMP_FOR statement. */
4396 gsi_replace (&gsi, stmt, true);
4397
4398 if (!broken_loop)
4399 {
4400 /* Code to control the increment goes in the CONT_BB. */
4401 gsi = gsi_last_bb (cont_bb);
4402 stmt = gsi_stmt (gsi);
4403 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4404 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4405 build_one_cst (type));
4406
4407 /* Replace GIMPLE_OMP_CONTINUE. */
4408 gsi_replace (&gsi, stmt, true);
4409 }
4410
4411 /* Emit the condition in L1_BB. */
4412 gsi = gsi_after_labels (l1_bb);
4413 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4414 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4415 fd->loop.step);
4416 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4417 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4418 fd->loop.n1, fold_convert (sizetype, t));
4419 else
4420 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4421 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4422 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4423 expand_omp_build_assign (&gsi, fd->loop.v, t);
4424
4425 /* The condition is always '<' since the runtime will fill in the low
4426 and high values. */
4427 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4428 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4429
4430 /* Remove GIMPLE_OMP_RETURN. */
4431 gsi = gsi_last_bb (exit_bb);
4432 gsi_remove (&gsi, true);
4433
4434 /* Connect the new blocks. */
4435 remove_edge (FALLTHRU_EDGE (entry_bb));
4436
4437 edge e, ne;
4438 if (!broken_loop)
4439 {
4440 remove_edge (BRANCH_EDGE (entry_bb));
4441 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4442
4443 e = BRANCH_EDGE (l1_bb);
4444 ne = FALLTHRU_EDGE (l1_bb);
4445 e->flags = EDGE_TRUE_VALUE;
4446 }
4447 else
4448 {
4449 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4450
4451 ne = single_succ_edge (l1_bb);
4452 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4453
4454 }
4455 ne->flags = EDGE_FALSE_VALUE;
4456 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4457 ne->probability = e->probability.invert ();
4458
4459 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4460 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4461 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4462
4463 if (!broken_loop)
4464 {
4465 struct loop *loop = alloc_loop ();
4466 loop->header = l1_bb;
4467 loop->latch = cont_bb;
4468 add_loop (loop, l1_bb->loop_father);
4469 loop->safelen = INT_MAX;
4470 }
4471
4472 /* Pick the correct library function based on the precision of the
4473 induction variable type. */
4474 tree lib_fun = NULL_TREE;
4475 if (TYPE_PRECISION (type) == 32)
4476 lib_fun = cilk_for_32_fndecl;
4477 else if (TYPE_PRECISION (type) == 64)
4478 lib_fun = cilk_for_64_fndecl;
4479 else
4480 gcc_unreachable ();
4481
4482 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4483
4484 /* WS_ARGS contains the library function flavor to call:
4485 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32, and the
4486 user-defined grain value. If the user does not define one, then zero
4487 is passed in by the parser. */
4488 vec_alloc (region->ws_args, 2);
4489 region->ws_args->quick_push (lib_fun);
4490 region->ws_args->quick_push (fd->chunk_size);
4491 }
4492
4493 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4494 loop. Given parameters:
4495
4496 for (V = N1; V cond N2; V += STEP) BODY;
4497
4498 where COND is "<" or ">", we generate pseudocode
4499
4500 V = N1;
4501 goto L1;
4502 L0:
4503 BODY;
4504 V += STEP;
4505 L1:
4506 if (V cond N2) goto L0; else goto L2;
4507 L2:
4508
4509 For collapsed loops, given parameters:
4510 collapse(3)
4511 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4512 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4513 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4514 BODY;
4515
4516 we generate pseudocode
4517
4518 if (cond3 is <)
4519 adj = STEP3 - 1;
4520 else
4521 adj = STEP3 + 1;
4522 count3 = (adj + N32 - N31) / STEP3;
4523 if (cond2 is <)
4524 adj = STEP2 - 1;
4525 else
4526 adj = STEP2 + 1;
4527 count2 = (adj + N22 - N21) / STEP2;
4528 if (cond1 is <)
4529 adj = STEP1 - 1;
4530 else
4531 adj = STEP1 + 1;
4532 count1 = (adj + N12 - N11) / STEP1;
4533 count = count1 * count2 * count3;
4534 V = 0;
4535 V1 = N11;
4536 V2 = N21;
4537 V3 = N31;
4538 goto L1;
4539 L0:
4540 BODY;
4541 V += 1;
4542 V3 += STEP3;
4543 V2 += (V3 cond3 N32) ? 0 : STEP2;
4544 V3 = (V3 cond3 N32) ? V3 : N31;
4545 V1 += (V2 cond2 N22) ? 0 : STEP1;
4546 V2 = (V2 cond2 N22) ? V2 : N21;
4547 L1:
4548 if (V < count) goto L0; else goto L2;
4549 L2:
4550
4551 */
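/* For example (purely illustrative),

     #pragma omp simd collapse(2)
     for (i = 0; i < 3; i++)
       for (j = 0; j < 4; j++)
         body (i, j);

   gives count == 12; the single logical IV V runs over 0..11 while V1 and
   V2 are stepped as in the pseudocode above so each V corresponds to one
   (i, j) pair.  */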
4552
4553 static void
4554 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4555 {
4556 tree type, t;
4557 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4558 gimple_stmt_iterator gsi;
4559 gimple *stmt;
4560 gcond *cond_stmt;
4561 bool broken_loop = region->cont == NULL;
4562 edge e, ne;
4563 tree *counts = NULL;
4564 int i;
4565 int safelen_int = INT_MAX;
4566 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4567 OMP_CLAUSE_SAFELEN);
4568 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4569 OMP_CLAUSE__SIMDUID_);
4570 tree n1, n2;
4571
4572 if (safelen)
4573 {
4574 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4575 if (TREE_CODE (safelen) != INTEGER_CST)
4576 safelen_int = 0;
4577 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4578 safelen_int = tree_to_uhwi (safelen);
4579 if (safelen_int == 1)
4580 safelen_int = 0;
4581 }
4582 type = TREE_TYPE (fd->loop.v);
4583 entry_bb = region->entry;
4584 cont_bb = region->cont;
4585 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4586 gcc_assert (broken_loop
4587 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4588 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4589 if (!broken_loop)
4590 {
4591 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4592 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4593 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4594 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4595 }
4596 else
4597 {
4598 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4599 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4600 l2_bb = single_succ (l1_bb);
4601 }
4602 exit_bb = region->exit;
4603 l2_dom_bb = NULL;
4604
4605 gsi = gsi_last_bb (entry_bb);
4606
4607 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4608 /* Not needed in SSA form right now. */
4609 gcc_assert (!gimple_in_ssa_p (cfun));
4610 if (fd->collapse > 1)
4611 {
4612 int first_zero_iter = -1, dummy = -1;
4613 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4614
4615 counts = XALLOCAVEC (tree, fd->collapse);
4616 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4617 zero_iter_bb, first_zero_iter,
4618 dummy_bb, dummy, l2_dom_bb);
4619 }
4620 if (l2_dom_bb == NULL)
4621 l2_dom_bb = l1_bb;
4622
4623 n1 = fd->loop.n1;
4624 n2 = fd->loop.n2;
4625 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4626 {
4627 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4628 OMP_CLAUSE__LOOPTEMP_);
4629 gcc_assert (innerc);
4630 n1 = OMP_CLAUSE_DECL (innerc);
4631 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4632 OMP_CLAUSE__LOOPTEMP_);
4633 gcc_assert (innerc);
4634 n2 = OMP_CLAUSE_DECL (innerc);
4635 }
4636 tree step = fd->loop.step;
4637
4638 bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4639 OMP_CLAUSE__SIMT_);
4640 if (is_simt)
4641 {
4642 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4643 is_simt = safelen_int > 1;
4644 }
4645 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
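     /* Under SIMT execution iterations are distributed cyclically over the
        lanes: lane L starts at N1 + L * STEP and advances by STEP scaled by
        the SIMT vectorization factor.  SIMT_MAXLANE, when set, caps the
        number of participating lanes.  */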
4646 if (is_simt)
4647 {
4648 simt_lane = create_tmp_var (unsigned_type_node);
4649 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4650 gimple_call_set_lhs (g, simt_lane);
4651 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4652 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4653 fold_convert (TREE_TYPE (step), simt_lane));
4654 n1 = fold_convert (type, n1);
4655 if (POINTER_TYPE_P (type))
4656 n1 = fold_build_pointer_plus (n1, offset);
4657 else
4658 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4659
4660 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4661 if (fd->collapse > 1)
4662 simt_maxlane = build_one_cst (unsigned_type_node);
4663 else if (safelen_int < omp_max_simt_vf ())
4664 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4665 tree vf
4666 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4667 unsigned_type_node, 0);
4668 if (simt_maxlane)
4669 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4670 vf = fold_convert (TREE_TYPE (step), vf);
4671 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4672 }
4673
4674 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4675 if (fd->collapse > 1)
4676 {
4677 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4678 {
4679 gsi_prev (&gsi);
4680 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4681 gsi_next (&gsi);
4682 }
4683 else
4684 for (i = 0; i < fd->collapse; i++)
4685 {
4686 tree itype = TREE_TYPE (fd->loops[i].v);
4687 if (POINTER_TYPE_P (itype))
4688 itype = signed_type_for (itype);
4689 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4690 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4691 }
4692 }
4693
4694 /* Remove the GIMPLE_OMP_FOR statement. */
4695 gsi_remove (&gsi, true);
4696
4697 if (!broken_loop)
4698 {
4699 /* Code to control the increment goes in the CONT_BB. */
4700 gsi = gsi_last_bb (cont_bb);
4701 stmt = gsi_stmt (gsi);
4702 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4703
4704 if (POINTER_TYPE_P (type))
4705 t = fold_build_pointer_plus (fd->loop.v, step);
4706 else
4707 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4708 expand_omp_build_assign (&gsi, fd->loop.v, t);
4709
4710 if (fd->collapse > 1)
4711 {
4712 i = fd->collapse - 1;
4713 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4714 {
4715 t = fold_convert (sizetype, fd->loops[i].step);
4716 t = fold_build_pointer_plus (fd->loops[i].v, t);
4717 }
4718 else
4719 {
4720 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4721 fd->loops[i].step);
4722 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4723 fd->loops[i].v, t);
4724 }
4725 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4726
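          /* Propagate the increment outwards: whenever an inner index has
             run past its bound, step the enclosing index and wrap the inner
             one back to its lower bound.  */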
4727 for (i = fd->collapse - 1; i > 0; i--)
4728 {
4729 tree itype = TREE_TYPE (fd->loops[i].v);
4730 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4731 if (POINTER_TYPE_P (itype2))
4732 itype2 = signed_type_for (itype2);
4733 t = fold_convert (itype2, fd->loops[i - 1].step);
4734 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4735 GSI_SAME_STMT);
4736 t = build3 (COND_EXPR, itype2,
4737 build2 (fd->loops[i].cond_code, boolean_type_node,
4738 fd->loops[i].v,
4739 fold_convert (itype, fd->loops[i].n2)),
4740 build_int_cst (itype2, 0), t);
4741 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4742 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4743 else
4744 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4745 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4746
4747 t = fold_convert (itype, fd->loops[i].n1);
4748 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
4749 GSI_SAME_STMT);
4750 t = build3 (COND_EXPR, itype,
4751 build2 (fd->loops[i].cond_code, boolean_type_node,
4752 fd->loops[i].v,
4753 fold_convert (itype, fd->loops[i].n2)),
4754 fd->loops[i].v, t);
4755 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4756 }
4757 }
4758
4759 /* Remove GIMPLE_OMP_CONTINUE. */
4760 gsi_remove (&gsi, true);
4761 }
4762
4763 /* Emit the condition in L1_BB. */
4764 gsi = gsi_start_bb (l1_bb);
4765
4766 t = fold_convert (type, n2);
4767 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4768 false, GSI_CONTINUE_LINKING);
4769 tree v = fd->loop.v;
4770 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4771 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4772 false, GSI_CONTINUE_LINKING);
4773 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4774 cond_stmt = gimple_build_cond_empty (t);
4775 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4776 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4777 NULL, NULL)
4778 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4779 NULL, NULL))
4780 {
4781 gsi = gsi_for_stmt (cond_stmt);
4782 gimple_regimplify_operands (cond_stmt, &gsi);
4783 }
4784
4785 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4786 if (is_simt)
4787 {
4788 gsi = gsi_start_bb (l2_bb);
4789 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4790 if (POINTER_TYPE_P (type))
4791 t = fold_build_pointer_plus (fd->loop.v, step);
4792 else
4793 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4794 expand_omp_build_assign (&gsi, fd->loop.v, t);
4795 }
4796
4797 /* Remove GIMPLE_OMP_RETURN. */
4798 gsi = gsi_last_bb (exit_bb);
4799 gsi_remove (&gsi, true);
4800
4801 /* Connect the new blocks. */
4802 remove_edge (FALLTHRU_EDGE (entry_bb));
4803
4804 if (!broken_loop)
4805 {
4806 remove_edge (BRANCH_EDGE (entry_bb));
4807 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4808
4809 e = BRANCH_EDGE (l1_bb);
4810 ne = FALLTHRU_EDGE (l1_bb);
4811 e->flags = EDGE_TRUE_VALUE;
4812 }
4813 else
4814 {
4815 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4816
4817 ne = single_succ_edge (l1_bb);
4818 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4819
4820 }
4821 ne->flags = EDGE_FALSE_VALUE;
4822 e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
4823 ne->probability = e->probability.invert ();
4824
4825 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4826 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4827
4828 if (simt_maxlane)
4829 {
4830 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4831 NULL_TREE, NULL_TREE);
4832 gsi = gsi_last_bb (entry_bb);
4833 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4834 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4835 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4836 FALLTHRU_EDGE (entry_bb)->probability
4837 = profile_probability::guessed_always ().apply_scale (7, 8);
4838 BRANCH_EDGE (entry_bb)->probability
4839 = FALLTHRU_EDGE (entry_bb)->probability.invert ();
4840 l2_dom_bb = entry_bb;
4841 }
4842 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4843
4844 if (!broken_loop)
4845 {
4846 struct loop *loop = alloc_loop ();
4847 loop->header = l1_bb;
4848 loop->latch = cont_bb;
4849 add_loop (loop, l1_bb->loop_father);
4850 loop->safelen = safelen_int;
4851 if (simduid)
4852 {
4853 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4854 cfun->has_simduid_loops = true;
4855 }
4856 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4857 the loop. */
4858 if ((flag_tree_loop_vectorize
4859 || !global_options_set.x_flag_tree_loop_vectorize)
4860 && flag_tree_loop_optimize
4861 && loop->safelen > 1)
4862 {
4863 loop->force_vectorize = true;
4864 cfun->has_force_vectorize_loops = true;
4865 }
4866 }
4867 else if (simduid)
4868 cfun->has_simduid_loops = true;
4869 }
4870
4871 /* The taskloop construct is represented after gimplification by
4872 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4873 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4874 which should just compute all the needed loop temporaries
4875 for GIMPLE_OMP_TASK. */
4876
4877 static void
4878 expand_omp_taskloop_for_outer (struct omp_region *region,
4879 struct omp_for_data *fd,
4880 gimple *inner_stmt)
4881 {
4882 tree type, bias = NULL_TREE;
4883 basic_block entry_bb, cont_bb, exit_bb;
4884 gimple_stmt_iterator gsi;
4885 gassign *assign_stmt;
4886 tree *counts = NULL;
4887 int i;
4888
4889 gcc_assert (inner_stmt);
4890 gcc_assert (region->cont);
4891 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4892 && gimple_omp_task_taskloop_p (inner_stmt));
4893 type = TREE_TYPE (fd->loop.v);
4894
4895 /* See if we need to bias by LLONG_MIN. */
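     /* The GOMP_taskloop{,_ull} interface works on FD->iter_type (unsigned
        long long here); when the original induction variable is a signed
        type whose bounds are not known to lie on one side of zero, biasing
        both bounds by TYPE_MIN_VALUE keeps their relative order in the
        unsigned domain.  */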
4896 if (fd->iter_type == long_long_unsigned_type_node
4897 && TREE_CODE (type) == INTEGER_TYPE
4898 && !TYPE_UNSIGNED (type))
4899 {
4900 tree n1, n2;
4901
4902 if (fd->loop.cond_code == LT_EXPR)
4903 {
4904 n1 = fd->loop.n1;
4905 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4906 }
4907 else
4908 {
4909 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4910 n2 = fd->loop.n1;
4911 }
4912 if (TREE_CODE (n1) != INTEGER_CST
4913 || TREE_CODE (n2) != INTEGER_CST
4914 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4915 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4916 }
4917
4918 entry_bb = region->entry;
4919 cont_bb = region->cont;
4920 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4921 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4922 exit_bb = region->exit;
4923
4924 gsi = gsi_last_bb (entry_bb);
4925 gimple *for_stmt = gsi_stmt (gsi);
4926 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4927 if (fd->collapse > 1)
4928 {
4929 int first_zero_iter = -1, dummy = -1;
4930 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4931
4932 counts = XALLOCAVEC (tree, fd->collapse);
4933 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4934 zero_iter_bb, first_zero_iter,
4935 dummy_bb, dummy, l2_dom_bb);
4936
4937 if (zero_iter_bb)
4938 {
4939 /* Some counts[i] vars might be uninitialized if
4940 some loop has zero iterations. But the body shouldn't
4941 be executed in that case, so just avoid uninit warnings. */
4942 for (i = first_zero_iter; i < fd->collapse; i++)
4943 if (SSA_VAR_P (counts[i]))
4944 TREE_NO_WARNING (counts[i]) = 1;
4945 gsi_prev (&gsi);
4946 edge e = split_block (entry_bb, gsi_stmt (gsi));
4947 entry_bb = e->dest;
4948 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4949 gsi = gsi_last_bb (entry_bb);
4950 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4951 get_immediate_dominator (CDI_DOMINATORS,
4952 zero_iter_bb));
4953 }
4954 }
4955
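     /* Convert the loop bounds to FD->iter_type (biased if necessary) and
        store them in the first two _looptemp_ clauses of the task; these
        are the values the GIMPLE_OMP_TASK will hand to the taskloop
        runtime.  */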
4956 tree t0, t1;
4957 t1 = fd->loop.n2;
4958 t0 = fd->loop.n1;
4959 if (POINTER_TYPE_P (TREE_TYPE (t0))
4960 && TYPE_PRECISION (TREE_TYPE (t0))
4961 != TYPE_PRECISION (fd->iter_type))
4962 {
4963 /* Avoid casting pointers to integer of a different size. */
4964 tree itype = signed_type_for (type);
4965 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4966 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4967 }
4968 else
4969 {
4970 t1 = fold_convert (fd->iter_type, t1);
4971 t0 = fold_convert (fd->iter_type, t0);
4972 }
4973 if (bias)
4974 {
4975 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4976 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4977 }
4978
4979 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4980 OMP_CLAUSE__LOOPTEMP_);
4981 gcc_assert (innerc);
4982 tree startvar = OMP_CLAUSE_DECL (innerc);
4983 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4984 gcc_assert (innerc);
4985 tree endvar = OMP_CLAUSE_DECL (innerc);
4986 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4987 {
4988 gcc_assert (innerc);
4989 for (i = 1; i < fd->collapse; i++)
4990 {
4991 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4992 OMP_CLAUSE__LOOPTEMP_);
4993 gcc_assert (innerc);
4994 }
4995 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4996 OMP_CLAUSE__LOOPTEMP_);
4997 if (innerc)
4998 {
4999 /* If needed (inner taskloop has lastprivate clause), propagate
5000 down the total number of iterations. */
5001 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
5002 NULL_TREE, false,
5003 GSI_CONTINUE_LINKING);
5004 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
5005 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5006 }
5007 }
5008
5009 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
5010 GSI_CONTINUE_LINKING);
5011 assign_stmt = gimple_build_assign (startvar, t0);
5012 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5013
5014 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
5015 GSI_CONTINUE_LINKING);
5016 assign_stmt = gimple_build_assign (endvar, t1);
5017 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5018 if (fd->collapse > 1)
5019 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5020
5021 /* Remove the GIMPLE_OMP_FOR statement. */
5022 gsi = gsi_for_stmt (for_stmt);
5023 gsi_remove (&gsi, true);
5024
5025 gsi = gsi_last_bb (cont_bb);
5026 gsi_remove (&gsi, true);
5027
5028 gsi = gsi_last_bb (exit_bb);
5029 gsi_remove (&gsi, true);
5030
5031 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5032 remove_edge (BRANCH_EDGE (entry_bb));
5033 FALLTHRU_EDGE (cont_bb)->probability = profile_probability::always ();
5034 remove_edge (BRANCH_EDGE (cont_bb));
5035 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
5036 set_immediate_dominator (CDI_DOMINATORS, region->entry,
5037 recompute_dominator (CDI_DOMINATORS, region->entry));
5038 }
5039
5040 /* The taskloop construct is represented after gimplification by
5041 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
5042 in between them. This routine expands the inner GIMPLE_OMP_FOR.
5043 The GOMP_taskloop{,_ull} function arranges for each task to be given just
5044 a single range of iterations. */
5045
5046 static void
5047 expand_omp_taskloop_for_inner (struct omp_region *region,
5048 struct omp_for_data *fd,
5049 gimple *inner_stmt)
5050 {
5051 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
5052 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
5053 basic_block fin_bb;
5054 gimple_stmt_iterator gsi;
5055 edge ep;
5056 bool broken_loop = region->cont == NULL;
5057 tree *counts = NULL;
5058 tree n1, n2, step;
5059
5060 itype = type = TREE_TYPE (fd->loop.v);
5061 if (POINTER_TYPE_P (type))
5062 itype = signed_type_for (type);
5063
5064 /* See if we need to bias by LLONG_MIN. */
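 /* (The bias, when applied, shifts a signed iteration range into the
    unsigned long long space used by the _ull runtime interface, so that
    the runtime's unsigned comparisons preserve the original signed
    ordering.)  */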
5065 if (fd->iter_type == long_long_unsigned_type_node
5066 && TREE_CODE (type) == INTEGER_TYPE
5067 && !TYPE_UNSIGNED (type))
5068 {
5069 tree n1, n2;
5070
5071 if (fd->loop.cond_code == LT_EXPR)
5072 {
5073 n1 = fd->loop.n1;
5074 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5075 }
5076 else
5077 {
5078 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5079 n2 = fd->loop.n1;
5080 }
5081 if (TREE_CODE (n1) != INTEGER_CST
5082 || TREE_CODE (n2) != INTEGER_CST
5083 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5084 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5085 }
5086
5087 entry_bb = region->entry;
5088 cont_bb = region->cont;
5089 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5090 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5091 gcc_assert (broken_loop
5092 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5093 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5094 if (!broken_loop)
5095 {
5096 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5097 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5098 }
5099 exit_bb = region->exit;
5100
5101 /* Iteration space partitioning goes in ENTRY_BB. */
5102 gsi = gsi_last_bb (entry_bb);
5103 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5104
5105 if (fd->collapse > 1)
5106 {
5107 int first_zero_iter = -1, dummy = -1;
5108 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5109
5110 counts = XALLOCAVEC (tree, fd->collapse);
5111 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5112 fin_bb, first_zero_iter,
5113 dummy_bb, dummy, l2_dom_bb);
5114 t = NULL_TREE;
5115 }
5116 else
5117 t = integer_one_node;
5118
5119 step = fd->loop.step;
5120 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5121 OMP_CLAUSE__LOOPTEMP_);
5122 gcc_assert (innerc);
5123 n1 = OMP_CLAUSE_DECL (innerc);
5124 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5125 gcc_assert (innerc);
5126 n2 = OMP_CLAUSE_DECL (innerc);
5127 if (bias)
5128 {
5129 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5130 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5131 }
5132 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5133 true, NULL_TREE, true, GSI_SAME_STMT);
5134 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5135 true, NULL_TREE, true, GSI_SAME_STMT);
5136 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5137 true, NULL_TREE, true, GSI_SAME_STMT);
5138
5139 tree startvar = fd->loop.v;
5140 tree endvar = NULL_TREE;
5141
5142 if (gimple_omp_for_combined_p (fd->for_stmt))
5143 {
5144 tree clauses = gimple_omp_for_clauses (inner_stmt);
5145 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5146 gcc_assert (innerc);
5147 startvar = OMP_CLAUSE_DECL (innerc);
5148 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5149 OMP_CLAUSE__LOOPTEMP_);
5150 gcc_assert (innerc);
5151 endvar = OMP_CLAUSE_DECL (innerc);
5152 }
5153 t = fold_convert (TREE_TYPE (startvar), n1);
5154 t = force_gimple_operand_gsi (&gsi, t,
5155 DECL_P (startvar)
5156 && TREE_ADDRESSABLE (startvar),
5157 NULL_TREE, false, GSI_CONTINUE_LINKING);
5158 gimple *assign_stmt = gimple_build_assign (startvar, t);
5159 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5160
5161 t = fold_convert (TREE_TYPE (startvar), n2);
5162 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5163 false, GSI_CONTINUE_LINKING);
5164 if (endvar)
5165 {
5166 assign_stmt = gimple_build_assign (endvar, e);
5167 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5168 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5169 assign_stmt = gimple_build_assign (fd->loop.v, e);
5170 else
5171 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5172 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5173 }
5174 if (fd->collapse > 1)
5175 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5176
5177 if (!broken_loop)
5178 {
5179 /* The code controlling the sequential loop replaces the
5180 GIMPLE_OMP_CONTINUE. */
5181 gsi = gsi_last_bb (cont_bb);
5182 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5183 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5184 vmain = gimple_omp_continue_control_use (cont_stmt);
5185 vback = gimple_omp_continue_control_def (cont_stmt);
5186
5187 if (!gimple_omp_for_combined_p (fd->for_stmt))
5188 {
5189 if (POINTER_TYPE_P (type))
5190 t = fold_build_pointer_plus (vmain, step);
5191 else
5192 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5193 t = force_gimple_operand_gsi (&gsi, t,
5194 DECL_P (vback)
5195 && TREE_ADDRESSABLE (vback),
5196 NULL_TREE, true, GSI_SAME_STMT);
5197 assign_stmt = gimple_build_assign (vback, t);
5198 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5199
5200 t = build2 (fd->loop.cond_code, boolean_type_node,
5201 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5202 ? t : vback, e);
5203 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5204 }
5205
5206 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5207 gsi_remove (&gsi, true);
5208
5209 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5210 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5211 }
5212
5213 /* Remove the GIMPLE_OMP_FOR statement. */
5214 gsi = gsi_for_stmt (fd->for_stmt);
5215 gsi_remove (&gsi, true);
5216
5217 /* Remove the GIMPLE_OMP_RETURN statement. */
5218 gsi = gsi_last_bb (exit_bb);
5219 gsi_remove (&gsi, true);
5220
5221 FALLTHRU_EDGE (entry_bb)->probability = profile_probability::always ();
5222 if (!broken_loop)
5223 remove_edge (BRANCH_EDGE (entry_bb));
5224 else
5225 {
5226 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5227 region->outer->cont = NULL;
5228 }
5229
5230 /* Connect all the blocks. */
5231 if (!broken_loop)
5232 {
5233 ep = find_edge (cont_bb, body_bb);
5234 if (gimple_omp_for_combined_p (fd->for_stmt))
5235 {
5236 remove_edge (ep);
5237 ep = NULL;
5238 }
5239 else if (fd->collapse > 1)
5240 {
5241 remove_edge (ep);
5242 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5243 }
5244 else
5245 ep->flags = EDGE_TRUE_VALUE;
5246 find_edge (cont_bb, fin_bb)->flags
5247 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5248 }
5249
5250 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5251 recompute_dominator (CDI_DOMINATORS, body_bb));
5252 if (!broken_loop)
5253 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5254 recompute_dominator (CDI_DOMINATORS, fin_bb));
5255
5256 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5257 {
5258 struct loop *loop = alloc_loop ();
5259 loop->header = body_bb;
5260 if (collapse_bb == NULL)
5261 loop->latch = cont_bb;
5262 add_loop (loop, body_bb->loop_father);
5263 }
5264 }
5265
5266 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5267 partitioned loop. The lowering here is abstracted, in that the
5268 loop parameters are passed through internal functions, which are
5269 further lowered by oacc_device_lower, once we get to the target
5270 compiler. The loop is of the form:
5271
5272 for (V = B; V LTGT E; V += S) {BODY}
5273
5274 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5275 (constant 0 for no chunking) and we will have a GWV partitioning
5276 mask, specifying dimensions over which the loop is to be
5277 partitioned (see note below). We generate code that looks like
5278 (this ignores tiling):
5279
5280 <entry_bb> [incoming FALL->body, BRANCH->exit]
5281 typedef signedintify (typeof (V)) T; // underlying signed integral type
5282 T range = E - B;
5283 T chunk_no = 0;
5284 T DIR = LTGT == '<' ? +1 : -1;
5285 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5286 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5287
5288 <head_bb> [created by splitting end of entry_bb]
5289 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5290 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5291 if (!(offset LTGT bound)) goto bottom_bb;
5292
5293 <body_bb> [incoming]
5294 V = B + offset;
5295 {BODY}
5296
5297 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5298 offset += step;
5299 if (offset LTGT bound) goto body_bb; [*]
5300
5301 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5302 chunk_no++;
5303 if (chunk_no < chunk_max) goto head_bb;
5304
5305 <exit_bb> [incoming]
5306 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5307
5308 [*] Needed if V live at end of loop. */
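/* As an illustration only (a simplified sketch; the clause set and array
   names are placeholders), a loop such as

     #pragma acc parallel loop
     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   reaches this point as a GF_OMP_FOR_KIND_OACC_LOOP and is rewritten into
   the IFN_GOACC_LOOP (CHUNKS/STEP/OFFSET/BOUND) internal calls sketched
   above; oacc_device_lower later resolves those calls once the target and
   the GWV partitioning are known.  */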
5309
5310 static void
5311 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5312 {
5313 tree v = fd->loop.v;
5314 enum tree_code cond_code = fd->loop.cond_code;
5315 enum tree_code plus_code = PLUS_EXPR;
5316
5317 tree chunk_size = integer_minus_one_node;
5318 tree gwv = integer_zero_node;
5319 tree iter_type = TREE_TYPE (v);
5320 tree diff_type = iter_type;
5321 tree plus_type = iter_type;
5322 struct oacc_collapse *counts = NULL;
5323
5324 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5325 == GF_OMP_FOR_KIND_OACC_LOOP);
5326 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5327 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5328
5329 if (POINTER_TYPE_P (iter_type))
5330 {
5331 plus_code = POINTER_PLUS_EXPR;
5332 plus_type = sizetype;
5333 }
5334 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5335 diff_type = signed_type_for (diff_type);
5336 if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
5337 diff_type = integer_type_node;
5338
5339 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5340 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5341 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5342 basic_block bottom_bb = NULL;
5343
5344 /* entry_bb has two successors; the branch edge is to the exit
5345 block, fallthrough edge to body. */
5346 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5347 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5348
5349 /* If cont_bb is non-NULL, it has 2 successors. The branch successor is
5350 body_bb, or a block whose only successor is body_bb. Its
5351 fallthrough successor is the final block (same as the branch
5352 successor of the entry_bb). */
5353 if (cont_bb)
5354 {
5355 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5356 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5357
5358 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5359 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5360 }
5361 else
5362 gcc_assert (!gimple_in_ssa_p (cfun));
5363
5364 /* The exit block only has entry_bb and cont_bb as predecessors. */
5365 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5366
5367 tree chunk_no;
5368 tree chunk_max = NULL_TREE;
5369 tree bound, offset;
5370 tree step = create_tmp_var (diff_type, ".step");
5371 bool up = cond_code == LT_EXPR;
5372 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5373 bool chunking = !gimple_in_ssa_p (cfun);
5374 bool negating;
5375
5376 /* Tiling vars. */
5377 tree tile_size = NULL_TREE;
5378 tree element_s = NULL_TREE;
5379 tree e_bound = NULL_TREE, e_offset = NULL_TREE, e_step = NULL_TREE;
5380 basic_block elem_body_bb = NULL;
5381 basic_block elem_cont_bb = NULL;
5382
5383 /* SSA instances. */
5384 tree offset_incr = NULL_TREE;
5385 tree offset_init = NULL_TREE;
5386
5387 gimple_stmt_iterator gsi;
5388 gassign *ass;
5389 gcall *call;
5390 gimple *stmt;
5391 tree expr;
5392 location_t loc;
5393 edge split, be, fte;
5394
5395 /* Split the end of entry_bb to create head_bb. */
5396 split = split_block (entry_bb, last_stmt (entry_bb));
5397 basic_block head_bb = split->dest;
5398 entry_bb = split->src;
5399
5400 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5401 gsi = gsi_last_bb (entry_bb);
5402 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5403 loc = gimple_location (for_stmt);
5404
5405 if (gimple_in_ssa_p (cfun))
5406 {
5407 offset_init = gimple_omp_for_index (for_stmt, 0);
5408 gcc_assert (integer_zerop (fd->loop.n1));
5409 /* The SSA parallelizer does gang parallelism. */
5410 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5411 }
5412
5413 if (fd->collapse > 1 || fd->tiling)
5414 {
5415 gcc_assert (!gimple_in_ssa_p (cfun) && up);
5416 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5417 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5418 TREE_TYPE (fd->loop.n2), loc);
5419
5420 if (SSA_VAR_P (fd->loop.n2))
5421 {
5422 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5423 true, GSI_SAME_STMT);
5424 ass = gimple_build_assign (fd->loop.n2, total);
5425 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5426 }
5427 }
5428
5429 tree b = fd->loop.n1;
5430 tree e = fd->loop.n2;
5431 tree s = fd->loop.step;
5432
5433 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5434 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5435
5436 /* Convert the step, avoiding possible unsigned->signed overflow. */
5437 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5438 if (negating)
5439 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5440 s = fold_convert (diff_type, s);
5441 if (negating)
5442 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5443 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5444
5445 if (!chunking)
5446 chunk_size = integer_zero_node;
5447 expr = fold_convert (diff_type, chunk_size);
5448 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5449 NULL_TREE, true, GSI_SAME_STMT);
5450
5451 if (fd->tiling)
5452 {
5453 /* Determine the tile size and element step,
5454 modify the outer loop step size. */
5455 tile_size = create_tmp_var (diff_type, ".tile_size");
5456 expr = build_int_cst (diff_type, 1);
5457 for (int ix = 0; ix < fd->collapse; ix++)
5458 expr = fold_build2 (MULT_EXPR, diff_type, counts[ix].tile, expr);
5459 expr = force_gimple_operand_gsi (&gsi, expr, true,
5460 NULL_TREE, true, GSI_SAME_STMT);
5461 ass = gimple_build_assign (tile_size, expr);
5462 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5463
5464 element_s = create_tmp_var (diff_type, ".element_s");
5465 ass = gimple_build_assign (element_s, s);
5466 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5467
5468 expr = fold_build2 (MULT_EXPR, diff_type, s, tile_size);
5469 s = force_gimple_operand_gsi (&gsi, expr, true,
5470 NULL_TREE, true, GSI_SAME_STMT);
5471 }
5472
5473 /* Determine the range, avoiding possible unsigned->signed overflow. */
5474 negating = !up && TYPE_UNSIGNED (iter_type);
5475 expr = fold_build2 (MINUS_EXPR, plus_type,
5476 fold_convert (plus_type, negating ? b : e),
5477 fold_convert (plus_type, negating ? e : b));
5478 expr = fold_convert (diff_type, expr);
5479 if (negating)
5480 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5481 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5482 NULL_TREE, true, GSI_SAME_STMT);
5483
5484 chunk_no = build_int_cst (diff_type, 0);
5485 if (chunking)
5486 {
5487 gcc_assert (!gimple_in_ssa_p (cfun));
5488
5489 expr = chunk_no;
5490 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5491 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5492
5493 ass = gimple_build_assign (chunk_no, expr);
5494 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5495
5496 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5497 build_int_cst (integer_type_node,
5498 IFN_GOACC_LOOP_CHUNKS),
5499 dir, range, s, chunk_size, gwv);
5500 gimple_call_set_lhs (call, chunk_max);
5501 gimple_set_location (call, loc);
5502 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5503 }
5504 else
5505 chunk_size = chunk_no;
5506
5507 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5508 build_int_cst (integer_type_node,
5509 IFN_GOACC_LOOP_STEP),
5510 dir, range, s, chunk_size, gwv);
5511 gimple_call_set_lhs (call, step);
5512 gimple_set_location (call, loc);
5513 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5514
5515 /* Remove the GIMPLE_OMP_FOR. */
5516 gsi_remove (&gsi, true);
5517
5518 /* Fixup edges from head_bb. */
5519 be = BRANCH_EDGE (head_bb);
5520 fte = FALLTHRU_EDGE (head_bb);
5521 be->flags |= EDGE_FALSE_VALUE;
5522 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5523
5524 basic_block body_bb = fte->dest;
5525
5526 if (gimple_in_ssa_p (cfun))
5527 {
5528 gsi = gsi_last_bb (cont_bb);
5529 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5530
5531 offset = gimple_omp_continue_control_use (cont_stmt);
5532 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5533 }
5534 else
5535 {
5536 offset = create_tmp_var (diff_type, ".offset");
5537 offset_init = offset_incr = offset;
5538 }
5539 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5540
5541 /* Loop offset & bound go into head_bb. */
5542 gsi = gsi_start_bb (head_bb);
5543
5544 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5545 build_int_cst (integer_type_node,
5546 IFN_GOACC_LOOP_OFFSET),
5547 dir, range, s,
5548 chunk_size, gwv, chunk_no);
5549 gimple_call_set_lhs (call, offset_init);
5550 gimple_set_location (call, loc);
5551 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5552
5553 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5554 build_int_cst (integer_type_node,
5555 IFN_GOACC_LOOP_BOUND),
5556 dir, range, s,
5557 chunk_size, gwv, offset_init);
5558 gimple_call_set_lhs (call, bound);
5559 gimple_set_location (call, loc);
5560 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5561
5562 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5563 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5564 GSI_CONTINUE_LINKING);
5565
5566 /* V assignment goes into body_bb. */
5567 if (!gimple_in_ssa_p (cfun))
5568 {
5569 gsi = gsi_start_bb (body_bb);
5570
5571 expr = build2 (plus_code, iter_type, b,
5572 fold_convert (plus_type, offset));
5573 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5574 true, GSI_SAME_STMT);
5575 ass = gimple_build_assign (v, expr);
5576 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5577
5578 if (fd->collapse > 1 || fd->tiling)
5579 expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
5580
5581 if (fd->tiling)
5582 {
5583 /* Determine the range of the element loop -- usually simply
5584 the tile_size, but could be smaller if the final
5585 iteration of the outer loop is a partial tile. */
5586 tree e_range = create_tmp_var (diff_type, ".e_range");
5587
5588 expr = build2 (MIN_EXPR, diff_type,
5589 build2 (MINUS_EXPR, diff_type, bound, offset),
5590 build2 (MULT_EXPR, diff_type, tile_size,
5591 element_s));
5592 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5593 true, GSI_SAME_STMT);
5594 ass = gimple_build_assign (e_range, expr);
5595 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5596
5597 /* Determine bound, offset & step of inner loop. */
5598 e_bound = create_tmp_var (diff_type, ".e_bound");
5599 e_offset = create_tmp_var (diff_type, ".e_offset");
5600 e_step = create_tmp_var (diff_type, ".e_step");
5601
5602 /* Mark these as element loops. */
5603 tree t, e_gwv = integer_minus_one_node;
5604 tree chunk = build_int_cst (diff_type, 0); /* Never chunked. */
5605
5606 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_OFFSET);
5607 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5608 element_s, chunk, e_gwv, chunk);
5609 gimple_call_set_lhs (call, e_offset);
5610 gimple_set_location (call, loc);
5611 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5612
5613 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_BOUND);
5614 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7, t, dir, e_range,
5615 element_s, chunk, e_gwv, e_offset);
5616 gimple_call_set_lhs (call, e_bound);
5617 gimple_set_location (call, loc);
5618 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5619
5620 t = build_int_cst (integer_type_node, IFN_GOACC_LOOP_STEP);
5621 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6, t, dir, e_range,
5622 element_s, chunk, e_gwv);
5623 gimple_call_set_lhs (call, e_step);
5624 gimple_set_location (call, loc);
5625 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5626
5627 /* Add test and split block. */
5628 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5629 stmt = gimple_build_cond_empty (expr);
5630 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5631 split = split_block (body_bb, stmt);
5632 elem_body_bb = split->dest;
5633 if (cont_bb == body_bb)
5634 cont_bb = elem_body_bb;
5635 body_bb = split->src;
5636
5637 split->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5638
5639 /* Initialize the user's loop vars. */
5640 gsi = gsi_start_bb (elem_body_bb);
5641 expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
5642 }
5643 }
5644
5645 /* Loop increment goes into cont_bb. If this is not a loop, we
5646 will have spawned threads as if it was, and each one will
5647 execute one iteration. The specification is not explicit about
5648 whether such constructs are ill-formed or not, and they can
5649 occur, especially when noreturn routines are involved. */
5650 if (cont_bb)
5651 {
5652 gsi = gsi_last_bb (cont_bb);
5653 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5654 loc = gimple_location (cont_stmt);
5655
5656 if (fd->tiling)
5657 {
5658 /* Insert element loop increment and test. */
5659 expr = build2 (PLUS_EXPR, diff_type, e_offset, e_step);
5660 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5661 true, GSI_SAME_STMT);
5662 ass = gimple_build_assign (e_offset, expr);
5663 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5664 expr = build2 (cond_code, boolean_type_node, e_offset, e_bound);
5665
5666 stmt = gimple_build_cond_empty (expr);
5667 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5668 split = split_block (cont_bb, stmt);
5669 elem_cont_bb = split->src;
5670 cont_bb = split->dest;
5671
5672 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5673 split->probability = profile_probability::unlikely ().guessed ();
5674 edge latch_edge
5675 = make_edge (elem_cont_bb, elem_body_bb, EDGE_TRUE_VALUE);
5676 latch_edge->probability = profile_probability::likely ().guessed ();
5677
5678 edge skip_edge = make_edge (body_bb, cont_bb, EDGE_FALSE_VALUE);
5679 skip_edge->probability = profile_probability::unlikely ().guessed ();
5680 edge loop_entry_edge = EDGE_SUCC (body_bb, 1 - skip_edge->dest_idx);
5681 loop_entry_edge->probability
5682 = profile_probability::likely ().guessed ();
5683
5684 gsi = gsi_for_stmt (cont_stmt);
5685 }
5686
5687 /* Increment offset. */
5688 if (gimple_in_ssa_p (cfun))
5689 expr = build2 (plus_code, iter_type, offset,
5690 fold_convert (plus_type, step));
5691 else
5692 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5693 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5694 true, GSI_SAME_STMT);
5695 ass = gimple_build_assign (offset_incr, expr);
5696 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5697 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5698 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5699
5700 /* Remove the GIMPLE_OMP_CONTINUE. */
5701 gsi_remove (&gsi, true);
5702
5703 /* Fixup edges from cont_bb. */
5704 be = BRANCH_EDGE (cont_bb);
5705 fte = FALLTHRU_EDGE (cont_bb);
5706 be->flags |= EDGE_TRUE_VALUE;
5707 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5708
5709 if (chunking)
5710 {
5711 /* Split the beginning of exit_bb to make bottom_bb. We
5712 need to insert a nop at the start, because splitting is
5713 after a stmt, not before. */
5714 gsi = gsi_start_bb (exit_bb);
5715 stmt = gimple_build_nop ();
5716 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5717 split = split_block (exit_bb, stmt);
5718 bottom_bb = split->src;
5719 exit_bb = split->dest;
5720 gsi = gsi_last_bb (bottom_bb);
5721
5722 /* Chunk increment and test goes into bottom_bb. */
5723 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5724 build_int_cst (diff_type, 1));
5725 ass = gimple_build_assign (chunk_no, expr);
5726 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5727
5728 /* Chunk test at end of bottom_bb. */
5729 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5730 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5731 GSI_CONTINUE_LINKING);
5732
5733 /* Fixup edges from bottom_bb. */
5734 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5735 split->probability = profile_probability::unlikely ().guessed ();
5736 edge latch_edge = make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5737 latch_edge->probability = profile_probability::likely ().guessed ();
5738 }
5739 }
5740
5741 gsi = gsi_last_bb (exit_bb);
5742 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5743 loc = gimple_location (gsi_stmt (gsi));
5744
5745 if (!gimple_in_ssa_p (cfun))
5746 {
5747 /* Insert the final value of V, in case it is live. This is the
5748 value for the only thread that survives past the join. */
5749 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5750 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5751 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5752 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5753 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5754 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5755 true, GSI_SAME_STMT);
5756 ass = gimple_build_assign (v, expr);
5757 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5758 }
5759
5760 /* Remove the OMP_RETURN. */
5761 gsi_remove (&gsi, true);
5762
5763 if (cont_bb)
5764 {
5765 /* We now have one, two or three nested loops. Update the loop
5766 structures. */
5767 struct loop *parent = entry_bb->loop_father;
5768 struct loop *body = body_bb->loop_father;
5769
5770 if (chunking)
5771 {
5772 struct loop *chunk_loop = alloc_loop ();
5773 chunk_loop->header = head_bb;
5774 chunk_loop->latch = bottom_bb;
5775 add_loop (chunk_loop, parent);
5776 parent = chunk_loop;
5777 }
5778 else if (parent != body)
5779 {
5780 gcc_assert (body->header == body_bb);
5781 gcc_assert (body->latch == cont_bb
5782 || single_pred (body->latch) == cont_bb);
5783 parent = NULL;
5784 }
5785
5786 if (parent)
5787 {
5788 struct loop *body_loop = alloc_loop ();
5789 body_loop->header = body_bb;
5790 body_loop->latch = cont_bb;
5791 add_loop (body_loop, parent);
5792
5793 if (fd->tiling)
5794 {
5795 /* Insert tiling's element loop. */
5796 struct loop *inner_loop = alloc_loop ();
5797 inner_loop->header = elem_body_bb;
5798 inner_loop->latch = elem_cont_bb;
5799 add_loop (inner_loop, body_loop);
5800 }
5801 }
5802 }
5803 }
5804
5805 /* Expand the OMP loop defined by REGION. */
5806
5807 static void
5808 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5809 {
5810 struct omp_for_data fd;
5811 struct omp_for_data_loop *loops;
5812
5813 loops
5814 = (struct omp_for_data_loop *)
5815 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5816 * sizeof (struct omp_for_data_loop));
5817 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5818 &fd, loops);
5819 region->sched_kind = fd.sched_kind;
5820 region->sched_modifiers = fd.sched_modifiers;
5821
5822 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5823 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5824 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5825 if (region->cont)
5826 {
5827 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5828 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5829 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5830 }
5831 else
5832 /* If there isn't a continue then this is a degenerate case where
5833 the introduction of abnormal edges during lowering will prevent
5834 original loops from being detected. Fix that up. */
5835 loops_state_set (LOOPS_NEED_FIXUP);
5836
5837 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5838 expand_omp_simd (region, &fd);
5839 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5840 expand_cilk_for (region, &fd);
5841 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5842 {
5843 gcc_assert (!inner_stmt);
5844 expand_oacc_for (region, &fd);
5845 }
5846 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5847 {
5848 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5849 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5850 else
5851 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5852 }
5853 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5854 && !fd.have_ordered)
5855 {
5856 if (fd.chunk_size == NULL)
5857 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5858 else
5859 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5860 }
5861 else
5862 {
5863 int fn_index, start_ix, next_ix;
5864
5865 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5866 == GF_OMP_FOR_KIND_FOR);
5867 if (fd.chunk_size == NULL
5868 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5869 fd.chunk_size = integer_zero_node;
5870 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5871 switch (fd.sched_kind)
5872 {
5873 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5874 fn_index = 3;
5875 break;
5876 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5877 case OMP_CLAUSE_SCHEDULE_GUIDED:
5878 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5879 && !fd.ordered
5880 && !fd.have_ordered)
5881 {
5882 fn_index = 3 + fd.sched_kind;
5883 break;
5884 }
5885 /* FALLTHRU */
5886 default:
5887 fn_index = fd.sched_kind;
5888 break;
5889 }
5890 if (!fd.ordered)
5891 fn_index += fd.have_ordered * 6;
5892 if (fd.ordered)
5893 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5894 else
5895 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5896 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
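 /* The GOMP_loop_*_start/_next builtins are declared consecutively, so
    adding fn_index to the _STATIC_ entry picks the corresponding
    static/dynamic/guided/runtime (and, via the adjustments above,
    nonmonotonic or ordered) variant; the adjustment below switches to
    the unsigned long long (_ull_) family when needed.  */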
5897 if (fd.iter_type == long_long_unsigned_type_node)
5898 {
5899 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5900 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5901 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5902 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5903 }
5904 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5905 (enum built_in_function) next_ix, inner_stmt);
5906 }
5907
5908 if (gimple_in_ssa_p (cfun))
5909 update_ssa (TODO_update_ssa_only_virtuals);
5910 }
5911
5912 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5913
5914 v = GOMP_sections_start (n);
5915 L0:
5916 switch (v)
5917 {
5918 case 0:
5919 goto L2;
5920 case 1:
5921 section 1;
5922 goto L1;
5923 case 2:
5924 ...
5925 case n:
5926 ...
5927 default:
5928 abort ();
5929 }
5930 L1:
5931 v = GOMP_sections_next ();
5932 goto L0;
5933 L2:
5934 reduction;
5935
5936 If this is a combined parallel sections, replace the call to
5937 GOMP_sections_start with call to GOMP_sections_next. */
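/* For example (a simplified sketch; work1/work2 are placeholder names),

     #pragma omp sections
     {
       #pragma omp section
         work1 ();
       #pragma omp section
         work2 ();
     }

   becomes the switch sketched above, with case 1 running work1 (), case 2
   running work2 (), case 0 leaving the construct, and the default case
   trapping.  */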
5938
5939 static void
5940 expand_omp_sections (struct omp_region *region)
5941 {
5942 tree t, u, vin = NULL, vmain, vnext, l2;
5943 unsigned len;
5944 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5945 gimple_stmt_iterator si, switch_si;
5946 gomp_sections *sections_stmt;
5947 gimple *stmt;
5948 gomp_continue *cont;
5949 edge_iterator ei;
5950 edge e;
5951 struct omp_region *inner;
5952 unsigned i, casei;
5953 bool exit_reachable = region->cont != NULL;
5954
5955 gcc_assert (region->exit != NULL);
5956 entry_bb = region->entry;
5957 l0_bb = single_succ (entry_bb);
5958 l1_bb = region->cont;
5959 l2_bb = region->exit;
5960 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5961 l2 = gimple_block_label (l2_bb);
5962 else
5963 {
5964 /* This can happen if there are reductions. */
5965 len = EDGE_COUNT (l0_bb->succs);
5966 gcc_assert (len > 0);
5967 e = EDGE_SUCC (l0_bb, len - 1);
5968 si = gsi_last_bb (e->dest);
5969 l2 = NULL_TREE;
5970 if (gsi_end_p (si)
5971 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5972 l2 = gimple_block_label (e->dest);
5973 else
5974 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5975 {
5976 si = gsi_last_bb (e->dest);
5977 if (gsi_end_p (si)
5978 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5979 {
5980 l2 = gimple_block_label (e->dest);
5981 break;
5982 }
5983 }
5984 }
5985 if (exit_reachable)
5986 default_bb = create_empty_bb (l1_bb->prev_bb);
5987 else
5988 default_bb = create_empty_bb (l0_bb);
5989
5990 /* We will build a switch() with enough cases for all the
5991 GIMPLE_OMP_SECTION regions, a '0' case for when there is no more work,
5992 and a default case to abort if something goes wrong. */
5993 len = EDGE_COUNT (l0_bb->succs);
5994
5995 /* Use vec::quick_push on label_vec throughout, since we know the size
5996 in advance. */
5997 auto_vec<tree> label_vec (len);
5998
5999 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
6000 GIMPLE_OMP_SECTIONS statement. */
6001 si = gsi_last_bb (entry_bb);
6002 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
6003 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
6004 vin = gimple_omp_sections_control (sections_stmt);
6005 if (!is_combined_parallel (region))
6006 {
6007 /* If we are not inside a combined parallel+sections region,
6008 call GOMP_sections_start. */
6009 t = build_int_cst (unsigned_type_node, len - 1);
6010 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
6011 stmt = gimple_build_call (u, 1, t);
6012 }
6013 else
6014 {
6015 /* Otherwise, call GOMP_sections_next. */
6016 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6017 stmt = gimple_build_call (u, 0);
6018 }
6019 gimple_call_set_lhs (stmt, vin);
6020 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6021 gsi_remove (&si, true);
6022
6023 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
6024 L0_BB. */
6025 switch_si = gsi_last_bb (l0_bb);
6026 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
6027 if (exit_reachable)
6028 {
6029 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
6030 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
6031 vmain = gimple_omp_continue_control_use (cont);
6032 vnext = gimple_omp_continue_control_def (cont);
6033 }
6034 else
6035 {
6036 vmain = vin;
6037 vnext = NULL_TREE;
6038 }
6039
6040 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
6041 label_vec.quick_push (t);
6042 i = 1;
6043
6044 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
6045 for (inner = region->inner, casei = 1;
6046 inner;
6047 inner = inner->next, i++, casei++)
6048 {
6049 basic_block s_entry_bb, s_exit_bb;
6050
6051 /* Skip optional reduction region. */
6052 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
6053 {
6054 --i;
6055 --casei;
6056 continue;
6057 }
6058
6059 s_entry_bb = inner->entry;
6060 s_exit_bb = inner->exit;
6061
6062 t = gimple_block_label (s_entry_bb);
6063 u = build_int_cst (unsigned_type_node, casei);
6064 u = build_case_label (u, NULL, t);
6065 label_vec.quick_push (u);
6066
6067 si = gsi_last_bb (s_entry_bb);
6068 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
6069 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
6070 gsi_remove (&si, true);
6071 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
6072
6073 if (s_exit_bb == NULL)
6074 continue;
6075
6076 si = gsi_last_bb (s_exit_bb);
6077 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6078 gsi_remove (&si, true);
6079
6080 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
6081 }
6082
6083 /* Error handling code goes in DEFAULT_BB. */
6084 t = gimple_block_label (default_bb);
6085 u = build_case_label (NULL, NULL, t);
6086 make_edge (l0_bb, default_bb, 0);
6087 add_bb_to_loop (default_bb, current_loops->tree_root);
6088
6089 stmt = gimple_build_switch (vmain, u, label_vec);
6090 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
6091 gsi_remove (&switch_si, true);
6092
6093 si = gsi_start_bb (default_bb);
6094 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
6095 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
6096
6097 if (exit_reachable)
6098 {
6099 tree bfn_decl;
6100
6101 /* Code to get the next section goes in L1_BB. */
6102 si = gsi_last_bb (l1_bb);
6103 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
6104
6105 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
6106 stmt = gimple_build_call (bfn_decl, 0);
6107 gimple_call_set_lhs (stmt, vnext);
6108 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6109 gsi_remove (&si, true);
6110
6111 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
6112 }
6113
6114 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
6115 si = gsi_last_bb (l2_bb);
6116 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
6117 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
6118 else if (gimple_omp_return_lhs (gsi_stmt (si)))
6119 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
6120 else
6121 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
6122 stmt = gimple_build_call (t, 0);
6123 if (gimple_omp_return_lhs (gsi_stmt (si)))
6124 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
6125 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
6126 gsi_remove (&si, true);
6127
6128 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
6129 }
6130
6131 /* Expand code for an OpenMP single directive. We've already expanded
6132 much of the code, here we simply place the GOMP_barrier call. */
6133
6134 static void
6135 expand_omp_single (struct omp_region *region)
6136 {
6137 basic_block entry_bb, exit_bb;
6138 gimple_stmt_iterator si;
6139
6140 entry_bb = region->entry;
6141 exit_bb = region->exit;
6142
6143 si = gsi_last_bb (entry_bb);
6144 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
6145 gsi_remove (&si, true);
6146 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6147
6148 si = gsi_last_bb (exit_bb);
6149 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
6150 {
6151 tree t = gimple_omp_return_lhs (gsi_stmt (si));
6152 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
6153 }
6154 gsi_remove (&si, true);
6155 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6156 }
6157
6158 /* Generic expansion for OpenMP synchronization directives: master,
6159 ordered and critical. All we need to do here is remove the entry
6160 and exit markers for REGION. */
6161
6162 static void
6163 expand_omp_synch (struct omp_region *region)
6164 {
6165 basic_block entry_bb, exit_bb;
6166 gimple_stmt_iterator si;
6167
6168 entry_bb = region->entry;
6169 exit_bb = region->exit;
6170
6171 si = gsi_last_bb (entry_bb);
6172 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
6173 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
6174 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
6175 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
6176 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
6177 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
6178 gsi_remove (&si, true);
6179 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
6180
6181 if (exit_bb)
6182 {
6183 si = gsi_last_bb (exit_bb);
6184 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
6185 gsi_remove (&si, true);
6186 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
6187 }
6188 }
6189
6190 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6191 operation as a normal volatile load. */
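/* For instance (a sketch; the exact builtin depends on INDEX), for a
   4-byte operand INDEX is 2 and the call generated is roughly

     loaded_val = __atomic_load_4 (addr, MEMMODEL_RELAXED);

   with MEMMODEL_SEQ_CST instead when the directive is seq_cst.  */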
6192
6193 static bool
6194 expand_omp_atomic_load (basic_block load_bb, tree addr,
6195 tree loaded_val, int index)
6196 {
6197 enum built_in_function tmpbase;
6198 gimple_stmt_iterator gsi;
6199 basic_block store_bb;
6200 location_t loc;
6201 gimple *stmt;
6202 tree decl, call, type, itype;
6203
6204 gsi = gsi_last_bb (load_bb);
6205 stmt = gsi_stmt (gsi);
6206 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6207 loc = gimple_location (stmt);
6208
6209 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6210 is smaller than word size, then expand_atomic_load assumes that the load
6211 is atomic. We could avoid the builtin entirely in this case. */
6212
6213 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6214 decl = builtin_decl_explicit (tmpbase);
6215 if (decl == NULL_TREE)
6216 return false;
6217
6218 type = TREE_TYPE (loaded_val);
6219 itype = TREE_TYPE (TREE_TYPE (decl));
6220
6221 call = build_call_expr_loc (loc, decl, 2, addr,
6222 build_int_cst (NULL,
6223 gimple_omp_atomic_seq_cst_p (stmt)
6224 ? MEMMODEL_SEQ_CST
6225 : MEMMODEL_RELAXED));
6226 if (!useless_type_conversion_p (type, itype))
6227 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6228 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6229
6230 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6231 gsi_remove (&gsi, true);
6232
6233 store_bb = single_succ (load_bb);
6234 gsi = gsi_last_bb (store_bb);
6235 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6236 gsi_remove (&gsi, true);
6237
6238 if (gimple_in_ssa_p (cfun))
6239 update_ssa (TODO_update_ssa_no_phi);
6240
6241 return true;
6242 }
6243
6244 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6245 operation as a normal volatile store. */
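/* For instance (a sketch), for a 4-byte operand the call generated is
   roughly

     __atomic_store_4 (addr, stored_val, memmodel);

   or, when the previous value is still needed (an exchange),

     loaded_val = __atomic_exchange_4 (addr, stored_val, memmodel);  */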
6246
6247 static bool
6248 expand_omp_atomic_store (basic_block load_bb, tree addr,
6249 tree loaded_val, tree stored_val, int index)
6250 {
6251 enum built_in_function tmpbase;
6252 gimple_stmt_iterator gsi;
6253 basic_block store_bb = single_succ (load_bb);
6254 location_t loc;
6255 gimple *stmt;
6256 tree decl, call, type, itype;
6257 machine_mode imode;
6258 bool exchange;
6259
6260 gsi = gsi_last_bb (load_bb);
6261 stmt = gsi_stmt (gsi);
6262 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6263
6264 /* If the load value is needed, then this isn't a store but an exchange. */
6265 exchange = gimple_omp_atomic_need_value_p (stmt);
6266
6267 gsi = gsi_last_bb (store_bb);
6268 stmt = gsi_stmt (gsi);
6269 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6270 loc = gimple_location (stmt);
6271
6272 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6273 is smaller than word size, then expand_atomic_store assumes that the store
6274 is atomic. We could avoid the builtin entirely in this case. */
6275
6276 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6277 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6278 decl = builtin_decl_explicit (tmpbase);
6279 if (decl == NULL_TREE)
6280 return false;
6281
6282 type = TREE_TYPE (stored_val);
6283
6284 /* Dig out the type of the function's second argument. */
6285 itype = TREE_TYPE (decl);
6286 itype = TYPE_ARG_TYPES (itype);
6287 itype = TREE_CHAIN (itype);
6288 itype = TREE_VALUE (itype);
6289 imode = TYPE_MODE (itype);
6290
6291 if (exchange && !can_atomic_exchange_p (imode, true))
6292 return false;
6293
6294 if (!useless_type_conversion_p (itype, type))
6295 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6296 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6297 build_int_cst (NULL,
6298 gimple_omp_atomic_seq_cst_p (stmt)
6299 ? MEMMODEL_SEQ_CST
6300 : MEMMODEL_RELAXED));
6301 if (exchange)
6302 {
6303 if (!useless_type_conversion_p (type, itype))
6304 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6305 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6306 }
6307
6308 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6309 gsi_remove (&gsi, true);
6310
6311 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6312 gsi = gsi_last_bb (load_bb);
6313 gsi_remove (&gsi, true);
6314
6315 if (gimple_in_ssa_p (cfun))
6316 update_ssa (TODO_update_ssa_no_phi);
6317
6318 return true;
6319 }
6320
6321 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6322 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6323 size of the data type, and thus usable to find the index of the builtin
6324 decl. Returns false if the expression is not of the proper form. */
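/* For example (a sketch; "x" is a placeholder), for an update of the form
   "*addr = *addr + x" on a 4-byte integer, INDEX is 2 and the call
   generated is roughly

     __atomic_fetch_add_4 (addr, x, MEMMODEL_RELAXED);

   or __atomic_add_fetch_4 when the updated value is needed.  */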
6325
6326 static bool
6327 expand_omp_atomic_fetch_op (basic_block load_bb,
6328 tree addr, tree loaded_val,
6329 tree stored_val, int index)
6330 {
6331 enum built_in_function oldbase, newbase, tmpbase;
6332 tree decl, itype, call;
6333 tree lhs, rhs;
6334 basic_block store_bb = single_succ (load_bb);
6335 gimple_stmt_iterator gsi;
6336 gimple *stmt;
6337 location_t loc;
6338 enum tree_code code;
6339 bool need_old, need_new;
6340 machine_mode imode;
6341 bool seq_cst;
6342
6343 /* We expect to find the following sequences:
6344
6345 load_bb:
6346 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6347
6348 store_bb:
6349 val = tmp OP something; (or: something OP tmp)
6350 GIMPLE_OMP_STORE (val)
6351
6352 ???FIXME: Allow a more flexible sequence.
6353 Perhaps use data flow to pick the statements.
6354
6355 */
6356
6357 gsi = gsi_after_labels (store_bb);
6358 stmt = gsi_stmt (gsi);
6359 loc = gimple_location (stmt);
6360 if (!is_gimple_assign (stmt))
6361 return false;
6362 gsi_next (&gsi);
6363 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6364 return false;
6365 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6366 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6367 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6368 gcc_checking_assert (!need_old || !need_new);
6369
6370 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6371 return false;
6372
6373 /* Check for one of the supported fetch-op operations. */
6374 code = gimple_assign_rhs_code (stmt);
6375 switch (code)
6376 {
6377 case PLUS_EXPR:
6378 case POINTER_PLUS_EXPR:
6379 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6380 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6381 break;
6382 case MINUS_EXPR:
6383 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6384 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6385 break;
6386 case BIT_AND_EXPR:
6387 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6388 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6389 break;
6390 case BIT_IOR_EXPR:
6391 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6392 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6393 break;
6394 case BIT_XOR_EXPR:
6395 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6396 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6397 break;
6398 default:
6399 return false;
6400 }
6401
6402 /* Make sure the expression is of the proper form. */
6403 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6404 rhs = gimple_assign_rhs2 (stmt);
6405 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6406 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6407 rhs = gimple_assign_rhs1 (stmt);
6408 else
6409 return false;
6410
6411 tmpbase = ((enum built_in_function)
6412 ((need_new ? newbase : oldbase) + index + 1));
6413 decl = builtin_decl_explicit (tmpbase);
6414 if (decl == NULL_TREE)
6415 return false;
6416 itype = TREE_TYPE (TREE_TYPE (decl));
6417 imode = TYPE_MODE (itype);
6418
6419 /* We could test all of the various optabs involved, but the fact of the
6420 matter is that (with the exception of i486 vs i586 and xadd) all targets
6421 that support any atomic operation optab also implement compare-and-swap.
6422 Let optabs.c take care of expanding any compare-and-swap loop. */
6423 if (!can_compare_and_swap_p (imode, true) || !can_atomic_load_p (imode))
6424 return false;
6425
6426 gsi = gsi_last_bb (load_bb);
6427 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6428
6429 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6430 It only requires that the operation happen atomically. Thus we can
6431 use the RELAXED memory model. */
6432 call = build_call_expr_loc (loc, decl, 3, addr,
6433 fold_convert_loc (loc, itype, rhs),
6434 build_int_cst (NULL,
6435 seq_cst ? MEMMODEL_SEQ_CST
6436 : MEMMODEL_RELAXED));
6437
6438 if (need_old || need_new)
6439 {
6440 lhs = need_old ? loaded_val : stored_val;
6441 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6442 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6443 }
6444 else
6445 call = fold_convert_loc (loc, void_type_node, call);
6446 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6447 gsi_remove (&gsi, true);
6448
6449 gsi = gsi_last_bb (store_bb);
6450 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6451 gsi_remove (&gsi, true);
6452 gsi = gsi_last_bb (store_bb);
6453 stmt = gsi_stmt (gsi);
6454 gsi_remove (&gsi, true);
6455
6456 if (gimple_in_ssa_p (cfun))
6457 {
6458 release_defs (stmt);
6459 update_ssa (TODO_update_ssa_no_phi);
6460 }
6461
6462 return true;
6463 }
6464
6465 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6466
6467 oldval = *addr;
6468 repeat:
6469 newval = rhs; // with oldval replacing *addr in rhs
6470 oldval = __sync_val_compare_and_swap (addr, oldval, newval);
6471 if (oldval != newval)
6472 goto repeat;
6473
6474 INDEX is log2 of the size of the data type, and thus usable to find the
6475 index of the builtin decl. */
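/* As a simplified C sketch of that loop (oldv/newv/prev are placeholder
   names):

     oldv = *addr;
     do
       {
         newv = <rhs with oldv substituted for *addr>;
         prev = __sync_val_compare_and_swap (addr, oldv, newv);
         done = (prev == oldv);
         oldv = prev;
       }
     while (!done);  */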
6476
6477 static bool
6478 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6479 tree addr, tree loaded_val, tree stored_val,
6480 int index)
6481 {
6482 tree loadedi, storedi, initial, new_storedi, old_vali;
6483 tree type, itype, cmpxchg, iaddr;
6484 gimple_stmt_iterator si;
6485 basic_block loop_header = single_succ (load_bb);
6486 gimple *phi, *stmt;
6487 edge e;
6488 enum built_in_function fncode;
6489
6490 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6491 order to use the RELAXED memory model effectively. */
6492 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6493 + index + 1);
6494 cmpxchg = builtin_decl_explicit (fncode);
6495 if (cmpxchg == NULL_TREE)
6496 return false;
6497 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6498 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6499
6500 if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
6501 || !can_atomic_load_p (TYPE_MODE (itype)))
6502 return false;
6503
6504 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6505 si = gsi_last_bb (load_bb);
6506 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6507
6508 /* For floating-point values, we'll need to view-convert them to integers
6509 so that we can perform the atomic compare and swap. Simplify the
6510 following code by always setting up the "i"ntegral variables. */
6511 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6512 {
6513 tree iaddr_val;
6514
6515 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6516 true));
6517 iaddr_val
6518 = force_gimple_operand_gsi (&si,
6519 fold_convert (TREE_TYPE (iaddr), addr),
6520 false, NULL_TREE, true, GSI_SAME_STMT);
6521 stmt = gimple_build_assign (iaddr, iaddr_val);
6522 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6523 loadedi = create_tmp_var (itype);
6524 if (gimple_in_ssa_p (cfun))
6525 loadedi = make_ssa_name (loadedi);
6526 }
6527 else
6528 {
6529 iaddr = addr;
6530 loadedi = loaded_val;
6531 }
6532
6533 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6534 tree loaddecl = builtin_decl_explicit (fncode);
6535 if (loaddecl)
6536 initial
6537 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6538 build_call_expr (loaddecl, 2, iaddr,
6539 build_int_cst (NULL_TREE,
6540 MEMMODEL_RELAXED)));
6541 else
6542 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6543 build_int_cst (TREE_TYPE (iaddr), 0));
6544
6545 initial
6546 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6547 GSI_SAME_STMT);
6548
6549 /* Move the value to the LOADEDI temporary. */
6550 if (gimple_in_ssa_p (cfun))
6551 {
6552 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6553 phi = create_phi_node (loadedi, loop_header);
6554 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6555 initial);
6556 }
6557 else
6558 gsi_insert_before (&si,
6559 gimple_build_assign (loadedi, initial),
6560 GSI_SAME_STMT);
6561 if (loadedi != loaded_val)
6562 {
6563 gimple_stmt_iterator gsi2;
6564 tree x;
6565
6566 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6567 gsi2 = gsi_start_bb (loop_header);
6568 if (gimple_in_ssa_p (cfun))
6569 {
6570 gassign *stmt;
6571 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6572 true, GSI_SAME_STMT);
6573 stmt = gimple_build_assign (loaded_val, x);
6574 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6575 }
6576 else
6577 {
6578 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6579 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6580 true, GSI_SAME_STMT);
6581 }
6582 }
6583 gsi_remove (&si, true);
6584
6585 si = gsi_last_bb (store_bb);
6586 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6587
6588 if (iaddr == addr)
6589 storedi = stored_val;
6590 else
6591 storedi
6592 = force_gimple_operand_gsi (&si,
6593 build1 (VIEW_CONVERT_EXPR, itype,
6594 stored_val), true, NULL_TREE, true,
6595 GSI_SAME_STMT);
6596
6597 /* Build the compare&swap statement. */
6598 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6599 new_storedi = force_gimple_operand_gsi (&si,
6600 fold_convert (TREE_TYPE (loadedi),
6601 new_storedi),
6602 true, NULL_TREE,
6603 true, GSI_SAME_STMT);
6604
6605 if (gimple_in_ssa_p (cfun))
6606 old_vali = loadedi;
6607 else
6608 {
6609 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6610 stmt = gimple_build_assign (old_vali, loadedi);
6611 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6612
6613 stmt = gimple_build_assign (loadedi, new_storedi);
6614 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6615 }
6616
6617 /* Note that we always perform the comparison as an integer, even for
6618 floating point. This allows the atomic operation to properly
6619 succeed even with NaNs and -0.0. */
6620 tree ne = build2 (NE_EXPR, boolean_type_node, new_storedi, old_vali);
6621 stmt = gimple_build_cond_empty (ne);
6622 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6623
6624 /* Update cfg. */
6625 e = single_succ_edge (store_bb);
6626 e->flags &= ~EDGE_FALLTHRU;
6627 e->flags |= EDGE_FALSE_VALUE;
6628 /* Expect no looping. */
6629 e->probability = profile_probability::guessed_always ();
6630
6631 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6632 e->probability = profile_probability::guessed_never ();
6633
6634 /* Copy the new value to loadedi (we already did that before the condition
6635 if we are not in SSA). */
6636 if (gimple_in_ssa_p (cfun))
6637 {
6638 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6639 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6640 }
6641
6642 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6643 gsi_remove (&si, true);
6644
6645 struct loop *loop = alloc_loop ();
6646 loop->header = loop_header;
6647 loop->latch = store_bb;
6648 add_loop (loop, loop_header->loop_father);
6649
6650 if (gimple_in_ssa_p (cfun))
6651 update_ssa (TODO_update_ssa_no_phi);
6652
6653 return true;
6654 }
6655
6656 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6657
6658 GOMP_atomic_start ();
6659 *addr = rhs;
6660 GOMP_atomic_end ();
6661
6662 The result is not globally atomic, but works so long as all parallel
6663 references are within #pragma omp atomic directives. According to
6664 responses received from omp@openmp.org, this appears to be within spec,
6665 which makes sense, since that's how several other compilers handle
6666 this situation as well.
6667 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6668 expanding. STORED_VAL is the operand of the matching
6669 GIMPLE_OMP_ATOMIC_STORE.
6670
6671 We replace
6672 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6673 loaded_val = *addr;
6674
6675 and replace
6676 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6677 *addr = stored_val;
6678 */
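
/* For instance (simplified), "#pragma omp atomic" on "x += 1" expanded this
   way becomes roughly

     GOMP_atomic_start ();
     loaded_val = *addr;
     ... stored_val computed from loaded_val ...
     *addr = stored_val;
     GOMP_atomic_end ();  */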
6679
6680 static bool
6681 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6682 tree addr, tree loaded_val, tree stored_val)
6683 {
6684 gimple_stmt_iterator si;
6685 gassign *stmt;
6686 tree t;
6687
6688 si = gsi_last_bb (load_bb);
6689 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6690
6691 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6692 t = build_call_expr (t, 0);
6693 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6694
6695 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6696 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6697 gsi_remove (&si, true);
6698
6699 si = gsi_last_bb (store_bb);
6700 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6701
6702 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6703 stored_val);
6704 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6705
6706 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6707 t = build_call_expr (t, 0);
6708 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6709 gsi_remove (&si, true);
6710
6711 if (gimple_in_ssa_p (cfun))
6712 update_ssa (TODO_update_ssa_no_phi);
6713 return true;
6714 }
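/* Illustrative example (not part of the original source), assuming an
   operand type that none of the faster expansions can handle:

     #pragma omp atomic
     x += 1;

   is turned by the function above into roughly

     GOMP_atomic_start ();
     loaded_val = *addr;
     ... stored_val = loaded_val + 1, computed by earlier lowering ...
     *addr = stored_val;
     GOMP_atomic_end ();

   so mutual exclusion is provided by the libgomp lock rather than by a
   hardware atomic operation.  */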
6715
6716 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
6717 using expand_omp_atomic_fetch_op. If that fails, we try to
6718 call expand_omp_atomic_pipeline, and if that fails too, the
6719 ultimate fallback is wrapping the operation in a mutex
6720 (expand_omp_atomic_mutex). REGION is the atomic region built
6721 by build_omp_regions_1(). */
6722
6723 static void
6724 expand_omp_atomic (struct omp_region *region)
6725 {
6726 basic_block load_bb = region->entry, store_bb = region->exit;
6727 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6728 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6729 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6730 tree addr = gimple_omp_atomic_load_rhs (load);
6731 tree stored_val = gimple_omp_atomic_store_val (store);
6732 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6733 HOST_WIDE_INT index;
6734
6735 /* Make sure the type is one of the supported sizes. */
6736 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6737 index = exact_log2 (index);
6738 if (index >= 0 && index <= 4)
6739 {
6740 unsigned int align = TYPE_ALIGN_UNIT (type);
6741
6742 /* __sync builtins require strict data alignment. */
6743 if (exact_log2 (align) >= index)
6744 {
6745 /* Atomic load. */
6746 scalar_mode smode;
6747 if (loaded_val == stored_val
6748 && (is_int_mode (TYPE_MODE (type), &smode)
6749 || is_float_mode (TYPE_MODE (type), &smode))
6750 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6751 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6752 return;
6753
6754 /* Atomic store. */
6755 if ((is_int_mode (TYPE_MODE (type), &smode)
6756 || is_float_mode (TYPE_MODE (type), &smode))
6757 && GET_MODE_BITSIZE (smode) <= BITS_PER_WORD
6758 && store_bb == single_succ (load_bb)
6759 && first_stmt (store_bb) == store
6760 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6761 stored_val, index))
6762 return;
6763
6764 /* When possible, use specialized atomic update functions. */
6765 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6766 && store_bb == single_succ (load_bb)
6767 && expand_omp_atomic_fetch_op (load_bb, addr,
6768 loaded_val, stored_val, index))
6769 return;
6770
6771 /* If we don't have specialized __sync builtins, try and implement
6772 as a compare and swap loop. */
6773 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6774 loaded_val, stored_val, index))
6775 return;
6776 }
6777 }
6778
6779 /* The ultimate fallback is wrapping the operation in a mutex. */
6780 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6781 }
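/* Worked example (illustration only): for a 'double' operand,
   TYPE_SIZE_UNIT is 8, so index = exact_log2 (8) = 3, which lies in the
   supported range 0..4 (operands of 1, 2, 4, 8 or 16 bytes).  If the type
   is also naturally aligned (TYPE_ALIGN_UNIT of 8, exact_log2 == 3 >= 3),
   the __sync-based expansions above are tried first and the mutex fallback
   is used only if all of them fail.  */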
6782
6783 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6784 at REGION_EXIT. */
6785
6786 static void
6787 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6788 basic_block region_exit)
6789 {
6790 struct loop *outer = region_entry->loop_father;
6791 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6792
6793 /* Don't parallelize the kernels region if it contains more than one outer
6794 loop. */
6795 unsigned int nr_outer_loops = 0;
6796 struct loop *single_outer = NULL;
6797 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6798 {
6799 gcc_assert (loop_outer (loop) == outer);
6800
6801 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6802 continue;
6803
6804 if (region_exit != NULL
6805 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6806 continue;
6807
6808 nr_outer_loops++;
6809 single_outer = loop;
6810 }
6811 if (nr_outer_loops != 1)
6812 return;
6813
6814 for (struct loop *loop = single_outer->inner;
6815 loop != NULL;
6816 loop = loop->inner)
6817 if (loop->next)
6818 return;
6819
6820 /* Mark the loops in the region. */
6821 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6822 loop->in_oacc_kernels_region = true;
6823 }
6824
6825 /* Types used to pass grid and workgroup sizes to kernel invocation. */
6826
6827 struct GTY(()) grid_launch_attributes_trees
6828 {
6829 tree kernel_dim_array_type;
6830 tree kernel_lattrs_dimnum_decl;
6831 tree kernel_lattrs_grid_decl;
6832 tree kernel_lattrs_group_decl;
6833 tree kernel_launch_attributes_type;
6834 };
6835
6836 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6837
6838 /* Create types used to pass kernel launch attributes to target. */
6839
6840 static void
6841 grid_create_kernel_launch_attr_types (void)
6842 {
6843 if (grid_attr_trees)
6844 return;
6845 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6846
6847 tree dim_arr_index_type
6848 = build_index_type (build_int_cst (integer_type_node, 2));
6849 grid_attr_trees->kernel_dim_array_type
6850 = build_array_type (uint32_type_node, dim_arr_index_type);
6851
6852 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6853 grid_attr_trees->kernel_lattrs_dimnum_decl
6854 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6855 uint32_type_node);
6856 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6857
6858 grid_attr_trees->kernel_lattrs_grid_decl
6859 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6860 grid_attr_trees->kernel_dim_array_type);
6861 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6862 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6863 grid_attr_trees->kernel_lattrs_group_decl
6864 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6865 grid_attr_trees->kernel_dim_array_type);
6866 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6867 = grid_attr_trees->kernel_lattrs_grid_decl;
6868 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6869 "__gomp_kernel_launch_attributes",
6870 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6871 }
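/* For reference (illustration only), the trees built above describe a
   structure roughly equivalent to the following C declaration:

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;           // number of dimensions actually used
       uint32_t grid_size[3];   // grid size per dimension
       uint32_t group_size[3];  // work-group size per dimension
     };
*/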
6872
6873 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6874 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6875 of type uint32_type_node. */
6876
6877 static void
6878 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6879 tree fld_decl, int index, tree value)
6880 {
6881 tree ref = build4 (ARRAY_REF, uint32_type_node,
6882 build3 (COMPONENT_REF,
6883 grid_attr_trees->kernel_dim_array_type,
6884 range_var, fld_decl, NULL_TREE),
6885 build_int_cst (integer_type_node, index),
6886 NULL_TREE, NULL_TREE);
6887 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6888 }
6889
6890 /* Return a tree representation of a pointer to a structure with grid and
6891 work-group size information. Statements filling that information will be
6892 inserted before GSI. TGT_STMT is the target statement which has the
6893 necessary information in it. */
6894
6895 static tree
6896 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6897 gomp_target *tgt_stmt)
6898 {
6899 grid_create_kernel_launch_attr_types ();
6900 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6901 "__kernel_launch_attrs");
6902
6903 unsigned max_dim = 0;
6904 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6905 clause;
6906 clause = OMP_CLAUSE_CHAIN (clause))
6907 {
6908 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6909 continue;
6910
6911 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6912 max_dim = MAX (dim, max_dim);
6913
6914 grid_insert_store_range_dim (gsi, lattrs,
6915 grid_attr_trees->kernel_lattrs_grid_decl,
6916 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6917 grid_insert_store_range_dim (gsi, lattrs,
6918 grid_attr_trees->kernel_lattrs_group_decl,
6919 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6920 }
6921
6922 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6923 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6924 gcc_checking_assert (max_dim <= 2);
6925 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6926 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6927 GSI_SAME_STMT);
6928 TREE_ADDRESSABLE (lattrs) = 1;
6929 return build_fold_addr_expr (lattrs);
6930 }
6931
6932 /* Build target argument identifier from the DEVICE identifier, value
6933 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6934
6935 static tree
6936 get_target_argument_identifier_1 (int device, bool subseqent_param, int id)
6937 {
6938 tree t = build_int_cst (integer_type_node, device);
6939 if (subseqent_param)
6940 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6941 build_int_cst (integer_type_node,
6942 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6943 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6944 build_int_cst (integer_type_node, id));
6945 return t;
6946 }
6947
6948 /* Like above, but return it in a type that can be directly stored as an element
6949 of the argument array. */
6950
6951 static tree
6952 get_target_argument_identifier (int device, bool subseqent_param, int id)
6953 {
6954 tree t = get_target_argument_identifier_1 (device, subseqent_param, id);
6955 return fold_convert (ptr_type_node, t);
6956 }
6957
6958 /* Return a target argument consisting of DEVICE identifier, value identifier
6959 ID, and the actual VALUE. */
6960
6961 static tree
6962 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6963 tree value)
6964 {
6965 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6966 fold_convert (integer_type_node, value),
6967 build_int_cst (unsigned_type_node,
6968 GOMP_TARGET_ARG_VALUE_SHIFT));
6969 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6970 get_target_argument_identifier_1 (device, false, id));
6971 t = fold_convert (ptr_type_node, t);
6972 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6973 }
6974
6975 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6976 push one argument to ARGS with the DEVICE, ID and VALUE all embedded in it,
6977 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6978 arguments. */
6979
6980 static void
6981 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6982 int id, tree value, vec <tree> *args)
6983 {
6984 if (tree_fits_shwi_p (value)
6985 && tree_to_shwi (value) > -(1 << 15)
6986 && tree_to_shwi (value) < (1 << 15))
6987 args->quick_push (get_target_argument_value (gsi, device, id, value));
6988 else
6989 {
6990 args->quick_push (get_target_argument_identifier (device, true, id));
6991 value = fold_convert (ptr_type_node, value);
6992 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6993 GSI_SAME_STMT);
6994 args->quick_push (value);
6995 }
6996 }
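/* Worked example (illustration only): for DEVICE == GOMP_TARGET_ARG_DEVICE_ALL,
   ID == GOMP_TARGET_ARG_NUM_TEAMS and a constant VALUE of 4 (which fits in
   the +/- 2^15 range), a single packed element is pushed, equivalent to

     (void *) ((4 << GOMP_TARGET_ARG_VALUE_SHIFT)
               | GOMP_TARGET_ARG_DEVICE_ALL | GOMP_TARGET_ARG_NUM_TEAMS);

   whereas a value that does not fit is passed as two consecutive elements:
   the identifier with GOMP_TARGET_ARG_SUBSEQUENT_PARAM set, followed by the
   value itself converted to a pointer.  */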
6997
6998 /* Create an array of arguments that is then passed to GOMP_target. */
6999
7000 static tree
7001 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
7002 {
7003 auto_vec <tree, 6> args;
7004 tree clauses = gimple_omp_target_clauses (tgt_stmt);
7005 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
7006 if (c)
7007 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
7008 else
7009 t = integer_minus_one_node;
7010 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7011 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
7012
7013 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
7014 if (c)
7015 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
7016 else
7017 t = integer_minus_one_node;
7018 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
7019 GOMP_TARGET_ARG_THREAD_LIMIT, t,
7020 &args);
7021
7022 /* Add HSA-specific grid sizes, if available. */
7023 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7024 OMP_CLAUSE__GRIDDIM_))
7025 {
7026 int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
7027 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
7028 args.quick_push (t);
7029 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
7030 }
7031
7032 /* Produce more, perhaps device specific, arguments here. */
7033
7034 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
7035 args.length () + 1),
7036 ".omp_target_args");
7037 for (unsigned i = 0; i < args.length (); i++)
7038 {
7039 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7040 build_int_cst (integer_type_node, i),
7041 NULL_TREE, NULL_TREE);
7042 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
7043 GSI_SAME_STMT);
7044 }
7045 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
7046 build_int_cst (integer_type_node, args.length ()),
7047 NULL_TREE, NULL_TREE);
7048 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
7049 GSI_SAME_STMT);
7050 TREE_ADDRESSABLE (argarray) = 1;
7051 return build_fold_addr_expr (argarray);
7052 }
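/* Illustrative layout (not part of the original source) of the
   .omp_target_args array built above for a plain OpenMP target region,
   assuming both values fit in the packed form and no _GRIDDIM_ clause is
   present:

     argarray[0] = <packed num_teams argument>;
     argarray[1] = <packed thread_limit argument>;
     argarray[2] = NULL;     // terminator stored as null_pointer_node

   With a _GRIDDIM_ clause, an HSA identifier element and a pointer to the
   kernel launch attributes structure are added before the terminator.  */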
7053
7054 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
7055
7056 static void
7057 expand_omp_target (struct omp_region *region)
7058 {
7059 basic_block entry_bb, exit_bb, new_bb;
7060 struct function *child_cfun;
7061 tree child_fn, block, t;
7062 gimple_stmt_iterator gsi;
7063 gomp_target *entry_stmt;
7064 gimple *stmt;
7065 edge e;
7066 bool offloaded, data_region;
7067
7068 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
7069 new_bb = region->entry;
7070
7071 offloaded = is_gimple_omp_offloaded (entry_stmt);
7072 switch (gimple_omp_target_kind (entry_stmt))
7073 {
7074 case GF_OMP_TARGET_KIND_REGION:
7075 case GF_OMP_TARGET_KIND_UPDATE:
7076 case GF_OMP_TARGET_KIND_ENTER_DATA:
7077 case GF_OMP_TARGET_KIND_EXIT_DATA:
7078 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7079 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7080 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7081 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7082 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7083 data_region = false;
7084 break;
7085 case GF_OMP_TARGET_KIND_DATA:
7086 case GF_OMP_TARGET_KIND_OACC_DATA:
7087 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7088 data_region = true;
7089 break;
7090 default:
7091 gcc_unreachable ();
7092 }
7093
7094 child_fn = NULL_TREE;
7095 child_cfun = NULL;
7096 if (offloaded)
7097 {
7098 child_fn = gimple_omp_target_child_fn (entry_stmt);
7099 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
7100 }
7101
7102 /* Supported by expand_omp_taskreg, but not here. */
7103 if (child_cfun != NULL)
7104 gcc_checking_assert (!child_cfun->cfg);
7105 gcc_checking_assert (!gimple_in_ssa_p (cfun));
7106
7107 entry_bb = region->entry;
7108 exit_bb = region->exit;
7109
7110 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
7111 {
7112 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
7113
7114 /* Further down, both OpenACC kernels and OpenACC parallel constructs
7115 will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
7116 two, there is an "oacc kernels" attribute set for OpenACC kernels. */
7117 DECL_ATTRIBUTES (child_fn)
7118 = tree_cons (get_identifier ("oacc kernels"),
7119 NULL_TREE, DECL_ATTRIBUTES (child_fn));
7120 }
7121
7122 if (offloaded)
7123 {
7124 unsigned srcidx, dstidx, num;
7125
7126 /* If the offloading region needs data sent from the parent
7127 function, then the very first statement (except possible
7128 tree profile counter updates) of the offloading body
7129 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
7130 &.OMP_DATA_O is passed as an argument to the child function,
7131 we need to replace it with the argument as seen by the child
7132 function.
7133
7134 In most cases, this will end up being the identity assignment
7135 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
7136 a function call that has been inlined, the original PARM_DECL
7137 .OMP_DATA_I may have been converted into a different local
7138 variable, in which case we need to keep the assignment. */
7139 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
7140 if (data_arg)
7141 {
7142 basic_block entry_succ_bb = single_succ (entry_bb);
7143 gimple_stmt_iterator gsi;
7144 tree arg;
7145 gimple *tgtcopy_stmt = NULL;
7146 tree sender = TREE_VEC_ELT (data_arg, 0);
7147
7148 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
7149 {
7150 gcc_assert (!gsi_end_p (gsi));
7151 stmt = gsi_stmt (gsi);
7152 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7153 continue;
7154
7155 if (gimple_num_ops (stmt) == 2)
7156 {
7157 tree arg = gimple_assign_rhs1 (stmt);
7158
7159 /* We're ignoring the subcode because we're
7160 effectively doing a STRIP_NOPS. */
7161
7162 if (TREE_CODE (arg) == ADDR_EXPR
7163 && TREE_OPERAND (arg, 0) == sender)
7164 {
7165 tgtcopy_stmt = stmt;
7166 break;
7167 }
7168 }
7169 }
7170
7171 gcc_assert (tgtcopy_stmt != NULL);
7172 arg = DECL_ARGUMENTS (child_fn);
7173
7174 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
7175 gsi_remove (&gsi, true);
7176 }
7177
7178 /* Declare local variables needed in CHILD_CFUN. */
7179 block = DECL_INITIAL (child_fn);
7180 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
7181 /* The gimplifier could record temporaries in the offloading block
7182 rather than in containing function's local_decls chain,
7183 which would mean cgraph missed finalizing them. Do it now. */
7184 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
7185 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
7186 varpool_node::finalize_decl (t);
7187 DECL_SAVED_TREE (child_fn) = NULL;
7188 /* We'll create a CFG for child_fn, so no gimple body is needed. */
7189 gimple_set_body (child_fn, NULL);
7190 TREE_USED (block) = 1;
7191
7192 /* Reset DECL_CONTEXT on function arguments. */
7193 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
7194 DECL_CONTEXT (t) = child_fn;
7195
7196 /* Split ENTRY_BB at GIMPLE_*,
7197 so that it can be moved to the child function. */
7198 gsi = gsi_last_bb (entry_bb);
7199 stmt = gsi_stmt (gsi);
7200 gcc_assert (stmt
7201 && gimple_code (stmt) == gimple_code (entry_stmt));
7202 e = split_block (entry_bb, stmt);
7203 gsi_remove (&gsi, true);
7204 entry_bb = e->dest;
7205 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7206
7207 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7208 if (exit_bb)
7209 {
7210 gsi = gsi_last_bb (exit_bb);
7211 gcc_assert (!gsi_end_p (gsi)
7212 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7213 stmt = gimple_build_return (NULL);
7214 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7215 gsi_remove (&gsi, true);
7216 }
7217
7218 /* Make sure to generate early debug for the function before
7219 outlining anything. */
7220 if (! gimple_in_ssa_p (cfun))
7221 (*debug_hooks->early_global_decl) (cfun->decl);
7222
7223 /* Move the offloading region into CHILD_CFUN. */
7224
7225 block = gimple_block (entry_stmt);
7226
7227 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7228 if (exit_bb)
7229 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7230 /* When the OMP expansion process cannot guarantee an up-to-date
7231 loop tree, arrange for the child function to fix up loops. */
7232 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7233 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7234
7235 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7236 num = vec_safe_length (child_cfun->local_decls);
7237 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7238 {
7239 t = (*child_cfun->local_decls)[srcidx];
7240 if (DECL_CONTEXT (t) == cfun->decl)
7241 continue;
7242 if (srcidx != dstidx)
7243 (*child_cfun->local_decls)[dstidx] = t;
7244 dstidx++;
7245 }
7246 if (dstidx != num)
7247 vec_safe_truncate (child_cfun->local_decls, dstidx);
7248
7249 /* Inform the callgraph about the new function. */
7250 child_cfun->curr_properties = cfun->curr_properties;
7251 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7252 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7253 cgraph_node *node = cgraph_node::get_create (child_fn);
7254 node->parallelized_function = 1;
7255 cgraph_node::add_new_function (child_fn, true);
7256
7257 /* Add the new function to the offload table. */
7258 if (ENABLE_OFFLOADING)
7259 vec_safe_push (offload_funcs, child_fn);
7260
7261 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7262 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7263
7264 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7265 fixed in a following pass. */
7266 push_cfun (child_cfun);
7267 if (need_asm)
7268 assign_assembler_name_if_needed (child_fn);
7269 cgraph_edge::rebuild_edges ();
7270
7271 /* Some EH regions might become dead, see PR34608. If
7272 pass_cleanup_cfg isn't the first pass to happen with the
7273 new child, these dead EH edges might cause problems.
7274 Clean them up now. */
7275 if (flag_exceptions)
7276 {
7277 basic_block bb;
7278 bool changed = false;
7279
7280 FOR_EACH_BB_FN (bb, cfun)
7281 changed |= gimple_purge_dead_eh_edges (bb);
7282 if (changed)
7283 cleanup_tree_cfg ();
7284 }
7285 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7286 verify_loop_structure ();
7287 pop_cfun ();
7288
7289 if (dump_file && !gimple_in_ssa_p (cfun))
7290 {
7291 omp_any_child_fn_dumped = true;
7292 dump_function_header (dump_file, child_fn, dump_flags);
7293 dump_function_to_file (child_fn, dump_file, dump_flags);
7294 }
7295 }
7296
7297 /* Emit a library call to launch the offloading region, or do data
7298 transfers. */
7299 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7300 enum built_in_function start_ix;
7301 location_t clause_loc;
7302 unsigned int flags_i = 0;
7303
7304 switch (gimple_omp_target_kind (entry_stmt))
7305 {
7306 case GF_OMP_TARGET_KIND_REGION:
7307 start_ix = BUILT_IN_GOMP_TARGET;
7308 break;
7309 case GF_OMP_TARGET_KIND_DATA:
7310 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7311 break;
7312 case GF_OMP_TARGET_KIND_UPDATE:
7313 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7314 break;
7315 case GF_OMP_TARGET_KIND_ENTER_DATA:
7316 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7317 break;
7318 case GF_OMP_TARGET_KIND_EXIT_DATA:
7319 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7320 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7321 break;
7322 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7323 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7324 start_ix = BUILT_IN_GOACC_PARALLEL;
7325 break;
7326 case GF_OMP_TARGET_KIND_OACC_DATA:
7327 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7328 start_ix = BUILT_IN_GOACC_DATA_START;
7329 break;
7330 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7331 start_ix = BUILT_IN_GOACC_UPDATE;
7332 break;
7333 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7334 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7335 break;
7336 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7337 start_ix = BUILT_IN_GOACC_DECLARE;
7338 break;
7339 default:
7340 gcc_unreachable ();
7341 }
7342
7343 clauses = gimple_omp_target_clauses (entry_stmt);
7344
7345 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
7346 library choose) and there is no conditional. */
7347 cond = NULL_TREE;
7348 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7349
7350 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7351 if (c)
7352 cond = OMP_CLAUSE_IF_EXPR (c);
7353
7354 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7355 if (c)
7356 {
7357 /* Even if we pass it to all library function calls, it is currently only
7358 defined/used for the OpenMP target ones. */
7359 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7360 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7361 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7362 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7363
7364 device = OMP_CLAUSE_DEVICE_ID (c);
7365 clause_loc = OMP_CLAUSE_LOCATION (c);
7366 }
7367 else
7368 clause_loc = gimple_location (entry_stmt);
7369
7370 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7371 if (c)
7372 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7373
7374 /* Ensure 'device' is of the correct type. */
7375 device = fold_convert_loc (clause_loc, integer_type_node, device);
7376
7377 /* If we found the clause 'if (cond)', build
7378 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7379 if (cond)
7380 {
7381 cond = gimple_boolify (cond);
7382
7383 basic_block cond_bb, then_bb, else_bb;
7384 edge e;
7385 tree tmp_var;
7386
7387 tmp_var = create_tmp_var (TREE_TYPE (device));
7388 if (offloaded)
7389 e = split_block_after_labels (new_bb);
7390 else
7391 {
7392 gsi = gsi_last_bb (new_bb);
7393 gsi_prev (&gsi);
7394 e = split_block (new_bb, gsi_stmt (gsi));
7395 }
7396 cond_bb = e->src;
7397 new_bb = e->dest;
7398 remove_edge (e);
7399
7400 then_bb = create_empty_bb (cond_bb);
7401 else_bb = create_empty_bb (then_bb);
7402 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7403 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7404
7405 stmt = gimple_build_cond_empty (cond);
7406 gsi = gsi_last_bb (cond_bb);
7407 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7408
7409 gsi = gsi_start_bb (then_bb);
7410 stmt = gimple_build_assign (tmp_var, device);
7411 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7412
7413 gsi = gsi_start_bb (else_bb);
7414 stmt = gimple_build_assign (tmp_var,
7415 build_int_cst (integer_type_node,
7416 GOMP_DEVICE_HOST_FALLBACK));
7417 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7418
7419 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7420 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7421 add_bb_to_loop (then_bb, cond_bb->loop_father);
7422 add_bb_to_loop (else_bb, cond_bb->loop_father);
7423 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7424 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7425
7426 device = tmp_var;
7427 gsi = gsi_last_bb (new_bb);
7428 }
7429 else
7430 {
7431 gsi = gsi_last_bb (new_bb);
7432 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7433 true, GSI_SAME_STMT);
7434 }
7435
7436 t = gimple_omp_target_data_arg (entry_stmt);
7437 if (t == NULL)
7438 {
7439 t1 = size_zero_node;
7440 t2 = build_zero_cst (ptr_type_node);
7441 t3 = t2;
7442 t4 = t2;
7443 }
7444 else
7445 {
7446 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7447 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7448 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7449 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7450 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7451 }
7452
7453 gimple *g;
7454 bool tagging = false;
7455 /* The maximum number of arguments used by any start_ix, without varargs. */
7456 auto_vec<tree, 11> args;
7457 args.quick_push (device);
7458 if (offloaded)
7459 args.quick_push (build_fold_addr_expr (child_fn));
7460 args.quick_push (t1);
7461 args.quick_push (t2);
7462 args.quick_push (t3);
7463 args.quick_push (t4);
7464 switch (start_ix)
7465 {
7466 case BUILT_IN_GOACC_DATA_START:
7467 case BUILT_IN_GOACC_DECLARE:
7468 case BUILT_IN_GOMP_TARGET_DATA:
7469 break;
7470 case BUILT_IN_GOMP_TARGET:
7471 case BUILT_IN_GOMP_TARGET_UPDATE:
7472 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7473 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7474 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7475 if (c)
7476 depend = OMP_CLAUSE_DECL (c);
7477 else
7478 depend = build_int_cst (ptr_type_node, 0);
7479 args.quick_push (depend);
7480 if (start_ix == BUILT_IN_GOMP_TARGET)
7481 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7482 break;
7483 case BUILT_IN_GOACC_PARALLEL:
7484 oacc_set_fn_attrib (child_fn, clauses, &args);
7485 tagging = true;
7486 /* FALLTHRU */
7487 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7488 case BUILT_IN_GOACC_UPDATE:
7489 {
7490 tree t_async = NULL_TREE;
7491
7492 /* If present, use the value specified by the respective
7493 clause, making sure that it is of the correct type. */
7494 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7495 if (c)
7496 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7497 integer_type_node,
7498 OMP_CLAUSE_ASYNC_EXPR (c));
7499 else if (!tagging)
7500 /* Default values for t_async. */
7501 t_async = fold_convert_loc (gimple_location (entry_stmt),
7502 integer_type_node,
7503 build_int_cst (integer_type_node,
7504 GOMP_ASYNC_SYNC));
7505 if (tagging && t_async)
7506 {
7507 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7508
7509 if (TREE_CODE (t_async) == INTEGER_CST)
7510 {
7511 /* See if we can pack the async arg into the tag's
7512 operand. */
7513 i_async = TREE_INT_CST_LOW (t_async);
7514 if (i_async < GOMP_LAUNCH_OP_MAX)
7515 t_async = NULL_TREE;
7516 else
7517 i_async = GOMP_LAUNCH_OP_MAX;
7518 }
7519 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7520 i_async));
7521 }
7522 if (t_async)
7523 args.safe_push (t_async);
7524
7525 /* Save the argument index, and ... */
7526 unsigned t_wait_idx = args.length ();
7527 unsigned num_waits = 0;
7528 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7529 if (!tagging || c)
7530 /* ... push a placeholder. */
7531 args.safe_push (integer_zero_node);
7532
7533 for (; c; c = OMP_CLAUSE_CHAIN (c))
7534 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7535 {
7536 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7537 integer_type_node,
7538 OMP_CLAUSE_WAIT_EXPR (c)));
7539 num_waits++;
7540 }
7541
7542 if (!tagging || num_waits)
7543 {
7544 tree len;
7545
7546 /* Now that we know the number, update the placeholder. */
7547 if (tagging)
7548 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7549 else
7550 len = build_int_cst (integer_type_node, num_waits);
7551 len = fold_convert_loc (gimple_location (entry_stmt),
7552 unsigned_type_node, len);
7553 args[t_wait_idx] = len;
7554 }
7555 }
7556 break;
7557 default:
7558 gcc_unreachable ();
7559 }
7560 if (tagging)
7561 /* Push terminal marker - zero. */
7562 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7563
7564 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7565 gimple_set_location (g, gimple_location (entry_stmt));
7566 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
7567 if (!offloaded)
7568 {
7569 g = gsi_stmt (gsi);
7570 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7571 gsi_remove (&gsi, true);
7572 }
7573 if (data_region && region->exit)
7574 {
7575 gsi = gsi_last_bb (region->exit);
7576 g = gsi_stmt (gsi);
7577 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7578 gsi_remove (&gsi, true);
7579 }
7580 }
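/* Illustration only: for a plain '#pragma omp target' region the code above
   ends up emitting a single call whose shape is roughly

     GOMP_target_ext (device, child_fn, map_num, hostaddrs, sizes, kinds,
                      flags, depend, args);

   (GOMP_target_ext being the libgomp entry point behind BUILT_IN_GOMP_TARGET
   in this GCC version); the data-only and OpenACC variants instead call the
   builtin chosen by the switch on START_IX above, with the argument list
   adjusted accordingly.  */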
7581
7582 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only with
7583 the iteration variable derived from the thread number. INTRA_GROUP means this
7584 is an expansion of a loop iterating over work-items within a separate
7585 iteration over groups. */
7586
7587 static void
7588 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7589 {
7590 gimple_stmt_iterator gsi;
7591 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7592 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7593 == GF_OMP_FOR_KIND_GRID_LOOP);
7594 size_t collapse = gimple_omp_for_collapse (for_stmt);
7595 struct omp_for_data_loop *loops
7596 = XALLOCAVEC (struct omp_for_data_loop,
7597 gimple_omp_for_collapse (for_stmt));
7598 struct omp_for_data fd;
7599
7600 remove_edge (BRANCH_EDGE (kfor->entry));
7601 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7602
7603 gcc_assert (kfor->cont);
7604 omp_extract_for_data (for_stmt, &fd, loops);
7605
7606 gsi = gsi_start_bb (body_bb);
7607
7608 for (size_t dim = 0; dim < collapse; dim++)
7609 {
7610 tree type, itype;
7611 itype = type = TREE_TYPE (fd.loops[dim].v);
7612 if (POINTER_TYPE_P (type))
7613 itype = signed_type_for (type);
7614
7615 tree n1 = fd.loops[dim].n1;
7616 tree step = fd.loops[dim].step;
7617 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7618 true, NULL_TREE, true, GSI_SAME_STMT);
7619 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7620 true, NULL_TREE, true, GSI_SAME_STMT);
7621 tree threadid;
7622 if (gimple_omp_for_grid_group_iter (for_stmt))
7623 {
7624 gcc_checking_assert (!intra_group);
7625 threadid = build_call_expr (builtin_decl_explicit
7626 (BUILT_IN_HSA_WORKGROUPID), 1,
7627 build_int_cstu (unsigned_type_node, dim));
7628 }
7629 else if (intra_group)
7630 threadid = build_call_expr (builtin_decl_explicit
7631 (BUILT_IN_HSA_WORKITEMID), 1,
7632 build_int_cstu (unsigned_type_node, dim));
7633 else
7634 threadid = build_call_expr (builtin_decl_explicit
7635 (BUILT_IN_HSA_WORKITEMABSID), 1,
7636 build_int_cstu (unsigned_type_node, dim));
7637 threadid = fold_convert (itype, threadid);
7638 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7639 true, GSI_SAME_STMT);
7640
7641 tree startvar = fd.loops[dim].v;
7642 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7643 if (POINTER_TYPE_P (type))
7644 t = fold_build_pointer_plus (n1, t);
7645 else
7646 t = fold_build2 (PLUS_EXPR, type, t, n1);
7647 t = fold_convert (type, t);
7648 t = force_gimple_operand_gsi (&gsi, t,
7649 DECL_P (startvar)
7650 && TREE_ADDRESSABLE (startvar),
7651 NULL_TREE, true, GSI_SAME_STMT);
7652 gassign *assign_stmt = gimple_build_assign (startvar, t);
7653 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7654 }
7655 /* Remove the omp for statement. */
7656 gsi = gsi_last_bb (kfor->entry);
7657 gsi_remove (&gsi, true);
7658
7659 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7660 gsi = gsi_last_bb (kfor->cont);
7661 gcc_assert (!gsi_end_p (gsi)
7662 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7663 gsi_remove (&gsi, true);
7664
7665 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7666 gsi = gsi_last_bb (kfor->exit);
7667 gcc_assert (!gsi_end_p (gsi)
7668 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7669 if (intra_group)
7670 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7671 gsi_remove (&gsi, true);
7672
7673 /* Fixup the much simpler CFG. */
7674 remove_edge (find_edge (kfor->cont, body_bb));
7675
7676 if (kfor->cont != body_bb)
7677 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7678 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7679 }
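/* Illustration only: for every collapsed dimension DIM the loop above
   replaces the original induction variable with a value computed directly
   from the HSA thread identity, conceptually

     fd.loops[DIM].v = n1 + threadid (DIM) * step;

   where threadid () is BUILT_IN_HSA_WORKGROUPID for the group iteration,
   BUILT_IN_HSA_WORKITEMID for an intra-group iteration and
   BUILT_IN_HSA_WORKITEMABSID otherwise.  */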
7680
7681 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7682 argument_decls. */
7683
7684 struct grid_arg_decl_map
7685 {
7686 tree old_arg;
7687 tree new_arg;
7688 };
7689
7690 /* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
7691 pertaining to the kernel function. */
7692
7693 static tree
7694 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7695 {
7696 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7697 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7698 tree t = *tp;
7699
7700 if (t == adm->old_arg)
7701 *tp = adm->new_arg;
7702 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7703 return NULL_TREE;
7704 }
7705
7706 /* If the TARGET region contains a kernel body for-loop, remove its region from the
7707 TARGET and expand it in HSA gridified kernel fashion. */
7708
7709 static void
7710 grid_expand_target_grid_body (struct omp_region *target)
7711 {
7712 if (!hsa_gen_requested_p ())
7713 return;
7714
7715 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7716 struct omp_region **pp;
7717
7718 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7719 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7720 break;
7721
7722 struct omp_region *gpukernel = *pp;
7723
7724 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7725 if (!gpukernel)
7726 {
7727 /* HSA cannot handle OACC stuff. */
7728 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7729 return;
7730 gcc_checking_assert (orig_child_fndecl);
7731 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7732 OMP_CLAUSE__GRIDDIM_));
7733 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7734
7735 hsa_register_kernel (n);
7736 return;
7737 }
7738
7739 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7740 OMP_CLAUSE__GRIDDIM_));
7741 tree inside_block
7742 = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7743 *pp = gpukernel->next;
7744 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7745 if ((*pp)->type == GIMPLE_OMP_FOR)
7746 break;
7747
7748 struct omp_region *kfor = *pp;
7749 gcc_assert (kfor);
7750 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7751 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7752 *pp = kfor->next;
7753 if (kfor->inner)
7754 {
7755 if (gimple_omp_for_grid_group_iter (for_stmt))
7756 {
7757 struct omp_region **next_pp;
7758 for (pp = &kfor->inner; *pp; pp = next_pp)
7759 {
7760 next_pp = &(*pp)->next;
7761 if ((*pp)->type != GIMPLE_OMP_FOR)
7762 continue;
7763 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7764 gcc_assert (gimple_omp_for_kind (inner)
7765 == GF_OMP_FOR_KIND_GRID_LOOP);
7766 grid_expand_omp_for_loop (*pp, true);
7767 *pp = (*pp)->next;
7768 next_pp = pp;
7769 }
7770 }
7771 expand_omp (kfor->inner);
7772 }
7773 if (gpukernel->inner)
7774 expand_omp (gpukernel->inner);
7775
7776 tree kern_fndecl = copy_node (orig_child_fndecl);
7777 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7778 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7779 tree tgtblock = gimple_block (tgt_stmt);
7780 tree fniniblock = make_node (BLOCK);
7781 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7782 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7783 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7784 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7785 DECL_INITIAL (kern_fndecl) = fniniblock;
7786 push_struct_function (kern_fndecl);
7787 cfun->function_end_locus = gimple_location (tgt_stmt);
7788 init_tree_ssa (cfun);
7789 pop_cfun ();
7790
7791 /* Make sure to generate early debug for the function before
7792 outlining anything. */
7793 if (! gimple_in_ssa_p (cfun))
7794 (*debug_hooks->early_global_decl) (cfun->decl);
7795
7796 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7797 gcc_assert (!DECL_CHAIN (old_parm_decl));
7798 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7799 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7800 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7801 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7802 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7803 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7804 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7805 kern_cfun->curr_properties = cfun->curr_properties;
7806
7807 grid_expand_omp_for_loop (kfor, false);
7808
7809 /* Remove the omp for statement. */
7810 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7811 gsi_remove (&gsi, true);
7812 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7813 return. */
7814 gsi = gsi_last_bb (gpukernel->exit);
7815 gcc_assert (!gsi_end_p (gsi)
7816 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7817 gimple *ret_stmt = gimple_build_return (NULL);
7818 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7819 gsi_remove (&gsi, true);
7820
7821 /* Statements in the first BB in the target construct have been produced by
7822 target lowering and must be copied inside the GPUKERNEL, with the two
7823 exceptions of the first OMP statement and the OMP_DATA assignment
7824 statement. */
7825 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7826 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7827 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7828 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7829 !gsi_end_p (tsi); gsi_next (&tsi))
7830 {
7831 gimple *stmt = gsi_stmt (tsi);
7832 if (is_gimple_omp (stmt))
7833 break;
7834 if (sender
7835 && is_gimple_assign (stmt)
7836 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7837 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7838 continue;
7839 gimple *copy = gimple_copy (stmt);
7840 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7841 gimple_set_block (copy, fniniblock);
7842 }
7843
7844 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7845 gpukernel->exit, inside_block);
7846
7847 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7848 kcn->mark_force_output ();
7849 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7850
7851 hsa_register_kernel (kcn, orig_child);
7852
7853 cgraph_node::add_new_function (kern_fndecl, true);
7854 push_cfun (kern_cfun);
7855 cgraph_edge::rebuild_edges ();
7856
7857 /* Re-map any mention of the PARM_DECL of the original function to the
7858 PARM_DECL of the new one.
7859
7860 TODO: It would be great if lowering produced references into the GPU
7861 kernel decl straight away and we did not have to do this. */
7862 struct grid_arg_decl_map adm;
7863 adm.old_arg = old_parm_decl;
7864 adm.new_arg = new_parm_decl;
7865 basic_block bb;
7866 FOR_EACH_BB_FN (bb, kern_cfun)
7867 {
7868 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7869 {
7870 gimple *stmt = gsi_stmt (gsi);
7871 struct walk_stmt_info wi;
7872 memset (&wi, 0, sizeof (wi));
7873 wi.info = &adm;
7874 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7875 }
7876 }
7877 pop_cfun ();
7878
7879 return;
7880 }
7881
7882 /* Expand the parallel region tree rooted at REGION. Expansion
7883 proceeds in depth-first order. Innermost regions are expanded
7884 first. This way, parallel regions that require a new function to
7885 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7886 internal dependencies in their body. */
7887
7888 static void
7889 expand_omp (struct omp_region *region)
7890 {
7891 omp_any_child_fn_dumped = false;
7892 while (region)
7893 {
7894 location_t saved_location;
7895 gimple *inner_stmt = NULL;
7896
7897 /* First, determine whether this is a combined parallel+workshare
7898 region. */
7899 if (region->type == GIMPLE_OMP_PARALLEL)
7900 determine_parallel_type (region);
7901 else if (region->type == GIMPLE_OMP_TARGET)
7902 grid_expand_target_grid_body (region);
7903
7904 if (region->type == GIMPLE_OMP_FOR
7905 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7906 inner_stmt = last_stmt (region->inner->entry);
7907
7908 if (region->inner)
7909 expand_omp (region->inner);
7910
7911 saved_location = input_location;
7912 if (gimple_has_location (last_stmt (region->entry)))
7913 input_location = gimple_location (last_stmt (region->entry));
7914
7915 switch (region->type)
7916 {
7917 case GIMPLE_OMP_PARALLEL:
7918 case GIMPLE_OMP_TASK:
7919 expand_omp_taskreg (region);
7920 break;
7921
7922 case GIMPLE_OMP_FOR:
7923 expand_omp_for (region, inner_stmt);
7924 break;
7925
7926 case GIMPLE_OMP_SECTIONS:
7927 expand_omp_sections (region);
7928 break;
7929
7930 case GIMPLE_OMP_SECTION:
7931 /* Individual omp sections are handled together with their
7932 parent GIMPLE_OMP_SECTIONS region. */
7933 break;
7934
7935 case GIMPLE_OMP_SINGLE:
7936 expand_omp_single (region);
7937 break;
7938
7939 case GIMPLE_OMP_ORDERED:
7940 {
7941 gomp_ordered *ord_stmt
7942 = as_a <gomp_ordered *> (last_stmt (region->entry));
7943 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7944 OMP_CLAUSE_DEPEND))
7945 {
7946 /* We'll expand these when expanding the corresponding
7947 worksharing region with an ordered(n) clause. */
7948 gcc_assert (region->outer
7949 && region->outer->type == GIMPLE_OMP_FOR);
7950 region->ord_stmt = ord_stmt;
7951 break;
7952 }
7953 }
7954 /* FALLTHRU */
7955 case GIMPLE_OMP_MASTER:
7956 case GIMPLE_OMP_TASKGROUP:
7957 case GIMPLE_OMP_CRITICAL:
7958 case GIMPLE_OMP_TEAMS:
7959 expand_omp_synch (region);
7960 break;
7961
7962 case GIMPLE_OMP_ATOMIC_LOAD:
7963 expand_omp_atomic (region);
7964 break;
7965
7966 case GIMPLE_OMP_TARGET:
7967 expand_omp_target (region);
7968 break;
7969
7970 default:
7971 gcc_unreachable ();
7972 }
7973
7974 input_location = saved_location;
7975 region = region->next;
7976 }
7977 if (omp_any_child_fn_dumped)
7978 {
7979 if (dump_file)
7980 dump_function_header (dump_file, current_function_decl, dump_flags);
7981 omp_any_child_fn_dumped = false;
7982 }
7983 }
7984
7985 /* Helper for build_omp_regions. Scan the dominator tree starting at
7986 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7987 true, the function ends once a single tree is built (otherwise, a whole
7988 forest of OMP constructs may be built). */
7989
7990 static void
7991 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7992 bool single_tree)
7993 {
7994 gimple_stmt_iterator gsi;
7995 gimple *stmt;
7996 basic_block son;
7997
7998 gsi = gsi_last_bb (bb);
7999 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
8000 {
8001 struct omp_region *region;
8002 enum gimple_code code;
8003
8004 stmt = gsi_stmt (gsi);
8005 code = gimple_code (stmt);
8006 if (code == GIMPLE_OMP_RETURN)
8007 {
8008 /* STMT is the return point out of region PARENT. Mark it
8009 as the exit point and make PARENT the immediately
8010 enclosing region. */
8011 gcc_assert (parent);
8012 region = parent;
8013 region->exit = bb;
8014 parent = parent->outer;
8015 }
8016 else if (code == GIMPLE_OMP_ATOMIC_STORE)
8017 {
8018 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
8019 GIMPLE_OMP_RETURN, but matches with
8020 GIMPLE_OMP_ATOMIC_LOAD. */
8021 gcc_assert (parent);
8022 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
8023 region = parent;
8024 region->exit = bb;
8025 parent = parent->outer;
8026 }
8027 else if (code == GIMPLE_OMP_CONTINUE)
8028 {
8029 gcc_assert (parent);
8030 parent->cont = bb;
8031 }
8032 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
8033 {
8034 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
8035 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
8036 }
8037 else
8038 {
8039 region = new_omp_region (bb, code, parent);
8040 /* Otherwise... */
8041 if (code == GIMPLE_OMP_TARGET)
8042 {
8043 switch (gimple_omp_target_kind (stmt))
8044 {
8045 case GF_OMP_TARGET_KIND_REGION:
8046 case GF_OMP_TARGET_KIND_DATA:
8047 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8048 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8049 case GF_OMP_TARGET_KIND_OACC_DATA:
8050 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8051 break;
8052 case GF_OMP_TARGET_KIND_UPDATE:
8053 case GF_OMP_TARGET_KIND_ENTER_DATA:
8054 case GF_OMP_TARGET_KIND_EXIT_DATA:
8055 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8056 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8057 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8058 /* ..., other than for those stand-alone directives... */
8059 region = NULL;
8060 break;
8061 default:
8062 gcc_unreachable ();
8063 }
8064 }
8065 else if (code == GIMPLE_OMP_ORDERED
8066 && omp_find_clause (gimple_omp_ordered_clauses
8067 (as_a <gomp_ordered *> (stmt)),
8068 OMP_CLAUSE_DEPEND))
8069 /* #pragma omp ordered depend is also just a stand-alone
8070 directive. */
8071 region = NULL;
8072 /* ..., this directive becomes the parent for a new region. */
8073 if (region)
8074 parent = region;
8075 }
8076 }
8077
8078 if (single_tree && !parent)
8079 return;
8080
8081 for (son = first_dom_son (CDI_DOMINATORS, bb);
8082 son;
8083 son = next_dom_son (CDI_DOMINATORS, son))
8084 build_omp_regions_1 (son, parent, single_tree);
8085 }
8086
8087 /* Builds the tree of OMP regions rooted at ROOT, storing it to
8088 root_omp_region. */
8089
8090 static void
8091 build_omp_regions_root (basic_block root)
8092 {
8093 gcc_assert (root_omp_region == NULL);
8094 build_omp_regions_1 (root, NULL, true);
8095 gcc_assert (root_omp_region != NULL);
8096 }
8097
8098 /* Expands the OMP construct (and its subconstructs) starting in HEAD. */
8099
8100 void
8101 omp_expand_local (basic_block head)
8102 {
8103 build_omp_regions_root (head);
8104 if (dump_file && (dump_flags & TDF_DETAILS))
8105 {
8106 fprintf (dump_file, "\nOMP region tree\n\n");
8107 dump_omp_region (dump_file, root_omp_region, 0);
8108 fprintf (dump_file, "\n");
8109 }
8110
8111 remove_exit_barriers (root_omp_region);
8112 expand_omp (root_omp_region);
8113
8114 omp_free_regions ();
8115 }
8116
8117 /* Scan the CFG and build a tree of OMP regions, storing the root of
8118 the OMP region tree in root_omp_region. */
8119
8120 static void
8121 build_omp_regions (void)
8122 {
8123 gcc_assert (root_omp_region == NULL);
8124 calculate_dominance_info (CDI_DOMINATORS);
8125 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
8126 }
8127
8128 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
8129
8130 static unsigned int
8131 execute_expand_omp (void)
8132 {
8133 build_omp_regions ();
8134
8135 if (!root_omp_region)
8136 return 0;
8137
8138 if (dump_file)
8139 {
8140 fprintf (dump_file, "\nOMP region tree\n\n");
8141 dump_omp_region (dump_file, root_omp_region, 0);
8142 fprintf (dump_file, "\n");
8143 }
8144
8145 remove_exit_barriers (root_omp_region);
8146
8147 expand_omp (root_omp_region);
8148
8149 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
8150 verify_loop_structure ();
8151 cleanup_tree_cfg ();
8152
8153 omp_free_regions ();
8154
8155 return 0;
8156 }
8157
8158 /* OMP expansion -- the default pass, run before creation of SSA form. */
8159
8160 namespace {
8161
8162 const pass_data pass_data_expand_omp =
8163 {
8164 GIMPLE_PASS, /* type */
8165 "ompexp", /* name */
8166 OPTGROUP_OMP, /* optinfo_flags */
8167 TV_NONE, /* tv_id */
8168 PROP_gimple_any, /* properties_required */
8169 PROP_gimple_eomp, /* properties_provided */
8170 0, /* properties_destroyed */
8171 0, /* todo_flags_start */
8172 0, /* todo_flags_finish */
8173 };
8174
8175 class pass_expand_omp : public gimple_opt_pass
8176 {
8177 public:
8178 pass_expand_omp (gcc::context *ctxt)
8179 : gimple_opt_pass (pass_data_expand_omp, ctxt)
8180 {}
8181
8182 /* opt_pass methods: */
8183 virtual unsigned int execute (function *)
8184 {
8185 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
8186 || flag_openmp_simd != 0)
8187 && !seen_error ());
8188
8189 /* This pass always runs, to provide PROP_gimple_eomp.
8190 But often, there is nothing to do. */
8191 if (!gate)
8192 return 0;
8193
8194 return execute_expand_omp ();
8195 }
8196
8197 }; // class pass_expand_omp
8198
8199 } // anon namespace
8200
8201 gimple_opt_pass *
8202 make_pass_expand_omp (gcc::context *ctxt)
8203 {
8204 return new pass_expand_omp (ctxt);
8205 }
8206
8207 namespace {
8208
8209 const pass_data pass_data_expand_omp_ssa =
8210 {
8211 GIMPLE_PASS, /* type */
8212 "ompexpssa", /* name */
8213 OPTGROUP_OMP, /* optinfo_flags */
8214 TV_NONE, /* tv_id */
8215 PROP_cfg | PROP_ssa, /* properties_required */
8216 PROP_gimple_eomp, /* properties_provided */
8217 0, /* properties_destroyed */
8218 0, /* todo_flags_start */
8219 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8220 };
8221
8222 class pass_expand_omp_ssa : public gimple_opt_pass
8223 {
8224 public:
8225 pass_expand_omp_ssa (gcc::context *ctxt)
8226 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8227 {}
8228
8229 /* opt_pass methods: */
8230 virtual bool gate (function *fun)
8231 {
8232 return !(fun->curr_properties & PROP_gimple_eomp);
8233 }
8234 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8235 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8236
8237 }; // class pass_expand_omp_ssa
8238
8239 } // anon namespace
8240
8241 gimple_opt_pass *
8242 make_pass_expand_omp_ssa (gcc::context *ctxt)
8243 {
8244 return new pass_expand_omp_ssa (ctxt);
8245 }
8246
8247 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8248 GIMPLE_* codes. */
8249
8250 bool
8251 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8252 int *region_idx)
8253 {
8254 gimple *last = last_stmt (bb);
8255 enum gimple_code code = gimple_code (last);
8256 struct omp_region *cur_region = *region;
8257 bool fallthru = false;
8258
8259 switch (code)
8260 {
8261 case GIMPLE_OMP_PARALLEL:
8262 case GIMPLE_OMP_TASK:
8263 case GIMPLE_OMP_FOR:
8264 case GIMPLE_OMP_SINGLE:
8265 case GIMPLE_OMP_TEAMS:
8266 case GIMPLE_OMP_MASTER:
8267 case GIMPLE_OMP_TASKGROUP:
8268 case GIMPLE_OMP_CRITICAL:
8269 case GIMPLE_OMP_SECTION:
8270 case GIMPLE_OMP_GRID_BODY:
8271 cur_region = new_omp_region (bb, code, cur_region);
8272 fallthru = true;
8273 break;
8274
8275 case GIMPLE_OMP_ORDERED:
8276 cur_region = new_omp_region (bb, code, cur_region);
8277 fallthru = true;
8278 if (omp_find_clause (gimple_omp_ordered_clauses
8279 (as_a <gomp_ordered *> (last)),
8280 OMP_CLAUSE_DEPEND))
8281 cur_region = cur_region->outer;
8282 break;
8283
8284 case GIMPLE_OMP_TARGET:
8285 cur_region = new_omp_region (bb, code, cur_region);
8286 fallthru = true;
8287 switch (gimple_omp_target_kind (last))
8288 {
8289 case GF_OMP_TARGET_KIND_REGION:
8290 case GF_OMP_TARGET_KIND_DATA:
8291 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8292 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8293 case GF_OMP_TARGET_KIND_OACC_DATA:
8294 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8295 break;
8296 case GF_OMP_TARGET_KIND_UPDATE:
8297 case GF_OMP_TARGET_KIND_ENTER_DATA:
8298 case GF_OMP_TARGET_KIND_EXIT_DATA:
8299 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8300 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8301 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8302 cur_region = cur_region->outer;
8303 break;
8304 default:
8305 gcc_unreachable ();
8306 }
8307 break;
8308
8309 case GIMPLE_OMP_SECTIONS:
8310 cur_region = new_omp_region (bb, code, cur_region);
8311 fallthru = true;
8312 break;
8313
8314 case GIMPLE_OMP_SECTIONS_SWITCH:
8315 fallthru = false;
8316 break;
8317
8318 case GIMPLE_OMP_ATOMIC_LOAD:
8319 case GIMPLE_OMP_ATOMIC_STORE:
8320 fallthru = true;
8321 break;
8322
8323 case GIMPLE_OMP_RETURN:
8324 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8325 somewhere other than the next block. This will be
8326 created later. */
8327 cur_region->exit = bb;
8328 if (cur_region->type == GIMPLE_OMP_TASK)
8329 /* Add an edge corresponding to not scheduling the task
8330 immediately. */
8331 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8332 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8333 cur_region = cur_region->outer;
8334 break;
8335
8336 case GIMPLE_OMP_CONTINUE:
8337 cur_region->cont = bb;
8338 switch (cur_region->type)
8339 {
8340 case GIMPLE_OMP_FOR:
8341 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8342 successor edges as abnormal to prevent splitting
8343 them. */
8344 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8345 /* Make the loopback edge. */
8346 make_edge (bb, single_succ (cur_region->entry),
8347 EDGE_ABNORMAL);
8348
8349 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8350 corresponds to the case that the body of the loop
8351 is not executed at all. */
8352 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8353 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8354 fallthru = false;
8355 break;
8356
8357 case GIMPLE_OMP_SECTIONS:
8358 /* Wire up the edges into and out of the nested sections. */
8359 {
8360 basic_block switch_bb = single_succ (cur_region->entry);
8361
8362 struct omp_region *i;
8363 for (i = cur_region->inner; i ; i = i->next)
8364 {
8365 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8366 make_edge (switch_bb, i->entry, 0);
8367 make_edge (i->exit, bb, EDGE_FALLTHRU);
8368 }
8369
8370 /* Make the loopback edge to the block with
8371 GIMPLE_OMP_SECTIONS_SWITCH. */
8372 make_edge (bb, switch_bb, 0);
8373
8374 /* Make the edge from the switch to exit. */
8375 make_edge (switch_bb, bb->next_bb, 0);
8376 fallthru = false;
8377 }
8378 break;
8379
8380 case GIMPLE_OMP_TASK:
8381 fallthru = true;
8382 break;
8383
8384 default:
8385 gcc_unreachable ();
8386 }
8387 break;
8388
8389 default:
8390 gcc_unreachable ();
8391 }
8392
8393 if (*region != cur_region)
8394 {
8395 *region = cur_region;
8396 if (cur_region)
8397 *region_idx = cur_region->entry->index;
8398 else
8399 *region_idx = 0;
8400 }
8401
8402 return fallthru;
8403 }
8404
8405 #include "gt-omp-expand.h"