gcc/omp-expand.c
1 /* Expansion pass for OMP directives. Outlines regions of certain OMP
2 directives to separate functions, converts others into explicit calls to the
3 runtime library (libgomp) and so forth.
4
5 Copyright (C) 2005-2016 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "memmodel.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "tree-pass.h"
34 #include "ssa.h"
35 #include "optabs.h"
36 #include "cgraph.h"
37 #include "pretty-print.h"
38 #include "diagnostic-core.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "cfganal.h"
42 #include "internal-fn.h"
43 #include "gimplify.h"
44 #include "gimple-iterator.h"
45 #include "gimplify-me.h"
46 #include "gimple-walk.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "tree-ssa.h"
50 #include "splay-tree.h"
51 #include "cfgloop.h"
52 #include "omp-general.h"
53 #include "omp-offload.h"
54 #include "tree-cfgcleanup.h"
55 #include "symbol-summary.h"
56 #include "cilk.h"
57 #include "gomp-constants.h"
58 #include "gimple-pretty-print.h"
59 #include "hsa.h"
60
61
62 /* OMP region information. Every parallel and workshare
63 directive is enclosed between two markers, the OMP_* directive
64 and a corresponding GIMPLE_OMP_RETURN statement. */
65
66 struct omp_region
67 {
68 /* The enclosing region. */
69 struct omp_region *outer;
70
71 /* First child region. */
72 struct omp_region *inner;
73
74 /* Next peer region. */
75 struct omp_region *next;
76
77 /* Block containing the omp directive as its last stmt. */
78 basic_block entry;
79
80 /* Block containing the GIMPLE_OMP_RETURN as its last stmt. */
81 basic_block exit;
82
83 /* Block containing the GIMPLE_OMP_CONTINUE as its last stmt. */
84 basic_block cont;
85
86 /* If this is a combined parallel+workshare region, this is a list
87 of additional arguments needed by the combined parallel+workshare
88 library call. */
89 vec<tree, va_gc> *ws_args;
90
91 /* The code for the omp directive of this region. */
92 enum gimple_code type;
93
94 /* Schedule kind, only used for GIMPLE_OMP_FOR type regions. */
95 enum omp_clause_schedule_kind sched_kind;
96
97 /* Schedule modifiers. */
98 unsigned char sched_modifiers;
99
100 /* True if this is a combined parallel+workshare region. */
101 bool is_combined_parallel;
102
103 /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
104 a depend clause. */
105 gomp_ordered *ord_stmt;
106 };
107
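/* Illustrative sketch (not part of the original sources): for a body like

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++)
         body (i);
     }

   the region tree built by this pass has a GIMPLE_OMP_PARALLEL region whose
   INNER field points to a GIMPLE_OMP_FOR region; the FOR region records the
   blocks holding its GIMPLE_OMP_FOR, GIMPLE_OMP_CONTINUE and
   GIMPLE_OMP_RETURN statements in ENTRY, CONT and EXIT respectively.  */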
108 static struct omp_region *root_omp_region;
109 static bool omp_any_child_fn_dumped;
110
111 static void expand_omp_build_assign (gimple_stmt_iterator *, tree, tree,
112 bool = false);
113 static gphi *find_phi_with_arg_on_edge (tree, edge);
114 static void expand_omp (struct omp_region *region);
115
116 /* Return true if REGION is a combined parallel+workshare region. */
117
118 static inline bool
119 is_combined_parallel (struct omp_region *region)
120 {
121 return region->is_combined_parallel;
122 }
123
124 /* Given two blocks PAR_ENTRY_BB and WS_ENTRY_BB such that PAR_ENTRY_BB
125 is the immediate dominator of WS_ENTRY_BB, return true if there
126 are no data dependencies that would prevent expanding the parallel
127 directive at PAR_ENTRY_BB as a combined parallel+workshare region.
128
129 When expanding a combined parallel+workshare region, the call to
130 the child function may need additional arguments in the case of
131 GIMPLE_OMP_FOR regions. In some cases, these arguments are
132 computed out of variables passed in from the parent to the child
133 via 'struct .omp_data_s'. For instance:
134
135 #pragma omp parallel for schedule (guided, i * 4)
136 for (j ...)
137
138 Is lowered into:
139
140 # BLOCK 2 (PAR_ENTRY_BB)
141 .omp_data_o.i = i;
142 #pragma omp parallel [child fn: bar.omp_fn.0 ( ..., D.1598)]
143
144 # BLOCK 3 (WS_ENTRY_BB)
145 .omp_data_i = &.omp_data_o;
146 D.1667 = .omp_data_i->i;
147 D.1598 = D.1667 * 4;
148 #pragma omp for schedule (guided, D.1598)
149
150 When we outline the parallel region, the call to the child function
151 'bar.omp_fn.0' will need the value D.1598 in its argument list, but
152 that value is computed *after* the call site. So, in principle we
153 cannot do the transformation.
154
155 To see whether the code in WS_ENTRY_BB blocks the combined
156 parallel+workshare call, we collect all the variables used in the
157 GIMPLE_OMP_FOR header and check whether they appear on the LHS of any
158 statement in WS_ENTRY_BB. If so, then we cannot emit the combined
159 call.
160
161 FIXME. If we had the SSA form built at this point, we could merely
162 hoist the code in block 3 into block 2 and be done with it. But at
163 this point we don't have dataflow information and though we could
164 hack something up here, it is really not worth the aggravation. */
165
166 static bool
167 workshare_safe_to_combine_p (basic_block ws_entry_bb)
168 {
169 struct omp_for_data fd;
170 gimple *ws_stmt = last_stmt (ws_entry_bb);
171
172 if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
173 return true;
174
175 gcc_assert (gimple_code (ws_stmt) == GIMPLE_OMP_FOR);
176
177 omp_extract_for_data (as_a <gomp_for *> (ws_stmt), &fd, NULL);
178
179 if (fd.collapse > 1 && TREE_CODE (fd.loop.n2) != INTEGER_CST)
180 return false;
181 if (fd.iter_type != long_integer_type_node)
182 return false;
183
184 /* FIXME. We give up too easily here. If any of these arguments
185 are not constants, they will likely involve variables that have
186 been mapped into fields of .omp_data_s for sharing with the child
187 function. With appropriate data flow, it would be possible to
188 see through this. */
189 if (!is_gimple_min_invariant (fd.loop.n1)
190 || !is_gimple_min_invariant (fd.loop.n2)
191 || !is_gimple_min_invariant (fd.loop.step)
192 || (fd.chunk_size && !is_gimple_min_invariant (fd.chunk_size)))
193 return false;
194
195 return true;
196 }
197
198 /* Adjust CHUNK_SIZE from the SCHEDULE clause: when the simd modifier is
199 present (SIMD_SCHEDULE), round it up to a multiple of the maximum vectorization factor. */
200
201 static tree
202 omp_adjust_chunk_size (tree chunk_size, bool simd_schedule)
203 {
204 if (!simd_schedule)
205 return chunk_size;
206
207 int vf = omp_max_vf ();
208 if (vf == 1)
209 return chunk_size;
210
211 tree type = TREE_TYPE (chunk_size);
212 chunk_size = fold_build2 (PLUS_EXPR, type, chunk_size,
213 build_int_cst (type, vf - 1));
214 return fold_build2 (BIT_AND_EXPR, type, chunk_size,
215 build_int_cst (type, -vf));
216 }
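/* Worked example (illustrative, assuming omp_max_vf () returns 8): a
   schedule chunk size of 10 becomes (10 + 7) & -8 == 16, i.e. the chunk is
   rounded up to the next multiple of the vectorization factor so that each
   chunk can be vectorized in full.  */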
217
218 /* Collect additional arguments needed to emit a combined
219 parallel+workshare call. WS_STMT is the workshare directive being
220 expanded. */
221
222 static vec<tree, va_gc> *
223 get_ws_args_for (gimple *par_stmt, gimple *ws_stmt)
224 {
225 tree t;
226 location_t loc = gimple_location (ws_stmt);
227 vec<tree, va_gc> *ws_args;
228
229 if (gomp_for *for_stmt = dyn_cast <gomp_for *> (ws_stmt))
230 {
231 struct omp_for_data fd;
232 tree n1, n2;
233
234 omp_extract_for_data (for_stmt, &fd, NULL);
235 n1 = fd.loop.n1;
236 n2 = fd.loop.n2;
237
238 if (gimple_omp_for_combined_into_p (for_stmt))
239 {
240 tree innerc
241 = omp_find_clause (gimple_omp_parallel_clauses (par_stmt),
242 OMP_CLAUSE__LOOPTEMP_);
243 gcc_assert (innerc);
244 n1 = OMP_CLAUSE_DECL (innerc);
245 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
246 OMP_CLAUSE__LOOPTEMP_);
247 gcc_assert (innerc);
248 n2 = OMP_CLAUSE_DECL (innerc);
249 }
250
251 vec_alloc (ws_args, 3 + (fd.chunk_size != 0));
252
253 t = fold_convert_loc (loc, long_integer_type_node, n1);
254 ws_args->quick_push (t);
255
256 t = fold_convert_loc (loc, long_integer_type_node, n2);
257 ws_args->quick_push (t);
258
259 t = fold_convert_loc (loc, long_integer_type_node, fd.loop.step);
260 ws_args->quick_push (t);
261
262 if (fd.chunk_size)
263 {
264 t = fold_convert_loc (loc, long_integer_type_node, fd.chunk_size);
265 t = omp_adjust_chunk_size (t, fd.simd_schedule);
266 ws_args->quick_push (t);
267 }
268
269 return ws_args;
270 }
271 else if (gimple_code (ws_stmt) == GIMPLE_OMP_SECTIONS)
272 {
273 /* Number of sections is equal to the number of edges from the
274 GIMPLE_OMP_SECTIONS_SWITCH statement, except for the one to
275 the exit of the sections region. */
276 basic_block bb = single_succ (gimple_bb (ws_stmt));
277 t = build_int_cst (unsigned_type_node, EDGE_COUNT (bb->succs) - 1);
278 vec_alloc (ws_args, 1);
279 ws_args->quick_push (t);
280 return ws_args;
281 }
282
283 gcc_unreachable ();
284 }
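/* Illustrative sketch: for something like

     #pragma omp parallel for schedule (dynamic, 8)
     for (i = 0; i < n; i++) ...

   the vector built here conceptually holds { (long) 0, (long) n, (long) 1,
   (long) 8 }, i.e. the loop bounds and step followed by the chunk size;
   for a parallel sections region it holds just the section count.  */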
285
286 /* Discover whether REGION is a combined parallel+workshare region. */
287
288 static void
289 determine_parallel_type (struct omp_region *region)
290 {
291 basic_block par_entry_bb, par_exit_bb;
292 basic_block ws_entry_bb, ws_exit_bb;
293
294 if (region == NULL || region->inner == NULL
295 || region->exit == NULL || region->inner->exit == NULL
296 || region->inner->cont == NULL)
297 return;
298
299 /* We only support parallel+for and parallel+sections. */
300 if (region->type != GIMPLE_OMP_PARALLEL
301 || (region->inner->type != GIMPLE_OMP_FOR
302 && region->inner->type != GIMPLE_OMP_SECTIONS))
303 return;
304
305 /* Check for perfect nesting PAR_ENTRY_BB -> WS_ENTRY_BB and
306 WS_EXIT_BB -> PAR_EXIT_BB. */
307 par_entry_bb = region->entry;
308 par_exit_bb = region->exit;
309 ws_entry_bb = region->inner->entry;
310 ws_exit_bb = region->inner->exit;
311
312 if (single_succ (par_entry_bb) == ws_entry_bb
313 && single_succ (ws_exit_bb) == par_exit_bb
314 && workshare_safe_to_combine_p (ws_entry_bb)
315 && (gimple_omp_parallel_combined_p (last_stmt (par_entry_bb))
316 || (last_and_only_stmt (ws_entry_bb)
317 && last_and_only_stmt (par_exit_bb))))
318 {
319 gimple *par_stmt = last_stmt (par_entry_bb);
320 gimple *ws_stmt = last_stmt (ws_entry_bb);
321
322 if (region->inner->type == GIMPLE_OMP_FOR)
323 {
324 /* If this is a combined parallel loop, we need to determine
325 whether or not to use the combined library calls. There
326 are two cases where we do not apply the transformation:
327 static loops and any kind of ordered loop. In the first
328 case, we already open code the loop so there is no need
329 to do anything else. In the latter case, the combined
330 parallel loop call would still need extra synchronization
331 to implement ordered semantics, so there would not be any
332 gain in using the combined call. */
333 tree clauses = gimple_omp_for_clauses (ws_stmt);
334 tree c = omp_find_clause (clauses, OMP_CLAUSE_SCHEDULE);
335 if (c == NULL
336 || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
337 == OMP_CLAUSE_SCHEDULE_STATIC)
338 || omp_find_clause (clauses, OMP_CLAUSE_ORDERED))
339 {
340 region->is_combined_parallel = false;
341 region->inner->is_combined_parallel = false;
342 return;
343 }
344 }
345
346 region->is_combined_parallel = true;
347 region->inner->is_combined_parallel = true;
348 region->ws_args = get_ws_args_for (par_stmt, ws_stmt);
349 }
350 }
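/* Illustrative example: for "#pragma omp parallel for schedule (dynamic)"
   both the parallel region and its inner for region are marked
   is_combined_parallel, and expand_parallel_call will later emit a single
   GOMP_parallel_loop_dynamic call; with schedule (static) or an ordered
   clause the two constructs are expanded separately.  */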
351
352 /* Debugging dumps for parallel regions. */
353 void dump_omp_region (FILE *, struct omp_region *, int);
354 void debug_omp_region (struct omp_region *);
355 void debug_all_omp_regions (void);
356
357 /* Dump the parallel region tree rooted at REGION. */
358
359 void
360 dump_omp_region (FILE *file, struct omp_region *region, int indent)
361 {
362 fprintf (file, "%*sbb %d: %s\n", indent, "", region->entry->index,
363 gimple_code_name[region->type]);
364
365 if (region->inner)
366 dump_omp_region (file, region->inner, indent + 4);
367
368 if (region->cont)
369 {
370 fprintf (file, "%*sbb %d: GIMPLE_OMP_CONTINUE\n", indent, "",
371 region->cont->index);
372 }
373
374 if (region->exit)
375 fprintf (file, "%*sbb %d: GIMPLE_OMP_RETURN\n", indent, "",
376 region->exit->index);
377 else
378 fprintf (file, "%*s[no exit marker]\n", indent, "");
379
380 if (region->next)
381 dump_omp_region (file, region->next, indent);
382 }
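/* For a simple parallel+for nest the dump produced above looks roughly like
   this (block numbers invented for illustration):

     bb 2: gimple_omp_parallel
         bb 3: gimple_omp_for
         bb 5: GIMPLE_OMP_CONTINUE
         bb 7: GIMPLE_OMP_RETURN
     bb 8: GIMPLE_OMP_RETURN  */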
383
384 DEBUG_FUNCTION void
385 debug_omp_region (struct omp_region *region)
386 {
387 dump_omp_region (stderr, region, 0);
388 }
389
390 DEBUG_FUNCTION void
391 debug_all_omp_regions (void)
392 {
393 dump_omp_region (stderr, root_omp_region, 0);
394 }
395
396 /* Create a new parallel region starting at STMT inside region PARENT. */
397
398 static struct omp_region *
399 new_omp_region (basic_block bb, enum gimple_code type,
400 struct omp_region *parent)
401 {
402 struct omp_region *region = XCNEW (struct omp_region);
403
404 region->outer = parent;
405 region->entry = bb;
406 region->type = type;
407
408 if (parent)
409 {
410 /* This is a nested region. Add it to the list of inner
411 regions in PARENT. */
412 region->next = parent->inner;
413 parent->inner = region;
414 }
415 else
416 {
417 /* This is a toplevel region. Add it to the list of toplevel
418 regions in ROOT_OMP_REGION. */
419 region->next = root_omp_region;
420 root_omp_region = region;
421 }
422
423 return region;
424 }
425
426 /* Release the memory associated with the region tree rooted at REGION. */
427
428 static void
429 free_omp_region_1 (struct omp_region *region)
430 {
431 struct omp_region *i, *n;
432
433 for (i = region->inner; i ; i = n)
434 {
435 n = i->next;
436 free_omp_region_1 (i);
437 }
438
439 free (region);
440 }
441
442 /* Release the memory for the entire omp region tree. */
443
444 void
445 omp_free_regions (void)
446 {
447 struct omp_region *r, *n;
448 for (r = root_omp_region; r ; r = n)
449 {
450 n = r->next;
451 free_omp_region_1 (r);
452 }
453 root_omp_region = NULL;
454 }
455
456 /* A convenience function to build an empty GIMPLE_COND with just the
457 condition. */
458
459 static gcond *
460 gimple_build_cond_empty (tree cond)
461 {
462 enum tree_code pred_code;
463 tree lhs, rhs;
464
465 gimple_cond_get_ops_from_tree (cond, &pred_code, &lhs, &rhs);
466 return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
467 }
468
469 /* Return true if a parallel REGION is within a declare target function or
470 within a target region and is not a part of a gridified target. */
471
472 static bool
473 parallel_needs_hsa_kernel_p (struct omp_region *region)
474 {
475 bool indirect = false;
476 for (region = region->outer; region; region = region->outer)
477 {
478 if (region->type == GIMPLE_OMP_PARALLEL)
479 indirect = true;
480 else if (region->type == GIMPLE_OMP_TARGET)
481 {
482 gomp_target *tgt_stmt
483 = as_a <gomp_target *> (last_stmt (region->entry));
484
485 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
486 OMP_CLAUSE__GRIDDIM_))
487 return indirect;
488 else
489 return true;
490 }
491 }
492
493 if (lookup_attribute ("omp declare target",
494 DECL_ATTRIBUTES (current_function_decl)))
495 return true;
496
497 return false;
498 }
499
500 /* Build the call to GOMP_parallel (or one of its combined
501 parallel+workshare variants) that actually launches the parallel
502 operation. REGION is the parallel region being expanded. BB is the
503 block where the code should be inserted. WS_ARGS will be set if this
504 is a call to a combined parallel+workshare construct; it contains the
505 list of additional arguments needed by the workshare construct. */
506
507 static void
508 expand_parallel_call (struct omp_region *region, basic_block bb,
509 gomp_parallel *entry_stmt,
510 vec<tree, va_gc> *ws_args)
511 {
512 tree t, t1, t2, val, cond, c, clauses, flags;
513 gimple_stmt_iterator gsi;
514 gimple *stmt;
515 enum built_in_function start_ix;
516 int start_ix2;
517 location_t clause_loc;
518 vec<tree, va_gc> *args;
519
520 clauses = gimple_omp_parallel_clauses (entry_stmt);
521
522 /* Determine what flavor of GOMP_parallel we will be
523 emitting. */
524 start_ix = BUILT_IN_GOMP_PARALLEL;
525 if (is_combined_parallel (region))
526 {
527 switch (region->inner->type)
528 {
529 case GIMPLE_OMP_FOR:
530 gcc_assert (region->inner->sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
531 switch (region->inner->sched_kind)
532 {
533 case OMP_CLAUSE_SCHEDULE_RUNTIME:
534 start_ix2 = 3;
535 break;
536 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
537 case OMP_CLAUSE_SCHEDULE_GUIDED:
538 if (region->inner->sched_modifiers
539 & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
540 {
541 start_ix2 = 3 + region->inner->sched_kind;
542 break;
543 }
544 /* FALLTHRU */
545 default:
546 start_ix2 = region->inner->sched_kind;
547 break;
548 }
549 start_ix2 += (int) BUILT_IN_GOMP_PARALLEL_LOOP_STATIC;
550 start_ix = (enum built_in_function) start_ix2;
551 break;
552 case GIMPLE_OMP_SECTIONS:
553 start_ix = BUILT_IN_GOMP_PARALLEL_SECTIONS;
554 break;
555 default:
556 gcc_unreachable ();
557 }
558 }
559
560 /* By default, the value of NUM_THREADS is zero (selected at run time)
561 and there is no conditional. */
562 cond = NULL_TREE;
563 val = build_int_cst (unsigned_type_node, 0);
564 flags = build_int_cst (unsigned_type_node, 0);
565
566 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
567 if (c)
568 cond = OMP_CLAUSE_IF_EXPR (c);
569
570 c = omp_find_clause (clauses, OMP_CLAUSE_NUM_THREADS);
571 if (c)
572 {
573 val = OMP_CLAUSE_NUM_THREADS_EXPR (c);
574 clause_loc = OMP_CLAUSE_LOCATION (c);
575 }
576 else
577 clause_loc = gimple_location (entry_stmt);
578
579 c = omp_find_clause (clauses, OMP_CLAUSE_PROC_BIND);
580 if (c)
581 flags = build_int_cst (unsigned_type_node, OMP_CLAUSE_PROC_BIND_KIND (c));
582
583 /* Ensure 'val' is of the correct type. */
584 val = fold_convert_loc (clause_loc, unsigned_type_node, val);
585
586 /* If we found the clause 'if (cond)', build either
587 (cond != 0) or (cond ? val : 1u). */
588 if (cond)
589 {
590 cond = gimple_boolify (cond);
591
592 if (integer_zerop (val))
593 val = fold_build2_loc (clause_loc,
594 EQ_EXPR, unsigned_type_node, cond,
595 build_int_cst (TREE_TYPE (cond), 0));
596 else
597 {
598 basic_block cond_bb, then_bb, else_bb;
599 edge e, e_then, e_else;
600 tree tmp_then, tmp_else, tmp_join, tmp_var;
601
602 tmp_var = create_tmp_var (TREE_TYPE (val));
603 if (gimple_in_ssa_p (cfun))
604 {
605 tmp_then = make_ssa_name (tmp_var);
606 tmp_else = make_ssa_name (tmp_var);
607 tmp_join = make_ssa_name (tmp_var);
608 }
609 else
610 {
611 tmp_then = tmp_var;
612 tmp_else = tmp_var;
613 tmp_join = tmp_var;
614 }
615
616 e = split_block_after_labels (bb);
617 cond_bb = e->src;
618 bb = e->dest;
619 remove_edge (e);
620
621 then_bb = create_empty_bb (cond_bb);
622 else_bb = create_empty_bb (then_bb);
623 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
624 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
625
626 stmt = gimple_build_cond_empty (cond);
627 gsi = gsi_start_bb (cond_bb);
628 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
629
630 gsi = gsi_start_bb (then_bb);
631 expand_omp_build_assign (&gsi, tmp_then, val, true);
632
633 gsi = gsi_start_bb (else_bb);
634 expand_omp_build_assign (&gsi, tmp_else,
635 build_int_cst (unsigned_type_node, 1),
636 true);
637
638 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
639 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
640 add_bb_to_loop (then_bb, cond_bb->loop_father);
641 add_bb_to_loop (else_bb, cond_bb->loop_father);
642 e_then = make_edge (then_bb, bb, EDGE_FALLTHRU);
643 e_else = make_edge (else_bb, bb, EDGE_FALLTHRU);
644
645 if (gimple_in_ssa_p (cfun))
646 {
647 gphi *phi = create_phi_node (tmp_join, bb);
648 add_phi_arg (phi, tmp_then, e_then, UNKNOWN_LOCATION);
649 add_phi_arg (phi, tmp_else, e_else, UNKNOWN_LOCATION);
650 }
651
652 val = tmp_join;
653 }
654
655 gsi = gsi_start_bb (bb);
656 val = force_gimple_operand_gsi (&gsi, val, true, NULL_TREE,
657 false, GSI_CONTINUE_LINKING);
658 }
659
660 gsi = gsi_last_bb (bb);
661 t = gimple_omp_parallel_data_arg (entry_stmt);
662 if (t == NULL)
663 t1 = null_pointer_node;
664 else
665 t1 = build_fold_addr_expr (t);
666 tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
667 t2 = build_fold_addr_expr (child_fndecl);
668
669 vec_alloc (args, 4 + vec_safe_length (ws_args));
670 args->quick_push (t2);
671 args->quick_push (t1);
672 args->quick_push (val);
673 if (ws_args)
674 args->splice (*ws_args);
675 args->quick_push (flags);
676
677 t = build_call_expr_loc_vec (UNKNOWN_LOCATION,
678 builtin_decl_explicit (start_ix), args);
679
680 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
681 false, GSI_CONTINUE_LINKING);
682
683 if (hsa_gen_requested_p ()
684 && parallel_needs_hsa_kernel_p (region))
685 {
686 cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
687 hsa_register_kernel (child_cnode);
688 }
689 }
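/* Illustrative result (names made up): for "#pragma omp parallel
   num_threads (4)" with some shared data, the call built above is roughly

     GOMP_parallel (foo._omp_fn.0, &.omp_data_o.1, 4, 0);

   i.e. the child function, the address of the shared-data block (or NULL),
   the thread count as an unsigned int (0 means "let the runtime decide"),
   and the flags word carrying the proc_bind kind.  */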
690
691 /* Insert into basic block BB a call to the _Cilk_for runtime function named
692 by WS_ARGS[0], passing the loop count from ENTRY_STMT and the grain from WS_ARGS[1]. */
693
694 static void
695 expand_cilk_for_call (basic_block bb, gomp_parallel *entry_stmt,
696 vec <tree, va_gc> *ws_args)
697 {
698 tree t, t1, t2;
699 gimple_stmt_iterator gsi;
700 vec <tree, va_gc> *args;
701
702 gcc_assert (vec_safe_length (ws_args) == 2);
703 tree func_name = (*ws_args)[0];
704 tree grain = (*ws_args)[1];
705
706 tree clauses = gimple_omp_parallel_clauses (entry_stmt);
707 tree count = omp_find_clause (clauses, OMP_CLAUSE__CILK_FOR_COUNT_);
708 gcc_assert (count != NULL_TREE);
709 count = OMP_CLAUSE_OPERAND (count, 0);
710
711 gsi = gsi_last_bb (bb);
712 t = gimple_omp_parallel_data_arg (entry_stmt);
713 if (t == NULL)
714 t1 = null_pointer_node;
715 else
716 t1 = build_fold_addr_expr (t);
717 t2 = build_fold_addr_expr (gimple_omp_parallel_child_fn (entry_stmt));
718
719 vec_alloc (args, 4);
720 args->quick_push (t2);
721 args->quick_push (t1);
722 args->quick_push (count);
723 args->quick_push (grain);
724 t = build_call_expr_loc_vec (UNKNOWN_LOCATION, func_name, args);
725
726 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, false,
727 GSI_CONTINUE_LINKING);
728 }
729
730 /* Build the call to GOMP_task (or GOMP_taskloop) that actually generates
731 the task operation. BB is the block where the code should be inserted. */
732
733 static void
734 expand_task_call (struct omp_region *region, basic_block bb,
735 gomp_task *entry_stmt)
736 {
737 tree t1, t2, t3;
738 gimple_stmt_iterator gsi;
739 location_t loc = gimple_location (entry_stmt);
740
741 tree clauses = gimple_omp_task_clauses (entry_stmt);
742
743 tree ifc = omp_find_clause (clauses, OMP_CLAUSE_IF);
744 tree untied = omp_find_clause (clauses, OMP_CLAUSE_UNTIED);
745 tree mergeable = omp_find_clause (clauses, OMP_CLAUSE_MERGEABLE);
746 tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
747 tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
748 tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
749
750 unsigned int iflags
751 = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
752 | (mergeable ? GOMP_TASK_FLAG_MERGEABLE : 0)
753 | (depend ? GOMP_TASK_FLAG_DEPEND : 0);
754
755 bool taskloop_p = gimple_omp_task_taskloop_p (entry_stmt);
756 tree startvar = NULL_TREE, endvar = NULL_TREE, step = NULL_TREE;
757 tree num_tasks = NULL_TREE;
758 bool ull = false;
759 if (taskloop_p)
760 {
761 gimple *g = last_stmt (region->outer->entry);
762 gcc_assert (gimple_code (g) == GIMPLE_OMP_FOR
763 && gimple_omp_for_kind (g) == GF_OMP_FOR_KIND_TASKLOOP);
764 struct omp_for_data fd;
765 omp_extract_for_data (as_a <gomp_for *> (g), &fd, NULL);
766 startvar = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
767 endvar = omp_find_clause (OMP_CLAUSE_CHAIN (startvar),
768 OMP_CLAUSE__LOOPTEMP_);
769 startvar = OMP_CLAUSE_DECL (startvar);
770 endvar = OMP_CLAUSE_DECL (endvar);
771 step = fold_convert_loc (loc, fd.iter_type, fd.loop.step);
772 if (fd.loop.cond_code == LT_EXPR)
773 iflags |= GOMP_TASK_FLAG_UP;
774 tree tclauses = gimple_omp_for_clauses (g);
775 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
776 if (num_tasks)
777 num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
778 else
779 {
780 num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
781 if (num_tasks)
782 {
783 iflags |= GOMP_TASK_FLAG_GRAINSIZE;
784 num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
785 }
786 else
787 num_tasks = integer_zero_node;
788 }
789 num_tasks = fold_convert_loc (loc, long_integer_type_node, num_tasks);
790 if (ifc == NULL_TREE)
791 iflags |= GOMP_TASK_FLAG_IF;
792 if (omp_find_clause (tclauses, OMP_CLAUSE_NOGROUP))
793 iflags |= GOMP_TASK_FLAG_NOGROUP;
794 ull = fd.iter_type == long_long_unsigned_type_node;
795 }
796 else if (priority)
797 iflags |= GOMP_TASK_FLAG_PRIORITY;
798
799 tree flags = build_int_cst (unsigned_type_node, iflags);
800
801 tree cond = boolean_true_node;
802 if (ifc)
803 {
804 if (taskloop_p)
805 {
806 tree t = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
807 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
808 build_int_cst (unsigned_type_node,
809 GOMP_TASK_FLAG_IF),
810 build_int_cst (unsigned_type_node, 0));
811 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node,
812 flags, t);
813 }
814 else
815 cond = gimple_boolify (OMP_CLAUSE_IF_EXPR (ifc));
816 }
817
818 if (finalc)
819 {
820 tree t = gimple_boolify (OMP_CLAUSE_FINAL_EXPR (finalc));
821 t = fold_build3_loc (loc, COND_EXPR, unsigned_type_node, t,
822 build_int_cst (unsigned_type_node,
823 GOMP_TASK_FLAG_FINAL),
824 build_int_cst (unsigned_type_node, 0));
825 flags = fold_build2_loc (loc, PLUS_EXPR, unsigned_type_node, flags, t);
826 }
827 if (depend)
828 depend = OMP_CLAUSE_DECL (depend);
829 else
830 depend = build_int_cst (ptr_type_node, 0);
831 if (priority)
832 priority = fold_convert (integer_type_node,
833 OMP_CLAUSE_PRIORITY_EXPR (priority));
834 else
835 priority = integer_zero_node;
836
837 gsi = gsi_last_bb (bb);
838 tree t = gimple_omp_task_data_arg (entry_stmt);
839 if (t == NULL)
840 t2 = null_pointer_node;
841 else
842 t2 = build_fold_addr_expr_loc (loc, t);
843 t1 = build_fold_addr_expr_loc (loc, gimple_omp_task_child_fn (entry_stmt));
844 t = gimple_omp_task_copy_fn (entry_stmt);
845 if (t == NULL)
846 t3 = null_pointer_node;
847 else
848 t3 = build_fold_addr_expr_loc (loc, t);
849
850 if (taskloop_p)
851 t = build_call_expr (ull
852 ? builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP_ULL)
853 : builtin_decl_explicit (BUILT_IN_GOMP_TASKLOOP),
854 11, t1, t2, t3,
855 gimple_omp_task_arg_size (entry_stmt),
856 gimple_omp_task_arg_align (entry_stmt), flags,
857 num_tasks, priority, startvar, endvar, step);
858 else
859 t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
860 9, t1, t2, t3,
861 gimple_omp_task_arg_size (entry_stmt),
862 gimple_omp_task_arg_align (entry_stmt), cond, flags,
863 depend, priority);
864
865 force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
866 false, GSI_CONTINUE_LINKING);
867 }
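/* Illustrative example: for "#pragma omp task untied mergeable" the FLAGS
   argument is GOMP_TASK_FLAG_UNTIED | GOMP_TASK_FLAG_MERGEABLE, an
   "if (expr)" clause becomes the COND argument, and "final (expr)" is folded
   in at run time as (expr ? GOMP_TASK_FLAG_FINAL : 0) added to FLAGS, before
   the nine-argument GOMP_task call is emitted.  */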
868
869 /* Chain all the DECLs in LIST by their TREE_CHAIN fields. */
870
871 static tree
872 vec2chain (vec<tree, va_gc> *v)
873 {
874 tree chain = NULL_TREE, t;
875 unsigned ix;
876
877 FOR_EACH_VEC_SAFE_ELT_REVERSE (v, ix, t)
878 {
879 DECL_CHAIN (t) = chain;
880 chain = t;
881 }
882
883 return chain;
884 }
885
886 /* Remove barriers in REGION->EXIT's block. Note that this is only
887 valid for GIMPLE_OMP_PARALLEL regions. Since the end of a parallel region
888 is an implicit barrier, any barrier that a workshare nested inside the
889 GIMPLE_OMP_PARALLEL left at the end of the GIMPLE_OMP_PARALLEL region
890 can now be removed. */
891
892 static void
893 remove_exit_barrier (struct omp_region *region)
894 {
895 gimple_stmt_iterator gsi;
896 basic_block exit_bb;
897 edge_iterator ei;
898 edge e;
899 gimple *stmt;
900 int any_addressable_vars = -1;
901
902 exit_bb = region->exit;
903
904 /* If the parallel region doesn't return, we don't have REGION->EXIT
905 block at all. */
906 if (! exit_bb)
907 return;
908
909 /* The last stmt in the block will be the parallel's GIMPLE_OMP_RETURN. The
910 workshare's GIMPLE_OMP_RETURN will be in a preceding block. The kinds of
911 statements that can appear in between are extremely limited -- no
912 memory operations at all. Here we are even stricter: the only
913 thing we allow to precede this GIMPLE_OMP_RETURN is a label. */
914 gsi = gsi_last_bb (exit_bb);
915 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
916 gsi_prev (&gsi);
917 if (!gsi_end_p (gsi) && gimple_code (gsi_stmt (gsi)) != GIMPLE_LABEL)
918 return;
919
920 FOR_EACH_EDGE (e, ei, exit_bb->preds)
921 {
922 gsi = gsi_last_bb (e->src);
923 if (gsi_end_p (gsi))
924 continue;
925 stmt = gsi_stmt (gsi);
926 if (gimple_code (stmt) == GIMPLE_OMP_RETURN
927 && !gimple_omp_return_nowait_p (stmt))
928 {
929 /* OpenMP 3.0 tasks unfortunately prevent this optimization
930 in many cases. If there could be tasks queued, the barrier
931 might be needed to let the tasks run before some local
932 variable of the parallel that the task uses as shared
933 runs out of scope. The task can be spawned either
934 from within the current function (this would be easy to check)
935 or from some function it calls and gets passed an address
936 of such a variable. */
937 if (any_addressable_vars < 0)
938 {
939 gomp_parallel *parallel_stmt
940 = as_a <gomp_parallel *> (last_stmt (region->entry));
941 tree child_fun = gimple_omp_parallel_child_fn (parallel_stmt);
942 tree local_decls, block, decl;
943 unsigned ix;
944
945 any_addressable_vars = 0;
946 FOR_EACH_LOCAL_DECL (DECL_STRUCT_FUNCTION (child_fun), ix, decl)
947 if (TREE_ADDRESSABLE (decl))
948 {
949 any_addressable_vars = 1;
950 break;
951 }
952 for (block = gimple_block (stmt);
953 !any_addressable_vars
954 && block
955 && TREE_CODE (block) == BLOCK;
956 block = BLOCK_SUPERCONTEXT (block))
957 {
958 for (local_decls = BLOCK_VARS (block);
959 local_decls;
960 local_decls = DECL_CHAIN (local_decls))
961 if (TREE_ADDRESSABLE (local_decls))
962 {
963 any_addressable_vars = 1;
964 break;
965 }
966 if (block == gimple_block (parallel_stmt))
967 break;
968 }
969 }
970 if (!any_addressable_vars)
971 gimple_omp_return_set_nowait (stmt);
972 }
973 }
974 }
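/* Illustrative case: in

     #pragma omp parallel
     {
       #pragma omp for
       for (i = 0; i < n; i++) ...
     }

   the worksharing loop's implicit barrier would sit immediately before the
   barrier at the end of the parallel region, so the loop's GIMPLE_OMP_RETURN
   can be marked nowait (unless addressable locals might still be referenced
   by queued tasks, as checked above).  */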
975
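/* Walk the whole region tree rooted at REGION and apply the barrier removal
   above to every GIMPLE_OMP_PARALLEL region found.  */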
976 static void
977 remove_exit_barriers (struct omp_region *region)
978 {
979 if (region->type == GIMPLE_OMP_PARALLEL)
980 remove_exit_barrier (region);
981
982 if (region->inner)
983 {
984 region = region->inner;
985 remove_exit_barriers (region);
986 while (region->next)
987 {
988 region = region->next;
989 remove_exit_barriers (region);
990 }
991 }
992 }
993
994 /* Optimize omp_get_thread_num () and omp_get_num_threads ()
995 calls. These can't be declared as const functions, but
996 within one parallel body they are constant, so they can be
997 transformed there into __builtin_omp_get_{thread_num,num_threads} ()
998 which are declared const. Similarly for a task body, except
999 that in an untied task omp_get_thread_num () can change at any task
1000 scheduling point. */
1001
1002 static void
1003 optimize_omp_library_calls (gimple *entry_stmt)
1004 {
1005 basic_block bb;
1006 gimple_stmt_iterator gsi;
1007 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1008 tree thr_num_id = DECL_ASSEMBLER_NAME (thr_num_tree);
1009 tree num_thr_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1010 tree num_thr_id = DECL_ASSEMBLER_NAME (num_thr_tree);
1011 bool untied_task = (gimple_code (entry_stmt) == GIMPLE_OMP_TASK
1012 && omp_find_clause (gimple_omp_task_clauses (entry_stmt),
1013 OMP_CLAUSE_UNTIED) != NULL);
1014
1015 FOR_EACH_BB_FN (bb, cfun)
1016 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1017 {
1018 gimple *call = gsi_stmt (gsi);
1019 tree decl;
1020
1021 if (is_gimple_call (call)
1022 && (decl = gimple_call_fndecl (call))
1023 && DECL_EXTERNAL (decl)
1024 && TREE_PUBLIC (decl)
1025 && DECL_INITIAL (decl) == NULL)
1026 {
1027 tree built_in;
1028
1029 if (DECL_NAME (decl) == thr_num_id)
1030 {
1031 /* In #pragma omp task untied omp_get_thread_num () can change
1032 during the execution of the task region. */
1033 if (untied_task)
1034 continue;
1035 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
1036 }
1037 else if (DECL_NAME (decl) == num_thr_id)
1038 built_in = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
1039 else
1040 continue;
1041
1042 if (DECL_ASSEMBLER_NAME (decl) != DECL_ASSEMBLER_NAME (built_in)
1043 || gimple_call_num_args (call) != 0)
1044 continue;
1045
1046 if (flag_exceptions && !TREE_NOTHROW (decl))
1047 continue;
1048
1049 if (TREE_CODE (TREE_TYPE (decl)) != FUNCTION_TYPE
1050 || !types_compatible_p (TREE_TYPE (TREE_TYPE (decl)),
1051 TREE_TYPE (TREE_TYPE (built_in))))
1052 continue;
1053
1054 gimple_call_set_fndecl (call, built_in);
1055 }
1056 }
1057 }
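/* Illustrative effect: inside an outlined parallel body a call such as
   "tid = omp_get_thread_num ();" is redirected to the const built-in
   __builtin_omp_get_thread_num (), so repeated calls can be CSEd within the
   body; in an untied task the thread number may change at scheduling points,
   so the replacement is skipped there.  */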
1058
1059 /* Callback for expand_omp_build_assign. Return non-NULL if *tp needs to be
1060 regimplified. */
1061
1062 static tree
1063 expand_omp_regimplify_p (tree *tp, int *walk_subtrees, void *)
1064 {
1065 tree t = *tp;
1066
1067 /* Any variable with DECL_VALUE_EXPR needs to be regimplified. */
1068 if (VAR_P (t) && DECL_HAS_VALUE_EXPR_P (t))
1069 return t;
1070
1071 if (TREE_CODE (t) == ADDR_EXPR)
1072 recompute_tree_invariant_for_addr_expr (t);
1073
1074 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
1075 return NULL_TREE;
1076 }
1077
1078 /* Insert the assignment TO = FROM before *GSI_P, or append it after *GSI_P if AFTER is true. */
1079
1080 static void
1081 expand_omp_build_assign (gimple_stmt_iterator *gsi_p, tree to, tree from,
1082 bool after)
1083 {
1084 bool simple_p = DECL_P (to) && TREE_ADDRESSABLE (to);
1085 from = force_gimple_operand_gsi (gsi_p, from, simple_p, NULL_TREE,
1086 !after, after ? GSI_CONTINUE_LINKING
1087 : GSI_SAME_STMT);
1088 gimple *stmt = gimple_build_assign (to, from);
1089 if (after)
1090 gsi_insert_after (gsi_p, stmt, GSI_CONTINUE_LINKING);
1091 else
1092 gsi_insert_before (gsi_p, stmt, GSI_SAME_STMT);
1093 if (walk_tree (&from, expand_omp_regimplify_p, NULL, NULL)
1094 || walk_tree (&to, expand_omp_regimplify_p, NULL, NULL))
1095 {
1096 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1097 gimple_regimplify_operands (stmt, &gsi);
1098 }
1099 }
1100
1101 /* Expand the OpenMP parallel or task directive starting at REGION. */
1102
1103 static void
1104 expand_omp_taskreg (struct omp_region *region)
1105 {
1106 basic_block entry_bb, exit_bb, new_bb;
1107 struct function *child_cfun;
1108 tree child_fn, block, t;
1109 gimple_stmt_iterator gsi;
1110 gimple *entry_stmt, *stmt;
1111 edge e;
1112 vec<tree, va_gc> *ws_args;
1113
1114 entry_stmt = last_stmt (region->entry);
1115 child_fn = gimple_omp_taskreg_child_fn (entry_stmt);
1116 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
1117
1118 entry_bb = region->entry;
1119 if (gimple_code (entry_stmt) == GIMPLE_OMP_TASK)
1120 exit_bb = region->cont;
1121 else
1122 exit_bb = region->exit;
1123
1124 bool is_cilk_for
1125 = (flag_cilkplus
1126 && gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL
1127 && omp_find_clause (gimple_omp_parallel_clauses (entry_stmt),
1128 OMP_CLAUSE__CILK_FOR_COUNT_) != NULL_TREE);
1129
1130 if (is_cilk_for)
1131 /* If it is a _Cilk_for statement, it is modelled *like* a parallel for,
1132 and the inner statement contains the name of the built-in function
1133 and grain. */
1134 ws_args = region->inner->ws_args;
1135 else if (is_combined_parallel (region))
1136 ws_args = region->ws_args;
1137 else
1138 ws_args = NULL;
1139
1140 if (child_cfun->cfg)
1141 {
1142 /* Due to inlining, it may happen that we have already outlined
1143 the region, in which case all we need to do is make the
1144 sub-graph unreachable and emit the parallel call. */
1145 edge entry_succ_e, exit_succ_e;
1146
1147 entry_succ_e = single_succ_edge (entry_bb);
1148
1149 gsi = gsi_last_bb (entry_bb);
1150 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_PARALLEL
1151 || gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_TASK);
1152 gsi_remove (&gsi, true);
1153
1154 new_bb = entry_bb;
1155 if (exit_bb)
1156 {
1157 exit_succ_e = single_succ_edge (exit_bb);
1158 make_edge (new_bb, exit_succ_e->dest, EDGE_FALLTHRU);
1159 }
1160 remove_edge_and_dominated_blocks (entry_succ_e);
1161 }
1162 else
1163 {
1164 unsigned srcidx, dstidx, num;
1165
1166 /* If the parallel region needs data sent from the parent
1167 function, then the very first statement (except possible
1168 tree profile counter updates) of the parallel body
1169 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
1170 &.OMP_DATA_O is passed as an argument to the child function,
1171 we need to replace it with the argument as seen by the child
1172 function.
1173
1174 In most cases, this will end up being the identity assignment
1175 .OMP_DATA_I = .OMP_DATA_I. However, if the parallel body had
1176 a function call that has been inlined, the original PARM_DECL
1177 .OMP_DATA_I may have been converted into a different local
1178 variable, in which case we need to keep the assignment. */
1179 if (gimple_omp_taskreg_data_arg (entry_stmt))
1180 {
1181 basic_block entry_succ_bb
1182 = single_succ_p (entry_bb) ? single_succ (entry_bb)
1183 : FALLTHRU_EDGE (entry_bb)->dest;
1184 tree arg;
1185 gimple *parcopy_stmt = NULL;
1186
1187 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
1188 {
1189 gimple *stmt;
1190
1191 gcc_assert (!gsi_end_p (gsi));
1192 stmt = gsi_stmt (gsi);
1193 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1194 continue;
1195
1196 if (gimple_num_ops (stmt) == 2)
1197 {
1198 tree arg = gimple_assign_rhs1 (stmt);
1199
1200 /* We're ignoring the subcode because we're
1201 effectively doing a STRIP_NOPS. */
1202
1203 if (TREE_CODE (arg) == ADDR_EXPR
1204 && TREE_OPERAND (arg, 0)
1205 == gimple_omp_taskreg_data_arg (entry_stmt))
1206 {
1207 parcopy_stmt = stmt;
1208 break;
1209 }
1210 }
1211 }
1212
1213 gcc_assert (parcopy_stmt != NULL);
1214 arg = DECL_ARGUMENTS (child_fn);
1215
1216 if (!gimple_in_ssa_p (cfun))
1217 {
1218 if (gimple_assign_lhs (parcopy_stmt) == arg)
1219 gsi_remove (&gsi, true);
1220 else
1221 {
1222 /* ?? Is setting the subcode really necessary ?? */
1223 gimple_omp_set_subcode (parcopy_stmt, TREE_CODE (arg));
1224 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1225 }
1226 }
1227 else
1228 {
1229 tree lhs = gimple_assign_lhs (parcopy_stmt);
1230 gcc_assert (SSA_NAME_VAR (lhs) == arg);
1231 /* We'd like to set the rhs to the default def in the child_fn,
1232 but it's too early to create ssa names in the child_fn.
1233 Instead, we set the rhs to the parm. In
1234 move_sese_region_to_fn, we introduce a default def for the
1235 parm, map the parm to its default def, and once we encounter
1236 this stmt, replace the parm with the default def. */
1237 gimple_assign_set_rhs1 (parcopy_stmt, arg);
1238 update_stmt (parcopy_stmt);
1239 }
1240 }
1241
1242 /* Declare local variables needed in CHILD_CFUN. */
1243 block = DECL_INITIAL (child_fn);
1244 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
1245 /* The gimplifier could record temporaries in parallel/task block
1246 rather than in containing function's local_decls chain,
1247 which would mean cgraph missed finalizing them. Do it now. */
1248 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
1249 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
1250 varpool_node::finalize_decl (t);
1251 DECL_SAVED_TREE (child_fn) = NULL;
1252 /* We'll create a CFG for child_fn, so no gimple body is needed. */
1253 gimple_set_body (child_fn, NULL);
1254 TREE_USED (block) = 1;
1255
1256 /* Reset DECL_CONTEXT on function arguments. */
1257 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
1258 DECL_CONTEXT (t) = child_fn;
1259
1260 /* Split ENTRY_BB at GIMPLE_OMP_PARALLEL or GIMPLE_OMP_TASK,
1261 so that it can be moved to the child function. */
1262 gsi = gsi_last_bb (entry_bb);
1263 stmt = gsi_stmt (gsi);
1264 gcc_assert (stmt && (gimple_code (stmt) == GIMPLE_OMP_PARALLEL
1265 || gimple_code (stmt) == GIMPLE_OMP_TASK));
1266 e = split_block (entry_bb, stmt);
1267 gsi_remove (&gsi, true);
1268 entry_bb = e->dest;
1269 edge e2 = NULL;
1270 if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1271 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
1272 else
1273 {
1274 e2 = make_edge (e->src, BRANCH_EDGE (entry_bb)->dest, EDGE_ABNORMAL);
1275 gcc_assert (e2->dest == region->exit);
1276 remove_edge (BRANCH_EDGE (entry_bb));
1277 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e->src);
1278 gsi = gsi_last_bb (region->exit);
1279 gcc_assert (!gsi_end_p (gsi)
1280 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
1281 gsi_remove (&gsi, true);
1282 }
1283
1284 /* Convert GIMPLE_OMP_{RETURN,CONTINUE} into a RETURN_EXPR. */
1285 if (exit_bb)
1286 {
1287 gsi = gsi_last_bb (exit_bb);
1288 gcc_assert (!gsi_end_p (gsi)
1289 && (gimple_code (gsi_stmt (gsi))
1290 == (e2 ? GIMPLE_OMP_CONTINUE : GIMPLE_OMP_RETURN)));
1291 stmt = gimple_build_return (NULL);
1292 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
1293 gsi_remove (&gsi, true);
1294 }
1295
1296 /* Move the parallel region into CHILD_CFUN. */
1297
1298 if (gimple_in_ssa_p (cfun))
1299 {
1300 init_tree_ssa (child_cfun);
1301 init_ssa_operands (child_cfun);
1302 child_cfun->gimple_df->in_ssa_p = true;
1303 block = NULL_TREE;
1304 }
1305 else
1306 block = gimple_block (entry_stmt);
1307
1308 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
1309 if (exit_bb)
1310 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
1311 if (e2)
1312 {
1313 basic_block dest_bb = e2->dest;
1314 if (!exit_bb)
1315 make_edge (new_bb, dest_bb, EDGE_FALLTHRU);
1316 remove_edge (e2);
1317 set_immediate_dominator (CDI_DOMINATORS, dest_bb, new_bb);
1318 }
1319 /* When the OMP expansion process cannot guarantee an up-to-date
1320 loop tree, arrange for the child function to fix up its loops. */
1321 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1322 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
1323
1324 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
1325 num = vec_safe_length (child_cfun->local_decls);
1326 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
1327 {
1328 t = (*child_cfun->local_decls)[srcidx];
1329 if (DECL_CONTEXT (t) == cfun->decl)
1330 continue;
1331 if (srcidx != dstidx)
1332 (*child_cfun->local_decls)[dstidx] = t;
1333 dstidx++;
1334 }
1335 if (dstidx != num)
1336 vec_safe_truncate (child_cfun->local_decls, dstidx);
1337
1338 /* Inform the callgraph about the new function. */
1339 child_cfun->curr_properties = cfun->curr_properties;
1340 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
1341 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
1342 cgraph_node *node = cgraph_node::get_create (child_fn);
1343 node->parallelized_function = 1;
1344 cgraph_node::add_new_function (child_fn, true);
1345
1346 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
1347 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
1348
1349 /* Fix the callgraph edges for child_cfun. Those for cfun will be
1350 fixed in a following pass. */
1351 push_cfun (child_cfun);
1352 if (need_asm)
1353 assign_assembler_name_if_neeeded (child_fn);
1354
1355 if (optimize)
1356 optimize_omp_library_calls (entry_stmt);
1357 cgraph_edge::rebuild_edges ();
1358
1359 /* Some EH regions might become dead, see PR34608. If
1360 pass_cleanup_cfg isn't the first pass to happen with the
1361 new child, these dead EH edges might cause problems.
1362 Clean them up now. */
1363 if (flag_exceptions)
1364 {
1365 basic_block bb;
1366 bool changed = false;
1367
1368 FOR_EACH_BB_FN (bb, cfun)
1369 changed |= gimple_purge_dead_eh_edges (bb);
1370 if (changed)
1371 cleanup_tree_cfg ();
1372 }
1373 if (gimple_in_ssa_p (cfun))
1374 update_ssa (TODO_update_ssa);
1375 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
1376 verify_loop_structure ();
1377 pop_cfun ();
1378
1379 if (dump_file && !gimple_in_ssa_p (cfun))
1380 {
1381 omp_any_child_fn_dumped = true;
1382 dump_function_header (dump_file, child_fn, dump_flags);
1383 dump_function_to_file (child_fn, dump_file, dump_flags);
1384 }
1385 }
1386
1387 /* Emit a library call to launch the children threads. */
1388 if (is_cilk_for)
1389 expand_cilk_for_call (new_bb,
1390 as_a <gomp_parallel *> (entry_stmt), ws_args);
1391 else if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
1392 expand_parallel_call (region, new_bb,
1393 as_a <gomp_parallel *> (entry_stmt), ws_args);
1394 else
1395 expand_task_call (region, new_bb, as_a <gomp_task *> (entry_stmt));
1396 if (gimple_in_ssa_p (cfun))
1397 update_ssa (TODO_update_ssa_only_virtuals);
1398 }
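/* Illustrative overview (function names made up): for

     #pragma omp parallel
     {
       ... body ...
     }

   the statements between GIMPLE_OMP_PARALLEL and its GIMPLE_OMP_RETURN are
   moved into a new function, say foo._omp_fn.0 (void *.omp_data_i), by
   move_sese_region_to_fn, and the original block is left with just the
   GOMP_parallel (or GOMP_task) launch call emitted above.  */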
1399
1400 /* Information about members of an OpenACC collapsed loop nest. */
1401
1402 struct oacc_collapse
1403 {
1404 tree base; /* Base value. */
1405 tree iters; /* Number of steps. */
1406 tree step; /* Step size. */
1407 };
1408
1409 /* Helper for expand_oacc_for. Determine collapsed loop information.
1410 Fill in COUNTS array. Emit any initialization code before GSI.
1411 Return the calculated outer loop bound of BOUND_TYPE. */
1412
1413 static tree
1414 expand_oacc_collapse_init (const struct omp_for_data *fd,
1415 gimple_stmt_iterator *gsi,
1416 oacc_collapse *counts, tree bound_type)
1417 {
1418 tree total = build_int_cst (bound_type, 1);
1419 int ix;
1420
1421 gcc_assert (integer_onep (fd->loop.step));
1422 gcc_assert (integer_zerop (fd->loop.n1));
1423
1424 for (ix = 0; ix != fd->collapse; ix++)
1425 {
1426 const omp_for_data_loop *loop = &fd->loops[ix];
1427
1428 tree iter_type = TREE_TYPE (loop->v);
1429 tree diff_type = iter_type;
1430 tree plus_type = iter_type;
1431
1432 gcc_assert (loop->cond_code == fd->loop.cond_code);
1433
1434 if (POINTER_TYPE_P (iter_type))
1435 plus_type = sizetype;
1436 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
1437 diff_type = signed_type_for (diff_type);
1438
1439 tree b = loop->n1;
1440 tree e = loop->n2;
1441 tree s = loop->step;
1442 bool up = loop->cond_code == LT_EXPR;
1443 tree dir = build_int_cst (diff_type, up ? +1 : -1);
1444 bool negating;
1445 tree expr;
1446
1447 b = force_gimple_operand_gsi (gsi, b, true, NULL_TREE,
1448 true, GSI_SAME_STMT);
1449 e = force_gimple_operand_gsi (gsi, e, true, NULL_TREE,
1450 true, GSI_SAME_STMT);
1451
1452 /* Convert the step, avoiding possible unsigned->signed overflow. */
1453 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
1454 if (negating)
1455 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
1456 s = fold_convert (diff_type, s);
1457 if (negating)
1458 s = fold_build1 (NEGATE_EXPR, diff_type, s);
1459 s = force_gimple_operand_gsi (gsi, s, true, NULL_TREE,
1460 true, GSI_SAME_STMT);
1461
1462 /* Determine the range, avoiding possible unsigned->signed overflow. */
1463 negating = !up && TYPE_UNSIGNED (iter_type);
1464 expr = fold_build2 (MINUS_EXPR, plus_type,
1465 fold_convert (plus_type, negating ? b : e),
1466 fold_convert (plus_type, negating ? e : b));
1467 expr = fold_convert (diff_type, expr);
1468 if (negating)
1469 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
1470 tree range = force_gimple_operand_gsi
1471 (gsi, expr, true, NULL_TREE, true, GSI_SAME_STMT);
1472
1473 /* Determine number of iterations. */
1474 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
1475 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
1476 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
1477
1478 tree iters = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1479 true, GSI_SAME_STMT);
1480
1481 counts[ix].base = b;
1482 counts[ix].iters = iters;
1483 counts[ix].step = s;
1484
1485 total = fold_build2 (MULT_EXPR, bound_type, total,
1486 fold_convert (bound_type, iters));
1487 }
1488
1489 return total;
1490 }
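/* Worked example (illustrative): for a collapsed nest

     for (i = 0; i < 8; i++)
       for (j = 0; j < 5; j++) ...

   counts[0] is { base 0, iters 8, step 1 }, counts[1] is
   { base 0, iters 5, step 1 }, and the returned outer bound is 8 * 5 = 40;
   each per-loop iteration count comes from (range - dir + step) / step.  */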
1491
1492 /* Emit initializers for collapsed loop members. IVAR is the outer
1493 loop iteration variable, from which collapsed loop iteration values
1494 are calculated. COUNTS array has been initialized by
1495 expand_oacc_collapse_init. */
1496
1497 static void
1498 expand_oacc_collapse_vars (const struct omp_for_data *fd,
1499 gimple_stmt_iterator *gsi,
1500 const oacc_collapse *counts, tree ivar)
1501 {
1502 tree ivar_type = TREE_TYPE (ivar);
1503
1504 /* The most rapidly changing iteration variable is the innermost
1505 one. */
1506 for (int ix = fd->collapse; ix--;)
1507 {
1508 const omp_for_data_loop *loop = &fd->loops[ix];
1509 const oacc_collapse *collapse = &counts[ix];
1510 tree iter_type = TREE_TYPE (loop->v);
1511 tree diff_type = TREE_TYPE (collapse->step);
1512 tree plus_type = iter_type;
1513 enum tree_code plus_code = PLUS_EXPR;
1514 tree expr;
1515
1516 if (POINTER_TYPE_P (iter_type))
1517 {
1518 plus_code = POINTER_PLUS_EXPR;
1519 plus_type = sizetype;
1520 }
1521
1522 expr = fold_build2 (TRUNC_MOD_EXPR, ivar_type, ivar,
1523 fold_convert (ivar_type, collapse->iters));
1524 expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
1525 collapse->step);
1526 expr = fold_build2 (plus_code, iter_type, collapse->base,
1527 fold_convert (plus_type, expr));
1528 expr = force_gimple_operand_gsi (gsi, expr, false, NULL_TREE,
1529 true, GSI_SAME_STMT);
1530 gassign *ass = gimple_build_assign (loop->v, expr);
1531 gsi_insert_before (gsi, ass, GSI_SAME_STMT);
1532
1533 if (ix)
1534 {
1535 expr = fold_build2 (TRUNC_DIV_EXPR, ivar_type, ivar,
1536 fold_convert (ivar_type, collapse->iters));
1537 ivar = force_gimple_operand_gsi (gsi, expr, true, NULL_TREE,
1538 true, GSI_SAME_STMT);
1539 }
1540 }
1541 }
1542
1543 /* Helper function for expand_omp_{for_*,simd}. If this is the outermost
1544 of the combined collapse > 1 loop constructs, generate code like:
1545 if (__builtin_expect (N32 cond3 N31, 0)) goto ZERO_ITER_BB;
1546 if (cond3 is <)
1547 adj = STEP3 - 1;
1548 else
1549 adj = STEP3 + 1;
1550 count3 = (adj + N32 - N31) / STEP3;
1551 if (__builtin_expect (N22 cond2 N21, 0)) goto ZERO_ITER_BB;
1552 if (cond2 is <)
1553 adj = STEP2 - 1;
1554 else
1555 adj = STEP2 + 1;
1556 count2 = (adj + N22 - N21) / STEP2;
1557 if (__builtin_expect (N12 cond1 N11, 0)) goto ZERO_ITER_BB;
1558 if (cond1 is <)
1559 adj = STEP1 - 1;
1560 else
1561 adj = STEP1 + 1;
1562 count1 = (adj + N12 - N11) / STEP1;
1563 count = count1 * count2 * count3;
1564 Furthermore, if ZERO_ITER_BB is NULL, create a BB which does:
1565 count = 0;
1566 and set ZERO_ITER_BB to that bb. If this isn't the outermost
1567 of the combined loop constructs, just initialize COUNTS array
1568 from the _looptemp_ clauses. */
1569
1570 /* NOTE: It *could* be better to moosh all of the BBs together,
1571 creating one larger BB with all the computation and the unexpected
1572 jump at the end. I.e.
1573
1574 bool zero3, zero2, zero1, zero;
1575
1576 zero3 = N32 c3 N31;
1577 count3 = (N32 - N31) /[cl] STEP3;
1578 zero2 = N22 c2 N21;
1579 count2 = (N22 - N21) /[cl] STEP2;
1580 zero1 = N12 c1 N11;
1581 count1 = (N12 - N11) /[cl] STEP1;
1582 zero = zero3 || zero2 || zero1;
1583 count = count1 * count2 * count3;
1584 if (__builtin_expect(zero, false)) goto zero_iter_bb;
1585
1586 After all, we expect the zero=false, and thus we expect to have to
1587 evaluate all of the comparison expressions, so short-circuiting
1588 oughtn't be a win. Since the condition isn't protecting a
1589 denominator, we're not concerned about divide-by-zero, so we can
1590 fully evaluate count even if a numerator turned out to be wrong.
1591
1592 It seems like putting this all together would create much better
1593 scheduling opportunities, and less pressure on the chip's branch
1594 predictor. */
1595
1596 static void
1597 expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1598 basic_block &entry_bb, tree *counts,
1599 basic_block &zero_iter1_bb, int &first_zero_iter1,
1600 basic_block &zero_iter2_bb, int &first_zero_iter2,
1601 basic_block &l2_dom_bb)
1602 {
1603 tree t, type = TREE_TYPE (fd->loop.v);
1604 edge e, ne;
1605 int i;
1606
1607 /* Collapsed loops need work for expansion into SSA form. */
1608 gcc_assert (!gimple_in_ssa_p (cfun));
1609
1610 if (gimple_omp_for_combined_into_p (fd->for_stmt)
1611 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
1612 {
1613 gcc_assert (fd->ordered == 0);
1614 /* First two _looptemp_ clauses are for istart/iend, counts[0]
1615 isn't supposed to be handled, as the inner loop doesn't
1616 use it. */
1617 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
1618 OMP_CLAUSE__LOOPTEMP_);
1619 gcc_assert (innerc);
1620 for (i = 0; i < fd->collapse; i++)
1621 {
1622 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1623 OMP_CLAUSE__LOOPTEMP_);
1624 gcc_assert (innerc);
1625 if (i)
1626 counts[i] = OMP_CLAUSE_DECL (innerc);
1627 else
1628 counts[0] = NULL_TREE;
1629 }
1630 return;
1631 }
1632
1633 for (i = fd->collapse; i < fd->ordered; i++)
1634 {
1635 tree itype = TREE_TYPE (fd->loops[i].v);
1636 counts[i] = NULL_TREE;
1637 t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1638 fold_convert (itype, fd->loops[i].n1),
1639 fold_convert (itype, fd->loops[i].n2));
1640 if (t && integer_zerop (t))
1641 {
1642 for (i = fd->collapse; i < fd->ordered; i++)
1643 counts[i] = build_int_cst (type, 0);
1644 break;
1645 }
1646 }
1647 for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
1648 {
1649 tree itype = TREE_TYPE (fd->loops[i].v);
1650
1651 if (i >= fd->collapse && counts[i])
1652 continue;
1653 if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
1654 && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
1655 fold_convert (itype, fd->loops[i].n1),
1656 fold_convert (itype, fd->loops[i].n2)))
1657 == NULL_TREE || !integer_onep (t)))
1658 {
1659 gcond *cond_stmt;
1660 tree n1, n2;
1661 n1 = fold_convert (itype, unshare_expr (fd->loops[i].n1));
1662 n1 = force_gimple_operand_gsi (gsi, n1, true, NULL_TREE,
1663 true, GSI_SAME_STMT);
1664 n2 = fold_convert (itype, unshare_expr (fd->loops[i].n2));
1665 n2 = force_gimple_operand_gsi (gsi, n2, true, NULL_TREE,
1666 true, GSI_SAME_STMT);
1667 cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
1668 NULL_TREE, NULL_TREE);
1669 gsi_insert_before (gsi, cond_stmt, GSI_SAME_STMT);
1670 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
1671 expand_omp_regimplify_p, NULL, NULL)
1672 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
1673 expand_omp_regimplify_p, NULL, NULL))
1674 {
1675 *gsi = gsi_for_stmt (cond_stmt);
1676 gimple_regimplify_operands (cond_stmt, gsi);
1677 }
1678 e = split_block (entry_bb, cond_stmt);
1679 basic_block &zero_iter_bb
1680 = i < fd->collapse ? zero_iter1_bb : zero_iter2_bb;
1681 int &first_zero_iter
1682 = i < fd->collapse ? first_zero_iter1 : first_zero_iter2;
1683 if (zero_iter_bb == NULL)
1684 {
1685 gassign *assign_stmt;
1686 first_zero_iter = i;
1687 zero_iter_bb = create_empty_bb (entry_bb);
1688 add_bb_to_loop (zero_iter_bb, entry_bb->loop_father);
1689 *gsi = gsi_after_labels (zero_iter_bb);
1690 if (i < fd->collapse)
1691 assign_stmt = gimple_build_assign (fd->loop.n2,
1692 build_zero_cst (type));
1693 else
1694 {
1695 counts[i] = create_tmp_reg (type, ".count");
1696 assign_stmt
1697 = gimple_build_assign (counts[i], build_zero_cst (type));
1698 }
1699 gsi_insert_before (gsi, assign_stmt, GSI_SAME_STMT);
1700 set_immediate_dominator (CDI_DOMINATORS, zero_iter_bb,
1701 entry_bb);
1702 }
1703 ne = make_edge (entry_bb, zero_iter_bb, EDGE_FALSE_VALUE);
1704 ne->probability = REG_BR_PROB_BASE / 2000 - 1;
1705 e->flags = EDGE_TRUE_VALUE;
1706 e->probability = REG_BR_PROB_BASE - ne->probability;
1707 if (l2_dom_bb == NULL)
1708 l2_dom_bb = entry_bb;
1709 entry_bb = e->dest;
1710 *gsi = gsi_last_bb (entry_bb);
1711 }
1712
1713 if (POINTER_TYPE_P (itype))
1714 itype = signed_type_for (itype);
1715 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
1716 ? -1 : 1));
1717 t = fold_build2 (PLUS_EXPR, itype,
1718 fold_convert (itype, fd->loops[i].step), t);
1719 t = fold_build2 (PLUS_EXPR, itype, t,
1720 fold_convert (itype, fd->loops[i].n2));
1721 t = fold_build2 (MINUS_EXPR, itype, t,
1722 fold_convert (itype, fd->loops[i].n1));
1723 /* ?? We could probably use CEIL_DIV_EXPR instead of
1724 TRUNC_DIV_EXPR and adjust by hand. Unless we can't
1725 generate the same code in the end because generically we
1726 don't know that the values involved must be negative for
1727 GT?? */
1728 if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
1729 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1730 fold_build1 (NEGATE_EXPR, itype, t),
1731 fold_build1 (NEGATE_EXPR, itype,
1732 fold_convert (itype,
1733 fd->loops[i].step)));
1734 else
1735 t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
1736 fold_convert (itype, fd->loops[i].step));
1737 t = fold_convert (type, t);
1738 if (TREE_CODE (t) == INTEGER_CST)
1739 counts[i] = t;
1740 else
1741 {
1742 if (i < fd->collapse || i != first_zero_iter2)
1743 counts[i] = create_tmp_reg (type, ".count");
1744 expand_omp_build_assign (gsi, counts[i], t);
1745 }
1746 if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
1747 {
1748 if (i == 0)
1749 t = counts[0];
1750 else
1751 t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
1752 expand_omp_build_assign (gsi, fd->loop.n2, t);
1753 }
1754 }
1755 }
1756
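/* Illustrative sketch, not part of GCC: the trip-count formula materialized
   just above, written as plain C for one normalized loop
   "for (v = n1; v cond n2; v += step)" with cond "<" or ">".  The -1/+1
   adjustment turns the truncating division into a ceiling division, exactly
   as in the fold_build2 sequence above; the function and parameter names
   here are hypothetical.  */

static long
omp_loop_trip_count (long n1, long n2, long step, int cond_is_lt)
{
  long adj = cond_is_lt ? step - 1 : step + 1;	/* adj = STEP -/+ 1 */
  long t = adj + n2 - n1;			/* adj + N2 - N1 */
  long count = t / step;			/* TRUNC_DIV_EXPR */
  /* The expanded code instead branches to a "count = 0" block when
     "N2 cond N1" does not hold; clamping here has the same effect.  */
  return count > 0 ? count : 0;
}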
1757 /* Helper function for expand_omp_{for_*,simd}. Generate code like:
1758 T = V;
1759 V3 = N31 + (T % count3) * STEP3;
1760 T = T / count3;
1761 V2 = N21 + (T % count2) * STEP2;
1762 T = T / count2;
1763 V1 = N11 + T * STEP1;
1764 if this loop doesn't have an inner loop construct combined with it.
1765 If it does have an inner loop construct combined with it and the
1766 iteration count isn't known constant, store values from counts array
1767 into its _looptemp_ temporaries instead. */
1768
1769 static void
1770 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
1771 tree *counts, gimple *inner_stmt, tree startvar)
1772 {
1773 int i;
1774 if (gimple_omp_for_combined_p (fd->for_stmt))
1775 {
1776 /* If fd->loop.n2 is constant, then no propagation of the counts
1777 is needed, they are constant. */
1778 if (TREE_CODE (fd->loop.n2) == INTEGER_CST)
1779 return;
1780
1781 tree clauses = gimple_code (inner_stmt) != GIMPLE_OMP_FOR
1782 ? gimple_omp_taskreg_clauses (inner_stmt)
1783 : gimple_omp_for_clauses (inner_stmt);
1784   /* The first two _looptemp_ clauses are for istart/iend; counts[0]
1785   need not be propagated, as the inner loop doesn't
1786   use it. */
1787 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
1788 gcc_assert (innerc);
1789 for (i = 0; i < fd->collapse; i++)
1790 {
1791 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
1792 OMP_CLAUSE__LOOPTEMP_);
1793 gcc_assert (innerc);
1794 if (i)
1795 {
1796 tree tem = OMP_CLAUSE_DECL (innerc);
1797 tree t = fold_convert (TREE_TYPE (tem), counts[i]);
1798 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1799 false, GSI_CONTINUE_LINKING);
1800 gassign *stmt = gimple_build_assign (tem, t);
1801 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1802 }
1803 }
1804 return;
1805 }
1806
1807 tree type = TREE_TYPE (fd->loop.v);
1808 tree tem = create_tmp_reg (type, ".tem");
1809 gassign *stmt = gimple_build_assign (tem, startvar);
1810 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1811
1812 for (i = fd->collapse - 1; i >= 0; i--)
1813 {
1814 tree vtype = TREE_TYPE (fd->loops[i].v), itype, t;
1815 itype = vtype;
1816 if (POINTER_TYPE_P (vtype))
1817 itype = signed_type_for (vtype);
1818 if (i != 0)
1819 t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
1820 else
1821 t = tem;
1822 t = fold_convert (itype, t);
1823 t = fold_build2 (MULT_EXPR, itype, t,
1824 fold_convert (itype, fd->loops[i].step));
1825 if (POINTER_TYPE_P (vtype))
1826 t = fold_build_pointer_plus (fd->loops[i].n1, t);
1827 else
1828 t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
1829 t = force_gimple_operand_gsi (gsi, t,
1830 DECL_P (fd->loops[i].v)
1831 && TREE_ADDRESSABLE (fd->loops[i].v),
1832 NULL_TREE, false,
1833 GSI_CONTINUE_LINKING);
1834 stmt = gimple_build_assign (fd->loops[i].v, t);
1835 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1836 if (i != 0)
1837 {
1838 t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
1839 t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
1840 false, GSI_CONTINUE_LINKING);
1841 stmt = gimple_build_assign (tem, t);
1842 gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
1843 }
1844 }
1845 }
1846
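/* Illustrative sketch, not part of GCC: the index decomposition that
   expand_omp_for_init_vars above emits for a collapse(3) nest, written as
   plain C.  counts[] holds the per-loop trip counts computed earlier;
   n1[]/step[] are the lower bounds and steps of the three loops.  All
   names here are hypothetical.  */

static void
omp_decode_collapsed_index (long t, const long counts[3],
			    const long n1[3], const long step[3],
			    long v[3])
{
  /* The innermost loop varies fastest: peel its index off with %, then
     divide it away, exactly like the V3/V2/V1 pseudocode above.  */
  v[2] = n1[2] + (t % counts[2]) * step[2];
  t /= counts[2];
  v[1] = n1[1] + (t % counts[1]) * step[1];
  t /= counts[1];
  v[0] = n1[0] + t * step[0];
}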
1847 /* Helper function for expand_omp_for_*. Generate code like:
1848 L10:
1849 V3 += STEP3;
1850 if (V3 cond3 N32) goto BODY_BB; else goto L11;
1851 L11:
1852 V3 = N31;
1853 V2 += STEP2;
1854 if (V2 cond2 N22) goto BODY_BB; else goto L12;
1855 L12:
1856 V2 = N21;
1857 V1 += STEP1;
1858 goto BODY_BB; */
1859
1860 static basic_block
1861 extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
1862 basic_block body_bb)
1863 {
1864 basic_block last_bb, bb, collapse_bb = NULL;
1865 int i;
1866 gimple_stmt_iterator gsi;
1867 edge e;
1868 tree t;
1869 gimple *stmt;
1870
1871 last_bb = cont_bb;
1872 for (i = fd->collapse - 1; i >= 0; i--)
1873 {
1874 tree vtype = TREE_TYPE (fd->loops[i].v);
1875
1876 bb = create_empty_bb (last_bb);
1877 add_bb_to_loop (bb, last_bb->loop_father);
1878 gsi = gsi_start_bb (bb);
1879
1880 if (i < fd->collapse - 1)
1881 {
1882 e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
1883 e->probability = REG_BR_PROB_BASE / 8;
1884
1885 t = fd->loops[i + 1].n1;
1886 t = force_gimple_operand_gsi (&gsi, t,
1887 DECL_P (fd->loops[i + 1].v)
1888 && TREE_ADDRESSABLE (fd->loops[i
1889 + 1].v),
1890 NULL_TREE, false,
1891 GSI_CONTINUE_LINKING);
1892 stmt = gimple_build_assign (fd->loops[i + 1].v, t);
1893 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1894 }
1895 else
1896 collapse_bb = bb;
1897
1898 set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
1899
1900 if (POINTER_TYPE_P (vtype))
1901 t = fold_build_pointer_plus (fd->loops[i].v, fd->loops[i].step);
1902 else
1903 t = fold_build2 (PLUS_EXPR, vtype, fd->loops[i].v, fd->loops[i].step);
1904 t = force_gimple_operand_gsi (&gsi, t,
1905 DECL_P (fd->loops[i].v)
1906 && TREE_ADDRESSABLE (fd->loops[i].v),
1907 NULL_TREE, false, GSI_CONTINUE_LINKING);
1908 stmt = gimple_build_assign (fd->loops[i].v, t);
1909 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1910
1911 if (i > 0)
1912 {
1913 t = fd->loops[i].n2;
1914 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
1915 false, GSI_CONTINUE_LINKING);
1916 tree v = fd->loops[i].v;
1917 if (DECL_P (v) && TREE_ADDRESSABLE (v))
1918 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
1919 false, GSI_CONTINUE_LINKING);
1920 t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
1921 stmt = gimple_build_cond_empty (t);
1922 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
1923 e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
1924 e->probability = REG_BR_PROB_BASE * 7 / 8;
1925 }
1926 else
1927 make_edge (bb, body_bb, EDGE_FALLTHRU);
1928 last_bb = bb;
1929 }
1930
1931 return collapse_bb;
1932 }
1933
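/* Illustrative sketch, not part of GCC: the update-with-carry control flow
   that extract_omp_for_update_vars builds (the L10/L11/L12 blocks above),
   written as plain C for a collapse(3) nest with "<" conditions.  The
   outermost update falls through unconditionally ("goto BODY_BB"); overall
   termination is driven by the flattened iteration counter in the caller.
   All names here are hypothetical.  */

static void
omp_advance_collapsed_vars (long v[3],
			    const long n1[3], const long n2[3],
			    const long step[3])
{
  v[2] += step[2];			/* V3 += STEP3; */
  if (v[2] < n2[2])			/* if (V3 cond3 N32) goto BODY_BB; */
    return;
  v[2] = n1[2];				/* V3 = N31; */
  v[1] += step[1];			/* V2 += STEP2; */
  if (v[1] < n2[1])			/* if (V2 cond2 N22) goto BODY_BB; */
    return;
  v[1] = n1[1];				/* V2 = N21; */
  v[0] += step[0];			/* V1 += STEP1; goto BODY_BB; */
}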
1934 /* Expand #pragma omp ordered depend(source). */
1935
1936 static void
1937 expand_omp_ordered_source (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1938 tree *counts, location_t loc)
1939 {
1940 enum built_in_function source_ix
1941 = fd->iter_type == long_integer_type_node
1942 ? BUILT_IN_GOMP_DOACROSS_POST : BUILT_IN_GOMP_DOACROSS_ULL_POST;
1943 gimple *g
1944 = gimple_build_call (builtin_decl_explicit (source_ix), 1,
1945 build_fold_addr_expr (counts[fd->ordered]));
1946 gimple_set_location (g, loc);
1947 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1948 }
1949
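/* Illustrative OpenMP source, not part of GCC: a minimal ordered(1) doacross
   loop (compile with -fopenmp).  The "depend(source)" directive in the body
   is what the function above expands: it becomes a single call to
   GOMP_doacross_post (or GOMP_doacross_ull_post for unsigned long long
   iteration types) passing the address of the .orditera counts array.  */

#define DOACROSS_N 1024

void
doacross_prefix_sum (int *a)
{
  int i;
#pragma omp parallel
#pragma omp for ordered(1)
  for (i = 1; i < DOACROSS_N; i++)
    {
#pragma omp ordered depend(sink: i - 1)
      a[i] += a[i - 1];
#pragma omp ordered depend(source)
    }
}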
1950 /* Expand a single depend from #pragma omp ordered depend(sink:...). */
1951
1952 static void
1953 expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
1954 tree *counts, tree c, location_t loc)
1955 {
1956 auto_vec<tree, 10> args;
1957 enum built_in_function sink_ix
1958 = fd->iter_type == long_integer_type_node
1959 ? BUILT_IN_GOMP_DOACROSS_WAIT : BUILT_IN_GOMP_DOACROSS_ULL_WAIT;
1960 tree t, off, coff = NULL_TREE, deps = OMP_CLAUSE_DECL (c), cond = NULL_TREE;
1961 int i;
1962 gimple_stmt_iterator gsi2 = *gsi;
1963 bool warned_step = false;
1964
1965 for (i = 0; i < fd->ordered; i++)
1966 {
1967 tree step = NULL_TREE;
1968 off = TREE_PURPOSE (deps);
1969 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
1970 {
1971 step = TREE_OPERAND (off, 1);
1972 off = TREE_OPERAND (off, 0);
1973 }
1974 if (!integer_zerop (off))
1975 {
1976 gcc_assert (fd->loops[i].cond_code == LT_EXPR
1977 || fd->loops[i].cond_code == GT_EXPR);
1978 bool forward = fd->loops[i].cond_code == LT_EXPR;
1979 if (step)
1980 {
1981   /* Non-simple Fortran DO loops.  If the step is variable,
1982   we don't even know the direction at compile time, so we
1983   can't warn. */
1984 if (TREE_CODE (step) != INTEGER_CST)
1985 break;
1986 forward = tree_int_cst_sgn (step) != -1;
1987 }
1988 if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
1989 warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
1990 "lexically later iteration");
1991 break;
1992 }
1993 deps = TREE_CHAIN (deps);
1994 }
1995 /* If all offsets corresponding to the collapsed loops are zero,
1996 this depend clause can be ignored. FIXME: but there is still a
1997 flush needed. We need to emit one __sync_synchronize () for it
1998 though (perhaps conditionally)? Solve this together with the
1999 conservative dependence folding optimization.
2000 if (i >= fd->collapse)
2001 return; */
2002
2003 deps = OMP_CLAUSE_DECL (c);
2004 gsi_prev (&gsi2);
2005 edge e1 = split_block (gsi_bb (gsi2), gsi_stmt (gsi2));
2006 edge e2 = split_block_after_labels (e1->dest);
2007
2008 gsi2 = gsi_after_labels (e1->dest);
2009 *gsi = gsi_last_bb (e1->src);
2010 for (i = 0; i < fd->ordered; i++)
2011 {
2012 tree itype = TREE_TYPE (fd->loops[i].v);
2013 tree step = NULL_TREE;
2014 tree orig_off = NULL_TREE;
2015 if (POINTER_TYPE_P (itype))
2016 itype = sizetype;
2017 if (i)
2018 deps = TREE_CHAIN (deps);
2019 off = TREE_PURPOSE (deps);
2020 if (TREE_CODE (off) == TRUNC_DIV_EXPR)
2021 {
2022 step = TREE_OPERAND (off, 1);
2023 off = TREE_OPERAND (off, 0);
2024 gcc_assert (fd->loops[i].cond_code == LT_EXPR
2025 && integer_onep (fd->loops[i].step)
2026 && !POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)));
2027 }
2028 tree s = fold_convert_loc (loc, itype, step ? step : fd->loops[i].step);
2029 if (step)
2030 {
2031 off = fold_convert_loc (loc, itype, off);
2032 orig_off = off;
2033 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2034 }
2035
2036 if (integer_zerop (off))
2037 t = boolean_true_node;
2038 else
2039 {
2040 tree a;
2041 tree co = fold_convert_loc (loc, itype, off);
2042 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
2043 {
2044 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2045 co = fold_build1_loc (loc, NEGATE_EXPR, itype, co);
2046 a = fold_build2_loc (loc, POINTER_PLUS_EXPR,
2047 TREE_TYPE (fd->loops[i].v), fd->loops[i].v,
2048 co);
2049 }
2050 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2051 a = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2052 fd->loops[i].v, co);
2053 else
2054 a = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
2055 fd->loops[i].v, co);
2056 if (step)
2057 {
2058 tree t1, t2;
2059 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2060 t1 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2061 fd->loops[i].n1);
2062 else
2063 t1 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2064 fd->loops[i].n2);
2065 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2066 t2 = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2067 fd->loops[i].n2);
2068 else
2069 t2 = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2070 fd->loops[i].n1);
2071 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node,
2072 step, build_int_cst (TREE_TYPE (step), 0));
2073 if (TREE_CODE (step) != INTEGER_CST)
2074 {
2075 t1 = unshare_expr (t1);
2076 t1 = force_gimple_operand_gsi (gsi, t1, true, NULL_TREE,
2077 false, GSI_CONTINUE_LINKING);
2078 t2 = unshare_expr (t2);
2079 t2 = force_gimple_operand_gsi (gsi, t2, true, NULL_TREE,
2080 false, GSI_CONTINUE_LINKING);
2081 }
2082 t = fold_build3_loc (loc, COND_EXPR, boolean_type_node,
2083 t, t2, t1);
2084 }
2085 else if (fd->loops[i].cond_code == LT_EXPR)
2086 {
2087 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2088 t = fold_build2_loc (loc, GE_EXPR, boolean_type_node, a,
2089 fd->loops[i].n1);
2090 else
2091 t = fold_build2_loc (loc, LT_EXPR, boolean_type_node, a,
2092 fd->loops[i].n2);
2093 }
2094 else if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2095 t = fold_build2_loc (loc, GT_EXPR, boolean_type_node, a,
2096 fd->loops[i].n2);
2097 else
2098 t = fold_build2_loc (loc, LE_EXPR, boolean_type_node, a,
2099 fd->loops[i].n1);
2100 }
2101 if (cond)
2102 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node, cond, t);
2103 else
2104 cond = t;
2105
2106 off = fold_convert_loc (loc, itype, off);
2107
2108 if (step
2109 || (fd->loops[i].cond_code == LT_EXPR
2110 ? !integer_onep (fd->loops[i].step)
2111 : !integer_minus_onep (fd->loops[i].step)))
2112 {
2113 if (step == NULL_TREE
2114 && TYPE_UNSIGNED (itype)
2115 && fd->loops[i].cond_code == GT_EXPR)
2116 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype, off,
2117 fold_build1_loc (loc, NEGATE_EXPR, itype,
2118 s));
2119 else
2120 t = fold_build2_loc (loc, TRUNC_MOD_EXPR, itype,
2121 orig_off ? orig_off : off, s);
2122 t = fold_build2_loc (loc, EQ_EXPR, boolean_type_node, t,
2123 build_int_cst (itype, 0));
2124 if (integer_zerop (t) && !warned_step)
2125 {
2126 warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
2127 "in the iteration space");
2128 warned_step = true;
2129 }
2130 cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
2131 cond, t);
2132 }
2133
2134 if (i <= fd->collapse - 1 && fd->collapse > 1)
2135 t = fd->loop.v;
2136 else if (counts[i])
2137 t = counts[i];
2138 else
2139 {
2140 t = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2141 fd->loops[i].v, fd->loops[i].n1);
2142 t = fold_convert_loc (loc, fd->iter_type, t);
2143 }
2144 if (step)
2145   /* We have already divided off by step earlier. */;
2146 else if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
2147 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off,
2148 fold_build1_loc (loc, NEGATE_EXPR, itype,
2149 s));
2150 else
2151 off = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, off, s);
2152 if (OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
2153 off = fold_build1_loc (loc, NEGATE_EXPR, itype, off);
2154 off = fold_convert_loc (loc, fd->iter_type, off);
2155 if (i <= fd->collapse - 1 && fd->collapse > 1)
2156 {
2157 if (i)
2158 off = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, coff,
2159 off);
2160 if (i < fd->collapse - 1)
2161 {
2162 coff = fold_build2_loc (loc, MULT_EXPR, fd->iter_type, off,
2163 counts[i]);
2164 continue;
2165 }
2166 }
2167 off = unshare_expr (off);
2168 t = fold_build2_loc (loc, PLUS_EXPR, fd->iter_type, t, off);
2169 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
2170 true, GSI_SAME_STMT);
2171 args.safe_push (t);
2172 }
2173 gimple *g = gimple_build_call_vec (builtin_decl_explicit (sink_ix), args);
2174 gimple_set_location (g, loc);
2175 gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
2176
2177 cond = unshare_expr (cond);
2178 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, false,
2179 GSI_CONTINUE_LINKING);
2180 gsi_insert_after (gsi, gimple_build_cond_empty (cond), GSI_NEW_STMT);
2181 edge e3 = make_edge (e1->src, e2->dest, EDGE_FALSE_VALUE);
2182 e3->probability = REG_BR_PROB_BASE / 8;
2183 e1->probability = REG_BR_PROB_BASE - e3->probability;
2184 e1->flags = EDGE_TRUE_VALUE;
2185 set_immediate_dominator (CDI_DOMINATORS, e2->dest, e1->src);
2186
2187 *gsi = gsi_after_labels (e2->dest);
2188 }
2189
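/* Illustrative sketch, not part of GCC: what the expansion above amounts to
   for "ordered(1)" and a clause "depend(sink: i - 1)" on a unit-stride "<"
   loop.  The wait is guarded so we never wait on an iteration outside the
   iteration space.  GOMP_doacross_wait is the libgomp entry point behind
   BUILT_IN_GOMP_DOACROSS_WAIT; its prototype below is an assumption written
   from that, and the helper name is hypothetical.  */

extern void GOMP_doacross_wait (long first, ...);

static void
ordered_sink_wait_i_minus_1 (long i, long n1)
{
  if (i - 1 >= n1)			/* the sink iteration exists */
    GOMP_doacross_wait (i - 1 - n1);	/* 0-based iteration number */
}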
2190 /* Expand all #pragma omp ordered depend(source) and
2191 #pragma omp ordered depend(sink:...) constructs in the current
2192 #pragma omp for ordered(n) region. */
2193
2194 static void
2195 expand_omp_ordered_source_sink (struct omp_region *region,
2196 struct omp_for_data *fd, tree *counts,
2197 basic_block cont_bb)
2198 {
2199 struct omp_region *inner;
2200 int i;
2201 for (i = fd->collapse - 1; i < fd->ordered; i++)
2202 if (i == fd->collapse - 1 && fd->collapse > 1)
2203 counts[i] = NULL_TREE;
2204 else if (i >= fd->collapse && !cont_bb)
2205 counts[i] = build_zero_cst (fd->iter_type);
2206 else if (!POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v))
2207 && integer_onep (fd->loops[i].step))
2208 counts[i] = NULL_TREE;
2209 else
2210 counts[i] = create_tmp_var (fd->iter_type, ".orditer");
2211 tree atype
2212 = build_array_type_nelts (fd->iter_type, fd->ordered - fd->collapse + 1);
2213 counts[fd->ordered] = create_tmp_var (atype, ".orditera");
2214 TREE_ADDRESSABLE (counts[fd->ordered]) = 1;
2215
2216 for (inner = region->inner; inner; inner = inner->next)
2217 if (inner->type == GIMPLE_OMP_ORDERED)
2218 {
2219 gomp_ordered *ord_stmt = inner->ord_stmt;
2220 gimple_stmt_iterator gsi = gsi_for_stmt (ord_stmt);
2221 location_t loc = gimple_location (ord_stmt);
2222 tree c;
2223 for (c = gimple_omp_ordered_clauses (ord_stmt);
2224 c; c = OMP_CLAUSE_CHAIN (c))
2225 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SOURCE)
2226 break;
2227 if (c)
2228 expand_omp_ordered_source (&gsi, fd, counts, loc);
2229 for (c = gimple_omp_ordered_clauses (ord_stmt);
2230 c; c = OMP_CLAUSE_CHAIN (c))
2231 if (OMP_CLAUSE_DEPEND_KIND (c) == OMP_CLAUSE_DEPEND_SINK)
2232 expand_omp_ordered_sink (&gsi, fd, counts, c, loc);
2233 gsi_remove (&gsi, true);
2234 }
2235 }
2236
2237 /* Wrap the body into fd->ordered - fd->collapse loops that aren't
2238 collapsed. */
2239
2240 static basic_block
2241 expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
2242 basic_block cont_bb, basic_block body_bb,
2243 bool ordered_lastprivate)
2244 {
2245 if (fd->ordered == fd->collapse)
2246 return cont_bb;
2247
2248 if (!cont_bb)
2249 {
2250 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2251 for (int i = fd->collapse; i < fd->ordered; i++)
2252 {
2253 tree type = TREE_TYPE (fd->loops[i].v);
2254 tree n1 = fold_convert (type, fd->loops[i].n1);
2255 expand_omp_build_assign (&gsi, fd->loops[i].v, n1);
2256 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2257 size_int (i - fd->collapse + 1),
2258 NULL_TREE, NULL_TREE);
2259 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2260 }
2261 return NULL;
2262 }
2263
2264 for (int i = fd->ordered - 1; i >= fd->collapse; i--)
2265 {
2266 tree t, type = TREE_TYPE (fd->loops[i].v);
2267 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2268 expand_omp_build_assign (&gsi, fd->loops[i].v,
2269 fold_convert (type, fd->loops[i].n1));
2270 if (counts[i])
2271 expand_omp_build_assign (&gsi, counts[i],
2272 build_zero_cst (fd->iter_type));
2273 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2274 size_int (i - fd->collapse + 1),
2275 NULL_TREE, NULL_TREE);
2276 expand_omp_build_assign (&gsi, aref, build_zero_cst (fd->iter_type));
2277 if (!gsi_end_p (gsi))
2278 gsi_prev (&gsi);
2279 else
2280 gsi = gsi_last_bb (body_bb);
2281 edge e1 = split_block (body_bb, gsi_stmt (gsi));
2282 basic_block new_body = e1->dest;
2283 if (body_bb == cont_bb)
2284 cont_bb = new_body;
2285 edge e2 = NULL;
2286 basic_block new_header;
2287 if (EDGE_COUNT (cont_bb->preds) > 0)
2288 {
2289 gsi = gsi_last_bb (cont_bb);
2290 if (POINTER_TYPE_P (type))
2291 t = fold_build_pointer_plus (fd->loops[i].v,
2292 fold_convert (sizetype,
2293 fd->loops[i].step));
2294 else
2295 t = fold_build2 (PLUS_EXPR, type, fd->loops[i].v,
2296 fold_convert (type, fd->loops[i].step));
2297 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
2298 if (counts[i])
2299 {
2300 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[i],
2301 build_int_cst (fd->iter_type, 1));
2302 expand_omp_build_assign (&gsi, counts[i], t);
2303 t = counts[i];
2304 }
2305 else
2306 {
2307 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[i].v),
2308 fd->loops[i].v, fd->loops[i].n1);
2309 t = fold_convert (fd->iter_type, t);
2310 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2311 true, GSI_SAME_STMT);
2312 }
2313 aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2314 size_int (i - fd->collapse + 1),
2315 NULL_TREE, NULL_TREE);
2316 expand_omp_build_assign (&gsi, aref, t);
2317 gsi_prev (&gsi);
2318 e2 = split_block (cont_bb, gsi_stmt (gsi));
2319 new_header = e2->dest;
2320 }
2321 else
2322 new_header = cont_bb;
2323 gsi = gsi_after_labels (new_header);
2324 tree v = force_gimple_operand_gsi (&gsi, fd->loops[i].v, true, NULL_TREE,
2325 true, GSI_SAME_STMT);
2326 tree n2
2327 = force_gimple_operand_gsi (&gsi, fold_convert (type, fd->loops[i].n2),
2328 true, NULL_TREE, true, GSI_SAME_STMT);
2329 t = build2 (fd->loops[i].cond_code, boolean_type_node, v, n2);
2330 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_NEW_STMT);
2331 edge e3 = split_block (new_header, gsi_stmt (gsi));
2332 cont_bb = e3->dest;
2333 remove_edge (e1);
2334 make_edge (body_bb, new_header, EDGE_FALLTHRU);
2335 e3->flags = EDGE_FALSE_VALUE;
2336 e3->probability = REG_BR_PROB_BASE / 8;
2337 e1 = make_edge (new_header, new_body, EDGE_TRUE_VALUE);
2338 e1->probability = REG_BR_PROB_BASE - e3->probability;
2339
2340 set_immediate_dominator (CDI_DOMINATORS, new_header, body_bb);
2341 set_immediate_dominator (CDI_DOMINATORS, new_body, new_header);
2342
2343 if (e2)
2344 {
2345 struct loop *loop = alloc_loop ();
2346 loop->header = new_header;
2347 loop->latch = e2->src;
2348 add_loop (loop, body_bb->loop_father);
2349 }
2350 }
2351
2352 /* If there are any lastprivate clauses and it is possible some loops
2353 might have zero iterations, ensure all the decls are initialized,
2354 otherwise we could crash evaluating C++ class iterators with lastprivate
2355 clauses. */
2356 bool need_inits = false;
2357 for (int i = fd->collapse; ordered_lastprivate && i < fd->ordered; i++)
2358 if (need_inits)
2359 {
2360 tree type = TREE_TYPE (fd->loops[i].v);
2361 gimple_stmt_iterator gsi = gsi_after_labels (body_bb);
2362 expand_omp_build_assign (&gsi, fd->loops[i].v,
2363 fold_convert (type, fd->loops[i].n1));
2364 }
2365 else
2366 {
2367 tree type = TREE_TYPE (fd->loops[i].v);
2368 tree this_cond = fold_build2 (fd->loops[i].cond_code,
2369 boolean_type_node,
2370 fold_convert (type, fd->loops[i].n1),
2371 fold_convert (type, fd->loops[i].n2));
2372 if (!integer_onep (this_cond))
2373 need_inits = true;
2374 }
2375
2376 return cont_bb;
2377 }
2378
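/* Illustrative sketch, not part of GCC: for "#pragma omp for ordered(2)"
   with the default collapse(1), only the outer i loop is workshared; the
   function above re-creates the inner j loop around the loop body and keeps
   the second element of the .orditera counts array equal to the current
   0-based j iteration (the first element, for the workshared i loop, is
   maintained separately in expand_omp_for_generic).  Names here are
   hypothetical; body stands for the original loop body.  */

static void
ordered2_inner_loop (long i, long j_n1, long j_n2, long j_step,
		     long orditera[2], void (*body) (long, long))
{
  long j, iter = 0;
  for (j = j_n1; j < j_n2; j += j_step)
    {
      orditera[1] = iter;	/* current j count, read by depend(source) */
      body (i, j);
      iter++;
    }
}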
2379 /* A subroutine of expand_omp_for. Generate code for a parallel
2380 loop with any schedule. Given parameters:
2381
2382 for (V = N1; V cond N2; V += STEP) BODY;
2383
2384 where COND is "<" or ">", we generate pseudocode
2385
2386 more = GOMP_loop_foo_start (N1, N2, STEP, CHUNK, &istart0, &iend0);
2387 if (more) goto L0; else goto L3;
2388 L0:
2389 V = istart0;
2390 iend = iend0;
2391 L1:
2392 BODY;
2393 V += STEP;
2394 if (V cond iend) goto L1; else goto L2;
2395 L2:
2396 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2397 L3:
2398
2399 If this is a combined omp parallel loop, instead of the call to
2400 GOMP_loop_foo_start, we call GOMP_loop_foo_next.
2401    If this is a gimple_omp_for_combined_p loop, then instead of assigning
2402    V and iend in L0 we assign the first two _looptemp_ clause decls of the
2403    inner GIMPLE_OMP_FOR, and the "V += STEP;" and
2404    "if (V cond iend) goto L1; else goto L2;" statements are removed.
2405
2406 For collapsed loops, given parameters:
2407 collapse(3)
2408 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
2409 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
2410 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
2411 BODY;
2412
2413 we generate pseudocode
2414
2415 if (__builtin_expect (N32 cond3 N31, 0)) goto Z0;
2416 if (cond3 is <)
2417 adj = STEP3 - 1;
2418 else
2419 adj = STEP3 + 1;
2420 count3 = (adj + N32 - N31) / STEP3;
2421 if (__builtin_expect (N22 cond2 N21, 0)) goto Z0;
2422 if (cond2 is <)
2423 adj = STEP2 - 1;
2424 else
2425 adj = STEP2 + 1;
2426 count2 = (adj + N22 - N21) / STEP2;
2427 if (__builtin_expect (N12 cond1 N11, 0)) goto Z0;
2428 if (cond1 is <)
2429 adj = STEP1 - 1;
2430 else
2431 adj = STEP1 + 1;
2432 count1 = (adj + N12 - N11) / STEP1;
2433 count = count1 * count2 * count3;
2434 goto Z1;
2435 Z0:
2436 count = 0;
2437 Z1:
2438 more = GOMP_loop_foo_start (0, count, 1, CHUNK, &istart0, &iend0);
2439 if (more) goto L0; else goto L3;
2440 L0:
2441 V = istart0;
2442 T = V;
2443 V3 = N31 + (T % count3) * STEP3;
2444 T = T / count3;
2445 V2 = N21 + (T % count2) * STEP2;
2446 T = T / count2;
2447 V1 = N11 + T * STEP1;
2448 iend = iend0;
2449 L1:
2450 BODY;
2451 V += 1;
2452 if (V < iend) goto L10; else goto L2;
2453 L10:
2454 V3 += STEP3;
2455 if (V3 cond3 N32) goto L1; else goto L11;
2456 L11:
2457 V3 = N31;
2458 V2 += STEP2;
2459 if (V2 cond2 N22) goto L1; else goto L12;
2460 L12:
2461 V2 = N21;
2462 V1 += STEP1;
2463 goto L1;
2464 L2:
2465 if (GOMP_loop_foo_next (&istart0, &iend0)) goto L0; else goto L3;
2466 L3:
2467
2468 */
2469
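/* Illustrative sketch, not part of GCC: the L0..L3 control flow from the
   pseudocode above, written as plain C for a "<" loop with a positive step.
   gomp_loop_start/gomp_loop_next/gomp_loop_end are hypothetical
   single-threaded stand-ins for the GOMP_loop_foo_start/next/end libgomp
   entry points selected via start_fn/next_fn; the real functions hand out
   chunks to the members of a thread team.  The implementation follows
   below.  */

static long cur_iter, end_iter, chunk_span;

static int
gomp_loop_next (long *istart, long *iend)
{
  if (cur_iter >= end_iter)
    return 0;				/* no more chunks: goto L3 */
  *istart = cur_iter;
  *iend = cur_iter + chunk_span < end_iter ? cur_iter + chunk_span : end_iter;
  cur_iter = *iend;
  return 1;
}

static int
gomp_loop_start (long n1, long n2, long step, long chunk,
		 long *istart, long *iend)
{
  cur_iter = n1;
  end_iter = n2;
  chunk_span = chunk * step;
  return gomp_loop_next (istart, iend);
}

static void
gomp_loop_end (void)
{
}

static void
run_workshared_loop (long n1, long n2, long step, long chunk,
		     void (*body) (long))
{
  long istart0, iend0, v;

  if (gomp_loop_start (n1, n2, step, chunk, &istart0, &iend0))
    do
      {
	/* L0/L1: run one chunk [istart0, iend0) sequentially.  */
	for (v = istart0; v < iend0; v += step)
	  body (v);
      }
    while (gomp_loop_next (&istart0, &iend0));	/* L2 */
  gomp_loop_end ();				/* emitted in exit_bb */
}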
2470 static void
2471 expand_omp_for_generic (struct omp_region *region,
2472 struct omp_for_data *fd,
2473 enum built_in_function start_fn,
2474 enum built_in_function next_fn,
2475 gimple *inner_stmt)
2476 {
2477 tree type, istart0, iend0, iend;
2478 tree t, vmain, vback, bias = NULL_TREE;
2479 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, collapse_bb;
2480 basic_block l2_bb = NULL, l3_bb = NULL;
2481 gimple_stmt_iterator gsi;
2482 gassign *assign_stmt;
2483 bool in_combined_parallel = is_combined_parallel (region);
2484 bool broken_loop = region->cont == NULL;
2485 edge e, ne;
2486 tree *counts = NULL;
2487 int i;
2488 bool ordered_lastprivate = false;
2489
2490 gcc_assert (!broken_loop || !in_combined_parallel);
2491 gcc_assert (fd->iter_type == long_integer_type_node
2492 || !in_combined_parallel);
2493
2494 entry_bb = region->entry;
2495 cont_bb = region->cont;
2496 collapse_bb = NULL;
2497 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
2498 gcc_assert (broken_loop
2499 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
2500 l0_bb = split_edge (FALLTHRU_EDGE (entry_bb));
2501 l1_bb = single_succ (l0_bb);
2502 if (!broken_loop)
2503 {
2504 l2_bb = create_empty_bb (cont_bb);
2505 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l1_bb
2506 || (single_succ_edge (BRANCH_EDGE (cont_bb)->dest)->dest
2507 == l1_bb));
2508 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
2509 }
2510 else
2511 l2_bb = NULL;
2512 l3_bb = BRANCH_EDGE (entry_bb)->dest;
2513 exit_bb = region->exit;
2514
2515 gsi = gsi_last_bb (entry_bb);
2516
2517 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
2518 if (fd->ordered
2519 && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
2520 OMP_CLAUSE_LASTPRIVATE))
2521     ordered_lastprivate = true;
2522 if (fd->collapse > 1 || fd->ordered)
2523 {
2524 int first_zero_iter1 = -1, first_zero_iter2 = -1;
2525 basic_block zero_iter1_bb = NULL, zero_iter2_bb = NULL, l2_dom_bb = NULL;
2526
2527 counts = XALLOCAVEC (tree, fd->ordered ? fd->ordered + 1 : fd->collapse);
2528 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
2529 zero_iter1_bb, first_zero_iter1,
2530 zero_iter2_bb, first_zero_iter2, l2_dom_bb);
2531
2532 if (zero_iter1_bb)
2533 {
2534 /* Some counts[i] vars might be uninitialized if
2535 some loop has zero iterations. But the body shouldn't
2536 be executed in that case, so just avoid uninit warnings. */
2537 for (i = first_zero_iter1;
2538 i < (fd->ordered ? fd->ordered : fd->collapse); i++)
2539 if (SSA_VAR_P (counts[i]))
2540 TREE_NO_WARNING (counts[i]) = 1;
2541 gsi_prev (&gsi);
2542 e = split_block (entry_bb, gsi_stmt (gsi));
2543 entry_bb = e->dest;
2544 make_edge (zero_iter1_bb, entry_bb, EDGE_FALLTHRU);
2545 gsi = gsi_last_bb (entry_bb);
2546 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2547 get_immediate_dominator (CDI_DOMINATORS,
2548 zero_iter1_bb));
2549 }
2550 if (zero_iter2_bb)
2551 {
2552 /* Some counts[i] vars might be uninitialized if
2553 some loop has zero iterations. But the body shouldn't
2554 be executed in that case, so just avoid uninit warnings. */
2555 for (i = first_zero_iter2; i < fd->ordered; i++)
2556 if (SSA_VAR_P (counts[i]))
2557 TREE_NO_WARNING (counts[i]) = 1;
2558 if (zero_iter1_bb)
2559 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2560 else
2561 {
2562 gsi_prev (&gsi);
2563 e = split_block (entry_bb, gsi_stmt (gsi));
2564 entry_bb = e->dest;
2565 make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
2566 gsi = gsi_last_bb (entry_bb);
2567 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
2568 get_immediate_dominator
2569 (CDI_DOMINATORS, zero_iter2_bb));
2570 }
2571 }
2572 if (fd->collapse == 1)
2573 {
2574 counts[0] = fd->loop.n2;
2575 fd->loop = fd->loops[0];
2576 }
2577 }
2578
2579 type = TREE_TYPE (fd->loop.v);
2580 istart0 = create_tmp_var (fd->iter_type, ".istart0");
2581 iend0 = create_tmp_var (fd->iter_type, ".iend0");
2582 TREE_ADDRESSABLE (istart0) = 1;
2583 TREE_ADDRESSABLE (iend0) = 1;
2584
2585 /* See if we need to bias by LLONG_MIN. */
2586 if (fd->iter_type == long_long_unsigned_type_node
2587 && TREE_CODE (type) == INTEGER_TYPE
2588 && !TYPE_UNSIGNED (type)
2589 && fd->ordered == 0)
2590 {
2591 tree n1, n2;
2592
2593 if (fd->loop.cond_code == LT_EXPR)
2594 {
2595 n1 = fd->loop.n1;
2596 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
2597 }
2598 else
2599 {
2600 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
2601 n2 = fd->loop.n1;
2602 }
2603 if (TREE_CODE (n1) != INTEGER_CST
2604 || TREE_CODE (n2) != INTEGER_CST
2605 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
2606 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
2607 }
2608
2609 gimple_stmt_iterator gsif = gsi;
2610 gsi_prev (&gsif);
2611
2612 tree arr = NULL_TREE;
2613 if (in_combined_parallel)
2614 {
2615 gcc_assert (fd->ordered == 0);
2616 /* In a combined parallel loop, emit a call to
2617 GOMP_loop_foo_next. */
2618 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
2619 build_fold_addr_expr (istart0),
2620 build_fold_addr_expr (iend0));
2621 }
2622 else
2623 {
2624 tree t0, t1, t2, t3, t4;
2625 /* If this is not a combined parallel loop, emit a call to
2626 GOMP_loop_foo_start in ENTRY_BB. */
2627 t4 = build_fold_addr_expr (iend0);
2628 t3 = build_fold_addr_expr (istart0);
2629 if (fd->ordered)
2630 {
2631 t0 = build_int_cst (unsigned_type_node,
2632 fd->ordered - fd->collapse + 1);
2633 arr = create_tmp_var (build_array_type_nelts (fd->iter_type,
2634 fd->ordered
2635 - fd->collapse + 1),
2636 ".omp_counts");
2637 DECL_NAMELESS (arr) = 1;
2638 TREE_ADDRESSABLE (arr) = 1;
2639 TREE_STATIC (arr) = 1;
2640 vec<constructor_elt, va_gc> *v;
2641 vec_alloc (v, fd->ordered - fd->collapse + 1);
2642 int idx;
2643
2644 for (idx = 0; idx < fd->ordered - fd->collapse + 1; idx++)
2645 {
2646 tree c;
2647 if (idx == 0 && fd->collapse > 1)
2648 c = fd->loop.n2;
2649 else
2650 c = counts[idx + fd->collapse - 1];
2651 tree purpose = size_int (idx);
2652 CONSTRUCTOR_APPEND_ELT (v, purpose, c);
2653 if (TREE_CODE (c) != INTEGER_CST)
2654 TREE_STATIC (arr) = 0;
2655 }
2656
2657 DECL_INITIAL (arr) = build_constructor (TREE_TYPE (arr), v);
2658 if (!TREE_STATIC (arr))
2659 force_gimple_operand_gsi (&gsi, build1 (DECL_EXPR,
2660 void_type_node, arr),
2661 true, NULL_TREE, true, GSI_SAME_STMT);
2662 t1 = build_fold_addr_expr (arr);
2663 t2 = NULL_TREE;
2664 }
2665 else
2666 {
2667 t2 = fold_convert (fd->iter_type, fd->loop.step);
2668 t1 = fd->loop.n2;
2669 t0 = fd->loop.n1;
2670 if (gimple_omp_for_combined_into_p (fd->for_stmt))
2671 {
2672 tree innerc
2673 = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
2674 OMP_CLAUSE__LOOPTEMP_);
2675 gcc_assert (innerc);
2676 t0 = OMP_CLAUSE_DECL (innerc);
2677 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2678 OMP_CLAUSE__LOOPTEMP_);
2679 gcc_assert (innerc);
2680 t1 = OMP_CLAUSE_DECL (innerc);
2681 }
2682 if (POINTER_TYPE_P (TREE_TYPE (t0))
2683 && TYPE_PRECISION (TREE_TYPE (t0))
2684 != TYPE_PRECISION (fd->iter_type))
2685 {
2686       /* Avoid casting pointers to an integer of a different size. */
2687 tree itype = signed_type_for (type);
2688 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
2689 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
2690 }
2691 else
2692 {
2693 t1 = fold_convert (fd->iter_type, t1);
2694 t0 = fold_convert (fd->iter_type, t0);
2695 }
2696 if (bias)
2697 {
2698 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
2699 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
2700 }
2701 }
2702 if (fd->iter_type == long_integer_type_node || fd->ordered)
2703 {
2704 if (fd->chunk_size)
2705 {
2706 t = fold_convert (fd->iter_type, fd->chunk_size);
2707 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2708 if (fd->ordered)
2709 t = build_call_expr (builtin_decl_explicit (start_fn),
2710 5, t0, t1, t, t3, t4);
2711 else
2712 t = build_call_expr (builtin_decl_explicit (start_fn),
2713 6, t0, t1, t2, t, t3, t4);
2714 }
2715 else if (fd->ordered)
2716 t = build_call_expr (builtin_decl_explicit (start_fn),
2717 4, t0, t1, t3, t4);
2718 else
2719 t = build_call_expr (builtin_decl_explicit (start_fn),
2720 5, t0, t1, t2, t3, t4);
2721 }
2722 else
2723 {
2724 tree t5;
2725 tree c_bool_type;
2726 tree bfn_decl;
2727
2728       /* The GOMP_loop_ull_*start functions take an additional boolean
2729       argument, true for < loops and false for > loops.
2730       In Fortran, the C bool type can be different from
2731       boolean_type_node. */
2732 bfn_decl = builtin_decl_explicit (start_fn);
2733 c_bool_type = TREE_TYPE (TREE_TYPE (bfn_decl));
2734 t5 = build_int_cst (c_bool_type,
2735 fd->loop.cond_code == LT_EXPR ? 1 : 0);
2736 if (fd->chunk_size)
2737 {
2738 tree bfn_decl = builtin_decl_explicit (start_fn);
2739 t = fold_convert (fd->iter_type, fd->chunk_size);
2740 t = omp_adjust_chunk_size (t, fd->simd_schedule);
2741 t = build_call_expr (bfn_decl, 7, t5, t0, t1, t2, t, t3, t4);
2742 }
2743 else
2744 t = build_call_expr (builtin_decl_explicit (start_fn),
2745 6, t5, t0, t1, t2, t3, t4);
2746 }
2747 }
2748 if (TREE_TYPE (t) != boolean_type_node)
2749 t = fold_build2 (NE_EXPR, boolean_type_node,
2750 t, build_int_cst (TREE_TYPE (t), 0));
2751 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2752 true, GSI_SAME_STMT);
2753 if (arr && !TREE_STATIC (arr))
2754 {
2755 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
2756 TREE_THIS_VOLATILE (clobber) = 1;
2757 gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
2758 GSI_SAME_STMT);
2759 }
2760 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
2761
2762 /* Remove the GIMPLE_OMP_FOR statement. */
2763 gsi_remove (&gsi, true);
2764
2765 if (gsi_end_p (gsif))
2766 gsif = gsi_after_labels (gsi_bb (gsif));
2767 gsi_next (&gsif);
2768
2769 /* Iteration setup for sequential loop goes in L0_BB. */
2770 tree startvar = fd->loop.v;
2771 tree endvar = NULL_TREE;
2772
2773 if (gimple_omp_for_combined_p (fd->for_stmt))
2774 {
2775 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_FOR
2776 && gimple_omp_for_kind (inner_stmt)
2777 == GF_OMP_FOR_KIND_SIMD);
2778 tree innerc = omp_find_clause (gimple_omp_for_clauses (inner_stmt),
2779 OMP_CLAUSE__LOOPTEMP_);
2780 gcc_assert (innerc);
2781 startvar = OMP_CLAUSE_DECL (innerc);
2782 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
2783 OMP_CLAUSE__LOOPTEMP_);
2784 gcc_assert (innerc);
2785 endvar = OMP_CLAUSE_DECL (innerc);
2786 }
2787
2788 gsi = gsi_start_bb (l0_bb);
2789 t = istart0;
2790 if (fd->ordered && fd->collapse == 1)
2791 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2792 fold_convert (fd->iter_type, fd->loop.step));
2793 else if (bias)
2794 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2795 if (fd->ordered && fd->collapse == 1)
2796 {
2797 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2798 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2799 fd->loop.n1, fold_convert (sizetype, t));
2800 else
2801 {
2802 t = fold_convert (TREE_TYPE (startvar), t);
2803 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2804 fd->loop.n1, t);
2805 }
2806 }
2807 else
2808 {
2809 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2810 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2811 t = fold_convert (TREE_TYPE (startvar), t);
2812 }
2813 t = force_gimple_operand_gsi (&gsi, t,
2814 DECL_P (startvar)
2815 && TREE_ADDRESSABLE (startvar),
2816 NULL_TREE, false, GSI_CONTINUE_LINKING);
2817 assign_stmt = gimple_build_assign (startvar, t);
2818 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2819
2820 t = iend0;
2821 if (fd->ordered && fd->collapse == 1)
2822 t = fold_build2 (MULT_EXPR, fd->iter_type, t,
2823 fold_convert (fd->iter_type, fd->loop.step));
2824 else if (bias)
2825 t = fold_build2 (MINUS_EXPR, fd->iter_type, t, bias);
2826 if (fd->ordered && fd->collapse == 1)
2827 {
2828 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2829 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (startvar),
2830 fd->loop.n1, fold_convert (sizetype, t));
2831 else
2832 {
2833 t = fold_convert (TREE_TYPE (startvar), t);
2834 t = fold_build2 (PLUS_EXPR, TREE_TYPE (startvar),
2835 fd->loop.n1, t);
2836 }
2837 }
2838 else
2839 {
2840 if (POINTER_TYPE_P (TREE_TYPE (startvar)))
2841 t = fold_convert (signed_type_for (TREE_TYPE (startvar)), t);
2842 t = fold_convert (TREE_TYPE (startvar), t);
2843 }
2844 iend = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2845 false, GSI_CONTINUE_LINKING);
2846 if (endvar)
2847 {
2848 assign_stmt = gimple_build_assign (endvar, iend);
2849 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2850 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (iend)))
2851 assign_stmt = gimple_build_assign (fd->loop.v, iend);
2852 else
2853 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, iend);
2854 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2855 }
2856 /* Handle linear clause adjustments. */
2857 tree itercnt = NULL_TREE;
2858 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
2859 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
2860 c; c = OMP_CLAUSE_CHAIN (c))
2861 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
2862 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
2863 {
2864 tree d = OMP_CLAUSE_DECL (c);
2865 bool is_ref = omp_is_reference (d);
2866 tree t = d, a, dest;
2867 if (is_ref)
2868 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
2869 tree type = TREE_TYPE (t);
2870 if (POINTER_TYPE_P (type))
2871 type = sizetype;
2872 dest = unshare_expr (t);
2873 tree v = create_tmp_var (TREE_TYPE (t), NULL);
2874 expand_omp_build_assign (&gsif, v, t);
2875 if (itercnt == NULL_TREE)
2876 {
2877 itercnt = startvar;
2878 tree n1 = fd->loop.n1;
2879 if (POINTER_TYPE_P (TREE_TYPE (itercnt)))
2880 {
2881 itercnt
2882 = fold_convert (signed_type_for (TREE_TYPE (itercnt)),
2883 itercnt);
2884 n1 = fold_convert (TREE_TYPE (itercnt), n1);
2885 }
2886 itercnt = fold_build2 (MINUS_EXPR, TREE_TYPE (itercnt),
2887 itercnt, n1);
2888 itercnt = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (itercnt),
2889 itercnt, fd->loop.step);
2890 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
2891 NULL_TREE, false,
2892 GSI_CONTINUE_LINKING);
2893 }
2894 a = fold_build2 (MULT_EXPR, type,
2895 fold_convert (type, itercnt),
2896 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
2897 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
2898 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
2899 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2900 false, GSI_CONTINUE_LINKING);
2901 assign_stmt = gimple_build_assign (dest, t);
2902 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
2903 }
2904 if (fd->collapse > 1)
2905 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
2906
2907 if (fd->ordered)
2908 {
2909       /* Until now, the counts array contained the number of iterations
2910       (or a variable holding it) for the ith loop.  From now on, we need
2911       those counts only for the collapsed loops, and only from the 2nd
2912       to the last collapsed one.  Move those one element earlier;
2913       we'll use counts[fd->collapse - 1] for the first source/sink
2914       iteration counter and so on, and counts[fd->ordered]
2915       as the array holding the current counter values for
2916       depend(source). */
2917 if (fd->collapse > 1)
2918 memmove (counts, counts + 1, (fd->collapse - 1) * sizeof (counts[0]));
2919 if (broken_loop)
2920 {
2921 int i;
2922 for (i = fd->collapse; i < fd->ordered; i++)
2923 {
2924 tree type = TREE_TYPE (fd->loops[i].v);
2925 tree this_cond
2926 = fold_build2 (fd->loops[i].cond_code, boolean_type_node,
2927 fold_convert (type, fd->loops[i].n1),
2928 fold_convert (type, fd->loops[i].n2));
2929 if (!integer_onep (this_cond))
2930 break;
2931 }
2932 if (i < fd->ordered)
2933 {
2934 cont_bb
2935 = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
2936 add_bb_to_loop (cont_bb, l1_bb->loop_father);
2937 gimple_stmt_iterator gsi = gsi_after_labels (cont_bb);
2938 gimple *g = gimple_build_omp_continue (fd->loop.v, fd->loop.v);
2939 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
2940 make_edge (cont_bb, l3_bb, EDGE_FALLTHRU);
2941 make_edge (cont_bb, l1_bb, 0);
2942 l2_bb = create_empty_bb (cont_bb);
2943 broken_loop = false;
2944 }
2945 }
2946 expand_omp_ordered_source_sink (region, fd, counts, cont_bb);
2947 cont_bb = expand_omp_for_ordered_loops (fd, counts, cont_bb, l1_bb,
2948 ordered_lastprivate);
2949 if (counts[fd->collapse - 1])
2950 {
2951 gcc_assert (fd->collapse == 1);
2952 gsi = gsi_last_bb (l0_bb);
2953 expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
2954 istart0, true);
2955 gsi = gsi_last_bb (cont_bb);
2956 t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
2957 build_int_cst (fd->iter_type, 1));
2958 expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
2959 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2960 size_zero_node, NULL_TREE, NULL_TREE);
2961 expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
2962 t = counts[fd->collapse - 1];
2963 }
2964 else if (fd->collapse > 1)
2965 t = fd->loop.v;
2966 else
2967 {
2968 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
2969 fd->loops[0].v, fd->loops[0].n1);
2970 t = fold_convert (fd->iter_type, t);
2971 }
2972 gsi = gsi_last_bb (l0_bb);
2973 tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
2974 size_zero_node, NULL_TREE, NULL_TREE);
2975 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
2976 false, GSI_CONTINUE_LINKING);
2977 expand_omp_build_assign (&gsi, aref, t, true);
2978 }
2979
2980 if (!broken_loop)
2981 {
2982 /* Code to control the increment and predicate for the sequential
2983 loop goes in the CONT_BB. */
2984 gsi = gsi_last_bb (cont_bb);
2985 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
2986 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
2987 vmain = gimple_omp_continue_control_use (cont_stmt);
2988 vback = gimple_omp_continue_control_def (cont_stmt);
2989
2990 if (!gimple_omp_for_combined_p (fd->for_stmt))
2991 {
2992 if (POINTER_TYPE_P (type))
2993 t = fold_build_pointer_plus (vmain, fd->loop.step);
2994 else
2995 t = fold_build2 (PLUS_EXPR, type, vmain, fd->loop.step);
2996 t = force_gimple_operand_gsi (&gsi, t,
2997 DECL_P (vback)
2998 && TREE_ADDRESSABLE (vback),
2999 NULL_TREE, true, GSI_SAME_STMT);
3000 assign_stmt = gimple_build_assign (vback, t);
3001 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3002
3003 if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
3004 {
3005 if (fd->collapse > 1)
3006 t = fd->loop.v;
3007 else
3008 {
3009 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
3010 fd->loops[0].v, fd->loops[0].n1);
3011 t = fold_convert (fd->iter_type, t);
3012 }
3013 tree aref = build4 (ARRAY_REF, fd->iter_type,
3014 counts[fd->ordered], size_zero_node,
3015 NULL_TREE, NULL_TREE);
3016 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3017 true, GSI_SAME_STMT);
3018 expand_omp_build_assign (&gsi, aref, t);
3019 }
3020
3021 t = build2 (fd->loop.cond_code, boolean_type_node,
3022 DECL_P (vback) && TREE_ADDRESSABLE (vback) ? t : vback,
3023 iend);
3024 gcond *cond_stmt = gimple_build_cond_empty (t);
3025 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3026 }
3027
3028 /* Remove GIMPLE_OMP_CONTINUE. */
3029 gsi_remove (&gsi, true);
3030
3031 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3032 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
3033
3034 /* Emit code to get the next parallel iteration in L2_BB. */
3035 gsi = gsi_start_bb (l2_bb);
3036
3037 t = build_call_expr (builtin_decl_explicit (next_fn), 2,
3038 build_fold_addr_expr (istart0),
3039 build_fold_addr_expr (iend0));
3040 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3041 false, GSI_CONTINUE_LINKING);
3042 if (TREE_TYPE (t) != boolean_type_node)
3043 t = fold_build2 (NE_EXPR, boolean_type_node,
3044 t, build_int_cst (TREE_TYPE (t), 0));
3045 gcond *cond_stmt = gimple_build_cond_empty (t);
3046 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
3047 }
3048
3049 /* Add the loop cleanup function. */
3050 gsi = gsi_last_bb (exit_bb);
3051 if (gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3052 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
3053 else if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3054 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_CANCEL);
3055 else
3056 t = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END);
3057 gcall *call_stmt = gimple_build_call (t, 0);
3058 if (gimple_omp_return_lhs (gsi_stmt (gsi)))
3059 gimple_call_set_lhs (call_stmt, gimple_omp_return_lhs (gsi_stmt (gsi)));
3060 gsi_insert_after (&gsi, call_stmt, GSI_SAME_STMT);
3061 if (fd->ordered)
3062 {
3063 tree arr = counts[fd->ordered];
3064 tree clobber = build_constructor (TREE_TYPE (arr), NULL);
3065 TREE_THIS_VOLATILE (clobber) = 1;
3066 gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
3067 GSI_SAME_STMT);
3068 }
3069 gsi_remove (&gsi, true);
3070
3071 /* Connect the new blocks. */
3072 find_edge (entry_bb, l0_bb)->flags = EDGE_TRUE_VALUE;
3073 find_edge (entry_bb, l3_bb)->flags = EDGE_FALSE_VALUE;
3074
3075 if (!broken_loop)
3076 {
3077 gimple_seq phis;
3078
3079 e = find_edge (cont_bb, l3_bb);
3080 ne = make_edge (l2_bb, l3_bb, EDGE_FALSE_VALUE);
3081
3082 phis = phi_nodes (l3_bb);
3083 for (gsi = gsi_start (phis); !gsi_end_p (gsi); gsi_next (&gsi))
3084 {
3085 gimple *phi = gsi_stmt (gsi);
3086 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, ne),
3087 PHI_ARG_DEF_FROM_EDGE (phi, e));
3088 }
3089 remove_edge (e);
3090
3091 make_edge (cont_bb, l2_bb, EDGE_FALSE_VALUE);
3092 e = find_edge (cont_bb, l1_bb);
3093 if (e == NULL)
3094 {
3095 e = BRANCH_EDGE (cont_bb);
3096 gcc_assert (single_succ (e->dest) == l1_bb);
3097 }
3098 if (gimple_omp_for_combined_p (fd->for_stmt))
3099 {
3100 remove_edge (e);
3101 e = NULL;
3102 }
3103 else if (fd->collapse > 1)
3104 {
3105 remove_edge (e);
3106 e = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3107 }
3108 else
3109 e->flags = EDGE_TRUE_VALUE;
3110 if (e)
3111 {
3112 e->probability = REG_BR_PROB_BASE * 7 / 8;
3113 find_edge (cont_bb, l2_bb)->probability = REG_BR_PROB_BASE / 8;
3114 }
3115 else
3116 {
3117 e = find_edge (cont_bb, l2_bb);
3118 e->flags = EDGE_FALLTHRU;
3119 }
3120 make_edge (l2_bb, l0_bb, EDGE_TRUE_VALUE);
3121
3122 if (gimple_in_ssa_p (cfun))
3123 {
3124 /* Add phis to the outer loop that connect to the phis in the inner,
3125 original loop, and move the loop entry value of the inner phi to
3126 the loop entry value of the outer phi. */
3127 gphi_iterator psi;
3128 for (psi = gsi_start_phis (l3_bb); !gsi_end_p (psi); gsi_next (&psi))
3129 {
3130 source_location locus;
3131 gphi *nphi;
3132 gphi *exit_phi = psi.phi ();
3133
3134 edge l2_to_l3 = find_edge (l2_bb, l3_bb);
3135 tree exit_res = PHI_ARG_DEF_FROM_EDGE (exit_phi, l2_to_l3);
3136
3137 basic_block latch = BRANCH_EDGE (cont_bb)->dest;
3138 edge latch_to_l1 = find_edge (latch, l1_bb);
3139 gphi *inner_phi
3140 = find_phi_with_arg_on_edge (exit_res, latch_to_l1);
3141
3142 tree t = gimple_phi_result (exit_phi);
3143 tree new_res = copy_ssa_name (t, NULL);
3144 nphi = create_phi_node (new_res, l0_bb);
3145
3146 edge l0_to_l1 = find_edge (l0_bb, l1_bb);
3147 t = PHI_ARG_DEF_FROM_EDGE (inner_phi, l0_to_l1);
3148 locus = gimple_phi_arg_location_from_edge (inner_phi, l0_to_l1);
3149 edge entry_to_l0 = find_edge (entry_bb, l0_bb);
3150 add_phi_arg (nphi, t, entry_to_l0, locus);
3151
3152 edge l2_to_l0 = find_edge (l2_bb, l0_bb);
3153 add_phi_arg (nphi, exit_res, l2_to_l0, UNKNOWN_LOCATION);
3154
3155 add_phi_arg (inner_phi, new_res, l0_to_l1, UNKNOWN_LOCATION);
3156 };
3157 }
3158
3159 set_immediate_dominator (CDI_DOMINATORS, l2_bb,
3160 recompute_dominator (CDI_DOMINATORS, l2_bb));
3161 set_immediate_dominator (CDI_DOMINATORS, l3_bb,
3162 recompute_dominator (CDI_DOMINATORS, l3_bb));
3163 set_immediate_dominator (CDI_DOMINATORS, l0_bb,
3164 recompute_dominator (CDI_DOMINATORS, l0_bb));
3165 set_immediate_dominator (CDI_DOMINATORS, l1_bb,
3166 recompute_dominator (CDI_DOMINATORS, l1_bb));
3167
3168 /* We enter expand_omp_for_generic with a loop. This original loop may
3169 have its own loop struct, or it may be part of an outer loop struct
3170 (which may be the fake loop). */
3171 struct loop *outer_loop = entry_bb->loop_father;
3172 bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
3173
3174 add_bb_to_loop (l2_bb, outer_loop);
3175
3176 /* We've added a new loop around the original loop. Allocate the
3177 corresponding loop struct. */
3178 struct loop *new_loop = alloc_loop ();
3179 new_loop->header = l0_bb;
3180 new_loop->latch = l2_bb;
3181 add_loop (new_loop, outer_loop);
3182
3183 /* Allocate a loop structure for the original loop unless we already
3184 had one. */
3185 if (!orig_loop_has_loop_struct
3186 && !gimple_omp_for_combined_p (fd->for_stmt))
3187 {
3188 struct loop *orig_loop = alloc_loop ();
3189 orig_loop->header = l1_bb;
3190 /* The loop may have multiple latches. */
3191 add_loop (orig_loop, new_loop);
3192 }
3193 }
3194 }
3195
3196 /* A subroutine of expand_omp_for. Generate code for a parallel
3197 loop with static schedule and no specified chunk size. Given
3198 parameters:
3199
3200 for (V = N1; V cond N2; V += STEP) BODY;
3201
3202 where COND is "<" or ">", we generate pseudocode
3203
3204 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L2;
3205 if (cond is <)
3206 adj = STEP - 1;
3207 else
3208 adj = STEP + 1;
3209 if ((__typeof (V)) -1 > 0 && cond is >)
3210 n = -(adj + N2 - N1) / -STEP;
3211 else
3212 n = (adj + N2 - N1) / STEP;
3213 q = n / nthreads;
3214 tt = n % nthreads;
3215 if (threadid < tt) goto L3; else goto L4;
3216 L3:
3217 tt = 0;
3218 q = q + 1;
3219 L4:
3220 s0 = q * threadid + tt;
3221 e0 = s0 + q;
3222 V = s0 * STEP + N1;
3223 if (s0 >= e0) goto L2; else goto L0;
3224 L0:
3225 e = e0 * STEP + N1;
3226 L1:
3227 BODY;
3228 V += STEP;
3229 if (V cond e) goto L1;
3230 L2:
3231 */
3232
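/* Illustrative sketch, not part of GCC: the q/tt/s0/e0 computation from the
   pseudocode above, written as plain C.  Given the total trip count N, it
   yields the half-open range [s0, e0) of logical iterations assigned to
   THREADID under static scheduling with no chunk size; the expansion below
   then maps these back with V = s0 * STEP + N1 and e = e0 * STEP + N1.
   Names here are hypothetical.  */

static void
static_nochunk_range (unsigned long n, unsigned long nthreads,
		      unsigned long threadid,
		      unsigned long *s0, unsigned long *e0)
{
  unsigned long q = n / nthreads;	/* base share per thread */
  unsigned long tt = n % nthreads;	/* the first TT threads get one more */

  if (threadid < tt)			/* L3: this thread takes q + 1 */
    {
      tt = 0;
      q = q + 1;
    }
  /* L4: start of this thread's slice, and one past its end.  */
  *s0 = q * threadid + tt;
  *e0 = *s0 + q;
}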
3233 static void
3234 expand_omp_for_static_nochunk (struct omp_region *region,
3235 struct omp_for_data *fd,
3236 gimple *inner_stmt)
3237 {
3238 tree n, q, s0, e0, e, t, tt, nthreads, threadid;
3239 tree type, itype, vmain, vback;
3240 basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
3241 basic_block body_bb, cont_bb, collapse_bb = NULL;
3242 basic_block fin_bb;
3243 gimple_stmt_iterator gsi;
3244 edge ep;
3245 bool broken_loop = region->cont == NULL;
3246 tree *counts = NULL;
3247 tree n1, n2, step;
3248
3249 itype = type = TREE_TYPE (fd->loop.v);
3250 if (POINTER_TYPE_P (type))
3251 itype = signed_type_for (type);
3252
3253 entry_bb = region->entry;
3254 cont_bb = region->cont;
3255 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
3256 fin_bb = BRANCH_EDGE (entry_bb)->dest;
3257 gcc_assert (broken_loop
3258 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
3259 seq_start_bb = split_edge (FALLTHRU_EDGE (entry_bb));
3260 body_bb = single_succ (seq_start_bb);
3261 if (!broken_loop)
3262 {
3263 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3264 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3265 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3266 }
3267 exit_bb = region->exit;
3268
3269 /* Iteration space partitioning goes in ENTRY_BB. */
3270 gsi = gsi_last_bb (entry_bb);
3271 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3272
3273 if (fd->collapse > 1)
3274 {
3275 int first_zero_iter = -1, dummy = -1;
3276 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3277
3278 counts = XALLOCAVEC (tree, fd->collapse);
3279 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3280 fin_bb, first_zero_iter,
3281 dummy_bb, dummy, l2_dom_bb);
3282 t = NULL_TREE;
3283 }
3284 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3285 t = integer_one_node;
3286 else
3287 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3288 fold_convert (type, fd->loop.n1),
3289 fold_convert (type, fd->loop.n2));
3290 if (fd->collapse == 1
3291 && TYPE_UNSIGNED (type)
3292 && (t == NULL_TREE || !integer_onep (t)))
3293 {
3294 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3295 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3296 true, GSI_SAME_STMT);
3297 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3298 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3299 true, GSI_SAME_STMT);
3300 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3301 NULL_TREE, NULL_TREE);
3302 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3303 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3304 expand_omp_regimplify_p, NULL, NULL)
3305 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3306 expand_omp_regimplify_p, NULL, NULL))
3307 {
3308 gsi = gsi_for_stmt (cond_stmt);
3309 gimple_regimplify_operands (cond_stmt, &gsi);
3310 }
3311 ep = split_block (entry_bb, cond_stmt);
3312 ep->flags = EDGE_TRUE_VALUE;
3313 entry_bb = ep->dest;
3314 ep->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3315 ep = make_edge (ep->src, fin_bb, EDGE_FALSE_VALUE);
3316 ep->probability = REG_BR_PROB_BASE / 2000 - 1;
3317 if (gimple_in_ssa_p (cfun))
3318 {
3319 int dest_idx = find_edge (entry_bb, fin_bb)->dest_idx;
3320 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3321 !gsi_end_p (gpi); gsi_next (&gpi))
3322 {
3323 gphi *phi = gpi.phi ();
3324 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3325 ep, UNKNOWN_LOCATION);
3326 }
3327 }
3328 gsi = gsi_last_bb (entry_bb);
3329 }
3330
3331 switch (gimple_omp_for_kind (fd->for_stmt))
3332 {
3333 case GF_OMP_FOR_KIND_FOR:
3334 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3335 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3336 break;
3337 case GF_OMP_FOR_KIND_DISTRIBUTE:
3338 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3339 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3340 break;
3341 default:
3342 gcc_unreachable ();
3343 }
3344 nthreads = build_call_expr (nthreads, 0);
3345 nthreads = fold_convert (itype, nthreads);
3346 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3347 true, GSI_SAME_STMT);
3348 threadid = build_call_expr (threadid, 0);
3349 threadid = fold_convert (itype, threadid);
3350 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3351 true, GSI_SAME_STMT);
3352
3353 n1 = fd->loop.n1;
3354 n2 = fd->loop.n2;
3355 step = fd->loop.step;
3356 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3357 {
3358 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3359 OMP_CLAUSE__LOOPTEMP_);
3360 gcc_assert (innerc);
3361 n1 = OMP_CLAUSE_DECL (innerc);
3362 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3363 OMP_CLAUSE__LOOPTEMP_);
3364 gcc_assert (innerc);
3365 n2 = OMP_CLAUSE_DECL (innerc);
3366 }
3367 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3368 true, NULL_TREE, true, GSI_SAME_STMT);
3369 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3370 true, NULL_TREE, true, GSI_SAME_STMT);
3371 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3372 true, NULL_TREE, true, GSI_SAME_STMT);
3373
3374 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3375 t = fold_build2 (PLUS_EXPR, itype, step, t);
3376 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3377 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3378 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3379 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3380 fold_build1 (NEGATE_EXPR, itype, t),
3381 fold_build1 (NEGATE_EXPR, itype, step));
3382 else
3383 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3384 t = fold_convert (itype, t);
3385 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3386
3387 q = create_tmp_reg (itype, "q");
3388 t = fold_build2 (TRUNC_DIV_EXPR, itype, n, nthreads);
3389 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3390 gsi_insert_before (&gsi, gimple_build_assign (q, t), GSI_SAME_STMT);
3391
3392 tt = create_tmp_reg (itype, "tt");
3393 t = fold_build2 (TRUNC_MOD_EXPR, itype, n, nthreads);
3394 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, true, GSI_SAME_STMT);
3395 gsi_insert_before (&gsi, gimple_build_assign (tt, t), GSI_SAME_STMT);
3396
3397 t = build2 (LT_EXPR, boolean_type_node, threadid, tt);
3398 gcond *cond_stmt = gimple_build_cond_empty (t);
3399 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3400
3401 second_bb = split_block (entry_bb, cond_stmt)->dest;
3402 gsi = gsi_last_bb (second_bb);
3403 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3404
3405 gsi_insert_before (&gsi, gimple_build_assign (tt, build_int_cst (itype, 0)),
3406 GSI_SAME_STMT);
3407 gassign *assign_stmt
3408 = gimple_build_assign (q, PLUS_EXPR, q, build_int_cst (itype, 1));
3409 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3410
3411 third_bb = split_block (second_bb, assign_stmt)->dest;
3412 gsi = gsi_last_bb (third_bb);
3413 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3414
3415 t = build2 (MULT_EXPR, itype, q, threadid);
3416 t = build2 (PLUS_EXPR, itype, t, tt);
3417 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3418
3419 t = fold_build2 (PLUS_EXPR, itype, s0, q);
3420 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
3421
3422 t = build2 (GE_EXPR, boolean_type_node, s0, e0);
3423 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3424
3425 /* Remove the GIMPLE_OMP_FOR statement. */
3426 gsi_remove (&gsi, true);
3427
3428 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3429 gsi = gsi_start_bb (seq_start_bb);
3430
3431 tree startvar = fd->loop.v;
3432 tree endvar = NULL_TREE;
3433
3434 if (gimple_omp_for_combined_p (fd->for_stmt))
3435 {
3436 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3437 ? gimple_omp_parallel_clauses (inner_stmt)
3438 : gimple_omp_for_clauses (inner_stmt);
3439 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3440 gcc_assert (innerc);
3441 startvar = OMP_CLAUSE_DECL (innerc);
3442 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3443 OMP_CLAUSE__LOOPTEMP_);
3444 gcc_assert (innerc);
3445 endvar = OMP_CLAUSE_DECL (innerc);
3446 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3447 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3448 {
3449 int i;
3450 for (i = 1; i < fd->collapse; i++)
3451 {
3452 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3453 OMP_CLAUSE__LOOPTEMP_);
3454 gcc_assert (innerc);
3455 }
3456 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3457 OMP_CLAUSE__LOOPTEMP_);
3458 if (innerc)
3459 {
3460 /* If needed (distribute parallel for with lastprivate),
3461 propagate down the total number of iterations. */
3462 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3463 fd->loop.n2);
3464 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3465 GSI_CONTINUE_LINKING);
3466 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3467 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3468 }
3469 }
3470 }
3471 t = fold_convert (itype, s0);
3472 t = fold_build2 (MULT_EXPR, itype, t, step);
3473 if (POINTER_TYPE_P (type))
3474 t = fold_build_pointer_plus (n1, t);
3475 else
3476 t = fold_build2 (PLUS_EXPR, type, t, n1);
3477 t = fold_convert (TREE_TYPE (startvar), t);
3478 t = force_gimple_operand_gsi (&gsi, t,
3479 DECL_P (startvar)
3480 && TREE_ADDRESSABLE (startvar),
3481 NULL_TREE, false, GSI_CONTINUE_LINKING);
3482 assign_stmt = gimple_build_assign (startvar, t);
3483 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3484
3485 t = fold_convert (itype, e0);
3486 t = fold_build2 (MULT_EXPR, itype, t, step);
3487 if (POINTER_TYPE_P (type))
3488 t = fold_build_pointer_plus (n1, t);
3489 else
3490 t = fold_build2 (PLUS_EXPR, type, t, n1);
3491 t = fold_convert (TREE_TYPE (startvar), t);
3492 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3493 false, GSI_CONTINUE_LINKING);
3494 if (endvar)
3495 {
3496 assign_stmt = gimple_build_assign (endvar, e);
3497 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3498 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3499 assign_stmt = gimple_build_assign (fd->loop.v, e);
3500 else
3501 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
3502 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3503 }
3504 /* Handle linear clause adjustments. */
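  /* A hedged illustration with an assumed clause: for linear(x:2) on this
     worksharing loop, the code below resets x at the start of each
     thread's subchunk to its incoming value plus itercnt * 2, where
     itercnt counts the logical iterations preceding the subchunk (s0, or
     a bias-adjusted s0 when the loop was combined into an enclosing
     construct).  */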
3505 tree itercnt = NULL_TREE;
3506 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
3507 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
3508 c; c = OMP_CLAUSE_CHAIN (c))
3509 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
3510 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
3511 {
3512 tree d = OMP_CLAUSE_DECL (c);
3513 bool is_ref = omp_is_reference (d);
3514 tree t = d, a, dest;
3515 if (is_ref)
3516 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
3517 if (itercnt == NULL_TREE)
3518 {
3519 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3520 {
3521 itercnt = fold_build2 (MINUS_EXPR, itype,
3522 fold_convert (itype, n1),
3523 fold_convert (itype, fd->loop.n1));
3524 itercnt = fold_build2 (EXACT_DIV_EXPR, itype, itercnt, step);
3525 itercnt = fold_build2 (PLUS_EXPR, itype, itercnt, s0);
3526 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
3527 NULL_TREE, false,
3528 GSI_CONTINUE_LINKING);
3529 }
3530 else
3531 itercnt = s0;
3532 }
3533 tree type = TREE_TYPE (t);
3534 if (POINTER_TYPE_P (type))
3535 type = sizetype;
3536 a = fold_build2 (MULT_EXPR, type,
3537 fold_convert (type, itercnt),
3538 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
3539 dest = unshare_expr (t);
3540 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
3541 : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
3542 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3543 false, GSI_CONTINUE_LINKING);
3544 assign_stmt = gimple_build_assign (dest, t);
3545 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3546 }
3547 if (fd->collapse > 1)
3548 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
3549
3550 if (!broken_loop)
3551 {
3552 /* The code controlling the sequential loop replaces the
3553 GIMPLE_OMP_CONTINUE. */
3554 gsi = gsi_last_bb (cont_bb);
3555 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
3556 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
3557 vmain = gimple_omp_continue_control_use (cont_stmt);
3558 vback = gimple_omp_continue_control_def (cont_stmt);
3559
3560 if (!gimple_omp_for_combined_p (fd->for_stmt))
3561 {
3562 if (POINTER_TYPE_P (type))
3563 t = fold_build_pointer_plus (vmain, step);
3564 else
3565 t = fold_build2 (PLUS_EXPR, type, vmain, step);
3566 t = force_gimple_operand_gsi (&gsi, t,
3567 DECL_P (vback)
3568 && TREE_ADDRESSABLE (vback),
3569 NULL_TREE, true, GSI_SAME_STMT);
3570 assign_stmt = gimple_build_assign (vback, t);
3571 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3572
3573 t = build2 (fd->loop.cond_code, boolean_type_node,
3574 DECL_P (vback) && TREE_ADDRESSABLE (vback)
3575 ? t : vback, e);
3576 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
3577 }
3578
3579 /* Remove the GIMPLE_OMP_CONTINUE statement. */
3580 gsi_remove (&gsi, true);
3581
3582 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
3583 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
3584 }
3585
3586 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
3587 gsi = gsi_last_bb (exit_bb);
3588 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
3589 {
3590 t = gimple_omp_return_lhs (gsi_stmt (gsi));
3591 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
3592 }
3593 gsi_remove (&gsi, true);
3594
3595 /* Connect all the blocks. */
3596 ep = make_edge (entry_bb, third_bb, EDGE_FALSE_VALUE);
3597 ep->probability = REG_BR_PROB_BASE / 4 * 3;
3598 ep = find_edge (entry_bb, second_bb);
3599 ep->flags = EDGE_TRUE_VALUE;
3600 ep->probability = REG_BR_PROB_BASE / 4;
3601 find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
3602 find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
3603
3604 if (!broken_loop)
3605 {
3606 ep = find_edge (cont_bb, body_bb);
3607 if (ep == NULL)
3608 {
3609 ep = BRANCH_EDGE (cont_bb);
3610 gcc_assert (single_succ (ep->dest) == body_bb);
3611 }
3612 if (gimple_omp_for_combined_p (fd->for_stmt))
3613 {
3614 remove_edge (ep);
3615 ep = NULL;
3616 }
3617 else if (fd->collapse > 1)
3618 {
3619 remove_edge (ep);
3620 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
3621 }
3622 else
3623 ep->flags = EDGE_TRUE_VALUE;
3624 find_edge (cont_bb, fin_bb)->flags
3625 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
3626 }
3627
3628 set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
3629 set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
3630 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
3631
3632 set_immediate_dominator (CDI_DOMINATORS, body_bb,
3633 recompute_dominator (CDI_DOMINATORS, body_bb));
3634 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
3635 recompute_dominator (CDI_DOMINATORS, fin_bb));
3636
3637 struct loop *loop = body_bb->loop_father;
3638 if (loop != entry_bb->loop_father)
3639 {
3640 gcc_assert (broken_loop || loop->header == body_bb);
3641 gcc_assert (broken_loop
3642 || loop->latch == region->cont
3643 || single_pred (loop->latch) == region->cont);
3644 return;
3645 }
3646
3647 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
3648 {
3649 loop = alloc_loop ();
3650 loop->header = body_bb;
3651 if (collapse_bb == NULL)
3652 loop->latch = cont_bb;
3653 add_loop (loop, body_bb->loop_father);
3654 }
3655 }
3656
3657 /* Return phi in E->DEST with ARG on edge E. */
3658
3659 static gphi *
3660 find_phi_with_arg_on_edge (tree arg, edge e)
3661 {
3662 basic_block bb = e->dest;
3663
3664 for (gphi_iterator gpi = gsi_start_phis (bb);
3665 !gsi_end_p (gpi);
3666 gsi_next (&gpi))
3667 {
3668 gphi *phi = gpi.phi ();
3669 if (PHI_ARG_DEF_FROM_EDGE (phi, e) == arg)
3670 return phi;
3671 }
3672
3673 return NULL;
3674 }
3675
3676 /* A subroutine of expand_omp_for. Generate code for a parallel
3677 loop with static schedule and a specified chunk size. Given
3678 parameters:
3679
3680 for (V = N1; V cond N2; V += STEP) BODY;
3681
3682 where COND is "<" or ">", we generate pseudocode
3683
 3684 if ((__typeof (V)) -1 > 0 && N2 cond N1) goto L4;
3685 if (cond is <)
3686 adj = STEP - 1;
3687 else
3688 adj = STEP + 1;
3689 if ((__typeof (V)) -1 > 0 && cond is >)
3690 n = -(adj + N2 - N1) / -STEP;
3691 else
3692 n = (adj + N2 - N1) / STEP;
3693 trip = 0;
3694 V = threadid * CHUNK * STEP + N1; -- this extra definition of V is
3695 here so that V is defined
3696 if the loop is not entered
3697 L0:
3698 s0 = (trip * nthreads + threadid) * CHUNK;
3699 e0 = min(s0 + CHUNK, n);
3700 if (s0 < n) goto L1; else goto L4;
3701 L1:
3702 V = s0 * STEP + N1;
3703 e = e0 * STEP + N1;
3704 L2:
3705 BODY;
3706 V += STEP;
3707 if (V cond e) goto L2; else goto L3;
3708 L3:
3709 trip += 1;
3710 goto L0;
3711 L4:
3712 */
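 /* As a hedged illustration (an assumed example, not taken from this
    file), a worksharing loop such as

	#pragma omp for schedule(static, 4)
	for (i = 0; i < n; i++)
	  body (i);

    is expanded along the lines of the pseudocode above: on every trip each
    thread claims the chunk starting at s0 = (trip * nthreads + threadid) * 4,
    runs it up to e0 = min (s0 + 4, n), and stops once s0 reaches n.  */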
3713
3714 static void
3715 expand_omp_for_static_chunk (struct omp_region *region,
3716 struct omp_for_data *fd, gimple *inner_stmt)
3717 {
3718 tree n, s0, e0, e, t;
3719 tree trip_var, trip_init, trip_main, trip_back, nthreads, threadid;
3720 tree type, itype, vmain, vback, vextra;
3721 basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
3722 basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
3723 gimple_stmt_iterator gsi;
3724 edge se;
3725 bool broken_loop = region->cont == NULL;
3726 tree *counts = NULL;
3727 tree n1, n2, step;
3728
3729 itype = type = TREE_TYPE (fd->loop.v);
3730 if (POINTER_TYPE_P (type))
3731 itype = signed_type_for (type);
3732
3733 entry_bb = region->entry;
3734 se = split_block (entry_bb, last_stmt (entry_bb));
3735 entry_bb = se->src;
3736 iter_part_bb = se->dest;
3737 cont_bb = region->cont;
3738 gcc_assert (EDGE_COUNT (iter_part_bb->succs) == 2);
3739 fin_bb = BRANCH_EDGE (iter_part_bb)->dest;
3740 gcc_assert (broken_loop
3741 || fin_bb == FALLTHRU_EDGE (cont_bb)->dest);
3742 seq_start_bb = split_edge (FALLTHRU_EDGE (iter_part_bb));
3743 body_bb = single_succ (seq_start_bb);
3744 if (!broken_loop)
3745 {
3746 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb
3747 || single_succ (BRANCH_EDGE (cont_bb)->dest) == body_bb);
3748 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
3749 trip_update_bb = split_edge (FALLTHRU_EDGE (cont_bb));
3750 }
3751 exit_bb = region->exit;
3752
3753 /* Trip and adjustment setup goes in ENTRY_BB. */
3754 gsi = gsi_last_bb (entry_bb);
3755 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
3756
3757 if (fd->collapse > 1)
3758 {
3759 int first_zero_iter = -1, dummy = -1;
3760 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
3761
3762 counts = XALLOCAVEC (tree, fd->collapse);
3763 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
3764 fin_bb, first_zero_iter,
3765 dummy_bb, dummy, l2_dom_bb);
3766 t = NULL_TREE;
3767 }
3768 else if (gimple_omp_for_combined_into_p (fd->for_stmt))
3769 t = integer_one_node;
3770 else
3771 t = fold_binary (fd->loop.cond_code, boolean_type_node,
3772 fold_convert (type, fd->loop.n1),
3773 fold_convert (type, fd->loop.n2));
3774 if (fd->collapse == 1
3775 && TYPE_UNSIGNED (type)
3776 && (t == NULL_TREE || !integer_onep (t)))
3777 {
3778 n1 = fold_convert (type, unshare_expr (fd->loop.n1));
3779 n1 = force_gimple_operand_gsi (&gsi, n1, true, NULL_TREE,
3780 true, GSI_SAME_STMT);
3781 n2 = fold_convert (type, unshare_expr (fd->loop.n2));
3782 n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
3783 true, GSI_SAME_STMT);
3784 gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
3785 NULL_TREE, NULL_TREE);
3786 gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
3787 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
3788 expand_omp_regimplify_p, NULL, NULL)
3789 || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
3790 expand_omp_regimplify_p, NULL, NULL))
3791 {
3792 gsi = gsi_for_stmt (cond_stmt);
3793 gimple_regimplify_operands (cond_stmt, &gsi);
3794 }
3795 se = split_block (entry_bb, cond_stmt);
3796 se->flags = EDGE_TRUE_VALUE;
3797 entry_bb = se->dest;
3798 se->probability = REG_BR_PROB_BASE - (REG_BR_PROB_BASE / 2000 - 1);
3799 se = make_edge (se->src, fin_bb, EDGE_FALSE_VALUE);
3800 se->probability = REG_BR_PROB_BASE / 2000 - 1;
3801 if (gimple_in_ssa_p (cfun))
3802 {
3803 int dest_idx = find_edge (iter_part_bb, fin_bb)->dest_idx;
3804 for (gphi_iterator gpi = gsi_start_phis (fin_bb);
3805 !gsi_end_p (gpi); gsi_next (&gpi))
3806 {
3807 gphi *phi = gpi.phi ();
3808 add_phi_arg (phi, gimple_phi_arg_def (phi, dest_idx),
3809 se, UNKNOWN_LOCATION);
3810 }
3811 }
3812 gsi = gsi_last_bb (entry_bb);
3813 }
3814
3815 switch (gimple_omp_for_kind (fd->for_stmt))
3816 {
3817 case GF_OMP_FOR_KIND_FOR:
3818 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3819 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
3820 break;
3821 case GF_OMP_FOR_KIND_DISTRIBUTE:
3822 nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_TEAMS);
3823 threadid = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
3824 break;
3825 default:
3826 gcc_unreachable ();
3827 }
3828 nthreads = build_call_expr (nthreads, 0);
3829 nthreads = fold_convert (itype, nthreads);
3830 nthreads = force_gimple_operand_gsi (&gsi, nthreads, true, NULL_TREE,
3831 true, GSI_SAME_STMT);
3832 threadid = build_call_expr (threadid, 0);
3833 threadid = fold_convert (itype, threadid);
3834 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
3835 true, GSI_SAME_STMT);
3836
3837 n1 = fd->loop.n1;
3838 n2 = fd->loop.n2;
3839 step = fd->loop.step;
3840 if (gimple_omp_for_combined_into_p (fd->for_stmt))
3841 {
3842 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
3843 OMP_CLAUSE__LOOPTEMP_);
3844 gcc_assert (innerc);
3845 n1 = OMP_CLAUSE_DECL (innerc);
3846 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3847 OMP_CLAUSE__LOOPTEMP_);
3848 gcc_assert (innerc);
3849 n2 = OMP_CLAUSE_DECL (innerc);
3850 }
3851 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
3852 true, NULL_TREE, true, GSI_SAME_STMT);
3853 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
3854 true, NULL_TREE, true, GSI_SAME_STMT);
3855 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
3856 true, NULL_TREE, true, GSI_SAME_STMT);
3857 tree chunk_size = fold_convert (itype, fd->chunk_size);
3858 chunk_size = omp_adjust_chunk_size (chunk_size, fd->simd_schedule);
3859 chunk_size
3860 = force_gimple_operand_gsi (&gsi, chunk_size, true, NULL_TREE, true,
3861 GSI_SAME_STMT);
3862
3863 t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
3864 t = fold_build2 (PLUS_EXPR, itype, step, t);
3865 t = fold_build2 (PLUS_EXPR, itype, t, n2);
3866 t = fold_build2 (MINUS_EXPR, itype, t, fold_convert (itype, n1));
3867 if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
3868 t = fold_build2 (TRUNC_DIV_EXPR, itype,
3869 fold_build1 (NEGATE_EXPR, itype, t),
3870 fold_build1 (NEGATE_EXPR, itype, step));
3871 else
3872 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
3873 t = fold_convert (itype, t);
3874 n = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3875 true, GSI_SAME_STMT);
3876
3877 trip_var = create_tmp_reg (itype, ".trip");
3878 if (gimple_in_ssa_p (cfun))
3879 {
3880 trip_init = make_ssa_name (trip_var);
3881 trip_main = make_ssa_name (trip_var);
3882 trip_back = make_ssa_name (trip_var);
3883 }
3884 else
3885 {
3886 trip_init = trip_var;
3887 trip_main = trip_var;
3888 trip_back = trip_var;
3889 }
3890
3891 gassign *assign_stmt
3892 = gimple_build_assign (trip_init, build_int_cst (itype, 0));
3893 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
3894
3895 t = fold_build2 (MULT_EXPR, itype, threadid, chunk_size);
3896 t = fold_build2 (MULT_EXPR, itype, t, step);
3897 if (POINTER_TYPE_P (type))
3898 t = fold_build_pointer_plus (n1, t);
3899 else
3900 t = fold_build2 (PLUS_EXPR, type, t, n1);
3901 vextra = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3902 true, GSI_SAME_STMT);
3903
3904 /* Remove the GIMPLE_OMP_FOR. */
3905 gsi_remove (&gsi, true);
3906
3907 gimple_stmt_iterator gsif = gsi;
3908
3909 /* Iteration space partitioning goes in ITER_PART_BB. */
3910 gsi = gsi_last_bb (iter_part_bb);
3911
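  /* For illustration only (assumed values, not from this code): for
     n = 10, nthreads = 2 and chunk_size = 3, trip 0 hands thread 0 the
     range [0,3) and thread 1 [3,6); trip 1 hands thread 0 [6,9) and
     thread 1 [9,10); on trip 2 both threads see s0 >= n and leave the
     loop.  */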
3912 t = fold_build2 (MULT_EXPR, itype, trip_main, nthreads);
3913 t = fold_build2 (PLUS_EXPR, itype, t, threadid);
3914 t = fold_build2 (MULT_EXPR, itype, t, chunk_size);
3915 s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3916 false, GSI_CONTINUE_LINKING);
3917
3918 t = fold_build2 (PLUS_EXPR, itype, s0, chunk_size);
3919 t = fold_build2 (MIN_EXPR, itype, t, n);
3920 e0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3921 false, GSI_CONTINUE_LINKING);
3922
3923 t = build2 (LT_EXPR, boolean_type_node, s0, n);
3924 gsi_insert_after (&gsi, gimple_build_cond_empty (t), GSI_CONTINUE_LINKING);
3925
3926 /* Setup code for sequential iteration goes in SEQ_START_BB. */
3927 gsi = gsi_start_bb (seq_start_bb);
3928
3929 tree startvar = fd->loop.v;
3930 tree endvar = NULL_TREE;
3931
3932 if (gimple_omp_for_combined_p (fd->for_stmt))
3933 {
3934 tree clauses = gimple_code (inner_stmt) == GIMPLE_OMP_PARALLEL
3935 ? gimple_omp_parallel_clauses (inner_stmt)
3936 : gimple_omp_for_clauses (inner_stmt);
3937 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
3938 gcc_assert (innerc);
3939 startvar = OMP_CLAUSE_DECL (innerc);
3940 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3941 OMP_CLAUSE__LOOPTEMP_);
3942 gcc_assert (innerc);
3943 endvar = OMP_CLAUSE_DECL (innerc);
3944 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
3945 && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
3946 {
3947 int i;
3948 for (i = 1; i < fd->collapse; i++)
3949 {
3950 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3951 OMP_CLAUSE__LOOPTEMP_);
3952 gcc_assert (innerc);
3953 }
3954 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
3955 OMP_CLAUSE__LOOPTEMP_);
3956 if (innerc)
3957 {
3958 /* If needed (distribute parallel for with lastprivate),
3959 propagate down the total number of iterations. */
3960 tree t = fold_convert (TREE_TYPE (OMP_CLAUSE_DECL (innerc)),
3961 fd->loop.n2);
3962 t = force_gimple_operand_gsi (&gsi, t, false, NULL_TREE, false,
3963 GSI_CONTINUE_LINKING);
3964 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
3965 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3966 }
3967 }
3968 }
3969
3970 t = fold_convert (itype, s0);
3971 t = fold_build2 (MULT_EXPR, itype, t, step);
3972 if (POINTER_TYPE_P (type))
3973 t = fold_build_pointer_plus (n1, t);
3974 else
3975 t = fold_build2 (PLUS_EXPR, type, t, n1);
3976 t = fold_convert (TREE_TYPE (startvar), t);
3977 t = force_gimple_operand_gsi (&gsi, t,
3978 DECL_P (startvar)
3979 && TREE_ADDRESSABLE (startvar),
3980 NULL_TREE, false, GSI_CONTINUE_LINKING);
3981 assign_stmt = gimple_build_assign (startvar, t);
3982 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3983
3984 t = fold_convert (itype, e0);
3985 t = fold_build2 (MULT_EXPR, itype, t, step);
3986 if (POINTER_TYPE_P (type))
3987 t = fold_build_pointer_plus (n1, t);
3988 else
3989 t = fold_build2 (PLUS_EXPR, type, t, n1);
3990 t = fold_convert (TREE_TYPE (startvar), t);
3991 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
3992 false, GSI_CONTINUE_LINKING);
3993 if (endvar)
3994 {
3995 assign_stmt = gimple_build_assign (endvar, e);
3996 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
3997 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
3998 assign_stmt = gimple_build_assign (fd->loop.v, e);
3999 else
4000 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
4001 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4002 }
4003 /* Handle linear clause adjustments. */
4004 tree itercnt = NULL_TREE, itercntbias = NULL_TREE;
4005 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
4006 for (tree c = gimple_omp_for_clauses (fd->for_stmt);
4007 c; c = OMP_CLAUSE_CHAIN (c))
4008 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LINEAR
4009 && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
4010 {
4011 tree d = OMP_CLAUSE_DECL (c);
4012 bool is_ref = omp_is_reference (d);
4013 tree t = d, a, dest;
4014 if (is_ref)
4015 t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
4016 tree type = TREE_TYPE (t);
4017 if (POINTER_TYPE_P (type))
4018 type = sizetype;
4019 dest = unshare_expr (t);
4020 tree v = create_tmp_var (TREE_TYPE (t), NULL);
4021 expand_omp_build_assign (&gsif, v, t);
4022 if (itercnt == NULL_TREE)
4023 {
4024 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4025 {
4026 itercntbias
4027 = fold_build2 (MINUS_EXPR, itype, fold_convert (itype, n1),
4028 fold_convert (itype, fd->loop.n1));
4029 itercntbias = fold_build2 (EXACT_DIV_EXPR, itype,
4030 itercntbias, step);
4031 itercntbias
4032 = force_gimple_operand_gsi (&gsif, itercntbias, true,
4033 NULL_TREE, true,
4034 GSI_SAME_STMT);
4035 itercnt = fold_build2 (PLUS_EXPR, itype, itercntbias, s0);
4036 itercnt = force_gimple_operand_gsi (&gsi, itercnt, true,
4037 NULL_TREE, false,
4038 GSI_CONTINUE_LINKING);
4039 }
4040 else
4041 itercnt = s0;
4042 }
4043 a = fold_build2 (MULT_EXPR, type,
4044 fold_convert (type, itercnt),
4045 fold_convert (type, OMP_CLAUSE_LINEAR_STEP (c)));
4046 t = fold_build2 (type == TREE_TYPE (t) ? PLUS_EXPR
4047 : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
4048 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4049 false, GSI_CONTINUE_LINKING);
4050 assign_stmt = gimple_build_assign (dest, t);
4051 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4052 }
4053 if (fd->collapse > 1)
4054 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4055
4056 if (!broken_loop)
4057 {
4058 /* The code controlling the sequential loop goes in CONT_BB,
4059 replacing the GIMPLE_OMP_CONTINUE. */
4060 gsi = gsi_last_bb (cont_bb);
4061 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
4062 vmain = gimple_omp_continue_control_use (cont_stmt);
4063 vback = gimple_omp_continue_control_def (cont_stmt);
4064
4065 if (!gimple_omp_for_combined_p (fd->for_stmt))
4066 {
4067 if (POINTER_TYPE_P (type))
4068 t = fold_build_pointer_plus (vmain, step);
4069 else
4070 t = fold_build2 (PLUS_EXPR, type, vmain, step);
4071 if (DECL_P (vback) && TREE_ADDRESSABLE (vback))
4072 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4073 true, GSI_SAME_STMT);
4074 assign_stmt = gimple_build_assign (vback, t);
4075 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
4076
4077 if (tree_int_cst_equal (fd->chunk_size, integer_one_node))
4078 t = build2 (EQ_EXPR, boolean_type_node,
4079 build_int_cst (itype, 0),
4080 build_int_cst (itype, 1));
4081 else
4082 t = build2 (fd->loop.cond_code, boolean_type_node,
4083 DECL_P (vback) && TREE_ADDRESSABLE (vback)
4084 ? t : vback, e);
4085 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
4086 }
4087
4088 /* Remove GIMPLE_OMP_CONTINUE. */
4089 gsi_remove (&gsi, true);
4090
4091 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
4092 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
4093
4094 /* Trip update code goes into TRIP_UPDATE_BB. */
4095 gsi = gsi_start_bb (trip_update_bb);
4096
4097 t = build_int_cst (itype, 1);
4098 t = build2 (PLUS_EXPR, itype, trip_main, t);
4099 assign_stmt = gimple_build_assign (trip_back, t);
4100 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4101 }
4102
4103 /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing. */
4104 gsi = gsi_last_bb (exit_bb);
4105 if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
4106 {
4107 t = gimple_omp_return_lhs (gsi_stmt (gsi));
4108 gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
4109 }
4110 gsi_remove (&gsi, true);
4111
4112 /* Connect the new blocks. */
4113 find_edge (iter_part_bb, seq_start_bb)->flags = EDGE_TRUE_VALUE;
4114 find_edge (iter_part_bb, fin_bb)->flags = EDGE_FALSE_VALUE;
4115
4116 if (!broken_loop)
4117 {
4118 se = find_edge (cont_bb, body_bb);
4119 if (se == NULL)
4120 {
4121 se = BRANCH_EDGE (cont_bb);
4122 gcc_assert (single_succ (se->dest) == body_bb);
4123 }
4124 if (gimple_omp_for_combined_p (fd->for_stmt))
4125 {
4126 remove_edge (se);
4127 se = NULL;
4128 }
4129 else if (fd->collapse > 1)
4130 {
4131 remove_edge (se);
4132 se = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
4133 }
4134 else
4135 se->flags = EDGE_TRUE_VALUE;
4136 find_edge (cont_bb, trip_update_bb)->flags
4137 = se ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
4138
4139 redirect_edge_and_branch (single_succ_edge (trip_update_bb), iter_part_bb);
4140 }
4141
4142 if (gimple_in_ssa_p (cfun))
4143 {
4144 gphi_iterator psi;
4145 gphi *phi;
4146 edge re, ene;
4147 edge_var_map *vm;
4148 size_t i;
4149
4150 gcc_assert (fd->collapse == 1 && !broken_loop);
4151
4152 /* When we redirect the edge from trip_update_bb to iter_part_bb, we
4153 remove arguments of the phi nodes in fin_bb. We need to create
4154 appropriate phi nodes in iter_part_bb instead. */
4155 se = find_edge (iter_part_bb, fin_bb);
4156 re = single_succ_edge (trip_update_bb);
4157 vec<edge_var_map> *head = redirect_edge_var_map_vector (re);
4158 ene = single_succ_edge (entry_bb);
4159
4160 psi = gsi_start_phis (fin_bb);
4161 for (i = 0; !gsi_end_p (psi) && head->iterate (i, &vm);
4162 gsi_next (&psi), ++i)
4163 {
4164 gphi *nphi;
4165 source_location locus;
4166
4167 phi = psi.phi ();
4168 t = gimple_phi_result (phi);
4169 gcc_assert (t == redirect_edge_var_map_result (vm));
4170
4171 if (!single_pred_p (fin_bb))
4172 t = copy_ssa_name (t, phi);
4173
4174 nphi = create_phi_node (t, iter_part_bb);
4175
4176 t = PHI_ARG_DEF_FROM_EDGE (phi, se);
4177 locus = gimple_phi_arg_location_from_edge (phi, se);
4178
4179 /* A special case -- fd->loop.v is not yet computed in
 4180 iter_part_bb; we need to use vextra instead. */
4181 if (t == fd->loop.v)
4182 t = vextra;
4183 add_phi_arg (nphi, t, ene, locus);
4184 locus = redirect_edge_var_map_location (vm);
4185 tree back_arg = redirect_edge_var_map_def (vm);
4186 add_phi_arg (nphi, back_arg, re, locus);
4187 edge ce = find_edge (cont_bb, body_bb);
4188 if (ce == NULL)
4189 {
4190 ce = BRANCH_EDGE (cont_bb);
4191 gcc_assert (single_succ (ce->dest) == body_bb);
4192 ce = single_succ_edge (ce->dest);
4193 }
4194 gphi *inner_loop_phi = find_phi_with_arg_on_edge (back_arg, ce);
4195 gcc_assert (inner_loop_phi != NULL);
4196 add_phi_arg (inner_loop_phi, gimple_phi_result (nphi),
4197 find_edge (seq_start_bb, body_bb), locus);
4198
4199 if (!single_pred_p (fin_bb))
4200 add_phi_arg (phi, gimple_phi_result (nphi), se, locus);
4201 }
4202 gcc_assert (gsi_end_p (psi) && (head == NULL || i == head->length ()));
4203 redirect_edge_var_map_clear (re);
4204 if (single_pred_p (fin_bb))
4205 while (1)
4206 {
4207 psi = gsi_start_phis (fin_bb);
4208 if (gsi_end_p (psi))
4209 break;
4210 remove_phi_node (&psi, false);
4211 }
4212
4213 /* Make phi node for trip. */
4214 phi = create_phi_node (trip_main, iter_part_bb);
4215 add_phi_arg (phi, trip_back, single_succ_edge (trip_update_bb),
4216 UNKNOWN_LOCATION);
4217 add_phi_arg (phi, trip_init, single_succ_edge (entry_bb),
4218 UNKNOWN_LOCATION);
4219 }
4220
4221 if (!broken_loop)
4222 set_immediate_dominator (CDI_DOMINATORS, trip_update_bb, cont_bb);
4223 set_immediate_dominator (CDI_DOMINATORS, iter_part_bb,
4224 recompute_dominator (CDI_DOMINATORS, iter_part_bb));
4225 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
4226 recompute_dominator (CDI_DOMINATORS, fin_bb));
4227 set_immediate_dominator (CDI_DOMINATORS, seq_start_bb,
4228 recompute_dominator (CDI_DOMINATORS, seq_start_bb));
4229 set_immediate_dominator (CDI_DOMINATORS, body_bb,
4230 recompute_dominator (CDI_DOMINATORS, body_bb));
4231
4232 if (!broken_loop)
4233 {
4234 struct loop *loop = body_bb->loop_father;
4235 struct loop *trip_loop = alloc_loop ();
4236 trip_loop->header = iter_part_bb;
4237 trip_loop->latch = trip_update_bb;
4238 add_loop (trip_loop, iter_part_bb->loop_father);
4239
4240 if (loop != entry_bb->loop_father)
4241 {
4242 gcc_assert (loop->header == body_bb);
4243 gcc_assert (loop->latch == region->cont
4244 || single_pred (loop->latch) == region->cont);
4245 trip_loop->inner = loop;
4246 return;
4247 }
4248
4249 if (!gimple_omp_for_combined_p (fd->for_stmt))
4250 {
4251 loop = alloc_loop ();
4252 loop->header = body_bb;
4253 if (collapse_bb == NULL)
4254 loop->latch = cont_bb;
4255 add_loop (loop, trip_loop);
4256 }
4257 }
4258 }
4259
4260 /* A subroutine of expand_omp_for. Generate code for _Cilk_for loop.
4261 Given parameters:
4262 for (V = N1; V cond N2; V += STEP) BODY;
4263
4264 where COND is "<" or ">" or "!=", we generate pseudocode
4265
4266 for (ind_var = low; ind_var < high; ind_var++)
4267 {
4268 V = n1 + (ind_var * STEP)
4269
4270 <BODY>
4271 }
4272
4273 In the above pseudocode, low and high are function parameters of the
 4274 child function. In the function below, we insert a temporary
 4275 variable that makes calls to two OMP functions that will not be
 4276 found in the body of _Cilk_for (since OMP_FOR cannot be mixed
 4277 with _Cilk_for). These calls are replaced with low and high
4278 by the function that handles taskreg. */
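 /* As a hedged illustration (an assumed example, not taken from this
    file), a loop such as

	_Cilk_for (int i = 0; i < n; i++)
	  body (i);

    becomes, inside the child function, a plain counted loop over the __low
    and __high parameters supplied by the Cilk runtime, with the user
    iteration variable recomputed as N1 + ind_var * STEP on each iteration
    as described above.  */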
4279
4280
4281 static void
4282 expand_cilk_for (struct omp_region *region, struct omp_for_data *fd)
4283 {
4284 bool broken_loop = region->cont == NULL;
4285 basic_block entry_bb = region->entry;
4286 basic_block cont_bb = region->cont;
4287
4288 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4289 gcc_assert (broken_loop
4290 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4291 basic_block l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4292 basic_block l1_bb, l2_bb;
4293
4294 if (!broken_loop)
4295 {
4296 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4297 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4298 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4299 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4300 }
4301 else
4302 {
4303 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4304 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4305 l2_bb = single_succ (l1_bb);
4306 }
4307 basic_block exit_bb = region->exit;
4308 basic_block l2_dom_bb = NULL;
4309
4310 gimple_stmt_iterator gsi = gsi_last_bb (entry_bb);
4311
4312 /* Below statements until the "tree high_val = ..." are pseudo statements
4313 used to pass information to be used by expand_omp_taskreg.
4314 low_val and high_val will be replaced by the __low and __high
 4315 parameters of the child function.
 4316
 4317 The call_exprs part is a place-holder; it is mainly used
 4318 to distinctly identify to the top-level part that this is
 4319 where we should put low and high (reasoning given in the header
 4320 comment). */
4321
4322 tree child_fndecl
4323 = gimple_omp_parallel_child_fn (
4324 as_a <gomp_parallel *> (last_stmt (region->outer->entry)));
4325 tree t, low_val = NULL_TREE, high_val = NULL_TREE;
4326 for (t = DECL_ARGUMENTS (child_fndecl); t; t = TREE_CHAIN (t))
4327 {
4328 if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__high"))
4329 high_val = t;
4330 else if (!strcmp (IDENTIFIER_POINTER (DECL_NAME (t)), "__low"))
4331 low_val = t;
4332 }
4333 gcc_assert (low_val && high_val);
4334
4335 tree type = TREE_TYPE (low_val);
4336 tree ind_var = create_tmp_reg (type, "__cilk_ind_var");
4337 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4338
4339 /* Not needed in SSA form right now. */
4340 gcc_assert (!gimple_in_ssa_p (cfun));
4341 if (l2_dom_bb == NULL)
4342 l2_dom_bb = l1_bb;
4343
4344 tree n1 = low_val;
4345 tree n2 = high_val;
4346
4347 gimple *stmt = gimple_build_assign (ind_var, n1);
4348
4349 /* Replace the GIMPLE_OMP_FOR statement. */
4350 gsi_replace (&gsi, stmt, true);
4351
4352 if (!broken_loop)
4353 {
4354 /* Code to control the increment goes in the CONT_BB. */
4355 gsi = gsi_last_bb (cont_bb);
4356 stmt = gsi_stmt (gsi);
4357 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4358 stmt = gimple_build_assign (ind_var, PLUS_EXPR, ind_var,
4359 build_one_cst (type));
4360
4361 /* Replace GIMPLE_OMP_CONTINUE. */
4362 gsi_replace (&gsi, stmt, true);
4363 }
4364
4365 /* Emit the condition in L1_BB. */
4366 gsi = gsi_after_labels (l1_bb);
4367 t = fold_build2 (MULT_EXPR, TREE_TYPE (fd->loop.step),
4368 fold_convert (TREE_TYPE (fd->loop.step), ind_var),
4369 fd->loop.step);
4370 if (POINTER_TYPE_P (TREE_TYPE (fd->loop.n1)))
4371 t = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4372 fd->loop.n1, fold_convert (sizetype, t));
4373 else
4374 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loop.n1),
4375 fd->loop.n1, fold_convert (TREE_TYPE (fd->loop.n1), t));
4376 t = fold_convert (TREE_TYPE (fd->loop.v), t);
4377 expand_omp_build_assign (&gsi, fd->loop.v, t);
4378
4379 /* The condition is always '<' since the runtime will fill in the low
4380 and high values. */
4381 stmt = gimple_build_cond (LT_EXPR, ind_var, n2, NULL_TREE, NULL_TREE);
4382 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
4383
4384 /* Remove GIMPLE_OMP_RETURN. */
4385 gsi = gsi_last_bb (exit_bb);
4386 gsi_remove (&gsi, true);
4387
4388 /* Connect the new blocks. */
4389 remove_edge (FALLTHRU_EDGE (entry_bb));
4390
4391 edge e, ne;
4392 if (!broken_loop)
4393 {
4394 remove_edge (BRANCH_EDGE (entry_bb));
4395 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4396
4397 e = BRANCH_EDGE (l1_bb);
4398 ne = FALLTHRU_EDGE (l1_bb);
4399 e->flags = EDGE_TRUE_VALUE;
4400 }
4401 else
4402 {
4403 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4404
4405 ne = single_succ_edge (l1_bb);
4406 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4407
4408 }
4409 ne->flags = EDGE_FALSE_VALUE;
4410 e->probability = REG_BR_PROB_BASE * 7 / 8;
4411 ne->probability = REG_BR_PROB_BASE / 8;
4412
4413 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4414 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4415 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4416
4417 if (!broken_loop)
4418 {
4419 struct loop *loop = alloc_loop ();
4420 loop->header = l1_bb;
4421 loop->latch = cont_bb;
4422 add_loop (loop, l1_bb->loop_father);
4423 loop->safelen = INT_MAX;
4424 }
4425
4426 /* Pick the correct library function based on the precision of the
4427 induction variable type. */
4428 tree lib_fun = NULL_TREE;
4429 if (TYPE_PRECISION (type) == 32)
4430 lib_fun = cilk_for_32_fndecl;
4431 else if (TYPE_PRECISION (type) == 64)
4432 lib_fun = cilk_for_64_fndecl;
4433 else
4434 gcc_unreachable ();
4435
4436 gcc_assert (fd->sched_kind == OMP_CLAUSE_SCHEDULE_CILKFOR);
4437
4438 /* WS_ARGS contains the library function flavor to call:
 4439 __libcilkrts_cilk_for_64 or __libcilkrts_cilk_for_32, and the
4440 user-defined grain value. If the user does not define one, then zero
4441 is passed in by the parser. */
4442 vec_alloc (region->ws_args, 2);
4443 region->ws_args->quick_push (lib_fun);
4444 region->ws_args->quick_push (fd->chunk_size);
4445 }
4446
4447 /* A subroutine of expand_omp_for. Generate code for a simd non-worksharing
4448 loop. Given parameters:
4449
4450 for (V = N1; V cond N2; V += STEP) BODY;
4451
4452 where COND is "<" or ">", we generate pseudocode
4453
4454 V = N1;
4455 goto L1;
4456 L0:
4457 BODY;
4458 V += STEP;
4459 L1:
4460 if (V cond N2) goto L0; else goto L2;
4461 L2:
4462
4463 For collapsed loops, given parameters:
4464 collapse(3)
4465 for (V1 = N11; V1 cond1 N12; V1 += STEP1)
4466 for (V2 = N21; V2 cond2 N22; V2 += STEP2)
4467 for (V3 = N31; V3 cond3 N32; V3 += STEP3)
4468 BODY;
4469
4470 we generate pseudocode
4471
4472 if (cond3 is <)
4473 adj = STEP3 - 1;
4474 else
4475 adj = STEP3 + 1;
4476 count3 = (adj + N32 - N31) / STEP3;
4477 if (cond2 is <)
4478 adj = STEP2 - 1;
4479 else
4480 adj = STEP2 + 1;
4481 count2 = (adj + N22 - N21) / STEP2;
4482 if (cond1 is <)
4483 adj = STEP1 - 1;
4484 else
4485 adj = STEP1 + 1;
4486 count1 = (adj + N12 - N11) / STEP1;
4487 count = count1 * count2 * count3;
4488 V = 0;
4489 V1 = N11;
4490 V2 = N21;
4491 V3 = N31;
4492 goto L1;
4493 L0:
4494 BODY;
4495 V += 1;
4496 V3 += STEP3;
4497 V2 += (V3 cond3 N32) ? 0 : STEP2;
4498 V3 = (V3 cond3 N32) ? V3 : N31;
4499 V1 += (V2 cond2 N22) ? 0 : STEP1;
4500 V2 = (V2 cond2 N22) ? V2 : N21;
4501 L1:
4502 if (V < count) goto L0; else goto L2;
4503 L2:
4504
4505 */
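 /* As a hedged illustration (an assumed example, not taken from this
    file), a collapsed simd loop such as

	#pragma omp simd collapse(2)
	for (i = 0; i < 4; i++)
	  for (j = 0; j < 3; j++)
	    body (i, j);

    follows the collapsed pseudocode above with count = 4 * 3 = 12: a single
    linear counter V runs from 0 to count while i and j are stepped and
    reset as shown, and the resulting loop is annotated for the vectorizer
    through safelen/simduid below.  */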
4506
4507 static void
4508 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
4509 {
4510 tree type, t;
4511 basic_block entry_bb, cont_bb, exit_bb, l0_bb, l1_bb, l2_bb, l2_dom_bb;
4512 gimple_stmt_iterator gsi;
4513 gimple *stmt;
4514 gcond *cond_stmt;
4515 bool broken_loop = region->cont == NULL;
4516 edge e, ne;
4517 tree *counts = NULL;
4518 int i;
4519 int safelen_int = INT_MAX;
4520 tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4521 OMP_CLAUSE_SAFELEN);
4522 tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4523 OMP_CLAUSE__SIMDUID_);
4524 tree n1, n2;
4525
4526 if (safelen)
4527 {
4528 safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
4529 if (TREE_CODE (safelen) != INTEGER_CST)
4530 safelen_int = 0;
4531 else if (tree_fits_uhwi_p (safelen) && tree_to_uhwi (safelen) < INT_MAX)
4532 safelen_int = tree_to_uhwi (safelen);
4533 if (safelen_int == 1)
4534 safelen_int = 0;
4535 }
4536 type = TREE_TYPE (fd->loop.v);
4537 entry_bb = region->entry;
4538 cont_bb = region->cont;
4539 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4540 gcc_assert (broken_loop
4541 || BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4542 l0_bb = FALLTHRU_EDGE (entry_bb)->dest;
4543 if (!broken_loop)
4544 {
4545 gcc_assert (BRANCH_EDGE (cont_bb)->dest == l0_bb);
4546 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
4547 l1_bb = split_block (cont_bb, last_stmt (cont_bb))->dest;
4548 l2_bb = BRANCH_EDGE (entry_bb)->dest;
4549 }
4550 else
4551 {
4552 BRANCH_EDGE (entry_bb)->flags &= ~EDGE_ABNORMAL;
4553 l1_bb = split_edge (BRANCH_EDGE (entry_bb));
4554 l2_bb = single_succ (l1_bb);
4555 }
4556 exit_bb = region->exit;
4557 l2_dom_bb = NULL;
4558
4559 gsi = gsi_last_bb (entry_bb);
4560
4561 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
4562 /* Not needed in SSA form right now. */
4563 gcc_assert (!gimple_in_ssa_p (cfun));
4564 if (fd->collapse > 1)
4565 {
4566 int first_zero_iter = -1, dummy = -1;
4567 basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
4568
4569 counts = XALLOCAVEC (tree, fd->collapse);
4570 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4571 zero_iter_bb, first_zero_iter,
4572 dummy_bb, dummy, l2_dom_bb);
4573 }
4574 if (l2_dom_bb == NULL)
4575 l2_dom_bb = l1_bb;
4576
4577 n1 = fd->loop.n1;
4578 n2 = fd->loop.n2;
4579 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4580 {
4581 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4582 OMP_CLAUSE__LOOPTEMP_);
4583 gcc_assert (innerc);
4584 n1 = OMP_CLAUSE_DECL (innerc);
4585 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4586 OMP_CLAUSE__LOOPTEMP_);
4587 gcc_assert (innerc);
4588 n2 = OMP_CLAUSE_DECL (innerc);
4589 }
4590 tree step = fd->loop.step;
4591
4592 bool is_simt = (safelen_int > 1
4593 && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
4594 OMP_CLAUSE__SIMT_));
4595 tree simt_lane = NULL_TREE, simt_maxlane = NULL_TREE;
4596 if (is_simt)
4597 {
4598 cfun->curr_properties &= ~PROP_gimple_lomp_dev;
4599 simt_lane = create_tmp_var (unsigned_type_node);
4600 gimple *g = gimple_build_call_internal (IFN_GOMP_SIMT_LANE, 0);
4601 gimple_call_set_lhs (g, simt_lane);
4602 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
4603 tree offset = fold_build2 (MULT_EXPR, TREE_TYPE (step), step,
4604 fold_convert (TREE_TYPE (step), simt_lane));
4605 n1 = fold_convert (type, n1);
4606 if (POINTER_TYPE_P (type))
4607 n1 = fold_build_pointer_plus (n1, offset);
4608 else
4609 n1 = fold_build2 (PLUS_EXPR, type, n1, fold_convert (type, offset));
4610
4611 /* Collapsed loops not handled for SIMT yet: limit to one lane only. */
4612 if (fd->collapse > 1)
4613 simt_maxlane = build_one_cst (unsigned_type_node);
4614 else if (safelen_int < omp_max_simt_vf ())
4615 simt_maxlane = build_int_cst (unsigned_type_node, safelen_int);
4616 tree vf
4617 = build_call_expr_internal_loc (UNKNOWN_LOCATION, IFN_GOMP_SIMT_VF,
4618 unsigned_type_node, 0);
4619 if (simt_maxlane)
4620 vf = fold_build2 (MIN_EXPR, unsigned_type_node, vf, simt_maxlane);
4621 vf = fold_convert (TREE_TYPE (step), vf);
4622 step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
4623 }
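      /* A hedged illustration with assumed values: under SIMT each lane
	 starts at n1 + simt_lane * step and then advances by step * vf,
	 so with vf = 32 lane 0 of a unit-stride loop would execute
	 iterations 0, 32, 64, ... and lane 3 would execute
	 3, 35, 67, ...  */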
4624
4625 expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
4626 if (fd->collapse > 1)
4627 {
4628 if (gimple_omp_for_combined_into_p (fd->for_stmt))
4629 {
4630 gsi_prev (&gsi);
4631 expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
4632 gsi_next (&gsi);
4633 }
4634 else
4635 for (i = 0; i < fd->collapse; i++)
4636 {
4637 tree itype = TREE_TYPE (fd->loops[i].v);
4638 if (POINTER_TYPE_P (itype))
4639 itype = signed_type_for (itype);
4640 t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
4641 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4642 }
4643 }
4644
4645 /* Remove the GIMPLE_OMP_FOR statement. */
4646 gsi_remove (&gsi, true);
4647
4648 if (!broken_loop)
4649 {
4650 /* Code to control the increment goes in the CONT_BB. */
4651 gsi = gsi_last_bb (cont_bb);
4652 stmt = gsi_stmt (gsi);
4653 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
4654
4655 if (POINTER_TYPE_P (type))
4656 t = fold_build_pointer_plus (fd->loop.v, step);
4657 else
4658 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4659 expand_omp_build_assign (&gsi, fd->loop.v, t);
4660
4661 if (fd->collapse > 1)
4662 {
4663 i = fd->collapse - 1;
4664 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
4665 {
4666 t = fold_convert (sizetype, fd->loops[i].step);
4667 t = fold_build_pointer_plus (fd->loops[i].v, t);
4668 }
4669 else
4670 {
4671 t = fold_convert (TREE_TYPE (fd->loops[i].v),
4672 fd->loops[i].step);
4673 t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
4674 fd->loops[i].v, t);
4675 }
4676 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4677
4678 for (i = fd->collapse - 1; i > 0; i--)
4679 {
4680 tree itype = TREE_TYPE (fd->loops[i].v);
4681 tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
4682 if (POINTER_TYPE_P (itype2))
4683 itype2 = signed_type_for (itype2);
4684 t = build3 (COND_EXPR, itype2,
4685 build2 (fd->loops[i].cond_code, boolean_type_node,
4686 fd->loops[i].v,
4687 fold_convert (itype, fd->loops[i].n2)),
4688 build_int_cst (itype2, 0),
4689 fold_convert (itype2, fd->loops[i - 1].step));
4690 if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
4691 t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
4692 else
4693 t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
4694 expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
4695
4696 t = build3 (COND_EXPR, itype,
4697 build2 (fd->loops[i].cond_code, boolean_type_node,
4698 fd->loops[i].v,
4699 fold_convert (itype, fd->loops[i].n2)),
4700 fd->loops[i].v,
4701 fold_convert (itype, fd->loops[i].n1));
4702 expand_omp_build_assign (&gsi, fd->loops[i].v, t);
4703 }
4704 }
4705
4706 /* Remove GIMPLE_OMP_CONTINUE. */
4707 gsi_remove (&gsi, true);
4708 }
4709
4710 /* Emit the condition in L1_BB. */
4711 gsi = gsi_start_bb (l1_bb);
4712
4713 t = fold_convert (type, n2);
4714 t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
4715 false, GSI_CONTINUE_LINKING);
4716 tree v = fd->loop.v;
4717 if (DECL_P (v) && TREE_ADDRESSABLE (v))
4718 v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
4719 false, GSI_CONTINUE_LINKING);
4720 t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
4721 cond_stmt = gimple_build_cond_empty (t);
4722 gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
4723 if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
4724 NULL, NULL)
4725 || walk_tree (gimple_cond_rhs_ptr (cond_stmt), expand_omp_regimplify_p,
4726 NULL, NULL))
4727 {
4728 gsi = gsi_for_stmt (cond_stmt);
4729 gimple_regimplify_operands (cond_stmt, &gsi);
4730 }
4731
4732 /* Add 'V -= STEP * (SIMT_VF - 1)' after the loop. */
4733 if (is_simt)
4734 {
4735 gsi = gsi_start_bb (l2_bb);
4736 step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
4737 if (POINTER_TYPE_P (type))
4738 t = fold_build_pointer_plus (fd->loop.v, step);
4739 else
4740 t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
4741 expand_omp_build_assign (&gsi, fd->loop.v, t);
4742 }
4743
4744 /* Remove GIMPLE_OMP_RETURN. */
4745 gsi = gsi_last_bb (exit_bb);
4746 gsi_remove (&gsi, true);
4747
4748 /* Connect the new blocks. */
4749 remove_edge (FALLTHRU_EDGE (entry_bb));
4750
4751 if (!broken_loop)
4752 {
4753 remove_edge (BRANCH_EDGE (entry_bb));
4754 make_edge (entry_bb, l1_bb, EDGE_FALLTHRU);
4755
4756 e = BRANCH_EDGE (l1_bb);
4757 ne = FALLTHRU_EDGE (l1_bb);
4758 e->flags = EDGE_TRUE_VALUE;
4759 }
4760 else
4761 {
4762 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
4763
4764 ne = single_succ_edge (l1_bb);
4765 e = make_edge (l1_bb, l0_bb, EDGE_TRUE_VALUE);
4766
4767 }
4768 ne->flags = EDGE_FALSE_VALUE;
4769 e->probability = REG_BR_PROB_BASE * 7 / 8;
4770 ne->probability = REG_BR_PROB_BASE / 8;
4771
4772 set_immediate_dominator (CDI_DOMINATORS, l1_bb, entry_bb);
4773 set_immediate_dominator (CDI_DOMINATORS, l0_bb, l1_bb);
4774
4775 if (simt_maxlane)
4776 {
4777 cond_stmt = gimple_build_cond (LT_EXPR, simt_lane, simt_maxlane,
4778 NULL_TREE, NULL_TREE);
4779 gsi = gsi_last_bb (entry_bb);
4780 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
4781 make_edge (entry_bb, l2_bb, EDGE_FALSE_VALUE);
4782 FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
4783 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE * 7 / 8;
4784 BRANCH_EDGE (entry_bb)->probability = REG_BR_PROB_BASE / 8;
4785 l2_dom_bb = entry_bb;
4786 }
4787 set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
4788
4789 if (!broken_loop)
4790 {
4791 struct loop *loop = alloc_loop ();
4792 loop->header = l1_bb;
4793 loop->latch = cont_bb;
4794 add_loop (loop, l1_bb->loop_father);
4795 loop->safelen = safelen_int;
4796 if (simduid)
4797 {
4798 loop->simduid = OMP_CLAUSE__SIMDUID__DECL (simduid);
4799 cfun->has_simduid_loops = true;
4800 }
4801 /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
4802 the loop. */
4803 if ((flag_tree_loop_vectorize
4804 || (!global_options_set.x_flag_tree_loop_vectorize
4805 && !global_options_set.x_flag_tree_vectorize))
4806 && flag_tree_loop_optimize
4807 && loop->safelen > 1)
4808 {
4809 loop->force_vectorize = true;
4810 cfun->has_force_vectorize_loops = true;
4811 }
4812 }
4813 else if (simduid)
4814 cfun->has_simduid_loops = true;
4815 }
4816
 4817 /* The taskloop construct is represented after gimplification as
 4818 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
4819 in between them. This routine expands the outer GIMPLE_OMP_FOR,
4820 which should just compute all the needed loop temporaries
4821 for GIMPLE_OMP_TASK. */
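 /* As a hedged illustration (an assumed example, not taken from this
    file), for

	#pragma omp taskloop
	for (i = 0; i < n; i++)
	  body (i);

    this outer GIMPLE_OMP_FOR only stores the overall bounds (0 and n here)
    into the _looptemp_ temporaries of the enclosed GIMPLE_OMP_TASK so that
    the runtime can split the iteration space into tasks; the loop body
    itself comes from the inner GIMPLE_OMP_FOR.  */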
4822
4823 static void
4824 expand_omp_taskloop_for_outer (struct omp_region *region,
4825 struct omp_for_data *fd,
4826 gimple *inner_stmt)
4827 {
4828 tree type, bias = NULL_TREE;
4829 basic_block entry_bb, cont_bb, exit_bb;
4830 gimple_stmt_iterator gsi;
4831 gassign *assign_stmt;
4832 tree *counts = NULL;
4833 int i;
4834
4835 gcc_assert (inner_stmt);
4836 gcc_assert (region->cont);
4837 gcc_assert (gimple_code (inner_stmt) == GIMPLE_OMP_TASK
4838 && gimple_omp_task_taskloop_p (inner_stmt));
4839 type = TREE_TYPE (fd->loop.v);
4840
4841 /* See if we need to bias by LLONG_MIN. */
4842 if (fd->iter_type == long_long_unsigned_type_node
4843 && TREE_CODE (type) == INTEGER_TYPE
4844 && !TYPE_UNSIGNED (type))
4845 {
4846 tree n1, n2;
4847
4848 if (fd->loop.cond_code == LT_EXPR)
4849 {
4850 n1 = fd->loop.n1;
4851 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
4852 }
4853 else
4854 {
4855 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
4856 n2 = fd->loop.n1;
4857 }
4858 if (TREE_CODE (n1) != INTEGER_CST
4859 || TREE_CODE (n2) != INTEGER_CST
4860 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
4861 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
4862 }
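  /* For illustration only (assumed values, not from this code): for a
     signed 64-bit iteration range from -5 to 10, adding the
     TYPE_MIN_VALUE bias maps the bounds to 0x7ffffffffffffffb and
     0x800000000000000a in the unsigned iterator type, preserving their
     order for the unsigned arithmetic done by GOMP_taskloop_ull.  */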
4863
4864 entry_bb = region->entry;
4865 cont_bb = region->cont;
4866 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
4867 gcc_assert (BRANCH_EDGE (entry_bb)->dest == FALLTHRU_EDGE (cont_bb)->dest);
4868 exit_bb = region->exit;
4869
4870 gsi = gsi_last_bb (entry_bb);
4871 gimple *for_stmt = gsi_stmt (gsi);
4872 gcc_assert (gimple_code (for_stmt) == GIMPLE_OMP_FOR);
4873 if (fd->collapse > 1)
4874 {
4875 int first_zero_iter = -1, dummy = -1;
4876 basic_block zero_iter_bb = NULL, dummy_bb = NULL, l2_dom_bb = NULL;
4877
4878 counts = XALLOCAVEC (tree, fd->collapse);
4879 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
4880 zero_iter_bb, first_zero_iter,
4881 dummy_bb, dummy, l2_dom_bb);
4882
4883 if (zero_iter_bb)
4884 {
4885 /* Some counts[i] vars might be uninitialized if
4886 some loop has zero iterations. But the body shouldn't
4887 be executed in that case, so just avoid uninit warnings. */
4888 for (i = first_zero_iter; i < fd->collapse; i++)
4889 if (SSA_VAR_P (counts[i]))
4890 TREE_NO_WARNING (counts[i]) = 1;
4891 gsi_prev (&gsi);
4892 edge e = split_block (entry_bb, gsi_stmt (gsi));
4893 entry_bb = e->dest;
4894 make_edge (zero_iter_bb, entry_bb, EDGE_FALLTHRU);
4895 gsi = gsi_last_bb (entry_bb);
4896 set_immediate_dominator (CDI_DOMINATORS, entry_bb,
4897 get_immediate_dominator (CDI_DOMINATORS,
4898 zero_iter_bb));
4899 }
4900 }
4901
4902 tree t0, t1;
4903 t1 = fd->loop.n2;
4904 t0 = fd->loop.n1;
4905 if (POINTER_TYPE_P (TREE_TYPE (t0))
4906 && TYPE_PRECISION (TREE_TYPE (t0))
4907 != TYPE_PRECISION (fd->iter_type))
4908 {
4909 /* Avoid casting pointers to integer of a different size. */
4910 tree itype = signed_type_for (type);
4911 t1 = fold_convert (fd->iter_type, fold_convert (itype, t1));
4912 t0 = fold_convert (fd->iter_type, fold_convert (itype, t0));
4913 }
4914 else
4915 {
4916 t1 = fold_convert (fd->iter_type, t1);
4917 t0 = fold_convert (fd->iter_type, t0);
4918 }
4919 if (bias)
4920 {
4921 t1 = fold_build2 (PLUS_EXPR, fd->iter_type, t1, bias);
4922 t0 = fold_build2 (PLUS_EXPR, fd->iter_type, t0, bias);
4923 }
4924
4925 tree innerc = omp_find_clause (gimple_omp_task_clauses (inner_stmt),
4926 OMP_CLAUSE__LOOPTEMP_);
4927 gcc_assert (innerc);
4928 tree startvar = OMP_CLAUSE_DECL (innerc);
4929 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
4930 gcc_assert (innerc);
4931 tree endvar = OMP_CLAUSE_DECL (innerc);
4932 if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
4933 {
4934 gcc_assert (innerc);
4935 for (i = 1; i < fd->collapse; i++)
4936 {
4937 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4938 OMP_CLAUSE__LOOPTEMP_);
4939 gcc_assert (innerc);
4940 }
4941 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
4942 OMP_CLAUSE__LOOPTEMP_);
4943 if (innerc)
4944 {
4945 /* If needed (inner taskloop has lastprivate clause), propagate
4946 down the total number of iterations. */
4947 tree t = force_gimple_operand_gsi (&gsi, fd->loop.n2, false,
4948 NULL_TREE, false,
4949 GSI_CONTINUE_LINKING);
4950 assign_stmt = gimple_build_assign (OMP_CLAUSE_DECL (innerc), t);
4951 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4952 }
4953 }
4954
4955 t0 = force_gimple_operand_gsi (&gsi, t0, false, NULL_TREE, false,
4956 GSI_CONTINUE_LINKING);
4957 assign_stmt = gimple_build_assign (startvar, t0);
4958 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4959
4960 t1 = force_gimple_operand_gsi (&gsi, t1, false, NULL_TREE, false,
4961 GSI_CONTINUE_LINKING);
4962 assign_stmt = gimple_build_assign (endvar, t1);
4963 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
4964 if (fd->collapse > 1)
4965 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
4966
4967 /* Remove the GIMPLE_OMP_FOR statement. */
4968 gsi = gsi_for_stmt (for_stmt);
4969 gsi_remove (&gsi, true);
4970
4971 gsi = gsi_last_bb (cont_bb);
4972 gsi_remove (&gsi, true);
4973
4974 gsi = gsi_last_bb (exit_bb);
4975 gsi_remove (&gsi, true);
4976
4977 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
4978 remove_edge (BRANCH_EDGE (entry_bb));
4979 FALLTHRU_EDGE (cont_bb)->probability = REG_BR_PROB_BASE;
4980 remove_edge (BRANCH_EDGE (cont_bb));
4981 set_immediate_dominator (CDI_DOMINATORS, exit_bb, cont_bb);
4982 set_immediate_dominator (CDI_DOMINATORS, region->entry,
4983 recompute_dominator (CDI_DOMINATORS, region->entry));
4984 }
4985
 4986 /* The taskloop construct is represented after gimplification as
 4987 two GIMPLE_OMP_FOR constructs with a GIMPLE_OMP_TASK sandwiched
 4988 in between them. This routine expands the inner GIMPLE_OMP_FOR.
 4989 The GOMP_taskloop{,_ull} function arranges for each task to be given just
4990 a single range of iterations. */
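 /* A hedged illustration, continuing the assumed taskloop example above:
    each task created by GOMP_taskloop{,_ull} is handed one subrange
    through the two _looptemp_ temporaries, e.g. one task might receive
    [0,100) and another [100,200); the code below simply runs the
    sequential loop from the first temporary up to the second.  */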
4991
4992 static void
4993 expand_omp_taskloop_for_inner (struct omp_region *region,
4994 struct omp_for_data *fd,
4995 gimple *inner_stmt)
4996 {
4997 tree e, t, type, itype, vmain, vback, bias = NULL_TREE;
4998 basic_block entry_bb, exit_bb, body_bb, cont_bb, collapse_bb = NULL;
4999 basic_block fin_bb;
5000 gimple_stmt_iterator gsi;
5001 edge ep;
5002 bool broken_loop = region->cont == NULL;
5003 tree *counts = NULL;
5004 tree n1, n2, step;
5005
5006 itype = type = TREE_TYPE (fd->loop.v);
5007 if (POINTER_TYPE_P (type))
5008 itype = signed_type_for (type);
5009
5010 /* See if we need to bias by LLONG_MIN. */
5011 if (fd->iter_type == long_long_unsigned_type_node
5012 && TREE_CODE (type) == INTEGER_TYPE
5013 && !TYPE_UNSIGNED (type))
5014 {
5015 tree n1, n2;
5016
5017 if (fd->loop.cond_code == LT_EXPR)
5018 {
5019 n1 = fd->loop.n1;
5020 n2 = fold_build2 (PLUS_EXPR, type, fd->loop.n2, fd->loop.step);
5021 }
5022 else
5023 {
5024 n1 = fold_build2 (MINUS_EXPR, type, fd->loop.n2, fd->loop.step);
5025 n2 = fd->loop.n1;
5026 }
5027 if (TREE_CODE (n1) != INTEGER_CST
5028 || TREE_CODE (n2) != INTEGER_CST
5029 || ((tree_int_cst_sgn (n1) < 0) ^ (tree_int_cst_sgn (n2) < 0)))
5030 bias = fold_convert (fd->iter_type, TYPE_MIN_VALUE (type));
5031 }
5032
5033 entry_bb = region->entry;
5034 cont_bb = region->cont;
5035 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2);
5036 fin_bb = BRANCH_EDGE (entry_bb)->dest;
5037 gcc_assert (broken_loop
5038 || (fin_bb == FALLTHRU_EDGE (cont_bb)->dest));
5039 body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5040 if (!broken_loop)
5041 {
5042 gcc_assert (BRANCH_EDGE (cont_bb)->dest == body_bb);
5043 gcc_assert (EDGE_COUNT (cont_bb->succs) == 2);
5044 }
5045 exit_bb = region->exit;
5046
5047 /* Iteration space partitioning goes in ENTRY_BB. */
5048 gsi = gsi_last_bb (entry_bb);
5049 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
5050
5051 if (fd->collapse > 1)
5052 {
5053 int first_zero_iter = -1, dummy = -1;
5054 basic_block l2_dom_bb = NULL, dummy_bb = NULL;
5055
5056 counts = XALLOCAVEC (tree, fd->collapse);
5057 expand_omp_for_init_counts (fd, &gsi, entry_bb, counts,
5058 fin_bb, first_zero_iter,
5059 dummy_bb, dummy, l2_dom_bb);
5060 t = NULL_TREE;
5061 }
5062 else
5063 t = integer_one_node;
5064
5065 step = fd->loop.step;
5066 tree innerc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
5067 OMP_CLAUSE__LOOPTEMP_);
5068 gcc_assert (innerc);
5069 n1 = OMP_CLAUSE_DECL (innerc);
5070 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc), OMP_CLAUSE__LOOPTEMP_);
5071 gcc_assert (innerc);
5072 n2 = OMP_CLAUSE_DECL (innerc);
5073 if (bias)
5074 {
5075 n1 = fold_build2 (PLUS_EXPR, fd->iter_type, n1, bias);
5076 n2 = fold_build2 (PLUS_EXPR, fd->iter_type, n2, bias);
5077 }
5078 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
5079 true, NULL_TREE, true, GSI_SAME_STMT);
5080 n2 = force_gimple_operand_gsi (&gsi, fold_convert (itype, n2),
5081 true, NULL_TREE, true, GSI_SAME_STMT);
5082 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
5083 true, NULL_TREE, true, GSI_SAME_STMT);
5084
5085 tree startvar = fd->loop.v;
5086 tree endvar = NULL_TREE;
5087
5088 if (gimple_omp_for_combined_p (fd->for_stmt))
5089 {
5090 tree clauses = gimple_omp_for_clauses (inner_stmt);
5091 tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
5092 gcc_assert (innerc);
5093 startvar = OMP_CLAUSE_DECL (innerc);
5094 innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
5095 OMP_CLAUSE__LOOPTEMP_);
5096 gcc_assert (innerc);
5097 endvar = OMP_CLAUSE_DECL (innerc);
5098 }
5099 t = fold_convert (TREE_TYPE (startvar), n1);
5100 t = force_gimple_operand_gsi (&gsi, t,
5101 DECL_P (startvar)
5102 && TREE_ADDRESSABLE (startvar),
5103 NULL_TREE, false, GSI_CONTINUE_LINKING);
5104 gimple *assign_stmt = gimple_build_assign (startvar, t);
5105 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5106
5107 t = fold_convert (TREE_TYPE (startvar), n2);
5108 e = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
5109 false, GSI_CONTINUE_LINKING);
5110 if (endvar)
5111 {
5112 assign_stmt = gimple_build_assign (endvar, e);
5113 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5114 if (useless_type_conversion_p (TREE_TYPE (fd->loop.v), TREE_TYPE (e)))
5115 assign_stmt = gimple_build_assign (fd->loop.v, e);
5116 else
5117 assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
5118 gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
5119 }
5120 if (fd->collapse > 1)
5121 expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
5122
5123 if (!broken_loop)
5124 {
5125 /* The code controlling the sequential loop replaces the
5126 GIMPLE_OMP_CONTINUE. */
5127 gsi = gsi_last_bb (cont_bb);
5128 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5129 gcc_assert (gimple_code (cont_stmt) == GIMPLE_OMP_CONTINUE);
5130 vmain = gimple_omp_continue_control_use (cont_stmt);
5131 vback = gimple_omp_continue_control_def (cont_stmt);
5132
5133 if (!gimple_omp_for_combined_p (fd->for_stmt))
5134 {
5135 if (POINTER_TYPE_P (type))
5136 t = fold_build_pointer_plus (vmain, step);
5137 else
5138 t = fold_build2 (PLUS_EXPR, type, vmain, step);
5139 t = force_gimple_operand_gsi (&gsi, t,
5140 DECL_P (vback)
5141 && TREE_ADDRESSABLE (vback),
5142 NULL_TREE, true, GSI_SAME_STMT);
5143 assign_stmt = gimple_build_assign (vback, t);
5144 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5145
5146 t = build2 (fd->loop.cond_code, boolean_type_node,
5147 DECL_P (vback) && TREE_ADDRESSABLE (vback)
5148 ? t : vback, e);
5149 gsi_insert_before (&gsi, gimple_build_cond_empty (t), GSI_SAME_STMT);
5150 }
5151
5152 /* Remove the GIMPLE_OMP_CONTINUE statement. */
5153 gsi_remove (&gsi, true);
5154
5155 if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
5156 collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
5157 }
5158
5159 /* Remove the GIMPLE_OMP_FOR statement. */
5160 gsi = gsi_for_stmt (fd->for_stmt);
5161 gsi_remove (&gsi, true);
5162
5163 /* Remove the GIMPLE_OMP_RETURN statement. */
5164 gsi = gsi_last_bb (exit_bb);
5165 gsi_remove (&gsi, true);
5166
5167 FALLTHRU_EDGE (entry_bb)->probability = REG_BR_PROB_BASE;
5168 if (!broken_loop)
5169 remove_edge (BRANCH_EDGE (entry_bb));
5170 else
5171 {
5172 remove_edge_and_dominated_blocks (BRANCH_EDGE (entry_bb));
5173 region->outer->cont = NULL;
5174 }
5175
5176 /* Connect all the blocks. */
5177 if (!broken_loop)
5178 {
5179 ep = find_edge (cont_bb, body_bb);
5180 if (gimple_omp_for_combined_p (fd->for_stmt))
5181 {
5182 remove_edge (ep);
5183 ep = NULL;
5184 }
5185 else if (fd->collapse > 1)
5186 {
5187 remove_edge (ep);
5188 ep = make_edge (cont_bb, collapse_bb, EDGE_TRUE_VALUE);
5189 }
5190 else
5191 ep->flags = EDGE_TRUE_VALUE;
5192 find_edge (cont_bb, fin_bb)->flags
5193 = ep ? EDGE_FALSE_VALUE : EDGE_FALLTHRU;
5194 }
5195
5196 set_immediate_dominator (CDI_DOMINATORS, body_bb,
5197 recompute_dominator (CDI_DOMINATORS, body_bb));
5198 if (!broken_loop)
5199 set_immediate_dominator (CDI_DOMINATORS, fin_bb,
5200 recompute_dominator (CDI_DOMINATORS, fin_bb));
5201
5202 if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
5203 {
5204 struct loop *loop = alloc_loop ();
5205 loop->header = body_bb;
5206 if (collapse_bb == NULL)
5207 loop->latch = cont_bb;
5208 add_loop (loop, body_bb->loop_father);
5209 }
5210 }
5211
5212 /* A subroutine of expand_omp_for. Generate code for an OpenACC
5213 partitioned loop. The lowering here is abstracted, in that the
5214 loop parameters are passed through internal functions, which are
5215 further lowered by oacc_device_lower, once we get to the target
5216 compiler. The loop is of the form:
5217
5218 for (V = B; V LTGT E; V += S) {BODY}
5219
5220 where LTGT is < or >. We may have a specified chunking size, CHUNKING
5221 (constant 0 for no chunking) and we will have a GWV partitioning
5222 mask, specifying dimensions over which the loop is to be
5223 partitioned (see note below). We generate code that looks like:
5224
5225 <entry_bb> [incoming FALL->body, BRANCH->exit]
5226 typedef signedintify (typeof (V)) T; // underlying signed integral type
5227 T range = E - B;
5228 T chunk_no = 0;
5229 T dir = LTGT == '<' ? +1 : -1;
5230 T chunk_max = GOACC_LOOP_CHUNK (dir, range, S, CHUNK_SIZE, GWV);
5231 T step = GOACC_LOOP_STEP (dir, range, S, CHUNK_SIZE, GWV);
5232
5233 <head_bb> [created by splitting end of entry_bb]
5234 T offset = GOACC_LOOP_OFFSET (dir, range, S, CHUNK_SIZE, GWV, chunk_no);
5235 T bound = GOACC_LOOP_BOUND (dir, range, S, CHUNK_SIZE, GWV, offset);
5236 if (!(offset LTGT bound)) goto bottom_bb;
5237
5238 <body_bb> [incoming]
5239 V = B + offset;
5240 {BODY}
5241
5242 <cont_bb> [incoming, may == body_bb FALL->exit_bb, BRANCH->body_bb]
5243 offset += step;
5244 if (offset LTGT bound) goto body_bb; [*]
5245
5246 <bottom_bb> [created by splitting start of exit_bb] insert BRANCH->head_bb
5247 chunk_no++;
5248 if (chunk_no < chunk_max) goto head_bb;
5249
5250 <exit_bb> [incoming]
5251 V = B + ((range -/+ 1) / S +/- 1) * S [*]
5252
5253 [*] Needed if V live at end of loop
5254
5255 Note: CHUNKING & GWV mask are specified explicitly here. This is a
5256 transition, and will be specified by a more general mechanism shortly.
5257 */
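/* A hedged illustration (not from the original sources): for a simple
   gang-partitioned loop

     #pragma acc loop gang
     for (i = 0; i < n; i++)
       body (i);

   B is 0, E is n, S is 1 and GWV selects the gang dimension; each partition
   obtains its own OFFSET/BOUND window from the IFN_GOACC_LOOP calls sketched
   above.  How those internal-function calls map onto the target is decided
   later by oacc_device_lower.  The names body, i and n are assumptions of
   the example.  */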
5258
5259 static void
5260 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
5261 {
5262 tree v = fd->loop.v;
5263 enum tree_code cond_code = fd->loop.cond_code;
5264 enum tree_code plus_code = PLUS_EXPR;
5265
5266 tree chunk_size = integer_minus_one_node;
5267 tree gwv = integer_zero_node;
5268 tree iter_type = TREE_TYPE (v);
5269 tree diff_type = iter_type;
5270 tree plus_type = iter_type;
5271 struct oacc_collapse *counts = NULL;
5272
5273 gcc_checking_assert (gimple_omp_for_kind (fd->for_stmt)
5274 == GF_OMP_FOR_KIND_OACC_LOOP);
5275 gcc_assert (!gimple_omp_for_combined_into_p (fd->for_stmt));
5276 gcc_assert (cond_code == LT_EXPR || cond_code == GT_EXPR);
5277
5278 if (POINTER_TYPE_P (iter_type))
5279 {
5280 plus_code = POINTER_PLUS_EXPR;
5281 plus_type = sizetype;
5282 }
5283 if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
5284 diff_type = signed_type_for (diff_type);
5285
5286 basic_block entry_bb = region->entry; /* BB ending in OMP_FOR */
5287 basic_block exit_bb = region->exit; /* BB ending in OMP_RETURN */
5288 basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE */
5289 basic_block bottom_bb = NULL;
5290
5291 /* entry_bb has two successors; the branch edge is to the exit
5292 block, fallthrough edge to body. */
5293 gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
5294 && BRANCH_EDGE (entry_bb)->dest == exit_bb);
5295
5296 /* If cont_bb is non-NULL, it has 2 successors. The branch successor is
5297 body_bb, or a block whose only successor is body_bb. Its
5298 fallthrough successor is the final block (same as the branch
5299 successor of the entry_bb). */
5300 if (cont_bb)
5301 {
5302 basic_block body_bb = FALLTHRU_EDGE (entry_bb)->dest;
5303 basic_block bed = BRANCH_EDGE (cont_bb)->dest;
5304
5305 gcc_assert (FALLTHRU_EDGE (cont_bb)->dest == exit_bb);
5306 gcc_assert (bed == body_bb || single_succ_edge (bed)->dest == body_bb);
5307 }
5308 else
5309 gcc_assert (!gimple_in_ssa_p (cfun));
5310
5311 /* The exit block only has entry_bb and cont_bb as predecessors. */
5312 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1 + (cont_bb != NULL));
5313
5314 tree chunk_no;
5315 tree chunk_max = NULL_TREE;
5316 tree bound, offset;
5317 tree step = create_tmp_var (diff_type, ".step");
5318 bool up = cond_code == LT_EXPR;
5319 tree dir = build_int_cst (diff_type, up ? +1 : -1);
5320 bool chunking = !gimple_in_ssa_p (cfun);
5321 bool negating;
5322
5323 /* SSA instances. */
5324 tree offset_incr = NULL_TREE;
5325 tree offset_init = NULL_TREE;
5326
5327 gimple_stmt_iterator gsi;
5328 gassign *ass;
5329 gcall *call;
5330 gimple *stmt;
5331 tree expr;
5332 location_t loc;
5333 edge split, be, fte;
5334
5335 /* Split the end of entry_bb to create head_bb. */
5336 split = split_block (entry_bb, last_stmt (entry_bb));
5337 basic_block head_bb = split->dest;
5338 entry_bb = split->src;
5339
5340 /* Chunk setup goes at end of entry_bb, replacing the omp_for. */
5341 gsi = gsi_last_bb (entry_bb);
5342 gomp_for *for_stmt = as_a <gomp_for *> (gsi_stmt (gsi));
5343 loc = gimple_location (for_stmt);
5344
5345 if (gimple_in_ssa_p (cfun))
5346 {
5347 offset_init = gimple_omp_for_index (for_stmt, 0);
5348 gcc_assert (integer_zerop (fd->loop.n1));
5349 /* The SSA parallelizer does gang parallelism. */
5350 gwv = build_int_cst (integer_type_node, GOMP_DIM_MASK (GOMP_DIM_GANG));
5351 }
5352
5353 if (fd->collapse > 1)
5354 {
5355 counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
5356 tree total = expand_oacc_collapse_init (fd, &gsi, counts,
5357 TREE_TYPE (fd->loop.n2));
5358
5359 if (SSA_VAR_P (fd->loop.n2))
5360 {
5361 total = force_gimple_operand_gsi (&gsi, total, false, NULL_TREE,
5362 true, GSI_SAME_STMT);
5363 ass = gimple_build_assign (fd->loop.n2, total);
5364 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5365 }
5366
5367 }
5368
5369 tree b = fd->loop.n1;
5370 tree e = fd->loop.n2;
5371 tree s = fd->loop.step;
5372
5373 b = force_gimple_operand_gsi (&gsi, b, true, NULL_TREE, true, GSI_SAME_STMT);
5374 e = force_gimple_operand_gsi (&gsi, e, true, NULL_TREE, true, GSI_SAME_STMT);
5375
5376 /* Convert the step, avoiding possible unsigned->signed overflow. */
5377 negating = !up && TYPE_UNSIGNED (TREE_TYPE (s));
5378 if (negating)
5379 s = fold_build1 (NEGATE_EXPR, TREE_TYPE (s), s);
5380 s = fold_convert (diff_type, s);
5381 if (negating)
5382 s = fold_build1 (NEGATE_EXPR, diff_type, s);
5383 s = force_gimple_operand_gsi (&gsi, s, true, NULL_TREE, true, GSI_SAME_STMT);
5384
5385 if (!chunking)
5386 chunk_size = integer_zero_node;
5387 expr = fold_convert (diff_type, chunk_size);
5388 chunk_size = force_gimple_operand_gsi (&gsi, expr, true,
5389 NULL_TREE, true, GSI_SAME_STMT);
5390 /* Determine the range, avoiding possible unsigned->signed overflow. */
5391 negating = !up && TYPE_UNSIGNED (iter_type);
5392 expr = fold_build2 (MINUS_EXPR, plus_type,
5393 fold_convert (plus_type, negating ? b : e),
5394 fold_convert (plus_type, negating ? e : b));
5395 expr = fold_convert (diff_type, expr);
5396 if (negating)
5397 expr = fold_build1 (NEGATE_EXPR, diff_type, expr);
5398 tree range = force_gimple_operand_gsi (&gsi, expr, true,
5399 NULL_TREE, true, GSI_SAME_STMT);
5400
5401 chunk_no = build_int_cst (diff_type, 0);
5402 if (chunking)
5403 {
5404 gcc_assert (!gimple_in_ssa_p (cfun));
5405
5406 expr = chunk_no;
5407 chunk_max = create_tmp_var (diff_type, ".chunk_max");
5408 chunk_no = create_tmp_var (diff_type, ".chunk_no");
5409
5410 ass = gimple_build_assign (chunk_no, expr);
5411 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5412
5413 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5414 build_int_cst (integer_type_node,
5415 IFN_GOACC_LOOP_CHUNKS),
5416 dir, range, s, chunk_size, gwv);
5417 gimple_call_set_lhs (call, chunk_max);
5418 gimple_set_location (call, loc);
5419 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5420 }
5421 else
5422 chunk_size = chunk_no;
5423
5424 call = gimple_build_call_internal (IFN_GOACC_LOOP, 6,
5425 build_int_cst (integer_type_node,
5426 IFN_GOACC_LOOP_STEP),
5427 dir, range, s, chunk_size, gwv);
5428 gimple_call_set_lhs (call, step);
5429 gimple_set_location (call, loc);
5430 gsi_insert_before (&gsi, call, GSI_SAME_STMT);
5431
5432 /* Remove the GIMPLE_OMP_FOR. */
5433 gsi_remove (&gsi, true);
5434
5435 /* Fixup edges from head_bb. */
5436 be = BRANCH_EDGE (head_bb);
5437 fte = FALLTHRU_EDGE (head_bb);
5438 be->flags |= EDGE_FALSE_VALUE;
5439 fte->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
5440
5441 basic_block body_bb = fte->dest;
5442
5443 if (gimple_in_ssa_p (cfun))
5444 {
5445 gsi = gsi_last_bb (cont_bb);
5446 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5447
5448 offset = gimple_omp_continue_control_use (cont_stmt);
5449 offset_incr = gimple_omp_continue_control_def (cont_stmt);
5450 }
5451 else
5452 {
5453 offset = create_tmp_var (diff_type, ".offset");
5454 offset_init = offset_incr = offset;
5455 }
5456 bound = create_tmp_var (TREE_TYPE (offset), ".bound");
5457
5458 /* Loop offset & bound go into head_bb. */
5459 gsi = gsi_start_bb (head_bb);
5460
5461 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5462 build_int_cst (integer_type_node,
5463 IFN_GOACC_LOOP_OFFSET),
5464 dir, range, s,
5465 chunk_size, gwv, chunk_no);
5466 gimple_call_set_lhs (call, offset_init);
5467 gimple_set_location (call, loc);
5468 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5469
5470 call = gimple_build_call_internal (IFN_GOACC_LOOP, 7,
5471 build_int_cst (integer_type_node,
5472 IFN_GOACC_LOOP_BOUND),
5473 dir, range, s,
5474 chunk_size, gwv, offset_init);
5475 gimple_call_set_lhs (call, bound);
5476 gimple_set_location (call, loc);
5477 gsi_insert_after (&gsi, call, GSI_CONTINUE_LINKING);
5478
5479 expr = build2 (cond_code, boolean_type_node, offset_init, bound);
5480 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5481 GSI_CONTINUE_LINKING);
5482
5483 /* V assignment goes into body_bb. */
5484 if (!gimple_in_ssa_p (cfun))
5485 {
5486 gsi = gsi_start_bb (body_bb);
5487
5488 expr = build2 (plus_code, iter_type, b,
5489 fold_convert (plus_type, offset));
5490 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5491 true, GSI_SAME_STMT);
5492 ass = gimple_build_assign (v, expr);
5493 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5494 if (fd->collapse > 1)
5495 expand_oacc_collapse_vars (fd, &gsi, counts, v);
5496 }
5497
5498 /* Loop increment goes into cont_bb. If this is not a loop, we
5499 will have spawned threads as if it was, and each one will
5500 execute one iteration. The specification is not explicit about
5501 whether such constructs are ill-formed or not, and they can
5502 occur, especially when noreturn routines are involved. */
5503 if (cont_bb)
5504 {
5505 gsi = gsi_last_bb (cont_bb);
5506 gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
5507 loc = gimple_location (cont_stmt);
5508
5509 /* Increment offset. */
5510 if (gimple_in_ssa_p (cfun))
5511 expr = build2 (plus_code, iter_type, offset,
5512 fold_convert (plus_type, step));
5513 else
5514 expr = build2 (PLUS_EXPR, diff_type, offset, step);
5515 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5516 true, GSI_SAME_STMT);
5517 ass = gimple_build_assign (offset_incr, expr);
5518 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5519 expr = build2 (cond_code, boolean_type_node, offset_incr, bound);
5520 gsi_insert_before (&gsi, gimple_build_cond_empty (expr), GSI_SAME_STMT);
5521
5522 /* Remove the GIMPLE_OMP_CONTINUE. */
5523 gsi_remove (&gsi, true);
5524
5525 /* Fixup edges from cont_bb. */
5526 be = BRANCH_EDGE (cont_bb);
5527 fte = FALLTHRU_EDGE (cont_bb);
5528 be->flags |= EDGE_TRUE_VALUE;
5529 fte->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5530
5531 if (chunking)
5532 {
5533 /* Split the beginning of exit_bb to make bottom_bb. We
5534 need to insert a nop at the start, because splitting is
5535 after a stmt, not before. */
5536 gsi = gsi_start_bb (exit_bb);
5537 stmt = gimple_build_nop ();
5538 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
5539 split = split_block (exit_bb, stmt);
5540 bottom_bb = split->src;
5541 exit_bb = split->dest;
5542 gsi = gsi_last_bb (bottom_bb);
5543
5544 /* Chunk increment and test go into bottom_bb. */
5545 expr = build2 (PLUS_EXPR, diff_type, chunk_no,
5546 build_int_cst (diff_type, 1));
5547 ass = gimple_build_assign (chunk_no, expr);
5548 gsi_insert_after (&gsi, ass, GSI_CONTINUE_LINKING);
5549
5550 /* Chunk test at end of bottom_bb. */
5551 expr = build2 (LT_EXPR, boolean_type_node, chunk_no, chunk_max);
5552 gsi_insert_after (&gsi, gimple_build_cond_empty (expr),
5553 GSI_CONTINUE_LINKING);
5554
5555 /* Fixup edges from bottom_bb. */
5556 split->flags ^= EDGE_FALLTHRU | EDGE_FALSE_VALUE;
5557 make_edge (bottom_bb, head_bb, EDGE_TRUE_VALUE);
5558 }
5559 }
5560
5561 gsi = gsi_last_bb (exit_bb);
5562 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
5563 loc = gimple_location (gsi_stmt (gsi));
5564
5565 if (!gimple_in_ssa_p (cfun))
5566 {
5567 /* Insert the final value of V, in case it is live. This is the
5568 value for the only thread that survives past the join. */
5569 expr = fold_build2 (MINUS_EXPR, diff_type, range, dir);
5570 expr = fold_build2 (PLUS_EXPR, diff_type, expr, s);
5571 expr = fold_build2 (TRUNC_DIV_EXPR, diff_type, expr, s);
5572 expr = fold_build2 (MULT_EXPR, diff_type, expr, s);
5573 expr = build2 (plus_code, iter_type, b, fold_convert (plus_type, expr));
5574 expr = force_gimple_operand_gsi (&gsi, expr, false, NULL_TREE,
5575 true, GSI_SAME_STMT);
5576 ass = gimple_build_assign (v, expr);
5577 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
5578 }
5579
5580 /* Remove the OMP_RETURN. */
5581 gsi_remove (&gsi, true);
5582
5583 if (cont_bb)
5584 {
5585 /* We now have one or two nested loops. Update the loop
5586 structures. */
5587 struct loop *parent = entry_bb->loop_father;
5588 struct loop *body = body_bb->loop_father;
5589
5590 if (chunking)
5591 {
5592 struct loop *chunk_loop = alloc_loop ();
5593 chunk_loop->header = head_bb;
5594 chunk_loop->latch = bottom_bb;
5595 add_loop (chunk_loop, parent);
5596 parent = chunk_loop;
5597 }
5598 else if (parent != body)
5599 {
5600 gcc_assert (body->header == body_bb);
5601 gcc_assert (body->latch == cont_bb
5602 || single_pred (body->latch) == cont_bb);
5603 parent = NULL;
5604 }
5605
5606 if (parent)
5607 {
5608 struct loop *body_loop = alloc_loop ();
5609 body_loop->header = body_bb;
5610 body_loop->latch = cont_bb;
5611 add_loop (body_loop, parent);
5612 }
5613 }
5614 }
5615
5616 /* Expand the OMP loop defined by REGION. */
5617
5618 static void
5619 expand_omp_for (struct omp_region *region, gimple *inner_stmt)
5620 {
5621 struct omp_for_data fd;
5622 struct omp_for_data_loop *loops;
5623
5624 loops
5625 = (struct omp_for_data_loop *)
5626 alloca (gimple_omp_for_collapse (last_stmt (region->entry))
5627 * sizeof (struct omp_for_data_loop));
5628 omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
5629 &fd, loops);
5630 region->sched_kind = fd.sched_kind;
5631 region->sched_modifiers = fd.sched_modifiers;
5632
5633 gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
5634 BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5635 FALLTHRU_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
5636 if (region->cont)
5637 {
5638 gcc_assert (EDGE_COUNT (region->cont->succs) == 2);
5639 BRANCH_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5640 FALLTHRU_EDGE (region->cont)->flags &= ~EDGE_ABNORMAL;
5641 }
5642 else
5643 /* If there isn't a continue then this is a degenerate case where
5644 the introduction of abnormal edges during lowering will prevent
5645 original loops from being detected. Fix that up. */
5646 loops_state_set (LOOPS_NEED_FIXUP);
5647
5648 if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
5649 expand_omp_simd (region, &fd);
5650 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_CILKFOR)
5651 expand_cilk_for (region, &fd);
5652 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
5653 {
5654 gcc_assert (!inner_stmt);
5655 expand_oacc_for (region, &fd);
5656 }
5657 else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
5658 {
5659 if (gimple_omp_for_combined_into_p (fd.for_stmt))
5660 expand_omp_taskloop_for_inner (region, &fd, inner_stmt);
5661 else
5662 expand_omp_taskloop_for_outer (region, &fd, inner_stmt);
5663 }
5664 else if (fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
5665 && !fd.have_ordered)
5666 {
5667 if (fd.chunk_size == NULL)
5668 expand_omp_for_static_nochunk (region, &fd, inner_stmt);
5669 else
5670 expand_omp_for_static_chunk (region, &fd, inner_stmt);
5671 }
5672 else
5673 {
5674 int fn_index, start_ix, next_ix;
5675
5676 gcc_assert (gimple_omp_for_kind (fd.for_stmt)
5677 == GF_OMP_FOR_KIND_FOR);
5678 if (fd.chunk_size == NULL
5679 && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
5680 fd.chunk_size = integer_zero_node;
5681 gcc_assert (fd.sched_kind != OMP_CLAUSE_SCHEDULE_AUTO);
5682 switch (fd.sched_kind)
5683 {
5684 case OMP_CLAUSE_SCHEDULE_RUNTIME:
5685 fn_index = 3;
5686 break;
5687 case OMP_CLAUSE_SCHEDULE_DYNAMIC:
5688 case OMP_CLAUSE_SCHEDULE_GUIDED:
5689 if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC)
5690 && !fd.ordered
5691 && !fd.have_ordered)
5692 {
5693 fn_index = 3 + fd.sched_kind;
5694 break;
5695 }
5696 /* FALLTHRU */
5697 default:
5698 fn_index = fd.sched_kind;
5699 break;
5700 }
5701 if (!fd.ordered)
5702 fn_index += fd.have_ordered * 6;
5703 if (fd.ordered)
5704 start_ix = ((int)BUILT_IN_GOMP_LOOP_DOACROSS_STATIC_START) + fn_index;
5705 else
5706 start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
5707 next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
5708 if (fd.iter_type == long_long_unsigned_type_node)
5709 {
5710 start_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_START
5711 - (int)BUILT_IN_GOMP_LOOP_STATIC_START);
5712 next_ix += ((int)BUILT_IN_GOMP_LOOP_ULL_STATIC_NEXT
5713 - (int)BUILT_IN_GOMP_LOOP_STATIC_NEXT);
5714 }
5715 expand_omp_for_generic (region, &fd, (enum built_in_function) start_ix,
5716 (enum built_in_function) next_ix, inner_stmt);
5717 }
5718
5719 if (gimple_in_ssa_p (cfun))
5720 update_ssa (TODO_update_ssa_only_virtuals);
5721 }
5722
5723 /* Expand code for an OpenMP sections directive. In pseudo code, we generate
5724
5725 v = GOMP_sections_start (n);
5726 L0:
5727 switch (v)
5728 {
5729 case 0:
5730 goto L2;
5731 case 1:
5732 section 1;
5733 goto L1;
5734 case 2:
5735 ...
5736 case n:
5737 ...
5738 default:
5739 abort ();
5740 }
5741 L1:
5742 v = GOMP_sections_next ();
5743 goto L0;
5744 L2:
5745 reduction;
5746
5747 If this is a combined parallel sections, replace the call to
5748 GOMP_sections_start with a call to GOMP_sections_next. */
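/* For instance (purely illustrative, not from the original sources),

     #pragma omp sections
     {
       #pragma omp section
	 stmt1 ();
       #pragma omp section
	 stmt2 ();
     }

   yields a switch whose case 1 and case 2 dispatch to the two section
   bodies, whose case 0 branches to L2, and, when the region is not a
   combined parallel sections, a call to GOMP_sections_start (2).  */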
5749
5750 static void
5751 expand_omp_sections (struct omp_region *region)
5752 {
5753 tree t, u, vin = NULL, vmain, vnext, l2;
5754 unsigned len;
5755 basic_block entry_bb, l0_bb, l1_bb, l2_bb, default_bb;
5756 gimple_stmt_iterator si, switch_si;
5757 gomp_sections *sections_stmt;
5758 gimple *stmt;
5759 gomp_continue *cont;
5760 edge_iterator ei;
5761 edge e;
5762 struct omp_region *inner;
5763 unsigned i, casei;
5764 bool exit_reachable = region->cont != NULL;
5765
5766 gcc_assert (region->exit != NULL);
5767 entry_bb = region->entry;
5768 l0_bb = single_succ (entry_bb);
5769 l1_bb = region->cont;
5770 l2_bb = region->exit;
5771 if (single_pred_p (l2_bb) && single_pred (l2_bb) == l0_bb)
5772 l2 = gimple_block_label (l2_bb);
5773 else
5774 {
5775 /* This can happen if there are reductions. */
5776 len = EDGE_COUNT (l0_bb->succs);
5777 gcc_assert (len > 0);
5778 e = EDGE_SUCC (l0_bb, len - 1);
5779 si = gsi_last_bb (e->dest);
5780 l2 = NULL_TREE;
5781 if (gsi_end_p (si)
5782 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5783 l2 = gimple_block_label (e->dest);
5784 else
5785 FOR_EACH_EDGE (e, ei, l0_bb->succs)
5786 {
5787 si = gsi_last_bb (e->dest);
5788 if (gsi_end_p (si)
5789 || gimple_code (gsi_stmt (si)) != GIMPLE_OMP_SECTION)
5790 {
5791 l2 = gimple_block_label (e->dest);
5792 break;
5793 }
5794 }
5795 }
5796 if (exit_reachable)
5797 default_bb = create_empty_bb (l1_bb->prev_bb);
5798 else
5799 default_bb = create_empty_bb (l0_bb);
5800
5801 /* We will build a switch() with enough cases for all the
5802 GIMPLE_OMP_SECTION regions, a '0' case to signal that there is no more
5803 work, and a default case to abort if something goes wrong. */
5804 len = EDGE_COUNT (l0_bb->succs);
5805
5806 /* Use vec::quick_push on label_vec throughout, since we know the size
5807 in advance. */
5808 auto_vec<tree> label_vec (len);
5809
5810 /* The call to GOMP_sections_start goes in ENTRY_BB, replacing the
5811 GIMPLE_OMP_SECTIONS statement. */
5812 si = gsi_last_bb (entry_bb);
5813 sections_stmt = as_a <gomp_sections *> (gsi_stmt (si));
5814 gcc_assert (gimple_code (sections_stmt) == GIMPLE_OMP_SECTIONS);
5815 vin = gimple_omp_sections_control (sections_stmt);
5816 if (!is_combined_parallel (region))
5817 {
5818 /* If we are not inside a combined parallel+sections region,
5819 call GOMP_sections_start. */
5820 t = build_int_cst (unsigned_type_node, len - 1);
5821 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_START);
5822 stmt = gimple_build_call (u, 1, t);
5823 }
5824 else
5825 {
5826 /* Otherwise, call GOMP_sections_next. */
5827 u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5828 stmt = gimple_build_call (u, 0);
5829 }
5830 gimple_call_set_lhs (stmt, vin);
5831 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5832 gsi_remove (&si, true);
5833
5834 /* The switch() statement replacing GIMPLE_OMP_SECTIONS_SWITCH goes in
5835 L0_BB. */
5836 switch_si = gsi_last_bb (l0_bb);
5837 gcc_assert (gimple_code (gsi_stmt (switch_si)) == GIMPLE_OMP_SECTIONS_SWITCH);
5838 if (exit_reachable)
5839 {
5840 cont = as_a <gomp_continue *> (last_stmt (l1_bb));
5841 gcc_assert (gimple_code (cont) == GIMPLE_OMP_CONTINUE);
5842 vmain = gimple_omp_continue_control_use (cont);
5843 vnext = gimple_omp_continue_control_def (cont);
5844 }
5845 else
5846 {
5847 vmain = vin;
5848 vnext = NULL_TREE;
5849 }
5850
5851 t = build_case_label (build_int_cst (unsigned_type_node, 0), NULL, l2);
5852 label_vec.quick_push (t);
5853 i = 1;
5854
5855 /* Convert each GIMPLE_OMP_SECTION into a CASE_LABEL_EXPR. */
5856 for (inner = region->inner, casei = 1;
5857 inner;
5858 inner = inner->next, i++, casei++)
5859 {
5860 basic_block s_entry_bb, s_exit_bb;
5861
5862 /* Skip optional reduction region. */
5863 if (inner->type == GIMPLE_OMP_ATOMIC_LOAD)
5864 {
5865 --i;
5866 --casei;
5867 continue;
5868 }
5869
5870 s_entry_bb = inner->entry;
5871 s_exit_bb = inner->exit;
5872
5873 t = gimple_block_label (s_entry_bb);
5874 u = build_int_cst (unsigned_type_node, casei);
5875 u = build_case_label (u, NULL, t);
5876 label_vec.quick_push (u);
5877
5878 si = gsi_last_bb (s_entry_bb);
5879 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SECTION);
5880 gcc_assert (i < len || gimple_omp_section_last_p (gsi_stmt (si)));
5881 gsi_remove (&si, true);
5882 single_succ_edge (s_entry_bb)->flags = EDGE_FALLTHRU;
5883
5884 if (s_exit_bb == NULL)
5885 continue;
5886
5887 si = gsi_last_bb (s_exit_bb);
5888 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5889 gsi_remove (&si, true);
5890
5891 single_succ_edge (s_exit_bb)->flags = EDGE_FALLTHRU;
5892 }
5893
5894 /* Error handling code goes in DEFAULT_BB. */
5895 t = gimple_block_label (default_bb);
5896 u = build_case_label (NULL, NULL, t);
5897 make_edge (l0_bb, default_bb, 0);
5898 add_bb_to_loop (default_bb, current_loops->tree_root);
5899
5900 stmt = gimple_build_switch (vmain, u, label_vec);
5901 gsi_insert_after (&switch_si, stmt, GSI_SAME_STMT);
5902 gsi_remove (&switch_si, true);
5903
5904 si = gsi_start_bb (default_bb);
5905 stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_TRAP), 0);
5906 gsi_insert_after (&si, stmt, GSI_CONTINUE_LINKING);
5907
5908 if (exit_reachable)
5909 {
5910 tree bfn_decl;
5911
5912 /* Code to get the next section goes in L1_BB. */
5913 si = gsi_last_bb (l1_bb);
5914 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CONTINUE);
5915
5916 bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
5917 stmt = gimple_build_call (bfn_decl, 0);
5918 gimple_call_set_lhs (stmt, vnext);
5919 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5920 gsi_remove (&si, true);
5921
5922 single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
5923 }
5924
5925 /* Cleanup function replaces GIMPLE_OMP_RETURN in EXIT_BB. */
5926 si = gsi_last_bb (l2_bb);
5927 if (gimple_omp_return_nowait_p (gsi_stmt (si)))
5928 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_NOWAIT);
5929 else if (gimple_omp_return_lhs (gsi_stmt (si)))
5930 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END_CANCEL);
5931 else
5932 t = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_END);
5933 stmt = gimple_build_call (t, 0);
5934 if (gimple_omp_return_lhs (gsi_stmt (si)))
5935 gimple_call_set_lhs (stmt, gimple_omp_return_lhs (gsi_stmt (si)));
5936 gsi_insert_after (&si, stmt, GSI_SAME_STMT);
5937 gsi_remove (&si, true);
5938
5939 set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
5940 }
5941
5942 /* Expand code for an OpenMP single directive. We've already expanded
5943 much of the code; here we simply place the GOMP_barrier call. */
5944
5945 static void
5946 expand_omp_single (struct omp_region *region)
5947 {
5948 basic_block entry_bb, exit_bb;
5949 gimple_stmt_iterator si;
5950
5951 entry_bb = region->entry;
5952 exit_bb = region->exit;
5953
5954 si = gsi_last_bb (entry_bb);
5955 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
5956 gsi_remove (&si, true);
5957 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5958
5959 si = gsi_last_bb (exit_bb);
5960 if (!gimple_omp_return_nowait_p (gsi_stmt (si)))
5961 {
5962 tree t = gimple_omp_return_lhs (gsi_stmt (si));
5963 gsi_insert_after (&si, omp_build_barrier (t), GSI_SAME_STMT);
5964 }
5965 gsi_remove (&si, true);
5966 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5967 }
5968
5969 /* Generic expansion for OpenMP synchronization directives: master,
5970 ordered and critical. All we need to do here is remove the entry
5971 and exit markers for REGION. */
5972
5973 static void
5974 expand_omp_synch (struct omp_region *region)
5975 {
5976 basic_block entry_bb, exit_bb;
5977 gimple_stmt_iterator si;
5978
5979 entry_bb = region->entry;
5980 exit_bb = region->exit;
5981
5982 si = gsi_last_bb (entry_bb);
5983 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
5984 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
5985 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
5986 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
5987 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
5988 || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TEAMS);
5989 gsi_remove (&si, true);
5990 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
5991
5992 if (exit_bb)
5993 {
5994 si = gsi_last_bb (exit_bb);
5995 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_RETURN);
5996 gsi_remove (&si, true);
5997 single_succ_edge (exit_bb)->flags = EDGE_FALLTHRU;
5998 }
5999 }
6000
6001 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6002 operation as a normal volatile load. */
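/* Illustrative sketch (not from the original sources): for

     #pragma omp atomic read
       v = x;

   on a 32-bit int this emits roughly

     v = __atomic_load_4 (&x, MEMMODEL_RELAXED);

   with MEMMODEL_SEQ_CST used instead when the seq_cst clause is present.
   The names v and x are assumptions of the example.  */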
6003
6004 static bool
6005 expand_omp_atomic_load (basic_block load_bb, tree addr,
6006 tree loaded_val, int index)
6007 {
6008 enum built_in_function tmpbase;
6009 gimple_stmt_iterator gsi;
6010 basic_block store_bb;
6011 location_t loc;
6012 gimple *stmt;
6013 tree decl, call, type, itype;
6014
6015 gsi = gsi_last_bb (load_bb);
6016 stmt = gsi_stmt (gsi);
6017 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6018 loc = gimple_location (stmt);
6019
6020 /* ??? If the target does not implement atomic_load_optab[mode], and mode
6021 is smaller than word size, then expand_atomic_load assumes that the load
6022 is atomic. We could avoid the builtin entirely in this case. */
6023
6024 tmpbase = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6025 decl = builtin_decl_explicit (tmpbase);
6026 if (decl == NULL_TREE)
6027 return false;
6028
6029 type = TREE_TYPE (loaded_val);
6030 itype = TREE_TYPE (TREE_TYPE (decl));
6031
6032 call = build_call_expr_loc (loc, decl, 2, addr,
6033 build_int_cst (NULL,
6034 gimple_omp_atomic_seq_cst_p (stmt)
6035 ? MEMMODEL_SEQ_CST
6036 : MEMMODEL_RELAXED));
6037 if (!useless_type_conversion_p (type, itype))
6038 call = fold_build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6039 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6040
6041 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6042 gsi_remove (&gsi, true);
6043
6044 store_bb = single_succ (load_bb);
6045 gsi = gsi_last_bb (store_bb);
6046 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6047 gsi_remove (&gsi, true);
6048
6049 if (gimple_in_ssa_p (cfun))
6050 update_ssa (TODO_update_ssa_no_phi);
6051
6052 return true;
6053 }
6054
6055 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6056 operation as a normal volatile store. */
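/* Illustrative sketch (not from the original sources): for

     #pragma omp atomic write
       x = expr;

   on a 32-bit int this emits roughly

     __atomic_store_4 (&x, expr, MEMMODEL_RELAXED);

   and, when the old value is also needed, the exchange variant
   __atomic_exchange_4 is used instead.  The names x and expr are
   assumptions of the example.  */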
6057
6058 static bool
6059 expand_omp_atomic_store (basic_block load_bb, tree addr,
6060 tree loaded_val, tree stored_val, int index)
6061 {
6062 enum built_in_function tmpbase;
6063 gimple_stmt_iterator gsi;
6064 basic_block store_bb = single_succ (load_bb);
6065 location_t loc;
6066 gimple *stmt;
6067 tree decl, call, type, itype;
6068 machine_mode imode;
6069 bool exchange;
6070
6071 gsi = gsi_last_bb (load_bb);
6072 stmt = gsi_stmt (gsi);
6073 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_LOAD);
6074
6075 /* If the load value is needed, then this isn't a store but an exchange. */
6076 exchange = gimple_omp_atomic_need_value_p (stmt);
6077
6078 gsi = gsi_last_bb (store_bb);
6079 stmt = gsi_stmt (gsi);
6080 gcc_assert (gimple_code (stmt) == GIMPLE_OMP_ATOMIC_STORE);
6081 loc = gimple_location (stmt);
6082
6083 /* ??? If the target does not implement atomic_store_optab[mode], and mode
6084 is smaller than word size, then expand_atomic_store assumes that the store
6085 is atomic. We could avoid the builtin entirely in this case. */
6086
6087 tmpbase = (exchange ? BUILT_IN_ATOMIC_EXCHANGE_N : BUILT_IN_ATOMIC_STORE_N);
6088 tmpbase = (enum built_in_function) ((int) tmpbase + index + 1);
6089 decl = builtin_decl_explicit (tmpbase);
6090 if (decl == NULL_TREE)
6091 return false;
6092
6093 type = TREE_TYPE (stored_val);
6094
6095 /* Dig out the type of the function's second argument. */
6096 itype = TREE_TYPE (decl);
6097 itype = TYPE_ARG_TYPES (itype);
6098 itype = TREE_CHAIN (itype);
6099 itype = TREE_VALUE (itype);
6100 imode = TYPE_MODE (itype);
6101
6102 if (exchange && !can_atomic_exchange_p (imode, true))
6103 return false;
6104
6105 if (!useless_type_conversion_p (itype, type))
6106 stored_val = fold_build1_loc (loc, VIEW_CONVERT_EXPR, itype, stored_val);
6107 call = build_call_expr_loc (loc, decl, 3, addr, stored_val,
6108 build_int_cst (NULL,
6109 gimple_omp_atomic_seq_cst_p (stmt)
6110 ? MEMMODEL_SEQ_CST
6111 : MEMMODEL_RELAXED));
6112 if (exchange)
6113 {
6114 if (!useless_type_conversion_p (type, itype))
6115 call = build1_loc (loc, VIEW_CONVERT_EXPR, type, call);
6116 call = build2_loc (loc, MODIFY_EXPR, void_type_node, loaded_val, call);
6117 }
6118
6119 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6120 gsi_remove (&gsi, true);
6121
6122 /* Remove the GIMPLE_OMP_ATOMIC_LOAD that we verified above. */
6123 gsi = gsi_last_bb (load_bb);
6124 gsi_remove (&gsi, true);
6125
6126 if (gimple_in_ssa_p (cfun))
6127 update_ssa (TODO_update_ssa_no_phi);
6128
6129 return true;
6130 }
6131
6132 /* A subroutine of expand_omp_atomic. Attempt to implement the atomic
6133 operation as a __atomic_fetch_op builtin. INDEX is log2 of the
6134 size of the data type, and thus usable to find the index of the builtin
6135 decl. Returns false if the expression is not of the proper form. */
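/* Illustrative sketch (not from the original sources): for

     #pragma omp atomic
       x += 1;

   on a 32-bit int the update matches PLUS_EXPR, so this emits roughly

     __atomic_fetch_add_4 (&x, 1, MEMMODEL_RELAXED);

   (the add-fetch variant is used if the updated value is needed, and
   MEMMODEL_SEQ_CST when the seq_cst clause is present).  The name x is an
   assumption of the example.  */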
6136
6137 static bool
6138 expand_omp_atomic_fetch_op (basic_block load_bb,
6139 tree addr, tree loaded_val,
6140 tree stored_val, int index)
6141 {
6142 enum built_in_function oldbase, newbase, tmpbase;
6143 tree decl, itype, call;
6144 tree lhs, rhs;
6145 basic_block store_bb = single_succ (load_bb);
6146 gimple_stmt_iterator gsi;
6147 gimple *stmt;
6148 location_t loc;
6149 enum tree_code code;
6150 bool need_old, need_new;
6151 machine_mode imode;
6152 bool seq_cst;
6153
6154 /* We expect to find the following sequences:
6155
6156 load_bb:
6157 GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
6158
6159 store_bb:
6160 val = tmp OP something; (or: something OP tmp)
6161 GIMPLE_OMP_STORE (val)
6162
6163 ???FIXME: Allow a more flexible sequence.
6164 Perhaps use data flow to pick the statements.
6165
6166 */
6167
6168 gsi = gsi_after_labels (store_bb);
6169 stmt = gsi_stmt (gsi);
6170 loc = gimple_location (stmt);
6171 if (!is_gimple_assign (stmt))
6172 return false;
6173 gsi_next (&gsi);
6174 if (gimple_code (gsi_stmt (gsi)) != GIMPLE_OMP_ATOMIC_STORE)
6175 return false;
6176 need_new = gimple_omp_atomic_need_value_p (gsi_stmt (gsi));
6177 need_old = gimple_omp_atomic_need_value_p (last_stmt (load_bb));
6178 seq_cst = gimple_omp_atomic_seq_cst_p (last_stmt (load_bb));
6179 gcc_checking_assert (!need_old || !need_new);
6180
6181 if (!operand_equal_p (gimple_assign_lhs (stmt), stored_val, 0))
6182 return false;
6183
6184 /* Check for one of the supported fetch-op operations. */
6185 code = gimple_assign_rhs_code (stmt);
6186 switch (code)
6187 {
6188 case PLUS_EXPR:
6189 case POINTER_PLUS_EXPR:
6190 oldbase = BUILT_IN_ATOMIC_FETCH_ADD_N;
6191 newbase = BUILT_IN_ATOMIC_ADD_FETCH_N;
6192 break;
6193 case MINUS_EXPR:
6194 oldbase = BUILT_IN_ATOMIC_FETCH_SUB_N;
6195 newbase = BUILT_IN_ATOMIC_SUB_FETCH_N;
6196 break;
6197 case BIT_AND_EXPR:
6198 oldbase = BUILT_IN_ATOMIC_FETCH_AND_N;
6199 newbase = BUILT_IN_ATOMIC_AND_FETCH_N;
6200 break;
6201 case BIT_IOR_EXPR:
6202 oldbase = BUILT_IN_ATOMIC_FETCH_OR_N;
6203 newbase = BUILT_IN_ATOMIC_OR_FETCH_N;
6204 break;
6205 case BIT_XOR_EXPR:
6206 oldbase = BUILT_IN_ATOMIC_FETCH_XOR_N;
6207 newbase = BUILT_IN_ATOMIC_XOR_FETCH_N;
6208 break;
6209 default:
6210 return false;
6211 }
6212
6213 /* Make sure the expression is of the proper form. */
6214 if (operand_equal_p (gimple_assign_rhs1 (stmt), loaded_val, 0))
6215 rhs = gimple_assign_rhs2 (stmt);
6216 else if (commutative_tree_code (gimple_assign_rhs_code (stmt))
6217 && operand_equal_p (gimple_assign_rhs2 (stmt), loaded_val, 0))
6218 rhs = gimple_assign_rhs1 (stmt);
6219 else
6220 return false;
6221
6222 tmpbase = ((enum built_in_function)
6223 ((need_new ? newbase : oldbase) + index + 1));
6224 decl = builtin_decl_explicit (tmpbase);
6225 if (decl == NULL_TREE)
6226 return false;
6227 itype = TREE_TYPE (TREE_TYPE (decl));
6228 imode = TYPE_MODE (itype);
6229
6230 /* We could test all of the various optabs involved, but the fact of the
6231 matter is that (with the exception of i486 vs i586 and xadd) all targets
6232 that support any atomic operation optab also implement compare-and-swap.
6233 Let optabs.c take care of expanding any compare-and-swap loop. */
6234 if (!can_compare_and_swap_p (imode, true))
6235 return false;
6236
6237 gsi = gsi_last_bb (load_bb);
6238 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
6239
6240 /* OpenMP does not imply any barrier-like semantics on its atomic ops.
6241 It only requires that the operation happen atomically. Thus we can
6242 use the RELAXED memory model. */
6243 call = build_call_expr_loc (loc, decl, 3, addr,
6244 fold_convert_loc (loc, itype, rhs),
6245 build_int_cst (NULL,
6246 seq_cst ? MEMMODEL_SEQ_CST
6247 : MEMMODEL_RELAXED));
6248
6249 if (need_old || need_new)
6250 {
6251 lhs = need_old ? loaded_val : stored_val;
6252 call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
6253 call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
6254 }
6255 else
6256 call = fold_convert_loc (loc, void_type_node, call);
6257 force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
6258 gsi_remove (&gsi, true);
6259
6260 gsi = gsi_last_bb (store_bb);
6261 gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
6262 gsi_remove (&gsi, true);
6263 gsi = gsi_last_bb (store_bb);
6264 stmt = gsi_stmt (gsi);
6265 gsi_remove (&gsi, true);
6266
6267 if (gimple_in_ssa_p (cfun))
6268 {
6269 release_defs (stmt);
6270 update_ssa (TODO_update_ssa_no_phi);
6271 }
6272
6273 return true;
6274 }
6275
6276 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6277
6278 oldval = *addr;
6279 repeat:
6280 newval = rhs; // with oldval replacing *addr in rhs
6281 prev = __sync_val_compare_and_swap (addr, oldval, newval);
6282 if (prev != oldval)
6283 { oldval = prev; goto repeat; }
6284
6285 INDEX is log2 of the size of the data type, and thus usable to find the
6286 index of the builtin decl. */
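/* Illustrative sketch (not from the original sources): for

     #pragma omp atomic
       d *= 2.0;

   on a double the fetch-op path does not apply, so this routine is used:
   the value is view-converted to an 8-byte integer and
   __sync_val_compare_and_swap_8 is retried until it reports that memory
   still held the value originally loaded.  The name d is an assumption of
   the example.  */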
6287
6288 static bool
6289 expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
6290 tree addr, tree loaded_val, tree stored_val,
6291 int index)
6292 {
6293 tree loadedi, storedi, initial, new_storedi, old_vali;
6294 tree type, itype, cmpxchg, iaddr;
6295 gimple_stmt_iterator si;
6296 basic_block loop_header = single_succ (load_bb);
6297 gimple *phi, *stmt;
6298 edge e;
6299 enum built_in_function fncode;
6300
6301 /* ??? We need a non-pointer interface to __atomic_compare_exchange in
6302 order to use the RELAXED memory model effectively. */
6303 fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
6304 + index + 1);
6305 cmpxchg = builtin_decl_explicit (fncode);
6306 if (cmpxchg == NULL_TREE)
6307 return false;
6308 type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6309 itype = TREE_TYPE (TREE_TYPE (cmpxchg));
6310
6311 if (!can_compare_and_swap_p (TYPE_MODE (itype), true))
6312 return false;
6313
6314 /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD. */
6315 si = gsi_last_bb (load_bb);
6316 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6317
6318 /* For floating-point values, we'll need to view-convert them to integers
6319 so that we can perform the atomic compare and swap. Simplify the
6320 following code by always setting up the "i"ntegral variables. */
6321 if (!INTEGRAL_TYPE_P (type) && !POINTER_TYPE_P (type))
6322 {
6323 tree iaddr_val;
6324
6325 iaddr = create_tmp_reg (build_pointer_type_for_mode (itype, ptr_mode,
6326 true));
6327 iaddr_val
6328 = force_gimple_operand_gsi (&si,
6329 fold_convert (TREE_TYPE (iaddr), addr),
6330 false, NULL_TREE, true, GSI_SAME_STMT);
6331 stmt = gimple_build_assign (iaddr, iaddr_val);
6332 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6333 loadedi = create_tmp_var (itype);
6334 if (gimple_in_ssa_p (cfun))
6335 loadedi = make_ssa_name (loadedi);
6336 }
6337 else
6338 {
6339 iaddr = addr;
6340 loadedi = loaded_val;
6341 }
6342
6343 fncode = (enum built_in_function) (BUILT_IN_ATOMIC_LOAD_N + index + 1);
6344 tree loaddecl = builtin_decl_explicit (fncode);
6345 if (loaddecl)
6346 initial
6347 = fold_convert (TREE_TYPE (TREE_TYPE (iaddr)),
6348 build_call_expr (loaddecl, 2, iaddr,
6349 build_int_cst (NULL_TREE,
6350 MEMMODEL_RELAXED)));
6351 else
6352 initial = build2 (MEM_REF, TREE_TYPE (TREE_TYPE (iaddr)), iaddr,
6353 build_int_cst (TREE_TYPE (iaddr), 0));
6354
6355 initial
6356 = force_gimple_operand_gsi (&si, initial, true, NULL_TREE, true,
6357 GSI_SAME_STMT);
6358
6359 /* Move the value to the LOADEDI temporary. */
6360 if (gimple_in_ssa_p (cfun))
6361 {
6362 gcc_assert (gimple_seq_empty_p (phi_nodes (loop_header)));
6363 phi = create_phi_node (loadedi, loop_header);
6364 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, single_succ_edge (load_bb)),
6365 initial);
6366 }
6367 else
6368 gsi_insert_before (&si,
6369 gimple_build_assign (loadedi, initial),
6370 GSI_SAME_STMT);
6371 if (loadedi != loaded_val)
6372 {
6373 gimple_stmt_iterator gsi2;
6374 tree x;
6375
6376 x = build1 (VIEW_CONVERT_EXPR, type, loadedi);
6377 gsi2 = gsi_start_bb (loop_header);
6378 if (gimple_in_ssa_p (cfun))
6379 {
6380 gassign *stmt;
6381 x = force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6382 true, GSI_SAME_STMT);
6383 stmt = gimple_build_assign (loaded_val, x);
6384 gsi_insert_before (&gsi2, stmt, GSI_SAME_STMT);
6385 }
6386 else
6387 {
6388 x = build2 (MODIFY_EXPR, TREE_TYPE (loaded_val), loaded_val, x);
6389 force_gimple_operand_gsi (&gsi2, x, true, NULL_TREE,
6390 true, GSI_SAME_STMT);
6391 }
6392 }
6393 gsi_remove (&si, true);
6394
6395 si = gsi_last_bb (store_bb);
6396 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6397
6398 if (iaddr == addr)
6399 storedi = stored_val;
6400 else
6401 storedi =
6402 force_gimple_operand_gsi (&si,
6403 build1 (VIEW_CONVERT_EXPR, itype,
6404 stored_val), true, NULL_TREE, true,
6405 GSI_SAME_STMT);
6406
6407 /* Build the compare&swap statement. */
6408 new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
6409 new_storedi = force_gimple_operand_gsi (&si,
6410 fold_convert (TREE_TYPE (loadedi),
6411 new_storedi),
6412 true, NULL_TREE,
6413 true, GSI_SAME_STMT);
6414
6415 if (gimple_in_ssa_p (cfun))
6416 old_vali = loadedi;
6417 else
6418 {
6419 old_vali = create_tmp_var (TREE_TYPE (loadedi));
6420 stmt = gimple_build_assign (old_vali, loadedi);
6421 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6422
6423 stmt = gimple_build_assign (loadedi, new_storedi);
6424 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6425 }
6426
6427 /* Note that we always perform the comparison as an integer, even for
6428 floating point. This allows the atomic operation to properly
6429 succeed even with NaNs and -0.0. */
6430 stmt = gimple_build_cond_empty
6431 (build2 (NE_EXPR, boolean_type_node,
6432 new_storedi, old_vali));
6433 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6434
6435 /* Update cfg. */
6436 e = single_succ_edge (store_bb);
6437 e->flags &= ~EDGE_FALLTHRU;
6438 e->flags |= EDGE_FALSE_VALUE;
6439
6440 e = make_edge (store_bb, loop_header, EDGE_TRUE_VALUE);
6441
6442 /* Copy the new value to loadedi (we already did that before the condition
6443 if we are not in SSA). */
6444 if (gimple_in_ssa_p (cfun))
6445 {
6446 phi = gimple_seq_first_stmt (phi_nodes (loop_header));
6447 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e), new_storedi);
6448 }
6449
6450 /* Remove GIMPLE_OMP_ATOMIC_STORE. */
6451 gsi_remove (&si, true);
6452
6453 struct loop *loop = alloc_loop ();
6454 loop->header = loop_header;
6455 loop->latch = store_bb;
6456 add_loop (loop, loop_header->loop_father);
6457
6458 if (gimple_in_ssa_p (cfun))
6459 update_ssa (TODO_update_ssa_no_phi);
6460
6461 return true;
6462 }
6463
6464 /* A subroutine of expand_omp_atomic. Implement the atomic operation as:
6465
6466 GOMP_atomic_start ();
6467 *addr = rhs;
6468 GOMP_atomic_end ();
6469
6470 The result is not globally atomic, but works so long as all parallel
6471 references are within #pragma omp atomic directives. According to
6472 responses received from omp@openmp.org, appears to be within spec.
6473 Which makes sense, since that's how several other compilers handle
6474 this situation as well.
6475 LOADED_VAL and ADDR are the operands of GIMPLE_OMP_ATOMIC_LOAD we're
6476 expanding. STORED_VAL is the operand of the matching
6477 GIMPLE_OMP_ATOMIC_STORE.
6478
6479 We replace
6480 GIMPLE_OMP_ATOMIC_LOAD (loaded_val, addr) with
6481 loaded_val = *addr;
6482
6483 and replace
6484 GIMPLE_OMP_ATOMIC_STORE (stored_val) with
6485 *addr = stored_val;
6486 */
6487
6488 static bool
6489 expand_omp_atomic_mutex (basic_block load_bb, basic_block store_bb,
6490 tree addr, tree loaded_val, tree stored_val)
6491 {
6492 gimple_stmt_iterator si;
6493 gassign *stmt;
6494 tree t;
6495
6496 si = gsi_last_bb (load_bb);
6497 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
6498
6499 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_START);
6500 t = build_call_expr (t, 0);
6501 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6502
6503 stmt = gimple_build_assign (loaded_val, build_simple_mem_ref (addr));
6504 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6505 gsi_remove (&si, true);
6506
6507 si = gsi_last_bb (store_bb);
6508 gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_STORE);
6509
6510 stmt = gimple_build_assign (build_simple_mem_ref (unshare_expr (addr)),
6511 stored_val);
6512 gsi_insert_before (&si, stmt, GSI_SAME_STMT);
6513
6514 t = builtin_decl_explicit (BUILT_IN_GOMP_ATOMIC_END);
6515 t = build_call_expr (t, 0);
6516 force_gimple_operand_gsi (&si, t, true, NULL_TREE, true, GSI_SAME_STMT);
6517 gsi_remove (&si, true);
6518
6519 if (gimple_in_ssa_p (cfun))
6520 update_ssa (TODO_update_ssa_no_phi);
6521 return true;
6522 }
6523
6524 /* Expand a GIMPLE_OMP_ATOMIC statement. We try to expand
6525 using expand_omp_atomic_fetch_op. If that fails, we try to
6526 call expand_omp_atomic_pipeline, and if that fails too, the
6527 ultimate fallback is wrapping the operation in a mutex
6528 (expand_omp_atomic_mutex). REGION is the atomic region built
6529 by build_omp_regions_1(). */
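/* Illustrative note (not from the original sources): for a 4-byte int,
   TYPE_SIZE_UNIT is 4, so INDEX below is 2 and the _4 variants of the
   __atomic/__sync builtins are selected; the alignment check requires the
   type to be aligned to at least its own size before any of the inline
   expansions are attempted.  */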
6530
6531 static void
6532 expand_omp_atomic (struct omp_region *region)
6533 {
6534 basic_block load_bb = region->entry, store_bb = region->exit;
6535 gomp_atomic_load *load = as_a <gomp_atomic_load *> (last_stmt (load_bb));
6536 gomp_atomic_store *store = as_a <gomp_atomic_store *> (last_stmt (store_bb));
6537 tree loaded_val = gimple_omp_atomic_load_lhs (load);
6538 tree addr = gimple_omp_atomic_load_rhs (load);
6539 tree stored_val = gimple_omp_atomic_store_val (store);
6540 tree type = TYPE_MAIN_VARIANT (TREE_TYPE (TREE_TYPE (addr)));
6541 HOST_WIDE_INT index;
6542
6543 /* Make sure the type is one of the supported sizes. */
6544 index = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6545 index = exact_log2 (index);
6546 if (index >= 0 && index <= 4)
6547 {
6548 unsigned int align = TYPE_ALIGN_UNIT (type);
6549
6550 /* __sync builtins require strict data alignment. */
6551 if (exact_log2 (align) >= index)
6552 {
6553 /* Atomic load. */
6554 if (loaded_val == stored_val
6555 && (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6556 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6557 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6558 && expand_omp_atomic_load (load_bb, addr, loaded_val, index))
6559 return;
6560
6561 /* Atomic store. */
6562 if ((GET_MODE_CLASS (TYPE_MODE (type)) == MODE_INT
6563 || GET_MODE_CLASS (TYPE_MODE (type)) == MODE_FLOAT)
6564 && GET_MODE_BITSIZE (TYPE_MODE (type)) <= BITS_PER_WORD
6565 && store_bb == single_succ (load_bb)
6566 && first_stmt (store_bb) == store
6567 && expand_omp_atomic_store (load_bb, addr, loaded_val,
6568 stored_val, index))
6569 return;
6570
6571 /* When possible, use specialized atomic update functions. */
6572 if ((INTEGRAL_TYPE_P (type) || POINTER_TYPE_P (type))
6573 && store_bb == single_succ (load_bb)
6574 && expand_omp_atomic_fetch_op (load_bb, addr,
6575 loaded_val, stored_val, index))
6576 return;
6577
6578 /* If we don't have specialized __sync builtins, try and implement
6579 as a compare and swap loop. */
6580 if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
6581 loaded_val, stored_val, index))
6582 return;
6583 }
6584 }
6585
6586 /* The ultimate fallback is wrapping the operation in a mutex. */
6587 expand_omp_atomic_mutex (load_bb, store_bb, addr, loaded_val, stored_val);
6588 }
6589
6590 /* Mark the loops inside the kernels region starting at REGION_ENTRY and ending
6591 at REGION_EXIT. */
6592
6593 static void
6594 mark_loops_in_oacc_kernels_region (basic_block region_entry,
6595 basic_block region_exit)
6596 {
6597 struct loop *outer = region_entry->loop_father;
6598 gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
6599
6600 /* Don't parallelize the kernels region if it contains more than one outer
6601 loop. */
6602 unsigned int nr_outer_loops = 0;
6603 struct loop *single_outer = NULL;
6604 for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
6605 {
6606 gcc_assert (loop_outer (loop) == outer);
6607
6608 if (!dominated_by_p (CDI_DOMINATORS, loop->header, region_entry))
6609 continue;
6610
6611 if (region_exit != NULL
6612 && dominated_by_p (CDI_DOMINATORS, loop->header, region_exit))
6613 continue;
6614
6615 nr_outer_loops++;
6616 single_outer = loop;
6617 }
6618 if (nr_outer_loops != 1)
6619 return;
6620
6621 for (struct loop *loop = single_outer->inner; loop != NULL; loop = loop->inner)
6622 if (loop->next)
6623 return;
6624
6625 /* Mark the loops in the region. */
6626 for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
6627 loop->in_oacc_kernels_region = true;
6628 }
6629
6630 /* Types used to pass grid and workgroup sizes to kernel invocation. */
6631
6632 struct GTY(()) grid_launch_attributes_trees
6633 {
6634 tree kernel_dim_array_type;
6635 tree kernel_lattrs_dimnum_decl;
6636 tree kernel_lattrs_grid_decl;
6637 tree kernel_lattrs_group_decl;
6638 tree kernel_launch_attributes_type;
6639 };
6640
6641 static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
6642
6643 /* Create types used to pass kernel launch attributes to target. */
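/* For reference (illustrative only), the record built below corresponds
   roughly to

     struct __gomp_kernel_launch_attributes
     {
       uint32_t ndim;
       uint32_t grid_size[3];
       uint32_t group_size[3];
     };  */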
6644
6645 static void
6646 grid_create_kernel_launch_attr_types (void)
6647 {
6648 if (grid_attr_trees)
6649 return;
6650 grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
6651
6652 tree dim_arr_index_type
6653 = build_index_type (build_int_cst (integer_type_node, 2));
6654 grid_attr_trees->kernel_dim_array_type
6655 = build_array_type (uint32_type_node, dim_arr_index_type);
6656
6657 grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
6658 grid_attr_trees->kernel_lattrs_dimnum_decl
6659 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
6660 uint32_type_node);
6661 DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
6662
6663 grid_attr_trees->kernel_lattrs_grid_decl
6664 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
6665 grid_attr_trees->kernel_dim_array_type);
6666 DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
6667 = grid_attr_trees->kernel_lattrs_dimnum_decl;
6668 grid_attr_trees->kernel_lattrs_group_decl
6669 = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
6670 grid_attr_trees->kernel_dim_array_type);
6671 DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
6672 = grid_attr_trees->kernel_lattrs_grid_decl;
6673 finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
6674 "__gomp_kernel_launch_attributes",
6675 grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
6676 }
6677
6678 /* Insert before the current statement in GSI a store of VALUE to INDEX of
6679 array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR. VALUE must be
6680 of type uint32_type_node. */
6681
6682 static void
6683 grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
6684 tree fld_decl, int index, tree value)
6685 {
6686 tree ref = build4 (ARRAY_REF, uint32_type_node,
6687 build3 (COMPONENT_REF,
6688 grid_attr_trees->kernel_dim_array_type,
6689 range_var, fld_decl, NULL_TREE),
6690 build_int_cst (integer_type_node, index),
6691 NULL_TREE, NULL_TREE);
6692 gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
6693 }
6694
6695 /* Return a tree representation of a pointer to a structure with grid and
6696 work-group size information. Statements filling that information will be
6697 inserted before GSI; TGT_STMT is the target statement which has the
6698 necessary information in it. */
6699
6700 static tree
6701 grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
6702 gomp_target *tgt_stmt)
6703 {
6704 grid_create_kernel_launch_attr_types ();
6705 tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
6706 "__kernel_launch_attrs");
6707
6708 unsigned max_dim = 0;
6709 for (tree clause = gimple_omp_target_clauses (tgt_stmt);
6710 clause;
6711 clause = OMP_CLAUSE_CHAIN (clause))
6712 {
6713 if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
6714 continue;
6715
6716 unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
6717 max_dim = MAX (dim, max_dim);
6718
6719 grid_insert_store_range_dim (gsi, lattrs,
6720 grid_attr_trees->kernel_lattrs_grid_decl,
6721 dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
6722 grid_insert_store_range_dim (gsi, lattrs,
6723 grid_attr_trees->kernel_lattrs_group_decl,
6724 dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
6725 }
6726
6727 tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
6728 grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
6729 gcc_checking_assert (max_dim <= 2);
6730 tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
6731 gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
6732 GSI_SAME_STMT);
6733 TREE_ADDRESSABLE (lattrs) = 1;
6734 return build_fold_addr_expr (lattrs);
6735 }
6736
6737 /* Build target argument identifier from the DEVICE identifier, value
6738 identifier ID and whether the element also has a SUBSEQUENT_PARAM. */
6739
6740 static tree
6741 get_target_argument_identifier_1 (int device, bool subsequent_param, int id)
6742 {
6743 tree t = build_int_cst (integer_type_node, device);
6744 if (subsequent_param)
6745 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6746 build_int_cst (integer_type_node,
6747 GOMP_TARGET_ARG_SUBSEQUENT_PARAM));
6748 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6749 build_int_cst (integer_type_node, id));
6750 return t;
6751 }
6752
6753 /* Like the above, but return it in a type that can be directly stored as an
6754 element of the argument array. */
6755
6756 static tree
6757 get_target_argument_identifier (int device, bool subsequent_param, int id)
6758 {
6759 tree t = get_target_argument_identifier_1 (device, subsequent_param, id);
6760 return fold_convert (ptr_type_node, t);
6761 }
6762
6763 /* Return a target argument consisting of DEVICE identifier, value identifier
6764 ID, and the actual VALUE. */
6765
6766 static tree
6767 get_target_argument_value (gimple_stmt_iterator *gsi, int device, int id,
6768 tree value)
6769 {
6770 tree t = fold_build2 (LSHIFT_EXPR, integer_type_node,
6771 fold_convert (integer_type_node, value),
6772 build_int_cst (unsigned_type_node,
6773 GOMP_TARGET_ARG_VALUE_SHIFT));
6774 t = fold_build2 (BIT_IOR_EXPR, integer_type_node, t,
6775 get_target_argument_identifier_1 (device, false, id));
6776 t = fold_convert (ptr_type_node, t);
6777 return force_gimple_operand_gsi (gsi, t, true, NULL, true, GSI_SAME_STMT);
6778 }
6779
6780 /* If VALUE is an integer constant greater than -2^15 and smaller than 2^15,
6781 push one argument to ARGS with the DEVICE, ID and VALUE embedded in it;
6782 otherwise push an identifier (with DEVICE and ID) and the VALUE in two
6783 separate arguments. */
6784
6785 static void
6786 push_target_argument_according_to_value (gimple_stmt_iterator *gsi, int device,
6787 int id, tree value, vec <tree> *args)
6788 {
6789 if (tree_fits_shwi_p (value)
6790 && tree_to_shwi (value) > -(1 << 15)
6791 && tree_to_shwi (value) < (1 << 15))
6792 args->quick_push (get_target_argument_value (gsi, device, id, value));
6793 else
6794 {
6795 args->quick_push (get_target_argument_identifier (device, true, id));
6796 value = fold_convert (ptr_type_node, value);
6797 value = force_gimple_operand_gsi (gsi, value, true, NULL, true,
6798 GSI_SAME_STMT);
6799 args->quick_push (value);
6800 }
6801 }
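
/* To illustrate the encoding produced by the helpers above (a sketch in
   plain integer arithmetic, using the GOMP_TARGET_ARG_* constants from
   gomp-constants.h, not code emitted verbatim):

     id_word    = device | id
                  | (subsequent_param ? GOMP_TARGET_ARG_SUBSEQUENT_PARAM : 0);
     value_word = (value << GOMP_TARGET_ARG_VALUE_SHIFT) | id_word;

   Values in the range (-2^15, 2^15) are packed into a single value_word
   element; anything else occupies two elements: id_word with the
   SUBSEQUENT_PARAM bit set, followed by the value itself converted to a
   pointer.  */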
6802
6803 /* Create an array of arguments that is then passed to GOMP_target. */
6804
6805 static tree
6806 get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
6807 {
6808 auto_vec <tree, 6> args;
6809 tree clauses = gimple_omp_target_clauses (tgt_stmt);
6810 tree t, c = omp_find_clause (clauses, OMP_CLAUSE_NUM_TEAMS);
6811 if (c)
6812 t = OMP_CLAUSE_NUM_TEAMS_EXPR (c);
6813 else
6814 t = integer_minus_one_node;
6815 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6816 GOMP_TARGET_ARG_NUM_TEAMS, t, &args);
6817
6818 c = omp_find_clause (clauses, OMP_CLAUSE_THREAD_LIMIT);
6819 if (c)
6820 t = OMP_CLAUSE_THREAD_LIMIT_EXPR (c);
6821 else
6822 t = integer_minus_one_node;
6823 push_target_argument_according_to_value (gsi, GOMP_TARGET_ARG_DEVICE_ALL,
6824 GOMP_TARGET_ARG_THREAD_LIMIT, t,
6825 &args);
6826
6827 /* Add HSA-specific grid sizes, if available. */
6828 if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
6829 OMP_CLAUSE__GRIDDIM_))
6830 {
6831 t = get_target_argument_identifier (GOMP_DEVICE_HSA, true,
6832 GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES);
6833 args.quick_push (t);
6834 args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
6835 }
6836
6837 /* Produce more, perhaps device specific, arguments here. */
6838
6839 tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
6840 args.length () + 1),
6841 ".omp_target_args");
6842 for (unsigned i = 0; i < args.length (); i++)
6843 {
6844 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6845 build_int_cst (integer_type_node, i),
6846 NULL_TREE, NULL_TREE);
6847 gsi_insert_before (gsi, gimple_build_assign (ref, args[i]),
6848 GSI_SAME_STMT);
6849 }
6850 tree ref = build4 (ARRAY_REF, ptr_type_node, argarray,
6851 build_int_cst (integer_type_node, args.length ()),
6852 NULL_TREE, NULL_TREE);
6853 gsi_insert_before (gsi, gimple_build_assign (ref, null_pointer_node),
6854 GSI_SAME_STMT);
6855 TREE_ADDRESSABLE (argarray) = 1;
6856 return build_fold_addr_expr (argarray);
6857 }
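
/* As an illustration (a sketch only; indices shift whenever a value does not
   fit into a single packed element), for a target region with constant
   num_teams and thread_limit clauses and a gridified HSA kernel the array
   built above ends up as:

     .omp_target_args[0]  packed DEVICE_ALL / NUM_TEAMS value
     .omp_target_args[1]  packed DEVICE_ALL / THREAD_LIMIT value
     .omp_target_args[2]  GOMP_DEVICE_HSA / HSA_KERNEL_ATTRIBUTES identifier
     .omp_target_args[3]  address of the __kernel_launch_attrs variable
     .omp_target_args[4]  NULL terminator.  */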
6858
6859 /* Expand the GIMPLE_OMP_TARGET starting at REGION. */
6860
6861 static void
6862 expand_omp_target (struct omp_region *region)
6863 {
6864 basic_block entry_bb, exit_bb, new_bb;
6865 struct function *child_cfun;
6866 tree child_fn, block, t;
6867 gimple_stmt_iterator gsi;
6868 gomp_target *entry_stmt;
6869 gimple *stmt;
6870 edge e;
6871 bool offloaded, data_region;
6872
6873 entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
6874 new_bb = region->entry;
6875
6876 offloaded = is_gimple_omp_offloaded (entry_stmt);
6877 switch (gimple_omp_target_kind (entry_stmt))
6878 {
6879 case GF_OMP_TARGET_KIND_REGION:
6880 case GF_OMP_TARGET_KIND_UPDATE:
6881 case GF_OMP_TARGET_KIND_ENTER_DATA:
6882 case GF_OMP_TARGET_KIND_EXIT_DATA:
6883 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
6884 case GF_OMP_TARGET_KIND_OACC_KERNELS:
6885 case GF_OMP_TARGET_KIND_OACC_UPDATE:
6886 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
6887 case GF_OMP_TARGET_KIND_OACC_DECLARE:
6888 data_region = false;
6889 break;
6890 case GF_OMP_TARGET_KIND_DATA:
6891 case GF_OMP_TARGET_KIND_OACC_DATA:
6892 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
6893 data_region = true;
6894 break;
6895 default:
6896 gcc_unreachable ();
6897 }
6898
6899 child_fn = NULL_TREE;
6900 child_cfun = NULL;
6901 if (offloaded)
6902 {
6903 child_fn = gimple_omp_target_child_fn (entry_stmt);
6904 child_cfun = DECL_STRUCT_FUNCTION (child_fn);
6905 }
6906
6907 /* Supported by expand_omp_taskreg, but not here. */
6908 if (child_cfun != NULL)
6909 gcc_checking_assert (!child_cfun->cfg);
6910 gcc_checking_assert (!gimple_in_ssa_p (cfun));
6911
6912 entry_bb = region->entry;
6913 exit_bb = region->exit;
6914
6915 if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
6916 mark_loops_in_oacc_kernels_region (region->entry, region->exit);
6917
6918 if (offloaded)
6919 {
6920 unsigned srcidx, dstidx, num;
6921
6922 /* If the offloading region needs data sent from the parent
6923 function, then the very first statement (except possible
6924 tree profile counter updates) of the offloading body
6925 is a copy assignment .OMP_DATA_I = &.OMP_DATA_O. Since
6926 &.OMP_DATA_O is passed as an argument to the child function,
6927 we need to replace it with the argument as seen by the child
6928 function.
6929
6930 In most cases, this will end up being the identity assignment
6931 .OMP_DATA_I = .OMP_DATA_I. However, if the offloading body had
6932 a function call that has been inlined, the original PARM_DECL
6933 .OMP_DATA_I may have been converted into a different local
6934 variable, in which case we need to keep the assignment. */
6935 tree data_arg = gimple_omp_target_data_arg (entry_stmt);
6936 if (data_arg)
6937 {
6938 basic_block entry_succ_bb = single_succ (entry_bb);
6939 gimple_stmt_iterator gsi;
6940 tree arg;
6941 gimple *tgtcopy_stmt = NULL;
6942 tree sender = TREE_VEC_ELT (data_arg, 0);
6943
6944 for (gsi = gsi_start_bb (entry_succ_bb); ; gsi_next (&gsi))
6945 {
6946 gcc_assert (!gsi_end_p (gsi));
6947 stmt = gsi_stmt (gsi);
6948 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6949 continue;
6950
6951 if (gimple_num_ops (stmt) == 2)
6952 {
6953 tree arg = gimple_assign_rhs1 (stmt);
6954
6955 /* We're ignoring the subcode because we're
6956 effectively doing a STRIP_NOPS. */
6957
6958 if (TREE_CODE (arg) == ADDR_EXPR
6959 && TREE_OPERAND (arg, 0) == sender)
6960 {
6961 tgtcopy_stmt = stmt;
6962 break;
6963 }
6964 }
6965 }
6966
6967 gcc_assert (tgtcopy_stmt != NULL);
6968 arg = DECL_ARGUMENTS (child_fn);
6969
6970 gcc_assert (gimple_assign_lhs (tgtcopy_stmt) == arg);
6971 gsi_remove (&gsi, true);
6972 }
6973
6974 /* Declare local variables needed in CHILD_CFUN. */
6975 block = DECL_INITIAL (child_fn);
6976 BLOCK_VARS (block) = vec2chain (child_cfun->local_decls);
6977 /* The gimplifier could record temporaries in the offloading block
6978 rather than in containing function's local_decls chain,
6979 which would mean cgraph missed finalizing them. Do it now. */
6980 for (t = BLOCK_VARS (block); t; t = DECL_CHAIN (t))
6981 if (VAR_P (t) && TREE_STATIC (t) && !DECL_EXTERNAL (t))
6982 varpool_node::finalize_decl (t);
6983 DECL_SAVED_TREE (child_fn) = NULL;
6984 /* We'll create a CFG for child_fn, so no gimple body is needed. */
6985 gimple_set_body (child_fn, NULL);
6986 TREE_USED (block) = 1;
6987
6988 /* Reset DECL_CONTEXT on function arguments. */
6989 for (t = DECL_ARGUMENTS (child_fn); t; t = DECL_CHAIN (t))
6990 DECL_CONTEXT (t) = child_fn;
6991
6992 /* Split ENTRY_BB at GIMPLE_*,
6993 so that it can be moved to the child function. */
6994 gsi = gsi_last_bb (entry_bb);
6995 stmt = gsi_stmt (gsi);
6996 gcc_assert (stmt
6997 && gimple_code (stmt) == gimple_code (entry_stmt));
6998 e = split_block (entry_bb, stmt);
6999 gsi_remove (&gsi, true);
7000 entry_bb = e->dest;
7001 single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
7002
7003 /* Convert GIMPLE_OMP_RETURN into a RETURN_EXPR. */
7004 if (exit_bb)
7005 {
7006 gsi = gsi_last_bb (exit_bb);
7007 gcc_assert (!gsi_end_p (gsi)
7008 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7009 stmt = gimple_build_return (NULL);
7010 gsi_insert_after (&gsi, stmt, GSI_SAME_STMT);
7011 gsi_remove (&gsi, true);
7012 }
7013
7014 /* Move the offloading region into CHILD_CFUN. */
7015
7016 block = gimple_block (entry_stmt);
7017
7018 new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
7019 if (exit_bb)
7020 single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
7021 /* When the OMP expansion process cannot guarantee an up-to-date
7022 loop tree, arrange for the child function to fix up loops. */
7023 if (loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7024 child_cfun->x_current_loops->state |= LOOPS_NEED_FIXUP;
7025
7026 /* Remove non-local VAR_DECLs from child_cfun->local_decls list. */
7027 num = vec_safe_length (child_cfun->local_decls);
7028 for (srcidx = 0, dstidx = 0; srcidx < num; srcidx++)
7029 {
7030 t = (*child_cfun->local_decls)[srcidx];
7031 if (DECL_CONTEXT (t) == cfun->decl)
7032 continue;
7033 if (srcidx != dstidx)
7034 (*child_cfun->local_decls)[dstidx] = t;
7035 dstidx++;
7036 }
7037 if (dstidx != num)
7038 vec_safe_truncate (child_cfun->local_decls, dstidx);
7039
7040 /* Inform the callgraph about the new function. */
7041 child_cfun->curr_properties = cfun->curr_properties;
7042 child_cfun->has_simduid_loops |= cfun->has_simduid_loops;
7043 child_cfun->has_force_vectorize_loops |= cfun->has_force_vectorize_loops;
7044 cgraph_node *node = cgraph_node::get_create (child_fn);
7045 node->parallelized_function = 1;
7046 cgraph_node::add_new_function (child_fn, true);
7047
7048 /* Add the new function to the offload table. */
7049 if (ENABLE_OFFLOADING)
7050 vec_safe_push (offload_funcs, child_fn);
7051
7052 bool need_asm = DECL_ASSEMBLER_NAME_SET_P (current_function_decl)
7053 && !DECL_ASSEMBLER_NAME_SET_P (child_fn);
7054
7055 /* Fix the callgraph edges for child_cfun. Those for cfun will be
7056 fixed in a following pass. */
7057 push_cfun (child_cfun);
7058 if (need_asm)
7059 assign_assembler_name_if_neeeded (child_fn);
7060 cgraph_edge::rebuild_edges ();
7061
7062 /* Some EH regions might become dead, see PR34608. If
7063 pass_cleanup_cfg isn't the first pass to happen with the
7064 new child, these dead EH edges might cause problems.
7065 Clean them up now. */
7066 if (flag_exceptions)
7067 {
7068 basic_block bb;
7069 bool changed = false;
7070
7071 FOR_EACH_BB_FN (bb, cfun)
7072 changed |= gimple_purge_dead_eh_edges (bb);
7073 if (changed)
7074 cleanup_tree_cfg ();
7075 }
7076 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7077 verify_loop_structure ();
7078 pop_cfun ();
7079
7080 if (dump_file && !gimple_in_ssa_p (cfun))
7081 {
7082 omp_any_child_fn_dumped = true;
7083 dump_function_header (dump_file, child_fn, dump_flags);
7084 dump_function_to_file (child_fn, dump_file, dump_flags);
7085 }
7086 }
7087
7088 /* Emit a library call to launch the offloading region, or do data
7089 transfers. */
7090 tree t1, t2, t3, t4, device, cond, depend, c, clauses;
7091 enum built_in_function start_ix;
7092 location_t clause_loc;
7093 unsigned int flags_i = 0;
7094 bool oacc_kernels_p = false;
7095
7096 switch (gimple_omp_target_kind (entry_stmt))
7097 {
7098 case GF_OMP_TARGET_KIND_REGION:
7099 start_ix = BUILT_IN_GOMP_TARGET;
7100 break;
7101 case GF_OMP_TARGET_KIND_DATA:
7102 start_ix = BUILT_IN_GOMP_TARGET_DATA;
7103 break;
7104 case GF_OMP_TARGET_KIND_UPDATE:
7105 start_ix = BUILT_IN_GOMP_TARGET_UPDATE;
7106 break;
7107 case GF_OMP_TARGET_KIND_ENTER_DATA:
7108 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7109 break;
7110 case GF_OMP_TARGET_KIND_EXIT_DATA:
7111 start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
7112 flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
7113 break;
7114 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7115 oacc_kernels_p = true;
7116 /* FALLTHROUGH */
7117 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7118 start_ix = BUILT_IN_GOACC_PARALLEL;
7119 break;
7120 case GF_OMP_TARGET_KIND_OACC_DATA:
7121 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7122 start_ix = BUILT_IN_GOACC_DATA_START;
7123 break;
7124 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7125 start_ix = BUILT_IN_GOACC_UPDATE;
7126 break;
7127 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7128 start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
7129 break;
7130 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7131 start_ix = BUILT_IN_GOACC_DECLARE;
7132 break;
7133 default:
7134 gcc_unreachable ();
7135 }
7136
7137 clauses = gimple_omp_target_clauses (entry_stmt);
7138
7139 /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let the runtime
7140 library choose) and there is no conditional. */
7141 cond = NULL_TREE;
7142 device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
7143
7144 c = omp_find_clause (clauses, OMP_CLAUSE_IF);
7145 if (c)
7146 cond = OMP_CLAUSE_IF_EXPR (c);
7147
7148 c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
7149 if (c)
7150 {
7151 /* Even if we pass it to all library function calls, it is currently only
7152 defined/used for the OpenMP target ones. */
7153 gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
7154 || start_ix == BUILT_IN_GOMP_TARGET_DATA
7155 || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
7156 || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
7157
7158 device = OMP_CLAUSE_DEVICE_ID (c);
7159 clause_loc = OMP_CLAUSE_LOCATION (c);
7160 }
7161 else
7162 clause_loc = gimple_location (entry_stmt);
7163
7164 c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
7165 if (c)
7166 flags_i |= GOMP_TARGET_FLAG_NOWAIT;
7167
7168 /* Ensure 'device' is of the correct type. */
7169 device = fold_convert_loc (clause_loc, integer_type_node, device);
7170
7171 /* If we found the clause 'if (cond)', build
7172 (cond ? device : GOMP_DEVICE_HOST_FALLBACK). */
7173 if (cond)
7174 {
7175 cond = gimple_boolify (cond);
7176
7177 basic_block cond_bb, then_bb, else_bb;
7178 edge e;
7179 tree tmp_var;
7180
7181 tmp_var = create_tmp_var (TREE_TYPE (device));
7182 if (offloaded)
7183 e = split_block_after_labels (new_bb);
7184 else
7185 {
7186 gsi = gsi_last_bb (new_bb);
7187 gsi_prev (&gsi);
7188 e = split_block (new_bb, gsi_stmt (gsi));
7189 }
7190 cond_bb = e->src;
7191 new_bb = e->dest;
7192 remove_edge (e);
7193
7194 then_bb = create_empty_bb (cond_bb);
7195 else_bb = create_empty_bb (then_bb);
7196 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
7197 set_immediate_dominator (CDI_DOMINATORS, else_bb, cond_bb);
7198
7199 stmt = gimple_build_cond_empty (cond);
7200 gsi = gsi_last_bb (cond_bb);
7201 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7202
7203 gsi = gsi_start_bb (then_bb);
7204 stmt = gimple_build_assign (tmp_var, device);
7205 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7206
7207 gsi = gsi_start_bb (else_bb);
7208 stmt = gimple_build_assign (tmp_var,
7209 build_int_cst (integer_type_node,
7210 GOMP_DEVICE_HOST_FALLBACK));
7211 gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
7212
7213 make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
7214 make_edge (cond_bb, else_bb, EDGE_FALSE_VALUE);
7215 add_bb_to_loop (then_bb, cond_bb->loop_father);
7216 add_bb_to_loop (else_bb, cond_bb->loop_father);
7217 make_edge (then_bb, new_bb, EDGE_FALLTHRU);
7218 make_edge (else_bb, new_bb, EDGE_FALLTHRU);
7219
7220 device = tmp_var;
7221 gsi = gsi_last_bb (new_bb);
7222 }
7223 else
7224 {
7225 gsi = gsi_last_bb (new_bb);
7226 device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
7227 true, GSI_SAME_STMT);
7228 }
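
  /* In other words, the device argument used below is, roughly,

       device = cond ? <device clause value or GOMP_DEVICE_ICV>
                     : GOMP_DEVICE_HOST_FALLBACK;

     materialized above as an explicit diamond of basic blocks when an 'if'
     clause is present, and gimplified in place otherwise.  */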
7229
7230 t = gimple_omp_target_data_arg (entry_stmt);
7231 if (t == NULL)
7232 {
7233 t1 = size_zero_node;
7234 t2 = build_zero_cst (ptr_type_node);
7235 t3 = t2;
7236 t4 = t2;
7237 }
7238 else
7239 {
7240 t1 = TYPE_MAX_VALUE (TYPE_DOMAIN (TREE_TYPE (TREE_VEC_ELT (t, 1))));
7241 t1 = size_binop (PLUS_EXPR, t1, size_int (1));
7242 t2 = build_fold_addr_expr (TREE_VEC_ELT (t, 0));
7243 t3 = build_fold_addr_expr (TREE_VEC_ELT (t, 1));
7244 t4 = build_fold_addr_expr (TREE_VEC_ELT (t, 2));
7245 }
7246
7247 gimple *g;
7248 bool tagging = false;
7249 /* The maximum number of arguments used by any start_ix, without varargs. */
7250 auto_vec<tree, 11> args;
7251 args.quick_push (device);
7252 if (offloaded)
7253 args.quick_push (build_fold_addr_expr (child_fn));
7254 args.quick_push (t1);
7255 args.quick_push (t2);
7256 args.quick_push (t3);
7257 args.quick_push (t4);
7258 switch (start_ix)
7259 {
7260 case BUILT_IN_GOACC_DATA_START:
7261 case BUILT_IN_GOACC_DECLARE:
7262 case BUILT_IN_GOMP_TARGET_DATA:
7263 break;
7264 case BUILT_IN_GOMP_TARGET:
7265 case BUILT_IN_GOMP_TARGET_UPDATE:
7266 case BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA:
7267 args.quick_push (build_int_cst (unsigned_type_node, flags_i));
7268 c = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
7269 if (c)
7270 depend = OMP_CLAUSE_DECL (c);
7271 else
7272 depend = build_int_cst (ptr_type_node, 0);
7273 args.quick_push (depend);
7274 if (start_ix == BUILT_IN_GOMP_TARGET)
7275 args.quick_push (get_target_arguments (&gsi, entry_stmt));
7276 break;
7277 case BUILT_IN_GOACC_PARALLEL:
7278 {
7279 oacc_set_fn_attrib (child_fn, clauses, oacc_kernels_p, &args);
7280 tagging = true;
7281 }
7282 /* FALLTHRU */
7283 case BUILT_IN_GOACC_ENTER_EXIT_DATA:
7284 case BUILT_IN_GOACC_UPDATE:
7285 {
7286 tree t_async = NULL_TREE;
7287
7288 /* If present, use the value specified by the respective
7289 clause, making sure that it is of the correct type. */
7290 c = omp_find_clause (clauses, OMP_CLAUSE_ASYNC);
7291 if (c)
7292 t_async = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7293 integer_type_node,
7294 OMP_CLAUSE_ASYNC_EXPR (c));
7295 else if (!tagging)
7296 /* Default values for t_async. */
7297 t_async = fold_convert_loc (gimple_location (entry_stmt),
7298 integer_type_node,
7299 build_int_cst (integer_type_node,
7300 GOMP_ASYNC_SYNC));
7301 if (tagging && t_async)
7302 {
7303 unsigned HOST_WIDE_INT i_async = GOMP_LAUNCH_OP_MAX;
7304
7305 if (TREE_CODE (t_async) == INTEGER_CST)
7306 {
7307 /* See if we can pack the async arg in to the tag's
7308 operand. */
7309 i_async = TREE_INT_CST_LOW (t_async);
7310 if (i_async < GOMP_LAUNCH_OP_MAX)
7311 t_async = NULL_TREE;
7312 else
7313 i_async = GOMP_LAUNCH_OP_MAX;
7314 }
7315 args.safe_push (oacc_launch_pack (GOMP_LAUNCH_ASYNC, NULL_TREE,
7316 i_async));
7317 }
7318 if (t_async)
7319 args.safe_push (t_async);
7320
7321 /* Save the argument index, and ... */
7322 unsigned t_wait_idx = args.length ();
7323 unsigned num_waits = 0;
7324 c = omp_find_clause (clauses, OMP_CLAUSE_WAIT);
7325 if (!tagging || c)
7326 /* ... push a placeholder. */
7327 args.safe_push (integer_zero_node);
7328
7329 for (; c; c = OMP_CLAUSE_CHAIN (c))
7330 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
7331 {
7332 args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
7333 integer_type_node,
7334 OMP_CLAUSE_WAIT_EXPR (c)));
7335 num_waits++;
7336 }
7337
7338 if (!tagging || num_waits)
7339 {
7340 tree len;
7341
7342 /* Now that we know the number, update the placeholder. */
7343 if (tagging)
7344 len = oacc_launch_pack (GOMP_LAUNCH_WAIT, NULL_TREE, num_waits);
7345 else
7346 len = build_int_cst (integer_type_node, num_waits);
7347 len = fold_convert_loc (gimple_location (entry_stmt),
7348 unsigned_type_node, len);
7349 args[t_wait_idx] = len;
7350 }
7351 }
7352 break;
7353 default:
7354 gcc_unreachable ();
7355 }
7356 if (tagging)
7357 /* Push terminal marker - zero. */
7358 args.safe_push (oacc_launch_pack (0, NULL_TREE, 0));
7359
7360 g = gimple_build_call_vec (builtin_decl_explicit (start_ix), args);
7361 gimple_set_location (g, gimple_location (entry_stmt));
7362 gsi_insert_before (&gsi, g, GSI_SAME_STMT);
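
  /* For the plain OpenMP case (BUILT_IN_GOMP_TARGET) the call emitted above
     therefore has, roughly, the shape

       GOMP_target_ext (device, child_fn, mapnum, hostaddrs, sizes, kinds,
                        flags, depend, target_args);

     where t1..t4 above are the map count and the three map arrays; this is
     a sketch of the libgomp entry point the builtin resolves to in this
     release series, not a declaration taken from libgomp itself.  */
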
7363 if (!offloaded)
7364 {
7365 g = gsi_stmt (gsi);
7366 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
7367 gsi_remove (&gsi, true);
7368 }
7369 if (data_region && region->exit)
7370 {
7371 gsi = gsi_last_bb (region->exit);
7372 g = gsi_stmt (gsi);
7373 gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
7374 gsi_remove (&gsi, true);
7375 }
7376 }
7377
7378 /* Expand the KFOR loop as an HSA gridified kernel, i.e. as a body only, with
7379 the iteration variable derived from the thread number. INTRA_GROUP means this
7380 is an expansion of a loop iterating over work-items within a separate
7381 iteration over groups. */
7382
7383 static void
7384 grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
7385 {
7386 gimple_stmt_iterator gsi;
7387 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7388 gcc_checking_assert (gimple_omp_for_kind (for_stmt)
7389 == GF_OMP_FOR_KIND_GRID_LOOP);
7390 size_t collapse = gimple_omp_for_collapse (for_stmt);
7391 struct omp_for_data_loop *loops
7392 = XALLOCAVEC (struct omp_for_data_loop,
7393 gimple_omp_for_collapse (for_stmt));
7394 struct omp_for_data fd;
7395
7396 remove_edge (BRANCH_EDGE (kfor->entry));
7397 basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
7398
7399 gcc_assert (kfor->cont);
7400 omp_extract_for_data (for_stmt, &fd, loops);
7401
7402 gsi = gsi_start_bb (body_bb);
7403
7404 for (size_t dim = 0; dim < collapse; dim++)
7405 {
7406 tree type, itype;
7407 itype = type = TREE_TYPE (fd.loops[dim].v);
7408 if (POINTER_TYPE_P (type))
7409 itype = signed_type_for (type);
7410
7411 tree n1 = fd.loops[dim].n1;
7412 tree step = fd.loops[dim].step;
7413 n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
7414 true, NULL_TREE, true, GSI_SAME_STMT);
7415 step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
7416 true, NULL_TREE, true, GSI_SAME_STMT);
7417 tree threadid;
7418 if (gimple_omp_for_grid_group_iter (for_stmt))
7419 {
7420 gcc_checking_assert (!intra_group);
7421 threadid = build_call_expr (builtin_decl_explicit
7422 (BUILT_IN_HSA_WORKGROUPID), 1,
7423 build_int_cstu (unsigned_type_node, dim));
7424 }
7425 else if (intra_group)
7426 threadid = build_call_expr (builtin_decl_explicit
7427 (BUILT_IN_HSA_WORKITEMID), 1,
7428 build_int_cstu (unsigned_type_node, dim));
7429 else
7430 threadid = build_call_expr (builtin_decl_explicit
7431 (BUILT_IN_HSA_WORKITEMABSID), 1,
7432 build_int_cstu (unsigned_type_node, dim));
7433 threadid = fold_convert (itype, threadid);
7434 threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
7435 true, GSI_SAME_STMT);
7436
7437 tree startvar = fd.loops[dim].v;
7438 tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
7439 if (POINTER_TYPE_P (type))
7440 t = fold_build_pointer_plus (n1, t);
7441 else
7442 t = fold_build2 (PLUS_EXPR, type, t, n1);
7443 t = fold_convert (type, t);
7444 t = force_gimple_operand_gsi (&gsi, t,
7445 DECL_P (startvar)
7446 && TREE_ADDRESSABLE (startvar),
7447 NULL_TREE, true, GSI_SAME_STMT);
7448 gassign *assign_stmt = gimple_build_assign (startvar, t);
7449 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
7450 }
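
  /* Per collapsed dimension, the statements built above therefore amount to,
     roughly,

       fd.loops[dim].v = fd.loops[dim].n1
                         + <HSA work-item/work-group id for dim> * fd.loops[dim].step;

     so every work-item (or work-group, when iterating over groups) computes
     its own iteration directly from its hardware id.  */
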
7451 /* Remove the omp for statement. */
7452 gsi = gsi_last_bb (kfor->entry);
7453 gsi_remove (&gsi, true);
7454
7455 /* Remove the GIMPLE_OMP_CONTINUE statement. */
7456 gsi = gsi_last_bb (kfor->cont);
7457 gcc_assert (!gsi_end_p (gsi)
7458 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
7459 gsi_remove (&gsi, true);
7460
7461 /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary. */
7462 gsi = gsi_last_bb (kfor->exit);
7463 gcc_assert (!gsi_end_p (gsi)
7464 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7465 if (intra_group)
7466 gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
7467 gsi_remove (&gsi, true);
7468
7469 /* Fixup the much simpler CFG. */
7470 remove_edge (find_edge (kfor->cont, body_bb));
7471
7472 if (kfor->cont != body_bb)
7473 set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
7474 set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
7475 }
7476
7477 /* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
7478 argument_decls. */
7479
7480 struct grid_arg_decl_map
7481 {
7482 tree old_arg;
7483 tree new_arg;
7484 };
7485
7486 /* Invoked through walk_gimple_op; remaps uses of the original PARM_DECL to
7487 the one pertaining to the kernel function. */
7488
7489 static tree
7490 grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
7491 {
7492 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
7493 struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
7494 tree t = *tp;
7495
7496 if (t == adm->old_arg)
7497 *tp = adm->new_arg;
7498 *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
7499 return NULL_TREE;
7500 }
7501
7502 /* If the TARGET region contains a gridified kernel body for-loop, remove its
7503 region from TARGET and expand it in HSA gridified kernel fashion. */
7504
7505 static void
7506 grid_expand_target_grid_body (struct omp_region *target)
7507 {
7508 if (!hsa_gen_requested_p ())
7509 return;
7510
7511 gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
7512 struct omp_region **pp;
7513
7514 for (pp = &target->inner; *pp; pp = &(*pp)->next)
7515 if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
7516 break;
7517
7518 struct omp_region *gpukernel = *pp;
7519
7520 tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
7521 if (!gpukernel)
7522 {
7523 /* HSA cannot handle OACC stuff. */
7524 if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
7525 return;
7526 gcc_checking_assert (orig_child_fndecl);
7527 gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7528 OMP_CLAUSE__GRIDDIM_));
7529 cgraph_node *n = cgraph_node::get (orig_child_fndecl);
7530
7531 hsa_register_kernel (n);
7532 return;
7533 }
7534
7535 gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
7536 OMP_CLAUSE__GRIDDIM_));
7537 tree inside_block = gimple_block (first_stmt (single_succ (gpukernel->entry)));
7538 *pp = gpukernel->next;
7539 for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
7540 if ((*pp)->type == GIMPLE_OMP_FOR)
7541 break;
7542
7543 struct omp_region *kfor = *pp;
7544 gcc_assert (kfor);
7545 gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
7546 gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
7547 *pp = kfor->next;
7548 if (kfor->inner)
7549 {
7550 if (gimple_omp_for_grid_group_iter (for_stmt))
7551 {
7552 struct omp_region **next_pp;
7553 for (pp = &kfor->inner; *pp; pp = next_pp)
7554 {
7555 next_pp = &(*pp)->next;
7556 if ((*pp)->type != GIMPLE_OMP_FOR)
7557 continue;
7558 gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
7559 gcc_assert (gimple_omp_for_kind (inner)
7560 == GF_OMP_FOR_KIND_GRID_LOOP);
7561 grid_expand_omp_for_loop (*pp, true);
7562 *pp = (*pp)->next;
7563 next_pp = pp;
7564 }
7565 }
7566 expand_omp (kfor->inner);
7567 }
7568 if (gpukernel->inner)
7569 expand_omp (gpukernel->inner);
7570
7571 tree kern_fndecl = copy_node (orig_child_fndecl);
7572 DECL_NAME (kern_fndecl) = clone_function_name (kern_fndecl, "kernel");
7573 SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
7574 tree tgtblock = gimple_block (tgt_stmt);
7575 tree fniniblock = make_node (BLOCK);
7576 BLOCK_ABSTRACT_ORIGIN (fniniblock) = tgtblock;
7577 BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
7578 BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
7579 BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
7580 DECL_INITIAL (kern_fndecl) = fniniblock;
7581 push_struct_function (kern_fndecl);
7582 cfun->function_end_locus = gimple_location (tgt_stmt);
7583 init_tree_ssa (cfun);
7584 pop_cfun ();
7585
7586 tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
7587 gcc_assert (!DECL_CHAIN (old_parm_decl));
7588 tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
7589 DECL_CONTEXT (new_parm_decl) = kern_fndecl;
7590 DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
7591 gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
7592 DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
7593 DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
7594 struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
7595 kern_cfun->curr_properties = cfun->curr_properties;
7596
7597 grid_expand_omp_for_loop (kfor, false);
7598
7599 /* Remove the omp for statement. */
7600 gimple_stmt_iterator gsi = gsi_last_bb (gpukernel->entry);
7601 gsi_remove (&gsi, true);
7602 /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
7603 return. */
7604 gsi = gsi_last_bb (gpukernel->exit);
7605 gcc_assert (!gsi_end_p (gsi)
7606 && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
7607 gimple *ret_stmt = gimple_build_return (NULL);
7608 gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
7609 gsi_remove (&gsi, true);
7610
7611 /* Statements in the first BB in the target construct have been produced by
7612 target lowering and must be copied inside the GPUKERNEL, with the two
7613 exceptions of the first OMP statement and the OMP_DATA assignment
7614 statement. */
7615 gsi = gsi_start_bb (single_succ (gpukernel->entry));
7616 tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
7617 tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
7618 for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
7619 !gsi_end_p (tsi); gsi_next (&tsi))
7620 {
7621 gimple *stmt = gsi_stmt (tsi);
7622 if (is_gimple_omp (stmt))
7623 break;
7624 if (sender
7625 && is_gimple_assign (stmt)
7626 && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
7627 && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
7628 continue;
7629 gimple *copy = gimple_copy (stmt);
7630 gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
7631 gimple_set_block (copy, fniniblock);
7632 }
7633
7634 move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
7635 gpukernel->exit, inside_block);
7636
7637 cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
7638 kcn->mark_force_output ();
7639 cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
7640
7641 hsa_register_kernel (kcn, orig_child);
7642
7643 cgraph_node::add_new_function (kern_fndecl, true);
7644 push_cfun (kern_cfun);
7645 cgraph_edge::rebuild_edges ();
7646
7647 /* Re-map any mention of the PARM_DECL of the original function to the
7648 PARM_DECL of the new one.
7649
7650 TODO: It would be great if lowering produced references into the GPU
7651 kernel decl straight away and we did not have to do this. */
7652 struct grid_arg_decl_map adm;
7653 adm.old_arg = old_parm_decl;
7654 adm.new_arg = new_parm_decl;
7655 basic_block bb;
7656 FOR_EACH_BB_FN (bb, kern_cfun)
7657 {
7658 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
7659 {
7660 gimple *stmt = gsi_stmt (gsi);
7661 struct walk_stmt_info wi;
7662 memset (&wi, 0, sizeof (wi));
7663 wi.info = &adm;
7664 walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
7665 }
7666 }
7667 pop_cfun ();
7668
7669 return;
7670 }
7671
7672 /* Expand the parallel region tree rooted at REGION. Expansion
7673 proceeds in depth-first order. Innermost regions are expanded
7674 first. This way, parallel regions that require a new function to
7675 be created (e.g., GIMPLE_OMP_PARALLEL) can be expanded without having any
7676 internal dependencies in their body. */
7677
7678 static void
7679 expand_omp (struct omp_region *region)
7680 {
7681 omp_any_child_fn_dumped = false;
7682 while (region)
7683 {
7684 location_t saved_location;
7685 gimple *inner_stmt = NULL;
7686
7687 /* First, determine whether this is a combined parallel+workshare
7688 region. */
7689 if (region->type == GIMPLE_OMP_PARALLEL)
7690 determine_parallel_type (region);
7691 else if (region->type == GIMPLE_OMP_TARGET)
7692 grid_expand_target_grid_body (region);
7693
7694 if (region->type == GIMPLE_OMP_FOR
7695 && gimple_omp_for_combined_p (last_stmt (region->entry)))
7696 inner_stmt = last_stmt (region->inner->entry);
7697
7698 if (region->inner)
7699 expand_omp (region->inner);
7700
7701 saved_location = input_location;
7702 if (gimple_has_location (last_stmt (region->entry)))
7703 input_location = gimple_location (last_stmt (region->entry));
7704
7705 switch (region->type)
7706 {
7707 case GIMPLE_OMP_PARALLEL:
7708 case GIMPLE_OMP_TASK:
7709 expand_omp_taskreg (region);
7710 break;
7711
7712 case GIMPLE_OMP_FOR:
7713 expand_omp_for (region, inner_stmt);
7714 break;
7715
7716 case GIMPLE_OMP_SECTIONS:
7717 expand_omp_sections (region);
7718 break;
7719
7720 case GIMPLE_OMP_SECTION:
7721 /* Individual omp sections are handled together with their
7722 parent GIMPLE_OMP_SECTIONS region. */
7723 break;
7724
7725 case GIMPLE_OMP_SINGLE:
7726 expand_omp_single (region);
7727 break;
7728
7729 case GIMPLE_OMP_ORDERED:
7730 {
7731 gomp_ordered *ord_stmt
7732 = as_a <gomp_ordered *> (last_stmt (region->entry));
7733 if (omp_find_clause (gimple_omp_ordered_clauses (ord_stmt),
7734 OMP_CLAUSE_DEPEND))
7735 {
7736 /* We'll expand these when expanding corresponding
7737 worksharing region with ordered(n) clause. */
7738 gcc_assert (region->outer
7739 && region->outer->type == GIMPLE_OMP_FOR);
7740 region->ord_stmt = ord_stmt;
7741 break;
7742 }
7743 }
7744 /* FALLTHRU */
7745 case GIMPLE_OMP_MASTER:
7746 case GIMPLE_OMP_TASKGROUP:
7747 case GIMPLE_OMP_CRITICAL:
7748 case GIMPLE_OMP_TEAMS:
7749 expand_omp_synch (region);
7750 break;
7751
7752 case GIMPLE_OMP_ATOMIC_LOAD:
7753 expand_omp_atomic (region);
7754 break;
7755
7756 case GIMPLE_OMP_TARGET:
7757 expand_omp_target (region);
7758 break;
7759
7760 default:
7761 gcc_unreachable ();
7762 }
7763
7764 input_location = saved_location;
7765 region = region->next;
7766 }
7767 if (omp_any_child_fn_dumped)
7768 {
7769 if (dump_file)
7770 dump_function_header (dump_file, current_function_decl, dump_flags);
7771 omp_any_child_fn_dumped = false;
7772 }
7773 }
7774
7775 /* Helper for build_omp_regions. Scan the dominator tree starting at
7776 block BB. PARENT is the region that contains BB. If SINGLE_TREE is
7777 true, the function ends once a single tree is built (otherwise, a whole
7778 forest of OMP constructs may be built). */
7779
7780 static void
7781 build_omp_regions_1 (basic_block bb, struct omp_region *parent,
7782 bool single_tree)
7783 {
7784 gimple_stmt_iterator gsi;
7785 gimple *stmt;
7786 basic_block son;
7787
7788 gsi = gsi_last_bb (bb);
7789 if (!gsi_end_p (gsi) && is_gimple_omp (gsi_stmt (gsi)))
7790 {
7791 struct omp_region *region;
7792 enum gimple_code code;
7793
7794 stmt = gsi_stmt (gsi);
7795 code = gimple_code (stmt);
7796 if (code == GIMPLE_OMP_RETURN)
7797 {
7798 /* STMT is the return point out of region PARENT. Mark it
7799 as the exit point and make PARENT the immediately
7800 enclosing region. */
7801 gcc_assert (parent);
7802 region = parent;
7803 region->exit = bb;
7804 parent = parent->outer;
7805 }
7806 else if (code == GIMPLE_OMP_ATOMIC_STORE)
7807 {
7808 /* GIMPLE_OMP_ATOMIC_STORE is analogous to
7809 GIMPLE_OMP_RETURN, but matches with
7810 GIMPLE_OMP_ATOMIC_LOAD. */
7811 gcc_assert (parent);
7812 gcc_assert (parent->type == GIMPLE_OMP_ATOMIC_LOAD);
7813 region = parent;
7814 region->exit = bb;
7815 parent = parent->outer;
7816 }
7817 else if (code == GIMPLE_OMP_CONTINUE)
7818 {
7819 gcc_assert (parent);
7820 parent->cont = bb;
7821 }
7822 else if (code == GIMPLE_OMP_SECTIONS_SWITCH)
7823 {
7824 /* GIMPLE_OMP_SECTIONS_SWITCH is part of
7825 GIMPLE_OMP_SECTIONS, and we do nothing for it. */
7826 }
7827 else
7828 {
7829 region = new_omp_region (bb, code, parent);
7830 /* Otherwise... */
7831 if (code == GIMPLE_OMP_TARGET)
7832 {
7833 switch (gimple_omp_target_kind (stmt))
7834 {
7835 case GF_OMP_TARGET_KIND_REGION:
7836 case GF_OMP_TARGET_KIND_DATA:
7837 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
7838 case GF_OMP_TARGET_KIND_OACC_KERNELS:
7839 case GF_OMP_TARGET_KIND_OACC_DATA:
7840 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
7841 break;
7842 case GF_OMP_TARGET_KIND_UPDATE:
7843 case GF_OMP_TARGET_KIND_ENTER_DATA:
7844 case GF_OMP_TARGET_KIND_EXIT_DATA:
7845 case GF_OMP_TARGET_KIND_OACC_UPDATE:
7846 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
7847 case GF_OMP_TARGET_KIND_OACC_DECLARE:
7848 /* ..., other than for those stand-alone directives... */
7849 region = NULL;
7850 break;
7851 default:
7852 gcc_unreachable ();
7853 }
7854 }
7855 else if (code == GIMPLE_OMP_ORDERED
7856 && omp_find_clause (gimple_omp_ordered_clauses
7857 (as_a <gomp_ordered *> (stmt)),
7858 OMP_CLAUSE_DEPEND))
7859 /* #pragma omp ordered depend is also just a stand-alone
7860 directive. */
7861 region = NULL;
7862 /* ..., this directive becomes the parent for a new region. */
7863 if (region)
7864 parent = region;
7865 }
7866 }
7867
7868 if (single_tree && !parent)
7869 return;
7870
7871 for (son = first_dom_son (CDI_DOMINATORS, bb);
7872 son;
7873 son = next_dom_son (CDI_DOMINATORS, son))
7874 build_omp_regions_1 (son, parent, single_tree);
7875 }
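
/* As an illustration (a sketch, not tied to a particular testcase), a source
   fragment such as

     #pragma omp parallel
     #pragma omp for
     for (i = 0; i < n; i++)
       ...

   yields a GIMPLE_OMP_PARALLEL region whose inner child is the
   GIMPLE_OMP_FOR region; constructs at the same nesting level are chained
   through the next pointers that expand_omp walks above.  */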
7876
7877 /* Builds the tree of OMP regions rooted at ROOT, storing it to
7878 root_omp_region. */
7879
7880 static void
7881 build_omp_regions_root (basic_block root)
7882 {
7883 gcc_assert (root_omp_region == NULL);
7884 build_omp_regions_1 (root, NULL, true);
7885 gcc_assert (root_omp_region != NULL);
7886 }
7887
7888 /* Expands omp construct (and its subconstructs) starting in HEAD. */
7889
7890 void
7891 omp_expand_local (basic_block head)
7892 {
7893 build_omp_regions_root (head);
7894 if (dump_file && (dump_flags & TDF_DETAILS))
7895 {
7896 fprintf (dump_file, "\nOMP region tree\n\n");
7897 dump_omp_region (dump_file, root_omp_region, 0);
7898 fprintf (dump_file, "\n");
7899 }
7900
7901 remove_exit_barriers (root_omp_region);
7902 expand_omp (root_omp_region);
7903
7904 omp_free_regions ();
7905 }
7906
7907 /* Scan the CFG and build a tree of OMP regions, storing its root in
7908 root_omp_region. */
7909
7910 static void
7911 build_omp_regions (void)
7912 {
7913 gcc_assert (root_omp_region == NULL);
7914 calculate_dominance_info (CDI_DOMINATORS);
7915 build_omp_regions_1 (ENTRY_BLOCK_PTR_FOR_FN (cfun), NULL, false);
7916 }
7917
7918 /* Main entry point for expanding OMP-GIMPLE into runtime calls. */
7919
7920 static unsigned int
7921 execute_expand_omp (void)
7922 {
7923 build_omp_regions ();
7924
7925 if (!root_omp_region)
7926 return 0;
7927
7928 if (dump_file)
7929 {
7930 fprintf (dump_file, "\nOMP region tree\n\n");
7931 dump_omp_region (dump_file, root_omp_region, 0);
7932 fprintf (dump_file, "\n");
7933 }
7934
7935 remove_exit_barriers (root_omp_region);
7936
7937 expand_omp (root_omp_region);
7938
7939 if (flag_checking && !loops_state_satisfies_p (LOOPS_NEED_FIXUP))
7940 verify_loop_structure ();
7941 cleanup_tree_cfg ();
7942
7943 omp_free_regions ();
7944
7945 return 0;
7946 }
7947
7948 /* OMP expansion -- the default pass, run before creation of SSA form. */
7949
7950 namespace {
7951
7952 const pass_data pass_data_expand_omp =
7953 {
7954 GIMPLE_PASS, /* type */
7955 "ompexp", /* name */
7956 OPTGROUP_OPENMP, /* optinfo_flags */
7957 TV_NONE, /* tv_id */
7958 PROP_gimple_any, /* properties_required */
7959 PROP_gimple_eomp, /* properties_provided */
7960 0, /* properties_destroyed */
7961 0, /* todo_flags_start */
7962 0, /* todo_flags_finish */
7963 };
7964
7965 class pass_expand_omp : public gimple_opt_pass
7966 {
7967 public:
7968 pass_expand_omp (gcc::context *ctxt)
7969 : gimple_opt_pass (pass_data_expand_omp, ctxt)
7970 {}
7971
7972 /* opt_pass methods: */
7973 virtual unsigned int execute (function *)
7974 {
7975 bool gate = ((flag_cilkplus != 0 || flag_openacc != 0 || flag_openmp != 0
7976 || flag_openmp_simd != 0)
7977 && !seen_error ());
7978
7979 /* This pass always runs, to provide PROP_gimple_eomp.
7980 But often, there is nothing to do. */
7981 if (!gate)
7982 return 0;
7983
7984 return execute_expand_omp ();
7985 }
7986
7987 }; // class pass_expand_omp
7988
7989 } // anon namespace
7990
7991 gimple_opt_pass *
7992 make_pass_expand_omp (gcc::context *ctxt)
7993 {
7994 return new pass_expand_omp (ctxt);
7995 }
7996
7997 namespace {
7998
7999 const pass_data pass_data_expand_omp_ssa =
8000 {
8001 GIMPLE_PASS, /* type */
8002 "ompexpssa", /* name */
8003 OPTGROUP_OPENMP, /* optinfo_flags */
8004 TV_NONE, /* tv_id */
8005 PROP_cfg | PROP_ssa, /* properties_required */
8006 PROP_gimple_eomp, /* properties_provided */
8007 0, /* properties_destroyed */
8008 0, /* todo_flags_start */
8009 TODO_cleanup_cfg | TODO_rebuild_alias, /* todo_flags_finish */
8010 };
8011
8012 class pass_expand_omp_ssa : public gimple_opt_pass
8013 {
8014 public:
8015 pass_expand_omp_ssa (gcc::context *ctxt)
8016 : gimple_opt_pass (pass_data_expand_omp_ssa, ctxt)
8017 {}
8018
8019 /* opt_pass methods: */
8020 virtual bool gate (function *fun)
8021 {
8022 return !(fun->curr_properties & PROP_gimple_eomp);
8023 }
8024 virtual unsigned int execute (function *) { return execute_expand_omp (); }
8025 opt_pass * clone () { return new pass_expand_omp_ssa (m_ctxt); }
8026
8027 }; // class pass_expand_omp_ssa
8028
8029 } // anon namespace
8030
8031 gimple_opt_pass *
8032 make_pass_expand_omp_ssa (gcc::context *ctxt)
8033 {
8034 return new pass_expand_omp_ssa (ctxt);
8035 }
8036
8037 /* Called from tree-cfg.c::make_edges to create cfg edges for all relevant
8038 GIMPLE_* codes. */
8039
8040 bool
8041 omp_make_gimple_edges (basic_block bb, struct omp_region **region,
8042 int *region_idx)
8043 {
8044 gimple *last = last_stmt (bb);
8045 enum gimple_code code = gimple_code (last);
8046 struct omp_region *cur_region = *region;
8047 bool fallthru = false;
8048
8049 switch (code)
8050 {
8051 case GIMPLE_OMP_PARALLEL:
8052 case GIMPLE_OMP_TASK:
8053 case GIMPLE_OMP_FOR:
8054 case GIMPLE_OMP_SINGLE:
8055 case GIMPLE_OMP_TEAMS:
8056 case GIMPLE_OMP_MASTER:
8057 case GIMPLE_OMP_TASKGROUP:
8058 case GIMPLE_OMP_CRITICAL:
8059 case GIMPLE_OMP_SECTION:
8060 case GIMPLE_OMP_GRID_BODY:
8061 cur_region = new_omp_region (bb, code, cur_region);
8062 fallthru = true;
8063 break;
8064
8065 case GIMPLE_OMP_ORDERED:
8066 cur_region = new_omp_region (bb, code, cur_region);
8067 fallthru = true;
8068 if (omp_find_clause (gimple_omp_ordered_clauses
8069 (as_a <gomp_ordered *> (last)),
8070 OMP_CLAUSE_DEPEND))
8071 cur_region = cur_region->outer;
8072 break;
8073
8074 case GIMPLE_OMP_TARGET:
8075 cur_region = new_omp_region (bb, code, cur_region);
8076 fallthru = true;
8077 switch (gimple_omp_target_kind (last))
8078 {
8079 case GF_OMP_TARGET_KIND_REGION:
8080 case GF_OMP_TARGET_KIND_DATA:
8081 case GF_OMP_TARGET_KIND_OACC_PARALLEL:
8082 case GF_OMP_TARGET_KIND_OACC_KERNELS:
8083 case GF_OMP_TARGET_KIND_OACC_DATA:
8084 case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
8085 break;
8086 case GF_OMP_TARGET_KIND_UPDATE:
8087 case GF_OMP_TARGET_KIND_ENTER_DATA:
8088 case GF_OMP_TARGET_KIND_EXIT_DATA:
8089 case GF_OMP_TARGET_KIND_OACC_UPDATE:
8090 case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
8091 case GF_OMP_TARGET_KIND_OACC_DECLARE:
8092 cur_region = cur_region->outer;
8093 break;
8094 default:
8095 gcc_unreachable ();
8096 }
8097 break;
8098
8099 case GIMPLE_OMP_SECTIONS:
8100 cur_region = new_omp_region (bb, code, cur_region);
8101 fallthru = true;
8102 break;
8103
8104 case GIMPLE_OMP_SECTIONS_SWITCH:
8105 fallthru = false;
8106 break;
8107
8108 case GIMPLE_OMP_ATOMIC_LOAD:
8109 case GIMPLE_OMP_ATOMIC_STORE:
8110 fallthru = true;
8111 break;
8112
8113 case GIMPLE_OMP_RETURN:
8114 /* In the case of a GIMPLE_OMP_SECTION, the edge will go
8115 somewhere other than the next block. This will be
8116 created later. */
8117 cur_region->exit = bb;
8118 if (cur_region->type == GIMPLE_OMP_TASK)
8119 /* Add an edge corresponding to not scheduling the task
8120 immediately. */
8121 make_edge (cur_region->entry, bb, EDGE_ABNORMAL);
8122 fallthru = cur_region->type != GIMPLE_OMP_SECTION;
8123 cur_region = cur_region->outer;
8124 break;
8125
8126 case GIMPLE_OMP_CONTINUE:
8127 cur_region->cont = bb;
8128 switch (cur_region->type)
8129 {
8130 case GIMPLE_OMP_FOR:
8131 /* Mark all GIMPLE_OMP_FOR and GIMPLE_OMP_CONTINUE
8132 succs edges as abnormal to prevent splitting
8133 them. */
8134 single_succ_edge (cur_region->entry)->flags |= EDGE_ABNORMAL;
8135 /* Make the loopback edge. */
8136 make_edge (bb, single_succ (cur_region->entry),
8137 EDGE_ABNORMAL);
8138
8139 /* Create an edge from GIMPLE_OMP_FOR to exit, which
8140 corresponds to the case that the body of the loop
8141 is not executed at all. */
8142 make_edge (cur_region->entry, bb->next_bb, EDGE_ABNORMAL);
8143 make_edge (bb, bb->next_bb, EDGE_FALLTHRU | EDGE_ABNORMAL);
8144 fallthru = false;
8145 break;
8146
8147 case GIMPLE_OMP_SECTIONS:
8148 /* Wire up the edges into and out of the nested sections. */
8149 {
8150 basic_block switch_bb = single_succ (cur_region->entry);
8151
8152 struct omp_region *i;
8153 for (i = cur_region->inner; i ; i = i->next)
8154 {
8155 gcc_assert (i->type == GIMPLE_OMP_SECTION);
8156 make_edge (switch_bb, i->entry, 0);
8157 make_edge (i->exit, bb, EDGE_FALLTHRU);
8158 }
8159
8160 /* Make the loopback edge to the block with
8161 GIMPLE_OMP_SECTIONS_SWITCH. */
8162 make_edge (bb, switch_bb, 0);
8163
8164 /* Make the edge from the switch to exit. */
8165 make_edge (switch_bb, bb->next_bb, 0);
8166 fallthru = false;
8167 }
8168 break;
8169
8170 case GIMPLE_OMP_TASK:
8171 fallthru = true;
8172 break;
8173
8174 default:
8175 gcc_unreachable ();
8176 }
8177 break;
8178
8179 default:
8180 gcc_unreachable ();
8181 }
8182
8183 if (*region != cur_region)
8184 {
8185 *region = cur_region;
8186 if (cur_region)
8187 *region_idx = cur_region->entry->index;
8188 else
8189 *region_idx = 0;
8190 }
8191
8192 return fallthru;
8193 }
8194
8195 #include "gt-omp-expand.h"