/* Loop autoparallelization.
   Copyright (C) 2006-2019 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "stor-layout.h"
#include "tree-nested.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-general.h"
#include "omp-low.h"
#include "tree-ssa.h"
#include "params.h"
#include "params-enum.h"
#include "tree-ssa-alias.h"
#include "tree-eh.h"
#include "gomp-constants.h"
#include "tree-dfa.h"
#include "stringpool.h"
#include "attribs.h"

/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if it is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
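
/* For orientation, a small sketch (an editor's illustration, not output of
   the pass; the array names are hypothetical) of what the independence
   test accepts and rejects:

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];          // iterations independent: OK

     for (i = 1; i < n; i++)
       a[i] = a[i - 1] + b[i];      // carries a dependence: rejected

   The second loop has a cross-iteration dependence of distance 1 on A,
   so its iterations cannot be distributed across threads.  */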
/*
  Reduction handling:
  currently we use code inspired by vect_force_simple_reduction to detect
  reduction patterns.
  The code transformation will be introduced by an example.


parloop
{
  int sum=1;

  for (i = 0; i < N; i++)
   {
    x[i] = i + 3;
    sum+=x[i];
   }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


after reduction transformation (only relevant parts):

parloop
{

....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1() #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

...

}

*/

/* Error reporting helper for parloops_is_simple_reduction below.  GIMPLE
   statement STMT is printed with a message MSG.  */

static void
report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
{
  dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
}

/* DEF_STMT_INFO occurs in a loop that contains a potential reduction
   operation.  Return true if the results of DEF_STMT_INFO are something
   that can be accumulated by such a reduction.  */

static bool
parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info)
{
  return (is_gimple_assign (def_stmt_info->stmt)
          || is_gimple_call (def_stmt_info->stmt)
          || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
          || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
              && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
              && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
}

/* Detect SLP reduction of the form:

   #a1 = phi <a5, a0>
   a2 = operation (a1)
   a3 = operation (a2)
   a4 = operation (a3)
   a5 = operation (a4)

   #a = phi <a5>

   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
   FIRST_STMT is the first reduction stmt in the chain
   (a2 = operation (a1)).

   Return TRUE if a reduction chain was detected.  */

static bool
parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
                           gimple *first_stmt)
{
  class loop *loop = (gimple_bb (phi))->loop_father;
  class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  enum tree_code code;
  gimple *loop_use_stmt = NULL;
  stmt_vec_info use_stmt_info;
  tree lhs;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nloop_uses, size = 0, n_out_of_loop_uses;
  bool found = false;

  if (loop != vect_loop)
    return false;

  auto_vec<stmt_vec_info, 8> reduc_chain;
  lhs = PHI_RESULT (phi);
  code = gimple_assign_rhs_code (first_stmt);
  while (1)
    {
      nloop_uses = 0;
      n_out_of_loop_uses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
        {
          gimple *use_stmt = USE_STMT (use_p);
          if (is_gimple_debug (use_stmt))
            continue;

          /* Check if we got back to the reduction phi.  */
          if (use_stmt == phi)
            {
              loop_use_stmt = use_stmt;
              found = true;
              break;
            }

          if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
            {
              loop_use_stmt = use_stmt;
              nloop_uses++;
            }
          else
            n_out_of_loop_uses++;

          /* There can be either a single use in the loop or two uses in
             phi nodes.  */
          if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
            return false;
        }

      if (found)
        break;

      /* We reached a statement with no loop uses.  */
      if (nloop_uses == 0)
        return false;

      /* This is a loop exit phi, and we haven't reached the reduction phi.  */
      if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
        return false;

      if (!is_gimple_assign (loop_use_stmt)
          || code != gimple_assign_rhs_code (loop_use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
        return false;

      /* Insert USE_STMT into reduction chain.  */
      use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
      reduc_chain.safe_push (use_stmt_info);

      lhs = gimple_assign_lhs (loop_use_stmt);
      size++;
    }

  if (!found || loop_use_stmt != phi || size < 2)
    return false;

  /* Swap the operands, if needed, to make the reduction operand be the second
     operand.  */
  lhs = PHI_RESULT (phi);
  for (unsigned i = 0; i < reduc_chain.length (); ++i)
    {
      gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
      if (gimple_assign_rhs2 (next_stmt) == lhs)
        {
          tree op = gimple_assign_rhs1 (next_stmt);
          stmt_vec_info def_stmt_info = loop_info->lookup_def (op);

          /* Check that the other def is either defined in the loop
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
          if (def_stmt_info
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
              && parloops_valid_reduction_input_p (def_stmt_info))
            {
              lhs = gimple_assign_lhs (next_stmt);
              continue;
            }

          return false;
        }
      else
        {
          tree op = gimple_assign_rhs2 (next_stmt);
          stmt_vec_info def_stmt_info = loop_info->lookup_def (op);

          /* Check that the other def is either defined in the loop
             ("vect_internal_def"), or it's an induction (defined by a
             loop-header phi-node).  */
          if (def_stmt_info
              && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
              && parloops_valid_reduction_input_p (def_stmt_info))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G",
                                 next_stmt);

              swap_ssa_operands (next_stmt,
                                 gimple_assign_rhs1_ptr (next_stmt),
                                 gimple_assign_rhs2_ptr (next_stmt));
              update_stmt (next_stmt);

              if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
                LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
            }
          else
            return false;
        }

      lhs = gimple_assign_lhs (next_stmt);
    }

  /* Build up the actual chain.  */
  for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
    {
      REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
      REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
    }
  REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
  REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;

  /* Save the chain for further analysis in SLP detection.  */
  LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
  REDUC_GROUP_SIZE (reduc_chain[0]) = size;

  return true;
}
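
/* For illustration, a source-level loop whose reduction statement gimplifies
   into the chain shape matched above (an editor's sketch; the array names
   are hypothetical):

     for (i = 0; i < n; i++)
       s = s + a[i] + b[i] + c[i];

   Gimplification splits the right-hand side into single operations,

     a2 = s_1 + a[i];  a3 = a2 + b[i];  a5 = a3 + c[i];

   with a5 feeding back into the reduction phi for s -- a chain of size 3
   that the code above records via the REDUC_GROUP_* fields.  */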

/* Return true if we need an in-order reduction for operation CODE
   on type TYPE.  NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
   overflow must wrap.  */

static bool
parloops_needs_fold_left_reduction_p (tree type, tree_code code,
                                      bool need_wrapping_integral_overflow)
{
  /* CHECKME: check for !flag_finite_math_only too?  */
  if (SCALAR_FLOAT_TYPE_P (type))
    switch (code)
      {
      case MIN_EXPR:
      case MAX_EXPR:
        return false;

      default:
        return !flag_associative_math;
      }

  if (INTEGRAL_TYPE_P (type))
    {
      if (!operation_no_trapping_overflow (type, code))
        return true;
      if (need_wrapping_integral_overflow
          && !TYPE_OVERFLOW_WRAPS (type)
          && operation_can_overflow (code))
        return true;
      return false;
    }

  if (SAT_FIXED_POINT_TYPE_P (type))
    return true;

  return false;
}
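
/* Why the in-order (FOLD_LEFT_REDUCTION) fallback matters for floating
   point: a minimal standalone sketch (an editor's illustration) showing
   that float addition does not reassociate, so reordering a reduction
   changes results unless -fassociative-math allows it:

     #include <stdio.h>

     int
     main (void)
     {
       float big = 1.0e8f, tiny = 1.0f;
       float l = (big + tiny) - big;   // 0.0f: tiny is absorbed by big
       float r = tiny + (big - big);   // 1.0f
       printf ("%f %f\n", l, r);
       return 0;
     }

   The two sums differ only in association yet produce different values,
   which is exactly what the check above guards against.  */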


/* Function parloops_is_simple_reduction

   (1) Detect a cross-iteration def-use cycle that represents a simple
   reduction computation.  We look for the following pattern:

   loop_header:
     a1 = phi < a0, a2 >
     a3 = ...
     a2 = operation (a3, a1)

   or

   a3 = ...
   loop_header:
     a1 = phi < a0, a2 >
     a2 = operation (a3, a1)

   such that:
   1. operation is commutative and associative and it is safe to
      change the order of the computation
   2. no uses for a2 in the loop (a2 is used out of the loop)
   3. no uses of a1 in the loop besides the reduction operation
   4. no uses of a1 outside the loop.

   Conditions 1,4 are tested here.
   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.

   (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
   nested cycles.

   (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
   reductions:

   a1 = phi < a0, a2 >
   inner loop (def of a3)
   a2 = phi < a3 >

   (4) Detect condition expressions, i.e.:
     for (int i = 0; i < N; i++)
       if (a[i] < val)
         ret_val = a[i];

*/

static stmt_vec_info
parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
                              bool *double_reduc,
                              bool need_wrapping_integral_overflow,
                              enum vect_reduction_type *v_reduc_type)
{
  gphi *phi = as_a <gphi *> (phi_info->stmt);
  class loop *loop = (gimple_bb (phi))->loop_father;
  class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
  gimple *phi_use_stmt = NULL;
  enum tree_code orig_code, code;
  tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
  tree type;
  tree name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool phi_def;

  *double_reduc = false;
  *v_reduc_type = TREE_CODE_REDUCTION;

  tree phi_name = PHI_RESULT (phi);
  /* ???  If there are no uses of the PHI result the inner loop reduction
     won't be detected as possibly double-reduction by vectorizable_reduction
     because that tries to walk the PHI arg from the preheader edge which
     can be constant.  See PR60382.  */
  if (has_zero_uses (phi_name))
    return NULL;
  unsigned nphi_def_loop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
        continue;

      if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "intermediate value used outside loop.\n");

          return NULL;
        }

      nphi_def_loop_uses++;
      phi_use_stmt = use_stmt;
    }

  edge latch_e = loop_latch_edge (loop);
  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
  if (TREE_CODE (loop_arg) != SSA_NAME)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "reduction: not ssa_name: %T\n", loop_arg);
      return NULL;
    }

  stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
  if (!def_stmt_info
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
    return NULL;

  if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
    {
      name = gimple_assign_lhs (def_stmt);
      phi_def = false;
    }
  else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
    {
      name = PHI_RESULT (def_stmt);
      phi_def = true;
    }
  else
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "reduction: unhandled reduction operation: %G",
                         def_stmt_info->stmt);
      return NULL;
    }

  unsigned nlatch_def_loop_uses = 0;
  auto_vec<gphi *, 3> lcphis;
  bool inner_loop_of_double_reduc = false;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
        continue;
      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
        nlatch_def_loop_uses++;
      else
        {
          /* We can have more than one loop-closed PHI.  */
          lcphis.safe_push (as_a <gphi *> (use_stmt));
          if (nested_in_vect_loop
              && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
                  == vect_double_reduction_def))
            inner_loop_of_double_reduc = true;
        }
    }

  /* If this isn't a nested cycle or if the nested cycle reduction value
     is used outside of the inner loop we cannot handle uses of the reduction
     value.  */
  if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
      && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "reduction used in loop.\n");
      return NULL;
    }

  /* If DEF_STMT is a phi node itself, we expect it to have a single argument
     defined in the inner loop.  */
  if (phi_def)
    {
      gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
      op1 = PHI_ARG_DEF (def_stmt, 0);

      if (gimple_phi_num_args (def_stmt) != 1
          || TREE_CODE (op1) != SSA_NAME)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "unsupported phi node definition.\n");

          return NULL;
        }

      gimple *def1 = SSA_NAME_DEF_STMT (op1);
      if (gimple_bb (def1)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && loop->inner
          && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
          && is_gimple_assign (def1)
          && is_a <gphi *> (phi_use_stmt)
          && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
        {
          if (dump_enabled_p ())
            report_ploop_op (MSG_NOTE, def_stmt,
                             "detected double reduction: ");

          *double_reduc = true;
          return def_stmt_info;
        }

      return NULL;
    }

  /* If we are vectorizing an inner reduction we are executing that
     in the original order only in case we are not dealing with a
     double reduction.  */
  bool check_reduction = true;
  if (flow_loop_nested_p (vect_loop, loop))
    {
      gphi *lcphi;
      unsigned i;
      check_reduction = false;
      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
        FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
          {
            gimple *use_stmt = USE_STMT (use_p);
            if (is_gimple_debug (use_stmt))
              continue;
            if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
              check_reduction = true;
          }
    }

  gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
  code = orig_code = gimple_assign_rhs_code (def_stmt);

  if (nested_in_vect_loop && !check_reduction)
    {
      /* FIXME: Even for non-reductions code generation is funneled
         through vectorizable_reduction for the stmt defining the
         PHI latch value.  So we have to artificially restrict ourselves
         for the supported operations.  */
      switch (get_gimple_rhs_class (code))
        {
        case GIMPLE_BINARY_RHS:
        case GIMPLE_TERNARY_RHS:
          break;
        default:
          /* Not supported by vectorizable_reduction.  */
          if (dump_enabled_p ())
            report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                             "nested cycle: not handled operation: ");
          return NULL;
        }
      if (dump_enabled_p ())
        report_ploop_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
      return def_stmt_info;
    }

  /* We can handle "res -= x[i]", which is non-associative, by
     simply rewriting this into "res += -x[i]".  Avoid changing
     gimple instruction for the first simple tests and only do this
     if we're allowed to change code at all.  */
  if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
    code = PLUS_EXPR;

  if (code == COND_EXPR)
    {
      if (! nested_in_vect_loop)
        *v_reduc_type = COND_REDUCTION;

      op3 = gimple_assign_rhs1 (def_stmt);
      if (COMPARISON_CLASS_P (op3))
        {
          op4 = TREE_OPERAND (op3, 1);
          op3 = TREE_OPERAND (op3, 0);
        }
      if (op3 == phi_name || op4 == phi_name)
        {
          if (dump_enabled_p ())
            report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                             "reduction: condition depends on previous"
                             " iteration: ");
          return NULL;
        }

      op1 = gimple_assign_rhs2 (def_stmt);
      op2 = gimple_assign_rhs3 (def_stmt);
    }
  else if (!commutative_tree_code (code) || !associative_tree_code (code))
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                         "reduction: not commutative/associative: ");
      return NULL;
    }
  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
    }
  else
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                         "reduction: not handled operation: ");
      return NULL;
    }

  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                         "reduction: both uses not ssa_names: ");

      return NULL;
    }

  type = TREE_TYPE (gimple_assign_lhs (def_stmt));
  if ((TREE_CODE (op1) == SSA_NAME
       && !types_compatible_p (type, TREE_TYPE (op1)))
      || (TREE_CODE (op2) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op2)))
      || (op3 && TREE_CODE (op3) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op3)))
      || (op4 && TREE_CODE (op4) == SSA_NAME
          && !types_compatible_p (type, TREE_TYPE (op4))))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "reduction: multiple types: operation type: "
                           "%T, operands types: %T,%T",
                           type, TREE_TYPE (op1), TREE_TYPE (op2));
          if (op3)
            dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));

          if (op4)
            dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
          dump_printf (MSG_NOTE, "\n");
        }

      return NULL;
    }

  /* Check whether it's ok to change the order of the computation.
     Generally, when vectorizing a reduction we change the order of the
     computation.  This may change the behavior of the program in some
     cases, so we need to check that this is ok.  One exception is when
     vectorizing an outer-loop: the inner-loop is executed sequentially,
     and therefore vectorizing reductions in the inner-loop during
     outer-loop vectorization is safe.  */
  if (check_reduction
      && *v_reduc_type == TREE_CODE_REDUCTION
      && parloops_needs_fold_left_reduction_p (type, code,
                                               need_wrapping_integral_overflow))
    *v_reduc_type = FOLD_LEFT_REDUCTION;

  /* Reduction is safe.  We're dealing with one of the following:
     1) integer arithmetic and no trapv
     2) floating point arithmetic, and special flags permit this optimization
     3) nested cycle (i.e., outer loop vectorization).  */
  stmt_vec_info def1_info = loop_info->lookup_def (op1);
  stmt_vec_info def2_info = loop_info->lookup_def (op2);
  if (code != COND_EXPR && !def1_info && !def2_info)
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_NOTE, def_stmt,
                         "reduction: no defs for operands: ");
      return NULL;
    }

  /* Check that one def is the reduction def, defined by PHI,
     the other def is either defined in the loop ("vect_internal_def"),
     or it's an induction (defined by a loop-header phi-node).  */

  if (def2_info
      && def2_info->stmt == phi
      && (code == COND_EXPR
          || !def1_info
          || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
          || parloops_valid_reduction_input_p (def1_info)))
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
      return def_stmt_info;
    }

  if (def1_info
      && def1_info->stmt == phi
      && (code == COND_EXPR
          || !def2_info
          || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
          || parloops_valid_reduction_input_p (def2_info)))
    {
      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
        {
          /* Check if we can swap operands (just for simplicity - so that
             the rest of the code can assume that the reduction variable
             is always the last (second) argument).  */
          if (code == COND_EXPR)
            {
              /* Swap cond_expr by inverting the condition.  */
              tree cond_expr = gimple_assign_rhs1 (def_stmt);
              enum tree_code invert_code = ERROR_MARK;
              enum tree_code cond_code = TREE_CODE (cond_expr);

              if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }
              if (invert_code != ERROR_MARK)
                {
                  TREE_SET_CODE (cond_expr, invert_code);
                  swap_ssa_operands (def_stmt,
                                     gimple_assign_rhs2_ptr (def_stmt),
                                     gimple_assign_rhs3_ptr (def_stmt));
                }
              else
                {
                  if (dump_enabled_p ())
                    report_ploop_op (MSG_NOTE, def_stmt,
                                     "detected reduction: cannot swap operands "
                                     "for cond_expr");
                  return NULL;
                }
            }
          else
            swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
                               gimple_assign_rhs2_ptr (def_stmt));

          if (dump_enabled_p ())
            report_ploop_op (MSG_NOTE, def_stmt,
                             "detected reduction: need to swap operands: ");

          if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
            LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
        }
      else
        {
          if (dump_enabled_p ())
            report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
        }

      return def_stmt_info;
    }

  /* Try to find SLP reduction chain.  */
  if (! nested_in_vect_loop
      && code != COND_EXPR
      && orig_code != MINUS_EXPR
      && parloops_is_slp_reduction (loop_info, phi, def_stmt))
    {
      if (dump_enabled_p ())
        report_ploop_op (MSG_NOTE, def_stmt,
                         "reduction: detected reduction chain: ");

      return def_stmt_info;
    }

  /* Look for the expression computing loop_arg from loop PHI result.  */
  if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
    return def_stmt_info;

  if (dump_enabled_p ())
    {
      report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
                       "reduction: unknown pattern: ");
    }

  return NULL;
}

/* Wrapper around parloops_is_simple_reduction, which will modify code
   in-place if it enables detection of more reductions.  Arguments
   as there.  */

stmt_vec_info
parloops_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
                                 bool *double_reduc,
                                 bool need_wrapping_integral_overflow)
{
  enum vect_reduction_type v_reduc_type;
  stmt_vec_info def_info
    = parloops_is_simple_reduction (loop_info, phi_info, double_reduc,
                                    need_wrapping_integral_overflow,
                                    &v_reduc_type);
  if (def_info)
    {
      STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
      STMT_VINFO_REDUC_DEF (phi_info) = def_info;
      STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
      STMT_VINFO_REDUC_DEF (def_info) = phi_info;
    }
  return def_info;
}

/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD PARAM_VALUE (PARAM_PARLOOPS_MIN_PER_THREAD)

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple *reduc_stmt;           /* reduction statement.  */
  gimple *reduc_phi;            /* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;       /* SSA_NAME_VERSION of original reduc_phi
                                   result.  */
  gphi *keep_res;               /* The PHI_RESULT of this phi is the resulting value
                                   of the reduction variable when exiting the loop.  */
  tree initial_value;           /* The initial value of the reduction var before entering the loop.  */
  tree field;                   /* The name of the field in the parloop data structure intended for reduction.  */
  tree reduc_addr;              /* The address of the reduction variable for
                                   openacc reductions.  */
  tree init;                    /* reduction initialization value.  */
  gphi *new_phi;                /* (helper field) Newly created phi node whose result
                                   will be passed to the atomic operation.  Represents
                                   the local result each thread computed for the reduction
                                   operation.  */
};

/* Reduction info hashtable helpers.  */

struct reduction_hasher : free_ptr_hash <reduction_info>
{
  static inline hashval_t hash (const reduction_info *);
  static inline bool equal (const reduction_info *, const reduction_info *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const reduction_info *a)
{
  return a->reduc_version;
}

typedef hash_table<reduction_hasher> reduction_info_table_type;

/* Return the reduction info from REDUCTION_LIST for the reduction
   described by PHI, or NULL if PHI is not a known reduction phi.  */

static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->is_empty () || phi == NULL)
    return NULL;

  if (gimple_uid (phi) == (unsigned int)-1
      || gimple_uid (phi) == 0)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);
  gcc_assert (red == NULL || red->reduc_phi == phi);

  return red;
}

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;     /* The version of the name to copy.  */
  tree new_name;        /* The new name used in the copy.  */
  tree field;           /* The field of the structure used to pass the
                           value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
{
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const name_to_copy_elt *a)
{
  return (hashval_t) a->version;
}

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;

/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
                         struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}

/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
                           lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}

/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
                          int nb_loops,
                          vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
              && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
         dependence, nor about read-read (a.k.a. input dependences):
         these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
          || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
        continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        return false;

      /* If the dependence could not be captured by a distance vector,
         conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
        return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
        {
          lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
                                     DDR_DIST_VECT (ddr, j), distres);

          if (!lambda_vector_lexico_pos (distres, nb_loops))
            return false;
        }
    }
  return true;
}
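
/* A worked instance of the legality check above (an editor's sketch):
   loop_parallel_p below asks whether the 1x1 loop-reversal matrix
   T = [-1] is legal.  For a loop carrying a dependence of distance 1,
   the distance vector is d = (1) and T.d = (-1), which is not
   lexicographically positive, so the transform -- and hence
   parallelization -- is rejected.  With no carried dependences there
   is no distance vector left to violate the condition and the
   reversal is legal.  */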

/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (class loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
        fprintf (dump_file, "loop is innermost\n");
      else
        fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
                                           &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
             "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (class loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
                 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      if (gsi == NULL)
        return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
        = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
        name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
        name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj),
                               &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
                                   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
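
/* As an illustration of the canonicalization done above (an editor's
   sketch; the names are hypothetical): for a local 'struct S s' accessed
   as 's.f' inside the region, the base decl is rewritten so that all
   accesses share one SSA address:

     addr.0_1 = &s;                 // emitted once, on edge ENTRY
     ... = MEM[addr.0_1].f;         // each access rebased on the MEM_REF

   Later requests for any part of 's' find addr.0_1 in DECL_ADDRESS
   instead of materializing '&s' again.  */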

/* Return the SSA name holding the result of reduction STMT: the PHI
   result for a phi node, otherwise the assignment's lhs.  */

static tree
reduc_stmt_res (gimple *stmt)
{
  return (gimple_code (stmt) == GIMPLE_PHI
          ? gimple_phi_result (stmt)
          : gimple_assign_lhs (stmt));
}

/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, class loop *loop)
{
  tree init;
  tree type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
                                reduc->reduction_code, type);
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computation is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
           (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type *decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};

/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
        return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
                              dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
        {
          dta->reset = true;
          return NULL_TREE;
        }

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
         -- as a gimple operand, when the address taken is a function invariant
         -- as gimple rhs, when the resulting address is not a function
            invariant
         We do not need to do anything special in the latter case (the base of
         the memory reference whose address is taken may be replaced in the
         DECL_P case).  The former case is more complicated, as we need to
         ensure that the new address is still a gimple operand.  Thus, it
         is not sufficient to replace just the base of the memory reference --
         we need to move the whole computation of the address out of the
         loop.  */
      if (!is_gimple_val (t))
        return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
        return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
                              dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
        {
          dta->reset = true;
          return NULL_TREE;
        }
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
                                int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple *stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
                 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
        {
          gimple_debug_bind_reset_value (stmt);
          dta.changed = true;
        }
    }
  else if (gimple_clobber_p (stmt))
    {
      unlink_stmt_vdef (stmt);
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      {
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          if (is_gimple_debug (gsi_stmt (gsi)))
            {
              if (gimple_debug_bind_p (gsi_stmt (gsi)))
                has_debug_stmt = true;
            }
          else
            eliminate_local_variables_stmt (entry, &gsi, &decl_address);
      }

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          if (gimple_debug_bind_p (gsi_stmt (gsi)))
            eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
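
/* A before/after sketch of this elimination (an editor's illustration;
   the variable names are hypothetical).  For a local 'int x' used in
   the region:

     before:  x = 3;         y = x + 1;
     after:   addr_x = &x;   // hoisted onto the region entry edge
              *addr_x = 3;   y = *addr_x + 1;

   so the region body refers to X only through a pointer that can be
   passed to the outlined thread function.  */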

/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
          && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
        return false;

      return true;
    }

  return false;
}

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
                               int_tree_htab_type *decl_copies,
                               bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies->find_slot_with_hash (&elt, idx,
                                           copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
         it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
                               name_to_copy_table_type *name_copies,
                               int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
                                            false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
        continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
                                            copy_name_p);
      SET_USE (use, copy);
    }
}

/* Analogue of separate_decls_in_region_stmt for the debug statement STMT:
   replaces the ssa names used in STMT and the base decl of the bound
   variable with the duplicates stored in NAME_COPIES and DECL_COPIES.
   Returns true if STMT could not be rewritten and should be discarded
   by the caller.  */

static bool
separate_decls_in_region_debug (gimple *stmt,
                                name_to_copy_table_type *name_copies,
                                int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
        continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
        {
          gimple_debug_bind_reset_value (stmt);
          update_stmt (stmt);
          break;
        }

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = reduc_stmt_res (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
                           SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
                           FIELD_DECL, SSA_NAME_IDENTIFIER (name),
                           TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in the NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, class loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb, continue_bb;
  tree local_res;
  location_t locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  continue_bb = single_pred (loop->latch);
  store_bb = FALLTHRU_EDGE (continue_bb)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop), and another coming
     from a block preceding the loop, when no iterations are executed
     (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
  local_res = copy_ssa_name (lhs);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
  reduc->new_phi = new_phi;

  return 1;
}
1783
1784 struct clsn_data
1785 {
1786 tree store;
1787 tree load;
1788
1789 basic_block store_bb;
1790 basic_block load_bb;
1791 };
1792
1793 /* Callback for htab_traverse. Create an atomic instruction for the
1794 reduction described in SLOT.
1795 DATA annotates the place in memory the atomic operation relates to,
1796 and the basic block it needs to be generated in. */
1797
1798 int
1799 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1800 {
1801 struct reduction_info *const reduc = *slot;
1802 gimple_stmt_iterator gsi;
1803 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1804 tree load_struct;
1805 basic_block bb;
1806 basic_block new_bb;
1807 edge e;
1808 tree t, addr, ref, x;
1809 tree tmp_load, name;
1810 gimple *load;
1811
1812 if (reduc->reduc_addr == NULL_TREE)
1813 {
1814 load_struct = build_simple_mem_ref (clsn_data->load);
1815 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1816
1817 addr = build_addr (t);
1818 }
1819 else
1820 {
1821 /* Set the address for the atomic store. */
1822 addr = reduc->reduc_addr;
1823
1824 /* Remove the non-atomic store '*addr = sum'. */
1825 tree res = PHI_RESULT (reduc->keep_res);
1826 use_operand_p use_p;
1827 gimple *stmt;
1828 bool single_use_p = single_imm_use (res, &use_p, &stmt);
1829 gcc_assert (single_use_p);
1830 replace_uses_by (gimple_vdef (stmt),
1831 gimple_vuse (stmt));
1832 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1833 gsi_remove (&gsi, true);
1834 }
1835
1836 /* Create the atomic operation. */
1837 bb = clsn_data->load_bb;
1838
1839 gsi = gsi_last_bb (bb);
1840 e = split_block (bb, gsi_stmt (gsi));
1841 new_bb = e->dest;
1842
1843 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1844 tmp_load = make_ssa_name (tmp_load);
1845 load = gimple_build_omp_atomic_load (tmp_load, addr,
1846 OMP_MEMORY_ORDER_RELAXED);
1847 SSA_NAME_DEF_STMT (tmp_load) = load;
1848 gsi = gsi_start_bb (new_bb);
1849 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1850
1851 e = split_block (new_bb, load);
1852 new_bb = e->dest;
1853 gsi = gsi_start_bb (new_bb);
1854 ref = tmp_load;
1855 x = fold_build2 (reduc->reduction_code,
1856 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1857 PHI_RESULT (reduc->new_phi));
1858
1859 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1860 GSI_CONTINUE_LINKING);
1861
1862 gimple *store = gimple_build_omp_atomic_store (name,
1863 OMP_MEMORY_ORDER_RELAXED);
1864 gsi_insert_after (&gsi, store, GSI_NEW_STMT);
1865 return 1;
1866 }
1867
1868 /* Create the atomic operation at the join point of the threads.
1869 REDUCTION_LIST describes the reductions in the LOOP.
1870 LD_ST_DATA describes the shared data structure where
1871 shared data is stored in and loaded from. */
1872 static void
1873 create_call_for_reduction (class loop *loop,
1874 reduction_info_table_type *reduction_list,
1875 struct clsn_data *ld_st_data)
1876 {
1877 reduction_list->traverse <class loop *, create_phi_for_local_result> (loop);
1878 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1879 basic_block continue_bb = single_pred (loop->latch);
1880 ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
1881 reduction_list
1882 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1883 }
1884
1885 /* Callback for htab_traverse. Loads the final reduction value at the
1886 join point of all threads, and inserts it in the right place. */
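/* E.g. (a sketch, with invented names): the exit phi

     sum_z = PHI <...>

   is removed and replaced by a load from the shared structure:

     sum_z = .paral_data_load_14->sum;  */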
1887
1888 int
1889 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1890 {
1891 struct reduction_info *const red = *slot;
1892 gimple *stmt;
1893 gimple_stmt_iterator gsi;
1894 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1895 tree load_struct;
1896 tree name;
1897 tree x;
1898
1899 /* If there's no exit phi, the result of the reduction is unused. */
1900 if (red->keep_res == NULL)
1901 return 1;
1902
1903 gsi = gsi_after_labels (clsn_data->load_bb);
1904 load_struct = build_simple_mem_ref (clsn_data->load);
1905 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1906 NULL_TREE);
1907
1908 x = load_struct;
1909 name = PHI_RESULT (red->keep_res);
1910 stmt = gimple_build_assign (name, x);
1911
1912 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1913
1914 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1915 !gsi_end_p (gsi); gsi_next (&gsi))
1916 if (gsi_stmt (gsi) == red->keep_res)
1917 {
1918 remove_phi_node (&gsi, false);
1919 return 1;
1920 }
1921 gcc_unreachable ();
1922 }
1923
1924 /* Load the reduction result that was stored in LD_ST_DATA.
1925 REDUCTION_LIST describes the list of reductions that the
1926 loads should be generated for. */
1927 static void
1928 create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1929 struct clsn_data *ld_st_data)
1930 {
1931 gimple_stmt_iterator gsi;
1932 tree t;
1933 gimple *stmt;
1934
1935 gsi = gsi_after_labels (ld_st_data->load_bb);
1936 t = build_fold_addr_expr (ld_st_data->store);
1937 stmt = gimple_build_assign (ld_st_data->load, t);
1938
1939 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1940
1941 reduction_list
1942 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1943 }
1945
1946 /* Callback for htab_traverse. Store the neutral value for the
1947 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1948 1 for MULT_EXPR, etc. into the reduction field.
1949 The reduction is specified in SLOT. The store information is
1950 passed in DATA. */
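/* E.g. (a sketch): for a PLUS_EXPR reduction on a field sum, this emits

     .paral_data_store.sum = 0;

   into STORE_BB, 0 being the neutral element kept in the reduction's
   INITIAL_VALUE field.  */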
1951
1952 int
1953 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1954 {
1955 struct reduction_info *const red = *slot;
1956 tree t;
1957 gimple *stmt;
1958 gimple_stmt_iterator gsi;
1959 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1960
1961 gsi = gsi_last_bb (clsn_data->store_bb);
1962 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1963 stmt = gimple_build_assign (t, red->initial_value);
1964 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1965
1966 return 1;
1967 }
1968
1969 /* Callback for htab_traverse. Creates a load from a field of LOAD in LOAD_BB
1970 and a store to a field of STORE in STORE_BB for the ssa name and its
1971 duplicate specified in SLOT. */
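/* A sketch (with invented names): for ssa name x_1 with duplicate x_7,
   this emits

     .paral_data_store.x = x_1;       (in STORE_BB)
     x_7 = .paral_data_load_14->x;    (in LOAD_BB)  */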
1972
1973 int
1974 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1975 struct clsn_data *clsn_data)
1976 {
1977 struct name_to_copy_elt *const elt = *slot;
1978 tree t;
1979 gimple *stmt;
1980 gimple_stmt_iterator gsi;
1981 tree type = TREE_TYPE (elt->new_name);
1982 tree load_struct;
1983
1984 gsi = gsi_last_bb (clsn_data->store_bb);
1985 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1986 stmt = gimple_build_assign (t, ssa_name (elt->version));
1987 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1988
1989 gsi = gsi_last_bb (clsn_data->load_bb);
1990 load_struct = build_simple_mem_ref (clsn_data->load);
1991 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1992 stmt = gimple_build_assign (elt->new_name, t);
1993 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1994
1995 return 1;
1996 }
1997
1998 /* Moves all the variables used in LOOP and defined outside of it (including
1999 the initial values of loop phi nodes, and *PER_THREAD if it is an ssa
2000 name) to a structure created for this purpose. The code
2001
2002 while (1)
2003 {
2004 use (a);
2005 use (b);
2006 }
2007
2008 is transformed this way:
2009
2010 bb0:
2011 old.a = a;
2012 old.b = b;
2013
2014 bb1:
2015 a' = new->a;
2016 b' = new->b;
2017 while (1)
2018 {
2019 use (a');
2020 use (b');
2021 }
2022
2023 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
2024 pointer `new' is intentionally not initialized (the loop will be split to a
2025 separate function later, and `new' will be initialized from its arguments).
2026 LD_ST_DATA holds information about the shared data structure used to pass
2027 information among the threads. It is initialized here, and
2028 gen_parallel_loop will pass it to create_call_for_reduction that
2029 needs this information. REDUCTION_LIST describes the reductions
2030 in LOOP. */
2031
2032 static void
2033 separate_decls_in_region (edge entry, edge exit,
2034 reduction_info_table_type *reduction_list,
2035 tree *arg_struct, tree *new_arg_struct,
2036 struct clsn_data *ld_st_data)
2037
2038 {
2039 basic_block bb1 = split_edge (entry);
2040 basic_block bb0 = single_pred (bb1);
2041 name_to_copy_table_type name_copies (10);
2042 int_tree_htab_type decl_copies (10);
2043 unsigned i;
2044 tree type, type_name, nvar;
2045 gimple_stmt_iterator gsi;
2046 struct clsn_data clsn_data;
2047 auto_vec<basic_block, 3> body;
2048 basic_block bb;
2049 basic_block entry_bb = bb1;
2050 basic_block exit_bb = exit->dest;
2051 bool has_debug_stmt = false;
2052
2053 entry = single_succ_edge (entry_bb);
2054 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
2055
2056 FOR_EACH_VEC_ELT (body, i, bb)
2057 {
2058 if (bb != entry_bb && bb != exit_bb)
2059 {
2060 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2061 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
2062 &name_copies, &decl_copies);
2063
2064 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2065 {
2066 gimple *stmt = gsi_stmt (gsi);
2067
2068 if (is_gimple_debug (stmt))
2069 has_debug_stmt = true;
2070 else
2071 separate_decls_in_region_stmt (entry, exit, stmt,
2072 &name_copies, &decl_copies);
2073 }
2074 }
2075 }
2076
2077 /* Now process debug bind stmts. We must not create decls while
2078 processing debug stmts, so we defer their processing so as to
2079 make sure we will have debug info for as many variables as
2080 possible (all of those that were dealt with in the loop above),
2081 and discard those for which we know there's nothing we can
2082 do. */
2083 if (has_debug_stmt)
2084 FOR_EACH_VEC_ELT (body, i, bb)
2085 if (bb != entry_bb && bb != exit_bb)
2086 {
2087 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
2088 {
2089 gimple *stmt = gsi_stmt (gsi);
2090
2091 if (is_gimple_debug (stmt))
2092 {
2093 if (separate_decls_in_region_debug (stmt, &name_copies,
2094 &decl_copies))
2095 {
2096 gsi_remove (&gsi, true);
2097 continue;
2098 }
2099 }
2100
2101 gsi_next (&gsi);
2102 }
2103 }
2104
2105 if (name_copies.is_empty () && reduction_list->is_empty ())
2106 {
2107 /* It may happen that there is nothing to copy (if there are only
2108 loop-carried and external variables in the loop). */
2109 *arg_struct = NULL;
2110 *new_arg_struct = NULL;
2111 }
2112 else
2113 {
2114 /* Create the type for the structure to store the ssa names to. */
2115 type = lang_hooks.types.make_type (RECORD_TYPE);
2116 type_name = build_decl (UNKNOWN_LOCATION,
2117 TYPE_DECL, create_tmp_var_name (".paral_data"),
2118 type);
2119 TYPE_NAME (type) = type_name;
2120
2121 name_copies.traverse <tree, add_field_for_name> (type);
2122 if (reduction_list && !reduction_list->is_empty ())
2123 {
2124 /* Create the fields for reductions. */
2125 reduction_list->traverse <tree, add_field_for_reduction> (type);
2126 }
2127 layout_type (type);
2128
2129 /* Create the loads and stores. */
2130 *arg_struct = create_tmp_var (type, ".paral_data_store");
2131 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
2132 *new_arg_struct = make_ssa_name (nvar);
2133
2134 ld_st_data->store = *arg_struct;
2135 ld_st_data->load = *new_arg_struct;
2136 ld_st_data->store_bb = bb0;
2137 ld_st_data->load_bb = bb1;
2138
2139 name_copies
2140 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
2141 (ld_st_data);
2142
2143 /* Load the reduction results from memory (after the join of the threads). */
2144
2145 if (reduction_list && !reduction_list->is_empty ())
2146 {
2147 reduction_list
2148 ->traverse <struct clsn_data *, create_stores_for_reduction>
2149 (ld_st_data);
2150 clsn_data.load = make_ssa_name (nvar);
2151 clsn_data.load_bb = exit->dest;
2152 clsn_data.store = ld_st_data->store;
2153 create_final_loads_for_reduction (reduction_list, &clsn_data);
2154 }
2155 }
2156 }
2157
2158 /* Returns true if FNDECL was created to run in parallel. */
2159
2160 bool
2161 parallelized_function_p (tree fndecl)
2162 {
2163 cgraph_node *node = cgraph_node::get (fndecl);
2164 gcc_assert (node != NULL);
2165 return node->parallelized_function;
2166 }
2167
2168 /* Creates and returns an empty function that will receive the body of
2169 a parallelized loop. */
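/* The result is roughly equivalent to (a sketch; the exact name is
   produced by ASM_FORMAT_PRIVATE_NAME and clean_symbol_name):

     static void caller.$loopfn.0 (void *.paral_data_param);  */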
2170
2171 static tree
2172 create_loop_fn (location_t loc)
2173 {
2174 char buf[100];
2175 char *tname;
2176 tree decl, type, name, t;
2177 struct function *act_cfun = cfun;
2178 static unsigned loopfn_num;
2179
2180 loc = LOCATION_LOCUS (loc);
2181 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
2182 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
2183 clean_symbol_name (tname);
2184 name = get_identifier (tname);
2185 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
2186
2187 decl = build_decl (loc, FUNCTION_DECL, name, type);
2188 TREE_STATIC (decl) = 1;
2189 TREE_USED (decl) = 1;
2190 DECL_ARTIFICIAL (decl) = 1;
2191 DECL_IGNORED_P (decl) = 0;
2192 TREE_PUBLIC (decl) = 0;
2193 DECL_UNINLINABLE (decl) = 1;
2194 DECL_EXTERNAL (decl) = 0;
2195 DECL_CONTEXT (decl) = NULL_TREE;
2196 DECL_INITIAL (decl) = make_node (BLOCK);
2197 BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
2198
2199 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
2200 DECL_ARTIFICIAL (t) = 1;
2201 DECL_IGNORED_P (t) = 1;
2202 DECL_RESULT (decl) = t;
2203
2204 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
2205 ptr_type_node);
2206 DECL_ARTIFICIAL (t) = 1;
2207 DECL_ARG_TYPE (t) = ptr_type_node;
2208 DECL_CONTEXT (t) = decl;
2209 TREE_USED (t) = 1;
2210 DECL_ARGUMENTS (decl) = t;
2211
2212 allocate_struct_function (decl, false);
2213 DECL_STRUCT_FUNCTION (decl)->last_clique = act_cfun->last_clique;
2214
2215 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
2216 it. */
2217 set_cfun (act_cfun);
2218
2219 return decl;
2220 }
2221
2222 /* Replace uses of NAME by VAL in block BB. */
2223
2224 static void
2225 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
2226 {
2227 gimple *use_stmt;
2228 imm_use_iterator imm_iter;
2229
2230 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
2231 {
2232 if (gimple_bb (use_stmt) != bb)
2233 continue;
2234
2235 use_operand_p use_p;
2236 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2237 SET_USE (use_p, val);
2238 }
2239 }
2240
2241 /* Do transformation from:
2242
2243 <bb preheader>:
2244 ...
2245 goto <bb header>
2246
2247 <bb header>:
2248 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2249 sum_a = PHI <sum_init (preheader), sum_b (latch)>
2250 ...
2251 use (ivtmp_a)
2252 ...
2253 sum_b = sum_a + sum_update
2254 ...
2255 if (ivtmp_a < n)
2256 goto <bb latch>;
2257 else
2258 goto <bb exit>;
2259
2260 <bb latch>:
2261 ivtmp_b = ivtmp_a + 1;
2262 goto <bb header>
2263
2264 <bb exit>:
2265 sum_z = PHI <sum_b (cond[1]), ...>
2266
2267 [1] Where <bb cond> is single_pred (bb latch); in the simplest case,
2268 that's <bb header>.
2269
2270 to:
2271
2272 <bb preheader>:
2273 ...
2274 goto <bb newheader>
2275
2276 <bb header>:
2277 ivtmp_a = PHI <ivtmp_c (latch)>
2278 sum_a = PHI <sum_c (latch)>
2279 ...
2280 use (ivtmp_a)
2281 ...
2282 sum_b = sum_a + sum_update
2283 ...
2284 goto <bb latch>;
2285
2286 <bb newheader>:
2287 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2288 sum_c = PHI <sum_init (preheader), sum_b (latch)>
2289 if (ivtmp_c < n + 1)
2290 goto <bb header>;
2291 else
2292 goto <bb newexit>;
2293
2294 <bb latch>:
2295 ivtmp_b = ivtmp_a + 1;
2296 goto <bb newheader>
2297
2298 <bb newexit>:
2299 sum_y = PHI <sum_c (newheader)>
2300
2301 <bb exit>:
2302 sum_z = PHI <sum_y (newexit), ...>
2303
2304
2305 In unified diff format:
2306
2307 <bb preheader>:
2308 ...
2309 - goto <bb header>
2310 + goto <bb newheader>
2311
2312 <bb header>:
2313 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2314 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
2315 + ivtmp_a = PHI <ivtmp_c (latch)>
2316 + sum_a = PHI <sum_c (latch)>
2317 ...
2318 use (ivtmp_a)
2319 ...
2320 sum_b = sum_a + sum_update
2321 ...
2322 - if (ivtmp_a < n)
2323 - goto <bb latch>;
2324 + goto <bb latch>;
2325 +
2326 + <bb newheader>:
2327 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
2328 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
2329 + if (ivtmp_c < n + 1)
2330 + goto <bb header>;
2331 else
2332 goto <bb exit>;
2333
2334 <bb latch>:
2335 ivtmp_b = ivtmp_a + 1;
2336 - goto <bb header>
2337 + goto <bb newheader>
2338
2339 + <bb newexit>:
2340 + sum_y = PHI <sum_c (newheader)>
2341
2342 <bb exit>:
2343 - sum_z = PHI <sum_b (cond[1]), ...>
2344 + sum_z = PHI <sum_y (newexit), ...>
2345
2346 Note: the example does not show any virtual phis, but these are handled more
2347 or less as reductions.
2348
2349
2350 Moves the exit condition of LOOP to the beginning of its header.
2351 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
2352 bound. */
2353
2354 static void
2355 transform_to_exit_first_loop_alt (class loop *loop,
2356 reduction_info_table_type *reduction_list,
2357 tree bound)
2358 {
2359 basic_block header = loop->header;
2360 basic_block latch = loop->latch;
2361 edge exit = single_dom_exit (loop);
2362 basic_block exit_block = exit->dest;
2363 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2364 tree control = gimple_cond_lhs (cond_stmt);
2365 edge e;
2366
2367 /* Rewriting virtuals into loop-closed ssa normal form makes this
2368 transformation simpler. It also ensures that the virtuals are in
2369 loop-closed ssa normal form after the transformation, which is required by
2370 create_parallel_loop. */
2371 rewrite_virtuals_into_loop_closed_ssa (loop);
2372
2373 /* Create the new_header block. */
2374 basic_block new_header = split_block_before_cond_jump (exit->src);
2375 edge edge_at_split = single_pred_edge (new_header);
2376
2377 /* Redirect entry edge to new_header. */
2378 edge entry = loop_preheader_edge (loop);
2379 e = redirect_edge_and_branch (entry, new_header);
2380 gcc_assert (e == entry);
2381
2382 /* Redirect post_inc_edge to new_header. */
2383 edge post_inc_edge = single_succ_edge (latch);
2384 e = redirect_edge_and_branch (post_inc_edge, new_header);
2385 gcc_assert (e == post_inc_edge);
2386
2387 /* Redirect post_cond_edge to header. */
2388 edge post_cond_edge = single_pred_edge (latch);
2389 e = redirect_edge_and_branch (post_cond_edge, header);
2390 gcc_assert (e == post_cond_edge);
2391
2392 /* Redirect edge_at_split to latch. */
2393 e = redirect_edge_and_branch (edge_at_split, latch);
2394 gcc_assert (e == edge_at_split);
2395
2396 /* Set the new loop bound. */
2397 gimple_cond_set_rhs (cond_stmt, bound);
2398 update_stmt (cond_stmt);
2399
2400 /* Repair the ssa. */
2401 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
2402 edge_var_map *vm;
2403 gphi_iterator gsi;
2404 int i;
2405 for (gsi = gsi_start_phis (header), i = 0;
2406 !gsi_end_p (gsi) && v->iterate (i, &vm);
2407 gsi_next (&gsi), i++)
2408 {
2409 gphi *phi = gsi.phi ();
2410 tree res_a = PHI_RESULT (phi);
2411
2412 /* Create new phi. */
2413 tree res_c = copy_ssa_name (res_a, phi);
2414 gphi *nphi = create_phi_node (res_c, new_header);
2415
2416 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
2417 replace_uses_in_bb_by (res_a, res_c, new_header);
2418
2419 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
2420 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
2421
2422 /* Replace sum_b with sum_c in exit phi. */
2423 tree res_b = redirect_edge_var_map_def (vm);
2424 replace_uses_in_bb_by (res_b, res_c, exit_block);
2425
2426 struct reduction_info *red = reduction_phi (reduction_list, phi);
2427 gcc_assert (virtual_operand_p (res_a)
2428 || res_a == control
2429 || red != NULL);
2430
2431 if (red)
2432 {
2433 /* Register the new reduction phi. */
2434 red->reduc_phi = nphi;
2435 gimple_set_uid (red->reduc_phi, red->reduc_version);
2436 }
2437 }
2438 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
2439
2440 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
2441 flush_pending_stmts (entry);
2442
2443 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
2444 flush_pending_stmts (post_inc_edge);
2445
2447 basic_block new_exit_block = NULL;
2448 if (!single_pred_p (exit->dest))
2449 {
2450 /* Create a new empty exit block, in between the new loop header and the
2451 old exit block. The function separate_decls_in_region needs this block
2452 to insert code that is active on loop exit, but not any other path. */
2453 new_exit_block = split_edge (exit);
2454 }
2455
2456 /* Insert and register the reduction exit phis. */
2457 for (gphi_iterator gsi = gsi_start_phis (exit_block);
2458 !gsi_end_p (gsi);
2459 gsi_next (&gsi))
2460 {
2461 gphi *phi = gsi.phi ();
2462 gphi *nphi = NULL;
2463 tree res_z = PHI_RESULT (phi);
2464 tree res_c;
2465
2466 if (new_exit_block != NULL)
2467 {
2468 /* Now that we have a new exit block, duplicate the phi of the old
2469 exit block in the new exit block to preserve loop-closed ssa. */
2470 edge succ_new_exit_block = single_succ_edge (new_exit_block);
2471 edge pred_new_exit_block = single_pred_edge (new_exit_block);
2472 tree res_y = copy_ssa_name (res_z, phi);
2473 nphi = create_phi_node (res_y, new_exit_block);
2474 res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
2475 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
2476 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
2477 }
2478 else
2479 res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2480
2481 if (virtual_operand_p (res_z))
2482 continue;
2483
2484 gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
2485 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
2486 if (red != NULL)
2487 red->keep_res = (nphi != NULL
2488 ? nphi
2489 : phi);
2490 }
2491
2492 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
2493 then we're still using some fields, so only bother about fields that are
2494 still used: header and latch.
2495 The loop has a new header bb, so we update it. The latch bb stays the
2496 same. */
2497 loop->header = new_header;
2498
2499 /* Recalculate dominance info. */
2500 free_dominance_info (CDI_DOMINATORS);
2501 calculate_dominance_info (CDI_DOMINATORS);
2502
2503 checking_verify_ssa (true, true);
2504 }
2505
2506 /* Tries to move the exit condition of LOOP to the beginning of its header
2507 without duplication of the loop body. NIT is the number of iterations of the
2508 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
2509 transformation is successful. */
2510
2511 static bool
2512 try_transform_to_exit_first_loop_alt (class loop *loop,
2513 reduction_info_table_type *reduction_list,
2514 tree nit)
2515 {
2516 /* Check whether the latch contains a single statement. */
2517 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
2518 return false;
2519
2520 /* Check whether the latch contains no phis. */
2521 if (phi_nodes (loop->latch) != NULL)
2522 return false;
2523
2524 /* Check whether the latch contains the loop iv increment. */
2525 edge back = single_succ_edge (loop->latch);
2526 edge exit = single_dom_exit (loop);
2527 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2528 tree control = gimple_cond_lhs (cond_stmt);
2529 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
2530 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
2531 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
2532 return false;
2533
2534 /* Check whether there's no code between the loop condition and the latch. */
2535 if (!single_pred_p (loop->latch)
2536 || single_pred (loop->latch) != exit->src)
2537 return false;
2538
2539 tree alt_bound = NULL_TREE;
2540 tree nit_type = TREE_TYPE (nit);
2541
2542 /* Figure out whether nit + 1 overflows. */
2543 if (TREE_CODE (nit) == INTEGER_CST)
2544 {
2545 if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
2546 {
2547 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
2548 nit, build_one_cst (nit_type));
2549
2550 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
2551 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2552 return true;
2553 }
2554 else
2555 {
2556 /* Todo: Figure out if we can trigger this, whether it's worth
2557 handling optimally, and whether we can handle it optimally. */
2558 return false;
2559 }
2560 }
2561
2562 gcc_assert (TREE_CODE (nit) == SSA_NAME);
2563
2564 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
2565 iv with base 0 and step 1 that is incremented in the latch, like this:
2566
2567 <bb header>:
2568 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
2569 ...
2570 if (iv_1 < nit)
2571 goto <bb latch>;
2572 else
2573 goto <bb exit>;
2574
2575 <bb latch>:
2576 iv_2 = iv_1 + 1;
2577 goto <bb header>;
2578
2579 The range of iv_1 is [0, nit]. The latch edge is taken for
2580 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
2581 number of latch executions is equal to nit.
2582
2583 The function max_loop_iterations gives us the maximum number of latch
2584 executions, so it gives us the maximum value of nit. */
2585 widest_int nit_max;
2586 if (!max_loop_iterations (loop, &nit_max))
2587 return false;
2588
2589 /* Check if nit + 1 overflows. */
2590 widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
2591 if (nit_max >= type_max)
2592 return false;
2593
2594 gimple *def = SSA_NAME_DEF_STMT (nit);
2595
2596 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
2597 if (def
2598 && is_gimple_assign (def)
2599 && gimple_assign_rhs_code (def) == PLUS_EXPR)
2600 {
2601 tree op1 = gimple_assign_rhs1 (def);
2602 tree op2 = gimple_assign_rhs2 (def);
2603 if (integer_minus_onep (op1))
2604 alt_bound = op2;
2605 else if (integer_minus_onep (op2))
2606 alt_bound = op1;
2607 }
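
/* E.g. (a sketch): if nit was computed as nit_5 = n_4 + 4294967295
   (i.e. n_4 - 1 in a 32-bit unsigned type), we can use n_4 as the
   bound. */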
2608
2609 /* If not found, insert nit + 1. */
2610 if (alt_bound == NULL_TREE)
2611 {
2612 alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
2613 build_int_cst_type (nit_type, 1));
2614
2615 gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
2616
2617 alt_bound
2618 = force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
2619 GSI_CONTINUE_LINKING);
2620 }
2621
2622 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
2623 return true;
2624 }
2625
2626 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
2627 number of iterations of the loop. REDUCTION_LIST describes the reductions in
2628 LOOP. */
2629
2630 static void
2631 transform_to_exit_first_loop (class loop *loop,
2632 reduction_info_table_type *reduction_list,
2633 tree nit)
2634 {
2635 basic_block *bbs, *nbbs, ex_bb, orig_header;
2636 unsigned n;
2637 bool ok;
2638 edge exit = single_dom_exit (loop), hpred;
2639 tree control, control_name, res, t;
2640 gphi *phi, *nphi;
2641 gassign *stmt;
2642 gcond *cond_stmt, *cond_nit;
2643 tree nit_1;
2644
2645 split_block_after_labels (loop->header);
2646 orig_header = single_succ (loop->header);
2647 hpred = single_succ_edge (loop->header);
2648
2649 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
2650 control = gimple_cond_lhs (cond_stmt);
2651 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
2652
2653 /* Make sure that we have phi nodes on exit for all loop header phis
2654 (create_parallel_loop requires that). */
2655 for (gphi_iterator gsi = gsi_start_phis (loop->header);
2656 !gsi_end_p (gsi);
2657 gsi_next (&gsi))
2658 {
2659 phi = gsi.phi ();
2660 res = PHI_RESULT (phi);
2661 t = copy_ssa_name (res, phi);
2662 SET_PHI_RESULT (phi, t);
2663 nphi = create_phi_node (res, orig_header);
2664 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
2665
2666 if (res == control)
2667 {
2668 gimple_cond_set_lhs (cond_stmt, t);
2669 update_stmt (cond_stmt);
2670 control = t;
2671 }
2672 }
2673
2674 bbs = get_loop_body_in_dom_order (loop);
2675
2676 for (n = 0; bbs[n] != exit->src; n++)
2677 continue;
2678 nbbs = XNEWVEC (basic_block, n);
2679 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
2680 bbs + 1, n, nbbs);
2681 gcc_assert (ok);
2682 free (bbs);
2683 ex_bb = nbbs[0];
2684 free (nbbs);
2685
2686 /* Other than reductions, the only gimple reg that should be copied
2687 out of the loop is the control variable. */
2688 exit = single_dom_exit (loop);
2689 control_name = NULL_TREE;
2690 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
2691 !gsi_end_p (gsi); )
2692 {
2693 phi = gsi.phi ();
2694 res = PHI_RESULT (phi);
2695 if (virtual_operand_p (res))
2696 {
2697 gsi_next (&gsi);
2698 continue;
2699 }
2700
2701 /* Check if it is a part of reduction. If it is,
2702 keep the phi at the reduction's keep_res field. The
2703 PHI_RESULT of this phi is the resulting value of the reduction
2704 variable when exiting the loop. */
2705
2706 if (!reduction_list->is_empty ())
2707 {
2708 struct reduction_info *red;
2709
2710 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2711 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
2712 if (red)
2713 {
2714 red->keep_res = phi;
2715 gsi_next (&gsi);
2716 continue;
2717 }
2718 }
2719 gcc_assert (control_name == NULL_TREE
2720 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
2721 control_name = res;
2722 remove_phi_node (&gsi, false);
2723 }
2724 gcc_assert (control_name != NULL_TREE);
2725
2726 /* Initialize the control variable to number of iterations
2727 according to the rhs of the exit condition. */
2728 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
2729 cond_nit = as_a <gcond *> (last_stmt (exit->src));
2730 nit_1 = gimple_cond_rhs (cond_nit);
2731 nit_1 = force_gimple_operand_gsi (&gsi,
2732 fold_convert (TREE_TYPE (control_name), nit_1),
2733 false, NULL_TREE, false, GSI_SAME_STMT);
2734 stmt = gimple_build_assign (control_name, nit_1);
2735 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
2736 }
2737
2738 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
2739 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
2740 NEW_DATA is the variable that should be initialized from the argument
2741 of LOOP_FN. N_THREADS is the requested number of threads, which can be 0 if
2742 that number is to be determined later. */
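/* A condensed sketch of the result for the non-OpenACC case (see the
   description in gen_parallel_loop below for the full picture):

     GIMPLE_OMP_PARALLEL (num_threads (N_THREADS), LOOP_FN, DATA)
       GIMPLE_OMP_FOR (schedule clause, i = init; i < bound; i += 1)
         ... loop body ...
       GIMPLE_OMP_CONTINUE
       GIMPLE_OMP_RETURN     -- for the GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN       -- for the GIMPLE_OMP_PARALLEL  */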
2743
2744 static void
2745 create_parallel_loop (class loop *loop, tree loop_fn, tree data,
2746 tree new_data, unsigned n_threads, location_t loc,
2747 bool oacc_kernels_p)
2748 {
2749 gimple_stmt_iterator gsi;
2750 basic_block for_bb, ex_bb, continue_bb;
2751 tree t, param;
2752 gomp_parallel *omp_par_stmt;
2753 gimple *omp_return_stmt1, *omp_return_stmt2;
2754 gimple *phi;
2755 gcond *cond_stmt;
2756 gomp_for *for_stmt;
2757 gomp_continue *omp_cont_stmt;
2758 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
2759 edge exit, nexit, guard, end, e;
2760
2761 if (oacc_kernels_p)
2762 {
2763 gcc_checking_assert (lookup_attribute ("oacc kernels",
2764 DECL_ATTRIBUTES (cfun->decl)));
2765 /* Indicate to later processing that this is a parallelized OpenACC
2766 kernels construct. */
2767 DECL_ATTRIBUTES (cfun->decl)
2768 = tree_cons (get_identifier ("oacc kernels parallelized"),
2769 NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
2770 }
2771 else
2772 {
2773 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
2774
2775 basic_block bb = loop_preheader_edge (loop)->src;
2776 basic_block paral_bb = single_pred (bb);
2777 gsi = gsi_last_bb (paral_bb);
2778
2779 gcc_checking_assert (n_threads != 0);
2780 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2781 OMP_CLAUSE_NUM_THREADS_EXPR (t)
2782 = build_int_cst (integer_type_node, n_threads);
2783 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2784 gimple_set_location (omp_par_stmt, loc);
2785
2786 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2787
2788 /* Initialize NEW_DATA. */
2789 if (data)
2790 {
2791 gassign *assign_stmt;
2792
2793 gsi = gsi_after_labels (bb);
2794
2795 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2796 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2797 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2798
2799 assign_stmt = gimple_build_assign (new_data,
2800 fold_convert (TREE_TYPE (new_data), param));
2801 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2802 }
2803
2804 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2805 bb = split_loop_exit_edge (single_dom_exit (loop));
2806 gsi = gsi_last_bb (bb);
2807 omp_return_stmt1 = gimple_build_omp_return (false);
2808 gimple_set_location (omp_return_stmt1, loc);
2809 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2810 }
2811
2812 /* Extract data for GIMPLE_OMP_FOR. */
2813 gcc_assert (loop->header == single_dom_exit (loop)->src);
2814 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2815
2816 cvar = gimple_cond_lhs (cond_stmt);
2817 cvar_base = SSA_NAME_VAR (cvar);
2818 phi = SSA_NAME_DEF_STMT (cvar);
2819 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2820 initvar = copy_ssa_name (cvar);
2821 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2822 initvar);
2823 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2824
2825 gsi = gsi_last_nondebug_bb (loop->latch);
2826 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2827 gsi_remove (&gsi, true);
2828
2829 /* Prepare cfg. */
2830 for_bb = split_edge (loop_preheader_edge (loop));
2831 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2832 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2833 gcc_assert (exit == single_dom_exit (loop));
2834
2835 guard = make_edge (for_bb, ex_bb, 0);
2836 /* FIXME: What is the probability? */
2837 guard->probability = profile_probability::guessed_never ();
2838 /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid. */
2839 loop->latch = split_edge (single_succ_edge (loop->latch));
2840 single_pred_edge (loop->latch)->flags = 0;
2841 end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
2842 rescan_loop_exit (end, true, false);
2843
2844 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2845 !gsi_end_p (gpi); gsi_next (&gpi))
2846 {
2847 location_t locus;
2848 gphi *phi = gpi.phi ();
2849 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2850 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
2851
2852 /* If the exit phi is not connected to a header phi in the same loop, this
2853 value is not modified in the loop, and we're done with this phi. */
2854 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2855 && gimple_bb (def_stmt) == loop->header))
2856 {
2857 locus = gimple_phi_arg_location_from_edge (phi, exit);
2858 add_phi_arg (phi, def, guard, locus);
2859 add_phi_arg (phi, def, end, locus);
2860 continue;
2861 }
2862
2863 gphi *stmt = as_a <gphi *> (def_stmt);
2864 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2865 locus = gimple_phi_arg_location_from_edge (stmt,
2866 loop_preheader_edge (loop));
2867 add_phi_arg (phi, def, guard, locus);
2868
2869 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2870 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2871 add_phi_arg (phi, def, end, locus);
2872 }
2873 e = redirect_edge_and_branch (exit, nexit->dest);
2874 PENDING_STMT (e) = NULL;
2875
2876 /* Emit GIMPLE_OMP_FOR. */
2877 if (oacc_kernels_p)
2878 /* Parallelized OpenACC kernels constructs use gang parallelism. See also
2879 omp-offload.c:execute_oacc_device_lower. */
2880 t = build_omp_clause (loc, OMP_CLAUSE_GANG);
2881 else
2882 {
2883 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2884 int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
2885 enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type
2886 = (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
2887 switch (schedule_type)
2888 {
2889 case PARAM_PARLOOPS_SCHEDULE_KIND_static:
2890 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2891 break;
2892 case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
2893 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2894 break;
2895 case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
2896 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2897 break;
2898 case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
2899 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2900 chunk_size = 0;
2901 break;
2902 case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
2903 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2904 chunk_size = 0;
2905 break;
2906 default:
2907 gcc_unreachable ();
2908 }
2909 if (chunk_size != 0)
2910 OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2911 = build_int_cst (integer_type_node, chunk_size);
2912 }
2913
2914 for_stmt = gimple_build_omp_for (NULL,
2915 (oacc_kernels_p
2916 ? GF_OMP_FOR_KIND_OACC_LOOP
2917 : GF_OMP_FOR_KIND_FOR),
2918 t, 1, NULL);
2919
2920 gimple_cond_set_lhs (cond_stmt, cvar_base);
2921 type = TREE_TYPE (cvar);
2922 gimple_set_location (for_stmt, loc);
2923 gimple_omp_for_set_index (for_stmt, 0, initvar);
2924 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2925 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2926 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2927 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2928 cvar_base,
2929 build_int_cst (type, 1)));
2930
2931 gsi = gsi_last_bb (for_bb);
2932 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2933 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2934
2935 /* Emit GIMPLE_OMP_CONTINUE. */
2936 continue_bb = single_pred (loop->latch);
2937 gsi = gsi_last_bb (continue_bb);
2938 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2939 gimple_set_location (omp_cont_stmt, loc);
2940 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2941 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2942
2943 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2944 gsi = gsi_last_bb (ex_bb);
2945 omp_return_stmt2 = gimple_build_omp_return (true);
2946 gimple_set_location (omp_return_stmt2, loc);
2947 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2948
2949 /* After the above, dominance info is hosed. Re-compute it. */
2950 free_dominance_info (CDI_DOMINATORS);
2951 calculate_dominance_info (CDI_DOMINATORS);
2952 }
2953
2954 /* Return the number of phis in BB. If COUNT_VIRTUAL_P is false, don't count
2955 the virtual phi. */
2956
2957 static unsigned int
2958 num_phis (basic_block bb, bool count_virtual_p)
2959 {
2960 unsigned int nr_phis = 0;
2961 gphi_iterator gsi;
2962 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2963 {
2964 if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
2965 continue;
2966
2967 nr_phis++;
2968 }
2969
2970 return nr_phis;
2971 }
2972
2973 /* Generates code to execute the iterations of LOOP in parallel in
2974 N_THREADS threads (which can be 0 if that number is to be determined
2975 later).
2976
2977 NITER describes the number of iterations of LOOP.
2978 REDUCTION_LIST describes the reductions present in the LOOP. */
2979
2980 static void
2981 gen_parallel_loop (class loop *loop,
2982 reduction_info_table_type *reduction_list,
2983 unsigned n_threads, class tree_niter_desc *niter,
2984 bool oacc_kernels_p)
2985 {
2986 tree many_iterations_cond, type, nit;
2987 tree arg_struct, new_arg_struct;
2988 gimple_seq stmts;
2989 edge entry, exit;
2990 struct clsn_data clsn_data;
2991 location_t loc;
2992 gimple *cond_stmt;
2993 unsigned int m_p_thread = 2;
2994
2995 /* From
2996
2997 ---------------------------------------------------------------------
2998 loop
2999 {
3000 IV = phi (INIT, IV + STEP)
3001 BODY1;
3002 if (COND)
3003 break;
3004 BODY2;
3005 }
3006 ---------------------------------------------------------------------
3007
3008 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
3009 we generate the following code:
3010
3011 ---------------------------------------------------------------------
3012
3013 if (MAY_BE_ZERO
3014 || NITER < MIN_PER_THREAD * N_THREADS)
3015 goto original;
3016
3017 BODY1;
3018 store all local loop-invariant variables used in the body of the loop to DATA.
3019 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
3020 load the variables from DATA.
3021 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
3022 BODY2;
3023 BODY1;
3024 GIMPLE_OMP_CONTINUE;
3025 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
3026 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
3027 goto end;
3028
3029 original:
3030 loop
3031 {
3032 IV = phi (INIT, IV + STEP)
3033 BODY1;
3034 if (COND)
3035 break;
3036 BODY2;
3037 }
3038
3039 end:
3040
3041 */
3042
3043 /* Create two versions of the loop -- in the old one, we know that the
3044 number of iterations is large enough, and we will transform it into the
3045 loop that will be split to loop_fn; the new one will be used for the
3046 remaining iterations. */
3047
3048 /* We should compute a better number-of-iterations value for outer loops.
3049 That is, if we have
3050
3051 for (i = 0; i < n; ++i)
3052 for (j = 0; j < m; ++j)
3053 ...
3054
3055 we should compute nit = n * m, not nit = n.
3056 Also may_be_zero handling would need to be adjusted. */
3057
3058 type = TREE_TYPE (niter->niter);
3059 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
3060 NULL_TREE);
3061 if (stmts)
3062 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3063
3064 if (!oacc_kernels_p)
3065 {
3066 if (loop->inner)
3067 m_p_thread = 2;
3068 else
3069 m_p_thread = MIN_PER_THREAD;
3070
3071 gcc_checking_assert (n_threads != 0);
3072 many_iterations_cond
3073 = fold_build2 (GE_EXPR, boolean_type_node,
3074 nit, build_int_cst (type, m_p_thread * n_threads - 1));
3075
3076 many_iterations_cond
3077 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3078 invert_truthvalue (unshare_expr (niter->may_be_zero)),
3079 many_iterations_cond);
3080 many_iterations_cond
3081 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
3082 if (stmts)
3083 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
3084 if (!is_gimple_condexpr (many_iterations_cond))
3085 {
3086 many_iterations_cond
3087 = force_gimple_operand (many_iterations_cond, &stmts,
3088 true, NULL_TREE);
3089 if (stmts)
3090 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
3091 stmts);
3092 }
3093
3094 initialize_original_copy_tables ();
3095
3096 /* We assume that the loop usually iterates a lot. */
3097 loop_version (loop, many_iterations_cond, NULL,
3098 profile_probability::likely (),
3099 profile_probability::unlikely (),
3100 profile_probability::likely (),
3101 profile_probability::unlikely (), true);
3102 update_ssa (TODO_update_ssa);
3103 free_original_copy_tables ();
3104 }
3105
3106 /* Base all the induction variables in LOOP on a single control one. */
3107 canonicalize_loop_ivs (loop, &nit, true);
3108 if (num_phis (loop->header, false) != reduction_list->elements () + 1)
3109 {
3110 /* The call to canonicalize_loop_ivs above failed to "base all the
3111 induction variables in LOOP on a single control one". Do damage
3112 control. */
3113 basic_block preheader = loop_preheader_edge (loop)->src;
3114 basic_block cond_bb = single_pred (preheader);
3115 gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
3116 gimple_cond_make_true (cond);
3117 update_stmt (cond);
3118 /* We've gotten rid of the duplicate loop created by loop_version, but
3119 we can't undo whatever canonicalize_loop_ivs has done.
3120 TODO: Fix this properly by ensuring that the call to
3121 canonicalize_loop_ivs succeeds. */
3122 if (dump_file
3123 && (dump_flags & TDF_DETAILS))
3124 fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
3125 " aborting transformation\n", loop->num);
3126 return;
3127 }
3128
3129 /* Ensure that the exit condition is the first statement in the loop.
3130 The common case is that latch of the loop is empty (apart from the
3131 increment) and immediately follows the loop exit test. Attempt to move the
3132 entry of the loop directly before the exit check and increase the number of
3133 iterations of the loop by one. */
3134 if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
3135 {
3136 if (dump_file
3137 && (dump_flags & TDF_DETAILS))
3138 fprintf (dump_file,
3139 "alternative exit-first loop transform succeeded"
3140 " for loop %d\n", loop->num);
3141 }
3142 else
3143 {
3144 if (oacc_kernels_p)
3145 n_threads = 1;
3146
3147 /* Fall back on the method that handles more cases, but duplicates the
3148 loop body: move the exit condition of LOOP to the beginning of its
3149 header, and duplicate the part of the last iteration that gets disabled
3150 to the exit of the loop. */
3151 transform_to_exit_first_loop (loop, reduction_list, nit);
3152 }
3153
3154 /* Generate initializations for reductions. */
3155 if (!reduction_list->is_empty ())
3156 reduction_list->traverse <class loop *, initialize_reductions> (loop);
3157
3158 /* Eliminate the references to local variables from the loop. */
3159 gcc_assert (single_exit (loop));
3160 entry = loop_preheader_edge (loop);
3161 exit = single_dom_exit (loop);
3162
3163 /* This rewrites the body in terms of new variables. This has already
3164 been done for oacc_kernels_p in pass_lower_omp/lower_omp (). */
3165 if (!oacc_kernels_p)
3166 {
3167 eliminate_local_variables (entry, exit);
3168 /* In the old loop, move all variables non-local to the loop into a
3169 structure and back, and create separate decls for the variables used in
3170 the loop. */
3171 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
3172 &new_arg_struct, &clsn_data);
3173 }
3174 else
3175 {
3176 arg_struct = NULL_TREE;
3177 new_arg_struct = NULL_TREE;
3178 clsn_data.load = NULL_TREE;
3179 clsn_data.load_bb = exit->dest;
3180 clsn_data.store = NULL_TREE;
3181 clsn_data.store_bb = NULL;
3182 }
3183
3184 /* Create the parallel constructs. */
3185 loc = UNKNOWN_LOCATION;
3186 cond_stmt = last_stmt (loop->header);
3187 if (cond_stmt)
3188 loc = gimple_location (cond_stmt);
3189 create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
3190 n_threads, loc, oacc_kernels_p);
3191 if (!reduction_list->is_empty ())
3192 create_call_for_reduction (loop, reduction_list, &clsn_data);
3193
3194 scev_reset ();
3195
3196 /* Free loop bound estimations that could contain references to
3197 removed statements. */
3198 free_numbers_of_iterations_estimates (cfun);
3199 }
3200
3201 /* Returns true when LOOP contains vector phi nodes. */
3202
3203 static bool
3204 loop_has_vector_phi_nodes (class loop *loop)
3205 {
3206 unsigned i;
3207 basic_block *bbs = get_loop_body_in_dom_order (loop);
3208 gphi_iterator gsi;
3209 bool res = true;
3210
3211 for (i = 0; i < loop->num_nodes; i++)
3212 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3213 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
3214 goto end;
3215
3216 res = false;
3217 end:
3218 free (bbs);
3219 return res;
3220 }
3221
3222 /* Create a reduction_info struct, initialize it with REDUC_STMT
3223 and PHI, and insert it into REDUCTION_LIST. */
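/* A typical shape this recognizes (a sketch, with invented names):

     <bb header>:
       sum_1 = PHI <0.0 (preheader), sum_2 (latch)>
       ...
       sum_2 = sum_1 + data_3;

   where REDUC_STMT is `sum_2 = sum_1 + data_3' and PHI is the header
   phi; for double reductions REDUC_STMT may itself be a phi.  */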
3224
3225 static void
3226 build_new_reduction (reduction_info_table_type *reduction_list,
3227 gimple *reduc_stmt, gphi *phi)
3228 {
3229 reduction_info **slot;
3230 struct reduction_info *new_reduction;
3231 enum tree_code reduction_code;
3232
3233 gcc_assert (reduc_stmt);
3234
3235 if (gimple_code (reduc_stmt) == GIMPLE_PHI)
3236 {
3237 tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
3238 gimple *def1 = SSA_NAME_DEF_STMT (op1);
3239 reduction_code = gimple_assign_rhs_code (def1);
3240 }
3241 else
3242 reduction_code = gimple_assign_rhs_code (reduc_stmt);
3243 /* Check for a reduction operation supported by OpenMP. */
3244 switch (reduction_code)
3245 {
3246 case PLUS_EXPR:
3247 case MULT_EXPR:
3248 case MAX_EXPR:
3249 case MIN_EXPR:
3250 case BIT_IOR_EXPR:
3251 case BIT_XOR_EXPR:
3252 case BIT_AND_EXPR:
3253 case TRUTH_OR_EXPR:
3254 case TRUTH_XOR_EXPR:
3255 case TRUTH_AND_EXPR:
3256 break;
3257 default:
3258 return;
3259 }
3260
3261 if (dump_file && (dump_flags & TDF_DETAILS))
3262 {
3263 fprintf (dump_file,
3264 "Detected reduction. reduction stmt is:\n");
3265 print_gimple_stmt (dump_file, reduc_stmt, 0);
3266 fprintf (dump_file, "\n");
3267 }
3268
3269 new_reduction = XCNEW (struct reduction_info);
3270
3271 new_reduction->reduc_stmt = reduc_stmt;
3272 new_reduction->reduc_phi = phi;
3273 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
3274 new_reduction->reduction_code = reduction_code;
3275 slot = reduction_list->find_slot (new_reduction, INSERT);
3276 *slot = new_reduction;
3277 }
3278
3279 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
3280
3281 int
3282 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
3283 {
3284 struct reduction_info *const red = *slot;
3285 gimple_set_uid (red->reduc_phi, red->reduc_version);
3286 return 1;
3287 }
3288
3289 /* Return true if the type of reduction performed by STMT_INFO is suitable
3290 for this pass. */
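/* For example, a floating-point summation compiled without
   -funsafe-math-optimizations must typically be computed in source
   order; such an in-order reduction is classified as
   FOLD_LEFT_REDUCTION and is rejected here.  */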
3291
3292 static bool
3293 valid_reduction_p (stmt_vec_info stmt_info)
3294 {
3295 /* Parallelization would reassociate the operation, which isn't
3296 allowed for in-order reductions. */
3297 vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info);
3298 return reduc_type != FOLD_LEFT_REDUCTION;
3299 }
3300
3301 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
3302
3303 static void
3304 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
3305 {
3306 gphi_iterator gsi;
3307 loop_vec_info simple_loop_info;
3308 auto_vec<gphi *, 4> double_reduc_phis;
3309 auto_vec<gimple *, 4> double_reduc_stmts;
3310
3311 vec_info_shared shared;
3312 simple_loop_info = vect_analyze_loop_form (loop, &shared);
3313 if (simple_loop_info == NULL)
3314 goto gather_done;
3315
3316 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3317 {
3318 gphi *phi = gsi.phi ();
3319 affine_iv iv;
3320 tree res = PHI_RESULT (phi);
3321 bool double_reduc;
3322
3323 if (virtual_operand_p (res))
3324 continue;
3325
3326 if (simple_iv (loop, loop, res, &iv, true))
3327 continue;
3328
3329 stmt_vec_info reduc_stmt_info
3330 = parloops_force_simple_reduction (simple_loop_info,
3331 simple_loop_info->lookup_stmt (phi),
3332 &double_reduc, true);
3333 if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
3334 continue;
3335
3336 if (double_reduc)
3337 {
3338 if (loop->inner->inner != NULL)
3339 continue;
3340
3341 double_reduc_phis.safe_push (phi);
3342 double_reduc_stmts.safe_push (reduc_stmt_info->stmt);
3343 continue;
3344 }
3345
3346 build_new_reduction (reduction_list, reduc_stmt_info->stmt, phi);
3347 }
3348 delete simple_loop_info;
3349
3350 if (!double_reduc_phis.is_empty ())
3351 {
3352 vec_info_shared shared;
3353 simple_loop_info = vect_analyze_loop_form (loop->inner, &shared);
3354 if (simple_loop_info)
3355 {
3356 gphi *phi;
3357 unsigned int i;
3358
3359 FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
3360 {
3361 affine_iv iv;
3362 tree res = PHI_RESULT (phi);
3363 bool double_reduc;
3364
3365 use_operand_p use_p;
3366 gimple *inner_stmt;
3367 bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
3368 gcc_assert (single_use_p);
3369 if (gimple_code (inner_stmt) != GIMPLE_PHI)
3370 continue;
3371 gphi *inner_phi = as_a <gphi *> (inner_stmt);
3372 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
3373 &iv, true))
3374 continue;
3375
3376 stmt_vec_info inner_phi_info
3377 = simple_loop_info->lookup_stmt (inner_phi);
3378 stmt_vec_info inner_reduc_stmt_info
3379 = parloops_force_simple_reduction (simple_loop_info,
3380 inner_phi_info,
3381 &double_reduc, true);
3382 gcc_assert (!double_reduc);
3383 if (!inner_reduc_stmt_info
3384 || !valid_reduction_p (inner_reduc_stmt_info))
3385 continue;
3386
3387 build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
3388 }
3389 delete simple_loop_info;
3390 }
3391 }
3392
3393 gather_done:
3394 if (reduction_list->is_empty ())
3395 return;
3396
3397 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
3398 and delete simple_loop_info, we can set gimple_uid of reduc_phi stmts only
3399 now. */
3400 basic_block bb;
3401 FOR_EACH_BB_FN (bb, cfun)
3402 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3403 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
3404 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
3405 }
3406
3407 /* Try to initialize NITER for the code generation part. */
3408
3409 static bool
3410 try_get_loop_niter (loop_p loop, class tree_niter_desc *niter)
3411 {
3412 edge exit = single_dom_exit (loop);
3413
3414 gcc_assert (exit);
3415
3416 /* We need to know # of iterations, and there should be no uses outside
3417 the loop of values defined inside it, unless the values are invariants
3418 of the loop. */
3419 if (!number_of_iterations_exit (loop, exit, niter, false))
3420 {
3421 if (dump_file && (dump_flags & TDF_DETAILS))
3422 fprintf (dump_file, " FAILED: number of iterations not known\n");
3423 return false;
3424 }
3425
3426 return true;
3427 }
3428
3429 /* Return the default def of the first function argument. */
3430
3431 static tree
3432 get_omp_data_i_param (void)
3433 {
3434 tree decl = DECL_ARGUMENTS (cfun->decl);
3435 gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
3436 return ssa_default_def (cfun, decl);
3437 }
3438
3439 /* For PHI in loop header of LOOP, look for pattern:
3440
3441 <bb preheader>
3442 .omp_data_i = &.omp_data_arr;
3443 addr = .omp_data_i->sum;
3444 sum_a = *addr;
3445
3446 <bb header>:
3447 sum_b = PHI <sum_a (preheader), sum_c (latch)>
3448
3449 and return addr. Otherwise, return NULL_TREE. */
3450
3451 static tree
3452 find_reduc_addr (class loop *loop, gphi *phi)
3453 {
3454 edge e = loop_preheader_edge (loop);
3455 tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
3456 gimple *stmt = SSA_NAME_DEF_STMT (arg);
3457 if (!gimple_assign_single_p (stmt))
3458 return NULL_TREE;
3459 tree memref = gimple_assign_rhs1 (stmt);
3460 if (TREE_CODE (memref) != MEM_REF)
3461 return NULL_TREE;
3462 tree addr = TREE_OPERAND (memref, 0);
3463
3464 gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
3465 if (!gimple_assign_single_p (stmt2))
3466 return NULL_TREE;
3467 tree compref = gimple_assign_rhs1 (stmt2);
3468 if (TREE_CODE (compref) != COMPONENT_REF)
3469 return NULL_TREE;
3470 tree addr2 = TREE_OPERAND (compref, 0);
3471 if (TREE_CODE (addr2) != MEM_REF)
3472 return NULL_TREE;
3473 addr2 = TREE_OPERAND (addr2, 0);
3474 if (TREE_CODE (addr2) != SSA_NAME
3475 || addr2 != get_omp_data_i_param ())
3476 return NULL_TREE;
3477
3478 return addr;
3479 }
3480
3481 /* Try to initialize REDUCTION_LIST for the code generation part.
3482 REDUCTION_LIST describes the reductions. */
3483
3484 static bool
3485 try_create_reduction_list (loop_p loop,
3486 reduction_info_table_type *reduction_list,
3487 bool oacc_kernels_p)
3488 {
3489 edge exit = single_dom_exit (loop);
3490 gphi_iterator gsi;
3491
3492 gcc_assert (exit);
3493
3494 /* Try to get rid of exit phis. */
3495 final_value_replacement_loop (loop);
3496
3497 gather_scalar_reductions (loop, reduction_list);
3498
3500 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
3501 {
3502 gphi *phi = gsi.phi ();
3503 struct reduction_info *red;
3504 imm_use_iterator imm_iter;
3505 use_operand_p use_p;
3506 gimple *reduc_phi;
3507 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
3508
3509 if (!virtual_operand_p (val))
3510 {
3511 if (TREE_CODE (val) != SSA_NAME)
3512 {
3513 if (dump_file && (dump_flags & TDF_DETAILS))
3514 fprintf (dump_file,
3515 " FAILED: exit PHI argument invariant.\n");
3516 return false;
3517 }
3518
3519 if (dump_file && (dump_flags & TDF_DETAILS))
3520 {
3521 fprintf (dump_file, "phi is ");
3522 print_gimple_stmt (dump_file, phi, 0);
3523 fprintf (dump_file, "arg of phi to exit: value ");
3524 print_generic_expr (dump_file, val);
3525 fprintf (dump_file, " used outside loop\n");
3526 fprintf (dump_file,
3527 " checking if it is part of reduction pattern:\n");
3528 }
3529 if (reduction_list->is_empty ())
3530 {
3531 if (dump_file && (dump_flags & TDF_DETAILS))
3532 fprintf (dump_file,
3533 " FAILED: it is not a part of reduction.\n");
3534 return false;
3535 }
3536 reduc_phi = NULL;
3537 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
3538 {
3539 if (!gimple_debug_bind_p (USE_STMT (use_p))
3540 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
3541 {
3542 reduc_phi = USE_STMT (use_p);
3543 break;
3544 }
3545 }
3546 red = reduction_phi (reduction_list, reduc_phi);
3547 if (red == NULL)
3548 {
3549 if (dump_file && (dump_flags & TDF_DETAILS))
3550 fprintf (dump_file,
3551 " FAILED: it is not a part of reduction.\n");
3552 return false;
3553 }
3554 if (red->keep_res != NULL)
3555 {
3556 if (dump_file && (dump_flags & TDF_DETAILS))
3557 fprintf (dump_file,
3558 " FAILED: reduction has multiple exit phis.\n");
3559 return false;
3560 }
3561 red->keep_res = phi;
3562 if (dump_file && (dump_flags & TDF_DETAILS))
3563 {
3564 fprintf (dump_file, "reduction phi is ");
3565 print_gimple_stmt (dump_file, red->reduc_phi, 0);
3566 fprintf (dump_file, "reduction stmt is ");
3567 print_gimple_stmt (dump_file, red->reduc_stmt, 0);
3568 }
3569 }
3570 }
3571
3572 /* The iterations of the loop may communicate only through bivs whose
3573 iteration space can be distributed efficiently. */
3574 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
3575 {
3576 gphi *phi = gsi.phi ();
3577 tree def = PHI_RESULT (phi);
3578 affine_iv iv;
3579
3580 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
3581 {
3582 struct reduction_info *red;
3583
3584 red = reduction_phi (reduction_list, phi);
3585 if (red == NULL)
3586 {
3587 if (dump_file && (dump_flags & TDF_DETAILS))
3588 fprintf (dump_file,
3589 " FAILED: scalar dependency between iterations\n");
3590 return false;
3591 }
3592 }
3593 }
3594
3595 if (oacc_kernels_p)
3596 {
3597 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
3598 gsi_next (&gsi))
3599 {
3600 gphi *phi = gsi.phi ();
3601 tree def = PHI_RESULT (phi);
3602 affine_iv iv;
3603
3604 if (!virtual_operand_p (def)
3605 && !simple_iv (loop, loop, def, &iv, true))
3606 {
3607 tree addr = find_reduc_addr (loop, phi);
3608 if (addr == NULL_TREE)
3609 return false;
3610 struct reduction_info *red = reduction_phi (reduction_list, phi);
3611 red->reduc_addr = addr;
3612 }
3613 }
3614 }
3615
3616 return true;
3617 }
3618
3619 /* Return true if LOOP contains phis with ADDR_EXPR in their arguments.  */
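/* E.g. (schematic GIMPLE) a phi like

     # p_1 = PHI <&var(2), p_2(3)>

   where one argument takes the address of a local variable.  */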
3620
3621 static bool
3622 loop_has_phi_with_address_arg (class loop *loop)
3623 {
3624 basic_block *bbs = get_loop_body (loop);
3625 bool res = false;
3626
3627 unsigned i, j;
3628 gphi_iterator gsi;
3629 for (i = 0; i < loop->num_nodes; i++)
3630 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3631 {
3632 gphi *phi = gsi.phi ();
3633 for (j = 0; j < gimple_phi_num_args (phi); j++)
3634 {
3635 tree arg = gimple_phi_arg_def (phi, j);
3636 if (TREE_CODE (arg) == ADDR_EXPR)
3637 {
3638 /* This should be handled by eliminate_local_variables, but that
3639 function currently ignores phis. */
3640 res = true;
3641 goto end;
3642 }
3643 }
3644 }
3645 end:
3646 free (bbs);
3647
3648 return res;
3649 }
3650
3651 /* Return true if memory ref REF (corresponding to the stmt at GSI in
3652 REGION_BBS[I]) conflicts with the statements in REGION_BBS[I] after GSI,
3653 or with the statements in REGION_BBS[I + n].  REF_IS_STORE indicates
3654 whether REF is a store.  Ignore conflicts with SKIP_STMT.  */
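/* In other words: if REF is a store, a conflict is a later statement
   that may read the location; if REF is a load, a conflict is a later
   statement that may clobber it.  */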
3655
3656 static bool
3657 ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
3658 bool ref_is_store, vec<basic_block> region_bbs,
3659 unsigned int i, gimple *skip_stmt)
3660 {
3661 basic_block bb = region_bbs[i];
3662 gsi_next (&gsi);
3663
3664 while (true)
3665 {
3666 for (; !gsi_end_p (gsi);
3667 gsi_next (&gsi))
3668 {
3669 gimple *stmt = gsi_stmt (gsi);
3670 if (stmt == skip_stmt)
3671 {
3672 if (dump_file)
3673 {
3674 fprintf (dump_file, "skipping reduction store: ");
3675 print_gimple_stmt (dump_file, stmt, 0);
3676 }
3677 continue;
3678 }
3679
3680 if (!gimple_vdef (stmt)
3681 && !gimple_vuse (stmt))
3682 continue;
3683
3684 if (gimple_code (stmt) == GIMPLE_RETURN)
3685 continue;
3686
3687 if (ref_is_store)
3688 {
3689 if (ref_maybe_used_by_stmt_p (stmt, ref))
3690 {
3691 if (dump_file)
3692 {
3693 fprintf (dump_file, "Stmt ");
3694 print_gimple_stmt (dump_file, stmt, 0);
3695 }
3696 return true;
3697 }
3698 }
3699 else
3700 {
3701 if (stmt_may_clobber_ref_p_1 (stmt, ref))
3702 {
3703 if (dump_file)
3704 {
3705 fprintf (dump_file, "Stmt ");
3706 print_gimple_stmt (dump_file, stmt, 0);
3707 }
3708 return true;
3709 }
3710 }
3711 }
3712 i++;
3713 if (i == region_bbs.length ())
3714 break;
3715 bb = region_bbs[i];
3716 gsi = gsi_start_bb (bb);
3717 }
3718
3719 return false;
3720 }
3721
3722 /* Return true if the statements in the bbs of REGION_BBS that are not in
3723 IN_LOOP_BBS can be executed in parallel with the loop.  Record the stores
3724 of reduction results in REDUCTION_STORES.  */
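/* Loads from the .omp_data_i structure are always acceptable, and the
   load/store pair implementing a reduction result is recognized and
   exempted from the conflict check.  */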
3725
3726 static bool
3727 oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3728 reduction_info_table_type *reduction_list,
3729 bitmap reduction_stores)
3730 {
3731 tree omp_data_i = get_omp_data_i_param ();
3732
3733 unsigned i;
3734 basic_block bb;
3735 FOR_EACH_VEC_ELT (region_bbs, i, bb)
3736 {
3737 if (bitmap_bit_p (in_loop_bbs, bb->index))
3738 continue;
3739
3740 gimple_stmt_iterator gsi;
3741 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
3742 gsi_next (&gsi))
3743 {
3744 gimple *stmt = gsi_stmt (gsi);
3745 gimple *skip_stmt = NULL;
3746
3747 if (is_gimple_debug (stmt)
3748 || gimple_code (stmt) == GIMPLE_COND)
3749 continue;
3750
3751 ao_ref ref;
3752 bool ref_is_store = false;
3753 if (gimple_assign_load_p (stmt))
3754 {
3755 tree rhs = gimple_assign_rhs1 (stmt);
3756 tree base = get_base_address (rhs);
3757 if (TREE_CODE (base) == MEM_REF
3758 && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
3759 continue;
3760
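/* Detect the load of a reduction result: the loaded value feeds the
   reduction's exit phi, whose result is in turn stored back to memory.
   Record the store's vdef in REDUCTION_STORES and skip the store in
   the conflict check below.  */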
3761 tree lhs = gimple_assign_lhs (stmt);
3762 if (TREE_CODE (lhs) == SSA_NAME
3763 && has_single_use (lhs))
3764 {
3765 use_operand_p use_p;
3766 gimple *use_stmt;
3767 struct reduction_info *red;
3768 single_imm_use (lhs, &use_p, &use_stmt);
3769 if (gimple_code (use_stmt) == GIMPLE_PHI
3770 && (red = reduction_phi (reduction_list, use_stmt)))
3771 {
3772 tree val = PHI_RESULT (red->keep_res);
3773 if (has_single_use (val))
3774 {
3775 single_imm_use (val, &use_p, &use_stmt);
3776 if (gimple_store_p (use_stmt))
3777 {
3778 unsigned int id
3779 = SSA_NAME_VERSION (gimple_vdef (use_stmt));
3780 bitmap_set_bit (reduction_stores, id);
3781 skip_stmt = use_stmt;
3782 if (dump_file)
3783 {
3784 fprintf (dump_file, "found reduction load: ");
3785 print_gimple_stmt (dump_file, stmt, 0);
3786 }
3787 }
3788 }
3789 }
3790 }
3791
3792 ao_ref_init (&ref, rhs);
3793 }
3794 else if (gimple_store_p (stmt))
3795 {
3796 ao_ref_init (&ref, gimple_assign_lhs (stmt));
3797 ref_is_store = true;
3798 }
3799 else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
3800 continue;
3801 else if (!gimple_has_side_effects (stmt)
3802 && !gimple_could_trap_p (stmt)
3803 && !stmt_could_throw_p (cfun, stmt)
3804 && !gimple_vdef (stmt)
3805 && !gimple_vuse (stmt))
3806 continue;
3807 else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
3808 continue;
3809 else if (gimple_code (stmt) == GIMPLE_RETURN)
3810 continue;
3811 else
3812 {
3813 if (dump_file)
3814 {
3815 fprintf (dump_file, "Unhandled stmt in entry/exit: ");
3816 print_gimple_stmt (dump_file, stmt, 0);
3817 }
3818 return false;
3819 }
3820
3821 if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
3822 i, skip_stmt))
3823 {
3824 if (dump_file)
3825 {
3826 fprintf (dump_file, "conflicts with entry/exit stmt: ");
3827 print_gimple_stmt (dump_file, stmt, 0);
3828 }
3829 return false;
3830 }
3831 }
3832 }
3833
3834 return true;
3835 }
3836
3837 /* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
3838 a gang_pos == 0 test, except for the stores recorded in REDUCTION_STORES.
3839 Return true if any changes were made.  */
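/* Schematically (illustration only), a store

     *p = x;

   outside the loop becomes

     pos = GOACC_DIM_POS (GOMP_DIM_GANG);
     if (pos == 0)
       *p = x;

   with a virtual phi merging the memory state of both paths.  */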
3840
3841 static bool
3842 oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3843 bitmap reduction_stores)
3844 {
3845 tree gang_pos = NULL_TREE;
3846 bool changed = false;
3847
3848 unsigned i;
3849 basic_block bb;
3850 FOR_EACH_VEC_ELT (region_bbs, i, bb)
3851 {
3852 if (bitmap_bit_p (in_loop_bbs, bb->index))
3853 continue;
3854
3855 gimple_stmt_iterator gsi;
3856 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3857 {
3858 gimple *stmt = gsi_stmt (gsi);
3859
3860 if (!gimple_store_p (stmt))
3861 {
3862 /* Update gsi to point to next stmt. */
3863 gsi_next (&gsi);
3864 continue;
3865 }
3866
3867 if (bitmap_bit_p (reduction_stores,
3868 SSA_NAME_VERSION (gimple_vdef (stmt))))
3869 {
3870 if (dump_file)
3871 {
3872 fprintf (dump_file,
3873 "skipped reduction store for single-gang"
3874 " neutering: ");
3875 print_gimple_stmt (dump_file, stmt, 0);
3876 }
3877
3878 /* Update gsi to point to next stmt. */
3879 gsi_next (&gsi);
3880 continue;
3881 }
3882
3883 changed = true;
3884
3885 if (gang_pos == NULL_TREE)
3886 {
3887 tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
3888 gcall *gang_single
3889 = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
3890 gang_pos = make_ssa_name (integer_type_node);
3891 gimple_call_set_lhs (gang_single, gang_pos);
3892 gimple_stmt_iterator start
3893 = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
3894 tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
3895 gimple_set_vuse (gang_single, vuse);
3896 gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
3897 }
3898
3899 if (dump_file)
3900 {
3901 fprintf (dump_file,
3902 "found store that needs single-gang neutering: ");
3903 print_gimple_stmt (dump_file, stmt, 0);
3904 }
3905
3906 {
3907 /* Split block before store. */
3908 gimple_stmt_iterator gsi2 = gsi;
3909 gsi_prev (&gsi2);
3910 edge e;
3911 if (gsi_end_p (gsi2))
3912 {
3913 e = split_block_after_labels (bb);
3914 gsi2 = gsi_last_bb (bb);
3915 }
3916 else
3917 e = split_block (bb, gsi_stmt (gsi2));
3918 basic_block bb2 = e->dest;
3919
3920 /* Split block after store. */
3921 gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
3922 edge e2 = split_block (bb2, gsi_stmt (gsi3));
3923 basic_block bb3 = e2->dest;
3924
3925 gimple *cond
3926 = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
3927 NULL_TREE, NULL_TREE);
3928 gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);
3929
3930 edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
3931 /* FIXME: What is the probability? */
3932 e3->probability = profile_probability::guessed_never ();
3933 e->flags = EDGE_TRUE_VALUE;
3934
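/* Merge the memory state of the two paths: the skipping edge carries
   the store's vuse, the storing edge its vdef.  */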
3935 tree vdef = gimple_vdef (stmt);
3936 tree vuse = gimple_vuse (stmt);
3937
3938 tree phi_res = copy_ssa_name (vdef);
3939 gphi *new_phi = create_phi_node (phi_res, bb3);
3940 replace_uses_by (vdef, phi_res);
3941 add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
3942 add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);
3943
3944 /* Update gsi to point to next stmt. */
3945 bb = bb3;
3946 gsi = gsi_start_bb (bb);
3947 }
3948 }
3949 }
3950
3951 return changed;
3952 }
3953
3954 /* Return true if the statements before and after LOOP can be executed in
3955 parallel with the loop itself.  Resolve conflicting stores outside LOOP
3956 by guarding them such that only a single gang executes them.  */
3957
3958 static bool
3959 oacc_entry_exit_ok (class loop *loop,
3960 reduction_info_table_type *reduction_list)
3961 {
3962 basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
3963 vec<basic_block> region_bbs
3964 = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
3965
3966 bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
3967 bitmap_clear (in_loop_bbs);
3968 for (unsigned int i = 0; i < loop->num_nodes; i++)
3969 bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
3970
3971 bitmap reduction_stores = BITMAP_ALLOC (NULL);
3972 bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
3973 reduction_stores);
3974
3975 if (res)
3976 {
3977 bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
3978 reduction_stores);
3979 if (changed)
3980 {
3981 free_dominance_info (CDI_DOMINATORS);
3982 calculate_dominance_info (CDI_DOMINATORS);
3983 }
3984 }
3985
3986 region_bbs.release ();
3987 free (loop_bbs);
3988
3989 BITMAP_FREE (in_loop_bbs);
3990 BITMAP_FREE (reduction_stores);
3991
3992 return res;
3993 }
3994
3995 /* Detect parallel loops and generate parallel code using libgomp
3996 primitives. Returns true if some loop was parallelized, false
3997 otherwise. */
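/* For instance (schematic only), a loop such as

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   whose iterations are independent is outlined into a new function and
   its iteration space divided among N_THREADS threads through the
   libgomp runtime.  */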
3998
3999 static bool
4000 parallelize_loops (bool oacc_kernels_p)
4001 {
4002 unsigned n_threads;
4003 bool changed = false;
4004 class loop *loop;
4005 class loop *skip_loop = NULL;
4006 class tree_niter_desc niter_desc;
4007 struct obstack parloop_obstack;
4008 HOST_WIDE_INT estimated;
4009
4010 /* Do not parallelize loops in the functions created by parallelization. */
4011 if (!oacc_kernels_p
4012 && parallelized_function_p (cfun->decl))
4013 return false;
4014
4015 /* Do not parallelize loops in offloaded functions. */
4016 if (!oacc_kernels_p
4017 && oacc_get_fn_attrib (cfun->decl) != NULL)
4018 return false;
4019
4020 if (cfun->has_nonlocal_label)
4021 return false;
4022
4023 /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
4024 the argument to -ftree-parallelize-loops. */
4025 if (oacc_kernels_p)
4026 n_threads = 0;
4027 else
4028 n_threads = flag_tree_parallelize_loops;
4029
4030 gcc_obstack_init (&parloop_obstack);
4031 reduction_info_table_type reduction_list (10);
4032
4033 calculate_dominance_info (CDI_DOMINATORS);
4034
4035 FOR_EACH_LOOP (loop, 0)
4036 {
4037 if (loop == skip_loop)
4038 {
4039 if (!loop->in_oacc_kernels_region
4040 && dump_file && (dump_flags & TDF_DETAILS))
4041 fprintf (dump_file,
4042 "Skipping loop %d as inner loop of parallelized loop\n",
4043 loop->num);
4044
4045 skip_loop = loop->inner;
4046 continue;
4047 }
4048 else
4049 skip_loop = NULL;
4050
4051 reduction_list.empty ();
4052
4053 if (oacc_kernels_p)
4054 {
4055 if (!loop->in_oacc_kernels_region)
4056 continue;
4057
4058 /* Don't try to parallelize inner loops in an oacc kernels region. */
4059 if (loop->inner)
4060 skip_loop = loop->inner;
4061
4062 if (dump_file && (dump_flags & TDF_DETAILS))
4063 fprintf (dump_file,
4064 "Trying loop %d with header bb %d in oacc kernels"
4065 " region\n", loop->num, loop->header->index);
4066 }
4067
4068 if (dump_file && (dump_flags & TDF_DETAILS))
4069 {
4070 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
4071 if (loop->inner)
4072 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
4073 else
4074 fprintf (dump_file, "loop %d is innermost\n",loop->num);
4075 }
4076
4077 if (!single_dom_exit (loop))
4078 {
4079
4080 if (dump_file && (dump_flags & TDF_DETAILS))
4081 fprintf (dump_file, "loop is !single_dom_exit\n");
4082
4083 continue;
4084 }
4085
4086 /* And of course, the loop must be parallelizable.  */
4087 if (!can_duplicate_loop_p (loop)
4088 || loop_has_blocks_with_irreducible_flag (loop)
4089 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
4090 /* FIXME: the check for vector phi nodes could be removed. */
4091 || loop_has_vector_phi_nodes (loop))
4092 continue;
4093
4094 estimated = estimated_loop_iterations_int (loop);
4095 if (estimated == -1)
4096 estimated = get_likely_max_loop_iterations_int (loop);
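/* Profitability heuristic: require roughly MIN_PER_THREAD iterations
   per thread (only two per thread if the loop contains an inner loop),
   and do not bother with loops optimized for size.  */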
4097 /* FIXME: Bypass this check as graphite doesn't update the
4098 count and frequency correctly now. */
4099 if (!flag_loop_parallelize_all
4100 && !oacc_kernels_p
4101 && ((estimated != -1
4102 && (estimated
4103 < ((HOST_WIDE_INT) n_threads
4104 * (loop->inner ? 2 : MIN_PER_THREAD) - 1)))
4105 /* Do not bother with loops in cold areas. */
4106 || optimize_loop_nest_for_size_p (loop)))
4107 continue;
4108
4109 if (!try_get_loop_niter (loop, &niter_desc))
4110 continue;
4111
4112 if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
4113 continue;
4114
4115 if (loop_has_phi_with_address_arg (loop))
4116 continue;
4117
4118 if (!loop->can_be_parallel
4119 && !loop_parallel_p (loop, &parloop_obstack))
4120 continue;
4121
4122 if (oacc_kernels_p
4123 && !oacc_entry_exit_ok (loop, &reduction_list))
4124 {
4125 if (dump_file)
4126 fprintf (dump_file, "entry/exit not ok: FAILED\n");
4127 continue;
4128 }
4129
4130 changed = true;
4131 skip_loop = loop->inner;
4132
4133 if (dump_enabled_p ())
4134 {
4135 dump_user_location_t loop_loc = find_loop_location (loop);
4136 if (loop->inner)
4137 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
4138 "parallelizing outer loop %d\n", loop->num);
4139 else
4140 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
4141 "parallelizing inner loop %d\n", loop->num);
4142 }
4143
4144 gen_parallel_loop (loop, &reduction_list,
4145 n_threads, &niter_desc, oacc_kernels_p);
4146 }
4147
4148 obstack_free (&parloop_obstack, NULL);
4149
4150 /* Parallelization will cause new function calls to be inserted through
4151 which local variables will escape. Reset the points-to solution
4152 for ESCAPED. */
4153 if (changed)
4154 pt_solution_reset (&cfun->gimple_df->escaped);
4155
4156 return changed;
4157 }
4158
4159 /* Parallelization. */
4160
4161 namespace {
4162
4163 const pass_data pass_data_parallelize_loops =
4164 {
4165 GIMPLE_PASS, /* type */
4166 "parloops", /* name */
4167 OPTGROUP_LOOP, /* optinfo_flags */
4168 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
4169 ( PROP_cfg | PROP_ssa ), /* properties_required */
4170 0, /* properties_provided */
4171 0, /* properties_destroyed */
4172 0, /* todo_flags_start */
4173 0, /* todo_flags_finish */
4174 };
4175
4176 class pass_parallelize_loops : public gimple_opt_pass
4177 {
4178 public:
4179 pass_parallelize_loops (gcc::context *ctxt)
4180 : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
4181 oacc_kernels_p (false)
4182 {}
4183
4184 /* opt_pass methods: */
4185 virtual bool gate (function *)
4186 {
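/* In the OpenACC kernels case only -fopenacc is required; otherwise
   parallelizing for a single thread cannot pay off, so require a
   thread count greater than one.  */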
4187 if (oacc_kernels_p)
4188 return flag_openacc;
4189 else
4190 return flag_tree_parallelize_loops > 1;
4191 }
4192 virtual unsigned int execute (function *);
4193 opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
4194 void set_pass_param (unsigned int n, bool param)
4195 {
4196 gcc_assert (n == 0);
4197 oacc_kernels_p = param;
4198 }
4199
4200 private:
4201 bool oacc_kernels_p;
4202 }; // class pass_parallelize_loops
4203
4204 unsigned
4205 pass_parallelize_loops::execute (function *fun)
4206 {
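/* The generated code calls omp_get_num_threads, so the corresponding
   builtin must be available; treat its absence as lack of libgomp
   support.  */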
4207 tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4208 if (nthreads == NULL_TREE)
4209 return 0;
4210
4211 bool in_loop_pipeline = scev_initialized_p ();
4212 if (!in_loop_pipeline)
4213 loop_optimizer_init (LOOPS_NORMAL
4214 | LOOPS_HAVE_RECORDED_EXITS);
4215
4216 if (number_of_loops (fun) <= 1)
4217 return 0;
4218
4219 if (!in_loop_pipeline)
4220 {
4221 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
4222 scev_initialize ();
4223 }
4224
4225 unsigned int todo = 0;
4226 if (parallelize_loops (oacc_kernels_p))
4227 {
4228 fun->curr_properties &= ~(PROP_gimple_eomp);
4229
4230 checking_verify_loop_structure ();
4231
4232 todo |= TODO_update_ssa;
4233 }
4234
4235 if (!in_loop_pipeline)
4236 {
4237 scev_finalize ();
4238 loop_optimizer_finalize ();
4239 }
4240
4241 return todo;
4242 }
4243
4244 } // anon namespace
4245
4246 gimple_opt_pass *
4247 make_pass_parallelize_loops (gcc::context *ctxt)
4248 {
4249 return new pass_parallelize_loops (ctxt);
4250 }
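/* Usage note: this pass is enabled by -ftree-parallelize-loops=N (with
   N > 1), or through the OpenACC kernels path, e.g.

     gcc -O2 -ftree-parallelize-loops=4 -fdump-tree-parloops file.c

   where -fdump-tree-parloops dumps the pass's decisions.  */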