]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
Update copyright years.
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
99dee823 2 Copyright (C) 2006-2021 Free Software Foundation, Inc.
70837b71
RL
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5f40b3cb
ZD
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
6da7fc87 10Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
6da7fc87
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
40e23961 26#include "tree.h"
c7131fb2 27#include "gimple.h"
957060b5
AM
28#include "cfghooks.h"
29#include "tree-pass.h"
c7131fb2 30#include "ssa.h"
957060b5
AM
31#include "cgraph.h"
32#include "gimple-pretty-print.h"
c7131fb2 33#include "fold-const.h"
45b0be94 34#include "gimplify.h"
5be5c238 35#include "gimple-iterator.h"
18f429e2 36#include "gimplify-me.h"
5be5c238 37#include "gimple-walk.h"
d8a2d370
DN
38#include "stor-layout.h"
39#include "tree-nested.h"
442b4905 40#include "tree-cfg.h"
e28030cf
AM
41#include "tree-ssa-loop-ivopts.h"
42#include "tree-ssa-loop-manip.h"
43#include "tree-ssa-loop-niter.h"
442b4905
AM
44#include "tree-ssa-loop.h"
45#include "tree-into-ssa.h"
5f40b3cb 46#include "cfgloop.h"
1bd6497c 47#include "tree-scalar-evolution.h"
5f40b3cb 48#include "langhooks.h"
a509ebb5 49#include "tree-vectorizer.h"
4a8fb1a1 50#include "tree-hasher.h"
c1bf2a39 51#include "tree-parloops.h"
629b3d75 52#include "omp-general.h"
0645c1a2 53#include "omp-low.h"
7c82d827 54#include "tree-ssa.h"
61d9c527
TV
55#include "tree-ssa-alias.h"
56#include "tree-eh.h"
57#include "gomp-constants.h"
58#include "tree-dfa.h"
314e6352
ML
59#include "stringpool.h"
60#include "attribs.h"
5f40b3cb
ZD
61
62/* This pass tries to distribute iterations of loops into several threads.
63 The implementation is straightforward -- for each loop we test whether its
64 iterations are independent, and if it is the case (and some additional
65 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
66 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
67 machinery do its job.
b8698a0f 68
5f40b3cb
ZD
 Most of the complexity is in bringing the code into shape expected
70 by the omp expanders:
726a989a
RB
71 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
72 variable and that the exit test is at the start of the loop body
73 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
74 variables by accesses through pointers, and breaking up ssa chains
75 by storing the values incoming to the parallelized loop to a structure
76 passed to the new function as an argument (something similar is done
77 in omp gimplification, unfortunately only a small part of the code
78 can be shared).
79
80 TODO:
81 -- if there are several parallelizable loops in a function, it may be
82 possible to generate the threads just once (using synchronization to
83 ensure that cross-loop dependences are obeyed).
70837b71
RL
84 -- handling of common reduction patterns for outer loops.
85
86 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
b8698a0f 87/*
a509ebb5 88 Reduction handling:
31de92e3
RB
89 currently we use code inspired by vect_force_simple_reduction to detect
90 reduction patterns.
a509ebb5 91 The code transformation will be introduced by an example.
b8698a0f
L
92
93
a509ebb5
RL
94parloop
95{
96 int sum=1;
97
0eb7e7aa 98 for (i = 0; i < N; i++)
a509ebb5
RL
99 {
100 x[i] = i + 3;
101 sum+=x[i];
102 }
103}
104
0eb7e7aa 105gimple-like code:
a509ebb5
RL
106header_bb:
107
0eb7e7aa
RL
108 # sum_29 = PHI <sum_11(5), 1(3)>
109 # i_28 = PHI <i_12(5), 0(3)>
110 D.1795_8 = i_28 + 3;
111 x[i_28] = D.1795_8;
112 sum_11 = D.1795_8 + sum_29;
113 i_12 = i_28 + 1;
114 if (N_6(D) > i_12)
115 goto header_bb;
116
a509ebb5
RL
117
118exit_bb:
119
0eb7e7aa
RL
120 # sum_21 = PHI <sum_11(4)>
121 printf (&"%d"[0], sum_21);
a509ebb5
RL
122
123
124after reduction transformation (only relevant parts):
125
126parloop
127{
128
129....
130
0eb7e7aa 131
fa10beec 132 # Storing the initial value given by the user. #
0eb7e7aa 133
ae0bce62 134 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
135
136 #pragma omp parallel num_threads(4)
a509ebb5 137
0eb7e7aa 138 #pragma omp for schedule(static)
ae0bce62
RL
139
140 # The neutral element corresponding to the particular
141 reduction's operation, e.g. 0 for PLUS_EXPR,
142 1 for MULT_EXPR, etc. replaces the user's initial value. #
143
144 # sum.27_29 = PHI <sum.27_11, 0>
145
0eb7e7aa 146 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 147
726a989a 148 GIMPLE_OMP_CONTINUE
a509ebb5 149
0eb7e7aa
RL
150 # Adding this reduction phi is done at create_phi_for_local_result() #
151 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 152 GIMPLE_OMP_RETURN
b8698a0f
L
153
154 # Creating the atomic operation is done at
0eb7e7aa 155 create_call_for_reduction_1() #
a509ebb5 156
0eb7e7aa
RL
157 #pragma omp atomic_load
158 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
159 D.1840_60 = sum.27_56 + D.1839_59;
160 #pragma omp atomic_store (D.1840_60);
b8698a0f 161
726a989a 162 GIMPLE_OMP_RETURN
b8698a0f 163
0eb7e7aa
RL
164 # collecting the result after the join of the threads is done at
165 create_loads_for_reductions().
ae0bce62
RL
166 The value computed by the threads is loaded from the
167 shared struct. #
168
b8698a0f 169
0eb7e7aa 170 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 171 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
172 sum_43 = D.1795_41 + sum_37;
173
174 exit bb:
175 # sum_21 = PHI <sum_43, sum_26>
176 printf (&"%d"[0], sum_21);
177
178...
179
a509ebb5
RL
180}
181
182*/
183
31de92e3
RB
184/* Error reporting helper for parloops_is_simple_reduction below. GIMPLE
185 statement STMT is printed with a message MSG. */
186
187static void
188report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
189{
190 dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
191}
192
193/* DEF_STMT_INFO occurs in a loop that contains a potential reduction
194 operation. Return true if the results of DEF_STMT_INFO are something
195 that can be accumulated by such a reduction. */
196
197static bool
198parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info)
199{
200 return (is_gimple_assign (def_stmt_info->stmt)
201 || is_gimple_call (def_stmt_info->stmt)
202 || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
203 || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
204 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
205 && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
206}
207
/* Detect SLP reduction of the form:

   #a1 = phi <a5, a0>
   a2 = operation (a1)
   a3 = operation (a2)
   a4 = operation (a3)
   a5 = operation (a4)

   #a = phi <a5>

   PHI is the reduction phi node (#a1 = phi <a5, a0> above)
   FIRST_STMT is the first reduction stmt in the chain
   (a2 = operation (a1)).

   Return TRUE if a reduction chain was detected.  */

static bool
parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi,
			   gimple *first_stmt)
{
  class loop *loop = (gimple_bb (phi))->loop_father;
  class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  enum tree_code code;
  gimple *loop_use_stmt = NULL;
  stmt_vec_info use_stmt_info;
  tree lhs;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  int nloop_uses, size = 0, n_out_of_loop_uses;
  bool found = false;

  if (loop != vect_loop)
    return false;

  auto_vec<stmt_vec_info, 8> reduc_chain;
  lhs = PHI_RESULT (phi);
  /* All statements in the chain must use the same operation code as
     FIRST_STMT.  */
  code = gimple_assign_rhs_code (first_stmt);
  /* Follow the single in-loop use chain starting at the PHI result until
     we get back to the reduction PHI (success) or the chain breaks.  */
  while (1)
    {
      nloop_uses = 0;
      n_out_of_loop_uses = 0;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (is_gimple_debug (use_stmt))
	    continue;

	  /* Check if we got back to the reduction phi.  */
	  if (use_stmt == phi)
	    {
	      loop_use_stmt = use_stmt;
	      found = true;
	      break;
	    }

	  if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
	    {
	      loop_use_stmt = use_stmt;
	      nloop_uses++;
	    }
	  else
	    n_out_of_loop_uses++;

	  /* There can be either a single use in the loop or two uses in
	     phi nodes.  */
	  if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
	    return false;
	}

      if (found)
	break;

      /* We reached a statement with no loop uses.  */
      if (nloop_uses == 0)
	return false;

      /* This is a loop exit phi, and we haven't reached the reduction phi.  */
      if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
	return false;

      if (!is_gimple_assign (loop_use_stmt)
	  || code != gimple_assign_rhs_code (loop_use_stmt)
	  || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
	return false;

      /* Insert USE_STMT into reduction chain.  */
      use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
      reduc_chain.safe_push (use_stmt_info);

      lhs = gimple_assign_lhs (loop_use_stmt);
      size++;
    }

  /* A valid chain closes back on PHI and has at least two statements.  */
  if (!found || loop_use_stmt != phi || size < 2)
    return false;

  /* Swap the operands, if needed, to make the reduction operand be the second
     operand.  */
  lhs = PHI_RESULT (phi);
  for (unsigned i = 0; i < reduc_chain.length (); ++i)
    {
      gassign *next_stmt = as_a <gassign *> (reduc_chain[i]->stmt);
      if (gimple_assign_rhs2 (next_stmt) == lhs)
	{
	  tree op = gimple_assign_rhs1 (next_stmt);
	  stmt_vec_info def_stmt_info = loop_info->lookup_def (op);

	  /* Check that the other def is either defined in the loop
	     ("vect_internal_def"), or it's an induction (defined by a
	     loop-header phi-node).  */
	  if (def_stmt_info
	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
	      && parloops_valid_reduction_input_p (def_stmt_info))
	    {
	      lhs = gimple_assign_lhs (next_stmt);
	      continue;
	    }

	  return false;
	}
      else
	{
	  tree op = gimple_assign_rhs2 (next_stmt);
	  stmt_vec_info def_stmt_info = loop_info->lookup_def (op);

	  /* Check that the other def is either defined in the loop
	     ("vect_internal_def"), or it's an induction (defined by a
	     loop-header phi-node).  */
	  if (def_stmt_info
	      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
	      && parloops_valid_reduction_input_p (def_stmt_info))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G",
				 next_stmt);

	      swap_ssa_operands (next_stmt,
				 gimple_assign_rhs1_ptr (next_stmt),
				 gimple_assign_rhs2_ptr (next_stmt));
	      update_stmt (next_stmt);
	    }
	  else
	    return false;
	}

      lhs = gimple_assign_lhs (next_stmt);
    }

  /* Build up the actual chain.  */
  for (unsigned i = 0; i < reduc_chain.length () - 1; ++i)
    {
      REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0];
      REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1];
    }
  REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0];
  REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL;

  /* Save the chain for further analysis in SLP detection.  */
  LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
  REDUC_GROUP_SIZE (reduc_chain[0]) = size;

  return true;
}
371
372/* Return true if we need an in-order reduction for operation CODE
373 on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
374 overflow must wrap. */
375
376static bool
377parloops_needs_fold_left_reduction_p (tree type, tree_code code,
378 bool need_wrapping_integral_overflow)
379{
380 /* CHECKME: check for !flag_finite_math_only too? */
381 if (SCALAR_FLOAT_TYPE_P (type))
382 switch (code)
383 {
384 case MIN_EXPR:
385 case MAX_EXPR:
386 return false;
387
388 default:
389 return !flag_associative_math;
390 }
391
392 if (INTEGRAL_TYPE_P (type))
393 {
394 if (!operation_no_trapping_overflow (type, code))
395 return true;
396 if (need_wrapping_integral_overflow
397 && !TYPE_OVERFLOW_WRAPS (type)
398 && operation_can_overflow (code))
399 return true;
400 return false;
401 }
402
403 if (SAT_FIXED_POINT_TYPE_P (type))
404 return true;
405
406 return false;
407}
408
409
410/* Function parloops_is_simple_reduction
411
412 (1) Detect a cross-iteration def-use cycle that represents a simple
413 reduction computation. We look for the following pattern:
414
415 loop_header:
416 a1 = phi < a0, a2 >
417 a3 = ...
418 a2 = operation (a3, a1)
419
420 or
421
422 a3 = ...
423 loop_header:
424 a1 = phi < a0, a2 >
425 a2 = operation (a3, a1)
426
427 such that:
428 1. operation is commutative and associative and it is safe to
429 change the order of the computation
430 2. no uses for a2 in the loop (a2 is used out of the loop)
431 3. no uses of a1 in the loop besides the reduction operation
432 4. no uses of a1 outside the loop.
433
434 Conditions 1,4 are tested here.
435 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
436
437 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
438 nested cycles.
439
440 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
441 reductions:
442
443 a1 = phi < a0, a2 >
444 inner loop (def of a3)
445 a2 = phi < a3 >
446
447 (4) Detect condition expressions, ie:
448 for (int i = 0; i < N; i++)
449 if (a[i] < val)
450 ret_val = a[i];
451
452*/
453
static stmt_vec_info
parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
			      bool *double_reduc,
			      bool need_wrapping_integral_overflow,
			      enum vect_reduction_type *v_reduc_type)
{
  gphi *phi = as_a <gphi *> (phi_info->stmt);
  class loop *loop = (gimple_bb (phi))->loop_father;
  class loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
  gimple *phi_use_stmt = NULL;
  enum tree_code orig_code, code;
  tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
  tree type;
  tree name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  bool phi_def;

  *double_reduc = false;
  *v_reduc_type = TREE_CODE_REDUCTION;

  tree phi_name = PHI_RESULT (phi);
  /* ??? If there are no uses of the PHI result the inner loop reduction
     won't be detected as possibly double-reduction by vectorizable_reduction
     because that tries to walk the PHI arg from the preheader edge which
     can be constant.  See PR60382.  */
  if (has_zero_uses (phi_name))
    return NULL;
  /* Count in-loop uses of the PHI result; any use outside the loop
     disqualifies the cycle.  */
  unsigned nphi_def_loop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;

      if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "intermediate value used outside loop.\n");

	  return NULL;
	}

      nphi_def_loop_uses++;
      phi_use_stmt = use_stmt;
    }

  /* The value flowing around the cycle enters the PHI from the latch.  */
  edge latch_e = loop_latch_edge (loop);
  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
  if (TREE_CODE (loop_arg) != SSA_NAME)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "reduction: not ssa_name: %T\n", loop_arg);
      return NULL;
    }

  stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
  if (!def_stmt_info
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
    return NULL;

  /* The latch value may be defined by an assignment (ordinary reduction)
     or by another PHI (nested cycle / double reduction).  */
  if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
    {
      name = gimple_assign_lhs (def_stmt);
      phi_def = false;
    }
  else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
    {
      name = PHI_RESULT (def_stmt);
      phi_def = true;
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "reduction: unhandled reduction operation: %G",
			 def_stmt_info->stmt);
      return NULL;
    }

  /* Classify the uses of the latch definition: in-loop uses vs.
     loop-closed PHIs outside the loop.  */
  unsigned nlatch_def_loop_uses = 0;
  auto_vec<gphi *, 3> lcphis;
  bool inner_loop_of_double_reduc = false;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
	nlatch_def_loop_uses++;
      else
	{
	  /* We can have more than one loop-closed PHI.  */
	  lcphis.safe_push (as_a <gphi *> (use_stmt));
	  if (nested_in_vect_loop
	      && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt))
		  == vect_double_reduction_def))
	    inner_loop_of_double_reduc = true;
	}
    }

  /* If this isn't a nested cycle or if the nested cycle reduction value
     is used outside of the inner loop we cannot handle uses of the reduction
     value.  */
  if ((!nested_in_vect_loop || inner_loop_of_double_reduc)
      && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "reduction used in loop.\n");
      return NULL;
    }

  /* If DEF_STMT is a phi node itself, we expect it to have a single argument
     defined in the inner loop.  */
  if (phi_def)
    {
      gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
      op1 = PHI_ARG_DEF (def_stmt, 0);

      if (gimple_phi_num_args (def_stmt) != 1
	  || TREE_CODE (op1) != SSA_NAME)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported phi node definition.\n");

	  return NULL;
	}

      gimple *def1 = SSA_NAME_DEF_STMT (op1);
      if (gimple_bb (def1)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
	  && loop->inner
	  && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
	  && is_gimple_assign (def1)
	  && is_a <gphi *> (phi_use_stmt)
	  && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
	{
	  if (dump_enabled_p ())
	    report_ploop_op (MSG_NOTE, def_stmt,
			     "detected double reduction: ");

	  *double_reduc = true;
	  return def_stmt_info;
	}

      return NULL;
    }

  /* If we are vectorizing an inner reduction we are executing that
     in the original order only in case we are not dealing with a
     double reduction.  */
  bool check_reduction = true;
  if (flow_loop_nested_p (vect_loop, loop))
    {
      gphi *lcphi;
      unsigned i;
      check_reduction = false;
      FOR_EACH_VEC_ELT (lcphis, i, lcphi)
	FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi))
	  {
	    gimple *use_stmt = USE_STMT (use_p);
	    if (is_gimple_debug (use_stmt))
	      continue;
	    if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
	      check_reduction = true;
	  }
    }

  gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
  code = orig_code = gimple_assign_rhs_code (def_stmt);

  if (nested_in_vect_loop && !check_reduction)
    {
      /* FIXME: Even for non-reductions code generation is funneled
	 through vectorizable_reduction for the stmt defining the
	 PHI latch value.  So we have to artificially restrict ourselves
	 for the supported operations.  */
      switch (get_gimple_rhs_class (code))
	{
	case GIMPLE_BINARY_RHS:
	case GIMPLE_TERNARY_RHS:
	  break;
	default:
	  /* Not supported by vectorizable_reduction.  */
	  if (dump_enabled_p ())
	    report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
			     "nested cycle: not handled operation: ");
	  return NULL;
	}
      if (dump_enabled_p ())
	report_ploop_op (MSG_NOTE, def_stmt, "detected nested cycle: ");
      return def_stmt_info;
    }

  /* We can handle "res -= x[i]", which is non-associative by
     simply rewriting this into "res += -x[i]".  Avoid changing
     gimple instruction for the first simple tests and only do this
     if we're allowed to change code at all.  */
  if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name)
    code = PLUS_EXPR;

  if (code == COND_EXPR)
    {
      if (! nested_in_vect_loop)
	*v_reduc_type = COND_REDUCTION;

      /* OP3/OP4 hold the condition operands; the condition itself must
	 not depend on the value carried around the cycle.  */
      op3 = gimple_assign_rhs1 (def_stmt);
      if (COMPARISON_CLASS_P (op3))
	{
	  op4 = TREE_OPERAND (op3, 1);
	  op3 = TREE_OPERAND (op3, 0);
	}
      if (op3 == phi_name || op4 == phi_name)
	{
	  if (dump_enabled_p ())
	    report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
			     "reduction: condition depends on previous"
			     " iteration: ");
	  return NULL;
	}

      op1 = gimple_assign_rhs2 (def_stmt);
      op2 = gimple_assign_rhs3 (def_stmt);
    }
  else if (!commutative_tree_code (code) || !associative_tree_code (code))
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
			 "reduction: not commutative/associative: ");
      return NULL;
    }
  else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS)
    {
      op1 = gimple_assign_rhs1 (def_stmt);
      op2 = gimple_assign_rhs2 (def_stmt);
    }
  else
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
			 "reduction: not handled operation: ");
      return NULL;
    }

  if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
			 "reduction: both uses not ssa_names: ");

      return NULL;
    }

  /* All operands must have a type compatible with the reduction result.  */
  type = TREE_TYPE (gimple_assign_lhs (def_stmt));
  if ((TREE_CODE (op1) == SSA_NAME
       && !types_compatible_p (type,TREE_TYPE (op1)))
      || (TREE_CODE (op2) == SSA_NAME
	  && !types_compatible_p (type, TREE_TYPE (op2)))
      || (op3 && TREE_CODE (op3) == SSA_NAME
	  && !types_compatible_p (type, TREE_TYPE (op3)))
      || (op4 && TREE_CODE (op4) == SSA_NAME
	  && !types_compatible_p (type, TREE_TYPE (op4))))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "reduction: multiple types: operation type: "
			   "%T, operands types: %T,%T",
			   type, TREE_TYPE (op1), TREE_TYPE (op2));
	  if (op3)
	    dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));

	  if (op4)
	    dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
	  dump_printf (MSG_NOTE, "\n");
	}

      return NULL;
    }

  /* Check whether it's ok to change the order of the computation.
     Generally, when vectorizing a reduction we change the order of the
     computation.  This may change the behavior of the program in some
     cases, so we need to check that this is ok.  One exception is when
     vectorizing an outer-loop: the inner-loop is executed sequentially,
     and therefore vectorizing reductions in the inner-loop during
     outer-loop vectorization is safe.  */
  if (check_reduction
      && *v_reduc_type == TREE_CODE_REDUCTION
      && parloops_needs_fold_left_reduction_p (type, code,
					       need_wrapping_integral_overflow))
    *v_reduc_type = FOLD_LEFT_REDUCTION;

  /* Reduction is safe.  We're dealing with one of the following:
     1) integer arithmetic and no trapv
     2) floating point arithmetic, and special flags permit this optimization
     3) nested cycle (i.e., outer loop vectorization).  */
  stmt_vec_info def1_info = loop_info->lookup_def (op1);
  stmt_vec_info def2_info = loop_info->lookup_def (op2);
  if (code != COND_EXPR && !def1_info && !def2_info)
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_NOTE, def_stmt,
			 "reduction: no defs for operands: ");
      return NULL;
    }

  /* Check that one def is the reduction def, defined by PHI,
     the other def is either defined in the loop ("vect_internal_def"),
     or it's an induction (defined by a loop-header phi-node).  */

  if (def2_info
      && def2_info->stmt == phi
      && (code == COND_EXPR
	  || !def1_info
	  || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
	  || parloops_valid_reduction_input_p (def1_info)))
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
      return def_stmt_info;
    }

  if (def1_info
      && def1_info->stmt == phi
      && (code == COND_EXPR
	  || !def2_info
	  || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
	  || parloops_valid_reduction_input_p (def2_info)))
    {
      if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
	{
	  /* Check if we can swap operands (just for simplicity - so that
	     the rest of the code can assume that the reduction variable
	     is always the last (second) argument).  */
	  if (code == COND_EXPR)
	    {
	      /* Swap cond_expr by inverting the condition.  */
	      tree cond_expr = gimple_assign_rhs1 (def_stmt);
	      enum tree_code invert_code = ERROR_MARK;
	      enum tree_code cond_code = TREE_CODE (cond_expr);

	      if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}
	      if (invert_code != ERROR_MARK)
		{
		  TREE_SET_CODE (cond_expr, invert_code);
		  swap_ssa_operands (def_stmt,
				     gimple_assign_rhs2_ptr (def_stmt),
				     gimple_assign_rhs3_ptr (def_stmt));
		}
	      else
		{
		  if (dump_enabled_p ())
		    report_ploop_op (MSG_NOTE, def_stmt,
				     "detected reduction: cannot swap operands "
				     "for cond_expr");
		  return NULL;
		}
	    }
	  else
	    swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
			       gimple_assign_rhs2_ptr (def_stmt));

	  if (dump_enabled_p ())
	    report_ploop_op (MSG_NOTE, def_stmt,
			     "detected reduction: need to swap operands: ");
	}
      else
	{
	  if (dump_enabled_p ())
	    report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: ");
	}

      return def_stmt_info;
    }

  /* Try to find SLP reduction chain.  */
  if (! nested_in_vect_loop
      && code != COND_EXPR
      && orig_code != MINUS_EXPR
      && parloops_is_slp_reduction (loop_info, phi, def_stmt))
    {
      if (dump_enabled_p ())
	report_ploop_op (MSG_NOTE, def_stmt,
			 "reduction: detected reduction chain: ");

      return def_stmt_info;
    }

  /* Look for the expression computing loop_arg from loop PHI result.  */
  if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
    return def_stmt_info;

  if (dump_enabled_p ())
    {
      report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt,
		       "reduction: unknown pattern: ");
    }

  return NULL;
}
864
865/* Wrapper around vect_is_simple_reduction, which will modify code
866 in-place if it enables detection of more reductions. Arguments
867 as there. */
868
869stmt_vec_info
870parloops_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
871 bool *double_reduc,
872 bool need_wrapping_integral_overflow)
873{
874 enum vect_reduction_type v_reduc_type;
875 stmt_vec_info def_info
876 = parloops_is_simple_reduction (loop_info, phi_info, double_reduc,
877 need_wrapping_integral_overflow,
878 &v_reduc_type);
879 if (def_info)
880 {
881 STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
882 STMT_VINFO_REDUC_DEF (phi_info) = def_info;
883 STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
884 STMT_VINFO_REDUC_DEF (def_info) = phi_info;
885 }
886 return def_info;
887}
888
5f40b3cb
ZD
889/* Minimal number of iterations of a loop that should be executed in each
890 thread. */
028d4092 891#define MIN_PER_THREAD param_parloops_min_per_thread
5f40b3cb 892
b8698a0f 893/* Element of the hashtable, representing a
a509ebb5
RL
894 reduction in the current loop. */
895struct reduction_info
896{
355fe088
TS
897 gimple *reduc_stmt; /* reduction statement. */
898 gimple *reduc_phi; /* The phi node defining the reduction. */
726a989a 899 enum tree_code reduction_code;/* code for the reduction operation. */
5d1fd1de
JJ
900 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
901 result. */
538dd0b7 902 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
a509ebb5 903 of the reduction variable when existing the loop. */
ae0bce62 904 tree initial_value; /* The initial value of the reduction var before entering the loop. */
a509ebb5 905 tree field; /* the name of the field in the parloop data structure intended for reduction. */
61d9c527
TV
906 tree reduc_addr; /* The address of the reduction variable for
907 openacc reductions. */
a509ebb5 908 tree init; /* reduction initialization value. */
538dd0b7 909 gphi *new_phi; /* (helper field) Newly created phi node whose result
a509ebb5
RL
910 will be passed to the atomic operation. Represents
911 the local result each thread computed for the reduction
912 operation. */
913};
914
4a8fb1a1 915/* Reduction info hashtable helpers. */
a509ebb5 916
95fbe13e 917struct reduction_hasher : free_ptr_hash <reduction_info>
a509ebb5 918{
67f58944
TS
919 static inline hashval_t hash (const reduction_info *);
920 static inline bool equal (const reduction_info *, const reduction_info *);
4a8fb1a1
LC
921};
922
/* Equality and hash functions for hashtab code.  */

/* Two entries are equal iff they describe the same reduction PHI.  */

inline bool
reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
{
  return (a->reduc_phi == b->reduc_phi);
}
930
4a8fb1a1 931inline hashval_t
67f58944 932reduction_hasher::hash (const reduction_info *a)
a509ebb5 933{
5d1fd1de 934 return a->reduc_version;
a509ebb5
RL
935}
936
c203e8a7 937typedef hash_table<reduction_hasher> reduction_info_table_type;
4a8fb1a1
LC
938
939
a509ebb5 940static struct reduction_info *
355fe088 941reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
a509ebb5
RL
942{
943 struct reduction_info tmpred, *red;
944
b119c055 945 if (reduction_list->is_empty () || phi == NULL)
a509ebb5
RL
946 return NULL;
947
fdce493d
TV
948 if (gimple_uid (phi) == (unsigned int)-1
949 || gimple_uid (phi) == 0)
950 return NULL;
951
a509ebb5 952 tmpred.reduc_phi = phi;
5d1fd1de 953 tmpred.reduc_version = gimple_uid (phi);
c203e8a7 954 red = reduction_list->find (&tmpred);
fdce493d 955 gcc_assert (red == NULL || red->reduc_phi == phi);
a509ebb5
RL
956
957 return red;
958}
959
/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
969
/* Name copies hashtable helpers.  Entries are keyed on the SSA name
   version number.  */

struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
{
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};
977
/* Equality and hash functions for hashtab code.  */

/* Two entries are equal iff they refer to the same SSA name version.  */

inline bool
name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
{
  return a->version == b->version;
}
985
4a8fb1a1 986inline hashval_t
67f58944 987name_to_copy_hasher::hash (const name_to_copy_elt *a)
5f40b3cb 988{
5f40b3cb
ZD
989 return (hashval_t) a->version;
990}
991
c203e8a7 992typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
4a8fb1a1 993
b305e3da
SP
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;		/* Integer numerators of the elements.  */
  int rowsize;			/* Number of rows.  */
  int colsize;			/* Number of columns.  */
  int denominator;		/* Denominator shared by all elements.  */
} *lambda_trans_matrix;

/* Accessors for the fields of a lambda_trans_matrix.  */
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)
1008
1009/* Allocate a new transformation matrix. */
1010
1011static lambda_trans_matrix
1012lambda_trans_matrix_new (int colsize, int rowsize,
1013 struct obstack * lambda_obstack)
1014{
1015 lambda_trans_matrix ret;
1016
1017 ret = (lambda_trans_matrix)
1018 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
1019 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
1020 LTM_ROWSIZE (ret) = rowsize;
1021 LTM_COLSIZE (ret) = colsize;
1022 LTM_DENOMINATOR (ret) = 1;
1023 return ret;
1024}
1025
1026/* Multiply a vector VEC by a matrix MAT.
1027 MAT is an M*N matrix, and VEC is a vector with length N. The result
1028 is stored in DEST which must be a vector of length M. */
1029
1030static void
1031lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
1032 lambda_vector vec, lambda_vector dest)
1033{
1034 int i, j;
1035
1036 lambda_vector_clear (dest, m);
1037 for (i = 0; i < m; i++)
1038 for (j = 0; j < n; j++)
1039 dest[i] += matrix[i][j] * vec[j];
1040}
1041
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  /* TRANS must be square and match the depth of the loop nest.  */
  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is no worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  /* Scratch vector holding TRANS applied to each distance vector.  */
  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. output-dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect and require the result to stay
	 lexicographically positive for every distance vector.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
08dab97a
RL
1113
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent on each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (class loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
  {
    fprintf (dump_file, "Considering loop %d\n", loop->num);
    if (!loop->inner)
      fprintf (dump_file, "loop is innermost\n");
    else
      fprintf (dump_file, "loop NOT innermost\n");
   }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      /* Fall through to the common cleanup; the vecs were created above.  */
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  /* A 1x1 matrix with the single entry -1 represents loop reversal;
     if reversal is legal, every distance vector is zero, i.e. the
     iterations are independent.  */
  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
1170
1d4af1e8
SP
1171/* Return true when LOOP contains basic blocks marked with the
1172 BB_IRREDUCIBLE_LOOP flag. */
1173
1174static inline bool
99b1c316 1175loop_has_blocks_with_irreducible_flag (class loop *loop)
1d4af1e8
SP
1176{
1177 unsigned i;
1178 basic_block *bbs = get_loop_body_in_dom_order (loop);
1179 bool res = true;
1180
1181 for (i = 0; i < loop->num_nodes; i++)
1182 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
1183 goto end;
1184
1185 res = false;
1186 end:
1187 free (bbs);
1188 return res;
1189}
1190
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  When GSI is NULL (the debug-stmt case), no statements
   may be emitted; NULL is returned if a new address would be needed.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  /* Strip handled components to find the base of the reference.  */
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      /* No canonical name yet; we must emit a statement, which is
	 forbidden when GSI is NULL.  */
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Convert to the requested pointer TYPE if necessary.  */
  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
1265
12efb1d7 1266static tree
355fe088 1267reduc_stmt_res (gimple *stmt)
12efb1d7
TV
1268{
1269 return (gimple_code (stmt) == GIMPLE_PHI
1270 ? gimple_phi_result (stmt)
1271 : gimple_assign_lhs (stmt));
1272}
1273
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  Always returns 1 so traversal continues.  */

int
initialize_reductions (reduction_info **slot, class loop *loop)
{
  tree init;
  tree type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Initialize the reduction to the neutral element of its operation.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
				reduc->reduction_code, type);
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken in consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  /* Remember the original initial value; it is folded back in after
     the parallel part finishes.  */
  reduc->initial_value = arg;
  return 1;
}
5f40b3cb
ZD
1316
/* Data passed through walk_tree to eliminate_local_variables_1.  */

struct elv_data
{
  struct walk_stmt_info info;	/* Must be first: walk_gimple_op state.  */
  edge entry;			/* Entry edge of the SESE region.  */
  int_tree_htab_type *decl_address;  /* Decl UID -> canonical address name.  */
  gimple_stmt_iterator *gsi;	/* Insert point, or NULL for debug stmts.  */
  bool changed;			/* Set when the walked expr was rewritten.  */
  bool reset;			/* Set when a debug bind value must be dropped.  */
};
1326
9f9f72aa
AP
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      /* Only local automatic variables need rewriting.  */
      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  /* Debug stmt whose value can no longer be expressed; ask the
	     caller to reset the bind.  */
	  dta->reset = true;
	  return NULL_TREE;
	}

      /* Replace the direct decl access with an indirect one.  */
      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address in not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  /* Do not descend into non-expression nodes.  */
  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
1403
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple *stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      /* For debug binds no statements may be emitted, so signal that
	 by a NULL iterator; the walk may request a reset instead.  */
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      /* Clobbers of locals are meaningless once the variable lives in
	 the shared structure; drop them.  */
      unlink_stmt_vdef (stmt);
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
1449
9f9f72aa
AP
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
   region (and temporary variable is created to hold the address if
   necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
   references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: rewrite all non-debug statements.  Debug binds are only
     noted here and processed afterwards, so that they can reuse the
     canonical addresses created for the real code.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      {
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (is_gimple_debug (gsi_stmt (gsi)))
	    {
	      if (gimple_debug_bind_p (gsi_stmt (gsi)))
	        has_debug_stmt = true;
	    }
	  else
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
      }

  /* Second pass: rewrite the debug binds.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
1495
1496/* Returns true if expression EXPR is not defined between ENTRY and
1497 EXIT, i.e. if all its operands are defined outside of the region. */
1498
1499static bool
1500expr_invariant_in_region_p (edge entry, edge exit, tree expr)
1501{
1502 basic_block entry_bb = entry->src;
1503 basic_block exit_bb = exit->dest;
1504 basic_block def_bb;
9f9f72aa
AP
1505
1506 if (is_gimple_min_invariant (expr))
1507 return true;
1508
1509 if (TREE_CODE (expr) == SSA_NAME)
1510 {
726a989a 1511 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
1512 if (def_bb
1513 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
1514 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
1515 return false;
1516
1517 return true;
1518 }
1519
726a989a 1520 return false;
5f40b3cb
ZD
1521}
1522
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  /* Only create a slot when we are allowed to make a copy.  */
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      /* The name stays as-is; it must not already be in the table.  */
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  /* Duplicate the base variable as well, sharing one copy per decl.  */
  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_NOT_GIMPLE_REG_P (var_copy) = DECL_NOT_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
1596
9f9f72aa
AP
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  /* Defs are produced inside the region, so only their base decls get
     replaced -- the name itself must remain unchanged.  */
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
  {
    name = DEF_FROM_PTR (def);
    gcc_assert (TREE_CODE (name) == SSA_NAME);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  false);
    gcc_assert (copy == name);
  }

  /* Uses defined outside the region are rewritten to their copies.  */
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
  {
    name = USE_FROM_PTR (use);
    if (TREE_CODE (name) != SSA_NAME)
      continue;

    copy_name_p = expr_invariant_in_region_p (entry, exit, name);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  copy_name_p);
    SET_USE (use, copy);
  }
}
1636
b5b8b0ac
AO
/* Adjust debug bind and debug source bind statement STMT for the
   separated region: rewrite its bound variable and its uses to the
   copies recorded in DECL_COPIES and NAME_COPIES.  Returns true if
   STMT should be removed by the caller (its variable or uses have no
   mapping), false if it was adjusted in place.  */

static bool
separate_decls_in_region_debug (gimple *stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  /* DEBUG_EXPR_DECLs and labels have no decl copy; drop the stmt.  */
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  /* Point the bind at the copied variable.  */
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  /* A used name without a copy: the value is unrepresentable,
	     reset the bind instead of referencing a stale name.  */
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}
1695
0eb7e7aa
RL
1696/* Callback for htab_traverse. Adds a field corresponding to the reduction
1697 specified in SLOT. The type is passed in DATA. */
1698
4a8fb1a1
LC
1699int
1700add_field_for_reduction (reduction_info **slot, tree type)
a509ebb5 1701{
b8698a0f 1702
4a8fb1a1 1703 struct reduction_info *const red = *slot;
12efb1d7 1704 tree var = reduc_stmt_res (red->reduc_stmt);
aa06a978
RB
1705 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
1706 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
0eb7e7aa
RL
1707
1708 insert_field_into_struct (type, field);
1709
1710 red->field = field;
1711
1712 return 1;
1713}
a509ebb5 1714
5f40b3cb 1715/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 1716 described in SLOT. The type is passed in DATA. */
5f40b3cb 1717
4a8fb1a1
LC
1718int
1719add_field_for_name (name_to_copy_elt **slot, tree type)
5f40b3cb 1720{
4a8fb1a1 1721 struct name_to_copy_elt *const elt = *slot;
5f40b3cb 1722 tree name = ssa_name (elt->version);
70b5e7dc
RG
1723 tree field = build_decl (UNKNOWN_LOCATION,
1724 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1725 TREE_TYPE (name));
5f40b3cb
ZD
1726
1727 insert_field_into_struct (type, field);
1728 elt->field = field;
a509ebb5 1729
5f40b3cb
ZD
1730 return 1;
1731}
1732
b8698a0f
L
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, class loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb, continue_bb;
  tree local_res;
  location_t locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  continue_bb = single_pred (loop->latch);
  store_bb = FALLTHRU_EDGE (continue_bb)->dest;

  /* STORE_BB has two predecessors.  One coming from  the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
  local_res = copy_ssa_name (lhs);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  /* Argument for the "no iterations" path: the neutral init value.  */
  add_phi_arg (new_phi, reduc->init, e, locus);
  /* Argument for the loop path: the thread-local reduction result.  */
  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
  reduc->new_phi = new_phi;

  return 1;
}
5f40b3cb
ZD
1775
/* Describes the shared-data structure used to communicate values between
   threads: where it is stored and loaded, and in which blocks the store
   and load code is emitted.  */
struct clsn_data
{
  tree store;			/* The structure variable written to.  */
  tree load;			/* Pointer through which it is read back.  */

  basic_block store_bb;		/* Block receiving the store code.  */
  basic_block load_bb;		/* Block receiving the load code.  */
};
1784
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple *load;

  if (reduc->reduc_addr == NULL_TREE)
    {
      /* The reduction lives in the shared structure: take the address
	 of its field as the atomic target.  */
      load_struct = build_simple_mem_ref (clsn_data->load);
      t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

      addr = build_addr (t);
    }
  else
    {
      /* Set the address for the atomic store.  */
      addr = reduc->reduc_addr;

      /* Remove the non-atomic store '*addr = sum'.  */
      tree res = PHI_RESULT (reduc->keep_res);
      use_operand_p use_p;
      gimple *stmt;
      bool single_use_p = single_imm_use (res, &use_p, &stmt);
      gcc_assert (single_use_p);
      replace_uses_by (gimple_vdef (stmt),
		       gimple_vuse (stmt));
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gsi_remove (&gsi, true);
    }

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  /* Split so the atomic load starts its own block.  */
  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr,
				       OMP_MEMORY_ORDER_RELAXED);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* Combine the loaded value with this thread's partial result.  */
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gimple *store = gimple_build_omp_atomic_store (name,
						 OMP_MEMORY_ORDER_RELAXED);
  gsi_insert_after (&gsi, store, GSI_NEW_STMT);
  return 1;
}
1859
b8698a0f
L
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (class loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  /* First materialize each thread's local result as a phi node.  */
  reduction_list->traverse <class loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  basic_block continue_bb = single_pred (loop->latch);
  ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
  /* Then emit the atomic read-modify-write for every reduction.  */
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
1876
ae0bce62
RL
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  /* Read the final value back out of the shared structure field.  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The exit phi is now redundant -- its result is defined by the
     assignment just created; find and remove it.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
1915
b8698a0f 1916/* Load the reduction result that was stored in LD_ST_DATA.
a509ebb5 1917 REDUCTION_LIST describes the list of reductions that the
fa10beec 1918 loads should be generated for. */
a509ebb5 1919static void
c203e8a7 1920create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
a509ebb5
RL
1921 struct clsn_data *ld_st_data)
1922{
726a989a 1923 gimple_stmt_iterator gsi;
a509ebb5 1924 tree t;
355fe088 1925 gimple *stmt;
a509ebb5 1926
726a989a 1927 gsi = gsi_after_labels (ld_st_data->load_bb);
a509ebb5 1928 t = build_fold_addr_expr (ld_st_data->store);
726a989a 1929 stmt = gimple_build_assign (ld_st_data->load, t);
a509ebb5 1930
726a989a 1931 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1932
4a8fb1a1 1933 reduction_list
c203e8a7 1934 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
a509ebb5
RL
1935
1936}
1937
0eb7e7aa
RL
1938/* Callback for htab_traverse. Store the neutral value for the
1939 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1940 1 for MULT_EXPR, etc. into the reduction field.
b8698a0f
L
1941 The reduction is specified in SLOT. The store information is
1942 passed in DATA. */
0eb7e7aa 1943
4a8fb1a1
LC
1944int
1945create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
0eb7e7aa 1946{
4a8fb1a1 1947 struct reduction_info *const red = *slot;
726a989a 1948 tree t;
355fe088 1949 gimple *stmt;
726a989a 1950 gimple_stmt_iterator gsi;
12efb1d7 1951 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
726a989a
RB
1952
1953 gsi = gsi_last_bb (clsn_data->store_bb);
1954 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1955 stmt = gimple_build_assign (t, red->initial_value);
726a989a 1956 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
0eb7e7aa
RL
1957
1958 return 1;
1959}
1960
a509ebb5
RL
1961/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1962 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1963 specified in SLOT. */
1964
4a8fb1a1
LC
1965int
1966create_loads_and_stores_for_name (name_to_copy_elt **slot,
1967 struct clsn_data *clsn_data)
5f40b3cb 1968{
4a8fb1a1 1969 struct name_to_copy_elt *const elt = *slot;
726a989a 1970 tree t;
355fe088 1971 gimple *stmt;
726a989a 1972 gimple_stmt_iterator gsi;
5f40b3cb 1973 tree type = TREE_TYPE (elt->new_name);
5f40b3cb
ZD
1974 tree load_struct;
1975
726a989a
RB
1976 gsi = gsi_last_bb (clsn_data->store_bb);
1977 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1978 stmt = gimple_build_assign (t, ssa_name (elt->version));
726a989a 1979 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1980
726a989a 1981 gsi = gsi_last_bb (clsn_data->load_bb);
70f34814 1982 load_struct = build_simple_mem_ref (clsn_data->load);
726a989a
RB
1983 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1984 stmt = gimple_build_assign (elt->new_name, t);
726a989a 1985 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1986
1987 return 1;
1988}
1989
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  /* bb1 becomes the block loading from the structure (inside the region);
     bb0 is its predecessor storing into it (outside the region).  */
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: collect the names and decls used in the region (phis and
     non-debug statements), recording debug statements for a second pass.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      /* The debug stmt cannot be rewritten; drop it.  */
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.is_empty () && reduction_list->is_empty ())
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      /* One field per copied name, plus one per reduction.  */
      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && !reduction_list->is_empty ())
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && !reduction_list->is_empty ())
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}
2149
a79b7ec5 2150/* Returns true if FN was created to run in parallel. */
5f40b3cb 2151
62e0a1ed 2152bool
a79b7ec5 2153parallelized_function_p (tree fndecl)
5f40b3cb 2154{
a79b7ec5
TV
2155 cgraph_node *node = cgraph_node::get (fndecl);
2156 gcc_assert (node != NULL);
2157 return node->parallelized_function;
5f40b3cb
ZD
2158}
2159
2160/* Creates and returns an empty function that will receive the body of
2161 a parallelized loop. */
2162
2163static tree
9ff70652 2164create_loop_fn (location_t loc)
5f40b3cb
ZD
2165{
2166 char buf[100];
2167 char *tname;
2168 tree decl, type, name, t;
2169 struct function *act_cfun = cfun;
2170 static unsigned loopfn_num;
2171
5368224f 2172 loc = LOCATION_LOCUS (loc);
5f40b3cb
ZD
2173 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
2174 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
2175 clean_symbol_name (tname);
2176 name = get_identifier (tname);
2177 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
2178
9ff70652 2179 decl = build_decl (loc, FUNCTION_DECL, name, type);
5f40b3cb
ZD
2180 TREE_STATIC (decl) = 1;
2181 TREE_USED (decl) = 1;
2182 DECL_ARTIFICIAL (decl) = 1;
2183 DECL_IGNORED_P (decl) = 0;
2184 TREE_PUBLIC (decl) = 0;
2185 DECL_UNINLINABLE (decl) = 1;
2186 DECL_EXTERNAL (decl) = 0;
2187 DECL_CONTEXT (decl) = NULL_TREE;
2188 DECL_INITIAL (decl) = make_node (BLOCK);
01771d43 2189 BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
5f40b3cb 2190
9ff70652 2191 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
5f40b3cb
ZD
2192 DECL_ARTIFICIAL (t) = 1;
2193 DECL_IGNORED_P (t) = 1;
2194 DECL_RESULT (decl) = t;
2195
9ff70652 2196 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
5f40b3cb
ZD
2197 ptr_type_node);
2198 DECL_ARTIFICIAL (t) = 1;
2199 DECL_ARG_TYPE (t) = ptr_type_node;
2200 DECL_CONTEXT (t) = decl;
2201 TREE_USED (t) = 1;
2202 DECL_ARGUMENTS (decl) = t;
2203
182e0d71 2204 allocate_struct_function (decl, false);
5f40b3cb
ZD
2205
2206 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
2207 it. */
5576d6f2 2208 set_cfun (act_cfun);
5f40b3cb
ZD
2209
2210 return decl;
2211}
2212
7c82d827
TV
2213/* Replace uses of NAME by VAL in block BB. */
2214
2215static void
2216replace_uses_in_bb_by (tree name, tree val, basic_block bb)
2217{
355fe088 2218 gimple *use_stmt;
7c82d827
TV
2219 imm_use_iterator imm_iter;
2220
2221 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
2222 {
2223 if (gimple_bb (use_stmt) != bb)
2224 continue;
2225
2226 use_operand_p use_p;
2227 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2228 SET_USE (use_p, val);
2229 }
2230}
2231
/* Do transformation from:

     <bb preheader>:
     ...
     goto <bb header>

     <bb header>:
     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_a = PHI <sum_init (preheader), sum_b (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     if (ivtmp_a < n)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb header>

     <bb exit>:
     sum_z = PHI <sum_b (cond[1]), ...>

     [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
	 that's <bb header>.

   to:

     <bb preheader>:
     ...
     goto <bb newheader>

     <bb header>:
     ivtmp_a = PHI <ivtmp_c (latch)>
     sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     goto <bb latch>;

     <bb newheader>:
     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_c = PHI <sum_init (preheader), sum_b (latch)>
     if (ivtmp_c < n + 1)
       goto <bb header>;
     else
       goto <bb newexit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb newheader>

     <bb newexit>:
     sum_y = PHI <sum_c (newheader)>

     <bb exit>:
     sum_z = PHI <sum_y (newexit), ...>

   In unified diff format:

     <bb preheader>:
     ...
   - goto <bb header>
   + goto <bb newheader>

     <bb header>:
   - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
   - sum_a = PHI <sum_init (preheader), sum_b (latch)>
   + ivtmp_a = PHI <ivtmp_c (latch)>
   + sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
   - if (ivtmp_a < n)
   -   goto <bb latch>;
   + goto <bb latch>;
   +
   + <bb newheader>:
   + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
   + sum_c = PHI <sum_init (preheader), sum_b (latch)>
   + if (ivtmp_c < n + 1)
   +   goto <bb header>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
   - goto <bb header>
   + goto <bb newheader>

   + <bb newexit>:
   + sum_y = PHI <sum_c (newheader)>

     <bb exit>:
   - sum_z = PHI <sum_b (cond[1]), ...>
   + sum_z = PHI <sum_y (newexit), ...>

   Note: the example does not show any virtual phis, but these are handled more
   or less as reductions.


   Moves the exit condition of LOOP to the beginning of its header.
   REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
   bound.  */

static void
transform_to_exit_first_loop_alt (class loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Rewriting virtuals into loop-closed ssa normal form makes this
     transformation simpler.  It also ensures that the virtuals are in
     loop-closed ssa normal from after the transformation, which is required by
     create_parallel_loop.  */
  rewrite_virtuals_into_loop_closed_ssa (loop);

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge edge_at_split = single_pred_edge (new_header);

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect edge_at_split to latch.  */
  e = redirect_edge_and_branch (edge_at_split, latch);
  gcc_assert (e == edge_at_split);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  Walk the header phis in lockstep with the pending
     var map of the redirected latch edge.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  int i;
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bb_by (res_b, res_c, exit_block);

      /* Only the control iv, virtuals and registered reductions are
	 expected among the header phis.  */
      struct reduction_info *red = reduction_phi (reduction_list, phi);
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  /* Both sequences must have been exhausted together.  */
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);


  basic_block new_exit_block = NULL;
  if (!single_pred_p (exit->dest))
    {
      /* Create a new empty exit block, inbetween the new loop header and the
	 old exit block.  The function separate_decls_in_region needs this block
	 to insert code that is active on loop exit, but not any other path.  */
      new_exit_block = split_edge (exit);
    }

  /* Insert and register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      gphi *nphi = NULL;
      tree res_z = PHI_RESULT (phi);
      tree res_c;

      if (new_exit_block != NULL)
	{
	  /* Now that we have a new exit block, duplicate the phi of the old
	     exit block in the new exit block to preserve loop-closed ssa.  */
	  edge succ_new_exit_block = single_succ_edge (new_exit_block);
	  edge pred_new_exit_block = single_pred_edge (new_exit_block);
	  tree res_y = copy_ssa_name (res_z, phi);
	  nphi = create_phi_node (res_y, new_exit_block);
	  res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
	  add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
	  add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
	}
      else
	res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (virtual_operand_p (res_z))
	continue;

      /* If the phi feeds from a registered reduction, remember which exit
	 phi holds the final reduction value.  */
      gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = (nphi != NULL
			 ? nphi
			 : phi);
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  checking_verify_ssa (true, true);
}
2496
/* Tries to move the exit condition of LOOP to the beginning of its header
   without duplication of the loop body.  NIT is the number of iterations of the
   loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true if
   transformation is successful.  */

static bool
try_transform_to_exit_first_loop_alt (class loop *loop,
				      reduction_info_table_type *reduction_list,
				      tree nit)
{
  /* Check whether the latch contains a single statement.  */
  if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
    return false;

  /* Check whether the latch contains no phis.  */
  if (phi_nodes (loop->latch) != NULL)
    return false;

  /* Check whether the latch contains the loop iv increment.  */
  edge back = single_succ_edge (loop->latch);
  edge exit = single_dom_exit (loop);
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
  tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
  if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
    return false;

  /* Check whether there's no code between the loop condition and the latch.  */
  if (!single_pred_p (loop->latch)
      || single_pred (loop->latch) != exit->src)
    return false;

  tree alt_bound = NULL_TREE;
  tree nit_type = TREE_TYPE (nit);

  /* Figure out whether nit + 1 overflows.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
	{
	  /* nit + 1 is representable; fold it to a constant bound.  */
	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
				       nit, build_one_cst (nit_type));

	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
	  return true;
	}
      else
	{
	  /* Todo: Figure out if we can trigger this, if it's worth to handle
	     optimally, and if we can handle it optimally.  */
	  return false;
	}
    }

  gcc_assert (TREE_CODE (nit) == SSA_NAME);

  /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
     iv with base 0 and step 1 that is incremented in the latch, like this:

     <bb header>:
     # iv_1 = PHI <0 (preheader), iv_2 (latch)>
     ...
     if (iv_1 < nit)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     iv_2 = iv_1 + 1;
     goto <bb header>;

     The range of iv_1 is [0, nit].  The latch edge is taken for
     iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
     number of latch executions is equal to nit.

     The function max_loop_iterations gives us the maximum number of latch
     executions, so it gives us the maximum value of nit.  */
  widest_int nit_max;
  if (!max_loop_iterations (loop, &nit_max))
    return false;

  /* Check if nit + 1 overflows.  */
  widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
  if (nit_max >= type_max)
    return false;

  gimple *def = SSA_NAME_DEF_STMT (nit);

  /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
  if (def
      && is_gimple_assign (def)
      && gimple_assign_rhs_code (def) == PLUS_EXPR)
    {
      tree op1 = gimple_assign_rhs1 (def);
      tree op2 = gimple_assign_rhs2 (def);
      if (integer_minus_onep (op1))
	alt_bound = op2;
      else if (integer_minus_onep (op2))
	alt_bound = op1;
    }

  /* If not found, insert nit + 1 in the preheader's predecessor.  */
  if (alt_bound == NULL_TREE)
    {
      alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
			       build_int_cst_type (nit_type, 1));

      gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

      alt_bound
	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
    }

  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
  return true;
}
2616
/* Moves the exit condition of LOOP to the beginning of its header.  NIT is the
   number of iterations of the loop.  REDUCTION_LIST describes the reductions in
   LOOP.  */

static void
transform_to_exit_first_loop (class loop *loop,
			      reduction_info_table_type *reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gphi *phi, *nphi;
  gassign *stmt;
  gcond *cond_stmt, *cond_nit;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gphi_iterator gsi = gsi_start_phis (loop->header);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      /* Give the header phi a fresh result and move the old result to a
	 new phi in ORIG_HEADER fed by the fresh name.  */
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* Keep the exit condition using the renamed control iv.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  /* Count the blocks dominating the exit source.  */
  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gphi_iterator gsi = gsi_start_phis (ex_bb);
       !gsi_end_p (gsi); )
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (!reduction_list->is_empty ())
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Anything else must be the control variable; there can be only
	 one such phi.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
  cond_nit = as_a <gcond *> (last_stmt (exit->src));
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
2728
2729/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
726a989a 2730 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
5f40b3cb 2731 NEW_DATA is the variable that should be initialized from the argument
f99c3557
TS
2732 of LOOP_FN. N_THREADS is the requested number of threads, which can be 0 if
2733 that number is to be determined later. */
5f40b3cb 2734
a6f4d493 2735static void
99b1c316 2736create_parallel_loop (class loop *loop, tree loop_fn, tree data,
61d9c527
TV
2737 tree new_data, unsigned n_threads, location_t loc,
2738 bool oacc_kernels_p)
5f40b3cb 2739{
726a989a 2740 gimple_stmt_iterator gsi;
61d9c527 2741 basic_block for_bb, ex_bb, continue_bb;
0f900dfa 2742 tree t, param;
538dd0b7 2743 gomp_parallel *omp_par_stmt;
355fe088
TS
2744 gimple *omp_return_stmt1, *omp_return_stmt2;
2745 gimple *phi;
538dd0b7
DM
2746 gcond *cond_stmt;
2747 gomp_for *for_stmt;
2748 gomp_continue *omp_cont_stmt;
726a989a 2749 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
5f40b3cb
ZD
2750 edge exit, nexit, guard, end, e;
2751
61d9c527
TV
2752 if (oacc_kernels_p)
2753 {
25651634
TS
2754 gcc_checking_assert (lookup_attribute ("oacc kernels",
2755 DECL_ATTRIBUTES (cfun->decl)));
b0f271ce
TS
2756 /* Indicate to later processing that this is a parallelized OpenACC
2757 kernels construct. */
2758 DECL_ATTRIBUTES (cfun->decl)
2759 = tree_cons (get_identifier ("oacc kernels parallelized"),
2760 NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
61d9c527
TV
2761 }
2762 else
2763 {
b0f271ce
TS
2764 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
2765
61d9c527
TV
2766 basic_block bb = loop_preheader_edge (loop)->src;
2767 basic_block paral_bb = single_pred (bb);
2768 gsi = gsi_last_bb (paral_bb);
5f40b3cb 2769
f99c3557 2770 gcc_checking_assert (n_threads != 0);
61d9c527
TV
2771 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2772 OMP_CLAUSE_NUM_THREADS_EXPR (t)
2773 = build_int_cst (integer_type_node, n_threads);
2774 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2775 gimple_set_location (omp_par_stmt, loc);
5f40b3cb 2776
61d9c527 2777 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
5f40b3cb 2778
61d9c527
TV
2779 /* Initialize NEW_DATA. */
2780 if (data)
2781 {
2782 gassign *assign_stmt;
538dd0b7 2783
61d9c527 2784 gsi = gsi_after_labels (bb);
726a989a 2785
61d9c527
TV
2786 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2787 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2788 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
726a989a 2789
61d9c527
TV
2790 assign_stmt = gimple_build_assign (new_data,
2791 fold_convert (TREE_TYPE (new_data), param));
2792 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2793 }
5f40b3cb 2794
61d9c527
TV
2795 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2796 bb = split_loop_exit_edge (single_dom_exit (loop));
2797 gsi = gsi_last_bb (bb);
2798 omp_return_stmt1 = gimple_build_omp_return (false);
2799 gimple_set_location (omp_return_stmt1, loc);
2800 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2801 }
5f40b3cb 2802
726a989a 2803 /* Extract data for GIMPLE_OMP_FOR. */
5f40b3cb 2804 gcc_assert (loop->header == single_dom_exit (loop)->src);
538dd0b7 2805 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
5f40b3cb 2806
726a989a 2807 cvar = gimple_cond_lhs (cond_stmt);
5f40b3cb
ZD
2808 cvar_base = SSA_NAME_VAR (cvar);
2809 phi = SSA_NAME_DEF_STMT (cvar);
2810 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
b731b390 2811 initvar = copy_ssa_name (cvar);
5f40b3cb
ZD
2812 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2813 initvar);
2814 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2815
1dff453d 2816 gsi = gsi_last_nondebug_bb (loop->latch);
726a989a
RB
2817 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2818 gsi_remove (&gsi, true);
5f40b3cb
ZD
2819
2820 /* Prepare cfg. */
2821 for_bb = split_edge (loop_preheader_edge (loop));
2822 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2823 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2824 gcc_assert (exit == single_dom_exit (loop));
2825
2826 guard = make_edge (for_bb, ex_bb, 0);
357067f2
JH
2827 /* FIXME: What is the probability? */
2828 guard->probability = profile_probability::guessed_never ();
e67d7a1e
TV
2829 /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid. */
2830 loop->latch = split_edge (single_succ_edge (loop->latch));
2831 single_pred_edge (loop->latch)->flags = 0;
357067f2 2832 end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
e67d7a1e
TV
2833 rescan_loop_exit (end, true, false);
2834
538dd0b7
DM
2835 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2836 !gsi_end_p (gpi); gsi_next (&gpi))
5f40b3cb 2837 {
620e594b 2838 location_t locus;
538dd0b7 2839 gphi *phi = gpi.phi ();
7781d262 2840 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
355fe088 2841 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
538dd0b7 2842
7781d262
TV
2843 /* If the exit phi is not connected to a header phi in the same loop, this
2844 value is not modified in the loop, and we're done with this phi. */
2845 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2846 && gimple_bb (def_stmt) == loop->header))
1c5211b1
TV
2847 {
2848 locus = gimple_phi_arg_location_from_edge (phi, exit);
2849 add_phi_arg (phi, def, guard, locus);
2850 add_phi_arg (phi, def, end, locus);
2851 continue;
2852 }
f5045c96 2853
7781d262 2854 gphi *stmt = as_a <gphi *> (def_stmt);
f5045c96 2855 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
b8698a0f 2856 locus = gimple_phi_arg_location_from_edge (stmt,
f5045c96 2857 loop_preheader_edge (loop));
9e227d60 2858 add_phi_arg (phi, def, guard, locus);
f5045c96
AM
2859
2860 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2861 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
9e227d60 2862 add_phi_arg (phi, def, end, locus);
5f40b3cb
ZD
2863 }
2864 e = redirect_edge_and_branch (exit, nexit->dest);
2865 PENDING_STMT (e) = NULL;
2866
726a989a 2867 /* Emit GIMPLE_OMP_FOR. */
61d9c527 2868 if (oacc_kernels_p)
b0f271ce
TS
2869 /* Parallelized OpenACC kernels constructs use gang parallelism. See also
2870 omp-offload.c:execute_oacc_device_lower. */
61d9c527
TV
2871 t = build_omp_clause (loc, OMP_CLAUSE_GANG);
2872 else
1f600fea 2873 {
61d9c527 2874 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
028d4092
ML
2875 int chunk_size = param_parloops_chunk_size;
2876 switch (param_parloops_schedule)
61d9c527 2877 {
028d4092 2878 case PARLOOPS_SCHEDULE_STATIC:
61d9c527
TV
2879 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2880 break;
028d4092 2881 case PARLOOPS_SCHEDULE_DYNAMIC:
61d9c527
TV
2882 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2883 break;
028d4092 2884 case PARLOOPS_SCHEDULE_GUIDED:
61d9c527
TV
2885 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2886 break;
028d4092 2887 case PARLOOPS_SCHEDULE_AUTO:
61d9c527
TV
2888 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2889 chunk_size = 0;
2890 break;
028d4092 2891 case PARLOOPS_SCHEDULE_RUNTIME:
61d9c527
TV
2892 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2893 chunk_size = 0;
2894 break;
2895 default:
2896 gcc_unreachable ();
2897 }
2898 if (chunk_size != 0)
2899 OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2900 = build_int_cst (integer_type_node, chunk_size);
1f600fea 2901 }
5f40b3cb 2902
61d9c527
TV
2903 for_stmt = gimple_build_omp_for (NULL,
2904 (oacc_kernels_p
2905 ? GF_OMP_FOR_KIND_OACC_LOOP
2906 : GF_OMP_FOR_KIND_FOR),
2907 t, 1, NULL);
2908
2909 gimple_cond_set_lhs (cond_stmt, cvar_base);
2910 type = TREE_TYPE (cvar);
9ff70652 2911 gimple_set_location (for_stmt, loc);
726a989a
RB
2912 gimple_omp_for_set_index (for_stmt, 0, initvar);
2913 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2914 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2915 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2916 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2917 cvar_base,
2918 build_int_cst (type, 1)));
2919
2920 gsi = gsi_last_bb (for_bb);
2921 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
5f40b3cb
ZD
2922 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2923
726a989a 2924 /* Emit GIMPLE_OMP_CONTINUE. */
e67d7a1e
TV
2925 continue_bb = single_pred (loop->latch);
2926 gsi = gsi_last_bb (continue_bb);
538dd0b7
DM
2927 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2928 gimple_set_location (omp_cont_stmt, loc);
2929 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2930 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
5f40b3cb 2931
726a989a
RB
2932 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2933 gsi = gsi_last_bb (ex_bb);
538dd0b7
DM
2934 omp_return_stmt2 = gimple_build_omp_return (true);
2935 gimple_set_location (omp_return_stmt2, loc);
2936 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
5f40b3cb 2937
cd7d9fd7
RG
2938 /* After the above dom info is hosed. Re-compute it. */
2939 free_dominance_info (CDI_DOMINATORS);
2940 calculate_dominance_info (CDI_DOMINATORS);
5f40b3cb
ZD
2941}
2942
c75c35e0
TV
2943/* Return number of phis in bb. If COUNT_VIRTUAL_P is false, don't count the
2944 virtual phi. */
2945
2946static unsigned int
2947num_phis (basic_block bb, bool count_virtual_p)
2948{
2949 unsigned int nr_phis = 0;
2950 gphi_iterator gsi;
2951 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2952 {
2953 if (!count_virtual_p && virtual_operand_p (PHI_RESULT (gsi.phi ())))
2954 continue;
2955
2956 nr_phis++;
2957 }
2958
2959 return nr_phis;
2960}
2961
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel, which can be 0 if that number is to be determined
   later.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (class loop *loop,
		   reduction_info_table_type *reduction_list,
		   unsigned n_threads, class tree_niter_desc *niter,
		   bool oacc_kernels_p)
{
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  edge entry, exit;
  struct clsn_data clsn_data;
  location_t loc;
  gimple *cond_stmt;
  unsigned int m_p_thread=2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

    for (i = 0; i < n; ++i)
      for (j = 0; j < m; ++j)
        ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  /* Force the iteration count into a gimple operand, emitting any needed
     computation on the preheader edge.  */
  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  if (!oacc_kernels_p)
    {
      /* For loops with an inner loop, a lower per-thread iteration
	 threshold is used; otherwise require MIN_PER_THREAD iterations
	 per thread to make parallelization worthwhile.  */
      if (loop->inner)
	m_p_thread=2;
      else
	m_p_thread=MIN_PER_THREAD;

      gcc_checking_assert (n_threads != 0);
      many_iterations_cond =
	fold_build2 (GE_EXPR, boolean_type_node,
		     nit, build_int_cst (type, m_p_thread * n_threads - 1));

      /* Also require that the iteration count is known non-zero.  */
      many_iterations_cond
	= fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		       invert_truthvalue (unshare_expr (niter->may_be_zero)),
		       many_iterations_cond);
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
      if (!is_gimple_condexpr (many_iterations_cond))
	{
	  many_iterations_cond
	    = force_gimple_operand (many_iterations_cond, &stmts,
				    true, NULL_TREE);
	  if (stmts)
	    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
					      stmts);
	}

      initialize_original_copy_tables ();

      /* We assume that the loop usually iterates a lot.  */
      loop_version (loop, many_iterations_cond, NULL,
		    profile_probability::likely (),
		    profile_probability::unlikely (),
		    profile_probability::likely (),
		    profile_probability::unlikely (), true);
      update_ssa (TODO_update_ssa);
      free_original_copy_tables ();
    }

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);
  if (num_phis (loop->header, false) != reduction_list->elements () + 1)
    {
      /* The call to canonicalize_loop_ivs above failed to "base all the
	 induction variables in LOOP on a single control one".  Do damage
	 control.  */
      basic_block preheader = loop_preheader_edge (loop)->src;
      basic_block cond_bb = single_pred (preheader);
      gcond *cond = as_a <gcond *> (gsi_stmt (gsi_last_bb (cond_bb)));
      gimple_cond_make_true (cond);
      update_stmt (cond);
      /* We've gotten rid of the duplicate loop created by loop_version, but
	 we can't undo whatever canonicalize_loop_ivs has done.
	 TODO: Fix this properly by ensuring that the call to
	 canonicalize_loop_ivs succeeds.  */
      if (dump_file
	  && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "canonicalize_loop_ivs failed for loop %d,"
		 " aborting transformation\n", loop->num);
      return;
    }

  /* Ensure that the exit condition is the first statement in the loop.
     The common case is that latch of the loop is empty (apart from the
     increment) and immediately follows the loop exit test.  Attempt to move the
     entry of the loop directly before the exit check and increase the number of
     iterations of the loop by one.  */
  if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
    {
      if (dump_file
	  && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "alternative exit-first loop transform succeeded"
		 " for loop %d\n", loop->num);
    }
  else
    {
      if (oacc_kernels_p)
	n_threads = 1;

      /* Fall back on the method that handles more cases, but duplicates the
	 loop body: move the exit condition of LOOP to the beginning of its
	 header, and duplicate the part of the last iteration that gets disabled
	 to the exit of the loop.  */
      transform_to_exit_first_loop (loop, reduction_list, nit);
    }

  /* Generate initializations for reductions.  */
  if (!reduction_list->is_empty ())
    reduction_list->traverse <class loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  /* This rewrites the body in terms of new variables.  This has already
     been done for oacc_kernels_p in pass_lower_omp/lower_omp ().  */
  if (!oacc_kernels_p)
    {
      eliminate_local_variables (entry, exit);
      /* In the old loop, move all variables non-local to the loop to a
	 structure and back, and create separate decls for the variables used in
	 loop.  */
      separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
				&new_arg_struct, &clsn_data);
    }
  else
    {
      /* No argument structure is needed; reduction results are loaded in
	 the exit destination block.  */
      arg_struct = NULL_TREE;
      new_arg_struct = NULL_TREE;
      clsn_data.load = NULL_TREE;
      clsn_data.load_bb = exit->dest;
      clsn_data.store = NULL_TREE;
      clsn_data.store_bb = NULL;
    }

  /* Create the parallel constructs.  Use the location of the loop's exit
     condition for the emitted constructs, when available.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
			n_threads, loc, oacc_kernels_p);
  if (!reduction_list->is_empty ())
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  free_numbers_of_iterations_estimates (cfun);
}
3189
9857228c
SP
3190/* Returns true when LOOP contains vector phi nodes. */
3191
3192static bool
99b1c316 3193loop_has_vector_phi_nodes (class loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
3194{
3195 unsigned i;
3196 basic_block *bbs = get_loop_body_in_dom_order (loop);
538dd0b7 3197 gphi_iterator gsi;
9857228c 3198 bool res = true;
9857228c
SP
3199
3200 for (i = 0; i < loop->num_nodes; i++)
726a989a 3201 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
538dd0b7 3202 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
9857228c
SP
3203 goto end;
3204
3205 res = false;
3206 end:
3207 free (bbs);
3208 return res;
3209}
3210
/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, insert it to the REDUCTION_LIST.  */

static void
build_new_reduction (reduction_info_table_type *reduction_list,
		     gimple *reduc_stmt, gphi *phi)
{
  reduction_info **slot;
  struct reduction_info *new_reduction;
  enum tree_code reduction_code;

  gcc_assert (reduc_stmt);

  /* REDUC_STMT may itself be a phi (this happens for the double-reduction
     stmts collected by gather_scalar_reductions); in that case the actual
     reduction operation is found on the stmt defining its first argument.  */
  if (gimple_code (reduc_stmt) == GIMPLE_PHI)
    {
      tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
      gimple *def1 = SSA_NAME_DEF_STMT (op1);
      reduction_code = gimple_assign_rhs_code (def1);
    }
  else
    reduction_code = gimple_assign_rhs_code (reduc_stmt);
  /* Check for OpenMP supported reduction.  Unsupported codes are
     silently ignored (no entry is added to REDUCTION_LIST).  */
  switch (reduction_code)
    {
    case PLUS_EXPR:
    case MULT_EXPR:
    case MAX_EXPR:
    case MIN_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case BIT_AND_EXPR:
    case TRUTH_OR_EXPR:
    case TRUTH_XOR_EXPR:
    case TRUTH_AND_EXPR:
      break;
    default:
      return;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction. reduction stmt is:\n");
      print_gimple_stmt (dump_file, reduc_stmt, 0);
      fprintf (dump_file, "\n");
    }

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  /* The SSA version of the phi result is used as the hash key
     (see also set_reduc_phi_uids).  */
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = reduction_code;
  slot = reduction_list->find_slot (new_reduction, INSERT);
  *slot = new_reduction;
}
3267
5d1fd1de
JJ
3268/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
3269
4a8fb1a1
LC
3270int
3271set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
5d1fd1de 3272{
4a8fb1a1 3273 struct reduction_info *const red = *slot;
5d1fd1de
JJ
3274 gimple_set_uid (red->reduc_phi, red->reduc_version);
3275 return 1;
3276}
3277
32c91dfc 3278/* Return true if the type of reduction performed by STMT_INFO is suitable
b781a135
RS
3279 for this pass. */
3280
3281static bool
32c91dfc 3282valid_reduction_p (stmt_vec_info stmt_info)
b781a135
RS
3283{
3284 /* Parallelization would reassociate the operation, which isn't
3285 allowed for in-order reductions. */
b781a135
RS
3286 vect_reduction_type reduc_type = STMT_VINFO_REDUC_TYPE (stmt_info);
3287 return reduc_type != FOLD_LEFT_REDUCTION;
3288}
3289
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
{
  gphi_iterator gsi;
  loop_vec_info simple_loop_info;
  /* Double reductions (outer-loop phis fed by an inner-loop reduction)
     are collected first and processed in a second phase below.  */
  auto_vec<gphi *, 4> double_reduc_phis;
  auto_vec<gimple *, 4> double_reduc_stmts;

  vec_info_shared shared;
  simple_loop_info = vect_analyze_loop_form (loop, &shared);
  if (simple_loop_info == NULL)
    goto gather_done;

  /* Phase 1: scan the header phis of LOOP itself.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      /* Simple induction variables are not reductions.  */
      if (simple_iv (loop, loop, res, &iv, true))
	continue;

      stmt_vec_info reduc_stmt_info
	= parloops_force_simple_reduction (simple_loop_info,
					   simple_loop_info->lookup_stmt (phi),
					   &double_reduc, true);
      if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info))
	continue;

      if (double_reduc)
	{
	  /* Only singly-nested double reductions are handled.  */
	  if (loop->inner->inner != NULL)
	    continue;

	  double_reduc_phis.safe_push (phi);
	  double_reduc_stmts.safe_push (reduc_stmt_info->stmt);
	  continue;
	}

      build_new_reduction (reduction_list, reduc_stmt_info->stmt, phi);
    }
  delete simple_loop_info;

  /* Phase 2: validate the collected double reductions against the
     inner loop's form.  */
  if (!double_reduc_phis.is_empty ())
    {
      vec_info_shared shared;
      simple_loop_info = vect_analyze_loop_form (loop->inner, &shared);
      if (simple_loop_info)
	{
	  gphi *phi;
	  unsigned int i;

	  FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
	    {
	      affine_iv iv;
	      tree res = PHI_RESULT (phi);
	      bool double_reduc;

	      /* The outer phi result must feed exactly one inner-loop phi.  */
	      use_operand_p use_p;
	      gimple *inner_stmt;
	      bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
	      gcc_assert (single_use_p);
	      if (gimple_code (inner_stmt) != GIMPLE_PHI)
		continue;
	      gphi *inner_phi = as_a <gphi *> (inner_stmt);
	      if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
			     &iv, true))
		continue;

	      stmt_vec_info inner_phi_info
		= simple_loop_info->lookup_stmt (inner_phi);
	      stmt_vec_info inner_reduc_stmt_info
		= parloops_force_simple_reduction (simple_loop_info,
						   inner_phi_info,
						   &double_reduc, true);
	      gcc_assert (!double_reduc);
	      if (!inner_reduc_stmt_info
		  || !valid_reduction_p (inner_reduc_stmt_info))
		continue;

	      build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
	    }
	  delete simple_loop_info;
	}
    }

 gather_done:
  if (reduction_list->is_empty ())
    return;

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and delete simple_loop_info, we can set gimple_uid of reduc_phi stmts only
     now.  */
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
  reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
}
3395
3396/* Try to initialize NITER for code generation part. */
3397
3398static bool
99b1c316 3399try_get_loop_niter (loop_p loop, class tree_niter_desc *niter)
08dab97a
RL
3400{
3401 edge exit = single_dom_exit (loop);
3402
3403 gcc_assert (exit);
3404
3405 /* We need to know # of iterations, and there should be no uses of values
3406 defined inside loop outside of it, unless the values are invariants of
3407 the loop. */
3408 if (!number_of_iterations_exit (loop, exit, niter, false))
3409 {
3410 if (dump_file && (dump_flags & TDF_DETAILS))
3411 fprintf (dump_file, " FAILED: number of iterations not known\n");
3412 return false;
3413 }
3414
3415 return true;
3416}
3417
61d9c527
TV
3418/* Return the default def of the first function argument. */
3419
3420static tree
3421get_omp_data_i_param (void)
3422{
3423 tree decl = DECL_ARGUMENTS (cfun->decl);
3424 gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
3425 return ssa_default_def (cfun, decl);
3426}
3427
/* For PHI in loop header of LOOP, look for pattern:

   <bb preheader>
     .omp_data_i = &.omp_data_arr;
     addr = .omp_data_i->sum;
     sum_a = *addr;

   <bb header>:
     sum_b = PHI <sum_a (preheader), sum_c (latch)>

   and return addr.  Otherwise, return NULL_TREE.  */

static tree
find_reduc_addr (class loop *loop, gphi *phi)
{
  /* Walk back from the phi's preheader argument to the load that
     defines it; it must be a load through a MEM_REF.  */
  edge e = loop_preheader_edge (loop);
  tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
  gimple *stmt = SSA_NAME_DEF_STMT (arg);
  if (!gimple_assign_single_p (stmt))
    return NULL_TREE;
  tree memref = gimple_assign_rhs1 (stmt);
  if (TREE_CODE (memref) != MEM_REF)
    return NULL_TREE;
  tree addr = TREE_OPERAND (memref, 0);

  /* The address itself must come from a COMPONENT_REF load off the
     .omp_data_i parameter (see get_omp_data_i_param).  */
  gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
  if (!gimple_assign_single_p (stmt2))
    return NULL_TREE;
  tree compref = gimple_assign_rhs1 (stmt2);
  if (TREE_CODE (compref) != COMPONENT_REF)
    return NULL_TREE;
  tree addr2 = TREE_OPERAND (compref, 0);
  if (TREE_CODE (addr2) != MEM_REF)
    return NULL_TREE;
  addr2 = TREE_OPERAND (addr2, 0);
  if (TREE_CODE (addr2) != SSA_NAME
      || addr2 != get_omp_data_i_param ())
    return NULL_TREE;

  return addr;
}
3469
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  Returns false (rejecting the
   loop) when a value live across the loop exit is not part of a
   recognized reduction.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type *reduction_list,
			   bool oacc_kernels_p)
{
  edge exit = single_dom_exit (loop);
  gphi_iterator gsi;

  gcc_assert (exit);

  /* Try to get rid of exit phis.  */
  final_value_replacement_loop (loop);

  gather_scalar_reductions (loop, reduction_list);


  /* Every non-virtual exit phi must correspond to a detected reduction;
     otherwise a scalar value escapes the loop and we cannot parallelize.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple *reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (TREE_CODE (val) != SSA_NAME)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: exit PHI argument invariant.\n");
	      return false;
	    }

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it is part of reduction pattern:\n");
	    }
	  if (reduction_list->is_empty ())
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  /* Find the in-loop (non-debug) use of VAL; that should be the
	     reduction phi registered in REDUCTION_LIST.  */
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (red->keep_res != NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: reduction has multiple exit phis.\n");
	      return false;
	    }
	  /* Remember the exit phi so the reduction result can be kept.  */
	  red->keep_res = phi;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }

  /* For OpenACC kernels, each reduction phi additionally needs the
     address of its slot in the .omp_data struct (see find_reduc_addr).  */
  if (oacc_kernels_p)
    {
      for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  tree def = PHI_RESULT (phi);
	  affine_iv iv;

	  if (!virtual_operand_p (def)
	      && !simple_iv (loop, loop, def, &iv, true))
	    {
	      tree addr = find_reduc_addr (loop, phi);
	      if (addr == NULL_TREE)
		return false;
	      struct reduction_info *red = reduction_phi (reduction_list, phi);
	      red->reduc_addr = addr;
	    }
	}
    }

  return true;
}
3607
3907c6cf
TV
3608/* Return true if LOOP contains phis with ADDR_EXPR in args. */
3609
3610static bool
99b1c316 3611loop_has_phi_with_address_arg (class loop *loop)
3907c6cf
TV
3612{
3613 basic_block *bbs = get_loop_body (loop);
3614 bool res = false;
3615
3616 unsigned i, j;
3617 gphi_iterator gsi;
3618 for (i = 0; i < loop->num_nodes; i++)
3619 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3620 {
3621 gphi *phi = gsi.phi ();
3622 for (j = 0; j < gimple_phi_num_args (phi); j++)
3623 {
3624 tree arg = gimple_phi_arg_def (phi, j);
3625 if (TREE_CODE (arg) == ADDR_EXPR)
3626 {
3627 /* This should be handled by eliminate_local_variables, but that
3628 function currently ignores phis. */
3629 res = true;
3630 goto end;
3631 }
3632 }
3633 }
3634 end:
3635 free (bbs);
61d9c527
TV
3636
3637 return res;
3638}
3639
/* Return true if memory ref REF (corresponding to the stmt at GSI in
   REGIONS_BB[I]) conflicts with the statements in REGIONS_BB[I] after gsi,
   or the statements in REGIONS_BB[I + n].  REF_IS_STORE indicates if REF is a
   store.  Ignore conflicts with SKIP_STMT.  */

static bool
ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
			   bool ref_is_store, vec<basic_block> region_bbs,
			   unsigned int i, gimple *skip_stmt)
{
  basic_block bb = region_bbs[i];
  /* Start checking just after the stmt REF was taken from.  */
  gsi_next (&gsi);

  while (true)
    {
      for (; !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (stmt == skip_stmt)
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "skipping reduction store: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      continue;
	    }

	  /* Stmts without virtual operands cannot touch memory.  */
	  if (!gimple_vdef (stmt)
	      && !gimple_vuse (stmt))
	    continue;

	  if (gimple_code (stmt) == GIMPLE_RETURN)
	    continue;

	  if (ref_is_store)
	    {
	      /* A store conflicts with any later read of REF.  */
	      if (ref_maybe_used_by_stmt_p (stmt, ref))
		{
		  if (dump_file)
		    {
		      fprintf (dump_file, "Stmt ");
		      print_gimple_stmt (dump_file, stmt, 0);
		    }
		  return true;
		}
	    }
	  else
	    {
	      /* A load conflicts with any later write that may clobber REF.  */
	      if (stmt_may_clobber_ref_p_1 (stmt, ref))
		{
		  if (dump_file)
		    {
		      fprintf (dump_file, "Stmt ");
		      print_gimple_stmt (dump_file, stmt, 0);
		    }
		  return true;
		}
	    }
	}
      /* Advance to the next block of the region, if any.  */
      i++;
      if (i == region_bbs.length ())
	break;
      bb = region_bbs[i];
      gsi = gsi_start_bb (bb);
    }

  return false;
}
3710
3711/* Return true if the bbs in REGION_BBS but not in in_loop_bbs can be executed
3712 in parallel with REGION_BBS containing the loop. Return the stores of
3713 reduction results in REDUCTION_STORES. */
3714
static bool
oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
		      reduction_info_table_type *reduction_list,
		      bitmap reduction_stores)
{
  tree omp_data_i = get_omp_data_i_param ();

  unsigned i;
  basic_block bb;
  FOR_EACH_VEC_ELT (region_bbs, i, bb)
    {
      /* Only examine blocks outside the loop body itself.  */
      if (bitmap_bit_p (in_loop_bbs, bb->index))
	continue;

      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple *skip_stmt = NULL;

	  if (is_gimple_debug (stmt)
	      || gimple_code (stmt) == GIMPLE_COND)
	    continue;

	  ao_ref ref;
	  bool ref_is_store = false;
	  if (gimple_assign_load_p (stmt))
	    {
	      tree rhs = gimple_assign_rhs1 (stmt);
	      tree base = get_base_address (rhs);
	      /* Loads through the .omp_data_i parameter pointer are
		 harmless here; skip them.  */
	      if (TREE_CODE (base) == MEM_REF
		  && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
		continue;

	      /* Detect the load feeding a reduction PHI: if its (single)
		 use reaches a reduction's keep_res PHI whose result is in
		 turn stored exactly once, that store is the reduction
		 store.  Record its vdef in REDUCTION_STORES and remember
		 it as SKIP_STMT so the conflict check ignores it.  */
	      tree lhs = gimple_assign_lhs (stmt);
	      if (TREE_CODE (lhs) == SSA_NAME
		  && has_single_use (lhs))
		{
		  use_operand_p use_p;
		  gimple *use_stmt;
		  struct reduction_info *red;
		  single_imm_use (lhs, &use_p, &use_stmt);
		  if (gimple_code (use_stmt) == GIMPLE_PHI
		      && (red = reduction_phi (reduction_list, use_stmt)))
		    {
		      tree val = PHI_RESULT (red->keep_res);
		      if (has_single_use (val))
			{
			  single_imm_use (val, &use_p, &use_stmt);
			  if (gimple_store_p (use_stmt))
			    {
			      unsigned int id
				= SSA_NAME_VERSION (gimple_vdef (use_stmt));
			      bitmap_set_bit (reduction_stores, id);
			      skip_stmt = use_stmt;
			      if (dump_file)
				{
				  fprintf (dump_file, "found reduction load: ");
				  print_gimple_stmt (dump_file, stmt, 0);
				}
			    }
			}
		    }
		}

	      ao_ref_init (&ref, rhs);
	    }
	  else if (gimple_store_p (stmt))
	    {
	      ao_ref_init (&ref, gimple_assign_lhs (stmt));
	      ref_is_store = true;
	    }
	  else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
	    continue;
	  else if (!gimple_has_side_effects (stmt)
		   && !gimple_could_trap_p (stmt)
		   && !stmt_could_throw_p (cfun, stmt)
		   && !gimple_vdef (stmt)
		   && !gimple_vuse (stmt))
	    continue;
	  else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
	    continue;
	  else if (gimple_code (stmt) == GIMPLE_RETURN)
	    continue;
	  else
	    {
	      /* Anything else is a statement we do not know how to handle
		 in the entry/exit part; give up.  */
	      if (dump_file)
		{
		  fprintf (dump_file, "Unhandled stmt in entry/exit: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      return false;
	    }

	  /* The memory reference must not conflict with the rest of the
	     region (ignoring the reduction store, if any).  */
	  if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
					 i, skip_stmt))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file, "conflicts with entry/exit stmt: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}
	      return false;
	    }
	}
    }

  return true;
}
3825
3826/* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
3827 gang_pos == 0, except when the stores are REDUCTION_STORES. Return true
3828 if any changes were made. */
3829
static bool
oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
			     bitmap reduction_stores)
{
  /* SSA name holding the gang position; created lazily on the first store
     that needs neutering.  */
  tree gang_pos = NULL_TREE;
  bool changed = false;

  unsigned i;
  basic_block bb;
  FOR_EACH_VEC_ELT (region_bbs, i, bb)
    {
      /* Only the entry/exit blocks outside the loop are neutered.  */
      if (bitmap_bit_p (in_loop_bbs, bb->index))
	continue;

      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (!gimple_store_p (stmt))
	    {
	      /* Update gsi to point to next stmt.  */
	      gsi_next (&gsi);
	      continue;
	    }

	  /* Reduction stores were already validated; leave them alone.  */
	  if (bitmap_bit_p (reduction_stores,
			    SSA_NAME_VERSION (gimple_vdef (stmt))))
	    {
	      if (dump_file)
		{
		  fprintf (dump_file,
			   "skipped reduction store for single-gang"
			   " neutering: ");
		  print_gimple_stmt (dump_file, stmt, 0);
		}

	      /* Update gsi to point to next stmt.  */
	      gsi_next (&gsi);
	      continue;
	    }

	  changed = true;

	  /* Materialize 'gang_pos = GOACC_DIM_POS (GOMP_DIM_GANG)' once,
	     at the start of the function, so every guarded store can test
	     it.  */
	  if (gang_pos == NULL_TREE)
	    {
	      tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
	      gcall *gang_single
		= gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
	      gang_pos = make_ssa_name (integer_type_node);
	      gimple_call_set_lhs (gang_single, gang_pos);
	      gimple_stmt_iterator start
		= gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	      tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
	      gimple_set_vuse (gang_single, vuse);
	      gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
	    }

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "found store that needs single-gang neutering: ");
	      print_gimple_stmt (dump_file, stmt, 0);
	    }

	  {
	    /* Isolate the store in its own block, then guard it with
	       'if (gang_pos == 0)' so only one gang performs it.  */

	    /* Split block before store.  */
	    gimple_stmt_iterator gsi2 = gsi;
	    gsi_prev (&gsi2);
	    edge e;
	    if (gsi_end_p (gsi2))
	      {
		/* Store is the first real statement of BB.  */
		e = split_block_after_labels (bb);
		gsi2 = gsi_last_bb (bb);
	      }
	    else
	      e = split_block (bb, gsi_stmt (gsi2));
	    basic_block bb2 = e->dest;

	    /* Split block after store.  */
	    gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
	    edge e2 = split_block (bb2, gsi_stmt (gsi3));
	    basic_block bb3 = e2->dest;

	    gimple *cond
	      = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
				   NULL_TREE, NULL_TREE);
	    gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);

	    /* False edge bypasses the store block entirely.  */
	    edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
	    /* FIXME: What is the probability?  */
	    e3->probability = profile_probability::guessed_never ();
	    e->flags = EDGE_TRUE_VALUE;

	    /* The store is now conditional, so its virtual def must be
	       merged with the incoming virtual use in a PHI at the join
	       block.  */
	    tree vdef = gimple_vdef (stmt);
	    tree vuse = gimple_vuse (stmt);

	    tree phi_res = copy_ssa_name (vdef);
	    gphi *new_phi = create_phi_node (phi_res, bb3);
	    replace_uses_by (vdef, phi_res);
	    add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
	    add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);

	    /* Update gsi to point to next stmt.  */
	    bb = bb3;
	    gsi = gsi_start_bb (bb);
	  }
	}
    }

  return changed;
}
3942
3943/* Return true if the statements before and after the LOOP can be executed in
3944 parallel with the function containing the loop. Resolve conflicting stores
3945 outside LOOP by guarding them such that only a single gang executes them. */
3946
3947static bool
99b1c316 3948oacc_entry_exit_ok (class loop *loop,
61d9c527
TV
3949 reduction_info_table_type *reduction_list)
3950{
3951 basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
3952 vec<basic_block> region_bbs
3953 = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
3954
3955 bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
3956 bitmap_clear (in_loop_bbs);
3957 for (unsigned int i = 0; i < loop->num_nodes; i++)
3958 bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
3959
3960 bitmap reduction_stores = BITMAP_ALLOC (NULL);
3961 bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
3962 reduction_stores);
3963
3964 if (res)
3965 {
3966 bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
3967 reduction_stores);
3968 if (changed)
3969 {
3970 free_dominance_info (CDI_DOMINATORS);
3971 calculate_dominance_info (CDI_DOMINATORS);
3972 }
3973 }
3974
4089c340 3975 region_bbs.release ();
61d9c527
TV
3976 free (loop_bbs);
3977
3978 BITMAP_FREE (in_loop_bbs);
3979 BITMAP_FREE (reduction_stores);
3980
3907c6cf
TV
3981 return res;
3982}
3983
5f40b3cb
ZD
3984/* Detect parallel loops and generate parallel code using libgomp
3985 primitives. Returns true if some loop was parallelized, false
3986 otherwise. */
3987
static bool
parallelize_loops (bool oacc_kernels_p)
{
  unsigned n_threads;
  bool changed = false;
  class loop *loop;
  /* When a loop is parallelized, its inner loops need not (and must not)
     be considered again; SKIP_LOOP tracks that.  */
  class loop *skip_loop = NULL;
  class tree_niter_desc niter_desc;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (!oacc_kernels_p
      && parallelized_function_p (cfun->decl))
    return false;

  /* Do not parallelize loops in offloaded functions.  */
  if (!oacc_kernels_p
      && oacc_get_fn_attrib (cfun->decl) != NULL)
    return false;

  if (cfun->has_nonlocal_label)
    return false;

  /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
     the argument to -ftree-parallelize-loops.  */
  if (oacc_kernels_p)
    n_threads = 0;
  else
    n_threads = flag_tree_parallelize_loops;

  gcc_obstack_init (&parloop_obstack);
  reduction_info_table_type reduction_list (10);

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_LOOP (loop, 0)
    {
      if (loop == skip_loop)
	{
	  if (!loop->in_oacc_kernels_region
	      && dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Skipping loop %d as inner loop of parallelized loop\n",
		     loop->num);

	  /* Propagate the skip to this loop's own inner loops.  */
	  skip_loop = loop->inner;
	  continue;
	}
      else
	skip_loop = NULL;

      /* The table is reused across candidate loops.  */
      reduction_list.empty ();

      if (oacc_kernels_p)
	{
	  if (!loop->in_oacc_kernels_region)
	    continue;

	  /* Don't try to parallelize inner loops in an oacc kernels region.  */
	  if (loop->inner)
	    skip_loop = loop->inner;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Trying loop %d with header bb %d in oacc kernels"
		     " region\n", loop->num, loop->header->index);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
	  if (loop->inner)
	    fprintf (dump_file, "loop %d is not innermost\n",loop->num);
	  else
	    fprintf (dump_file, "loop %d is innermost\n",loop->num);
	}

      /* The loop must have a single exit that dominates the latch.  */
      if (!single_dom_exit (loop))
	{

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");

	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      /* Parallelization has fixed overhead: skip loops with too few
	 estimated iterations per thread.  */
      estimated = estimated_loop_iterations_int (loop);
      if (estimated == -1)
	estimated = get_likely_max_loop_iterations_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && !oacc_kernels_p
	  && ((estimated != -1
	       && (estimated
		   < ((HOST_WIDE_INT) n_threads
		      * (loop->inner ? 2 : MIN_PER_THREAD) - 1)))
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      /* The number of iterations must be computable.  */
      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
	continue;

      if (loop_has_phi_with_address_arg (loop))
	continue;

      /* Cross-iteration dependences must be absent (graphite may have
	 proved this already via can_be_parallel).  */
      if (!loop->can_be_parallel
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      /* For OpenACC kernels, the code before and after the loop must also
	 be safe to execute in parallel.  */
      if (oacc_kernels_p
	  && !oacc_entry_exit_ok (loop, &reduction_list))
	{
	  if (dump_file)
	    fprintf (dump_file, "entry/exit not ok: FAILED\n");
	  continue;
	}

      changed = true;
      skip_loop = loop->inner;

      if (dump_enabled_p ())
	{
	  dump_user_location_t loop_loc = find_loop_location (loop);
	  if (loop->inner)
	    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
			     "parallelizing outer loop %d\n", loop->num);
	  else
	    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
			     "parallelizing inner loop %d\n", loop->num);
	}

      gen_parallel_loop (loop, &reduction_list,
			 n_threads, &niter_desc, oacc_kernels_p);
    }

  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}
4147
c1bf2a39
AM
4148/* Parallelization. */
4149
c1bf2a39
AM
4150namespace {
4151
/* Static descriptor for the "parloops" pass: a GIMPLE pass that requires
   the CFG and SSA form and requests no extra TODOs itself.  */

const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
4164
/* The pass object.  It is instantiated twice (see passes.def): once in
   its default mode and once in OpenACC-kernels mode, selected through
   set_pass_param.  */

class pass_parallelize_loops : public gimple_opt_pass
{
public:
  pass_parallelize_loops (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
      oacc_kernels_p (false)
  {}

  /* opt_pass methods: */
  /* In OpenACC-kernels mode gate on -fopenacc; otherwise require more
     than one thread requested via -ftree-parallelize-loops.  */
  virtual bool gate (function *)
  {
    if (oacc_kernels_p)
      return flag_openacc;
    else
      return flag_tree_parallelize_loops > 1;
  }
  virtual unsigned int execute (function *);
  opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
  /* Single bool parameter: whether this instance handles OpenACC kernels
     regions.  */
  void set_pass_param (unsigned int n, bool param)
  {
    gcc_assert (n == 0);
    oacc_kernels_p = param;
  }

 private:
  /* True when this pass instance operates on OpenACC kernels regions.  */
  bool oacc_kernels_p;
}; // class pass_parallelize_loops
4192
be55bfe6
TS
4193unsigned
4194pass_parallelize_loops::execute (function *fun)
4195{
e9ff08b2
TV
4196 tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
4197 if (nthreads == NULL_TREE)
4198 return 0;
4199
12db0814
TV
4200 bool in_loop_pipeline = scev_initialized_p ();
4201 if (!in_loop_pipeline)
4202 loop_optimizer_init (LOOPS_NORMAL
4203 | LOOPS_HAVE_RECORDED_EXITS);
4204
4205 if (number_of_loops (fun) <= 1)
4206 return 0;
4207
4208 if (!in_loop_pipeline)
4209 {
4210 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
4211 scev_initialize ();
4212 }
4213
4214 unsigned int todo = 0;
61d9c527 4215 if (parallelize_loops (oacc_kernels_p))
18751894
TV
4216 {
4217 fun->curr_properties &= ~(PROP_gimple_eomp);
e67d7a1e 4218
b2b29377 4219 checking_verify_loop_structure ();
e67d7a1e 4220
12db0814
TV
4221 todo |= TODO_update_ssa;
4222 }
4223
4224 if (!in_loop_pipeline)
4225 {
4226 scev_finalize ();
4227 loop_optimizer_finalize ();
18751894
TV
4228 }
4229
12db0814 4230 return todo;
be55bfe6
TS
4231}
4232
c1bf2a39
AM
4233} // anon namespace
4234
/* Factory used by the pass manager (see tree-pass.h / passes.def) to
   instantiate the parloops pass.  */

gimple_opt_pass *
make_pass_parallelize_loops (gcc::context *ctxt)
{
  return new pass_parallelize_loops (ctxt);
}