]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-if-conv.c
In PR70010, a function is marked with target(no-vsx) to disable VSX code
[thirdparty/gcc.git] / gcc / tree-if-conv.c
1 /* If-conversion for vectorizer.
2 Copyright (C) 2004-2019 Free Software Foundation, Inc.
3 Contributed by Devang Patel <dpatel@apple.com>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /* This pass implements a tree level if-conversion of loops. Its
22 initial goal is to help the vectorizer to vectorize loops with
23 conditions.
24
25 A short description of if-conversion:
26
27 o Decide if a loop is if-convertible or not.
28 o Walk all loop basic blocks in breadth first order (BFS order).
29 o Remove conditional statements (at the end of basic block)
30 and propagate condition into destination basic blocks'
31 predicate list.
32 o Replace modify expression with conditional modify expression
33 using current basic block's condition.
34 o Merge all basic blocks
35 o Replace phi nodes with conditional modify expr
36 o Merge all basic blocks into header
37
38 Sample transformation:
39
40 INPUT
41 -----
42
43 # i_23 = PHI <0(0), i_18(10)>;
44 <L0>:;
45 j_15 = A[i_23];
46 if (j_15 > 41) goto <L1>; else goto <L17>;
47
48 <L17>:;
49 goto <bb 3> (<L3>);
50
51 <L1>:;
52
53 # iftmp.2_4 = PHI <0(8), 42(2)>;
54 <L3>:;
55 A[i_23] = iftmp.2_4;
56 i_18 = i_23 + 1;
57 if (i_18 <= 15) goto <L19>; else goto <L18>;
58
59 <L19>:;
60 goto <bb 1> (<L0>);
61
62 <L18>:;
63
64 OUTPUT
65 ------
66
67 # i_23 = PHI <0(0), i_18(10)>;
68 <L0>:;
69 j_15 = A[i_23];
70
71 <L3>:;
72 iftmp.2_4 = j_15 > 41 ? 42 : 0;
73 A[i_23] = iftmp.2_4;
74 i_18 = i_23 + 1;
75 if (i_18 <= 15) goto <L19>; else goto <L18>;
76
77 <L19>:;
78 goto <bb 1> (<L0>);
79
80 <L18>:;
81 */
82
83 #include "config.h"
84 #include "system.h"
85 #include "coretypes.h"
86 #include "backend.h"
87 #include "rtl.h"
88 #include "tree.h"
89 #include "gimple.h"
90 #include "cfghooks.h"
91 #include "tree-pass.h"
92 #include "ssa.h"
93 #include "expmed.h"
94 #include "optabs-query.h"
95 #include "gimple-pretty-print.h"
96 #include "alias.h"
97 #include "fold-const.h"
98 #include "stor-layout.h"
99 #include "gimple-fold.h"
100 #include "gimplify.h"
101 #include "gimple-iterator.h"
102 #include "gimplify-me.h"
103 #include "tree-cfg.h"
104 #include "tree-into-ssa.h"
105 #include "tree-ssa.h"
106 #include "cfgloop.h"
107 #include "tree-data-ref.h"
108 #include "tree-scalar-evolution.h"
109 #include "tree-ssa-loop.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop-ivopts.h"
112 #include "tree-ssa-address.h"
113 #include "dbgcnt.h"
114 #include "tree-hash-traits.h"
115 #include "varasm.h"
116 #include "builtins.h"
117 #include "params.h"
118 #include "cfganal.h"
119 #include "internal-fn.h"
120 #include "fold-const.h"
121 #include "tree-ssa-sccvn.h"
122 #include "tree-cfgcleanup.h"
123 #include "tree-ssa-dse.h"
124
/* Only handle PHIs with at most this many arguments, unless a simd
   pragma asks us to ignore the limit.  */
127 #define MAX_PHI_ARG_NUM \
128 ((unsigned) PARAM_VALUE (PARAM_MAX_TREE_IF_CONVERSION_PHI_ARGS))
129
130 /* True if we've converted a statement that was only executed when some
131 condition C was true, and if for correctness we need to predicate the
132 statement to ensure that it is a no-op when C is false. See
133 predicate_statements for the kinds of predication we support. */
134 static bool need_to_predicate;
135
136 /* Indicate if there are any complicated PHIs that need to be handled in
137 if-conversion. Complicated PHI has more than two arguments and can't
138 be degenerated to two arguments PHI. See more information in comment
139 before phi_convertible_by_degenerating_args. */
140 static bool any_complicated_phi;
141
/* Hash traits for struct innermost_loop_behavior, keyed on the base
   address, offset, init and step expressions.  It depends on the user
   to free the memory (nofree_ptr_hash).  */

struct innermost_loop_behavior_hash : nofree_ptr_hash <innermost_loop_behavior>
{
  /* Hash an innermost-behavior entry; see definition below.  */
  static inline hashval_t hash (const value_type &);
  /* Structural equality of two entries; see definition below.  */
  static inline bool equal (const value_type &,
			    const compare_type &);
};
151
152 inline hashval_t
153 innermost_loop_behavior_hash::hash (const value_type &e)
154 {
155 hashval_t hash;
156
157 hash = iterative_hash_expr (e->base_address, 0);
158 hash = iterative_hash_expr (e->offset, hash);
159 hash = iterative_hash_expr (e->init, hash);
160 return iterative_hash_expr (e->step, hash);
161 }
162
163 inline bool
164 innermost_loop_behavior_hash::equal (const value_type &e1,
165 const compare_type &e2)
166 {
167 if ((e1->base_address && !e2->base_address)
168 || (!e1->base_address && e2->base_address)
169 || (!e1->offset && e2->offset)
170 || (e1->offset && !e2->offset)
171 || (!e1->init && e2->init)
172 || (e1->init && !e2->init)
173 || (!e1->step && e2->step)
174 || (e1->step && !e2->step))
175 return false;
176
177 if (e1->base_address && e2->base_address
178 && !operand_equal_p (e1->base_address, e2->base_address, 0))
179 return false;
180 if (e1->offset && e2->offset
181 && !operand_equal_p (e1->offset, e2->offset, 0))
182 return false;
183 if (e1->init && e2->init
184 && !operand_equal_p (e1->init, e2->init, 0))
185 return false;
186 if (e1->step && e2->step
187 && !operand_equal_p (e1->step, e2->step, 0))
188 return false;
189
190 return true;
191 }
192
193 /* List of basic blocks in if-conversion-suitable order. */
194 static basic_block *ifc_bbs;
195
196 /* Hash table to store <DR's innermost loop behavior, DR> pairs. */
197 static hash_map<innermost_loop_behavior_hash,
198 data_reference_p> *innermost_DR_map;
199
200 /* Hash table to store <base reference, DR> pairs. */
201 static hash_map<tree_operand_hash, data_reference_p> *baseref_DR_map;
202
203 /* List of redundant SSA names: the first should be replaced by the second. */
204 static vec< std::pair<tree, tree> > redundant_ssa_names;
205
/* Structure used to predicate basic blocks.  This is attached to the
   ->aux field of the BBs in the loop to be if-converted.  */

struct bb_predicate {

  /* The condition under which this basic block is executed.  */
  tree predicate;

  /* PREDICATE is gimplified, and the sequence of statements is
     recorded here, in order to avoid the duplication of computations
     that occur in previous conditions.  See PR44483.  The statements
     are not yet inserted into any basic block.  */
  gimple_seq predicate_gimplified_stmts;
};
218
/* Returns true when the basic block BB has a predicate attached,
   i.e. when its aux field carries a struct bb_predicate.  */

static inline bool
bb_has_predicate (basic_block bb)
{
  return bb->aux != NULL;
}
226
227 /* Returns the gimplified predicate for basic block BB. */
228
229 static inline tree
230 bb_predicate (basic_block bb)
231 {
232 return ((struct bb_predicate *) bb->aux)->predicate;
233 }
234
235 /* Sets the gimplified predicate COND for basic block BB. */
236
237 static inline void
238 set_bb_predicate (basic_block bb, tree cond)
239 {
240 gcc_assert ((TREE_CODE (cond) == TRUTH_NOT_EXPR
241 && is_gimple_condexpr (TREE_OPERAND (cond, 0)))
242 || is_gimple_condexpr (cond));
243 ((struct bb_predicate *) bb->aux)->predicate = cond;
244 }
245
246 /* Returns the sequence of statements of the gimplification of the
247 predicate for basic block BB. */
248
249 static inline gimple_seq
250 bb_predicate_gimplified_stmts (basic_block bb)
251 {
252 return ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts;
253 }
254
255 /* Sets the sequence of statements STMTS of the gimplification of the
256 predicate for basic block BB. */
257
258 static inline void
259 set_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
260 {
261 ((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts = stmts;
262 }
263
/* Adds the sequence of statements STMTS to the sequence of statements
   of the predicate for basic block BB.  */

static inline void
add_bb_predicate_gimplified_stmts (basic_block bb, gimple_seq stmts)
{
  /* We might have updated some stmts in STMTS via force_gimple_operand
     calling fold_stmt and that producing multiple stmts.  Delink immediate
     uses so update_ssa after loop versioning doesn't get confused for
     the not yet inserted predicates.
     ??? This should go away once we reliably avoid updating stmts
     not in any BB.  */
  for (gimple_stmt_iterator gsi = gsi_start (stmts);
       !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      /* Remove the stmt's operands from the immediate-use lists and
	 mark it modified so operands are rescanned once inserted.  */
      delink_stmt_imm_use (stmt);
      gimple_set_modified (stmt, true);
    }
  /* Append without updating operands; the stmts are not in a BB yet.  */
  gimple_seq_add_seq_without_update
    (&(((struct bb_predicate *) bb->aux)->predicate_gimplified_stmts), stmts);
}
286
287 /* Initializes to TRUE the predicate of basic block BB. */
288
289 static inline void
290 init_bb_predicate (basic_block bb)
291 {
292 bb->aux = XNEW (struct bb_predicate);
293 set_bb_predicate_gimplified_stmts (bb, NULL);
294 set_bb_predicate (bb, boolean_true_node);
295 }
296
/* Release the SSA_NAMEs associated with the predicate of basic block BB,
   discarding any gimplified predicate statements that were never
   inserted into the IL.  */

static inline void
release_bb_predicate (basic_block bb)
{
  gimple_seq stmts = bb_predicate_gimplified_stmts (bb);
  if (stmts)
    {
      /* Ensure that these stmts haven't yet been added to a bb.  */
      if (flag_checking)
	for (gimple_stmt_iterator i = gsi_start (stmts);
	     !gsi_end_p (i); gsi_next (&i))
	  gcc_assert (! gimple_bb (gsi_stmt (i)));

      /* Discard them.  */
      gimple_seq_discard (stmts);
      set_bb_predicate_gimplified_stmts (bb, NULL);
    }
}
316
317 /* Free the predicate of basic block BB. */
318
319 static inline void
320 free_bb_predicate (basic_block bb)
321 {
322 if (!bb_has_predicate (bb))
323 return;
324
325 release_bb_predicate (bb);
326 free (bb->aux);
327 bb->aux = NULL;
328 }
329
330 /* Reinitialize predicate of BB with the true predicate. */
331
332 static inline void
333 reset_bb_predicate (basic_block bb)
334 {
335 if (!bb_has_predicate (bb))
336 init_bb_predicate (bb);
337 else
338 {
339 release_bb_predicate (bb);
340 set_bb_predicate (bb, boolean_true_node);
341 }
342 }
343
344 /* Returns a new SSA_NAME of type TYPE that is assigned the value of
345 the expression EXPR. Inserts the statement created for this
346 computation before GSI and leaves the iterator GSI at the same
347 statement. */
348
349 static tree
350 ifc_temp_var (tree type, tree expr, gimple_stmt_iterator *gsi)
351 {
352 tree new_name = make_temp_ssa_name (type, NULL, "_ifc_");
353 gimple *stmt = gimple_build_assign (new_name, expr);
354 gimple_set_vuse (stmt, gimple_vuse (gsi_stmt (*gsi)));
355 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
356 return new_name;
357 }
358
359 /* Return true when COND is a false predicate. */
360
361 static inline bool
362 is_false_predicate (tree cond)
363 {
364 return (cond != NULL_TREE
365 && (cond == boolean_false_node
366 || integer_zerop (cond)));
367 }
368
369 /* Return true when COND is a true predicate. */
370
371 static inline bool
372 is_true_predicate (tree cond)
373 {
374 return (cond == NULL_TREE
375 || cond == boolean_true_node
376 || integer_onep (cond));
377 }
378
379 /* Returns true when BB has a predicate that is not trivial: true or
380 NULL_TREE. */
381
382 static inline bool
383 is_predicated (basic_block bb)
384 {
385 return !is_true_predicate (bb_predicate (bb));
386 }
387
/* Parses the predicate COND and returns its comparison code and
   operands OP0 and OP1.  Returns ERROR_MARK when COND cannot be
   reduced to a single comparison; OP0 and OP1 are only valid when
   the result is not ERROR_MARK.  */

static enum tree_code
parse_predicate (tree cond, tree *op0, tree *op1)
{
  gimple *s;

  /* Look through an SSA_NAME to its defining assignment.  */
  if (TREE_CODE (cond) == SSA_NAME
      && is_gimple_assign (s = SSA_NAME_DEF_STMT (cond)))
    {
      if (TREE_CODE_CLASS (gimple_assign_rhs_code (s)) == tcc_comparison)
	{
	  *op0 = gimple_assign_rhs1 (s);
	  *op1 = gimple_assign_rhs2 (s);
	  return gimple_assign_rhs_code (s);
	}

      else if (gimple_assign_rhs_code (s) == TRUTH_NOT_EXPR)
	{
	  /* Recurse on the negated operand and invert the resulting
	     comparison, respecting NaN semantics of the operand type.  */
	  tree op = gimple_assign_rhs1 (s);
	  tree type = TREE_TYPE (op);
	  enum tree_code code = parse_predicate (op, op0, op1);

	  return code == ERROR_MARK ? ERROR_MARK
	    : invert_tree_comparison (code, HONOR_NANS (type));
	}

      return ERROR_MARK;
    }

  /* A bare comparison expression parses directly.  */
  if (COMPARISON_CLASS_P (cond))
    {
      *op0 = TREE_OPERAND (cond, 0);
      *op1 = TREE_OPERAND (cond, 1);
      return TREE_CODE (cond);
    }

  return ERROR_MARK;
}
428
429 /* Returns the fold of predicate C1 OR C2 at location LOC. */
430
431 static tree
432 fold_or_predicates (location_t loc, tree c1, tree c2)
433 {
434 tree op1a, op1b, op2a, op2b;
435 enum tree_code code1 = parse_predicate (c1, &op1a, &op1b);
436 enum tree_code code2 = parse_predicate (c2, &op2a, &op2b);
437
438 if (code1 != ERROR_MARK && code2 != ERROR_MARK)
439 {
440 tree t = maybe_fold_or_comparisons (boolean_type_node, code1, op1a, op1b,
441 code2, op2a, op2b);
442 if (t)
443 return t;
444 }
445
446 return fold_build2_loc (loc, TRUTH_OR_EXPR, boolean_type_node, c1, c2);
447 }
448
/* Returns either a COND_EXPR or the folded expression if the folded
   expression is a MIN_EXPR, a MAX_EXPR, an ABS_EXPR,
   a constant or a SSA_NAME.  */

static tree
fold_build_cond_expr (tree type, tree cond, tree rhs, tree lhs)
{
  tree rhs1, lhs1, cond_expr;

  /* If COND is comparison r != 0 and r has boolean type, convert COND
     to SSA_NAME to accept by vect bool pattern.  */
  if (TREE_CODE (cond) == NE_EXPR)
    {
      tree op0 = TREE_OPERAND (cond, 0);
      tree op1 = TREE_OPERAND (cond, 1);
      if (TREE_CODE (op0) == SSA_NAME
	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
	  && (integer_zerop (op1)))
	cond = op0;
    }
  cond_expr = fold_ternary (COND_EXPR, type, cond, rhs, lhs);

  /* Folding failed: emit the plain conditional expression.  */
  if (cond_expr == NULL_TREE)
    return build3 (COND_EXPR, type, cond, rhs, lhs);

  STRIP_USELESS_TYPE_CONVERSION (cond_expr);

  /* Constant or SSA_NAME result: use it directly.  */
  if (is_gimple_val (cond_expr))
    return cond_expr;

  /* Re-build ABS/MIN/MAX forms only when their operands are GIMPLE
     values after stripping useless conversions.  */
  if (TREE_CODE (cond_expr) == ABS_EXPR)
    {
      rhs1 = TREE_OPERAND (cond_expr, 1);
      STRIP_USELESS_TYPE_CONVERSION (rhs1);
      if (is_gimple_val (rhs1))
	return build1 (ABS_EXPR, type, rhs1);
    }

  if (TREE_CODE (cond_expr) == MIN_EXPR
      || TREE_CODE (cond_expr) == MAX_EXPR)
    {
      lhs1 = TREE_OPERAND (cond_expr, 0);
      STRIP_USELESS_TYPE_CONVERSION (lhs1);
      rhs1 = TREE_OPERAND (cond_expr, 1);
      STRIP_USELESS_TYPE_CONVERSION (rhs1);
      if (is_gimple_val (rhs1) && is_gimple_val (lhs1))
	return build2 (TREE_CODE (cond_expr), type, lhs1, rhs1);
    }
  /* Any other folded form: fall back to the plain COND_EXPR.  */
  return build3 (COND_EXPR, type, cond, rhs, lhs);
}
499
/* Add condition NC to the predicate list of basic block BB.  LOOP is
   the loop to be if-converted.  Use predicate of cd-equivalent block
   for join bb if it exists: we call basic blocks bb1 and bb2
   cd-equivalent if they are executed under the same condition.  */

static inline void
add_to_predicate_list (class loop *loop, basic_block bb, tree nc)
{
  tree bc, *tp;
  basic_block dom_bb;

  if (is_true_predicate (nc))
    return;

  /* If dominance tells us this basic block is always executed,
     don't record any predicates for it.  */
  if (dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
    return;

  dom_bb = get_immediate_dominator (CDI_DOMINATORS, bb);
  /* We use notion of cd equivalence to get simpler predicate for
     join block, e.g. if join block has 2 predecessors with predicates
     p1 & p2 and p1 & !p2, we'd like to get p1 for it instead of
     p1 & p2 | p1 & !p2.  BB is cd-equivalent to its immediate
     dominator when BB post-dominates it.  */
  if (dom_bb != loop->header
      && get_immediate_dominator (CDI_POST_DOMINATORS, dom_bb) == bb)
    {
      gcc_assert (flow_bb_inside_loop_p (loop, dom_bb));
      bc = bb_predicate (dom_bb);
      if (!is_true_predicate (bc))
	set_bb_predicate (bb, bc);
      else
	gcc_assert (is_true_predicate (bb_predicate (bb)));
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Use predicate of bb#%d for bb#%d\n",
		 dom_bb->index, bb->index);
      return;
    }

  /* OR the new condition into BB's existing predicate, if any.  */
  if (!is_predicated (bb))
    bc = nc;
  else
    {
      bc = bb_predicate (bb);
      bc = fold_or_predicates (EXPR_LOCATION (bc), nc, bc);
      if (is_true_predicate (bc))
	{
	  reset_bb_predicate (bb);
	  return;
	}
    }

  /* Allow a TRUTH_NOT_EXPR around the main predicate.  */
  if (TREE_CODE (bc) == TRUTH_NOT_EXPR)
    tp = &TREE_OPERAND (bc, 0);
  else
    tp = &bc;
  /* Gimplify the predicate; the produced statements are recorded on
     the BB and inserted later (see bb_predicate struct).  */
  if (!is_gimple_condexpr (*tp))
    {
      gimple_seq stmts;
      *tp = force_gimple_operand_1 (*tp, &stmts, is_gimple_condexpr, NULL_TREE);
      add_bb_predicate_gimplified_stmts (bb, stmts);
    }
  set_bb_predicate (bb, bc);
}
565
566 /* Add the condition COND to the previous condition PREV_COND, and add
567 this to the predicate list of the destination of edge E. LOOP is
568 the loop to be if-converted. */
569
570 static void
571 add_to_dst_predicate_list (class loop *loop, edge e,
572 tree prev_cond, tree cond)
573 {
574 if (!flow_bb_inside_loop_p (loop, e->dest))
575 return;
576
577 if (!is_true_predicate (prev_cond))
578 cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
579 prev_cond, cond);
580
581 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, e->dest))
582 add_to_predicate_list (loop, e->dest, cond);
583 }
584
585 /* Return true if one of the successor edges of BB exits LOOP. */
586
587 static bool
588 bb_with_exit_edge_p (class loop *loop, basic_block bb)
589 {
590 edge e;
591 edge_iterator ei;
592
593 FOR_EACH_EDGE (e, ei, bb->succs)
594 if (loop_exit_edge_p (loop, e))
595 return true;
596
597 return false;
598 }
599
/* Given PHI which has more than two arguments, this function checks if
   it's if-convertible by degenerating its arguments.  Specifically, if
   below two conditions are satisfied:

     1) Number of PHI arguments with different values equals to 2 and one
	argument has the only occurrence.
     2) The edge corresponding to the unique argument isn't critical edge.

   Such PHI can be handled as PHIs have only two arguments.  For example,
   below PHI:

     res = PHI <A_1(e1), A_1(e2), A_2(e3)>;

   can be transformed into:

     res = (predicate of e3) ? A_2 : A_1;

   Return TRUE if it is the case, FALSE otherwise.  */

static bool
phi_convertible_by_degenerating_args (gphi *phi)
{
  edge e;
  tree arg, t1 = NULL, t2 = NULL;
  /* N1/N2 count occurrences of the two distinct values T1/T2;
     I1/I2 record the last argument index seen for each.  */
  unsigned int i, i1 = 0, i2 = 0, n1 = 0, n2 = 0;
  unsigned int num_args = gimple_phi_num_args (phi);

  gcc_assert (num_args > 2);

  /* Bucket the arguments into at most two distinct values; any third
     distinct value disqualifies the PHI.  */
  for (i = 0; i < num_args; i++)
    {
      arg = gimple_phi_arg_def (phi, i);
      if (t1 == NULL || operand_equal_p (t1, arg, 0))
	{
	  n1++;
	  i1 = i;
	  t1 = arg;
	}
      else if (t2 == NULL || operand_equal_p (t2, arg, 0))
	{
	  n2++;
	  i2 = i;
	  t2 = arg;
	}
      else
	return false;
    }

  /* One of the two values must occur exactly once.  */
  if (n1 != 1 && n2 != 1)
    return false;

  /* Check if the edge corresponding to the unique arg is critical.  */
  e = gimple_phi_arg_edge (phi, (n1 == 1) ? i1 : i2);
  if (EDGE_COUNT (e->src->succs) > 1)
    return false;

  return true;
}
658
659 /* Return true when PHI is if-convertible. PHI is part of loop LOOP
660 and it belongs to basic block BB. Note at this point, it is sure
661 that PHI is if-convertible. This function updates global variable
662 ANY_COMPLICATED_PHI if PHI is complicated. */
663
664 static bool
665 if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi)
666 {
667 if (dump_file && (dump_flags & TDF_DETAILS))
668 {
669 fprintf (dump_file, "-------------------------\n");
670 print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
671 }
672
673 if (bb != loop->header
674 && gimple_phi_num_args (phi) > 2
675 && !phi_convertible_by_degenerating_args (phi))
676 any_complicated_phi = true;
677
678 return true;
679 }
680
/* Records the status of a data reference.  This struct is attached to
   each DR->aux field.  */

struct ifc_dr {
  /* True when the OR of all access predicates folds to true: the DR
     is read or written on every iteration.  */
  bool rw_unconditionally;
  /* True when the OR of all write predicates folds to true.  */
  bool w_unconditionally;
  /* True when the DR's base object is written at least once
     unconditionally.  */
  bool written_at_least_once;

  /* Running OR of the predicates of all accesses (reads and writes).  */
  tree rw_predicate;
  /* Running OR of the predicates of all writes.  */
  tree w_predicate;
  /* Running OR of the predicates of all writes to the base object.  */
  tree base_w_predicate;
};

#define IFC_DR(DR) ((struct ifc_dr *) (DR)->aux)
#define DR_BASE_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->written_at_least_once)
#define DR_RW_UNCONDITIONALLY(DR) (IFC_DR (DR)->rw_unconditionally)
#define DR_W_UNCONDITIONALLY(DR) (IFC_DR (DR)->w_unconditionally)
698
/* Iterates over DR's and stores refs, DR and base refs, DR pairs in
   HASH tables.  While storing them in HASH table, it checks if the
   reference is unconditionally read or written and stores that as a flag
   information.  For base reference it checks if it is written at least
   once unconditionally and stores it as flag information along with DR.
   In other words for every data reference A in STMT there exist other
   accesses to a data reference with the same base with predicates that
   add up (OR-up) to the true predicate: this ensures that the data
   reference A is touched (read or written) on every iteration of the
   if-converted loop.  */
static void
hash_memrefs_baserefs_and_store_DRs_read_written_info (data_reference_p a)
{

  data_reference_p *master_dr, *base_master_dr;
  tree base_ref = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);
  /* Predicate of the block containing the access.  */
  tree ca = bb_predicate (gimple_bb (DR_STMT (a)));
  bool exist1, exist2;

  /* The first DR with a given innermost behavior becomes the master
     DR that accumulates the predicates for all equivalent refs.  */
  master_dr = &innermost_DR_map->get_or_insert (innermost, &exist1);
  if (!exist1)
    *master_dr = a;

  if (DR_IS_WRITE (a))
    {
      IFC_DR (*master_dr)->w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*master_dr)->w_predicate);
      if (is_true_predicate (IFC_DR (*master_dr)->w_predicate))
	DR_W_UNCONDITIONALLY (*master_dr) = true;
    }
  IFC_DR (*master_dr)->rw_predicate
    = fold_or_predicates (UNKNOWN_LOCATION, ca,
			  IFC_DR (*master_dr)->rw_predicate);
  if (is_true_predicate (IFC_DR (*master_dr)->rw_predicate))
    DR_RW_UNCONDITIONALLY (*master_dr) = true;

  /* Likewise accumulate write predicates per base object.  */
  if (DR_IS_WRITE (a))
    {
      base_master_dr = &baseref_DR_map->get_or_insert (base_ref, &exist2);
      if (!exist2)
	*base_master_dr = a;
      IFC_DR (*base_master_dr)->base_w_predicate
	= fold_or_predicates (UNKNOWN_LOCATION, ca,
			      IFC_DR (*base_master_dr)->base_w_predicate);
      if (is_true_predicate (IFC_DR (*base_master_dr)->base_w_predicate))
	DR_BASE_W_UNCONDITIONALLY (*base_master_dr) = true;
    }
}
749
/* Return TRUE if can prove the index IDX of an array reference REF is
   within array bound.  Return false otherwise.  DTA is the loop being
   analyzed (callback for for_each_index).  */

static bool
idx_within_array_bound (tree ref, tree *idx, void *dta)
{
  wi::overflow_type overflow;
  widest_int niter, valid_niter, delta, wi_step;
  tree ev, init, step;
  tree low, high;
  class loop *loop = (class loop*) dta;

  /* Only support within-bound access for array references.  */
  if (TREE_CODE (ref) != ARRAY_REF)
    return false;

  /* For arrays at the end of the structure, we are not guaranteed that they
     do not really extend over their declared size.  However, for arrays of
     size greater than one, this is unlikely to be intended.  */
  if (array_at_struct_end_p (ref))
    return false;

  /* Describe the index as INIT + STEP * iteration via scalar evolution.  */
  ev = analyze_scalar_evolution (loop, *idx);
  ev = instantiate_parameters (loop, ev);
  init = initial_condition (ev);
  step = evolution_part_in_loop_num (ev, loop->num);

  if (!init || TREE_CODE (init) != INTEGER_CST
      || (step && TREE_CODE (step) != INTEGER_CST))
    return false;

  low = array_ref_low_bound (ref);
  high = array_ref_up_bound (ref);

  /* The case of nonconstant bounds could be handled, but it would be
     complicated.  */
  if (TREE_CODE (low) != INTEGER_CST
      || !high || TREE_CODE (high) != INTEGER_CST)
    return false;

  /* Check if the initial idx is within bound.  */
  if (wi::to_widest (init) < wi::to_widest (low)
      || wi::to_widest (init) > wi::to_widest (high))
    return false;

  /* The idx is always within bound.  */
  if (!step || integer_zerop (step))
    return true;

  if (!max_loop_iterations (loop, &niter))
    return false;

  /* DELTA is the distance from INIT to the bound approached by the
     evolution; WI_STEP is the absolute step.  */
  if (wi::to_widest (step) < 0)
    {
      delta = wi::to_widest (init) - wi::to_widest (low);
      wi_step = -wi::to_widest (step);
    }
  else
    {
      delta = wi::to_widest (high) - wi::to_widest (init);
      wi_step = wi::to_widest (step);
    }

  valid_niter = wi::div_floor (delta, wi_step, SIGNED, &overflow);
  /* The iteration space of idx is within array bound.  */
  if (!overflow && niter <= valid_niter)
    return true;

  return false;
}
820
821 /* Return TRUE if ref is a within bound array reference. */
822
823 static bool
824 ref_within_array_bound (gimple *stmt, tree ref)
825 {
826 class loop *loop = loop_containing_stmt (stmt);
827
828 gcc_assert (loop != NULL);
829 return for_each_index (&ref, idx_within_array_bound, loop);
830 }
831
832
833 /* Given a memory reference expression T, return TRUE if base object
834 it refers to is writable. The base object of a memory reference
835 is the main object being referenced, which is returned by function
836 get_base_address. */
837
838 static bool
839 base_object_writable (tree ref)
840 {
841 tree base_tree = get_base_address (ref);
842
843 return (base_tree
844 && DECL_P (base_tree)
845 && decl_binds_to_current_def_p (base_tree)
846 && !TREE_READONLY (base_tree));
847 }
848
/* Return true when the memory references of STMT won't trap in the
   if-converted code.  There are two things that we have to check for:

   - writes to memory occur to writable memory: if-conversion of
   memory writes transforms the conditional memory writes into
   unconditional writes, i.e. "if (cond) A[i] = foo" is transformed
   into "A[i] = cond ? foo : A[i]", and as the write to memory may not
   be executed at all in the original code, it may be a readonly
   memory.  To check that A is not const-qualified, we check that
   there exists at least an unconditional write to A in the current
   function.

   - reads or writes to memory are valid memory accesses for every
   iteration.  To check that the memory accesses are correctly formed
   and that we are allowed to read and write in these locations, we
   check that the memory accesses to be if-converted occur at every
   iteration unconditionally.

   Returns true for the memory reference in STMT, same memory reference
   is read or written unconditionally at least once and the base memory
   reference is written unconditionally once.  This is to check reference
   will not write fault.  Also returns true if the memory reference is
   unconditionally read once then we are conditionally writing to memory
   which is defined as read and write and is bound to the definition
   we are seeing.  */
static bool
ifcvt_memrefs_wont_trap (gimple *stmt, vec<data_reference_p> drs)
{
  /* If DR didn't see a reference here we can't use it to tell
     whether the ref traps or not.  */
  if (gimple_uid (stmt) == 0)
    return false;

  data_reference_p *master_dr, *base_master_dr;
  /* The DR for this stmt was recorded at index uid - 1.  */
  data_reference_p a = drs[gimple_uid (stmt) - 1];

  tree base = DR_BASE_OBJECT (a);
  innermost_loop_behavior *innermost = &DR_INNERMOST (a);

  gcc_assert (DR_STMT (a) == stmt);
  gcc_assert (DR_BASE_ADDRESS (a) || DR_OFFSET (a)
              || DR_INIT (a) || DR_STEP (a));

  master_dr = innermost_DR_map->get (innermost);
  gcc_assert (master_dr != NULL);

  base_master_dr = baseref_DR_map->get (base);

  /* If a is unconditionally written to it doesn't trap.  */
  if (DR_W_UNCONDITIONALLY (*master_dr))
    return true;

  /* If a is unconditionally accessed then ...

     Even a is conditional access, we can treat it as an unconditional
     one if it's an array reference and all its index are within array
     bound.  */
  if (DR_RW_UNCONDITIONALLY (*master_dr)
      || ref_within_array_bound (stmt, DR_REF (a)))
    {
      /* an unconditional read won't trap.  */
      if (DR_IS_READ (a))
	return true;

      /* an unconditional write won't trap if the base is written
         to unconditionally.  */
      if (base_master_dr
	  && DR_BASE_W_UNCONDITIONALLY (*base_master_dr))
	return flag_store_data_races;
      /* or the base is known to be not readonly.  */
      else if (base_object_writable (DR_REF (a)))
	return flag_store_data_races;
    }

  return false;
}
925
/* Return true if STMT could be converted into a masked load or store
   (conditional load or store based on a mask computed from bb predicate).  */

static bool
ifcvt_can_use_mask_load_store (gimple *stmt)
{
  /* Check whether this is a load or store.  */
  tree lhs = gimple_assign_lhs (stmt);
  bool is_load;
  tree ref;
  if (gimple_store_p (stmt))
    {
      /* The stored value must already be a GIMPLE value.  */
      if (!is_gimple_val (gimple_assign_rhs1 (stmt)))
	return false;
      is_load = false;
      ref = lhs;
    }
  else if (gimple_assign_load_p (stmt))
    {
      is_load = true;
      ref = gimple_assign_rhs1 (stmt);
    }
  else
    return false;

  if (may_be_nonaddressable_p (ref))
    return false;

  /* Mask should be integer mode of the same size as the load/store
     mode.  */
  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
  if (!int_mode_for_mode (mode).exists () || VECTOR_MODE_P (mode))
    return false;

  /* Finally ask the target whether it supports the masked operation.  */
  if (can_vec_mask_load_store_p (mode, VOIDmode, is_load))
    return true;

  return false;
}
965
/* Return true if STMT could be converted from an operation that is
   unconditional to one that is conditional on a bb predicate mask.
   Covers masked loads/stores and assignments whose rhs code has a
   vectorizable conditional internal function.  */

static bool
ifcvt_can_predicate (gimple *stmt)
{
  basic_block bb = gimple_bb (stmt);

  /* Predication is only useful when we may vectorize this loop.  */
  if (!(flag_tree_loop_vectorize || bb->loop_father->force_vectorize)
      || bb->loop_father->dont_vectorize
      || gimple_has_volatile_ops (stmt))
    return false;

  if (gimple_assign_single_p (stmt))
    return ifcvt_can_use_mask_load_store (stmt);

  tree_code code = gimple_assign_rhs_code (stmt);
  tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
  if (!types_compatible_p (lhs_type, rhs_type))
    return false;
  /* The conditional internal function (e.g. IFN_COND_ADD) must exist
     and be supported by the target for this type.  */
  internal_fn cond_fn = get_conditional_internal_fn (code);
  return (cond_fn != IFN_LAST
	  && vectorized_internal_fn_supported_p (cond_fn, lhs_type));
}
991
/* Return true when STMT is if-convertible.

   GIMPLE_ASSIGN statement is not if-convertible if,
   - it is not movable,
   - it could trap,
   - LHS is not var decl.  REFS are the data references recorded for
   the loop, indexed by gimple uid (see ifcvt_memrefs_wont_trap).  */

static bool
if_convertible_gimple_assign_stmt_p (gimple *stmt,
				     vec<data_reference_p> refs)
{
  tree lhs = gimple_assign_lhs (stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "-------------------------\n");
      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
    }

  if (!is_gimple_reg_type (TREE_TYPE (lhs)))
    return false;

  /* Some of these constrains might be too conservative.  */
  if (stmt_ends_bb_p (stmt)
      || gimple_has_volatile_ops (stmt)
      || (TREE_CODE (lhs) == SSA_NAME
          && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
      || gimple_has_side_effects (stmt))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "stmt not suitable for ifcvt\n");
      return false;
    }

  /* tree-into-ssa.c uses GF_PLF_1, so avoid it, because
     in between if_convertible_loop_p and combine_blocks
     we can perform loop versioning.  */
  gimple_set_plf (stmt, GF_PLF_2, false);

  /* The stmt may trap: either it has no memory access info, or its
     memory refs cannot be proved trap-free.  Try predication then.  */
  if ((! gimple_vuse (stmt)
       || gimple_could_trap_p_1 (stmt, false, false)
       || ! ifcvt_memrefs_wont_trap (stmt, refs))
      && gimple_could_trap_p (stmt))
    {
      if (ifcvt_can_predicate (stmt))
	{
	  /* Mark the stmt so predicate_statements converts it later.  */
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  need_to_predicate = true;
	  return true;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "tree could trap...\n");
      return false;
    }

  /* When if-converting stores force versioning, likewise if we
     ended up generating store data races.  */
  if (gimple_vdef (stmt))
    need_to_predicate = true;

  return true;
}
1054
1055 /* Return true when STMT is if-convertible.
1056
1057 A statement is if-convertible if:
1058 - it is an if-convertible GIMPLE_ASSIGN,
1059 - it is a GIMPLE_LABEL or a GIMPLE_COND,
1060 - it is builtins call. */
1061
1062 static bool
1063 if_convertible_stmt_p (gimple *stmt, vec<data_reference_p> refs)
1064 {
1065 switch (gimple_code (stmt))
1066 {
1067 case GIMPLE_LABEL:
1068 case GIMPLE_DEBUG:
1069 case GIMPLE_COND:
1070 return true;
1071
1072 case GIMPLE_ASSIGN:
1073 return if_convertible_gimple_assign_stmt_p (stmt, refs);
1074
1075 case GIMPLE_CALL:
1076 {
1077 tree fndecl = gimple_call_fndecl (stmt);
1078 if (fndecl)
1079 {
1080 int flags = gimple_call_flags (stmt);
1081 if ((flags & ECF_CONST)
1082 && !(flags & ECF_LOOPING_CONST_OR_PURE)
1083 /* We can only vectorize some builtins at the moment,
1084 so restrict if-conversion to those. */
1085 && fndecl_built_in_p (fndecl))
1086 return true;
1087 }
1088 return false;
1089 }
1090
1091 default:
1092 /* Don't know what to do with 'em so don't do anything. */
1093 if (dump_file && (dump_flags & TDF_DETAILS))
1094 {
1095 fprintf (dump_file, "don't know what to do\n");
1096 print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
1097 }
1098 return false;
1099 }
1100
1101 return true;
1102 }
1103
/* Assumes that BB has more than 1 predecessors.
   Returns true iff every incoming edge of BB is critical, i.e. iff
   every predecessor block itself has more than one successor.
   Returns false as soon as a predecessor with a single successor
   (a non-critical edge) is found.  */

static inline bool
all_preds_critical_p (basic_block bb)
{
  edge e;
  edge_iterator ei;

  /* An edge from a single-successor block is not critical.  */
  FOR_EACH_EDGE (e, ei, bb->preds)
    if (EDGE_COUNT (e->src->succs) == 1)
      return false;
  return true;
}
1119
/* Return true when BB is if-convertible.  This routine does not check
   basic block's statements and phis.

   A basic block is not if-convertible if:
   - it is non-empty and it is after the exit block (in BFS order),
   - it is after the exit block but before the latch,
   - its edges are not normal.

   EXIT_BB is the basic block containing the exit of the LOOP.  BB is
   inside LOOP.  */

static bool
if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb)
{
  edge e;
  edge_iterator ei;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "----------[%d]-------------\n", bb->index);

  /* More than two successors means more than one condition ends BB:
     too complicated to predicate.  */
  if (EDGE_COUNT (bb->succs) > 2)
    return false;

  /* Once the exit block has been seen (EXIT_BB non-NULL), the only
     acceptable later block is an empty latch dominated by the exit.  */
  if (exit_bb)
    {
      if (bb != loop->latch)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "basic block after exit bb but before latch\n");
	  return false;
	}
      else if (!empty_block_p (bb))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "non empty basic block after exit bb\n");
	  return false;
	}
      else if (bb == loop->latch
	       && bb != exit_bb
	       && !dominated_by_p (CDI_DOMINATORS, bb, exit_bb))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, "latch is not dominated by exit_block\n");
	    return false;
	  }
    }

  /* Be less adventurous and handle only normal edges.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->flags & (EDGE_EH | EDGE_ABNORMAL | EDGE_IRREDUCIBLE_LOOP))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Difficult to handle edges\n");
	return false;
      }

  return true;
}
1178
1179 /* Return true when all predecessor blocks of BB are visited. The
1180 VISITED bitmap keeps track of the visited blocks. */
1181
1182 static bool
1183 pred_blocks_visited_p (basic_block bb, bitmap *visited)
1184 {
1185 edge e;
1186 edge_iterator ei;
1187 FOR_EACH_EDGE (e, ei, bb->preds)
1188 if (!bitmap_bit_p (*visited, e->src->index))
1189 return false;
1190
1191 return true;
1192 }
1193
/* Get body of a LOOP in suitable order for if-conversion.  It is
   caller's responsibility to deallocate basic block list.
   If-conversion suitable order is, breadth first sort (BFS) order
   with an additional constraint: select a block only if all its
   predecessors are already selected.

   Returns NULL when the loop contains a block in an irreducible
   region, in which case all intermediate allocations are freed.  */

static basic_block *
get_loop_body_in_if_conv_order (const class loop *loop)
{
  basic_block *blocks, *blocks_in_bfs_order;
  basic_block bb;
  bitmap visited;
  unsigned int index = 0;
  unsigned int visited_count = 0;

  gcc_assert (loop->num_nodes);
  gcc_assert (loop->latch != EXIT_BLOCK_PTR_FOR_FN (cfun));

  blocks = XCNEWVEC (basic_block, loop->num_nodes);
  visited = BITMAP_ALLOC (NULL);

  blocks_in_bfs_order = get_loop_body_in_bfs_order (loop);

  index = 0;
  /* Sweep the BFS-ordered list repeatedly, selecting blocks whose
     predecessors are all selected; restart from the beginning until
     every block has been placed.  */
  while (index < loop->num_nodes)
    {
      bb = blocks_in_bfs_order [index];

      /* Irreducible regions cannot be if-converted: clean up and
	 bail out.  */
      if (bb->flags & BB_IRREDUCIBLE_LOOP)
	{
	  free (blocks_in_bfs_order);
	  BITMAP_FREE (visited);
	  free (blocks);
	  return NULL;
	}

      if (!bitmap_bit_p (visited, bb->index))
	{
	  /* The header is selected unconditionally to seed the sweep.  */
	  if (pred_blocks_visited_p (bb, &visited)
	      || bb == loop->header)
	    {
	      /* This block is now visited.  */
	      bitmap_set_bit (visited, bb->index);
	      blocks[visited_count++] = bb;
	    }
	}

      index++;

      if (index == loop->num_nodes
	  && visited_count != loop->num_nodes)
	/* Not done yet.  */
	index = 0;
    }
  free (blocks_in_bfs_order);
  BITMAP_FREE (visited);
  return blocks;
}
1252
/* Compute the predicates of all the basic blocks in LOOP (stored in
   the global IFC_BBS array).

   predicate_bbs first allocates the predicates of the basic blocks.
   These fields are then initialized with the tree expressions
   representing the predicates under which a basic block is executed
   in the LOOP.  As the loop->header is executed at each iteration, it
   has the "true" predicate.  Other statements executed under a
   condition are predicated with that condition, for example

   | if (x)
   |   S1;
   | else
   |   S2;

   S1 will be predicated with "x", and
   S2 will be predicated with "!x".  */

static void
predicate_bbs (loop_p loop)
{
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    init_bb_predicate (ifc_bbs[i]);

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      tree cond;
      gimple *stmt;

      /* The loop latch and loop exit block are always executed and
	 have no extra conditions to be processed: skip them.  */
      if (bb == loop->latch
	  || bb_with_exit_edge_p (loop, bb))
	{
	  reset_bb_predicate (bb);
	  continue;
	}

      cond = bb_predicate (bb);
      stmt = last_stmt (bb);
      if (stmt && gimple_code (stmt) == GIMPLE_COND)
	{
	  /* BB ends in a conditional jump: propagate COND && C to the
	     true edge's destination and COND && !C to the false
	     edge's destination.  */
	  tree c2;
	  edge true_edge, false_edge;
	  location_t loc = gimple_location (stmt);
	  tree c = build2_loc (loc, gimple_cond_code (stmt),
			       boolean_type_node,
			       gimple_cond_lhs (stmt),
			       gimple_cond_rhs (stmt));

	  /* Add new condition into destination's predicate list.  */
	  extract_true_false_edges_from_block (gimple_bb (stmt),
					       &true_edge, &false_edge);

	  /* If C is true, then TRUE_EDGE is taken.  */
	  add_to_dst_predicate_list (loop, true_edge, unshare_expr (cond),
				     unshare_expr (c));

	  /* If C is false, then FALSE_EDGE is taken.  */
	  c2 = build1_loc (loc, TRUTH_NOT_EXPR, boolean_type_node,
			   unshare_expr (c));
	  add_to_dst_predicate_list (loop, false_edge,
				     unshare_expr (cond), c2);

	  cond = NULL_TREE;
	}

      /* If current bb has only one successor, then consider it as an
	 unconditional goto.  */
      if (single_succ_p (bb))
	{
	  basic_block bb_n = single_succ (bb);

	  /* The successor bb inherits the predicate of its
	     predecessor.  If there is no predicate in the predecessor
	     bb, then consider the successor bb as always executed.  */
	  if (cond == NULL_TREE)
	    cond = boolean_true_node;

	  add_to_predicate_list (loop, bb_n, cond);
	}
    }

  /* The loop header is always executed.  */
  reset_bb_predicate (loop->header);
  gcc_assert (bb_predicate_gimplified_stmts (loop->header) == NULL
	      && bb_predicate_gimplified_stmts (loop->latch) == NULL);
}
1344
1345 /* Build region by adding loop pre-header and post-header blocks. */
1346
1347 static vec<basic_block>
1348 build_region (class loop *loop)
1349 {
1350 vec<basic_block> region = vNULL;
1351 basic_block exit_bb = NULL;
1352
1353 gcc_assert (ifc_bbs);
1354 /* The first element is loop pre-header. */
1355 region.safe_push (loop_preheader_edge (loop)->src);
1356
1357 for (unsigned int i = 0; i < loop->num_nodes; i++)
1358 {
1359 basic_block bb = ifc_bbs[i];
1360 region.safe_push (bb);
1361 /* Find loop postheader. */
1362 edge e;
1363 edge_iterator ei;
1364 FOR_EACH_EDGE (e, ei, bb->succs)
1365 if (loop_exit_edge_p (loop, e))
1366 {
1367 exit_bb = e->dest;
1368 break;
1369 }
1370 }
1371 /* The last element is loop post-header. */
1372 gcc_assert (exit_bb);
1373 region.safe_push (exit_bb);
1374 return region;
1375 }
1376
/* Return true when LOOP is if-convertible.  This is a helper function
   for if_convertible_loop_p.  REFS and DDRS are initialized and freed
   in if_convertible_loop_p.

   On success, the globals IFC_BBS, INNERMOST_DR_MAP and
   BASEREF_DR_MAP are populated; the caller is responsible for
   releasing them.  */

static bool
if_convertible_loop_p_1 (class loop *loop, vec<data_reference_p> *refs)
{
  unsigned int i;
  basic_block exit_bb = NULL;
  vec<basic_block> region;

  if (find_data_references_in_loop (loop, refs) == chrec_dont_know)
    return false;

  calculate_dominance_info (CDI_DOMINATORS);

  /* Allow statements that can be handled during if-conversion.  */
  ifc_bbs = get_loop_body_in_if_conv_order (loop);
  if (!ifc_bbs)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Irreducible loop\n");
      return false;
    }

  /* Check CFG shape of every block; remember the exit block, which
     constrains the blocks allowed to follow it.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      if (!if_convertible_bb_p (loop, bb, exit_bb))
	return false;

      if (bb_with_exit_edge_p (loop, bb))
	exit_bb = bb;
    }

  /* Reject unsupported statement kinds early and clear the uids of
     the supported ones (they are re-used below to index DRs).  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator gsi;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_LABEL:
	  case GIMPLE_ASSIGN:
	  case GIMPLE_CALL:
	  case GIMPLE_DEBUG:
	  case GIMPLE_COND:
	    gimple_set_uid (gsi_stmt (gsi), 0);
	    break;
	  default:
	    return false;
	  }
    }

  data_reference_p dr;

  innermost_DR_map
	  = new hash_map<innermost_loop_behavior_hash, data_reference_p>;
  baseref_DR_map = new hash_map<tree_operand_hash, data_reference_p>;

  /* Compute post-dominator tree locally.  */
  region = build_region (loop);
  calculate_dominance_info_for_region (CDI_POST_DOMINATORS, region);

  predicate_bbs (loop);

  /* Free post-dominator tree since it is not used after predication.  */
  free_dominance_info_for_region (cfun, CDI_POST_DOMINATORS, region);
  region.release ();

  /* Attach an ifc_dr to each data reference and record under which
     predicates it is read/written.  */
  for (i = 0; refs->iterate (i, &dr); i++)
    {
      tree ref = DR_REF (dr);

      dr->aux = XNEW (struct ifc_dr);
      DR_BASE_W_UNCONDITIONALLY (dr) = false;
      DR_RW_UNCONDITIONALLY (dr) = false;
      DR_W_UNCONDITIONALLY (dr) = false;
      IFC_DR (dr)->rw_predicate = boolean_false_node;
      IFC_DR (dr)->w_predicate = boolean_false_node;
      IFC_DR (dr)->base_w_predicate = boolean_false_node;
      /* UID 0 means "no DR yet"; DR indices are therefore 1-based.  */
      if (gimple_uid (DR_STMT (dr)) == 0)
	gimple_set_uid (DR_STMT (dr), i + 1);

      /* If DR doesn't have innermost loop behavior or it's a compound
	 memory reference, we synthesize its innermost loop behavior
	 for hashing.  */
      if (TREE_CODE (ref) == COMPONENT_REF
	  || TREE_CODE (ref) == IMAGPART_EXPR
	  || TREE_CODE (ref) == REALPART_EXPR
	  || !(DR_BASE_ADDRESS (dr) || DR_OFFSET (dr)
	       || DR_INIT (dr) || DR_STEP (dr)))
	{
	  while (TREE_CODE (ref) == COMPONENT_REF
		 || TREE_CODE (ref) == IMAGPART_EXPR
		 || TREE_CODE (ref) == REALPART_EXPR)
	    ref = TREE_OPERAND (ref, 0);

	  memset (&DR_INNERMOST (dr), 0, sizeof (DR_INNERMOST (dr)));
	  DR_BASE_ADDRESS (dr) = ref;
	}
      hash_memrefs_baserefs_and_store_DRs_read_written_info (dr);
    }

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gimple_stmt_iterator itr;

      /* Check the if-convertibility of statements in predicated BBs.  */
      if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb))
	for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr))
	  if (!if_convertible_stmt_p (gsi_stmt (itr), *refs))
	    return false;
    }

  /* Checking PHIs needs to be done after stmts, as the fact whether there
     are any masked loads or stores affects the tests.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];
      gphi_iterator itr;

      for (itr = gsi_start_phis (bb); !gsi_end_p (itr); gsi_next (&itr))
	if (!if_convertible_phi_p (loop, bb, itr.phi ()))
	  return false;
    }

  if (dump_file)
    fprintf (dump_file, "Applying if-conversion\n");

  return true;
}
1512
/* Return true when LOOP is if-convertible.
   LOOP is if-convertible if:
   - it is innermost,
   - it has two or more basic blocks,
   - it has only one exit,
   - loop header is not the exit edge,
   - if its basic blocks and phi nodes are if convertible.

   Whatever if_convertible_loop_p_1 returns, the data references and
   the global DR maps it allocated are released here.  */

static bool
if_convertible_loop_p (class loop *loop)
{
  edge e;
  edge_iterator ei;
  bool res = false;
  vec<data_reference_p> refs;

  /* Handle only innermost loop.  */
  if (!loop || loop->inner)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "not innermost loop\n");
      return false;
    }

  /* If only one block, no need for if-conversion.  */
  if (loop->num_nodes <= 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "less than 2 basic blocks\n");
      return false;
    }

  /* More than one loop exit is too much to handle.  */
  if (!single_exit (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "multiple exits\n");
      return false;
    }

  /* If one of the loop header's edge is an exit edge then do not
     apply if-conversion.  */
  FOR_EACH_EDGE (e, ei, loop->header->succs)
    if (loop_exit_edge_p (loop, e))
      return false;

  refs.create (5);
  res = if_convertible_loop_p_1 (loop, &refs);

  /* Release the per-DR ifc_dr auxiliary data before freeing the
     data references themselves.  */
  data_reference_p dr;
  unsigned int i;
  for (i = 0; refs.iterate (i, &dr); i++)
    free (dr->aux);

  free_data_refs (refs);

  delete innermost_DR_map;
  innermost_DR_map = NULL;

  delete baseref_DR_map;
  baseref_DR_map = NULL;

  return res;
}
1577
/* Returns true if def-stmt for phi argument ARG is simple increment/decrement
   which is in predicated basic block.
   In fact, the following PHI pattern is searching:
      loop-header:
	reduc_1 = PHI <..., reduc_2>
      ...
	if (...)
	  reduc_3 = ...
	reduc_2 = PHI <reduc_1, reduc_3>

   ARG_0 and ARG_1 are correspondent PHI arguments.
   REDUC, OP0 and OP1 contain reduction stmt and its operands.
   EXTENDED is true if PHI has > 2 arguments.  */

static bool
is_cond_scalar_reduction (gimple *phi, gimple **reduc, tree arg_0, tree arg_1,
			  tree *op0, tree *op1, bool extended)
{
  tree lhs, r_op1, r_op2;
  gimple *stmt;
  gimple *header_phi = NULL;
  enum tree_code reduction_op;
  basic_block bb = gimple_bb (phi);
  class loop *loop = bb->loop_father;
  edge latch_e = loop_latch_edge (loop);
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  edge e;
  edge_iterator ei;
  bool result = false;
  if (TREE_CODE (arg_0) != SSA_NAME || TREE_CODE (arg_1) != SSA_NAME)
    return false;

  /* Identify which argument is defined by the header PHI and which by
     the (candidate) reduction statement.  For the extended case only
     the ARG_1 position may be the header PHI.  */
  if (!extended && gimple_code (SSA_NAME_DEF_STMT (arg_0)) == GIMPLE_PHI)
    {
      lhs = arg_1;
      header_phi = SSA_NAME_DEF_STMT (arg_0);
      stmt = SSA_NAME_DEF_STMT (arg_1);
    }
  else if (gimple_code (SSA_NAME_DEF_STMT (arg_1)) == GIMPLE_PHI)
    {
      lhs = arg_0;
      header_phi = SSA_NAME_DEF_STMT (arg_1);
      stmt = SSA_NAME_DEF_STMT (arg_0);
    }
  else
    return false;
  if (gimple_bb (header_phi) != loop->header)
    return false;

  /* The header PHI must be fed back from this PHI via the latch.  */
  if (PHI_ARG_DEF_FROM_EDGE (header_phi, latch_e) != PHI_RESULT (phi))
    return false;

  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_has_volatile_ops (stmt))
    return false;

  if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
    return false;

  if (!is_predicated (gimple_bb (stmt)))
    return false;

  /* Check that stmt-block is predecessor of phi-block.  */
  FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->succs)
    if (e->dest == bb)
      {
	result = true;
	break;
      }
  if (!result)
    return false;

  if (!has_single_use (lhs))
    return false;

  /* Only PLUS/MINUS reductions are recognized here.  */
  reduction_op = gimple_assign_rhs_code (stmt);
  if (reduction_op != PLUS_EXPR && reduction_op != MINUS_EXPR)
    return false;
  r_op1 = gimple_assign_rhs1 (stmt);
  r_op2 = gimple_assign_rhs2 (stmt);

  /* Make R_OP1 to hold reduction variable.  */
  if (r_op2 == PHI_RESULT (header_phi)
      && reduction_op == PLUS_EXPR)
    std::swap (r_op1, r_op2);
  else if (r_op1 != PHI_RESULT (header_phi))
    return false;

  /* Check that R_OP1 is used in reduction stmt or in PHI only.  */
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, r_op1)
    {
      gimple *use_stmt = USE_STMT (use_p);
      if (is_gimple_debug (use_stmt))
	continue;
      if (use_stmt == stmt)
	continue;
      if (gimple_code (use_stmt) != GIMPLE_PHI)
	return false;
    }

  *op0 = r_op1; *op1 = r_op2;
  *reduc = stmt;
  return true;
}
1683
/* Converts conditional scalar reduction into unconditional form, e.g.
     bb_4
       if (_5 != 0) goto bb_5 else goto bb_6
     end_bb_4
     bb_5
       res_6 = res_13 + 1;
     end_bb_5
     bb_6
       # res_2 = PHI <res_13(4), res_6(5)>
     end_bb_6

   will be converted into sequence
    _ifc__1 = _5 != 0 ? 1 : 0;
    res_2 = res_13 + _ifc__1;
  Argument SWAP tells that arguments of conditional expression should be
  swapped.
  The original reduction statement REDUC is removed as a side effect.
  Returns rhs of resulting PHI assignment.  */

static tree
convert_scalar_cond_reduction (gimple *reduc, gimple_stmt_iterator *gsi,
			       tree cond, tree op0, tree op1, bool swap)
{
  gimple_stmt_iterator stmt_it;
  gimple *new_assign;
  tree rhs;
  tree rhs1 = gimple_assign_rhs1 (reduc);
  tree tmp = make_temp_ssa_name (TREE_TYPE (rhs1), NULL, "_ifc_");
  tree c;
  tree zero = build_zero_cst (TREE_TYPE (rhs1));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Found cond scalar reduction.\n");
      print_gimple_stmt (dump_file, reduc, 0, TDF_SLIM);
    }

  /* Build cond expression using COND and constant operand
     of reduction rhs.  */
  c = fold_build_cond_expr (TREE_TYPE (rhs1),
			    unshare_expr (cond),
			    swap ? zero : op1,
			    swap ? op1 : zero);

  /* Create assignment stmt and insert it at GSI.  */
  new_assign = gimple_build_assign (tmp, c);
  gsi_insert_before (gsi, new_assign, GSI_SAME_STMT);
  /* Build rhs for unconditional increment/decrement.  */
  rhs = fold_build2 (gimple_assign_rhs_code (reduc),
		     TREE_TYPE (rhs1), op0, tmp);

  /* Delete original reduction stmt.  */
  stmt_it = gsi_for_stmt (reduc);
  gsi_remove (&stmt_it, true);
  release_defs (reduc);
  return rhs;
}
1740
/* Produce condition for all occurrences of ARG in PHI node.  OCCUR
   holds the PHI argument indexes where ARG occurs; the conditions of
   the corresponding incoming blocks are OR-ed together.  Generated
   statements are inserted at GSI.  */

static tree
gen_phi_arg_condition (gphi *phi, vec<int> *occur,
		       gimple_stmt_iterator *gsi)
{
  int len;
  int i;
  tree cond = NULL_TREE;
  tree c;
  edge e;

  len = occur->length ();
  gcc_assert (len > 0);
  for (i = 0; i < len; i++)
    {
      e = gimple_phi_arg_edge (phi, (*occur)[i]);
      c = bb_predicate (e->src);
      /* A true predicate subsumes everything OR-ed so far.  */
      if (is_true_predicate (c))
	{
	  cond = c;
	  break;
	}
      /* Gimplify each predicate before combining so the OR operands
	 are valid cond-expr operands.  */
      c = force_gimple_operand_gsi_1 (gsi, unshare_expr (c),
				      is_gimple_condexpr, NULL_TREE,
				      true, GSI_SAME_STMT);
      if (cond != NULL_TREE)
	{
	  /* Must build OR expression.  */
	  cond = fold_or_predicates (EXPR_LOCATION (c), c, cond);
	  cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
					     is_gimple_condexpr, NULL_TREE,
					     true, GSI_SAME_STMT);
	}
      else
	cond = c;
    }
  gcc_assert (cond != NULL_TREE);
  return cond;
}
1781
/* Local valueization callback that follows all-use SSA edges.
   Returning VAL unchanged tells fold_stmt to look through every SSA
   definition it meets, regardless of the number of uses.  */

static tree
ifcvt_follow_ssa_use_edges (tree val)
{
  return val;
}
1789
/* Replace a scalar PHI node with a COND_EXPR using COND as condition.
   This routine can handle PHI nodes with more than two arguments.

   For example,
     S1: A = PHI <x1(1), x2(5)>
   is converted into,
     S2: A = cond ? x1 : x2;

   The generated code is inserted at GSI that points to the top of
   basic block's statement list.
   If PHI node has more than two arguments a chain of conditional
   expression is produced.  */


static void
predicate_scalar_phi (gphi *phi, gimple_stmt_iterator *gsi)
{
  gimple *new_stmt = NULL, *reduc;
  tree rhs, res, arg0, arg1, op0, op1, scev;
  tree cond;
  unsigned int index0;
  unsigned int max, args_len;
  edge e;
  basic_block bb;
  unsigned int i;

  res = gimple_phi_result (phi);
  /* Virtual PHIs are left alone.  */
  if (virtual_operand_p (res))
    return;

  /* A degenerate PHI (all arguments equal, or an SSA name with a
     known scalar evolution) becomes a plain assignment.  */
  if ((rhs = degenerate_phi_result (phi))
      || ((scev = analyze_scalar_evolution (gimple_bb (phi)->loop_father,
					    res))
	  && !chrec_contains_undetermined (scev)
	  && scev != res
	  && (rhs = gimple_phi_arg_def (phi, 0))))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Degenerate phi!\n");
	  print_gimple_stmt (dump_file, phi, 0, TDF_SLIM);
	}
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
      return;
    }

  bb = gimple_bb (phi);
  if (EDGE_COUNT (bb->preds) == 2)
    {
      /* Predicate ordinary PHI node with 2 arguments.  */
      edge first_edge, second_edge;
      basic_block true_bb;
      first_edge = EDGE_PRED (bb, 0);
      second_edge = EDGE_PRED (bb, 1);
      cond = bb_predicate (first_edge->src);
      /* Prefer an edge whose predicate is not negated; for a
	 critical first edge fall back to the second edge's
	 predicate.  */
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	std::swap (first_edge, second_edge);
      if (EDGE_COUNT (first_edge->src->succs) > 1)
	{
	  cond = bb_predicate (second_edge->src);
	  if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	    cond = TREE_OPERAND (cond, 0);
	  else
	    first_edge = second_edge;
	}
      else
	cond = bb_predicate (first_edge->src);
      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
					 is_gimple_condexpr, NULL_TREE,
					 true, GSI_SAME_STMT);
      true_bb = first_edge->src;
      /* ARG0 is the value flowing in from TRUE_BB.  */
      if (EDGE_PRED (bb, 1)->src == true_bb)
	{
	  arg0 = gimple_phi_arg_def (phi, 1);
	  arg1 = gimple_phi_arg_def (phi, 0);
	}
      else
	{
	  arg0 = gimple_phi_arg_def (phi, 0);
	  arg1 = gimple_phi_arg_def (phi, 1);
	}
      if (is_cond_scalar_reduction (phi, &reduc, arg0, arg1,
				    &op0, &op1, false))
	/* Convert reduction stmt into vectorizable form.  */
	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					     true_bb != gimple_bb (reduc));
      else
	/* Build new RHS using selected condition and arguments.  */
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    arg0, arg1);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      gimple_stmt_iterator new_gsi = gsi_for_stmt (new_stmt);
      if (fold_stmt (&new_gsi, ifcvt_follow_ssa_use_edges))
	{
	  new_stmt = gsi_stmt (new_gsi);
	  update_stmt (new_stmt);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "new phi replacement stmt\n");
	  print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
	}
      return;
    }

  /* Create hashmap for PHI node which contain vector of argument indexes
     having the same value.  */
  bool swap = false;
  hash_map<tree_operand_hash, auto_vec<int> > phi_arg_map;
  unsigned int num_args = gimple_phi_num_args (phi);
  int max_ind = -1;
  /* Vector of different PHI argument values.  */
  auto_vec<tree> args (num_args);

  /* Compute phi_arg_map.  */
  for (i = 0; i < num_args; i++)
    {
      tree arg;

      arg = gimple_phi_arg_def (phi, i);
      if (!phi_arg_map.get (arg))
	args.quick_push (arg);
      phi_arg_map.get_or_insert (arg).safe_push (i);
    }

  /* Determine element with max number of occurrences.  */
  max_ind = -1;
  max = 1;
  args_len = args.length ();
  for (i = 0; i < args_len; i++)
    {
      unsigned int len;
      if ((len = phi_arg_map.get (args[i])->length ()) > max)
	{
	  max_ind = (int) i;
	  max = len;
	}
    }

  /* Put element with max number of occurences to the end of ARGS.  */
  if (max_ind != -1 && max_ind +1 != (int) args_len)
    std::swap (args[args_len - 1], args[max_ind]);

  /* Handle one special case when number of arguments with different values
     is equal 2 and one argument has the only occurrence.  Such PHI can be
     handled as if would have only 2 arguments.  */
  if (args_len == 2 && phi_arg_map.get (args[0])->length () == 1)
    {
      vec<int> *indexes;
      indexes = phi_arg_map.get (args[0]);
      index0 = (*indexes)[0];
      arg0 = args[0];
      arg1 = args[1];
      e = gimple_phi_arg_edge (phi, index0);
      cond = bb_predicate (e->src);
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}
      /* Gimplify the condition to a valid cond-expr conditonal operand.  */
      cond = force_gimple_operand_gsi_1 (gsi, unshare_expr (cond),
					 is_gimple_condexpr, NULL_TREE,
					 true, GSI_SAME_STMT);
      if (!(is_cond_scalar_reduction (phi, &reduc, arg0 , arg1,
				      &op0, &op1, true)))
	rhs = fold_build_cond_expr (TREE_TYPE (res), unshare_expr (cond),
				    swap? arg1 : arg0,
				    swap? arg0 : arg1);
      else
	/* Convert reduction stmt into vectorizable form.  */
	rhs = convert_scalar_cond_reduction (reduc, gsi, cond, op0, op1,
					     swap);
      new_stmt = gimple_build_assign (res, rhs);
      gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      update_stmt (new_stmt);
    }
  else
    {
      /* Common case.  Build a chain of COND_EXPRs, one per distinct
	 argument value, ending with the most frequent value placed
	 last (the final "else").  */
      vec<int> *indexes;
      tree type = TREE_TYPE (gimple_phi_result (phi));
      tree lhs;
      arg1 = args[1];
      for (i = 0; i < args_len; i++)
	{
	  arg0 = args[i];
	  indexes = phi_arg_map.get (args[i]);
	  if (i != args_len - 1)
	    lhs = make_temp_ssa_name (type, NULL, "_ifc_");
	  else
	    lhs = res;
	  cond = gen_phi_arg_condition (phi, indexes, gsi);
	  rhs = fold_build_cond_expr (type, unshare_expr (cond),
				      arg0, arg1);
	  new_stmt = gimple_build_assign (lhs, rhs);
	  gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
	  update_stmt (new_stmt);
	  arg1 = lhs;
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "new extended phi replacement stmt\n");
      print_gimple_stmt (dump_file, new_stmt, 0, TDF_SLIM);
    }
}
2003
/* Replaces in LOOP all the scalar phi nodes other than those in the
   LOOP->header block with conditional modify expressions.
   Virtual PHIs are kept; each converted PHI is removed after its
   replacement assignment has been inserted after the labels.  */

static void
predicate_all_scalar_phis (class loop *loop)
{
  basic_block bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;

  /* Index 0 would be the pre-header; body blocks start at 1.  */
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gphi *phi;
      gimple_stmt_iterator gsi;
      gphi_iterator phi_gsi;
      bb = ifc_bbs[i];

      if (bb == loop->header)
	continue;

      phi_gsi = gsi_start_phis (bb);
      if (gsi_end_p (phi_gsi))
	continue;

      gsi = gsi_after_labels (bb);
      /* remove_phi_node advances PHI_GSI, so only step explicitly
	 past the PHIs we keep.  */
      while (!gsi_end_p (phi_gsi))
	{
	  phi = phi_gsi.phi ();
	  if (virtual_operand_p (gimple_phi_result (phi)))
	    gsi_next (&phi_gsi);
	  else
	    {
	      predicate_scalar_phi (phi, &gsi);
	      remove_phi_node (&phi_gsi, false);
	    }
	}
    }
}
2042
2043 /* Insert in each basic block of LOOP the statements produced by the
2044 gimplification of the predicates. */
2045
2046 static void
2047 insert_gimplified_predicates (loop_p loop)
2048 {
2049 unsigned int i;
2050
2051 for (i = 0; i < loop->num_nodes; i++)
2052 {
2053 basic_block bb = ifc_bbs[i];
2054 gimple_seq stmts;
2055 if (!is_predicated (bb))
2056 gcc_assert (bb_predicate_gimplified_stmts (bb) == NULL);
2057 if (!is_predicated (bb))
2058 {
2059 /* Do not insert statements for a basic block that is not
2060 predicated. Also make sure that the predicate of the
2061 basic block is set to true. */
2062 reset_bb_predicate (bb);
2063 continue;
2064 }
2065
2066 stmts = bb_predicate_gimplified_stmts (bb);
2067 if (stmts)
2068 {
2069 if (need_to_predicate)
2070 {
2071 /* Insert the predicate of the BB just after the label,
2072 as the if-conversion of memory writes will use this
2073 predicate. */
2074 gimple_stmt_iterator gsi = gsi_after_labels (bb);
2075 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2076 }
2077 else
2078 {
2079 /* Insert the predicate of the BB at the end of the BB
2080 as this would reduce the register pressure: the only
2081 use of this predicate will be in successor BBs. */
2082 gimple_stmt_iterator gsi = gsi_last_bb (bb);
2083
2084 if (gsi_end_p (gsi)
2085 || stmt_ends_bb_p (gsi_stmt (gsi)))
2086 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2087 else
2088 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
2089 }
2090
2091 /* Once the sequence is code generated, set it to NULL. */
2092 set_bb_predicate_gimplified_stmts (bb, NULL);
2093 }
2094 }
2095 }
2096
2097 /* Helper function for predicate_statements. Returns index of existent
2098 mask if it was created for given SIZE and -1 otherwise. */
2099
2100 static int
2101 mask_exists (int size, vec<int> vec)
2102 {
2103 unsigned int ix;
2104 int v;
2105 FOR_EACH_VEC_ELT (vec, ix, v)
2106 if (v == size)
2107 return (int) ix;
2108 return -1;
2109 }
2110
/* Helper function for predicate_statements.  STMT is a memory read or
   write and it needs to be predicated by MASK.  Return a statement
   that does so: an IFN_MASK_LOAD when the lhs is an SSA name (a read),
   otherwise an IFN_MASK_STORE.  Address computations are inserted at
   GSI; virtual operands are carried over from STMT.  */

static gimple *
predicate_load_or_store (gimple_stmt_iterator *gsi, gassign *stmt, tree mask)
{
  gcall *new_stmt;

  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  /* The memory reference is on the rhs for a load, lhs for a store.  */
  tree ref = TREE_CODE (lhs) == SSA_NAME ? rhs : lhs;
  mark_addressable (ref);
  tree addr = force_gimple_operand_gsi (gsi, build_fold_addr_expr (ref),
					true, NULL_TREE, true, GSI_SAME_STMT);
  /* PTR encodes both the alias type (its pointer type) and the
     alignment of the reference (its constant value).  */
  tree ptr = build_int_cst (reference_alias_ptr_type (ref),
			    get_object_alignment (ref));
  /* Copy points-to info if possible.  */
  if (TREE_CODE (addr) == SSA_NAME && !SSA_NAME_PTR_INFO (addr))
    copy_ref_info (build2 (MEM_REF, TREE_TYPE (ref), addr, ptr),
		   ref);
  if (TREE_CODE (lhs) == SSA_NAME)
    {
      new_stmt
	= gimple_build_call_internal (IFN_MASK_LOAD, 3, addr,
				      ptr, mask);
      gimple_call_set_lhs (new_stmt, lhs);
      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
    }
  else
    {
      new_stmt
	= gimple_build_call_internal (IFN_MASK_STORE, 4, addr, ptr,
				      mask, rhs);
      gimple_move_vops (new_stmt, stmt);
    }
  gimple_call_set_nothrow (new_stmt, true);
  return new_stmt;
}
2150
2151 /* STMT uses OP_LHS. Check whether it is equivalent to:
2152
2153 ... = OP_MASK ? OP_LHS : X;
2154
2155 Return X if so, otherwise return null. OP_MASK is an SSA_NAME that is
2156 known to have value OP_COND. */
2157
2158 static tree
2159 check_redundant_cond_expr (gimple *stmt, tree op_mask, tree op_cond,
2160 tree op_lhs)
2161 {
2162 gassign *assign = dyn_cast <gassign *> (stmt);
2163 if (!assign || gimple_assign_rhs_code (assign) != COND_EXPR)
2164 return NULL_TREE;
2165
2166 tree use_cond = gimple_assign_rhs1 (assign);
2167 tree if_true = gimple_assign_rhs2 (assign);
2168 tree if_false = gimple_assign_rhs3 (assign);
2169
2170 if ((use_cond == op_mask || operand_equal_p (use_cond, op_cond, 0))
2171 && if_true == op_lhs)
2172 return if_false;
2173
2174 if (inverse_conditions_p (use_cond, op_cond) && if_false == op_lhs)
2175 return if_true;
2176
2177 return NULL_TREE;
2178 }
2179
2180 /* Return true if VALUE is available for use at STMT. SSA_NAMES is
2181 the set of SSA names defined earlier in STMT's block. */
2182
2183 static bool
2184 value_available_p (gimple *stmt, hash_set<tree_ssa_name_hash> *ssa_names,
2185 tree value)
2186 {
2187 if (is_gimple_min_invariant (value))
2188 return true;
2189
2190 if (TREE_CODE (value) == SSA_NAME)
2191 {
2192 if (SSA_NAME_IS_DEFAULT_DEF (value))
2193 return true;
2194
2195 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (value));
2196 basic_block use_bb = gimple_bb (stmt);
2197 return (def_bb == use_bb
2198 ? ssa_names->contains (value)
2199 : dominated_by_p (CDI_DOMINATORS, use_bb, def_bb));
2200 }
2201
2202 return false;
2203 }
2204
/* Helper function for predicate_statements.  STMT is a potentially-trapping
   arithmetic operation that needs to be predicated by MASK, an SSA_NAME that
   has value COND.  Return a statement that does so.  SSA_NAMES is the set of
   SSA names defined earlier in STMT's block.  */

static gimple *
predicate_rhs_code (gassign *stmt, tree mask, tree cond,
		    hash_set<tree_ssa_name_hash> *ssa_names)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);
  internal_fn cond_fn = get_conditional_internal_fn (code);

  /* Construct the arguments to the conditional internal function:
     the mask first, then the original operands, then the "else" value
     (still to be determined, hence NULL_TREE for now).  */
  auto_vec<tree, 8> args;
  args.safe_grow (nops + 1);
  args[0] = mask;
  for (unsigned int i = 1; i < nops; ++i)
    args[i] = gimple_op (stmt, i);
  args[nops] = NULL_TREE;

  /* Look for uses of the result to see whether they are COND_EXPRs that can
     be folded into the conditional call.  */
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, lhs)
    {
      tree new_else = check_redundant_cond_expr (use_stmt, mask, cond, lhs);
      if (new_else && value_available_p (stmt, ssa_names, new_else))
	{
	  /* The first matching else value wins; further uses can only
	     be folded if they agree with it.  */
	  if (!args[nops])
	    args[nops] = new_else;
	  if (operand_equal_p (new_else, args[nops], 0))
	    {
	      /* We have:

		   LHS = IFN_COND (MASK, ..., ELSE);
		   X = MASK ? LHS : ELSE;

		 which makes X equivalent to LHS.  */
	      tree use_lhs = gimple_assign_lhs (use_stmt);
	      redundant_ssa_names.safe_push (std::make_pair (use_lhs, lhs));
	    }
	}
    }
  /* No foldable COND_EXPR use found: ask the target for its preferred
     else value for this conditional internal function.  */
  if (!args[nops])
    args[nops] = targetm.preferred_else_value (cond_fn, TREE_TYPE (lhs),
					       nops - 1, &args[1]);

  /* Create and insert the call.  */
  gcall *new_stmt = gimple_build_call_internal_vec (cond_fn, args);
  gimple_call_set_lhs (new_stmt, lhs);
  gimple_call_set_nothrow (new_stmt, true);

  return new_stmt;
}
2262
2263 /* Predicate each write to memory in LOOP.
2264
2265 This function transforms control flow constructs containing memory
2266 writes of the form:
2267
2268 | for (i = 0; i < N; i++)
2269 | if (cond)
2270 | A[i] = expr;
2271
2272 into the following form that does not contain control flow:
2273
2274 | for (i = 0; i < N; i++)
2275 | A[i] = cond ? expr : A[i];
2276
2277 The original CFG looks like this:
2278
2279 | bb_0
2280 | i = 0
2281 | end_bb_0
2282 |
2283 | bb_1
2284 | if (i < N) goto bb_5 else goto bb_2
2285 | end_bb_1
2286 |
2287 | bb_2
2288 | cond = some_computation;
2289 | if (cond) goto bb_3 else goto bb_4
2290 | end_bb_2
2291 |
2292 | bb_3
2293 | A[i] = expr;
2294 | goto bb_4
2295 | end_bb_3
2296 |
2297 | bb_4
2298 | goto bb_1
2299 | end_bb_4
2300
2301 insert_gimplified_predicates inserts the computation of the COND
2302 expression at the beginning of the destination basic block:
2303
2304 | bb_0
2305 | i = 0
2306 | end_bb_0
2307 |
2308 | bb_1
2309 | if (i < N) goto bb_5 else goto bb_2
2310 | end_bb_1
2311 |
2312 | bb_2
2313 | cond = some_computation;
2314 | if (cond) goto bb_3 else goto bb_4
2315 | end_bb_2
2316 |
2317 | bb_3
2318 | cond = some_computation;
2319 | A[i] = expr;
2320 | goto bb_4
2321 | end_bb_3
2322 |
2323 | bb_4
2324 | goto bb_1
2325 | end_bb_4
2326
2327 predicate_statements is then predicating the memory write as follows:
2328
2329 | bb_0
2330 | i = 0
2331 | end_bb_0
2332 |
2333 | bb_1
2334 | if (i < N) goto bb_5 else goto bb_2
2335 | end_bb_1
2336 |
2337 | bb_2
2338 | if (cond) goto bb_3 else goto bb_4
2339 | end_bb_2
2340 |
2341 | bb_3
2342 | cond = some_computation;
2343 | A[i] = cond ? expr : A[i];
2344 | goto bb_4
2345 | end_bb_3
2346 |
2347 | bb_4
2348 | goto bb_1
2349 | end_bb_4
2350
2351 and finally combine_blocks removes the basic block boundaries making
2352 the loop vectorizable:
2353
2354 | bb_0
2355 | i = 0
2356 | if (i < N) goto bb_5 else goto bb_1
2357 | end_bb_0
2358 |
2359 | bb_1
2360 | cond = some_computation;
2361 | A[i] = cond ? expr : A[i];
2362 | if (i < N) goto bb_5 else goto bb_4
2363 | end_bb_1
2364 |
2365 | bb_4
2366 | goto bb_1
2367 | end_bb_4
2368 */
2369
static void
predicate_statements (loop_p loop)
{
  unsigned int i, orig_loop_num_nodes = loop->num_nodes;
  auto_vec<int, 1> vect_sizes;
  auto_vec<tree, 1> vect_masks;
  /* SSA names defined so far in the block being processed; used by
     value_available_p via predicate_rhs_code.  */
  hash_set<tree_ssa_name_hash> ssa_names;

  /* Skip the header (index 0); its predicate is always true.  */
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      basic_block bb = ifc_bbs[i];
      tree cond = bb_predicate (bb);
      bool swap;
      int index;

      if (is_true_predicate (cond))
	continue;

      /* Strip a top-level negation and remember to swap the arms of
	 any COND_EXPR we build (or to invert the mask).  */
      swap = false;
      if (TREE_CODE (cond) == TRUTH_NOT_EXPR)
	{
	  swap = true;
	  cond = TREE_OPERAND (cond, 0);
	}

      /* Masks are cached per basic block only.  */
      vect_sizes.truncate (0);
      vect_masks.truncate (0);

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	{
	  gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
	  if (!stmt)
	    ;
	  else if (is_false_predicate (cond)
		   && gimple_vdef (stmt))
	    {
	      /* The block is never executed: drop dead stores.  */
	      unlink_stmt_vdef (stmt);
	      gsi_remove (&gsi, true);
	      release_defs (stmt);
	      continue;
	    }
	  else if (gimple_plf (stmt, GF_PLF_2))
	    {
	      /* GF_PLF_2 marks statements that must be turned into
		 masked loads/stores or conditional internal calls.  */
	      tree lhs = gimple_assign_lhs (stmt);
	      tree mask;
	      gimple *new_stmt;
	      gimple_seq stmts = NULL;
	      machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
	      /* We checked before setting GF_PLF_2 that an equivalent
		 integer mode exists.  */
	      int bitsize = GET_MODE_BITSIZE (mode).to_constant ();
	      if (!vect_sizes.is_empty ()
		  && (index = mask_exists (bitsize, vect_sizes)) != -1)
		/* Use created mask.  */
		mask = vect_masks[index];
	      else
		{
		  if (COMPARISON_CLASS_P (cond))
		    mask = gimple_build (&stmts, TREE_CODE (cond),
					 boolean_type_node,
					 TREE_OPERAND (cond, 0),
					 TREE_OPERAND (cond, 1));
		  else
		    mask = cond;

		  if (swap)
		    {
		      /* Invert the mask: MASK ^ true.  */
		      tree true_val
			= constant_boolean_node (true, TREE_TYPE (mask));
		      mask = gimple_build (&stmts, BIT_XOR_EXPR,
					   TREE_TYPE (mask), mask, true_val);
		    }
		  gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);

		  /* Save mask and its size for further use.  */
		  vect_sizes.safe_push (bitsize);
		  vect_masks.safe_push (mask);
		}
	      /* A single-RHS assignment here is a load or store.  */
	      if (gimple_assign_single_p (stmt))
		new_stmt = predicate_load_or_store (&gsi, stmt, mask);
	      else
		new_stmt = predicate_rhs_code (stmt, mask, cond, &ssa_names);

	      gsi_replace (&gsi, new_stmt, true);
	    }
	  else if (gimple_vdef (stmt))
	    {
	      /* Other stores become read-modify-write:
		 LHS = COND ? RHS : LHS.  */
	      tree lhs = gimple_assign_lhs (stmt);
	      tree rhs = gimple_assign_rhs1 (stmt);
	      tree type = TREE_TYPE (lhs);

	      lhs = ifc_temp_var (type, unshare_expr (lhs), &gsi);
	      rhs = ifc_temp_var (type, unshare_expr (rhs), &gsi);
	      if (swap)
		std::swap (lhs, rhs);
	      cond = force_gimple_operand_gsi_1 (&gsi, unshare_expr (cond),
						 is_gimple_condexpr, NULL_TREE,
						 true, GSI_SAME_STMT);
	      rhs = fold_build_cond_expr (type, unshare_expr (cond), rhs, lhs);
	      gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi));
	      update_stmt (stmt);
	    }
	  /* Record the (possibly replaced) definition as available for
	     later statements in this block.  */
	  tree lhs = gimple_get_lhs (gsi_stmt (gsi));
	  if (lhs && TREE_CODE (lhs) == SSA_NAME)
	    ssa_names.add (lhs);
	  gsi_next (&gsi);
	}
      ssa_names.empty ();
    }
}
2481
/* Remove all GIMPLE_CONDs and GIMPLE_LABELs of all the basic blocks
   other than the exit and latch of the LOOP.  Also resets the
   GIMPLE_DEBUG information.  */

static void
remove_conditions_and_labels (loop_p loop)
{
  gimple_stmt_iterator gsi;
  unsigned int i;

  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = ifc_bbs[i];

      /* Keep the controlling statements of the exit block and the
	 latch intact.  */
      if (bb_with_exit_edge_p (loop, bb)
	  || bb == loop->latch)
	continue;

      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); )
	switch (gimple_code (gsi_stmt (gsi)))
	  {
	  case GIMPLE_COND:
	  case GIMPLE_LABEL:
	    /* gsi_remove advances the iterator; no gsi_next needed.  */
	    gsi_remove (&gsi, true);
	    break;

	  case GIMPLE_DEBUG:
	    /* ??? Should there be conditional GIMPLE_DEBUG_BINDs?  */
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      {
		gimple_debug_bind_reset_value (gsi_stmt (gsi));
		update_stmt (gsi_stmt (gsi));
	      }
	    gsi_next (&gsi);
	    break;

	  default:
	    gsi_next (&gsi);
	  }
    }
}
2523
/* Combine all the basic blocks from LOOP into one or two super basic
   blocks.  Replace PHI nodes with conditional modify expressions.  */

static void
combine_blocks (class loop *loop)
{
  basic_block bb, exit_bb, merge_target_bb;
  unsigned int orig_loop_num_nodes = loop->num_nodes;
  unsigned int i;
  edge e;
  edge_iterator ei;

  remove_conditions_and_labels (loop);
  insert_gimplified_predicates (loop);
  predicate_all_scalar_phis (loop);

  if (need_to_predicate)
    predicate_statements (loop);

  /* Merge basic blocks: first remove all the edges in the loop,
     except for those from the exit block.  */
  exit_bb = NULL;
  /* Remember which blocks were conditionally executed before the
     predicates are freed, so flow-sensitive info can be reset.  */
  bool *predicated = XNEWVEC (bool, orig_loop_num_nodes);
  for (i = 0; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];
      predicated[i] = !is_true_predicate (bb_predicate (bb));
      free_bb_predicate (bb);
      if (bb_with_exit_edge_p (loop, bb))
	{
	  gcc_assert (exit_bb == NULL);
	  exit_bb = bb;
	}
    }
  gcc_assert (exit_bb != loop->latch);

  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      bb = ifc_bbs[i];

      for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));)
	{
	  if (e->src == exit_bb)
	    ei_next (&ei);
	  else
	    remove_edge (e);
	}
    }

  if (exit_bb != NULL)
    {
      if (exit_bb != loop->header)
	{
	  /* Connect this node to loop header.  */
	  make_single_succ_edge (loop->header, exit_bb, EDGE_FALLTHRU);
	  set_immediate_dominator (CDI_DOMINATORS, exit_bb, loop->header);
	}

      /* Redirect non-exit edges to loop->latch.  */
      FOR_EACH_EDGE (e, ei, exit_bb->succs)
	{
	  if (!loop_exit_edge_p (loop, e))
	    redirect_edge_and_branch (e, loop->latch);
	}
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb);
    }
  else
    {
      /* If the loop does not have an exit, reconnect header and latch.  */
      make_edge (loop->header, loop->latch, EDGE_FALLTHRU);
      set_immediate_dominator (CDI_DOMINATORS, loop->latch, loop->header);
    }

  merge_target_bb = loop->header;

  /* Get at the virtual def valid for uses starting at the first block
     we merge into the header.  Without a virtual PHI the loop has the
     same virtual use on all stmts.  */
  gphi *vphi = get_virtual_phi (loop->header);
  tree last_vdef = NULL_TREE;
  if (vphi)
    {
      last_vdef = gimple_phi_result (vphi);
      for (gimple_stmt_iterator gsi = gsi_start_bb (loop->header);
	   ! gsi_end_p (gsi); gsi_next (&gsi))
	if (gimple_vdef (gsi_stmt (gsi)))
	  last_vdef = gimple_vdef (gsi_stmt (gsi));
    }
  for (i = 1; i < orig_loop_num_nodes; i++)
    {
      gimple_stmt_iterator gsi;
      gimple_stmt_iterator last;

      bb = ifc_bbs[i];

      if (bb == exit_bb || bb == loop->latch)
	continue;

      /* We release virtual PHIs late because we have to propagate them
	 out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (bb);
      if (vphi)
	{
	  /* Replace all uses of the virtual PHI result with the
	     current virtual definition.  */
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
	    SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
	  gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}

      /* Make stmts member of loop->header and clear range info from all stmts
	 in BB which is now no longer executed conditional on a predicate we
	 could have derived it from.  */
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  gimple_set_bb (stmt, merge_target_bb);
	  /* Update virtual operands.  */
	  if (last_vdef)
	    {
	      use_operand_p use_p = ssa_vuse_operand (stmt);
	      if (use_p
		  && USE_FROM_PTR (use_p) != last_vdef)
		SET_USE (use_p, last_vdef);
	      if (gimple_vdef (stmt))
		last_vdef = gimple_vdef (stmt);
	    }
	  if (predicated[i])
	    {
	      /* The stmt is now executed unconditionally; range info
		 derived from the guarding predicate is stale.  */
	      ssa_op_iter i;
	      tree op;
	      FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF)
		reset_flow_sensitive_info (op);
	    }
	}

      /* Update stmt list.  */
      last = gsi_last_bb (merge_target_bb);
      gsi_insert_seq_after_without_update (&last, bb_seq (bb), GSI_NEW_STMT);
      set_bb_seq (bb, NULL);

      delete_basic_block (bb);
    }

  /* If possible, merge loop header to the block with the exit edge.
     This reduces the number of basic blocks to two, to please the
     vectorizer that handles only loops with two nodes.  */
  if (exit_bb
      && exit_bb != loop->header)
    {
      /* We release virtual PHIs late because we have to propagate them
	 out using the current VUSE.  The def might be the one used
	 after the loop.  */
      vphi = get_virtual_phi (exit_bb);
      if (vphi)
	{
	  imm_use_iterator iter;
	  use_operand_p use_p;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi))
	    {
	      FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
		SET_USE (use_p, last_vdef);
	    }
	  if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi)))
	    SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1;
	  gimple_stmt_iterator gsi = gsi_for_stmt (vphi);
	  remove_phi_node (&gsi, true);
	}

      if (can_merge_blocks_p (loop->header, exit_bb))
	merge_blocks (loop->header, exit_bb);
    }

  free (ifc_bbs);
  ifc_bbs = NULL;
  free (predicated);
}
2710
/* Version LOOP before if-converting it; the original loop
   will be if-converted, the new copy of the loop will not,
   and the LOOP_VECTORIZED internal call will be guarding which
   loop to execute.  The vectorizer pass will fold this
   internal call into either true or false.

   Note that this function intentionally invalidates profile.  Both edges
   out of LOOP_VECTORIZED must have 100% probability so the profile remains
   consistent after the condition is folded in the vectorizer.  */

static class loop *
version_loop_for_if_conversion (class loop *loop, vec<gimple *> *preds)
{
  basic_block cond_bb;
  tree cond = make_ssa_name (boolean_type_node);
  class loop *new_loop;
  gimple *g;
  gimple_stmt_iterator gsi;
  unsigned int save_length;

  /* Build COND = IFN_LOOP_VECTORIZED (orig_loop_num, copy_loop_num);
     the second argument is patched in after the copy exists.  */
  g = gimple_build_call_internal (IFN_LOOP_VECTORIZED, 2,
				  build_int_cst (integer_type_node, loop->num),
				  integer_zero_node);
  gimple_call_set_lhs (g, cond);

  /* Save BB->aux around loop_version as that uses the same field.  */
  save_length = loop->inner ? loop->inner->num_nodes : loop->num_nodes;
  void **saved_preds = XALLOCAVEC (void *, save_length);
  for (unsigned i = 0; i < save_length; i++)
    saved_preds[i] = ifc_bbs[i]->aux;

  initialize_original_copy_tables ();
  /* At this point we invalidate profile consistency until IFN_LOOP_VECTORIZED
     is re-merged in the vectorizer.  */
  new_loop = loop_version (loop, cond, &cond_bb,
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (),
			   profile_probability::always (), true);
  free_original_copy_tables ();

  /* Restore BB->aux.  */
  for (unsigned i = 0; i < save_length; i++)
    ifc_bbs[i]->aux = saved_preds[i];

  if (new_loop == NULL)
    return NULL;

  /* The copy must never be vectorized itself.  */
  new_loop->dont_vectorize = true;
  new_loop->force_vectorize = false;
  gsi = gsi_last_bb (cond_bb);
  gimple_call_set_arg (g, 1, build_int_cst (integer_type_node, new_loop->num));
  if (preds)
    preds->safe_push (g);
  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
  update_ssa (TODO_update_ssa);
  return new_loop;
}
2768
2769 /* Return true when LOOP satisfies the follow conditions that will
2770 allow it to be recognized by the vectorizer for outer-loop
2771 vectorization:
2772 - The loop is not the root node of the loop tree.
2773 - The loop has exactly one inner loop.
2774 - The loop has a single exit.
2775 - The loop header has a single successor, which is the inner
2776 loop header.
2777 - Each of the inner and outer loop latches have a single
2778 predecessor.
2779 - The loop exit block has a single predecessor, which is the
2780 inner loop's exit block. */
2781
2782 static bool
2783 versionable_outer_loop_p (class loop *loop)
2784 {
2785 if (!loop_outer (loop)
2786 || loop->dont_vectorize
2787 || !loop->inner
2788 || loop->inner->next
2789 || !single_exit (loop)
2790 || !single_succ_p (loop->header)
2791 || single_succ (loop->header) != loop->inner->header
2792 || !single_pred_p (loop->latch)
2793 || !single_pred_p (loop->inner->latch))
2794 return false;
2795
2796 basic_block outer_exit = single_pred (loop->latch);
2797 basic_block inner_exit = single_pred (loop->inner->latch);
2798
2799 if (!single_pred_p (outer_exit) || single_pred (outer_exit) != inner_exit)
2800 return false;
2801
2802 if (dump_file)
2803 fprintf (dump_file, "Found vectorizable outer loop for versioning\n");
2804
2805 return true;
2806 }
2807
/* Performs splitting of critical edges.  Skip splitting and return false
   if LOOP will not be converted because:

     - LOOP is not well formed.
     - LOOP has PHI with more than MAX_PHI_ARG_NUM arguments.

   Last restriction is valid only if AGGRESSIVE_IF_CONV is false.  */

static bool
ifcvt_split_critical_edges (class loop *loop, bool aggressive_if_conv)
{
  basic_block *body;
  basic_block bb;
  unsigned int num = loop->num_nodes;
  unsigned int i;
  gimple *stmt;
  edge e;
  edge_iterator ei;
  auto_vec<edge> critical_edges;

  /* Loop is not well formed.  */
  if (num <= 2 || loop->inner || !single_exit (loop))
    return false;

  body = get_loop_body (loop);
  for (i = 0; i < num; i++)
    {
      bb = body[i];
      /* Refuse loops with overly wide PHIs unless asked to be
	 aggressive (simd pragma).  */
      if (!aggressive_if_conv
	  && phi_nodes (bb)
	  && EDGE_COUNT (bb->preds) > MAX_PHI_ARG_NUM)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "BB %d has complicated PHI with more than %u args.\n",
		     bb->index, MAX_PHI_ARG_NUM);

	  free (body);
	  return false;
	}
      if (bb == loop->latch || bb_with_exit_edge_p (loop, bb))
	continue;

      stmt = last_stmt (bb);
      /* Skip basic blocks not ending with conditional branch.  */
      if (!stmt || gimple_code (stmt) != GIMPLE_COND)
	continue;

      /* Only split critical edges whose destination stays in LOOP.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (EDGE_CRITICAL_P (e) && e->dest->loop_father == loop)
	  critical_edges.safe_push (e);
    }
  free (body);

  while (critical_edges.length () > 0)
    {
      e = critical_edges.pop ();
      /* Don't split if bb can be predicated along non-critical edge.  */
      if (EDGE_COUNT (e->dest->preds) > 2 || all_preds_critical_p (e->dest))
	split_edge (e);
    }

  return true;
}
2872
/* Delete redundant statements produced by predication which prevents
   loop vectorization.  Runs a simple mark-and-sweep DCE over the single
   remaining loop body block, followed by dead-store elimination.  */

static void
ifcvt_local_dce (class loop *loop)
{
  gimple *stmt;
  gimple *stmt1;
  gimple *phi;
  gimple_stmt_iterator gsi;
  auto_vec<gimple *> worklist;
  enum gimple_code code;
  use_operand_p use_p;
  imm_use_iterator imm_iter;
  std::pair <tree, tree> *name_pair;
  unsigned int i;

  /* First propagate the equivalences recorded by predicate_rhs_code.  */
  FOR_EACH_VEC_ELT (redundant_ssa_names, i, name_pair)
    replace_uses_by (name_pair->first, name_pair->second);
  redundant_ssa_names.release ();

  /* The loop has a single BB only.  */
  basic_block bb = loop->header;
  tree latch_vdef = NULL_TREE;

  worklist.create (64);
  /* Consider all phi as live statements.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      gimple_set_plf (phi, GF_PLF_2, true);
      worklist.safe_push (phi);
      /* Remember the virtual def live on the latch edge for the
	 dead-store check below.  */
      if (virtual_operand_p (gimple_phi_result (phi)))
	latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
    }
  /* Consider load/store statements, CALL and COND as live.  */
  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      if (gimple_store_p (stmt)
	  || gimple_assign_load_p (stmt)
	  || is_gimple_debug (stmt))
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      code = gimple_code (stmt);
      if (code == GIMPLE_COND || code == GIMPLE_CALL)
	{
	  gimple_set_plf (stmt, GF_PLF_2, true);
	  worklist.safe_push (stmt);
	  continue;
	}
      gimple_set_plf (stmt, GF_PLF_2, false);

      if (code == GIMPLE_ASSIGN)
	{
	  /* An assignment whose result is used outside BB is live.  */
	  tree lhs = gimple_assign_lhs (stmt);
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
	    {
	      stmt1 = USE_STMT (use_p);
	      if (gimple_bb (stmt1) != bb)
		{
		  gimple_set_plf (stmt, GF_PLF_2, true);
		  worklist.safe_push (stmt);
		  break;
		}
	    }
	}
    }
  /* Propagate liveness through arguments of live stmt.  */
  while (worklist.length () > 0)
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      tree use;

      stmt = worklist.pop ();
      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
	{
	  use = USE_FROM_PTR (use_p);
	  if (TREE_CODE (use) != SSA_NAME)
	    continue;
	  stmt1 = SSA_NAME_DEF_STMT (use);
	  if (gimple_bb (stmt1) != bb
	      || gimple_plf (stmt1, GF_PLF_2))
	    continue;
	  gimple_set_plf (stmt1, GF_PLF_2, true);
	  worklist.safe_push (stmt1);
	}
    }
  /* Delete dead statements.  */
  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi))
    {
      stmt = gsi_stmt (gsi);
      if (gimple_store_p (stmt))
	{
	  /* Stores were marked live above; use the DSE machinery to
	     prove a store dead before removing it.  */
	  tree lhs = gimple_get_lhs (stmt);
	  ao_ref write;
	  ao_ref_init (&write, lhs);

	  if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef)
	      == DSE_STORE_DEAD)
	    delete_dead_or_redundant_assignment (&gsi, "dead");
	  else
	    gsi_next (&gsi);
	  continue;
	}

      if (gimple_plf (stmt, GF_PLF_2))
	{
	  gsi_next (&gsi);
	  continue;
	}
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Delete dead stmt in bb#%d\n", bb->index);
	  print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
	}
      gsi_remove (&gsi, true);
      release_defs (stmt);
    }
}
2998
/* If-convert LOOP when it is legal.  For the moment this pass has no
   profitability analysis.  Returns non-zero todo flags when something
   changed.  */

unsigned int
tree_if_conversion (class loop *loop, vec<gimple *> *preds)
{
  unsigned int todo = 0;
  bool aggressive_if_conv;
  class loop *rloop;
  bitmap exit_bbs;

 again:
  rloop = NULL;
  ifc_bbs = NULL;
  need_to_predicate = false;
  any_complicated_phi = false;

  /* Apply more aggressive if-conversion when loop or its outer loop were
     marked with simd pragma.  When that's the case, we try to if-convert
     loop containing PHIs with more than MAX_PHI_ARG_NUM arguments.  */
  aggressive_if_conv = loop->force_vectorize;
  if (!aggressive_if_conv)
    {
      class loop *outer_loop = loop_outer (loop);
      if (outer_loop && outer_loop->force_vectorize)
	aggressive_if_conv = true;
    }

  if (!ifcvt_split_critical_edges (loop, aggressive_if_conv))
    goto cleanup;

  if (!if_convertible_loop_p (loop)
      || !dbg_cnt (if_conversion_tree))
    goto cleanup;

  /* Predication requires the loop to be vectorized afterwards; bail
     out if vectorization is disabled for it.  */
  if ((need_to_predicate || any_complicated_phi)
      && ((!flag_tree_loop_vectorize && !loop->force_vectorize)
	  || loop->dont_vectorize))
    goto cleanup;

  /* Since we have no cost model, always version loops unless the user
     specified -ftree-loop-if-convert or unless versioning is required.
     Either version this loop, or if the pattern is right for outer-loop
     vectorization, version the outer loop.  In the latter case we will
     still if-convert the original inner loop.  */
  if (need_to_predicate
      || any_complicated_phi
      || flag_tree_loop_if_convert != 1)
    {
      class loop *vloop
	= (versionable_outer_loop_p (loop_outer (loop))
	   ? loop_outer (loop) : loop);
      class loop *nloop = version_loop_for_if_conversion (vloop, preds);
      if (nloop == NULL)
	goto cleanup;
      if (vloop != loop)
	{
	  /* If versionable_outer_loop_p decided to version the
	     outer loop, version also the inner loop of the non-vectorized
	     loop copy.  So we transform:
	      loop1
		loop2
	     into:
	      if (LOOP_VECTORIZED (1, 3))
		{
		  loop1
		    loop2
		}
	      else
		loop3 (copy of loop1)
		  if (LOOP_VECTORIZED (4, 5))
		    loop4 (copy of loop2)
		  else
		    loop5 (copy of loop4)  */
	  gcc_assert (nloop->inner && nloop->inner->next == NULL);
	  rloop = nloop->inner;
	}
    }

  /* Now all statements are if-convertible.  Combine all the basic
     blocks into one huge basic block doing the if-conversion
     on-the-fly.  */
  combine_blocks (loop);

  /* Perform local CSE, this esp. helps the vectorizer analysis if loads
     and stores are involved.  CSE only the loop body, not the entry
     PHIs, those are to be kept in sync with the non-if-converted copy.
     ???  We'll still keep dead stores though.  */
  exit_bbs = BITMAP_ALLOC (NULL);
  bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index);
  bitmap_set_bit (exit_bbs, loop->latch->index);
  todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs);

  /* Delete dead predicate computations.  */
  ifcvt_local_dce (loop);
  BITMAP_FREE (exit_bbs);

  todo |= TODO_cleanup_cfg;

 cleanup:
  if (ifc_bbs)
    {
      unsigned int i;

      for (i = 0; i < loop->num_nodes; i++)
	free_bb_predicate (ifc_bbs[i]);

      free (ifc_bbs);
      ifc_bbs = NULL;
    }
  /* If we versioned an outer loop, its inner loop copy still needs
     to be versioned; iterate on it.  */
  if (rloop != NULL)
    {
      loop = rloop;
      goto again;
    }

  return todo;
}
3118
3119 /* Tree if-conversion pass management. */
3120
3121 namespace {
3122
/* Pass descriptor for the tree-level if-conversion pass.  */

const pass_data pass_data_if_conversion =
{
  GIMPLE_PASS, /* type */
  "ifcvt", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_TREE_LOOP_IFCVT, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
3135
/* Pass wrapper around tree_if_conversion; see gate and execute below.  */

class pass_if_conversion : public gimple_opt_pass
{
public:
  pass_if_conversion (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_if_conversion, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *);
  virtual unsigned int execute (function *);

}; // class pass_if_conversion
3148
3149 bool
3150 pass_if_conversion::gate (function *fun)
3151 {
3152 return (((flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
3153 && flag_tree_loop_if_convert != 0)
3154 || flag_tree_loop_if_convert == 1);
3155 }
3156
/* Main entry of the pass: if-convert every candidate loop in FUN and
   perform the necessary IL/SSA updates afterwards.  */

unsigned int
pass_if_conversion::execute (function *fun)
{
  class loop *loop;
  unsigned todo = 0;

  if (number_of_loops (fun) <= 1)
    return 0;

  /* Collect the emitted IFN_LOOP_VECTORIZED calls so vanished loops
     can be folded back below.  */
  auto_vec<gimple *> preds;
  FOR_EACH_LOOP (loop, 0)
    if (flag_tree_loop_if_convert == 1
	|| ((flag_tree_loop_vectorize || loop->force_vectorize)
	    && !loop->dont_vectorize))
      todo |= tree_if_conversion (loop, &preds);

  if (todo)
    {
      free_numbers_of_iterations_estimates (fun);
      scev_reset ();
    }

  if (flag_checking)
    {
      /* Verify no stale BB->aux from loop versioning remains.  */
      basic_block bb;
      FOR_EACH_BB_FN (bb, fun)
	gcc_assert (!bb->aux);
    }

  /* Perform IL update now, it might elide some loops.  */
  if (todo & TODO_cleanup_cfg)
    {
      cleanup_tree_cfg ();
      if (need_ssa_update_p (fun))
	todo |= TODO_update_ssa;
    }
  if (todo & TODO_update_ssa_any)
    update_ssa (todo & TODO_update_ssa_any);

  /* If if-conversion elided the loop fall back to the original one.  */
  for (unsigned i = 0; i < preds.length (); ++i)
    {
      gimple *g = preds[i];
      if (!gimple_bb (g))
	continue;
      unsigned ifcvt_loop = tree_to_uhwi (gimple_call_arg (g, 0));
      if (!get_loop (fun, ifcvt_loop))
	{
	  if (dump_file)
	    fprintf (dump_file, "If-converted loop vanished\n");
	  fold_loop_internal_call (g, boolean_false_node);
	}
    }

  return 0;
}
3213
3214 } // anon namespace
3215
/* Create a new instance of the if-conversion pass for the pass manager.  */

gimple_opt_pass *
make_pass_if_conversion (gcc::context *ctxt)
{
  return new pass_if_conversion (ctxt);
}