]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-loop-manip.c
Add source information to -fverbose-asm
[thirdparty/gcc.git] / gcc / tree-vect-loop-manip.c
CommitLineData
b8698a0f 1/* Vectorizer Specific Loop Manipulations
818ab71a 2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
40e23961 26#include "tree.h"
c7131fb2 27#include "gimple.h"
957060b5
AM
28#include "cfghooks.h"
29#include "tree-pass.h"
c7131fb2 30#include "ssa.h"
c7131fb2 31#include "fold-const.h"
60393bbc 32#include "cfganal.h"
45b0be94 33#include "gimplify.h"
5be5c238 34#include "gimple-iterator.h"
18f429e2 35#include "gimplify-me.h"
442b4905 36#include "tree-cfg.h"
e28030cf 37#include "tree-ssa-loop-manip.h"
442b4905 38#include "tree-into-ssa.h"
7a300452 39#include "tree-ssa.h"
ebfd146a 40#include "cfgloop.h"
ebfd146a
IR
41#include "tree-scalar-evolution.h"
42#include "tree-vectorizer.h"
2a93954e 43#include "tree-ssa-loop-ivopts.h"
ebfd146a
IR
44
45/*************************************************************************
46 Simple Loop Peeling Utilities
47
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
50
51
52/* Renames the use *OP_P. */
53
54static void
55rename_use_op (use_operand_p op_p)
56{
57 tree new_name;
58
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
61
62 new_name = get_current_def (USE_FROM_PTR (op_p));
63
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
67
68 /* An ordinary ssa name defined in the loop. */
69
70 SET_USE (op_p, new_name);
71}
72
73
a6c51a12
YR
74/* Renames the variables in basic block BB. Allow renaming of PHI argumnets
75 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
76 true. */
ebfd146a 77
2cfc56b9 78static void
a6c51a12 79rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
ebfd146a 80{
355fe088 81 gimple *stmt;
ebfd146a
IR
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
a6c51a12
YR
87 struct loop *outer_loop = NULL;
88
89 if (rename_from_outer_loop)
90 {
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
93 }
ebfd146a 94
538dd0b7
DM
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
ebfd146a
IR
97 {
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
101 }
102
2cfc56b9 103 FOR_EACH_EDGE (e, ei, bb->preds)
ebfd146a 104 {
a6c51a12
YR
105 if (!flow_bb_inside_loop_p (loop, e->src)
106 && (!rename_from_outer_loop || e->src != outer_loop->header))
ebfd146a 107 continue;
538dd0b7
DM
108 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
109 gsi_next (&gsi))
110 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
ebfd146a
IR
111 }
112}
113
114
a79683d5 115struct adjust_info
684f25f4
AO
116{
117 tree from, to;
118 basic_block bb;
a79683d5 119};
684f25f4 120
684f25f4
AO
121/* A stack of values to be adjusted in debug stmts. We have to
122 process them LIFO, so that the closest substitution applies. If we
123 processed them FIFO, without the stack, we might substitute uses
124 with a PHI DEF that would soon become non-dominant, and when we got
125 to the suitable one, it wouldn't have anything to substitute any
126 more. */
ff4c81cc 127static vec<adjust_info, va_heap> adjust_vec;
684f25f4
AO
128
129/* Adjust any debug stmts that referenced AI->from values to use the
130 loop-closed AI->to, if the references are dominated by AI->bb and
131 not by the definition of AI->from. */
132
133static void
134adjust_debug_stmts_now (adjust_info *ai)
135{
136 basic_block bbphi = ai->bb;
137 tree orig_def = ai->from;
138 tree new_def = ai->to;
139 imm_use_iterator imm_iter;
355fe088 140 gimple *stmt;
684f25f4
AO
141 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
142
143 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
144
145 /* Adjust any debug stmts that held onto non-loop-closed
146 references. */
147 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
148 {
149 use_operand_p use_p;
150 basic_block bbuse;
151
152 if (!is_gimple_debug (stmt))
153 continue;
154
155 gcc_assert (gimple_debug_bind_p (stmt));
156
157 bbuse = gimple_bb (stmt);
158
159 if ((bbuse == bbphi
160 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
161 && !(bbuse == bbdef
162 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
163 {
164 if (new_def)
165 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
166 SET_USE (use_p, new_def);
167 else
168 {
169 gimple_debug_bind_reset_value (stmt);
170 update_stmt (stmt);
171 }
172 }
173 }
174}
175
176/* Adjust debug stmts as scheduled before. */
177
178static void
179adjust_vec_debug_stmts (void)
180{
181 if (!MAY_HAVE_DEBUG_STMTS)
182 return;
183
9771b263 184 gcc_assert (adjust_vec.exists ());
684f25f4 185
9771b263 186 while (!adjust_vec.is_empty ())
684f25f4 187 {
9771b263
DN
188 adjust_debug_stmts_now (&adjust_vec.last ());
189 adjust_vec.pop ();
684f25f4
AO
190 }
191
9771b263 192 adjust_vec.release ();
684f25f4
AO
193}
194
195/* Adjust any debug stmts that referenced FROM values to use the
196 loop-closed TO, if the references are dominated by BB and not by
197 the definition of FROM. If adjust_vec is non-NULL, adjustments
198 will be postponed until adjust_vec_debug_stmts is called. */
199
200static void
201adjust_debug_stmts (tree from, tree to, basic_block bb)
202{
203 adjust_info ai;
204
a471762f
RG
205 if (MAY_HAVE_DEBUG_STMTS
206 && TREE_CODE (from) == SSA_NAME
a52ca739 207 && ! SSA_NAME_IS_DEFAULT_DEF (from)
a471762f 208 && ! virtual_operand_p (from))
684f25f4
AO
209 {
210 ai.from = from;
211 ai.to = to;
212 ai.bb = bb;
213
9771b263
DN
214 if (adjust_vec.exists ())
215 adjust_vec.safe_push (ai);
684f25f4
AO
216 else
217 adjust_debug_stmts_now (&ai);
218 }
219}
220
221/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
222 to adjust any debug stmts that referenced the old phi arg,
223 presumably non-loop-closed references left over from other
224 transformations. */
225
226static void
355fe088 227adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
684f25f4
AO
228{
229 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
230
231 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
232
233 if (MAY_HAVE_DEBUG_STMTS)
234 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
235 gimple_bb (update_phi));
236}
237
ebfd146a 238
ebfd146a
IR
239/* Update PHI nodes for a guard of the LOOP.
240
241 Input:
242 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
243 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
244 originates from the guard-bb, skips LOOP and reaches the (unique) exit
245 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
246 We denote this bb NEW_MERGE_BB because before the guard code was added
247 it had a single predecessor (the LOOP header), and now it became a merge
248 point of two paths - the path that ends with the LOOP exit-edge, and
249 the path that ends with GUARD_EDGE.
250 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
251 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
252
253 ===> The CFG before the guard-code was added:
254 LOOP_header_bb:
255 loop_body
256 if (exit_loop) goto update_bb
257 else goto LOOP_header_bb
258 update_bb:
259
260 ==> The CFG after the guard-code was added:
261 guard_bb:
262 if (LOOP_guard_condition) goto new_merge_bb
263 else goto LOOP_header_bb
264 LOOP_header_bb:
265 loop_body
266 if (exit_loop_condition) goto new_merge_bb
267 else goto LOOP_header_bb
268 new_merge_bb:
269 goto update_bb
270 update_bb:
271
272 ==> The CFG after this function:
273 guard_bb:
274 if (LOOP_guard_condition) goto new_merge_bb
275 else goto LOOP_header_bb
276 LOOP_header_bb:
277 loop_body
278 if (exit_loop_condition) goto new_exit_bb
279 else goto LOOP_header_bb
280 new_exit_bb:
281 new_merge_bb:
282 goto update_bb
283 update_bb:
284
285 This function:
286 1. creates and updates the relevant phi nodes to account for the new
287 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
288 1.1. Create phi nodes at NEW_MERGE_BB.
289 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
290 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
291 2. preserves loop-closed-ssa-form by creating the required phi nodes
292 at the exit of LOOP (i.e, in NEW_EXIT_BB).
293
294 There are two flavors to this function:
295
296 slpeel_update_phi_nodes_for_guard1:
297 Here the guard controls whether we enter or skip LOOP, where LOOP is a
298 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
299 for variables that have phis in the loop header.
300
301 slpeel_update_phi_nodes_for_guard2:
302 Here the guard controls whether we enter or skip LOOP, where LOOP is an
303 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
304 for variables that have phis in the loop exit.
305
306 I.E., the overall structure is:
307
308 loop1_preheader_bb:
309 guard1 (goto loop1/merge1_bb)
310 loop1
311 loop1_exit_bb:
312 guard2 (goto merge1_bb/merge2_bb)
313 merge1_bb
314 loop2
315 loop2_exit_bb
316 merge2_bb
317 next_bb
318
319 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
320 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
321 that have phis in loop1->header).
322
323 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
324 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
325 that have phis in next_bb). It also adds some of these phis to
326 loop1_exit_bb.
327
328 slpeel_update_phi_nodes_for_guard1 is always called before
329 slpeel_update_phi_nodes_for_guard2. They are both needed in order
330 to create correct data-flow and loop-closed-ssa-form.
331
332 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
333 that change between iterations of a loop (and therefore have a phi-node
334 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
b8698a0f
L
335 phis for variables that are used out of the loop (and therefore have
336 loop-closed exit phis). Some variables may be both updated between
ebfd146a
IR
337 iterations and used after the loop. This is why in loop1_exit_bb we
338 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
339 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
340
341 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
342 an original loop. i.e., we have:
343
344 orig_loop
345 guard_bb (goto LOOP/new_merge)
346 new_loop <-- LOOP
347 new_exit
348 new_merge
349 next_bb
350
351 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
352 have:
353
354 new_loop
355 guard_bb (goto LOOP/new_merge)
356 orig_loop <-- LOOP
357 new_exit
358 new_merge
359 next_bb
360
361 The SSA names defined in the original loop have a current
026c3cfd
AH
362 reaching definition that records the corresponding new ssa-name
363 used in the new duplicated loop copy.
ebfd146a
IR
364 */
365
366/* Function slpeel_update_phi_nodes_for_guard1
b8698a0f 367
ebfd146a
IR
368 Input:
369 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
370 - DEFS - a bitmap of ssa names to mark new names for which we recorded
b8698a0f
L
371 information.
372
ebfd146a
IR
373 In the context of the overall structure, we have:
374
b8698a0f 375 loop1_preheader_bb:
ebfd146a
IR
376 guard1 (goto loop1/merge1_bb)
377LOOP-> loop1
378 loop1_exit_bb:
379 guard2 (goto merge1_bb/merge2_bb)
380 merge1_bb
381 loop2
382 loop2_exit_bb
383 merge2_bb
384 next_bb
385
386 For each name updated between loop iterations (i.e - for each name that has
387 an entry (loop-header) phi in LOOP) we create a new phi in:
388 1. merge1_bb (to account for the edge from guard1)
389 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
390*/
391
392static void
393slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
c334023f 394 bool is_new_loop, basic_block *new_exit_bb)
ebfd146a 395{
538dd0b7
DM
396 gphi *orig_phi, *new_phi;
397 gphi *update_phi, *update_phi2;
ebfd146a
IR
398 tree guard_arg, loop_arg;
399 basic_block new_merge_bb = guard_edge->dest;
400 edge e = EDGE_SUCC (new_merge_bb, 0);
401 basic_block update_bb = e->dest;
402 basic_block orig_bb = loop->header;
403 edge new_exit_e;
404 tree current_new_name;
538dd0b7 405 gphi_iterator gsi_orig, gsi_update;
ebfd146a
IR
406
407 /* Create new bb between loop and new_merge_bb. */
408 *new_exit_bb = split_edge (single_exit (loop));
409
410 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
411
412 for (gsi_orig = gsi_start_phis (orig_bb),
413 gsi_update = gsi_start_phis (update_bb);
414 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
415 gsi_next (&gsi_orig), gsi_next (&gsi_update))
416 {
e20f6b4b 417 source_location loop_locus, guard_locus;
070ecdfd 418 tree new_res;
538dd0b7
DM
419 orig_phi = gsi_orig.phi ();
420 update_phi = gsi_update.phi ();
ebfd146a 421
ebfd146a
IR
422 /** 1. Handle new-merge-point phis **/
423
424 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
b731b390 425 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
070ecdfd 426 new_phi = create_phi_node (new_res, new_merge_bb);
ebfd146a
IR
427
428 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
429 of LOOP. Set the two phi args in NEW_PHI for these edges: */
430 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
b8698a0f
L
431 loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
432 EDGE_SUCC (loop->latch,
f5045c96 433 0));
ebfd146a 434 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
b8698a0f
L
435 guard_locus
436 = gimple_phi_arg_location_from_edge (orig_phi,
f5045c96 437 loop_preheader_edge (loop));
ebfd146a 438
9e227d60
DC
439 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
440 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
ebfd146a
IR
441
442 /* 1.3. Update phi in successor block. */
443 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
444 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
684f25f4 445 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
ebfd146a
IR
446 update_phi2 = new_phi;
447
448
449 /** 2. Handle loop-closed-ssa-form phis **/
450
ea057359 451 if (virtual_operand_p (PHI_RESULT (orig_phi)))
ebfd146a
IR
452 continue;
453
454 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
b731b390 455 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
070ecdfd 456 new_phi = create_phi_node (new_res, *new_exit_bb);
ebfd146a
IR
457
458 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
9e227d60 459 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
ebfd146a
IR
460
461 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
462 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
684f25f4
AO
463 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
464 PHI_RESULT (new_phi));
ebfd146a
IR
465
466 /* 2.4. Record the newly created name with set_current_def.
467 We want to find a name such that
468 name = get_current_def (orig_loop_name)
469 and to set its current definition as follows:
470 set_current_def (name, new_phi_name)
471
472 If LOOP is a new loop then loop_arg is already the name we're
473 looking for. If LOOP is the original loop, then loop_arg is
474 the orig_loop_name and the relevant name is recorded in its
475 current reaching definition. */
476 if (is_new_loop)
477 current_new_name = loop_arg;
478 else
479 {
480 current_new_name = get_current_def (loop_arg);
481 /* current_def is not available only if the variable does not
482 change inside the loop, in which case we also don't care
483 about recording a current_def for it because we won't be
484 trying to create loop-exit-phis for it. */
485 if (!current_new_name)
486 continue;
487 }
39719c84
JJ
488 tree new_name = get_current_def (current_new_name);
489 /* Because of peeled_chrec optimization it is possible that we have
490 set this earlier. Verify the PHI has the same value. */
491 if (new_name)
492 {
355fe088 493 gimple *phi = SSA_NAME_DEF_STMT (new_name);
39719c84
JJ
494 gcc_assert (gimple_code (phi) == GIMPLE_PHI
495 && gimple_bb (phi) == *new_exit_bb
496 && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
497 == loop_arg));
498 continue;
499 }
ebfd146a
IR
500
501 set_current_def (current_new_name, PHI_RESULT (new_phi));
ebfd146a
IR
502 }
503}
504
505
506/* Function slpeel_update_phi_nodes_for_guard2
507
508 Input:
509 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
510
511 In the context of the overall structure, we have:
512
b8698a0f 513 loop1_preheader_bb:
ebfd146a
IR
514 guard1 (goto loop1/merge1_bb)
515 loop1
b8698a0f 516 loop1_exit_bb:
ebfd146a
IR
517 guard2 (goto merge1_bb/merge2_bb)
518 merge1_bb
519LOOP-> loop2
520 loop2_exit_bb
521 merge2_bb
522 next_bb
523
524 For each name used out side the loop (i.e - for each name that has an exit
525 phi in next_bb) we create a new phi in:
b8698a0f 526 1. merge2_bb (to account for the edge from guard_bb)
ebfd146a
IR
527 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
528 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
529 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
530*/
531
532static void
533slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
534 bool is_new_loop, basic_block *new_exit_bb)
535{
538dd0b7
DM
536 gphi *orig_phi, *new_phi;
537 gphi *update_phi, *update_phi2;
ebfd146a
IR
538 tree guard_arg, loop_arg;
539 basic_block new_merge_bb = guard_edge->dest;
540 edge e = EDGE_SUCC (new_merge_bb, 0);
541 basic_block update_bb = e->dest;
542 edge new_exit_e;
543 tree orig_def, orig_def_new_name;
544 tree new_name, new_name2;
545 tree arg;
538dd0b7 546 gphi_iterator gsi;
ebfd146a
IR
547
548 /* Create new bb between loop and new_merge_bb. */
549 *new_exit_bb = split_edge (single_exit (loop));
550
551 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
552
553 for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
554 {
070ecdfd 555 tree new_res;
538dd0b7 556 update_phi = gsi.phi ();
ebfd146a
IR
557 orig_phi = update_phi;
558 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
559 /* This loop-closed-phi actually doesn't represent a use
b8698a0f 560 out of the loop - the phi arg is a constant. */
ebfd146a
IR
561 if (TREE_CODE (orig_def) != SSA_NAME)
562 continue;
563 orig_def_new_name = get_current_def (orig_def);
564 arg = NULL_TREE;
565
566 /** 1. Handle new-merge-point phis **/
567
568 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
b731b390 569 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
070ecdfd 570 new_phi = create_phi_node (new_res, new_merge_bb);
ebfd146a
IR
571
572 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
573 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
574 new_name = orig_def;
575 new_name2 = NULL_TREE;
576 if (orig_def_new_name)
577 {
578 new_name = orig_def_new_name;
579 /* Some variables have both loop-entry-phis and loop-exit-phis.
580 Such variables were given yet newer names by phis placed in
581 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
582 new_name2 = get_current_def (get_current_def (orig_name)). */
583 new_name2 = get_current_def (new_name);
584 }
b8698a0f 585
ebfd146a
IR
586 if (is_new_loop)
587 {
588 guard_arg = orig_def;
589 loop_arg = new_name;
590 }
591 else
592 {
593 guard_arg = new_name;
594 loop_arg = orig_def;
595 }
596 if (new_name2)
597 guard_arg = new_name2;
b8698a0f 598
9e227d60
DC
599 add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
600 add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
ebfd146a
IR
601
602 /* 1.3. Update phi in successor block. */
603 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
684f25f4 604 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
ebfd146a
IR
605 update_phi2 = new_phi;
606
607
608 /** 2. Handle loop-closed-ssa-form phis **/
609
18d08617
RB
610 if (virtual_operand_p (PHI_RESULT (orig_phi)))
611 continue;
612
ebfd146a 613 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
b731b390 614 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
070ecdfd 615 new_phi = create_phi_node (new_res, *new_exit_bb);
ebfd146a
IR
616
617 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
9e227d60 618 add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
ebfd146a
IR
619
620 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
621 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
684f25f4
AO
622 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
623 PHI_RESULT (new_phi));
ebfd146a
IR
624
625
626 /** 3. Handle loop-closed-ssa-form phis for first loop **/
627
628 /* 3.1. Find the relevant names that need an exit-phi in
629 GUARD_BB, i.e. names for which
630 slpeel_update_phi_nodes_for_guard1 had not already created a
631 phi node. This is the case for names that are used outside
632 the loop (and therefore need an exit phi) but are not updated
633 across loop iterations (and therefore don't have a
634 loop-header-phi).
635
636 slpeel_update_phi_nodes_for_guard1 is responsible for
637 creating loop-exit phis in GUARD_BB for names that have a
638 loop-header-phi. When such a phi is created we also record
639 the new name in its current definition. If this new name
640 exists, then guard_arg was set to this new name (see 1.2
641 above). Therefore, if guard_arg is not this new name, this
642 is an indication that an exit-phi in GUARD_BB was not yet
643 created, so we take care of it here. */
644 if (guard_arg == new_name2)
645 continue;
646 arg = guard_arg;
647
648 /* 3.2. Generate new phi node in GUARD_BB: */
b731b390 649 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
070ecdfd 650 new_phi = create_phi_node (new_res, guard_edge->src);
ebfd146a
IR
651
652 /* 3.3. GUARD_BB has one incoming edge: */
653 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
b8698a0f 654 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
9e227d60 655 UNKNOWN_LOCATION);
ebfd146a
IR
656
657 /* 3.4. Update phi in successor of GUARD_BB: */
658 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
659 == guard_arg);
684f25f4
AO
660 adjust_phi_and_debug_stmts (update_phi2, guard_edge,
661 PHI_RESULT (new_phi));
ebfd146a
IR
662 }
663}
664
665
666/* Make the LOOP iterate NITERS times. This is done by adding a new IV
667 that starts at zero, increases by one and its limit is NITERS.
668
669 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
670
671void
672slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
673{
674 tree indx_before_incr, indx_after_incr;
538dd0b7
DM
675 gcond *cond_stmt;
676 gcond *orig_cond;
ebfd146a
IR
677 edge exit_edge = single_exit (loop);
678 gimple_stmt_iterator loop_cond_gsi;
679 gimple_stmt_iterator incr_gsi;
680 bool insert_after;
681 tree init = build_int_cst (TREE_TYPE (niters), 0);
682 tree step = build_int_cst (TREE_TYPE (niters), 1);
b05e0233 683 source_location loop_loc;
ebfd146a
IR
684 enum tree_code code;
685
686 orig_cond = get_loop_exit_condition (loop);
687 gcc_assert (orig_cond);
688 loop_cond_gsi = gsi_for_stmt (orig_cond);
689
690 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
691 create_iv (init, step, NULL_TREE, loop,
692 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
693
694 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
695 true, NULL_TREE, true,
696 GSI_SAME_STMT);
697 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
698 true, GSI_SAME_STMT);
699
700 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
701 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
702 NULL_TREE);
703
704 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
705
706 /* Remove old loop exit test: */
707 gsi_remove (&loop_cond_gsi, true);
6f723d33 708 free_stmt_vec_info (orig_cond);
ebfd146a
IR
709
710 loop_loc = find_loop_location (loop);
73fbfcad 711 if (dump_enabled_p ())
ebfd146a 712 {
b05e0233
RB
713 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
714 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
715 LOCATION_LINE (loop_loc));
78c60e3d 716 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
ebfd146a 717 }
ebfd146a
IR
718 loop->nb_iterations = niters;
719}
720
5ce9450f
JJ
721/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
722 For all PHI arguments in FROM->dest and TO->dest from those
723 edges ensure that TO->dest PHI arguments have current_def
724 to that in from. */
725
726static void
727slpeel_duplicate_current_defs_from_edges (edge from, edge to)
728{
729 gimple_stmt_iterator gsi_from, gsi_to;
730
731 for (gsi_from = gsi_start_phis (from->dest),
732 gsi_to = gsi_start_phis (to->dest);
14ba8d6d 733 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
5ce9450f 734 {
355fe088
TS
735 gimple *from_phi = gsi_stmt (gsi_from);
736 gimple *to_phi = gsi_stmt (gsi_to);
5ce9450f 737 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
14ba8d6d
RB
738 if (TREE_CODE (from_arg) != SSA_NAME)
739 {
740 gsi_next (&gsi_from);
741 continue;
742 }
5ce9450f 743 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
14ba8d6d
RB
744 if (TREE_CODE (to_arg) != SSA_NAME)
745 {
746 gsi_next (&gsi_to);
747 continue;
748 }
749 if (get_current_def (to_arg) == NULL_TREE)
5ce9450f 750 set_current_def (to_arg, get_current_def (from_arg));
14ba8d6d
RB
751 gsi_next (&gsi_from);
752 gsi_next (&gsi_to);
5ce9450f
JJ
753 }
754}
755
ebfd146a 756
b8698a0f 757/* Given LOOP this function generates a new copy of it and puts it
5ce9450f
JJ
758 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
759 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
760 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
761 entry or exit of LOOP. */
ebfd146a
IR
762
763struct loop *
5ce9450f
JJ
764slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
765 struct loop *scalar_loop, edge e)
ebfd146a
IR
766{
767 struct loop *new_loop;
768 basic_block *new_bbs, *bbs;
769 bool at_exit;
770 bool was_imm_dom;
b8698a0f 771 basic_block exit_dest;
ebfd146a 772 edge exit, new_exit;
a6c51a12 773 bool duplicate_outer_loop = false;
ebfd146a 774
2cfc56b9
RB
775 exit = single_exit (loop);
776 at_exit = (e == exit);
ebfd146a
IR
777 if (!at_exit && e != loop_preheader_edge (loop))
778 return NULL;
779
5ce9450f
JJ
780 if (scalar_loop == NULL)
781 scalar_loop = loop;
782
783 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
784 get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
a6c51a12
YR
785 /* Allow duplication of outer loops. */
786 if (scalar_loop->inner)
787 duplicate_outer_loop = true;
ebfd146a 788 /* Check whether duplication is possible. */
5ce9450f 789 if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
ebfd146a
IR
790 {
791 free (bbs);
792 return NULL;
793 }
794
795 /* Generate new loop structure. */
5ce9450f
JJ
796 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
797 duplicate_subloops (scalar_loop, new_loop);
ebfd146a 798
2cfc56b9 799 exit_dest = exit->dest;
b8698a0f
L
800 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
801 exit_dest) == loop->header ?
ebfd146a
IR
802 true : false);
803
2cfc56b9
RB
804 /* Also copy the pre-header, this avoids jumping through hoops to
805 duplicate the loop entry PHI arguments. Create an empty
806 pre-header unconditionally for this. */
5ce9450f 807 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
2cfc56b9 808 edge entry_e = single_pred_edge (preheader);
5ce9450f
JJ
809 bbs[scalar_loop->num_nodes] = preheader;
810 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
ebfd146a 811
5ce9450f
JJ
812 exit = single_exit (scalar_loop);
813 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
ebfd146a 814 &exit, 1, &new_exit, NULL,
f14540b6 815 e->src, true);
5ce9450f
JJ
816 exit = single_exit (loop);
817 basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
ebfd146a 818
5ce9450f
JJ
819 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
820
821 if (scalar_loop != loop)
822 {
823 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
824 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
825 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
826 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
827 header) to have current_def set, so copy them over. */
828 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
829 exit);
830 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
831 0),
832 EDGE_SUCC (loop->latch, 0));
833 }
b8698a0f 834
ebfd146a
IR
835 if (at_exit) /* Add the loop copy at exit. */
836 {
5ce9450f
JJ
837 if (scalar_loop != loop)
838 {
538dd0b7 839 gphi_iterator gsi;
5ce9450f
JJ
840 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
841
842 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
843 gsi_next (&gsi))
844 {
538dd0b7 845 gphi *phi = gsi.phi ();
5ce9450f
JJ
846 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
847 location_t orig_locus
848 = gimple_phi_arg_location_from_edge (phi, e);
849
850 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
851 }
852 }
2cfc56b9
RB
853 redirect_edge_and_branch_force (e, new_preheader);
854 flush_pending_stmts (e);
855 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
a6c51a12 856 if (was_imm_dom || duplicate_outer_loop)
5ce9450f 857 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
2cfc56b9
RB
858
859 /* And remove the non-necessary forwarder again. Keep the other
860 one so we have a proper pre-header for the loop at the exit edge. */
5ce9450f
JJ
861 redirect_edge_pred (single_succ_edge (preheader),
862 single_pred (preheader));
2cfc56b9 863 delete_basic_block (preheader);
5ce9450f
JJ
864 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
865 loop_preheader_edge (scalar_loop)->src);
ebfd146a
IR
866 }
867 else /* Add the copy at entry. */
868 {
5ce9450f
JJ
869 if (scalar_loop != loop)
870 {
871 /* Remove the non-necessary forwarder of scalar_loop again. */
872 redirect_edge_pred (single_succ_edge (preheader),
873 single_pred (preheader));
874 delete_basic_block (preheader);
875 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
876 loop_preheader_edge (scalar_loop)->src);
877 preheader = split_edge (loop_preheader_edge (loop));
878 entry_e = single_pred_edge (preheader);
879 }
880
2cfc56b9
RB
881 redirect_edge_and_branch_force (entry_e, new_preheader);
882 flush_pending_stmts (entry_e);
883 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
884
885 redirect_edge_and_branch_force (new_exit, preheader);
886 flush_pending_stmts (new_exit);
887 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
888
889 /* And remove the non-necessary forwarder again. Keep the other
890 one so we have a proper pre-header for the loop at the exit edge. */
5ce9450f
JJ
891 redirect_edge_pred (single_succ_edge (new_preheader),
892 single_pred (new_preheader));
2cfc56b9
RB
893 delete_basic_block (new_preheader);
894 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
895 loop_preheader_edge (new_loop)->src);
ebfd146a
IR
896 }
897
5ce9450f 898 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
a6c51a12 899 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
2cfc56b9 900
5ce9450f
JJ
901 if (scalar_loop != loop)
902 {
903 /* Update new_loop->header PHIs, so that on the preheader
904 edge they are the ones from loop rather than scalar_loop. */
538dd0b7 905 gphi_iterator gsi_orig, gsi_new;
5ce9450f
JJ
906 edge orig_e = loop_preheader_edge (loop);
907 edge new_e = loop_preheader_edge (new_loop);
908
909 for (gsi_orig = gsi_start_phis (loop->header),
910 gsi_new = gsi_start_phis (new_loop->header);
911 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
912 gsi_next (&gsi_orig), gsi_next (&gsi_new))
913 {
538dd0b7
DM
914 gphi *orig_phi = gsi_orig.phi ();
915 gphi *new_phi = gsi_new.phi ();
5ce9450f
JJ
916 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
917 location_t orig_locus
918 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
919
920 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
921 }
922 }
923
ebfd146a
IR
924 free (new_bbs);
925 free (bbs);
926
b2b29377 927 checking_verify_dominators (CDI_DOMINATORS);
2cfc56b9 928
ebfd146a
IR
929 return new_loop;
930}
931
932
933/* Given the condition statement COND, put it as the last statement
934 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
b8698a0f 935 Assumes that this is the single exit of the guarded loop.
86290011 936 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
ebfd146a
IR
937
938static edge
86290011
RG
939slpeel_add_loop_guard (basic_block guard_bb, tree cond,
940 gimple_seq cond_expr_stmt_list,
e78410bf
JH
941 basic_block exit_bb, basic_block dom_bb,
942 int probability)
ebfd146a
IR
943{
944 gimple_stmt_iterator gsi;
945 edge new_e, enter_e;
538dd0b7 946 gcond *cond_stmt;
ebfd146a
IR
947 gimple_seq gimplify_stmt_list = NULL;
948
949 enter_e = EDGE_SUCC (guard_bb, 0);
950 enter_e->flags &= ~EDGE_FALLTHRU;
951 enter_e->flags |= EDGE_FALSE_VALUE;
952 gsi = gsi_last_bb (guard_bb);
953
f7a06a98
RG
954 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
955 NULL_TREE);
86290011
RG
956 if (gimplify_stmt_list)
957 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
f7a06a98 958 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
86290011
RG
959 if (cond_expr_stmt_list)
960 gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
ebfd146a
IR
961
962 gsi = gsi_last_bb (guard_bb);
963 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
964
965 /* Add new edge to connect guard block to the merge/loop-exit block. */
966 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
e78410bf
JH
967
968 new_e->count = guard_bb->count;
969 new_e->probability = probability;
970 new_e->count = apply_probability (enter_e->count, probability);
971 enter_e->count -= new_e->count;
972 enter_e->probability = inverse_probability (probability);
ebfd146a
IR
973 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
974 return new_e;
975}
976
977
978/* This function verifies that the following restrictions apply to LOOP:
a6c51a12
YR
979 (1) it consists of exactly 2 basic blocks - header, and an empty latch
980 for innermost loop and 5 basic blocks for outer-loop.
981 (2) it is single entry, single exit
982 (3) its exit condition is the last stmt in the header
983 (4) E is the entry/exit edge of LOOP.
ebfd146a
IR
984 */
985
986bool
987slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
988{
989 edge exit_e = single_exit (loop);
990 edge entry_e = loop_preheader_edge (loop);
538dd0b7 991 gcond *orig_cond = get_loop_exit_condition (loop);
ebfd146a 992 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
a6c51a12 993 unsigned int num_bb = loop->inner? 5 : 2;
ebfd146a 994
ebfd146a
IR
995 /* All loops have an outer scope; the only case loop->outer is NULL is for
996 the function itself. */
a6c51a12
YR
997 if (!loop_outer (loop)
998 || loop->num_nodes != num_bb
ebfd146a
IR
999 || !empty_block_p (loop->latch)
1000 || !single_exit (loop)
1001 /* Verify that new loop exit condition can be trivially modified. */
1002 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1003 || (e != exit_e && e != entry_e))
1004 return false;
1005
1006 return true;
1007}
1008
ebfd146a 1009static void
b2b29377
MM
1010slpeel_checking_verify_cfg_after_peeling (struct loop *first_loop,
1011 struct loop *second_loop)
ebfd146a 1012{
b2b29377
MM
1013 if (!flag_checking)
1014 return;
1015
ebfd146a
IR
1016 basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1017 basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1018 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1019
1020 /* A guard that controls whether the second_loop is to be executed or skipped
1021 is placed in first_loop->exit. first_loop->exit therefore has two
1022 successors - one is the preheader of second_loop, and the other is a bb
1023 after second_loop.
1024 */
1025 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
b8698a0f 1026
ebfd146a
IR
1027 /* 1. Verify that one of the successors of first_loop->exit is the preheader
1028 of second_loop. */
b8698a0f 1029
ebfd146a
IR
1030 /* The preheader of new_loop is expected to have two predecessors:
1031 first_loop->exit and the block that precedes first_loop. */
1032
b8698a0f 1033 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
ebfd146a
IR
1034 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1035 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1036 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
1037 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
b8698a0f 1038
ebfd146a
IR
1039 /* Verify that the other successor of first_loop->exit is after the
1040 second_loop. */
1041 /* TODO */
1042}
ebfd146a
IR
1043
1044/* If the run time cost model check determines that vectorization is
1045 not profitable and hence scalar loop should be generated then set
1046 FIRST_NITERS to prologue peeled iterations. This will allow all the
1047 iterations to be executed in the prologue peeled scalar loop. */
1048
1049static void
1050set_prologue_iterations (basic_block bb_before_first_loop,
5d2eb24b 1051 tree *first_niters,
ebfd146a 1052 struct loop *loop,
e78410bf
JH
1053 unsigned int th,
1054 int probability)
ebfd146a
IR
1055{
1056 edge e;
1057 basic_block cond_bb, then_bb;
1058 tree var, prologue_after_cost_adjust_name;
1059 gimple_stmt_iterator gsi;
538dd0b7 1060 gphi *newphi;
ebfd146a 1061 edge e_true, e_false, e_fallthru;
538dd0b7 1062 gcond *cond_stmt;
f7a06a98 1063 gimple_seq stmts = NULL;
ebfd146a 1064 tree cost_pre_condition = NULL_TREE;
b8698a0f 1065 tree scalar_loop_iters =
ebfd146a
IR
1066 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1067
1068 e = single_pred_edge (bb_before_first_loop);
c3284718 1069 cond_bb = split_edge (e);
ebfd146a
IR
1070
1071 e = single_pred_edge (bb_before_first_loop);
c3284718 1072 then_bb = split_edge (e);
ebfd146a
IR
1073 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1074
1075 e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1076 EDGE_FALSE_VALUE);
1077 set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1078
1079 e_true = EDGE_PRED (then_bb, 0);
1080 e_true->flags &= ~EDGE_FALLTHRU;
1081 e_true->flags |= EDGE_TRUE_VALUE;
1082
e78410bf
JH
1083 e_true->probability = probability;
1084 e_false->probability = inverse_probability (probability);
1085 e_true->count = apply_probability (cond_bb->count, probability);
1086 e_false->count = cond_bb->count - e_true->count;
1087 then_bb->frequency = EDGE_FREQUENCY (e_true);
1088 then_bb->count = e_true->count;
1089
ebfd146a 1090 e_fallthru = EDGE_SUCC (then_bb, 0);
e78410bf 1091 e_fallthru->count = then_bb->count;
ebfd146a 1092
f7a06a98 1093 gsi = gsi_last_bb (cond_bb);
ebfd146a 1094 cost_pre_condition =
b8698a0f 1095 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
ebfd146a
IR
1096 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1097 cost_pre_condition =
f7a06a98
RG
1098 force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
1099 NULL_TREE, false, GSI_CONTINUE_LINKING);
1100 cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
1101 NULL_TREE, NULL_TREE);
ebfd146a 1102 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
b8698a0f 1103
ebfd146a
IR
1104 var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1105 "prologue_after_cost_adjust");
b8698a0f 1106 prologue_after_cost_adjust_name =
ebfd146a
IR
1107 force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1108
1109 gsi = gsi_last_bb (then_bb);
1110 if (stmts)
1111 gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1112
1113 newphi = create_phi_node (var, bb_before_first_loop);
b8698a0f 1114 add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
9e227d60
DC
1115 UNKNOWN_LOCATION);
1116 add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
ebfd146a 1117
5d2eb24b 1118 *first_niters = PHI_RESULT (newphi);
ebfd146a
IR
1119}
1120
ebfd146a
IR
1121/* Function slpeel_tree_peel_loop_to_edge.
1122
1123 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1124 that is placed on the entry (exit) edge E of LOOP. After this transformation
1125 we have two loops one after the other - first-loop iterates FIRST_NITERS
1126 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
b8698a0f 1127 If the cost model indicates that it is profitable to emit a scalar
ebfd146a
IR
1128 loop instead of the vector one, then the prolog (epilog) loop will iterate
1129 for the entire unchanged scalar iterations of the loop.
1130
1131 Input:
1132 - LOOP: the loop to be peeled.
5ce9450f
JJ
1133 - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1134 should be copied.
ebfd146a
IR
1135 - E: the exit or entry edge of LOOP.
1136 If it is the entry edge, we peel the first iterations of LOOP. In this
1137 case first-loop is LOOP, and second-loop is the newly created loop.
1138 If it is the exit edge, we peel the last iterations of LOOP. In this
1139 case, first-loop is the newly created loop, and second-loop is LOOP.
1140 - NITERS: the number of iterations that LOOP iterates.
1141 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1142 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1143 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1144 is false, the caller of this function may want to take care of this
1145 (this can be useful if we don't want new stmts added to first-loop).
1146 - TH: cost model profitability threshold of iterations for vectorization.
1147 - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1148 during versioning and hence needs to occur during
b8698a0f 1149 prologue generation or whether cost model check
ebfd146a
IR
1150 has not occurred during prologue generation and hence
1151 needs to occur during epilogue generation.
e78410bf
JH
1152 - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1153 - BOUND2 is the upper bound on number of iterations of the second loop (if known)
b8698a0f 1154
ebfd146a
IR
1155
1156 Output:
1157 The function returns a pointer to the new loop-copy, or NULL if it failed
1158 to perform the transformation.
1159
1160 The function generates two if-then-else guards: one before the first loop,
1161 and the other before the second loop:
1162 The first guard is:
1163 if (FIRST_NITERS == 0) then skip the first loop,
1164 and go directly to the second loop.
1165 The second guard is:
1166 if (FIRST_NITERS == NITERS) then skip the second loop.
1167
86290011
RG
1168 If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1169 then the generated condition is combined with COND_EXPR and the
1170 statements in COND_EXPR_STMT_LIST are emitted together with it.
1171
ebfd146a
IR
1172 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1173 FORNOW the resulting code will not be in loop-closed-ssa form.
1174*/
1175
5ce9450f
JJ
1176static struct loop *
1177slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
5d2eb24b 1178 edge e, tree *first_niters,
ebfd146a 1179 tree niters, bool update_first_loop_count,
86290011 1180 unsigned int th, bool check_profitability,
e78410bf
JH
1181 tree cond_expr, gimple_seq cond_expr_stmt_list,
1182 int bound1, int bound2)
ebfd146a
IR
1183{
1184 struct loop *new_loop = NULL, *first_loop, *second_loop;
1185 edge skip_e;
1186 tree pre_condition = NULL_TREE;
ebfd146a
IR
1187 basic_block bb_before_second_loop, bb_after_second_loop;
1188 basic_block bb_before_first_loop;
1189 basic_block bb_between_loops;
1190 basic_block new_exit_bb;
538dd0b7 1191 gphi_iterator gsi;
ebfd146a 1192 edge exit_e = single_exit (loop);
b05e0233 1193 source_location loop_loc;
e78410bf
JH
1194 /* There are many aspects to how likely the first loop is going to be executed.
1195 Without histogram we can't really do good job. Simply set it to
1196 2/3, so the first loop is not reordered to the end of function and
1197 the hot path through stays short. */
1198 int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1199 int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1200 int probability_of_second_loop;
b8698a0f 1201
ebfd146a
IR
1202 if (!slpeel_can_duplicate_loop_p (loop, e))
1203 return NULL;
b8698a0f 1204
141310ef
RB
1205 /* We might have a queued need to update virtual SSA form. As we
1206 delete the update SSA machinery below after doing a regular
1207 incremental SSA update during loop copying make sure we don't
1208 lose that fact.
1209 ??? Needing to update virtual SSA form by renaming is unfortunate
1210 but not all of the vectorizer code inserting new loads / stores
1211 properly assigns virtual operands to those statements. */
1212 update_ssa (TODO_update_ssa_only_virtuals);
1213
e20f6b4b
JJ
1214 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1215 in the exit bb and rename all the uses after the loop. This simplifies
1216 the *guard[12] routines, which assume loop closed SSA form for all PHIs
1217 (but normally loop closed SSA form doesn't require virtual PHIs to be
1218 in the same form). Doing this early simplifies the checking what
1219 uses should be renamed. */
1220 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
ea057359 1221 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
e20f6b4b 1222 {
538dd0b7 1223 gphi *phi = gsi.phi ();
e20f6b4b
JJ
1224 for (gsi = gsi_start_phis (exit_e->dest);
1225 !gsi_end_p (gsi); gsi_next (&gsi))
ea057359 1226 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
e20f6b4b
JJ
1227 break;
1228 if (gsi_end_p (gsi))
1229 {
b731b390 1230 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
538dd0b7 1231 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
e20f6b4b
JJ
1232 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1233 imm_use_iterator imm_iter;
355fe088 1234 gimple *stmt;
e20f6b4b
JJ
1235 use_operand_p use_p;
1236
9e227d60 1237 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
e20f6b4b
JJ
1238 gimple_phi_set_result (new_phi, new_vop);
1239 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
b5fd0440
YR
1240 if (stmt != new_phi
1241 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
e20f6b4b
JJ
1242 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1243 SET_USE (use_p, new_vop);
1244 }
1245 break;
1246 }
ebfd146a
IR
1247
1248 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1249 Resulting CFG would be:
1250
1251 first_loop:
1252 do {
1253 } while ...
1254
1255 second_loop:
1256 do {
1257 } while ...
1258
1259 orig_exit_bb:
1260 */
b8698a0f 1261
5ce9450f
JJ
1262 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
1263 e)))
ebfd146a
IR
1264 {
1265 loop_loc = find_loop_location (loop);
78c60e3d
SS
1266 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1267 "tree_duplicate_loop_to_edge_cfg failed.\n");
ebfd146a
IR
1268 return NULL;
1269 }
b8698a0f 1270
684f25f4
AO
1271 if (MAY_HAVE_DEBUG_STMTS)
1272 {
9771b263 1273 gcc_assert (!adjust_vec.exists ());
ff4c81cc 1274 adjust_vec.create (32);
684f25f4
AO
1275 }
1276
ebfd146a
IR
1277 if (e == exit_e)
1278 {
1279 /* NEW_LOOP was placed after LOOP. */
1280 first_loop = loop;
1281 second_loop = new_loop;
1282 }
1283 else
1284 {
1285 /* NEW_LOOP was placed before LOOP. */
1286 first_loop = new_loop;
1287 second_loop = loop;
1288 }
1289
ebfd146a
IR
1290 /* 2. Add the guard code in one of the following ways:
1291
1292 2.a Add the guard that controls whether the first loop is executed.
1293 This occurs when this function is invoked for prologue or epilogue
1294 generation and when the cost model check can be done at compile time.
1295
1296 Resulting CFG would be:
1297
1298 bb_before_first_loop:
1299 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1300 GOTO first-loop
1301
1302 first_loop:
1303 do {
1304 } while ...
1305
1306 bb_before_second_loop:
1307
1308 second_loop:
1309 do {
1310 } while ...
1311
1312 orig_exit_bb:
1313
1314 2.b Add the cost model check that allows the prologue
1315 to iterate for the entire unchanged scalar
1316 iterations of the loop in the event that the cost
1317 model indicates that the scalar loop is more
1318 profitable than the vector one. This occurs when
1319 this function is invoked for prologue generation
1320 and the cost model check needs to be done at run
1321 time.
1322
1323 Resulting CFG after prologue peeling would be:
1324
1325 if (scalar_loop_iterations <= th)
1326 FIRST_NITERS = scalar_loop_iterations
1327
1328 bb_before_first_loop:
1329 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1330 GOTO first-loop
1331
1332 first_loop:
1333 do {
1334 } while ...
1335
1336 bb_before_second_loop:
1337
1338 second_loop:
1339 do {
1340 } while ...
1341
1342 orig_exit_bb:
1343
1344 2.c Add the cost model check that allows the epilogue
1345 to iterate for the entire unchanged scalar
1346 iterations of the loop in the event that the cost
1347 model indicates that the scalar loop is more
1348 profitable than the vector one. This occurs when
1349 this function is invoked for epilogue generation
1350 and the cost model check needs to be done at run
86290011
RG
1351 time. This check is combined with any pre-existing
1352 check in COND_EXPR to avoid versioning.
ebfd146a
IR
1353
1354 Resulting CFG after prologue peeling would be:
1355
1356 bb_before_first_loop:
1357 if ((scalar_loop_iterations <= th)
1358 ||
1359 FIRST_NITERS == 0) GOTO bb_before_second_loop
1360 GOTO first-loop
1361
1362 first_loop:
1363 do {
1364 } while ...
1365
1366 bb_before_second_loop:
1367
1368 second_loop:
1369 do {
1370 } while ...
1371
1372 orig_exit_bb:
1373 */
1374
1375 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
2cfc56b9
RB
1376 /* Loop copying insterted a forwarder block for us here. */
1377 bb_before_second_loop = single_exit (first_loop)->dest;
ebfd146a 1378
e78410bf
JH
1379 probability_of_second_loop = (inverse_probability (first_guard_probability)
1380 + combine_probabilities (second_guard_probability,
1381 first_guard_probability));
1382 /* Theoretically preheader edge of first loop and exit edge should have
1383 same frequencies. Loop exit probablities are however easy to get wrong.
1384 It is safer to copy value from original loop entry. */
1385 bb_before_second_loop->frequency
8ddb5a29
TJ
1386 = combine_probabilities (bb_before_first_loop->frequency,
1387 probability_of_second_loop);
e78410bf
JH
1388 bb_before_second_loop->count
1389 = apply_probability (bb_before_first_loop->count,
1390 probability_of_second_loop);
1391 single_succ_edge (bb_before_second_loop)->count
1392 = bb_before_second_loop->count;
1393
ebfd146a
IR
1394 /* Epilogue peeling. */
1395 if (!update_first_loop_count)
1396 {
95b3eff3
RB
1397 loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1398 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
1399 unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
1400 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1401 limit = limit + 1;
1402 if (check_profitability
1403 && th > limit)
1404 limit = th;
ebfd146a 1405 pre_condition =
95b3eff3
RB
1406 fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
1407 build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
86290011
RG
1408 if (cond_expr)
1409 {
1410 pre_condition =
1411 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1412 pre_condition,
1413 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1414 cond_expr));
1415 }
ebfd146a
IR
1416 }
1417
b8698a0f 1418 /* Prologue peeling. */
ebfd146a
IR
1419 else
1420 {
1421 if (check_profitability)
1422 set_prologue_iterations (bb_before_first_loop, first_niters,
e78410bf 1423 loop, th, first_guard_probability);
ebfd146a
IR
1424
1425 pre_condition =
5d2eb24b
IR
1426 fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1427 build_int_cst (TREE_TYPE (*first_niters), 0));
ebfd146a
IR
1428 }
1429
1430 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
86290011 1431 cond_expr_stmt_list,
e78410bf
JH
1432 bb_before_second_loop, bb_before_first_loop,
1433 inverse_probability (first_guard_probability));
1434 scale_loop_profile (first_loop, first_guard_probability,
1435 check_profitability && (int)th > bound1 ? th : bound1);
ebfd146a
IR
1436 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1437 first_loop == new_loop,
c334023f 1438 &new_exit_bb);
ebfd146a
IR
1439
1440
1441 /* 3. Add the guard that controls whether the second loop is executed.
1442 Resulting CFG would be:
1443
1444 bb_before_first_loop:
1445 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1446 GOTO first-loop
1447
1448 first_loop:
1449 do {
1450 } while ...
1451
1452 bb_between_loops:
1453 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1454 GOTO bb_before_second_loop
1455
1456 bb_before_second_loop:
1457
1458 second_loop:
1459 do {
1460 } while ...
1461
1462 bb_after_second_loop:
1463
1464 orig_exit_bb:
1465 */
1466
1467 bb_between_loops = new_exit_bb;
1468 bb_after_second_loop = split_edge (single_exit (second_loop));
1469
b8698a0f 1470 pre_condition =
5d2eb24b 1471 fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
86290011 1472 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
e78410bf
JH
1473 bb_after_second_loop, bb_before_first_loop,
1474 inverse_probability (second_guard_probability));
1475 scale_loop_profile (second_loop, probability_of_second_loop, bound2);
ebfd146a
IR
1476 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1477 second_loop == new_loop, &new_exit_bb);
1478
1479 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1480 */
1481 if (update_first_loop_count)
5d2eb24b 1482 slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
ebfd146a 1483
040d39ee
RG
1484 delete_update_ssa ();
1485
684f25f4
AO
1486 adjust_vec_debug_stmts ();
1487
ebfd146a
IR
1488 return new_loop;
1489}
1490
1491/* Function vect_get_loop_location.
1492
1493 Extract the location of the loop in the source code.
1494 If the loop is not well formed for vectorization, an estimated
1495 location is calculated.
1496 Return the loop location if succeed and NULL if not. */
1497
b05e0233 1498source_location
ebfd146a
IR
1499find_loop_location (struct loop *loop)
1500{
355fe088 1501 gimple *stmt = NULL;
ebfd146a
IR
1502 basic_block bb;
1503 gimple_stmt_iterator si;
1504
1505 if (!loop)
b05e0233 1506 return UNKNOWN_LOCATION;
ebfd146a
IR
1507
1508 stmt = get_loop_exit_condition (loop);
1509
502498d5
JJ
1510 if (stmt
1511 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
ebfd146a
IR
1512 return gimple_location (stmt);
1513
1514 /* If we got here the loop is probably not "well formed",
1515 try to estimate the loop location */
1516
1517 if (!loop->header)
b05e0233 1518 return UNKNOWN_LOCATION;
ebfd146a
IR
1519
1520 bb = loop->header;
1521
1522 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1523 {
1524 stmt = gsi_stmt (si);
502498d5 1525 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
ebfd146a
IR
1526 return gimple_location (stmt);
1527 }
1528
b05e0233 1529 return UNKNOWN_LOCATION;
ebfd146a
IR
1530}
1531
1532
ebfd146a
IR
1533/* Function vect_can_advance_ivs_p
1534
b8698a0f
L
1535 In case the number of iterations that LOOP iterates is unknown at compile
1536 time, an epilog loop will be generated, and the loop induction variables
1537 (IVs) will be "advanced" to the value they are supposed to take just before
ebfd146a
IR
1538 the epilog loop. Here we check that the access function of the loop IVs
1539 and the expression that represents the loop bound are simple enough.
1540 These restrictions will be relaxed in the future. */
1541
b8698a0f 1542bool
ebfd146a
IR
1543vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1544{
1545 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1546 basic_block bb = loop->header;
355fe088 1547 gimple *phi;
538dd0b7 1548 gphi_iterator gsi;
ebfd146a
IR
1549
1550 /* Analyze phi functions of the loop header. */
1551
73fbfcad 1552 if (dump_enabled_p ())
e645e942 1553 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
ebfd146a
IR
1554 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1555 {
ebfd146a
IR
1556 tree evolution_part;
1557
538dd0b7 1558 phi = gsi.phi ();
73fbfcad 1559 if (dump_enabled_p ())
ebfd146a 1560 {
78c60e3d
SS
1561 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1562 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
ebfd146a
IR
1563 }
1564
1565 /* Skip virtual phi's. The data dependences that are associated with
1566 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1567
ea057359 1568 if (virtual_operand_p (PHI_RESULT (phi)))
ebfd146a 1569 {
73fbfcad 1570 if (dump_enabled_p ())
78c60e3d 1571 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1572 "virtual phi. skip.\n");
ebfd146a
IR
1573 continue;
1574 }
1575
1576 /* Skip reduction phis. */
1577
1578 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1579 {
73fbfcad 1580 if (dump_enabled_p ())
78c60e3d 1581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1582 "reduc phi. skip.\n");
ebfd146a
IR
1583 continue;
1584 }
1585
1586 /* Analyze the evolution function. */
1587
afb119be
RB
1588 evolution_part
1589 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
ebfd146a
IR
1590 if (evolution_part == NULL_TREE)
1591 {
73fbfcad 1592 if (dump_enabled_p ())
afb119be 1593 dump_printf (MSG_MISSED_OPTIMIZATION,
e645e942 1594 "No access function or evolution.\n");
ebfd146a
IR
1595 return false;
1596 }
b8698a0f 1597
2a93954e
AH
1598 /* FORNOW: We do not transform initial conditions of IVs
1599 which evolution functions are not invariants in the loop. */
1600
1601 if (!expr_invariant_in_loop_p (loop, evolution_part))
1602 {
1603 if (dump_enabled_p ())
1604 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1605 "evolution not invariant in loop.\n");
1606 return false;
1607 }
1608
b8698a0f 1609 /* FORNOW: We do not transform initial conditions of IVs
ebfd146a
IR
1610 which evolution functions are a polynomial of degree >= 2. */
1611
1612 if (tree_is_chrec (evolution_part))
2a93954e
AH
1613 {
1614 if (dump_enabled_p ())
1615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1616 "evolution is chrec.\n");
1617 return false;
1618 }
ebfd146a
IR
1619 }
1620
1621 return true;
1622}
1623
1624
1625/* Function vect_update_ivs_after_vectorizer.
1626
1627 "Advance" the induction variables of LOOP to the value they should take
1628 after the execution of LOOP. This is currently necessary because the
1629 vectorizer does not handle induction variables that are used after the
1630 loop. Such a situation occurs when the last iterations of LOOP are
1631 peeled, because:
1632 1. We introduced new uses after LOOP for IVs that were not originally used
1633 after LOOP: the IVs of LOOP are now used by an epilog loop.
1634 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1635 times, whereas the loop IVs should be bumped N times.
1636
1637 Input:
1638 - LOOP - a loop that is going to be vectorized. The last few iterations
1639 of LOOP were peeled.
1640 - NITERS - the number of iterations that LOOP executes (before it is
1641 vectorized). i.e, the number of times the ivs should be bumped.
1642 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1643 coming out from LOOP on which there are uses of the LOOP ivs
1644 (this is the path from LOOP->exit to epilog_loop->preheader).
1645
1646 The new definitions of the ivs are placed in LOOP->exit.
1647 The phi args associated with the edge UPDATE_E in the bb
1648 UPDATE_E->dest are updated accordingly.
1649
1650 Assumption 1: Like the rest of the vectorizer, this function assumes
1651 a single loop exit that has a single predecessor.
1652
1653 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1654 organized in the same order.
1655
1656 Assumption 3: The access function of the ivs is simple enough (see
1657 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1658
1659 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
b8698a0f 1660 coming out of LOOP on which the ivs of LOOP are used (this is the path
ebfd146a
IR
1661 that leads to the epilog loop; other paths skip the epilog loop). This
1662 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1663 needs to have its phis updated.
1664 */
1665
1666static void
b8698a0f 1667vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
ebfd146a
IR
1668 edge update_e)
1669{
1670 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1671 basic_block exit_bb = single_exit (loop)->dest;
538dd0b7
DM
1672 gphi *phi, *phi1;
1673 gphi_iterator gsi, gsi1;
ebfd146a
IR
1674 basic_block update_bb = update_e->dest;
1675
47c32082 1676 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
ebfd146a
IR
1677
1678 /* Make sure there exists a single-predecessor exit bb: */
1679 gcc_assert (single_pred_p (exit_bb));
1680
1681 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1682 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1683 gsi_next (&gsi), gsi_next (&gsi1))
1684 {
ebfd146a 1685 tree init_expr;
550918ca
RG
1686 tree step_expr, off;
1687 tree type;
ebfd146a
IR
1688 tree var, ni, ni_name;
1689 gimple_stmt_iterator last_gsi;
0ac168a1 1690 stmt_vec_info stmt_info;
ebfd146a 1691
538dd0b7
DM
1692 phi = gsi.phi ();
1693 phi1 = gsi1.phi ();
73fbfcad 1694 if (dump_enabled_p ())
ebfd146a 1695 {
78c60e3d
SS
1696 dump_printf_loc (MSG_NOTE, vect_location,
1697 "vect_update_ivs_after_vectorizer: phi: ");
1698 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
ebfd146a
IR
1699 }
1700
1701 /* Skip virtual phi's. */
ea057359 1702 if (virtual_operand_p (PHI_RESULT (phi)))
ebfd146a 1703 {
73fbfcad 1704 if (dump_enabled_p ())
78c60e3d 1705 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1706 "virtual phi. skip.\n");
ebfd146a
IR
1707 continue;
1708 }
1709
1710 /* Skip reduction phis. */
0ac168a1 1711 stmt_info = vinfo_for_stmt (phi);
c9e33662
RB
1712 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1713 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
b8698a0f 1714 {
73fbfcad 1715 if (dump_enabled_p ())
78c60e3d 1716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1717 "reduc phi. skip.\n");
ebfd146a 1718 continue;
b8698a0f 1719 }
ebfd146a 1720
0ac168a1
RG
1721 type = TREE_TYPE (gimple_phi_result (phi));
1722 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1723 step_expr = unshare_expr (step_expr);
b8698a0f 1724
ebfd146a
IR
1725 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1726 of degree >= 2 or exponential. */
0ac168a1 1727 gcc_assert (!tree_is_chrec (step_expr));
ebfd146a 1728
0ac168a1 1729 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
ebfd146a 1730
550918ca
RG
1731 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1732 fold_convert (TREE_TYPE (step_expr), niters),
1733 step_expr);
0ac168a1 1734 if (POINTER_TYPE_P (type))
5d49b6a7 1735 ni = fold_build_pointer_plus (init_expr, off);
ebfd146a 1736 else
0ac168a1
RG
1737 ni = fold_build2 (PLUS_EXPR, type,
1738 init_expr, fold_convert (type, off));
ebfd146a 1739
0ac168a1 1740 var = create_tmp_var (type, "tmp");
ebfd146a
IR
1741
1742 last_gsi = gsi_last_bb (exit_bb);
1743 ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1744 true, GSI_SAME_STMT);
b8698a0f 1745
ebfd146a 1746 /* Fix phi expressions in the successor bb. */
684f25f4 1747 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
ebfd146a
IR
1748 }
1749}
1750
ebfd146a
IR
1751/* Function vect_do_peeling_for_loop_bound
1752
1753 Peel the last iterations of the loop represented by LOOP_VINFO.
b8698a0f 1754 The peeled iterations form a new epilog loop. Given that the loop now
ebfd146a
IR
1755 iterates NITERS times, the new epilog loop iterates
1756 NITERS % VECTORIZATION_FACTOR times.
b8698a0f 1757
d9157f15
IE
1758 If CHECK_PROFITABILITY is 1 then profitability check is generated
1759 using TH as a cost model profitability threshold of iterations for
1760 vectorization.
1761
b8698a0f 1762 The original loop will later be made to iterate
86290011
RG
1763 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1764
1765 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1766 test. */
ebfd146a 1767
b8698a0f 1768void
f3c92486
RB
1769vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1770 tree ni_name, tree ratio_mult_vf_name,
368117e8 1771 unsigned int th, bool check_profitability)
ebfd146a 1772{
ebfd146a 1773 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5ce9450f 1774 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
ebfd146a
IR
1775 struct loop *new_loop;
1776 edge update_e;
1777 basic_block preheader;
1778 int loop_num;
d68d56b5 1779 int max_iter;
368117e8
RG
1780 tree cond_expr = NULL_TREE;
1781 gimple_seq cond_expr_stmt_list = NULL;
ebfd146a 1782
73fbfcad 1783 if (dump_enabled_p ())
ccb3ad87 1784 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1785 "=== vect_do_peeling_for_loop_bound ===\n");
ebfd146a
IR
1786
1787 initialize_original_copy_tables ();
1788
b8698a0f 1789 loop_num = loop->num;
ebfd146a 1790
5ce9450f
JJ
1791 new_loop
1792 = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1793 &ratio_mult_vf_name, ni_name, false,
1794 th, check_profitability,
1795 cond_expr, cond_expr_stmt_list,
1796 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
ebfd146a
IR
1797 gcc_assert (new_loop);
1798 gcc_assert (loop_num == loop->num);
b2b29377 1799 slpeel_checking_verify_cfg_after_peeling (loop, new_loop);
ebfd146a
IR
1800
1801 /* A guard that controls whether the new_loop is to be executed or skipped
1802 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1803 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1804 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1805 is on the path where the LOOP IVs are used and need to be updated. */
1806
1807 preheader = loop_preheader_edge (new_loop)->src;
1808 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1809 update_e = EDGE_PRED (preheader, 0);
1810 else
1811 update_e = EDGE_PRED (preheader, 1);
1812
b8698a0f 1813 /* Update IVs of original loop as if they were advanced
ebfd146a 1814 by ratio_mult_vf_name steps. */
b8698a0f 1815 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
ebfd146a 1816
22458c5a
JH
1817 /* For vectorization factor N, we need to copy last N-1 values in epilogue
1818 and this means N-2 loopback edge executions.
1819
1820 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1821 will execute at least LOOP_VINFO_VECT_FACTOR times. */
1822 max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1823 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1824 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
368117e8 1825 if (check_profitability)
22458c5a 1826 max_iter = MAX (max_iter, (int) th - 1);
807e902e 1827 record_niter_bound (new_loop, max_iter, false, true);
ccb3ad87 1828 dump_printf (MSG_NOTE,
78c60e3d
SS
1829 "Setting upper bound of nb iterations for epilogue "
1830 "loop to %d\n", max_iter);
7d5a99f4 1831
ebfd146a
IR
1832 /* After peeling we have to reset scalar evolution analyzer. */
1833 scev_reset ();
1834
1835 free_original_copy_tables ();
1836}
1837
1838
1839/* Function vect_gen_niters_for_prolog_loop
1840
1841 Set the number of iterations for the loop represented by LOOP_VINFO
1842 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1843 and the misalignment of DR - the data reference recorded in
b8698a0f 1844 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
ebfd146a
IR
1845 this loop, the data reference DR will refer to an aligned location.
1846
1847 The following computation is generated:
1848
1849 If the misalignment of DR is known at compile time:
1850 addr_mis = int mis = DR_MISALIGNMENT (dr);
1851 Else, compute address misalignment in bytes:
5aea1e76 1852 addr_mis = addr & (vectype_align - 1)
ebfd146a
IR
1853
1854 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1855
1856 (elem_size = element type size; an element is the scalar element whose type
1857 is the inner type of the vectype)
1858
1859 When the step of the data-ref in the loop is not 1 (as in interleaved data
1860 and SLP), the number of iterations of the prolog must be divided by the step
1861 (which is equal to the size of interleaved group).
1862
1863 The above formulas assume that VF == number of elements in the vector. This
1864 may not hold when there are multiple-types in the loop.
1865 In this case, for some data-references in the loop the VF does not represent
1866 the number of elements that fit in the vector. Therefore, instead of VF we
1867 use TYPE_VECTOR_SUBPARTS. */
1868
b8698a0f 1869static tree
e78410bf 1870vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
ebfd146a
IR
1871{
1872 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1873 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1874 tree var;
1875 gimple_seq stmts;
1876 tree iters, iters_name;
1877 edge pe;
1878 basic_block new_bb;
355fe088 1879 gimple *dr_stmt = DR_STMT (dr);
ebfd146a
IR
1880 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1881 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1882 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1883 tree niters_type = TREE_TYPE (loop_niters);
ebfd146a
IR
1884 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1885
b8698a0f 1886 pe = loop_preheader_edge (loop);
ebfd146a 1887
15e693cc 1888 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
ebfd146a 1889 {
15e693cc 1890 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
ebfd146a 1891
73fbfcad 1892 if (dump_enabled_p ())
ccb3ad87 1893 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1894 "known peeling = %d.\n", npeel);
ebfd146a 1895
720f5239 1896 iters = build_int_cst (niters_type, npeel);
15e693cc 1897 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
ebfd146a
IR
1898 }
1899 else
1900 {
1901 gimple_seq new_stmts = NULL;
d8ba5b19
RG
1902 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1903 tree offset = negative
5fa79de8 1904 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
b8698a0f 1905 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
d8ba5b19 1906 &new_stmts, offset, loop);
96f9265a 1907 tree type = unsigned_type_for (TREE_TYPE (start_addr));
5aea1e76
UW
1908 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1909 HOST_WIDE_INT elem_size =
1910 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1911 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
ebfd146a
IR
1912 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1913 tree nelements_tree = build_int_cst (type, nelements);
1914 tree byte_misalign;
1915 tree elem_misalign;
1916
1917 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1918 gcc_assert (!new_bb);
b8698a0f 1919
5aea1e76 1920 /* Create: byte_misalign = addr & (vectype_align - 1) */
b8698a0f 1921 byte_misalign =
720f5239 1922 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
5aea1e76 1923 vectype_align_minus_1);
b8698a0f 1924
ebfd146a
IR
1925 /* Create: elem_misalign = byte_misalign / element_size */
1926 elem_misalign =
1927 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1928
1929 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
d8ba5b19
RG
1930 if (negative)
1931 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1932 else
1933 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
ebfd146a
IR
1934 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1935 iters = fold_convert (niters_type, iters);
e78410bf 1936 *bound = nelements;
ebfd146a
IR
1937 }
1938
1939 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1940 /* If the loop bound is known at compile time we already verified that it is
1941 greater than vf; since the misalignment ('iters') is at most vf, there's
1942 no need to generate the MIN_EXPR in this case. */
1943 if (TREE_CODE (loop_niters) != INTEGER_CST)
1944 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1945
73fbfcad 1946 if (dump_enabled_p ())
ebfd146a 1947 {
ccb3ad87 1948 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1949 "niters for prolog loop: ");
ccb3ad87 1950 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
e645e942 1951 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1952 }
1953
1954 var = create_tmp_var (niters_type, "prolog_loop_niters");
ebfd146a
IR
1955 stmts = NULL;
1956 iters_name = force_gimple_operand (iters, &stmts, false, var);
1957
1958 /* Insert stmt on loop preheader edge. */
1959 if (stmts)
1960 {
1961 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1962 gcc_assert (!new_bb);
1963 }
1964
b8698a0f 1965 return iters_name;
ebfd146a
IR
1966}
1967
1968
1969/* Function vect_update_init_of_dr
1970
1971 NITERS iterations were peeled from LOOP. DR represents a data reference
1972 in LOOP. This function updates the information recorded in DR to
b8698a0f 1973 account for the fact that the first NITERS iterations had already been
ebfd146a
IR
1974 executed. Specifically, it updates the OFFSET field of DR. */
1975
1976static void
1977vect_update_init_of_dr (struct data_reference *dr, tree niters)
1978{
1979 tree offset = DR_OFFSET (dr);
b8698a0f 1980
ebfd146a
IR
1981 niters = fold_build2 (MULT_EXPR, sizetype,
1982 fold_convert (sizetype, niters),
1983 fold_convert (sizetype, DR_STEP (dr)));
587aa063
RG
1984 offset = fold_build2 (PLUS_EXPR, sizetype,
1985 fold_convert (sizetype, offset), niters);
ebfd146a
IR
1986 DR_OFFSET (dr) = offset;
1987}
1988
1989
1990/* Function vect_update_inits_of_drs
1991
b8698a0f
L
1992 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1993 This function updates the information recorded for the data references in
1994 the loop to account for the fact that the first NITERS iterations had
ebfd146a
IR
1995 already been executed. Specifically, it updates the initial_condition of
1996 the access_function of all the data_references in the loop. */
1997
1998static void
1999vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2000{
2001 unsigned int i;
9771b263 2002 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
ebfd146a 2003 struct data_reference *dr;
78c60e3d 2004
73fbfcad 2005 if (dump_enabled_p ())
ccb3ad87 2006 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2007 "=== vect_update_inits_of_dr ===\n");
ebfd146a 2008
9771b263 2009 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
2010 vect_update_init_of_dr (dr, niters);
2011}
2012
2013
2014/* Function vect_do_peeling_for_alignment
2015
2016 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2017 'niters' is set to the misalignment of one of the data references in the
2018 loop, thereby forcing it to refer to an aligned location at the beginning
2019 of the execution of this loop. The data reference for which we are
d9157f15
IE
2020 peeling is recorded in LOOP_VINFO_UNALIGNED_DR.
2021
2022 If CHECK_PROFITABILITY is 1 then profitability check is generated
2023 using TH as a cost model profitability threshold of iterations for
2024 vectorization. */
ebfd146a
IR
2025
2026void
f3c92486 2027vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
368117e8 2028 unsigned int th, bool check_profitability)
ebfd146a
IR
2029{
2030 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5ce9450f 2031 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
f3c92486 2032 tree niters_of_prolog_loop;
b61b1f17 2033 tree wide_prolog_niters;
ebfd146a 2034 struct loop *new_loop;
03fd03d5 2035 int max_iter;
e78410bf 2036 int bound = 0;
ebfd146a 2037
73fbfcad 2038 if (dump_enabled_p ())
9cc1fb4b
XDL
2039 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2040 "loop peeled for vectorization to enhance"
2041 " alignment\n");
ebfd146a
IR
2042
2043 initialize_original_copy_tables ();
2044
f3c92486
RB
2045 gimple_seq stmts = NULL;
2046 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5d2eb24b 2047 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
e78410bf
JH
2048 ni_name,
2049 &bound);
ebfd146a 2050
ebfd146a
IR
2051 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2052 new_loop =
5ce9450f
JJ
2053 slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2054 loop_preheader_edge (loop),
5d2eb24b 2055 &niters_of_prolog_loop, ni_name, true,
e78410bf 2056 th, check_profitability, NULL_TREE, NULL,
5ce9450f 2057 bound, 0);
ebfd146a
IR
2058
2059 gcc_assert (new_loop);
b2b29377 2060 slpeel_checking_verify_cfg_after_peeling (new_loop, loop);
22458c5a
JH
2061 /* For vectorization factor N, we need to copy at most N-1 values
2062 for alignment and this means N-2 loopback edge executions. */
2063 max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
368117e8 2064 if (check_profitability)
22458c5a 2065 max_iter = MAX (max_iter, (int) th - 1);
807e902e 2066 record_niter_bound (new_loop, max_iter, false, true);
ccb3ad87 2067 dump_printf (MSG_NOTE,
78c60e3d
SS
2068 "Setting upper bound of nb iterations for prologue "
2069 "loop to %d\n", max_iter);
ebfd146a
IR
2070
2071 /* Update number of times loop executes. */
ebfd146a 2072 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
15e693cc 2073 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
95b3eff3
RB
2074 LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2075 TREE_TYPE (ni_name),
2076 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
ebfd146a 2077
5d2eb24b
IR
2078 if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2079 wide_prolog_niters = niters_of_prolog_loop;
2080 else
2081 {
2082 gimple_seq seq = NULL;
2083 edge pe = loop_preheader_edge (loop);
2084 tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2085 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
5d2eb24b
IR
2086 wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2087 var);
2088 if (seq)
2089 {
2090 /* Insert stmt on loop preheader edge. */
2091 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2092 gcc_assert (!new_bb);
2093 }
2094 }
2095
ebfd146a 2096 /* Update the init conditions of the access functions of all data refs. */
b61b1f17 2097 vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
ebfd146a
IR
2098
2099 /* After peeling we have to reset scalar evolution analyzer. */
2100 scev_reset ();
2101
2102 free_original_copy_tables ();
2103}
2104
01d32b2b
BC
2105/* Function vect_create_cond_for_niters_checks.
2106
2107 Create a conditional expression that represents the run-time checks for
2108 loop's niter. The loop is guaranteed to to terminate if the run-time
2109 checks hold.
2110
2111 Input:
2112 COND_EXPR - input conditional expression. New conditions will be chained
2113 with logical AND operation. If it is NULL, then the function
2114 is used to return the number of alias checks.
2115 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2116 to be checked.
2117
2118 Output:
2119 COND_EXPR - conditional expression.
2120
2121 The returned COND_EXPR is the conditional expression to be used in the
2122 if statement that controls which version of the loop gets executed at
2123 runtime. */
2124
2125static void
2126vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2127{
2128 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
2129
2130 if (*cond_expr)
2131 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2132 *cond_expr, part_cond_expr);
2133 else
2134 *cond_expr = part_cond_expr;
2135}
ebfd146a
IR
2136
2137/* Function vect_create_cond_for_align_checks.
2138
2139 Create a conditional expression that represents the alignment checks for
2140 all of data references (array element references) whose alignment must be
2141 checked at runtime.
2142
2143 Input:
2144 COND_EXPR - input conditional expression. New conditions will be chained
2145 with logical AND operation.
2146 LOOP_VINFO - two fields of the loop information are used.
2147 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2148 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2149
2150 Output:
2151 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2152 expression.
2153 The returned value is the conditional expression to be used in the if
2154 statement that controls which version of the loop gets executed at runtime.
2155
2156 The algorithm makes two assumptions:
2157 1) The number of bytes "n" in a vector is a power of 2.
2158 2) An address "a" is aligned if a%n is zero and that this
2159 test can be done as a&(n-1) == 0. For example, for 16
2160 byte vectors the test is a&0xf == 0. */
2161
2162static void
2163vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2164 tree *cond_expr,
2165 gimple_seq *cond_expr_stmt_list)
2166{
2167 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
355fe088 2168 vec<gimple *> may_misalign_stmts
ebfd146a 2169 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
355fe088 2170 gimple *ref_stmt;
ebfd146a
IR
2171 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2172 tree mask_cst;
2173 unsigned int i;
ebfd146a
IR
2174 tree int_ptrsize_type;
2175 char tmp_name[20];
2176 tree or_tmp_name = NULL_TREE;
83d5977e 2177 tree and_tmp_name;
355fe088 2178 gimple *and_stmt;
ebfd146a
IR
2179 tree ptrsize_zero;
2180 tree part_cond_expr;
2181
2182 /* Check that mask is one less than a power of 2, i.e., mask is
2183 all zeros followed by all ones. */
2184 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2185
96f9265a 2186 int_ptrsize_type = signed_type_for (ptr_type_node);
ebfd146a
IR
2187
2188 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2189 of the first vector of the i'th data reference. */
2190
9771b263 2191 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
ebfd146a
IR
2192 {
2193 gimple_seq new_stmt_list = NULL;
2194 tree addr_base;
83d5977e
RG
2195 tree addr_tmp_name;
2196 tree new_or_tmp_name;
355fe088 2197 gimple *addr_stmt, *or_stmt;
d8ba5b19
RG
2198 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2199 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2200 bool negative = tree_int_cst_compare
2201 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2202 tree offset = negative
aad83b7c 2203 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
ebfd146a
IR
2204
2205 /* create: addr_tmp = (int)(address_of_first_vector) */
2206 addr_base =
2207 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
d8ba5b19 2208 offset, loop);
ebfd146a
IR
2209 if (new_stmt_list != NULL)
2210 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2211
83d5977e
RG
2212 sprintf (tmp_name, "addr2int%d", i);
2213 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
0d0e4a03 2214 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
ebfd146a
IR
2215 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2216
2217 /* The addresses are OR together. */
2218
2219 if (or_tmp_name != NULL_TREE)
2220 {
2221 /* create: or_tmp = or_tmp | addr_tmp */
83d5977e
RG
2222 sprintf (tmp_name, "orptrs%d", i);
2223 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
0d0e4a03
JJ
2224 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2225 or_tmp_name, addr_tmp_name);
ebfd146a
IR
2226 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2227 or_tmp_name = new_or_tmp_name;
2228 }
2229 else
2230 or_tmp_name = addr_tmp_name;
2231
2232 } /* end for i */
2233
2234 mask_cst = build_int_cst (int_ptrsize_type, mask);
2235
2236 /* create: and_tmp = or_tmp & mask */
83d5977e 2237 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
ebfd146a 2238
0d0e4a03
JJ
2239 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2240 or_tmp_name, mask_cst);
ebfd146a
IR
2241 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2242
2243 /* Make and_tmp the left operand of the conditional test against zero.
2244 if and_tmp has a nonzero bit then some address is unaligned. */
2245 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2246 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2247 and_tmp_name, ptrsize_zero);
2248 if (*cond_expr)
2249 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2250 *cond_expr, part_cond_expr);
2251 else
2252 *cond_expr = part_cond_expr;
2253}
2254
ebfd146a
IR
2255/* Function vect_create_cond_for_alias_checks.
2256
2257 Create a conditional expression that represents the run-time checks for
2258 overlapping of address ranges represented by a list of data references
2259 relations passed as input.
2260
2261 Input:
2262 COND_EXPR - input conditional expression. New conditions will be chained
a05a89fa
CH
2263 with logical AND operation. If it is NULL, then the function
2264 is used to return the number of alias checks.
ebfd146a
IR
2265 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2266 to be checked.
2267
2268 Output:
2269 COND_EXPR - conditional expression.
ebfd146a 2270
a05a89fa 2271 The returned COND_EXPR is the conditional expression to be used in the if
ebfd146a
IR
2272 statement that controls which version of the loop gets executed at runtime.
2273*/
2274
a05a89fa 2275void
4bdd44c4 2276vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
ebfd146a 2277{
93bdc3ed 2278 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
a05a89fa
CH
2279 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2280 tree part_cond_expr;
ebfd146a
IR
2281
2282 /* Create expression
36fc3799
RS
2283 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2284 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
b8698a0f 2285 &&
ebfd146a
IR
2286 ...
2287 &&
36fc3799
RS
2288 ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2289 || (load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
ebfd146a 2290
a05a89fa 2291 if (comp_alias_ddrs.is_empty ())
ebfd146a
IR
2292 return;
2293
a05a89fa 2294 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
ebfd146a 2295 {
93bdc3ed
CH
2296 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2297 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
a05a89fa
CH
2298 tree segment_length_a = dr_a.seg_len;
2299 tree segment_length_b = dr_b.seg_len;
97816be4
BC
2300 tree addr_base_a = DR_BASE_ADDRESS (dr_a.dr);
2301 tree addr_base_b = DR_BASE_ADDRESS (dr_b.dr);
2302 tree offset_a = DR_OFFSET (dr_a.dr), offset_b = DR_OFFSET (dr_b.dr);
2303
2304 offset_a = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_a),
2305 offset_a, DR_INIT (dr_a.dr));
2306 offset_b = fold_build2 (PLUS_EXPR, TREE_TYPE (offset_b),
2307 offset_b, DR_INIT (dr_b.dr));
2308 addr_base_a = fold_build_pointer_plus (addr_base_a, offset_a);
2309 addr_base_b = fold_build_pointer_plus (addr_base_b, offset_b);
ebfd146a 2310
73fbfcad 2311 if (dump_enabled_p ())
ebfd146a 2312 {
ccb3ad87 2313 dump_printf_loc (MSG_NOTE, vect_location,
a05a89fa
CH
2314 "create runtime check for data references ");
2315 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
ccb3ad87 2316 dump_printf (MSG_NOTE, " and ");
a05a89fa
CH
2317 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2318 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
2319 }
2320
a05a89fa
CH
2321 tree seg_a_min = addr_base_a;
2322 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
82d89471
BM
2323 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2324 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2325 [a, a+12) */
a05a89fa 2326 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
82d89471
BM
2327 {
2328 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2329 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2330 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2331 }
d8ba5b19 2332
a05a89fa
CH
2333 tree seg_b_min = addr_base_b;
2334 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2335 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
82d89471
BM
2336 {
2337 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2338 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2339 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2340 }
ebfd146a 2341
b8698a0f 2342 part_cond_expr =
ebfd146a 2343 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
36fc3799
RS
2344 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2345 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
b8698a0f 2346
ebfd146a
IR
2347 if (*cond_expr)
2348 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2349 *cond_expr, part_cond_expr);
2350 else
2351 *cond_expr = part_cond_expr;
2352 }
ebfd146a 2353
73fbfcad 2354 if (dump_enabled_p ())
ccb3ad87 2355 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2356 "created %u versioning for alias checks.\n",
a05a89fa 2357 comp_alias_ddrs.length ());
ebfd146a
IR
2358}
2359
2360
2361/* Function vect_loop_versioning.
b8698a0f 2362
ebfd146a
IR
2363 If the loop has data references that may or may not be aligned or/and
2364 has data reference relations whose independence was not proven then
2365 two versions of the loop need to be generated, one which is vectorized
2366 and one which isn't. A test is then generated to control which of the
2367 loops is executed. The test checks for the alignment of all of the
2368 data references that may or may not be aligned. An additional
2369 sequence of runtime tests is generated for each pairs of DDRs whose
b8698a0f
L
2370 independence was not proven. The vectorized version of loop is
2371 executed only if both alias and alignment tests are passed.
2372
ebfd146a 2373 The test generated to check which version of loop is executed
b8698a0f 2374 is modified to also check for profitability as indicated by the
d9157f15 2375 cost model threshold TH.
86290011
RG
2376
2377 The versioning precondition(s) are placed in *COND_EXPR and
d68d56b5 2378 *COND_EXPR_STMT_LIST. */
ebfd146a
IR
2379
2380void
368117e8
RG
2381vect_loop_versioning (loop_vec_info loop_vinfo,
2382 unsigned int th, bool check_profitability)
ebfd146a 2383{
01d32b2b 2384 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
5ce9450f 2385 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
ebfd146a 2386 basic_block condition_bb;
538dd0b7
DM
2387 gphi_iterator gsi;
2388 gimple_stmt_iterator cond_exp_gsi;
ebfd146a
IR
2389 basic_block merge_bb;
2390 basic_block new_exit_bb;
2391 edge new_exit_e, e;
538dd0b7 2392 gphi *orig_phi, *new_phi;
368117e8 2393 tree cond_expr = NULL_TREE;
d68d56b5 2394 gimple_seq cond_expr_stmt_list = NULL;
ebfd146a
IR
2395 tree arg;
2396 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2397 gimple_seq gimplify_stmt_list = NULL;
2398 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
9cc1fb4b
XDL
2399 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2400 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
01d32b2b 2401 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
ebfd146a 2402
368117e8 2403 if (check_profitability)
01d32b2b
BC
2404 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2405 build_int_cst (TREE_TYPE (scalar_loop_iters),
2406 th));
2407
2408 if (version_niter)
2409 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2410
2411 if (cond_expr)
2412 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2413 is_gimple_condexpr, NULL_TREE);
ebfd146a 2414
9cc1fb4b 2415 if (version_align)
d68d56b5
RG
2416 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2417 &cond_expr_stmt_list);
ebfd146a 2418
9cc1fb4b 2419 if (version_alias)
4bdd44c4 2420 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
86290011 2421
d68d56b5
RG
2422 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2423 is_gimple_condexpr, NULL_TREE);
2424 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
ebfd146a
IR
2425
2426 initialize_original_copy_tables ();
5ce9450f
JJ
2427 if (scalar_loop)
2428 {
2429 edge scalar_e;
2430 basic_block preheader, scalar_preheader;
2431
2432 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2433 scale LOOP's frequencies instead. */
01d32b2b
BC
2434 nloop = loop_version (scalar_loop, cond_expr, &condition_bb, prob,
2435 REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
5ce9450f
JJ
2436 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2437 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2438 while we need to move it above LOOP's preheader. */
2439 e = loop_preheader_edge (loop);
2440 scalar_e = loop_preheader_edge (scalar_loop);
2441 gcc_assert (empty_block_p (e->src)
2442 && single_pred_p (e->src));
2443 gcc_assert (empty_block_p (scalar_e->src)
2444 && single_pred_p (scalar_e->src));
2445 gcc_assert (single_pred_p (condition_bb));
2446 preheader = e->src;
2447 scalar_preheader = scalar_e->src;
2448 scalar_e = find_edge (condition_bb, scalar_preheader);
2449 e = single_pred_edge (preheader);
2450 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2451 scalar_preheader);
2452 redirect_edge_and_branch_force (scalar_e, preheader);
2453 redirect_edge_and_branch_force (e, condition_bb);
2454 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2455 single_pred (condition_bb));
2456 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2457 single_pred (scalar_preheader));
2458 set_immediate_dominator (CDI_DOMINATORS, preheader,
2459 condition_bb);
2460 }
2461 else
01d32b2b
BC
2462 nloop = loop_version (loop, cond_expr, &condition_bb,
2463 prob, prob, REG_BR_PROB_BASE - prob, true);
2464
2465 if (version_niter)
2466 {
2467 /* The versioned loop could be infinite, we need to clear existing
2468 niter information which is copied from the original loop. */
2469 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2470 vect_free_loop_info_assumptions (nloop);
2471 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2472 loop_constraint_set (loop, LOOP_C_INFINITE);
2473 }
9cc1fb4b 2474
b05e0233 2475 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
9cc1fb4b
XDL
2476 && dump_enabled_p ())
2477 {
2478 if (version_alias)
2479 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2480 "loop versioned for vectorization because of "
2481 "possible aliasing\n");
2482 if (version_align)
2483 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2484 "loop versioned for vectorization to enhance "
2485 "alignment\n");
2486
2487 }
c3284718 2488 free_original_copy_tables ();
ebfd146a 2489
b8698a0f 2490 /* Loop versioning violates an assumption we try to maintain during
ebfd146a
IR
2491 vectorization - that the loop exit block has a single predecessor.
2492 After versioning, the exit block of both loop versions is the same
2493 basic block (i.e. it has two predecessors). Just in order to simplify
2494 following transformations in the vectorizer, we fix this situation
2495 here by adding a new (empty) block on the exit-edge of the loop,
5ce9450f
JJ
2496 with the proper loop-exit phis to maintain loop-closed-form.
2497 If loop versioning wasn't done from loop, but scalar_loop instead,
2498 merge_bb will have already just a single successor. */
b8698a0f 2499
ebfd146a 2500 merge_bb = single_exit (loop)->dest;
5ce9450f 2501 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
ebfd146a 2502 {
5ce9450f
JJ
2503 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2504 new_exit_bb = split_edge (single_exit (loop));
2505 new_exit_e = single_exit (loop);
2506 e = EDGE_SUCC (new_exit_bb, 0);
2507
2508 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2509 {
2510 tree new_res;
538dd0b7 2511 orig_phi = gsi.phi ();
b731b390 2512 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
5ce9450f
JJ
2513 new_phi = create_phi_node (new_res, new_exit_bb);
2514 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2515 add_phi_arg (new_phi, arg, new_exit_e,
2516 gimple_phi_arg_location_from_edge (orig_phi, e));
2517 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2518 }
b8698a0f 2519 }
ebfd146a
IR
2520
2521 /* End loop-exit-fixes after versioning. */
2522
d68d56b5 2523 if (cond_expr_stmt_list)
ebfd146a
IR
2524 {
2525 cond_exp_gsi = gsi_last_bb (condition_bb);
d68d56b5 2526 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
86290011 2527 GSI_SAME_STMT);
ebfd146a 2528 }
90eb75f2 2529 update_ssa (TODO_update_ssa);
ebfd146a 2530}