]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-loop-manip.c
re PR c++/67845 (ICE on invalid use of const qualifier on x86_64-linux-gnu in merge_t...
[thirdparty/gcc.git] / gcc / tree-vect-loop-manip.c
CommitLineData
b8698a0f 1/* Vectorizer Specific Loop Manipulations
5624e564 2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
78c60e3d 25#include "dumpfile.h"
c7131fb2 26#include "backend.h"
9fdcd34e 27#include "cfghooks.h"
40e23961 28#include "tree.h"
c7131fb2 29#include "gimple.h"
60393bbc 30#include "hard-reg-set.h"
c7131fb2
AM
31#include "ssa.h"
32#include "alias.h"
33#include "fold-const.h"
60393bbc 34#include "cfganal.h"
cf835838 35#include "gimple-pretty-print.h"
2fb9a547 36#include "internal-fn.h"
45b0be94 37#include "gimplify.h"
5be5c238 38#include "gimple-iterator.h"
18f429e2 39#include "gimplify-me.h"
442b4905 40#include "tree-cfg.h"
e28030cf 41#include "tree-ssa-loop-manip.h"
442b4905 42#include "tree-into-ssa.h"
7a300452 43#include "tree-ssa.h"
7ee2468b 44#include "tree-pass.h"
ebfd146a 45#include "cfgloop.h"
718f9c0f 46#include "diagnostic-core.h"
ebfd146a
IR
47#include "tree-scalar-evolution.h"
48#include "tree-vectorizer.h"
49#include "langhooks.h"
50
51/*************************************************************************
52 Simple Loop Peeling Utilities
53
54 Utilities to support loop peeling for vectorization purposes.
55 *************************************************************************/
56
57
58/* Renames the use *OP_P. */
59
60static void
61rename_use_op (use_operand_p op_p)
62{
63 tree new_name;
64
65 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
66 return;
67
68 new_name = get_current_def (USE_FROM_PTR (op_p));
69
70 /* Something defined outside of the loop. */
71 if (!new_name)
72 return;
73
74 /* An ordinary ssa name defined in the loop. */
75
76 SET_USE (op_p, new_name);
77}
78
79
a6c51a12
YR
80/* Renames the variables in basic block BB. Allow renaming of PHI argumnets
81 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
82 true. */
ebfd146a 83
2cfc56b9 84static void
a6c51a12 85rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
ebfd146a 86{
355fe088 87 gimple *stmt;
ebfd146a
IR
88 use_operand_p use_p;
89 ssa_op_iter iter;
90 edge e;
91 edge_iterator ei;
92 struct loop *loop = bb->loop_father;
a6c51a12
YR
93 struct loop *outer_loop = NULL;
94
95 if (rename_from_outer_loop)
96 {
97 gcc_assert (loop);
98 outer_loop = loop_outer (loop);
99 }
ebfd146a 100
538dd0b7
DM
101 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
102 gsi_next (&gsi))
ebfd146a
IR
103 {
104 stmt = gsi_stmt (gsi);
105 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
106 rename_use_op (use_p);
107 }
108
2cfc56b9 109 FOR_EACH_EDGE (e, ei, bb->preds)
ebfd146a 110 {
a6c51a12
YR
111 if (!flow_bb_inside_loop_p (loop, e->src)
112 && (!rename_from_outer_loop || e->src != outer_loop->header))
ebfd146a 113 continue;
538dd0b7
DM
114 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
115 gsi_next (&gsi))
116 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
ebfd146a
IR
117 }
118}
119
120
a79683d5 121struct adjust_info
684f25f4
AO
122{
123 tree from, to;
124 basic_block bb;
a79683d5 125};
684f25f4 126
684f25f4
AO
127/* A stack of values to be adjusted in debug stmts. We have to
128 process them LIFO, so that the closest substitution applies. If we
129 processed them FIFO, without the stack, we might substitute uses
130 with a PHI DEF that would soon become non-dominant, and when we got
131 to the suitable one, it wouldn't have anything to substitute any
132 more. */
ff4c81cc 133static vec<adjust_info, va_heap> adjust_vec;
684f25f4
AO
134
135/* Adjust any debug stmts that referenced AI->from values to use the
136 loop-closed AI->to, if the references are dominated by AI->bb and
137 not by the definition of AI->from. */
138
139static void
140adjust_debug_stmts_now (adjust_info *ai)
141{
142 basic_block bbphi = ai->bb;
143 tree orig_def = ai->from;
144 tree new_def = ai->to;
145 imm_use_iterator imm_iter;
355fe088 146 gimple *stmt;
684f25f4
AO
147 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
148
149 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
150
151 /* Adjust any debug stmts that held onto non-loop-closed
152 references. */
153 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
154 {
155 use_operand_p use_p;
156 basic_block bbuse;
157
158 if (!is_gimple_debug (stmt))
159 continue;
160
161 gcc_assert (gimple_debug_bind_p (stmt));
162
163 bbuse = gimple_bb (stmt);
164
165 if ((bbuse == bbphi
166 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
167 && !(bbuse == bbdef
168 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
169 {
170 if (new_def)
171 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
172 SET_USE (use_p, new_def);
173 else
174 {
175 gimple_debug_bind_reset_value (stmt);
176 update_stmt (stmt);
177 }
178 }
179 }
180}
181
182/* Adjust debug stmts as scheduled before. */
183
184static void
185adjust_vec_debug_stmts (void)
186{
187 if (!MAY_HAVE_DEBUG_STMTS)
188 return;
189
9771b263 190 gcc_assert (adjust_vec.exists ());
684f25f4 191
9771b263 192 while (!adjust_vec.is_empty ())
684f25f4 193 {
9771b263
DN
194 adjust_debug_stmts_now (&adjust_vec.last ());
195 adjust_vec.pop ();
684f25f4
AO
196 }
197
9771b263 198 adjust_vec.release ();
684f25f4
AO
199}
200
201/* Adjust any debug stmts that referenced FROM values to use the
202 loop-closed TO, if the references are dominated by BB and not by
203 the definition of FROM. If adjust_vec is non-NULL, adjustments
204 will be postponed until adjust_vec_debug_stmts is called. */
205
206static void
207adjust_debug_stmts (tree from, tree to, basic_block bb)
208{
209 adjust_info ai;
210
a471762f
RG
211 if (MAY_HAVE_DEBUG_STMTS
212 && TREE_CODE (from) == SSA_NAME
a52ca739 213 && ! SSA_NAME_IS_DEFAULT_DEF (from)
a471762f 214 && ! virtual_operand_p (from))
684f25f4
AO
215 {
216 ai.from = from;
217 ai.to = to;
218 ai.bb = bb;
219
9771b263
DN
220 if (adjust_vec.exists ())
221 adjust_vec.safe_push (ai);
684f25f4
AO
222 else
223 adjust_debug_stmts_now (&ai);
224 }
225}
226
227/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
228 to adjust any debug stmts that referenced the old phi arg,
229 presumably non-loop-closed references left over from other
230 transformations. */
231
232static void
355fe088 233adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
684f25f4
AO
234{
235 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
236
237 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
238
239 if (MAY_HAVE_DEBUG_STMTS)
240 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
241 gimple_bb (update_phi));
242}
243
ebfd146a 244
ebfd146a
IR
245/* Update PHI nodes for a guard of the LOOP.
246
247 Input:
248 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
249 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
250 originates from the guard-bb, skips LOOP and reaches the (unique) exit
251 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
252 We denote this bb NEW_MERGE_BB because before the guard code was added
253 it had a single predecessor (the LOOP header), and now it became a merge
254 point of two paths - the path that ends with the LOOP exit-edge, and
255 the path that ends with GUARD_EDGE.
256 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
257 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
258
259 ===> The CFG before the guard-code was added:
260 LOOP_header_bb:
261 loop_body
262 if (exit_loop) goto update_bb
263 else goto LOOP_header_bb
264 update_bb:
265
266 ==> The CFG after the guard-code was added:
267 guard_bb:
268 if (LOOP_guard_condition) goto new_merge_bb
269 else goto LOOP_header_bb
270 LOOP_header_bb:
271 loop_body
272 if (exit_loop_condition) goto new_merge_bb
273 else goto LOOP_header_bb
274 new_merge_bb:
275 goto update_bb
276 update_bb:
277
278 ==> The CFG after this function:
279 guard_bb:
280 if (LOOP_guard_condition) goto new_merge_bb
281 else goto LOOP_header_bb
282 LOOP_header_bb:
283 loop_body
284 if (exit_loop_condition) goto new_exit_bb
285 else goto LOOP_header_bb
286 new_exit_bb:
287 new_merge_bb:
288 goto update_bb
289 update_bb:
290
291 This function:
292 1. creates and updates the relevant phi nodes to account for the new
293 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
294 1.1. Create phi nodes at NEW_MERGE_BB.
295 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
296 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
297 2. preserves loop-closed-ssa-form by creating the required phi nodes
298 at the exit of LOOP (i.e, in NEW_EXIT_BB).
299
300 There are two flavors to this function:
301
302 slpeel_update_phi_nodes_for_guard1:
303 Here the guard controls whether we enter or skip LOOP, where LOOP is a
304 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
305 for variables that have phis in the loop header.
306
307 slpeel_update_phi_nodes_for_guard2:
308 Here the guard controls whether we enter or skip LOOP, where LOOP is an
309 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
310 for variables that have phis in the loop exit.
311
312 I.E., the overall structure is:
313
314 loop1_preheader_bb:
315 guard1 (goto loop1/merge1_bb)
316 loop1
317 loop1_exit_bb:
318 guard2 (goto merge1_bb/merge2_bb)
319 merge1_bb
320 loop2
321 loop2_exit_bb
322 merge2_bb
323 next_bb
324
325 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
326 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
327 that have phis in loop1->header).
328
329 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
330 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
331 that have phis in next_bb). It also adds some of these phis to
332 loop1_exit_bb.
333
334 slpeel_update_phi_nodes_for_guard1 is always called before
335 slpeel_update_phi_nodes_for_guard2. They are both needed in order
336 to create correct data-flow and loop-closed-ssa-form.
337
338 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
339 that change between iterations of a loop (and therefore have a phi-node
340 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
b8698a0f
L
341 phis for variables that are used out of the loop (and therefore have
342 loop-closed exit phis). Some variables may be both updated between
ebfd146a
IR
343 iterations and used after the loop. This is why in loop1_exit_bb we
344 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
345 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
346
347 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
348 an original loop. i.e., we have:
349
350 orig_loop
351 guard_bb (goto LOOP/new_merge)
352 new_loop <-- LOOP
353 new_exit
354 new_merge
355 next_bb
356
357 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
358 have:
359
360 new_loop
361 guard_bb (goto LOOP/new_merge)
362 orig_loop <-- LOOP
363 new_exit
364 new_merge
365 next_bb
366
367 The SSA names defined in the original loop have a current
026c3cfd
AH
368 reaching definition that records the corresponding new ssa-name
369 used in the new duplicated loop copy.
ebfd146a
IR
370 */
371
372/* Function slpeel_update_phi_nodes_for_guard1
b8698a0f 373
ebfd146a
IR
374 Input:
375 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
376 - DEFS - a bitmap of ssa names to mark new names for which we recorded
b8698a0f
L
377 information.
378
ebfd146a
IR
379 In the context of the overall structure, we have:
380
b8698a0f 381 loop1_preheader_bb:
ebfd146a
IR
382 guard1 (goto loop1/merge1_bb)
383LOOP-> loop1
384 loop1_exit_bb:
385 guard2 (goto merge1_bb/merge2_bb)
386 merge1_bb
387 loop2
388 loop2_exit_bb
389 merge2_bb
390 next_bb
391
392 For each name updated between loop iterations (i.e - for each name that has
393 an entry (loop-header) phi in LOOP) we create a new phi in:
394 1. merge1_bb (to account for the edge from guard1)
395 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
396*/
397
398static void
399slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
c334023f 400 bool is_new_loop, basic_block *new_exit_bb)
ebfd146a 401{
538dd0b7
DM
402 gphi *orig_phi, *new_phi;
403 gphi *update_phi, *update_phi2;
ebfd146a
IR
404 tree guard_arg, loop_arg;
405 basic_block new_merge_bb = guard_edge->dest;
406 edge e = EDGE_SUCC (new_merge_bb, 0);
407 basic_block update_bb = e->dest;
408 basic_block orig_bb = loop->header;
409 edge new_exit_e;
410 tree current_new_name;
538dd0b7 411 gphi_iterator gsi_orig, gsi_update;
ebfd146a
IR
412
413 /* Create new bb between loop and new_merge_bb. */
414 *new_exit_bb = split_edge (single_exit (loop));
415
416 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
417
418 for (gsi_orig = gsi_start_phis (orig_bb),
419 gsi_update = gsi_start_phis (update_bb);
420 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
421 gsi_next (&gsi_orig), gsi_next (&gsi_update))
422 {
e20f6b4b 423 source_location loop_locus, guard_locus;
070ecdfd 424 tree new_res;
538dd0b7
DM
425 orig_phi = gsi_orig.phi ();
426 update_phi = gsi_update.phi ();
ebfd146a 427
ebfd146a
IR
428 /** 1. Handle new-merge-point phis **/
429
430 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
b731b390 431 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
070ecdfd 432 new_phi = create_phi_node (new_res, new_merge_bb);
ebfd146a
IR
433
434 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
435 of LOOP. Set the two phi args in NEW_PHI for these edges: */
436 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
b8698a0f
L
437 loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
438 EDGE_SUCC (loop->latch,
f5045c96 439 0));
ebfd146a 440 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
b8698a0f
L
441 guard_locus
442 = gimple_phi_arg_location_from_edge (orig_phi,
f5045c96 443 loop_preheader_edge (loop));
ebfd146a 444
9e227d60
DC
445 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
446 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
ebfd146a
IR
447
448 /* 1.3. Update phi in successor block. */
449 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
450 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
684f25f4 451 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
ebfd146a
IR
452 update_phi2 = new_phi;
453
454
455 /** 2. Handle loop-closed-ssa-form phis **/
456
ea057359 457 if (virtual_operand_p (PHI_RESULT (orig_phi)))
ebfd146a
IR
458 continue;
459
460 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
b731b390 461 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
070ecdfd 462 new_phi = create_phi_node (new_res, *new_exit_bb);
ebfd146a
IR
463
464 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
9e227d60 465 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
ebfd146a
IR
466
467 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
468 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
684f25f4
AO
469 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
470 PHI_RESULT (new_phi));
ebfd146a
IR
471
472 /* 2.4. Record the newly created name with set_current_def.
473 We want to find a name such that
474 name = get_current_def (orig_loop_name)
475 and to set its current definition as follows:
476 set_current_def (name, new_phi_name)
477
478 If LOOP is a new loop then loop_arg is already the name we're
479 looking for. If LOOP is the original loop, then loop_arg is
480 the orig_loop_name and the relevant name is recorded in its
481 current reaching definition. */
482 if (is_new_loop)
483 current_new_name = loop_arg;
484 else
485 {
486 current_new_name = get_current_def (loop_arg);
487 /* current_def is not available only if the variable does not
488 change inside the loop, in which case we also don't care
489 about recording a current_def for it because we won't be
490 trying to create loop-exit-phis for it. */
491 if (!current_new_name)
492 continue;
493 }
39719c84
JJ
494 tree new_name = get_current_def (current_new_name);
495 /* Because of peeled_chrec optimization it is possible that we have
496 set this earlier. Verify the PHI has the same value. */
497 if (new_name)
498 {
355fe088 499 gimple *phi = SSA_NAME_DEF_STMT (new_name);
39719c84
JJ
500 gcc_assert (gimple_code (phi) == GIMPLE_PHI
501 && gimple_bb (phi) == *new_exit_bb
502 && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
503 == loop_arg));
504 continue;
505 }
ebfd146a
IR
506
507 set_current_def (current_new_name, PHI_RESULT (new_phi));
ebfd146a
IR
508 }
509}
510
511
512/* Function slpeel_update_phi_nodes_for_guard2
513
514 Input:
515 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
516
517 In the context of the overall structure, we have:
518
b8698a0f 519 loop1_preheader_bb:
ebfd146a
IR
520 guard1 (goto loop1/merge1_bb)
521 loop1
b8698a0f 522 loop1_exit_bb:
ebfd146a
IR
523 guard2 (goto merge1_bb/merge2_bb)
524 merge1_bb
525LOOP-> loop2
526 loop2_exit_bb
527 merge2_bb
528 next_bb
529
530 For each name used out side the loop (i.e - for each name that has an exit
531 phi in next_bb) we create a new phi in:
b8698a0f 532 1. merge2_bb (to account for the edge from guard_bb)
ebfd146a
IR
533 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
534 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
535 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
536*/
537
538static void
539slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
540 bool is_new_loop, basic_block *new_exit_bb)
541{
538dd0b7
DM
542 gphi *orig_phi, *new_phi;
543 gphi *update_phi, *update_phi2;
ebfd146a
IR
544 tree guard_arg, loop_arg;
545 basic_block new_merge_bb = guard_edge->dest;
546 edge e = EDGE_SUCC (new_merge_bb, 0);
547 basic_block update_bb = e->dest;
548 edge new_exit_e;
549 tree orig_def, orig_def_new_name;
550 tree new_name, new_name2;
551 tree arg;
538dd0b7 552 gphi_iterator gsi;
ebfd146a
IR
553
554 /* Create new bb between loop and new_merge_bb. */
555 *new_exit_bb = split_edge (single_exit (loop));
556
557 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
558
559 for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
560 {
070ecdfd 561 tree new_res;
538dd0b7 562 update_phi = gsi.phi ();
ebfd146a
IR
563 orig_phi = update_phi;
564 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
565 /* This loop-closed-phi actually doesn't represent a use
b8698a0f 566 out of the loop - the phi arg is a constant. */
ebfd146a
IR
567 if (TREE_CODE (orig_def) != SSA_NAME)
568 continue;
569 orig_def_new_name = get_current_def (orig_def);
570 arg = NULL_TREE;
571
572 /** 1. Handle new-merge-point phis **/
573
574 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
b731b390 575 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
070ecdfd 576 new_phi = create_phi_node (new_res, new_merge_bb);
ebfd146a
IR
577
578 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
579 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
580 new_name = orig_def;
581 new_name2 = NULL_TREE;
582 if (orig_def_new_name)
583 {
584 new_name = orig_def_new_name;
585 /* Some variables have both loop-entry-phis and loop-exit-phis.
586 Such variables were given yet newer names by phis placed in
587 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
588 new_name2 = get_current_def (get_current_def (orig_name)). */
589 new_name2 = get_current_def (new_name);
590 }
b8698a0f 591
ebfd146a
IR
592 if (is_new_loop)
593 {
594 guard_arg = orig_def;
595 loop_arg = new_name;
596 }
597 else
598 {
599 guard_arg = new_name;
600 loop_arg = orig_def;
601 }
602 if (new_name2)
603 guard_arg = new_name2;
b8698a0f 604
9e227d60
DC
605 add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
606 add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
ebfd146a
IR
607
608 /* 1.3. Update phi in successor block. */
609 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
684f25f4 610 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
ebfd146a
IR
611 update_phi2 = new_phi;
612
613
614 /** 2. Handle loop-closed-ssa-form phis **/
615
616 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
b731b390 617 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
070ecdfd 618 new_phi = create_phi_node (new_res, *new_exit_bb);
ebfd146a
IR
619
620 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
9e227d60 621 add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
ebfd146a
IR
622
623 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
624 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
684f25f4
AO
625 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
626 PHI_RESULT (new_phi));
ebfd146a
IR
627
628
629 /** 3. Handle loop-closed-ssa-form phis for first loop **/
630
631 /* 3.1. Find the relevant names that need an exit-phi in
632 GUARD_BB, i.e. names for which
633 slpeel_update_phi_nodes_for_guard1 had not already created a
634 phi node. This is the case for names that are used outside
635 the loop (and therefore need an exit phi) but are not updated
636 across loop iterations (and therefore don't have a
637 loop-header-phi).
638
639 slpeel_update_phi_nodes_for_guard1 is responsible for
640 creating loop-exit phis in GUARD_BB for names that have a
641 loop-header-phi. When such a phi is created we also record
642 the new name in its current definition. If this new name
643 exists, then guard_arg was set to this new name (see 1.2
644 above). Therefore, if guard_arg is not this new name, this
645 is an indication that an exit-phi in GUARD_BB was not yet
646 created, so we take care of it here. */
647 if (guard_arg == new_name2)
648 continue;
649 arg = guard_arg;
650
651 /* 3.2. Generate new phi node in GUARD_BB: */
b731b390 652 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
070ecdfd 653 new_phi = create_phi_node (new_res, guard_edge->src);
ebfd146a
IR
654
655 /* 3.3. GUARD_BB has one incoming edge: */
656 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
b8698a0f 657 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
9e227d60 658 UNKNOWN_LOCATION);
ebfd146a
IR
659
660 /* 3.4. Update phi in successor of GUARD_BB: */
661 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
662 == guard_arg);
684f25f4
AO
663 adjust_phi_and_debug_stmts (update_phi2, guard_edge,
664 PHI_RESULT (new_phi));
ebfd146a
IR
665 }
666}
667
668
669/* Make the LOOP iterate NITERS times. This is done by adding a new IV
670 that starts at zero, increases by one and its limit is NITERS.
671
672 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
673
674void
675slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
676{
677 tree indx_before_incr, indx_after_incr;
538dd0b7
DM
678 gcond *cond_stmt;
679 gcond *orig_cond;
ebfd146a
IR
680 edge exit_edge = single_exit (loop);
681 gimple_stmt_iterator loop_cond_gsi;
682 gimple_stmt_iterator incr_gsi;
683 bool insert_after;
684 tree init = build_int_cst (TREE_TYPE (niters), 0);
685 tree step = build_int_cst (TREE_TYPE (niters), 1);
b05e0233 686 source_location loop_loc;
ebfd146a
IR
687 enum tree_code code;
688
689 orig_cond = get_loop_exit_condition (loop);
690 gcc_assert (orig_cond);
691 loop_cond_gsi = gsi_for_stmt (orig_cond);
692
693 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
694 create_iv (init, step, NULL_TREE, loop,
695 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
696
697 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
698 true, NULL_TREE, true,
699 GSI_SAME_STMT);
700 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
701 true, GSI_SAME_STMT);
702
703 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
704 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
705 NULL_TREE);
706
707 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
708
709 /* Remove old loop exit test: */
710 gsi_remove (&loop_cond_gsi, true);
6f723d33 711 free_stmt_vec_info (orig_cond);
ebfd146a
IR
712
713 loop_loc = find_loop_location (loop);
73fbfcad 714 if (dump_enabled_p ())
ebfd146a 715 {
b05e0233
RB
716 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
717 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
718 LOCATION_LINE (loop_loc));
78c60e3d 719 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
e645e942 720 dump_printf (MSG_NOTE, "\n");
ebfd146a 721 }
ebfd146a
IR
722 loop->nb_iterations = niters;
723}
724
5ce9450f
JJ
725/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
726 For all PHI arguments in FROM->dest and TO->dest from those
727 edges ensure that TO->dest PHI arguments have current_def
728 to that in from. */
729
730static void
731slpeel_duplicate_current_defs_from_edges (edge from, edge to)
732{
733 gimple_stmt_iterator gsi_from, gsi_to;
734
735 for (gsi_from = gsi_start_phis (from->dest),
736 gsi_to = gsi_start_phis (to->dest);
737 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
738 gsi_next (&gsi_from), gsi_next (&gsi_to))
739 {
355fe088
TS
740 gimple *from_phi = gsi_stmt (gsi_from);
741 gimple *to_phi = gsi_stmt (gsi_to);
5ce9450f
JJ
742 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
743 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
744 if (TREE_CODE (from_arg) == SSA_NAME
745 && TREE_CODE (to_arg) == SSA_NAME
746 && get_current_def (to_arg) == NULL_TREE)
747 set_current_def (to_arg, get_current_def (from_arg));
748 }
749}
750
ebfd146a 751
b8698a0f 752/* Given LOOP this function generates a new copy of it and puts it
5ce9450f
JJ
753 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
754 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
755 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
756 entry or exit of LOOP. */
ebfd146a
IR
757
758struct loop *
5ce9450f
JJ
759slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
760 struct loop *scalar_loop, edge e)
ebfd146a
IR
761{
762 struct loop *new_loop;
763 basic_block *new_bbs, *bbs;
764 bool at_exit;
765 bool was_imm_dom;
b8698a0f 766 basic_block exit_dest;
ebfd146a 767 edge exit, new_exit;
a6c51a12 768 bool duplicate_outer_loop = false;
ebfd146a 769
2cfc56b9
RB
770 exit = single_exit (loop);
771 at_exit = (e == exit);
ebfd146a
IR
772 if (!at_exit && e != loop_preheader_edge (loop))
773 return NULL;
774
5ce9450f
JJ
775 if (scalar_loop == NULL)
776 scalar_loop = loop;
777
778 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
779 get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
a6c51a12
YR
780 /* Allow duplication of outer loops. */
781 if (scalar_loop->inner)
782 duplicate_outer_loop = true;
ebfd146a 783 /* Check whether duplication is possible. */
5ce9450f 784 if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
ebfd146a
IR
785 {
786 free (bbs);
787 return NULL;
788 }
789
790 /* Generate new loop structure. */
5ce9450f
JJ
791 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
792 duplicate_subloops (scalar_loop, new_loop);
ebfd146a 793
2cfc56b9 794 exit_dest = exit->dest;
b8698a0f
L
795 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
796 exit_dest) == loop->header ?
ebfd146a
IR
797 true : false);
798
2cfc56b9
RB
799 /* Also copy the pre-header, this avoids jumping through hoops to
800 duplicate the loop entry PHI arguments. Create an empty
801 pre-header unconditionally for this. */
5ce9450f 802 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
2cfc56b9 803 edge entry_e = single_pred_edge (preheader);
5ce9450f
JJ
804 bbs[scalar_loop->num_nodes] = preheader;
805 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
ebfd146a 806
5ce9450f
JJ
807 exit = single_exit (scalar_loop);
808 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
ebfd146a 809 &exit, 1, &new_exit, NULL,
f14540b6 810 e->src, true);
5ce9450f
JJ
811 exit = single_exit (loop);
812 basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
ebfd146a 813
5ce9450f
JJ
814 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
815
816 if (scalar_loop != loop)
817 {
818 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
819 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
820 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
821 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
822 header) to have current_def set, so copy them over. */
823 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
824 exit);
825 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
826 0),
827 EDGE_SUCC (loop->latch, 0));
828 }
b8698a0f 829
ebfd146a
IR
830 if (at_exit) /* Add the loop copy at exit. */
831 {
5ce9450f
JJ
832 if (scalar_loop != loop)
833 {
538dd0b7 834 gphi_iterator gsi;
5ce9450f
JJ
835 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
836
837 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
838 gsi_next (&gsi))
839 {
538dd0b7 840 gphi *phi = gsi.phi ();
5ce9450f
JJ
841 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
842 location_t orig_locus
843 = gimple_phi_arg_location_from_edge (phi, e);
844
845 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
846 }
847 }
2cfc56b9
RB
848 redirect_edge_and_branch_force (e, new_preheader);
849 flush_pending_stmts (e);
850 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
a6c51a12 851 if (was_imm_dom || duplicate_outer_loop)
5ce9450f 852 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
2cfc56b9
RB
853
854 /* And remove the non-necessary forwarder again. Keep the other
855 one so we have a proper pre-header for the loop at the exit edge. */
5ce9450f
JJ
856 redirect_edge_pred (single_succ_edge (preheader),
857 single_pred (preheader));
2cfc56b9 858 delete_basic_block (preheader);
5ce9450f
JJ
859 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
860 loop_preheader_edge (scalar_loop)->src);
ebfd146a
IR
861 }
862 else /* Add the copy at entry. */
863 {
5ce9450f
JJ
864 if (scalar_loop != loop)
865 {
866 /* Remove the non-necessary forwarder of scalar_loop again. */
867 redirect_edge_pred (single_succ_edge (preheader),
868 single_pred (preheader));
869 delete_basic_block (preheader);
870 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
871 loop_preheader_edge (scalar_loop)->src);
872 preheader = split_edge (loop_preheader_edge (loop));
873 entry_e = single_pred_edge (preheader);
874 }
875
2cfc56b9
RB
876 redirect_edge_and_branch_force (entry_e, new_preheader);
877 flush_pending_stmts (entry_e);
878 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
879
880 redirect_edge_and_branch_force (new_exit, preheader);
881 flush_pending_stmts (new_exit);
882 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
883
884 /* And remove the non-necessary forwarder again. Keep the other
885 one so we have a proper pre-header for the loop at the exit edge. */
5ce9450f
JJ
886 redirect_edge_pred (single_succ_edge (new_preheader),
887 single_pred (new_preheader));
2cfc56b9
RB
888 delete_basic_block (new_preheader);
889 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
890 loop_preheader_edge (new_loop)->src);
ebfd146a
IR
891 }
892
5ce9450f 893 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
a6c51a12 894 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
2cfc56b9 895
5ce9450f
JJ
896 if (scalar_loop != loop)
897 {
898 /* Update new_loop->header PHIs, so that on the preheader
899 edge they are the ones from loop rather than scalar_loop. */
538dd0b7 900 gphi_iterator gsi_orig, gsi_new;
5ce9450f
JJ
901 edge orig_e = loop_preheader_edge (loop);
902 edge new_e = loop_preheader_edge (new_loop);
903
904 for (gsi_orig = gsi_start_phis (loop->header),
905 gsi_new = gsi_start_phis (new_loop->header);
906 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
907 gsi_next (&gsi_orig), gsi_next (&gsi_new))
908 {
538dd0b7
DM
909 gphi *orig_phi = gsi_orig.phi ();
910 gphi *new_phi = gsi_new.phi ();
5ce9450f
JJ
911 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
912 location_t orig_locus
913 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
914
915 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
916 }
917 }
918
ebfd146a
IR
919 free (new_bbs);
920 free (bbs);
921
b2b29377 922 checking_verify_dominators (CDI_DOMINATORS);
2cfc56b9 923
ebfd146a
IR
924 return new_loop;
925}
926
927
928/* Given the condition statement COND, put it as the last statement
929 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
b8698a0f 930 Assumes that this is the single exit of the guarded loop.
86290011 931 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
ebfd146a
IR
932
933static edge
86290011
RG
934slpeel_add_loop_guard (basic_block guard_bb, tree cond,
935 gimple_seq cond_expr_stmt_list,
e78410bf
JH
936 basic_block exit_bb, basic_block dom_bb,
937 int probability)
ebfd146a
IR
938{
939 gimple_stmt_iterator gsi;
940 edge new_e, enter_e;
538dd0b7 941 gcond *cond_stmt;
ebfd146a
IR
942 gimple_seq gimplify_stmt_list = NULL;
943
944 enter_e = EDGE_SUCC (guard_bb, 0);
945 enter_e->flags &= ~EDGE_FALLTHRU;
946 enter_e->flags |= EDGE_FALSE_VALUE;
947 gsi = gsi_last_bb (guard_bb);
948
f7a06a98
RG
949 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
950 NULL_TREE);
86290011
RG
951 if (gimplify_stmt_list)
952 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
f7a06a98 953 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
86290011
RG
954 if (cond_expr_stmt_list)
955 gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
ebfd146a
IR
956
957 gsi = gsi_last_bb (guard_bb);
958 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
959
960 /* Add new edge to connect guard block to the merge/loop-exit block. */
961 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
e78410bf
JH
962
963 new_e->count = guard_bb->count;
964 new_e->probability = probability;
965 new_e->count = apply_probability (enter_e->count, probability);
966 enter_e->count -= new_e->count;
967 enter_e->probability = inverse_probability (probability);
ebfd146a
IR
968 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
969 return new_e;
970}
971
972
973/* This function verifies that the following restrictions apply to LOOP:
a6c51a12
YR
974 (1) it consists of exactly 2 basic blocks - header, and an empty latch
975 for innermost loop and 5 basic blocks for outer-loop.
976 (2) it is single entry, single exit
977 (3) its exit condition is the last stmt in the header
978 (4) E is the entry/exit edge of LOOP.
ebfd146a
IR
979 */
980
981bool
982slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
983{
984 edge exit_e = single_exit (loop);
985 edge entry_e = loop_preheader_edge (loop);
538dd0b7 986 gcond *orig_cond = get_loop_exit_condition (loop);
ebfd146a 987 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
a6c51a12 988 unsigned int num_bb = loop->inner? 5 : 2;
ebfd146a 989
ebfd146a
IR
990 /* All loops have an outer scope; the only case loop->outer is NULL is for
991 the function itself. */
a6c51a12
YR
992 if (!loop_outer (loop)
993 || loop->num_nodes != num_bb
ebfd146a
IR
994 || !empty_block_p (loop->latch)
995 || !single_exit (loop)
996 /* Verify that new loop exit condition can be trivially modified. */
997 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
998 || (e != exit_e && e != entry_e))
999 return false;
1000
1001 return true;
1002}
1003
ebfd146a 1004static void
b2b29377
MM
1005slpeel_checking_verify_cfg_after_peeling (struct loop *first_loop,
1006 struct loop *second_loop)
ebfd146a 1007{
b2b29377
MM
1008 if (!flag_checking)
1009 return;
1010
ebfd146a
IR
1011 basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1012 basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1013 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1014
1015 /* A guard that controls whether the second_loop is to be executed or skipped
1016 is placed in first_loop->exit. first_loop->exit therefore has two
1017 successors - one is the preheader of second_loop, and the other is a bb
1018 after second_loop.
1019 */
1020 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
b8698a0f 1021
ebfd146a
IR
1022 /* 1. Verify that one of the successors of first_loop->exit is the preheader
1023 of second_loop. */
b8698a0f 1024
ebfd146a
IR
1025 /* The preheader of new_loop is expected to have two predecessors:
1026 first_loop->exit and the block that precedes first_loop. */
1027
b8698a0f 1028 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
ebfd146a
IR
1029 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1030 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1031 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
1032 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
b8698a0f 1033
ebfd146a
IR
1034 /* Verify that the other successor of first_loop->exit is after the
1035 second_loop. */
1036 /* TODO */
1037}
ebfd146a
IR
1038
1039/* If the run time cost model check determines that vectorization is
1040 not profitable and hence scalar loop should be generated then set
1041 FIRST_NITERS to prologue peeled iterations. This will allow all the
1042 iterations to be executed in the prologue peeled scalar loop. */
1043
1044static void
1045set_prologue_iterations (basic_block bb_before_first_loop,
5d2eb24b 1046 tree *first_niters,
ebfd146a 1047 struct loop *loop,
e78410bf
JH
1048 unsigned int th,
1049 int probability)
ebfd146a
IR
1050{
1051 edge e;
1052 basic_block cond_bb, then_bb;
1053 tree var, prologue_after_cost_adjust_name;
1054 gimple_stmt_iterator gsi;
538dd0b7 1055 gphi *newphi;
ebfd146a 1056 edge e_true, e_false, e_fallthru;
538dd0b7 1057 gcond *cond_stmt;
f7a06a98 1058 gimple_seq stmts = NULL;
ebfd146a 1059 tree cost_pre_condition = NULL_TREE;
b8698a0f 1060 tree scalar_loop_iters =
ebfd146a
IR
1061 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1062
1063 e = single_pred_edge (bb_before_first_loop);
c3284718 1064 cond_bb = split_edge (e);
ebfd146a
IR
1065
1066 e = single_pred_edge (bb_before_first_loop);
c3284718 1067 then_bb = split_edge (e);
ebfd146a
IR
1068 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1069
1070 e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1071 EDGE_FALSE_VALUE);
1072 set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1073
1074 e_true = EDGE_PRED (then_bb, 0);
1075 e_true->flags &= ~EDGE_FALLTHRU;
1076 e_true->flags |= EDGE_TRUE_VALUE;
1077
e78410bf
JH
1078 e_true->probability = probability;
1079 e_false->probability = inverse_probability (probability);
1080 e_true->count = apply_probability (cond_bb->count, probability);
1081 e_false->count = cond_bb->count - e_true->count;
1082 then_bb->frequency = EDGE_FREQUENCY (e_true);
1083 then_bb->count = e_true->count;
1084
ebfd146a 1085 e_fallthru = EDGE_SUCC (then_bb, 0);
e78410bf 1086 e_fallthru->count = then_bb->count;
ebfd146a 1087
f7a06a98 1088 gsi = gsi_last_bb (cond_bb);
ebfd146a 1089 cost_pre_condition =
b8698a0f 1090 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
ebfd146a
IR
1091 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1092 cost_pre_condition =
f7a06a98
RG
1093 force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
1094 NULL_TREE, false, GSI_CONTINUE_LINKING);
1095 cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
1096 NULL_TREE, NULL_TREE);
ebfd146a 1097 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
b8698a0f 1098
ebfd146a
IR
1099 var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1100 "prologue_after_cost_adjust");
b8698a0f 1101 prologue_after_cost_adjust_name =
ebfd146a
IR
1102 force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1103
1104 gsi = gsi_last_bb (then_bb);
1105 if (stmts)
1106 gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1107
1108 newphi = create_phi_node (var, bb_before_first_loop);
b8698a0f 1109 add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
9e227d60
DC
1110 UNKNOWN_LOCATION);
1111 add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
ebfd146a 1112
5d2eb24b 1113 *first_niters = PHI_RESULT (newphi);
ebfd146a
IR
1114}
1115
ebfd146a
IR
1116/* Function slpeel_tree_peel_loop_to_edge.
1117
1118 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1119 that is placed on the entry (exit) edge E of LOOP. After this transformation
1120 we have two loops one after the other - first-loop iterates FIRST_NITERS
1121 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
b8698a0f 1122 If the cost model indicates that it is profitable to emit a scalar
ebfd146a
IR
1123 loop instead of the vector one, then the prolog (epilog) loop will iterate
1124 for the entire unchanged scalar iterations of the loop.
1125
1126 Input:
1127 - LOOP: the loop to be peeled.
5ce9450f
JJ
1128 - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1129 should be copied.
ebfd146a
IR
1130 - E: the exit or entry edge of LOOP.
1131 If it is the entry edge, we peel the first iterations of LOOP. In this
1132 case first-loop is LOOP, and second-loop is the newly created loop.
1133 If it is the exit edge, we peel the last iterations of LOOP. In this
1134 case, first-loop is the newly created loop, and second-loop is LOOP.
1135 - NITERS: the number of iterations that LOOP iterates.
1136 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1137 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1138 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1139 is false, the caller of this function may want to take care of this
1140 (this can be useful if we don't want new stmts added to first-loop).
1141 - TH: cost model profitability threshold of iterations for vectorization.
1142 - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1143 during versioning and hence needs to occur during
b8698a0f 1144 prologue generation or whether cost model check
ebfd146a
IR
1145 has not occurred during prologue generation and hence
1146 needs to occur during epilogue generation.
e78410bf
JH
1147 - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1148 - BOUND2 is the upper bound on number of iterations of the second loop (if known)
b8698a0f 1149
ebfd146a
IR
1150
1151 Output:
1152 The function returns a pointer to the new loop-copy, or NULL if it failed
1153 to perform the transformation.
1154
1155 The function generates two if-then-else guards: one before the first loop,
1156 and the other before the second loop:
1157 The first guard is:
1158 if (FIRST_NITERS == 0) then skip the first loop,
1159 and go directly to the second loop.
1160 The second guard is:
1161 if (FIRST_NITERS == NITERS) then skip the second loop.
1162
86290011
RG
1163 If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1164 then the generated condition is combined with COND_EXPR and the
1165 statements in COND_EXPR_STMT_LIST are emitted together with it.
1166
ebfd146a
IR
1167 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1168 FORNOW the resulting code will not be in loop-closed-ssa form.
1169*/
1170
5ce9450f
JJ
1171static struct loop *
1172slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
5d2eb24b 1173 edge e, tree *first_niters,
ebfd146a 1174 tree niters, bool update_first_loop_count,
86290011 1175 unsigned int th, bool check_profitability,
e78410bf
JH
1176 tree cond_expr, gimple_seq cond_expr_stmt_list,
1177 int bound1, int bound2)
ebfd146a
IR
1178{
1179 struct loop *new_loop = NULL, *first_loop, *second_loop;
1180 edge skip_e;
1181 tree pre_condition = NULL_TREE;
ebfd146a
IR
1182 basic_block bb_before_second_loop, bb_after_second_loop;
1183 basic_block bb_before_first_loop;
1184 basic_block bb_between_loops;
1185 basic_block new_exit_bb;
538dd0b7 1186 gphi_iterator gsi;
ebfd146a 1187 edge exit_e = single_exit (loop);
b05e0233 1188 source_location loop_loc;
e78410bf
JH
1189 /* There are many aspects to how likely the first loop is going to be executed.
1190 Without histogram we can't really do good job. Simply set it to
1191 2/3, so the first loop is not reordered to the end of function and
1192 the hot path through stays short. */
1193 int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1194 int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1195 int probability_of_second_loop;
b8698a0f 1196
ebfd146a
IR
1197 if (!slpeel_can_duplicate_loop_p (loop, e))
1198 return NULL;
b8698a0f 1199
141310ef
RB
1200 /* We might have a queued need to update virtual SSA form. As we
1201 delete the update SSA machinery below after doing a regular
1202 incremental SSA update during loop copying make sure we don't
1203 lose that fact.
1204 ??? Needing to update virtual SSA form by renaming is unfortunate
1205 but not all of the vectorizer code inserting new loads / stores
1206 properly assigns virtual operands to those statements. */
1207 update_ssa (TODO_update_ssa_only_virtuals);
1208
e20f6b4b
JJ
1209 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1210 in the exit bb and rename all the uses after the loop. This simplifies
1211 the *guard[12] routines, which assume loop closed SSA form for all PHIs
1212 (but normally loop closed SSA form doesn't require virtual PHIs to be
1213 in the same form). Doing this early simplifies the checking what
1214 uses should be renamed. */
1215 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
ea057359 1216 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
e20f6b4b 1217 {
538dd0b7 1218 gphi *phi = gsi.phi ();
e20f6b4b
JJ
1219 for (gsi = gsi_start_phis (exit_e->dest);
1220 !gsi_end_p (gsi); gsi_next (&gsi))
ea057359 1221 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
e20f6b4b
JJ
1222 break;
1223 if (gsi_end_p (gsi))
1224 {
b731b390 1225 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
538dd0b7 1226 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
e20f6b4b
JJ
1227 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1228 imm_use_iterator imm_iter;
355fe088 1229 gimple *stmt;
e20f6b4b
JJ
1230 use_operand_p use_p;
1231
9e227d60 1232 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
e20f6b4b
JJ
1233 gimple_phi_set_result (new_phi, new_vop);
1234 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
b5fd0440
YR
1235 if (stmt != new_phi
1236 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
e20f6b4b
JJ
1237 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1238 SET_USE (use_p, new_vop);
1239 }
1240 break;
1241 }
ebfd146a
IR
1242
1243 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1244 Resulting CFG would be:
1245
1246 first_loop:
1247 do {
1248 } while ...
1249
1250 second_loop:
1251 do {
1252 } while ...
1253
1254 orig_exit_bb:
1255 */
b8698a0f 1256
5ce9450f
JJ
1257 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
1258 e)))
ebfd146a
IR
1259 {
1260 loop_loc = find_loop_location (loop);
78c60e3d
SS
1261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1262 "tree_duplicate_loop_to_edge_cfg failed.\n");
ebfd146a
IR
1263 return NULL;
1264 }
b8698a0f 1265
684f25f4
AO
1266 if (MAY_HAVE_DEBUG_STMTS)
1267 {
9771b263 1268 gcc_assert (!adjust_vec.exists ());
ff4c81cc 1269 adjust_vec.create (32);
684f25f4
AO
1270 }
1271
ebfd146a
IR
1272 if (e == exit_e)
1273 {
1274 /* NEW_LOOP was placed after LOOP. */
1275 first_loop = loop;
1276 second_loop = new_loop;
1277 }
1278 else
1279 {
1280 /* NEW_LOOP was placed before LOOP. */
1281 first_loop = new_loop;
1282 second_loop = loop;
1283 }
1284
ebfd146a
IR
1285 /* 2. Add the guard code in one of the following ways:
1286
1287 2.a Add the guard that controls whether the first loop is executed.
1288 This occurs when this function is invoked for prologue or epilogue
1289 generation and when the cost model check can be done at compile time.
1290
1291 Resulting CFG would be:
1292
1293 bb_before_first_loop:
1294 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1295 GOTO first-loop
1296
1297 first_loop:
1298 do {
1299 } while ...
1300
1301 bb_before_second_loop:
1302
1303 second_loop:
1304 do {
1305 } while ...
1306
1307 orig_exit_bb:
1308
1309 2.b Add the cost model check that allows the prologue
1310 to iterate for the entire unchanged scalar
1311 iterations of the loop in the event that the cost
1312 model indicates that the scalar loop is more
1313 profitable than the vector one. This occurs when
1314 this function is invoked for prologue generation
1315 and the cost model check needs to be done at run
1316 time.
1317
1318 Resulting CFG after prologue peeling would be:
1319
1320 if (scalar_loop_iterations <= th)
1321 FIRST_NITERS = scalar_loop_iterations
1322
1323 bb_before_first_loop:
1324 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1325 GOTO first-loop
1326
1327 first_loop:
1328 do {
1329 } while ...
1330
1331 bb_before_second_loop:
1332
1333 second_loop:
1334 do {
1335 } while ...
1336
1337 orig_exit_bb:
1338
1339 2.c Add the cost model check that allows the epilogue
1340 to iterate for the entire unchanged scalar
1341 iterations of the loop in the event that the cost
1342 model indicates that the scalar loop is more
1343 profitable than the vector one. This occurs when
1344 this function is invoked for epilogue generation
1345 and the cost model check needs to be done at run
86290011
RG
1346 time. This check is combined with any pre-existing
1347 check in COND_EXPR to avoid versioning.
ebfd146a
IR
1348
1349 Resulting CFG after prologue peeling would be:
1350
1351 bb_before_first_loop:
1352 if ((scalar_loop_iterations <= th)
1353 ||
1354 FIRST_NITERS == 0) GOTO bb_before_second_loop
1355 GOTO first-loop
1356
1357 first_loop:
1358 do {
1359 } while ...
1360
1361 bb_before_second_loop:
1362
1363 second_loop:
1364 do {
1365 } while ...
1366
1367 orig_exit_bb:
1368 */
1369
1370 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
2cfc56b9
RB
1371 /* Loop copying insterted a forwarder block for us here. */
1372 bb_before_second_loop = single_exit (first_loop)->dest;
ebfd146a 1373
e78410bf
JH
1374 probability_of_second_loop = (inverse_probability (first_guard_probability)
1375 + combine_probabilities (second_guard_probability,
1376 first_guard_probability));
1377 /* Theoretically preheader edge of first loop and exit edge should have
1378 same frequencies. Loop exit probablities are however easy to get wrong.
1379 It is safer to copy value from original loop entry. */
1380 bb_before_second_loop->frequency
8ddb5a29
TJ
1381 = combine_probabilities (bb_before_first_loop->frequency,
1382 probability_of_second_loop);
e78410bf
JH
1383 bb_before_second_loop->count
1384 = apply_probability (bb_before_first_loop->count,
1385 probability_of_second_loop);
1386 single_succ_edge (bb_before_second_loop)->count
1387 = bb_before_second_loop->count;
1388
ebfd146a
IR
1389 /* Epilogue peeling. */
1390 if (!update_first_loop_count)
1391 {
95b3eff3
RB
1392 loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1393 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
1394 unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
1395 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1396 limit = limit + 1;
1397 if (check_profitability
1398 && th > limit)
1399 limit = th;
ebfd146a 1400 pre_condition =
95b3eff3
RB
1401 fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
1402 build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
86290011
RG
1403 if (cond_expr)
1404 {
1405 pre_condition =
1406 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1407 pre_condition,
1408 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1409 cond_expr));
1410 }
ebfd146a
IR
1411 }
1412
b8698a0f 1413 /* Prologue peeling. */
ebfd146a
IR
1414 else
1415 {
1416 if (check_profitability)
1417 set_prologue_iterations (bb_before_first_loop, first_niters,
e78410bf 1418 loop, th, first_guard_probability);
ebfd146a
IR
1419
1420 pre_condition =
5d2eb24b
IR
1421 fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1422 build_int_cst (TREE_TYPE (*first_niters), 0));
ebfd146a
IR
1423 }
1424
1425 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
86290011 1426 cond_expr_stmt_list,
e78410bf
JH
1427 bb_before_second_loop, bb_before_first_loop,
1428 inverse_probability (first_guard_probability));
1429 scale_loop_profile (first_loop, first_guard_probability,
1430 check_profitability && (int)th > bound1 ? th : bound1);
ebfd146a
IR
1431 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1432 first_loop == new_loop,
c334023f 1433 &new_exit_bb);
ebfd146a
IR
1434
1435
1436 /* 3. Add the guard that controls whether the second loop is executed.
1437 Resulting CFG would be:
1438
1439 bb_before_first_loop:
1440 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1441 GOTO first-loop
1442
1443 first_loop:
1444 do {
1445 } while ...
1446
1447 bb_between_loops:
1448 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1449 GOTO bb_before_second_loop
1450
1451 bb_before_second_loop:
1452
1453 second_loop:
1454 do {
1455 } while ...
1456
1457 bb_after_second_loop:
1458
1459 orig_exit_bb:
1460 */
1461
1462 bb_between_loops = new_exit_bb;
1463 bb_after_second_loop = split_edge (single_exit (second_loop));
1464
b8698a0f 1465 pre_condition =
5d2eb24b 1466 fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
86290011 1467 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
e78410bf
JH
1468 bb_after_second_loop, bb_before_first_loop,
1469 inverse_probability (second_guard_probability));
1470 scale_loop_profile (second_loop, probability_of_second_loop, bound2);
ebfd146a
IR
1471 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1472 second_loop == new_loop, &new_exit_bb);
1473
1474 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1475 */
1476 if (update_first_loop_count)
5d2eb24b 1477 slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
ebfd146a 1478
040d39ee
RG
1479 delete_update_ssa ();
1480
684f25f4
AO
1481 adjust_vec_debug_stmts ();
1482
ebfd146a
IR
1483 return new_loop;
1484}
1485
1486/* Function vect_get_loop_location.
1487
1488 Extract the location of the loop in the source code.
1489 If the loop is not well formed for vectorization, an estimated
1490 location is calculated.
1491 Return the loop location if succeed and NULL if not. */
1492
b05e0233 1493source_location
ebfd146a
IR
1494find_loop_location (struct loop *loop)
1495{
355fe088 1496 gimple *stmt = NULL;
ebfd146a
IR
1497 basic_block bb;
1498 gimple_stmt_iterator si;
1499
1500 if (!loop)
b05e0233 1501 return UNKNOWN_LOCATION;
ebfd146a
IR
1502
1503 stmt = get_loop_exit_condition (loop);
1504
502498d5
JJ
1505 if (stmt
1506 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
ebfd146a
IR
1507 return gimple_location (stmt);
1508
1509 /* If we got here the loop is probably not "well formed",
1510 try to estimate the loop location */
1511
1512 if (!loop->header)
b05e0233 1513 return UNKNOWN_LOCATION;
ebfd146a
IR
1514
1515 bb = loop->header;
1516
1517 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1518 {
1519 stmt = gsi_stmt (si);
502498d5 1520 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
ebfd146a
IR
1521 return gimple_location (stmt);
1522 }
1523
b05e0233 1524 return UNKNOWN_LOCATION;
ebfd146a
IR
1525}
1526
1527
ebfd146a
IR
1528/* Function vect_can_advance_ivs_p
1529
b8698a0f
L
1530 In case the number of iterations that LOOP iterates is unknown at compile
1531 time, an epilog loop will be generated, and the loop induction variables
1532 (IVs) will be "advanced" to the value they are supposed to take just before
ebfd146a
IR
1533 the epilog loop. Here we check that the access function of the loop IVs
1534 and the expression that represents the loop bound are simple enough.
1535 These restrictions will be relaxed in the future. */
1536
b8698a0f 1537bool
ebfd146a
IR
1538vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1539{
1540 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1541 basic_block bb = loop->header;
355fe088 1542 gimple *phi;
538dd0b7 1543 gphi_iterator gsi;
ebfd146a
IR
1544
1545 /* Analyze phi functions of the loop header. */
1546
73fbfcad 1547 if (dump_enabled_p ())
e645e942 1548 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
ebfd146a
IR
1549 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1550 {
ebfd146a
IR
1551 tree evolution_part;
1552
538dd0b7 1553 phi = gsi.phi ();
73fbfcad 1554 if (dump_enabled_p ())
ebfd146a 1555 {
78c60e3d
SS
1556 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1557 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
e645e942 1558 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1559 }
1560
1561 /* Skip virtual phi's. The data dependences that are associated with
1562 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1563
ea057359 1564 if (virtual_operand_p (PHI_RESULT (phi)))
ebfd146a 1565 {
73fbfcad 1566 if (dump_enabled_p ())
78c60e3d 1567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1568 "virtual phi. skip.\n");
ebfd146a
IR
1569 continue;
1570 }
1571
1572 /* Skip reduction phis. */
1573
1574 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1575 {
73fbfcad 1576 if (dump_enabled_p ())
78c60e3d 1577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1578 "reduc phi. skip.\n");
ebfd146a
IR
1579 continue;
1580 }
1581
1582 /* Analyze the evolution function. */
1583
afb119be
RB
1584 evolution_part
1585 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
ebfd146a
IR
1586 if (evolution_part == NULL_TREE)
1587 {
73fbfcad 1588 if (dump_enabled_p ())
afb119be 1589 dump_printf (MSG_MISSED_OPTIMIZATION,
e645e942 1590 "No access function or evolution.\n");
ebfd146a
IR
1591 return false;
1592 }
b8698a0f
L
1593
1594 /* FORNOW: We do not transform initial conditions of IVs
ebfd146a
IR
1595 which evolution functions are a polynomial of degree >= 2. */
1596
1597 if (tree_is_chrec (evolution_part))
b8698a0f 1598 return false;
ebfd146a
IR
1599 }
1600
1601 return true;
1602}
1603
1604
1605/* Function vect_update_ivs_after_vectorizer.
1606
1607 "Advance" the induction variables of LOOP to the value they should take
1608 after the execution of LOOP. This is currently necessary because the
1609 vectorizer does not handle induction variables that are used after the
1610 loop. Such a situation occurs when the last iterations of LOOP are
1611 peeled, because:
1612 1. We introduced new uses after LOOP for IVs that were not originally used
1613 after LOOP: the IVs of LOOP are now used by an epilog loop.
1614 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1615 times, whereas the loop IVs should be bumped N times.
1616
1617 Input:
1618 - LOOP - a loop that is going to be vectorized. The last few iterations
1619 of LOOP were peeled.
1620 - NITERS - the number of iterations that LOOP executes (before it is
1621 vectorized). i.e, the number of times the ivs should be bumped.
1622 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1623 coming out from LOOP on which there are uses of the LOOP ivs
1624 (this is the path from LOOP->exit to epilog_loop->preheader).
1625
1626 The new definitions of the ivs are placed in LOOP->exit.
1627 The phi args associated with the edge UPDATE_E in the bb
1628 UPDATE_E->dest are updated accordingly.
1629
1630 Assumption 1: Like the rest of the vectorizer, this function assumes
1631 a single loop exit that has a single predecessor.
1632
1633 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1634 organized in the same order.
1635
1636 Assumption 3: The access function of the ivs is simple enough (see
1637 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1638
1639 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
b8698a0f 1640 coming out of LOOP on which the ivs of LOOP are used (this is the path
ebfd146a
IR
1641 that leads to the epilog loop; other paths skip the epilog loop). This
1642 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1643 needs to have its phis updated.
1644 */
1645
1646static void
b8698a0f 1647vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
ebfd146a
IR
1648 edge update_e)
1649{
1650 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1651 basic_block exit_bb = single_exit (loop)->dest;
538dd0b7
DM
1652 gphi *phi, *phi1;
1653 gphi_iterator gsi, gsi1;
ebfd146a
IR
1654 basic_block update_bb = update_e->dest;
1655
47c32082 1656 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
ebfd146a
IR
1657
1658 /* Make sure there exists a single-predecessor exit bb: */
1659 gcc_assert (single_pred_p (exit_bb));
1660
1661 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1662 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1663 gsi_next (&gsi), gsi_next (&gsi1))
1664 {
ebfd146a 1665 tree init_expr;
550918ca
RG
1666 tree step_expr, off;
1667 tree type;
ebfd146a
IR
1668 tree var, ni, ni_name;
1669 gimple_stmt_iterator last_gsi;
0ac168a1 1670 stmt_vec_info stmt_info;
ebfd146a 1671
538dd0b7
DM
1672 phi = gsi.phi ();
1673 phi1 = gsi1.phi ();
73fbfcad 1674 if (dump_enabled_p ())
ebfd146a 1675 {
78c60e3d
SS
1676 dump_printf_loc (MSG_NOTE, vect_location,
1677 "vect_update_ivs_after_vectorizer: phi: ");
1678 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
e645e942 1679 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1680 }
1681
1682 /* Skip virtual phi's. */
ea057359 1683 if (virtual_operand_p (PHI_RESULT (phi)))
ebfd146a 1684 {
73fbfcad 1685 if (dump_enabled_p ())
78c60e3d 1686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1687 "virtual phi. skip.\n");
ebfd146a
IR
1688 continue;
1689 }
1690
1691 /* Skip reduction phis. */
0ac168a1
RG
1692 stmt_info = vinfo_for_stmt (phi);
1693 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
b8698a0f 1694 {
73fbfcad 1695 if (dump_enabled_p ())
78c60e3d 1696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1697 "reduc phi. skip.\n");
ebfd146a 1698 continue;
b8698a0f 1699 }
ebfd146a 1700
0ac168a1
RG
1701 type = TREE_TYPE (gimple_phi_result (phi));
1702 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1703 step_expr = unshare_expr (step_expr);
b8698a0f 1704
ebfd146a
IR
1705 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1706 of degree >= 2 or exponential. */
0ac168a1 1707 gcc_assert (!tree_is_chrec (step_expr));
ebfd146a 1708
0ac168a1 1709 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
ebfd146a 1710
550918ca
RG
1711 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1712 fold_convert (TREE_TYPE (step_expr), niters),
1713 step_expr);
0ac168a1 1714 if (POINTER_TYPE_P (type))
5d49b6a7 1715 ni = fold_build_pointer_plus (init_expr, off);
ebfd146a 1716 else
0ac168a1
RG
1717 ni = fold_build2 (PLUS_EXPR, type,
1718 init_expr, fold_convert (type, off));
ebfd146a 1719
0ac168a1 1720 var = create_tmp_var (type, "tmp");
ebfd146a
IR
1721
1722 last_gsi = gsi_last_bb (exit_bb);
1723 ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1724 true, GSI_SAME_STMT);
b8698a0f 1725
ebfd146a 1726 /* Fix phi expressions in the successor bb. */
684f25f4 1727 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
ebfd146a
IR
1728 }
1729}
1730
ebfd146a
IR
1731/* Function vect_do_peeling_for_loop_bound
1732
1733 Peel the last iterations of the loop represented by LOOP_VINFO.
b8698a0f 1734 The peeled iterations form a new epilog loop. Given that the loop now
ebfd146a
IR
1735 iterates NITERS times, the new epilog loop iterates
1736 NITERS % VECTORIZATION_FACTOR times.
b8698a0f
L
1737
1738 The original loop will later be made to iterate
86290011
RG
1739 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1740
1741 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1742 test. */
ebfd146a 1743
b8698a0f 1744void
f3c92486
RB
1745vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1746 tree ni_name, tree ratio_mult_vf_name,
368117e8 1747 unsigned int th, bool check_profitability)
ebfd146a 1748{
ebfd146a 1749 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5ce9450f 1750 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
ebfd146a
IR
1751 struct loop *new_loop;
1752 edge update_e;
1753 basic_block preheader;
1754 int loop_num;
d68d56b5 1755 int max_iter;
368117e8
RG
1756 tree cond_expr = NULL_TREE;
1757 gimple_seq cond_expr_stmt_list = NULL;
ebfd146a 1758
73fbfcad 1759 if (dump_enabled_p ())
ccb3ad87 1760 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1761 "=== vect_do_peeling_for_loop_bound ===\n");
ebfd146a
IR
1762
1763 initialize_original_copy_tables ();
1764
b8698a0f 1765 loop_num = loop->num;
ebfd146a 1766
5ce9450f
JJ
1767 new_loop
1768 = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1769 &ratio_mult_vf_name, ni_name, false,
1770 th, check_profitability,
1771 cond_expr, cond_expr_stmt_list,
1772 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
ebfd146a
IR
1773 gcc_assert (new_loop);
1774 gcc_assert (loop_num == loop->num);
b2b29377 1775 slpeel_checking_verify_cfg_after_peeling (loop, new_loop);
ebfd146a
IR
1776
1777 /* A guard that controls whether the new_loop is to be executed or skipped
1778 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1779 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1780 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1781 is on the path where the LOOP IVs are used and need to be updated. */
1782
1783 preheader = loop_preheader_edge (new_loop)->src;
1784 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1785 update_e = EDGE_PRED (preheader, 0);
1786 else
1787 update_e = EDGE_PRED (preheader, 1);
1788
b8698a0f 1789 /* Update IVs of original loop as if they were advanced
ebfd146a 1790 by ratio_mult_vf_name steps. */
b8698a0f 1791 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
ebfd146a 1792
22458c5a
JH
1793 /* For vectorization factor N, we need to copy last N-1 values in epilogue
1794 and this means N-2 loopback edge executions.
1795
1796 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1797 will execute at least LOOP_VINFO_VECT_FACTOR times. */
1798 max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1799 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1800 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
368117e8 1801 if (check_profitability)
22458c5a 1802 max_iter = MAX (max_iter, (int) th - 1);
807e902e 1803 record_niter_bound (new_loop, max_iter, false, true);
ccb3ad87 1804 dump_printf (MSG_NOTE,
78c60e3d
SS
1805 "Setting upper bound of nb iterations for epilogue "
1806 "loop to %d\n", max_iter);
7d5a99f4 1807
ebfd146a
IR
1808 /* After peeling we have to reset scalar evolution analyzer. */
1809 scev_reset ();
1810
1811 free_original_copy_tables ();
1812}
1813
1814
1815/* Function vect_gen_niters_for_prolog_loop
1816
1817 Set the number of iterations for the loop represented by LOOP_VINFO
1818 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1819 and the misalignment of DR - the data reference recorded in
b8698a0f 1820 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
ebfd146a
IR
1821 this loop, the data reference DR will refer to an aligned location.
1822
1823 The following computation is generated:
1824
1825 If the misalignment of DR is known at compile time:
1826 addr_mis = int mis = DR_MISALIGNMENT (dr);
1827 Else, compute address misalignment in bytes:
5aea1e76 1828 addr_mis = addr & (vectype_align - 1)
ebfd146a
IR
1829
1830 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1831
1832 (elem_size = element type size; an element is the scalar element whose type
1833 is the inner type of the vectype)
1834
1835 When the step of the data-ref in the loop is not 1 (as in interleaved data
1836 and SLP), the number of iterations of the prolog must be divided by the step
1837 (which is equal to the size of interleaved group).
1838
1839 The above formulas assume that VF == number of elements in the vector. This
1840 may not hold when there are multiple-types in the loop.
1841 In this case, for some data-references in the loop the VF does not represent
1842 the number of elements that fit in the vector. Therefore, instead of VF we
1843 use TYPE_VECTOR_SUBPARTS. */
1844
b8698a0f 1845static tree
e78410bf 1846vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
ebfd146a
IR
1847{
1848 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1849 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1850 tree var;
1851 gimple_seq stmts;
1852 tree iters, iters_name;
1853 edge pe;
1854 basic_block new_bb;
355fe088 1855 gimple *dr_stmt = DR_STMT (dr);
ebfd146a
IR
1856 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1857 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1858 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1859 tree niters_type = TREE_TYPE (loop_niters);
ebfd146a
IR
1860 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1861
b8698a0f 1862 pe = loop_preheader_edge (loop);
ebfd146a 1863
15e693cc 1864 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
ebfd146a 1865 {
15e693cc 1866 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
ebfd146a 1867
73fbfcad 1868 if (dump_enabled_p ())
ccb3ad87 1869 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1870 "known peeling = %d.\n", npeel);
ebfd146a 1871
720f5239 1872 iters = build_int_cst (niters_type, npeel);
15e693cc 1873 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
ebfd146a
IR
1874 }
1875 else
1876 {
1877 gimple_seq new_stmts = NULL;
d8ba5b19
RG
1878 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1879 tree offset = negative
5fa79de8 1880 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
b8698a0f 1881 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
d8ba5b19 1882 &new_stmts, offset, loop);
96f9265a 1883 tree type = unsigned_type_for (TREE_TYPE (start_addr));
5aea1e76
UW
1884 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1885 HOST_WIDE_INT elem_size =
1886 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1887 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
ebfd146a
IR
1888 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1889 tree nelements_tree = build_int_cst (type, nelements);
1890 tree byte_misalign;
1891 tree elem_misalign;
1892
1893 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1894 gcc_assert (!new_bb);
b8698a0f 1895
5aea1e76 1896 /* Create: byte_misalign = addr & (vectype_align - 1) */
b8698a0f 1897 byte_misalign =
720f5239 1898 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
5aea1e76 1899 vectype_align_minus_1);
b8698a0f 1900
ebfd146a
IR
1901 /* Create: elem_misalign = byte_misalign / element_size */
1902 elem_misalign =
1903 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1904
1905 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
d8ba5b19
RG
1906 if (negative)
1907 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1908 else
1909 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
ebfd146a
IR
1910 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1911 iters = fold_convert (niters_type, iters);
e78410bf 1912 *bound = nelements;
ebfd146a
IR
1913 }
1914
1915 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1916 /* If the loop bound is known at compile time we already verified that it is
1917 greater than vf; since the misalignment ('iters') is at most vf, there's
1918 no need to generate the MIN_EXPR in this case. */
1919 if (TREE_CODE (loop_niters) != INTEGER_CST)
1920 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1921
73fbfcad 1922 if (dump_enabled_p ())
ebfd146a 1923 {
ccb3ad87 1924 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1925 "niters for prolog loop: ");
ccb3ad87 1926 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
e645e942 1927 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1928 }
1929
1930 var = create_tmp_var (niters_type, "prolog_loop_niters");
ebfd146a
IR
1931 stmts = NULL;
1932 iters_name = force_gimple_operand (iters, &stmts, false, var);
1933
1934 /* Insert stmt on loop preheader edge. */
1935 if (stmts)
1936 {
1937 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1938 gcc_assert (!new_bb);
1939 }
1940
b8698a0f 1941 return iters_name;
ebfd146a
IR
1942}
1943
1944
1945/* Function vect_update_init_of_dr
1946
1947 NITERS iterations were peeled from LOOP. DR represents a data reference
1948 in LOOP. This function updates the information recorded in DR to
b8698a0f 1949 account for the fact that the first NITERS iterations had already been
ebfd146a
IR
1950 executed. Specifically, it updates the OFFSET field of DR. */
1951
1952static void
1953vect_update_init_of_dr (struct data_reference *dr, tree niters)
1954{
1955 tree offset = DR_OFFSET (dr);
b8698a0f 1956
ebfd146a
IR
1957 niters = fold_build2 (MULT_EXPR, sizetype,
1958 fold_convert (sizetype, niters),
1959 fold_convert (sizetype, DR_STEP (dr)));
587aa063
RG
1960 offset = fold_build2 (PLUS_EXPR, sizetype,
1961 fold_convert (sizetype, offset), niters);
ebfd146a
IR
1962 DR_OFFSET (dr) = offset;
1963}
1964
1965
1966/* Function vect_update_inits_of_drs
1967
b8698a0f
L
1968 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1969 This function updates the information recorded for the data references in
1970 the loop to account for the fact that the first NITERS iterations had
ebfd146a
IR
1971 already been executed. Specifically, it updates the initial_condition of
1972 the access_function of all the data_references in the loop. */
1973
1974static void
1975vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1976{
1977 unsigned int i;
9771b263 1978 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
ebfd146a 1979 struct data_reference *dr;
78c60e3d 1980
73fbfcad 1981 if (dump_enabled_p ())
ccb3ad87 1982 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1983 "=== vect_update_inits_of_dr ===\n");
ebfd146a 1984
9771b263 1985 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
1986 vect_update_init_of_dr (dr, niters);
1987}
1988
1989
1990/* Function vect_do_peeling_for_alignment
1991
1992 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1993 'niters' is set to the misalignment of one of the data references in the
1994 loop, thereby forcing it to refer to an aligned location at the beginning
1995 of the execution of this loop. The data reference for which we are
1996 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
1997
1998void
f3c92486 1999vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
368117e8 2000 unsigned int th, bool check_profitability)
ebfd146a
IR
2001{
2002 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5ce9450f 2003 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
f3c92486 2004 tree niters_of_prolog_loop;
b61b1f17 2005 tree wide_prolog_niters;
ebfd146a 2006 struct loop *new_loop;
03fd03d5 2007 int max_iter;
e78410bf 2008 int bound = 0;
ebfd146a 2009
73fbfcad 2010 if (dump_enabled_p ())
9cc1fb4b
XDL
2011 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2012 "loop peeled for vectorization to enhance"
2013 " alignment\n");
ebfd146a
IR
2014
2015 initialize_original_copy_tables ();
2016
f3c92486
RB
2017 gimple_seq stmts = NULL;
2018 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5d2eb24b 2019 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
e78410bf
JH
2020 ni_name,
2021 &bound);
ebfd146a 2022
ebfd146a
IR
2023 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2024 new_loop =
5ce9450f
JJ
2025 slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2026 loop_preheader_edge (loop),
5d2eb24b 2027 &niters_of_prolog_loop, ni_name, true,
e78410bf 2028 th, check_profitability, NULL_TREE, NULL,
5ce9450f 2029 bound, 0);
ebfd146a
IR
2030
2031 gcc_assert (new_loop);
b2b29377 2032 slpeel_checking_verify_cfg_after_peeling (new_loop, loop);
22458c5a
JH
2033 /* For vectorization factor N, we need to copy at most N-1 values
2034 for alignment and this means N-2 loopback edge executions. */
2035 max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
368117e8 2036 if (check_profitability)
22458c5a 2037 max_iter = MAX (max_iter, (int) th - 1);
807e902e 2038 record_niter_bound (new_loop, max_iter, false, true);
ccb3ad87 2039 dump_printf (MSG_NOTE,
78c60e3d
SS
2040 "Setting upper bound of nb iterations for prologue "
2041 "loop to %d\n", max_iter);
ebfd146a
IR
2042
2043 /* Update number of times loop executes. */
ebfd146a 2044 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
15e693cc 2045 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
95b3eff3
RB
2046 LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2047 TREE_TYPE (ni_name),
2048 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
ebfd146a 2049
5d2eb24b
IR
2050 if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2051 wide_prolog_niters = niters_of_prolog_loop;
2052 else
2053 {
2054 gimple_seq seq = NULL;
2055 edge pe = loop_preheader_edge (loop);
2056 tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2057 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
5d2eb24b
IR
2058 wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2059 var);
2060 if (seq)
2061 {
2062 /* Insert stmt on loop preheader edge. */
2063 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2064 gcc_assert (!new_bb);
2065 }
2066 }
2067
ebfd146a 2068 /* Update the init conditions of the access functions of all data refs. */
b61b1f17 2069 vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
ebfd146a
IR
2070
2071 /* After peeling we have to reset scalar evolution analyzer. */
2072 scev_reset ();
2073
2074 free_original_copy_tables ();
2075}
2076
2077
2078/* Function vect_create_cond_for_align_checks.
2079
2080 Create a conditional expression that represents the alignment checks for
2081 all of data references (array element references) whose alignment must be
2082 checked at runtime.
2083
2084 Input:
2085 COND_EXPR - input conditional expression. New conditions will be chained
2086 with logical AND operation.
2087 LOOP_VINFO - two fields of the loop information are used.
2088 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2089 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2090
2091 Output:
2092 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2093 expression.
2094 The returned value is the conditional expression to be used in the if
2095 statement that controls which version of the loop gets executed at runtime.
2096
2097 The algorithm makes two assumptions:
2098 1) The number of bytes "n" in a vector is a power of 2.
2099 2) An address "a" is aligned if a%n is zero and that this
2100 test can be done as a&(n-1) == 0. For example, for 16
2101 byte vectors the test is a&0xf == 0. */
2102
2103static void
2104vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2105 tree *cond_expr,
2106 gimple_seq *cond_expr_stmt_list)
2107{
2108 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
355fe088 2109 vec<gimple *> may_misalign_stmts
ebfd146a 2110 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
355fe088 2111 gimple *ref_stmt;
ebfd146a
IR
2112 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2113 tree mask_cst;
2114 unsigned int i;
ebfd146a
IR
2115 tree int_ptrsize_type;
2116 char tmp_name[20];
2117 tree or_tmp_name = NULL_TREE;
83d5977e 2118 tree and_tmp_name;
355fe088 2119 gimple *and_stmt;
ebfd146a
IR
2120 tree ptrsize_zero;
2121 tree part_cond_expr;
2122
2123 /* Check that mask is one less than a power of 2, i.e., mask is
2124 all zeros followed by all ones. */
2125 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2126
96f9265a 2127 int_ptrsize_type = signed_type_for (ptr_type_node);
ebfd146a
IR
2128
2129 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2130 of the first vector of the i'th data reference. */
2131
9771b263 2132 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
ebfd146a
IR
2133 {
2134 gimple_seq new_stmt_list = NULL;
2135 tree addr_base;
83d5977e
RG
2136 tree addr_tmp_name;
2137 tree new_or_tmp_name;
355fe088 2138 gimple *addr_stmt, *or_stmt;
d8ba5b19
RG
2139 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2140 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2141 bool negative = tree_int_cst_compare
2142 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2143 tree offset = negative
aad83b7c 2144 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
ebfd146a
IR
2145
2146 /* create: addr_tmp = (int)(address_of_first_vector) */
2147 addr_base =
2148 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
d8ba5b19 2149 offset, loop);
ebfd146a
IR
2150 if (new_stmt_list != NULL)
2151 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2152
83d5977e
RG
2153 sprintf (tmp_name, "addr2int%d", i);
2154 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
0d0e4a03 2155 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
ebfd146a
IR
2156 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2157
2158 /* The addresses are OR together. */
2159
2160 if (or_tmp_name != NULL_TREE)
2161 {
2162 /* create: or_tmp = or_tmp | addr_tmp */
83d5977e
RG
2163 sprintf (tmp_name, "orptrs%d", i);
2164 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
0d0e4a03
JJ
2165 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2166 or_tmp_name, addr_tmp_name);
ebfd146a
IR
2167 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2168 or_tmp_name = new_or_tmp_name;
2169 }
2170 else
2171 or_tmp_name = addr_tmp_name;
2172
2173 } /* end for i */
2174
2175 mask_cst = build_int_cst (int_ptrsize_type, mask);
2176
2177 /* create: and_tmp = or_tmp & mask */
83d5977e 2178 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
ebfd146a 2179
0d0e4a03
JJ
2180 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2181 or_tmp_name, mask_cst);
ebfd146a
IR
2182 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2183
2184 /* Make and_tmp the left operand of the conditional test against zero.
2185 if and_tmp has a nonzero bit then some address is unaligned. */
2186 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2187 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2188 and_tmp_name, ptrsize_zero);
2189 if (*cond_expr)
2190 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2191 *cond_expr, part_cond_expr);
2192 else
2193 *cond_expr = part_cond_expr;
2194}
2195
ebfd146a
IR
2196/* Function vect_create_cond_for_alias_checks.
2197
2198 Create a conditional expression that represents the run-time checks for
2199 overlapping of address ranges represented by a list of data references
2200 relations passed as input.
2201
2202 Input:
2203 COND_EXPR - input conditional expression. New conditions will be chained
a05a89fa
CH
2204 with logical AND operation. If it is NULL, then the function
2205 is used to return the number of alias checks.
ebfd146a
IR
2206 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2207 to be checked.
2208
2209 Output:
2210 COND_EXPR - conditional expression.
ebfd146a 2211
a05a89fa 2212 The returned COND_EXPR is the conditional expression to be used in the if
ebfd146a
IR
2213 statement that controls which version of the loop gets executed at runtime.
2214*/
2215
a05a89fa 2216void
4bdd44c4 2217vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
ebfd146a 2218{
93bdc3ed 2219 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
a05a89fa
CH
2220 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2221 tree part_cond_expr;
ebfd146a
IR
2222
2223 /* Create expression
36fc3799
RS
2224 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2225 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
b8698a0f 2226 &&
ebfd146a
IR
2227 ...
2228 &&
36fc3799
RS
2229 ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2230 || (load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
ebfd146a 2231
a05a89fa 2232 if (comp_alias_ddrs.is_empty ())
ebfd146a
IR
2233 return;
2234
a05a89fa 2235 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
ebfd146a 2236 {
93bdc3ed
CH
2237 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2238 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
a05a89fa
CH
2239 tree segment_length_a = dr_a.seg_len;
2240 tree segment_length_b = dr_b.seg_len;
ebfd146a 2241
a05a89fa 2242 tree addr_base_a
93bdc3ed 2243 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
a05a89fa 2244 tree addr_base_b
93bdc3ed 2245 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
ebfd146a 2246
73fbfcad 2247 if (dump_enabled_p ())
ebfd146a 2248 {
ccb3ad87 2249 dump_printf_loc (MSG_NOTE, vect_location,
a05a89fa
CH
2250 "create runtime check for data references ");
2251 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
ccb3ad87 2252 dump_printf (MSG_NOTE, " and ");
a05a89fa
CH
2253 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2254 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
2255 }
2256
a05a89fa
CH
2257 tree seg_a_min = addr_base_a;
2258 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
82d89471
BM
2259 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2260 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2261 [a, a+12) */
a05a89fa 2262 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
82d89471
BM
2263 {
2264 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2265 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2266 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2267 }
d8ba5b19 2268
a05a89fa
CH
2269 tree seg_b_min = addr_base_b;
2270 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2271 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
82d89471
BM
2272 {
2273 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2274 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2275 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2276 }
ebfd146a 2277
b8698a0f 2278 part_cond_expr =
ebfd146a 2279 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
36fc3799
RS
2280 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2281 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
b8698a0f 2282
ebfd146a
IR
2283 if (*cond_expr)
2284 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2285 *cond_expr, part_cond_expr);
2286 else
2287 *cond_expr = part_cond_expr;
2288 }
ebfd146a 2289
73fbfcad 2290 if (dump_enabled_p ())
ccb3ad87 2291 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2292 "created %u versioning for alias checks.\n",
a05a89fa
CH
2293 comp_alias_ddrs.length ());
2294
2295 comp_alias_ddrs.release ();
ebfd146a
IR
2296}
2297
2298
2299/* Function vect_loop_versioning.
b8698a0f 2300
ebfd146a
IR
2301 If the loop has data references that may or may not be aligned or/and
2302 has data reference relations whose independence was not proven then
2303 two versions of the loop need to be generated, one which is vectorized
2304 and one which isn't. A test is then generated to control which of the
2305 loops is executed. The test checks for the alignment of all of the
2306 data references that may or may not be aligned. An additional
2307 sequence of runtime tests is generated for each pairs of DDRs whose
b8698a0f
L
2308 independence was not proven. The vectorized version of loop is
2309 executed only if both alias and alignment tests are passed.
2310
ebfd146a 2311 The test generated to check which version of loop is executed
b8698a0f 2312 is modified to also check for profitability as indicated by the
86290011
RG
2313 cost model initially.
2314
2315 The versioning precondition(s) are placed in *COND_EXPR and
d68d56b5 2316 *COND_EXPR_STMT_LIST. */
ebfd146a
IR
2317
2318void
368117e8
RG
2319vect_loop_versioning (loop_vec_info loop_vinfo,
2320 unsigned int th, bool check_profitability)
ebfd146a
IR
2321{
2322 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5ce9450f 2323 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
ebfd146a 2324 basic_block condition_bb;
538dd0b7
DM
2325 gphi_iterator gsi;
2326 gimple_stmt_iterator cond_exp_gsi;
ebfd146a
IR
2327 basic_block merge_bb;
2328 basic_block new_exit_bb;
2329 edge new_exit_e, e;
538dd0b7 2330 gphi *orig_phi, *new_phi;
368117e8 2331 tree cond_expr = NULL_TREE;
d68d56b5 2332 gimple_seq cond_expr_stmt_list = NULL;
ebfd146a
IR
2333 tree arg;
2334 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2335 gimple_seq gimplify_stmt_list = NULL;
2336 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
9cc1fb4b
XDL
2337 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2338 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
ebfd146a 2339
368117e8
RG
2340 if (check_profitability)
2341 {
2342 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2343 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2344 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2345 is_gimple_condexpr, NULL_TREE);
2346 }
ebfd146a 2347
9cc1fb4b 2348 if (version_align)
d68d56b5
RG
2349 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2350 &cond_expr_stmt_list);
ebfd146a 2351
9cc1fb4b 2352 if (version_alias)
4bdd44c4 2353 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
86290011 2354
d68d56b5
RG
2355 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2356 is_gimple_condexpr, NULL_TREE);
2357 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
ebfd146a
IR
2358
2359 initialize_original_copy_tables ();
5ce9450f
JJ
2360 if (scalar_loop)
2361 {
2362 edge scalar_e;
2363 basic_block preheader, scalar_preheader;
2364
2365 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2366 scale LOOP's frequencies instead. */
2367 loop_version (scalar_loop, cond_expr, &condition_bb,
2368 prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2369 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2370 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2371 while we need to move it above LOOP's preheader. */
2372 e = loop_preheader_edge (loop);
2373 scalar_e = loop_preheader_edge (scalar_loop);
2374 gcc_assert (empty_block_p (e->src)
2375 && single_pred_p (e->src));
2376 gcc_assert (empty_block_p (scalar_e->src)
2377 && single_pred_p (scalar_e->src));
2378 gcc_assert (single_pred_p (condition_bb));
2379 preheader = e->src;
2380 scalar_preheader = scalar_e->src;
2381 scalar_e = find_edge (condition_bb, scalar_preheader);
2382 e = single_pred_edge (preheader);
2383 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2384 scalar_preheader);
2385 redirect_edge_and_branch_force (scalar_e, preheader);
2386 redirect_edge_and_branch_force (e, condition_bb);
2387 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2388 single_pred (condition_bb));
2389 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2390 single_pred (scalar_preheader));
2391 set_immediate_dominator (CDI_DOMINATORS, preheader,
2392 condition_bb);
2393 }
2394 else
2395 loop_version (loop, cond_expr, &condition_bb,
2396 prob, prob, REG_BR_PROB_BASE - prob, true);
9cc1fb4b 2397
b05e0233 2398 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
9cc1fb4b
XDL
2399 && dump_enabled_p ())
2400 {
2401 if (version_alias)
2402 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2403 "loop versioned for vectorization because of "
2404 "possible aliasing\n");
2405 if (version_align)
2406 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2407 "loop versioned for vectorization to enhance "
2408 "alignment\n");
2409
2410 }
c3284718 2411 free_original_copy_tables ();
ebfd146a 2412
b8698a0f 2413 /* Loop versioning violates an assumption we try to maintain during
ebfd146a
IR
2414 vectorization - that the loop exit block has a single predecessor.
2415 After versioning, the exit block of both loop versions is the same
2416 basic block (i.e. it has two predecessors). Just in order to simplify
2417 following transformations in the vectorizer, we fix this situation
2418 here by adding a new (empty) block on the exit-edge of the loop,
5ce9450f
JJ
2419 with the proper loop-exit phis to maintain loop-closed-form.
2420 If loop versioning wasn't done from loop, but scalar_loop instead,
2421 merge_bb will have already just a single successor. */
b8698a0f 2422
ebfd146a 2423 merge_bb = single_exit (loop)->dest;
5ce9450f 2424 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
ebfd146a 2425 {
5ce9450f
JJ
2426 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2427 new_exit_bb = split_edge (single_exit (loop));
2428 new_exit_e = single_exit (loop);
2429 e = EDGE_SUCC (new_exit_bb, 0);
2430
2431 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2432 {
2433 tree new_res;
538dd0b7 2434 orig_phi = gsi.phi ();
b731b390 2435 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
5ce9450f
JJ
2436 new_phi = create_phi_node (new_res, new_exit_bb);
2437 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2438 add_phi_arg (new_phi, arg, new_exit_e,
2439 gimple_phi_arg_location_from_edge (orig_phi, e));
2440 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2441 }
b8698a0f 2442 }
ebfd146a
IR
2443
2444 /* End loop-exit-fixes after versioning. */
2445
d68d56b5 2446 if (cond_expr_stmt_list)
ebfd146a
IR
2447 {
2448 cond_exp_gsi = gsi_last_bb (condition_bb);
d68d56b5 2449 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
86290011 2450 GSI_SAME_STMT);
ebfd146a 2451 }
90eb75f2 2452 update_ssa (TODO_update_ssa);
ebfd146a 2453}