]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-loop-manip.c
* gimple.h (gimple_build_assign_with_ops): Add unary arg overload.
[thirdparty/gcc.git] / gcc / tree-vect-loop-manip.c
CommitLineData
48e1416a 1/* Vectorizer Specific Loop Manipulations
3aea1f79 2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
48e1416a 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
7bd765d4 25#include "dumpfile.h"
fb85abff 26#include "tm.h"
fb85abff 27#include "tree.h"
94ea8568 28#include "predict.h"
29#include "vec.h"
30#include "hashtab.h"
31#include "hash-set.h"
32#include "machmode.h"
33#include "hard-reg-set.h"
34#include "input.h"
35#include "function.h"
36#include "dominance.h"
37#include "cfg.h"
38#include "cfganal.h"
fb85abff 39#include "basic-block.h"
ce084dfc 40#include "gimple-pretty-print.h"
bc61cadb 41#include "tree-ssa-alias.h"
42#include "internal-fn.h"
43#include "gimple-expr.h"
44#include "is-a.h"
073c1fd5 45#include "gimple.h"
a8783bee 46#include "gimplify.h"
dcf1a1ec 47#include "gimple-iterator.h"
e795d6e1 48#include "gimplify-me.h"
073c1fd5 49#include "gimple-ssa.h"
50#include "tree-cfg.h"
51#include "tree-phinodes.h"
52#include "ssa-iterators.h"
9ed99284 53#include "stringpool.h"
073c1fd5 54#include "tree-ssanames.h"
05d9c18a 55#include "tree-ssa-loop-manip.h"
073c1fd5 56#include "tree-into-ssa.h"
69ee5dbb 57#include "tree-ssa.h"
b9ed1410 58#include "tree-pass.h"
fb85abff 59#include "cfgloop.h"
0b205f4c 60#include "diagnostic-core.h"
fb85abff 61#include "tree-scalar-evolution.h"
62#include "tree-vectorizer.h"
63#include "langhooks.h"
64
65/*************************************************************************
66 Simple Loop Peeling Utilities
67
68 Utilities to support loop peeling for vectorization purposes.
69 *************************************************************************/
70
71
72/* Renames the use *OP_P. */
73
74static void
75rename_use_op (use_operand_p op_p)
76{
77 tree new_name;
78
79 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
80 return;
81
82 new_name = get_current_def (USE_FROM_PTR (op_p));
83
84 /* Something defined outside of the loop. */
85 if (!new_name)
86 return;
87
88 /* An ordinary ssa name defined in the loop. */
89
90 SET_USE (op_p, new_name);
91}
92
93
94/* Renames the variables in basic block BB. */
95
c9b2c569 96static void
fb85abff 97rename_variables_in_bb (basic_block bb)
98{
99 gimple_stmt_iterator gsi;
100 gimple stmt;
101 use_operand_p use_p;
102 ssa_op_iter iter;
103 edge e;
104 edge_iterator ei;
105 struct loop *loop = bb->loop_father;
106
107 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
108 {
109 stmt = gsi_stmt (gsi);
110 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
111 rename_use_op (use_p);
112 }
113
c9b2c569 114 FOR_EACH_EDGE (e, ei, bb->preds)
fb85abff 115 {
c9b2c569 116 if (!flow_bb_inside_loop_p (loop, e->src))
fb85abff 117 continue;
c9b2c569 118 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
fb85abff 119 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
120 }
121}
122
123
b123eaab 124typedef struct
125{
126 tree from, to;
127 basic_block bb;
128} adjust_info;
129
b123eaab 130/* A stack of values to be adjusted in debug stmts. We have to
131 process them LIFO, so that the closest substitution applies. If we
132 processed them FIFO, without the stack, we might substitute uses
133 with a PHI DEF that would soon become non-dominant, and when we got
134 to the suitable one, it wouldn't have anything to substitute any
135 more. */
d70aebca 136static vec<adjust_info, va_heap> adjust_vec;
b123eaab 137
138/* Adjust any debug stmts that referenced AI->from values to use the
139 loop-closed AI->to, if the references are dominated by AI->bb and
140 not by the definition of AI->from. */
141
142static void
143adjust_debug_stmts_now (adjust_info *ai)
144{
145 basic_block bbphi = ai->bb;
146 tree orig_def = ai->from;
147 tree new_def = ai->to;
148 imm_use_iterator imm_iter;
149 gimple stmt;
150 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
151
152 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
153
154 /* Adjust any debug stmts that held onto non-loop-closed
155 references. */
156 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
157 {
158 use_operand_p use_p;
159 basic_block bbuse;
160
161 if (!is_gimple_debug (stmt))
162 continue;
163
164 gcc_assert (gimple_debug_bind_p (stmt));
165
166 bbuse = gimple_bb (stmt);
167
168 if ((bbuse == bbphi
169 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
170 && !(bbuse == bbdef
171 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
172 {
173 if (new_def)
174 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
175 SET_USE (use_p, new_def);
176 else
177 {
178 gimple_debug_bind_reset_value (stmt);
179 update_stmt (stmt);
180 }
181 }
182 }
183}
184
185/* Adjust debug stmts as scheduled before. */
186
187static void
188adjust_vec_debug_stmts (void)
189{
190 if (!MAY_HAVE_DEBUG_STMTS)
191 return;
192
f1f41a6c 193 gcc_assert (adjust_vec.exists ());
b123eaab 194
f1f41a6c 195 while (!adjust_vec.is_empty ())
b123eaab 196 {
f1f41a6c 197 adjust_debug_stmts_now (&adjust_vec.last ());
198 adjust_vec.pop ();
b123eaab 199 }
200
f1f41a6c 201 adjust_vec.release ();
b123eaab 202}
203
204/* Adjust any debug stmts that referenced FROM values to use the
205 loop-closed TO, if the references are dominated by BB and not by
206 the definition of FROM. If adjust_vec is non-NULL, adjustments
207 will be postponed until adjust_vec_debug_stmts is called. */
208
209static void
210adjust_debug_stmts (tree from, tree to, basic_block bb)
211{
212 adjust_info ai;
213
0087edc6 214 if (MAY_HAVE_DEBUG_STMTS
215 && TREE_CODE (from) == SSA_NAME
2510e5cd 216 && ! SSA_NAME_IS_DEFAULT_DEF (from)
0087edc6 217 && ! virtual_operand_p (from))
b123eaab 218 {
219 ai.from = from;
220 ai.to = to;
221 ai.bb = bb;
222
f1f41a6c 223 if (adjust_vec.exists ())
224 adjust_vec.safe_push (ai);
b123eaab 225 else
226 adjust_debug_stmts_now (&ai);
227 }
228}
229
230/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
231 to adjust any debug stmts that referenced the old phi arg,
232 presumably non-loop-closed references left over from other
233 transformations. */
234
235static void
236adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
237{
238 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
239
240 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
241
242 if (MAY_HAVE_DEBUG_STMTS)
243 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
244 gimple_bb (update_phi));
245}
246
fb85abff 247
fb85abff 248/* Update PHI nodes for a guard of the LOOP.
249
250 Input:
251 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
252 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
253 originates from the guard-bb, skips LOOP and reaches the (unique) exit
254 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
255 We denote this bb NEW_MERGE_BB because before the guard code was added
256 it had a single predecessor (the LOOP header), and now it became a merge
257 point of two paths - the path that ends with the LOOP exit-edge, and
258 the path that ends with GUARD_EDGE.
259 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
260 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
261
262 ===> The CFG before the guard-code was added:
263 LOOP_header_bb:
264 loop_body
265 if (exit_loop) goto update_bb
266 else goto LOOP_header_bb
267 update_bb:
268
269 ==> The CFG after the guard-code was added:
270 guard_bb:
271 if (LOOP_guard_condition) goto new_merge_bb
272 else goto LOOP_header_bb
273 LOOP_header_bb:
274 loop_body
275 if (exit_loop_condition) goto new_merge_bb
276 else goto LOOP_header_bb
277 new_merge_bb:
278 goto update_bb
279 update_bb:
280
281 ==> The CFG after this function:
282 guard_bb:
283 if (LOOP_guard_condition) goto new_merge_bb
284 else goto LOOP_header_bb
285 LOOP_header_bb:
286 loop_body
287 if (exit_loop_condition) goto new_exit_bb
288 else goto LOOP_header_bb
289 new_exit_bb:
290 new_merge_bb:
291 goto update_bb
292 update_bb:
293
294 This function:
295 1. creates and updates the relevant phi nodes to account for the new
296 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
297 1.1. Create phi nodes at NEW_MERGE_BB.
298 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
299 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
300 2. preserves loop-closed-ssa-form by creating the required phi nodes
301 at the exit of LOOP (i.e, in NEW_EXIT_BB).
302
303 There are two flavors to this function:
304
305 slpeel_update_phi_nodes_for_guard1:
306 Here the guard controls whether we enter or skip LOOP, where LOOP is a
307 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
308 for variables that have phis in the loop header.
309
310 slpeel_update_phi_nodes_for_guard2:
311 Here the guard controls whether we enter or skip LOOP, where LOOP is an
312 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
313 for variables that have phis in the loop exit.
314
315 I.E., the overall structure is:
316
317 loop1_preheader_bb:
318 guard1 (goto loop1/merge1_bb)
319 loop1
320 loop1_exit_bb:
321 guard2 (goto merge1_bb/merge2_bb)
322 merge1_bb
323 loop2
324 loop2_exit_bb
325 merge2_bb
326 next_bb
327
328 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
329 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
330 that have phis in loop1->header).
331
332 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
333 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
334 that have phis in next_bb). It also adds some of these phis to
335 loop1_exit_bb.
336
337 slpeel_update_phi_nodes_for_guard1 is always called before
338 slpeel_update_phi_nodes_for_guard2. They are both needed in order
339 to create correct data-flow and loop-closed-ssa-form.
340
341 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
342 that change between iterations of a loop (and therefore have a phi-node
343 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
48e1416a 344 phis for variables that are used out of the loop (and therefore have
345 loop-closed exit phis). Some variables may be both updated between
fb85abff 346 iterations and used after the loop. This is why in loop1_exit_bb we
347 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
348 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
349
350 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
351 an original loop. i.e., we have:
352
353 orig_loop
354 guard_bb (goto LOOP/new_merge)
355 new_loop <-- LOOP
356 new_exit
357 new_merge
358 next_bb
359
360 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
361 have:
362
363 new_loop
364 guard_bb (goto LOOP/new_merge)
365 orig_loop <-- LOOP
366 new_exit
367 new_merge
368 next_bb
369
370 The SSA names defined in the original loop have a current
371 reaching definition that that records the corresponding new
372 ssa-name used in the new duplicated loop copy.
373 */
374
375/* Function slpeel_update_phi_nodes_for_guard1
48e1416a 376
fb85abff 377 Input:
378 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
379 - DEFS - a bitmap of ssa names to mark new names for which we recorded
48e1416a 380 information.
381
fb85abff 382 In the context of the overall structure, we have:
383
48e1416a 384 loop1_preheader_bb:
fb85abff 385 guard1 (goto loop1/merge1_bb)
386LOOP-> loop1
387 loop1_exit_bb:
388 guard2 (goto merge1_bb/merge2_bb)
389 merge1_bb
390 loop2
391 loop2_exit_bb
392 merge2_bb
393 next_bb
394
395 For each name updated between loop iterations (i.e - for each name that has
396 an entry (loop-header) phi in LOOP) we create a new phi in:
397 1. merge1_bb (to account for the edge from guard1)
398 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
399*/
400
401static void
402slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
1e8cf080 403 bool is_new_loop, basic_block *new_exit_bb)
fb85abff 404{
405 gimple orig_phi, new_phi;
406 gimple update_phi, update_phi2;
407 tree guard_arg, loop_arg;
408 basic_block new_merge_bb = guard_edge->dest;
409 edge e = EDGE_SUCC (new_merge_bb, 0);
410 basic_block update_bb = e->dest;
411 basic_block orig_bb = loop->header;
412 edge new_exit_e;
413 tree current_new_name;
fb85abff 414 gimple_stmt_iterator gsi_orig, gsi_update;
415
416 /* Create new bb between loop and new_merge_bb. */
417 *new_exit_bb = split_edge (single_exit (loop));
418
419 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
420
421 for (gsi_orig = gsi_start_phis (orig_bb),
422 gsi_update = gsi_start_phis (update_bb);
423 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
424 gsi_next (&gsi_orig), gsi_next (&gsi_update))
425 {
38091110 426 source_location loop_locus, guard_locus;
874117c8 427 tree new_res;
fb85abff 428 orig_phi = gsi_stmt (gsi_orig);
429 update_phi = gsi_stmt (gsi_update);
430
fb85abff 431 /** 1. Handle new-merge-point phis **/
432
433 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
874117c8 434 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
435 new_phi = create_phi_node (new_res, new_merge_bb);
fb85abff 436
437 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
438 of LOOP. Set the two phi args in NEW_PHI for these edges: */
439 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
48e1416a 440 loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
441 EDGE_SUCC (loop->latch,
efbcb6de 442 0));
fb85abff 443 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
48e1416a 444 guard_locus
445 = gimple_phi_arg_location_from_edge (orig_phi,
efbcb6de 446 loop_preheader_edge (loop));
fb85abff 447
60d535d2 448 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
449 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
fb85abff 450
451 /* 1.3. Update phi in successor block. */
452 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
453 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
b123eaab 454 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
fb85abff 455 update_phi2 = new_phi;
456
457
458 /** 2. Handle loop-closed-ssa-form phis **/
459
7c782c9b 460 if (virtual_operand_p (PHI_RESULT (orig_phi)))
fb85abff 461 continue;
462
463 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
874117c8 464 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
465 new_phi = create_phi_node (new_res, *new_exit_bb);
fb85abff 466
467 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
60d535d2 468 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
fb85abff 469
470 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
471 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
b123eaab 472 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
473 PHI_RESULT (new_phi));
fb85abff 474
475 /* 2.4. Record the newly created name with set_current_def.
476 We want to find a name such that
477 name = get_current_def (orig_loop_name)
478 and to set its current definition as follows:
479 set_current_def (name, new_phi_name)
480
481 If LOOP is a new loop then loop_arg is already the name we're
482 looking for. If LOOP is the original loop, then loop_arg is
483 the orig_loop_name and the relevant name is recorded in its
484 current reaching definition. */
485 if (is_new_loop)
486 current_new_name = loop_arg;
487 else
488 {
489 current_new_name = get_current_def (loop_arg);
490 /* current_def is not available only if the variable does not
491 change inside the loop, in which case we also don't care
492 about recording a current_def for it because we won't be
493 trying to create loop-exit-phis for it. */
494 if (!current_new_name)
495 continue;
496 }
9dbe1d59 497 tree new_name = get_current_def (current_new_name);
498 /* Because of peeled_chrec optimization it is possible that we have
499 set this earlier. Verify the PHI has the same value. */
500 if (new_name)
501 {
502 gimple phi = SSA_NAME_DEF_STMT (new_name);
503 gcc_assert (gimple_code (phi) == GIMPLE_PHI
504 && gimple_bb (phi) == *new_exit_bb
505 && (PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop))
506 == loop_arg));
507 continue;
508 }
fb85abff 509
510 set_current_def (current_new_name, PHI_RESULT (new_phi));
fb85abff 511 }
512}
513
514
515/* Function slpeel_update_phi_nodes_for_guard2
516
517 Input:
518 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
519
520 In the context of the overall structure, we have:
521
48e1416a 522 loop1_preheader_bb:
fb85abff 523 guard1 (goto loop1/merge1_bb)
524 loop1
48e1416a 525 loop1_exit_bb:
fb85abff 526 guard2 (goto merge1_bb/merge2_bb)
527 merge1_bb
528LOOP-> loop2
529 loop2_exit_bb
530 merge2_bb
531 next_bb
532
533 For each name used out side the loop (i.e - for each name that has an exit
534 phi in next_bb) we create a new phi in:
48e1416a 535 1. merge2_bb (to account for the edge from guard_bb)
fb85abff 536 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
537 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
538 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
539*/
540
541static void
542slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
543 bool is_new_loop, basic_block *new_exit_bb)
544{
545 gimple orig_phi, new_phi;
546 gimple update_phi, update_phi2;
547 tree guard_arg, loop_arg;
548 basic_block new_merge_bb = guard_edge->dest;
549 edge e = EDGE_SUCC (new_merge_bb, 0);
550 basic_block update_bb = e->dest;
551 edge new_exit_e;
552 tree orig_def, orig_def_new_name;
553 tree new_name, new_name2;
554 tree arg;
555 gimple_stmt_iterator gsi;
556
557 /* Create new bb between loop and new_merge_bb. */
558 *new_exit_bb = split_edge (single_exit (loop));
559
560 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
561
562 for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
563 {
874117c8 564 tree new_res;
fb85abff 565 update_phi = gsi_stmt (gsi);
566 orig_phi = update_phi;
567 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
568 /* This loop-closed-phi actually doesn't represent a use
48e1416a 569 out of the loop - the phi arg is a constant. */
fb85abff 570 if (TREE_CODE (orig_def) != SSA_NAME)
571 continue;
572 orig_def_new_name = get_current_def (orig_def);
573 arg = NULL_TREE;
574
575 /** 1. Handle new-merge-point phis **/
576
577 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
874117c8 578 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
579 new_phi = create_phi_node (new_res, new_merge_bb);
fb85abff 580
581 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
582 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
583 new_name = orig_def;
584 new_name2 = NULL_TREE;
585 if (orig_def_new_name)
586 {
587 new_name = orig_def_new_name;
588 /* Some variables have both loop-entry-phis and loop-exit-phis.
589 Such variables were given yet newer names by phis placed in
590 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
591 new_name2 = get_current_def (get_current_def (orig_name)). */
592 new_name2 = get_current_def (new_name);
593 }
48e1416a 594
fb85abff 595 if (is_new_loop)
596 {
597 guard_arg = orig_def;
598 loop_arg = new_name;
599 }
600 else
601 {
602 guard_arg = new_name;
603 loop_arg = orig_def;
604 }
605 if (new_name2)
606 guard_arg = new_name2;
48e1416a 607
60d535d2 608 add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
609 add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
fb85abff 610
611 /* 1.3. Update phi in successor block. */
612 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
b123eaab 613 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
fb85abff 614 update_phi2 = new_phi;
615
616
617 /** 2. Handle loop-closed-ssa-form phis **/
618
619 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
874117c8 620 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
621 new_phi = create_phi_node (new_res, *new_exit_bb);
fb85abff 622
623 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
60d535d2 624 add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
fb85abff 625
626 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
627 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
b123eaab 628 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
629 PHI_RESULT (new_phi));
fb85abff 630
631
632 /** 3. Handle loop-closed-ssa-form phis for first loop **/
633
634 /* 3.1. Find the relevant names that need an exit-phi in
635 GUARD_BB, i.e. names for which
636 slpeel_update_phi_nodes_for_guard1 had not already created a
637 phi node. This is the case for names that are used outside
638 the loop (and therefore need an exit phi) but are not updated
639 across loop iterations (and therefore don't have a
640 loop-header-phi).
641
642 slpeel_update_phi_nodes_for_guard1 is responsible for
643 creating loop-exit phis in GUARD_BB for names that have a
644 loop-header-phi. When such a phi is created we also record
645 the new name in its current definition. If this new name
646 exists, then guard_arg was set to this new name (see 1.2
647 above). Therefore, if guard_arg is not this new name, this
648 is an indication that an exit-phi in GUARD_BB was not yet
649 created, so we take care of it here. */
650 if (guard_arg == new_name2)
651 continue;
652 arg = guard_arg;
653
654 /* 3.2. Generate new phi node in GUARD_BB: */
874117c8 655 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
656 new_phi = create_phi_node (new_res, guard_edge->src);
fb85abff 657
658 /* 3.3. GUARD_BB has one incoming edge: */
659 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
48e1416a 660 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
60d535d2 661 UNKNOWN_LOCATION);
fb85abff 662
663 /* 3.4. Update phi in successor of GUARD_BB: */
664 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
665 == guard_arg);
b123eaab 666 adjust_phi_and_debug_stmts (update_phi2, guard_edge,
667 PHI_RESULT (new_phi));
fb85abff 668 }
669}
670
671
672/* Make the LOOP iterate NITERS times. This is done by adding a new IV
673 that starts at zero, increases by one and its limit is NITERS.
674
675 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
676
677void
678slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
679{
680 tree indx_before_incr, indx_after_incr;
681 gimple cond_stmt;
682 gimple orig_cond;
683 edge exit_edge = single_exit (loop);
684 gimple_stmt_iterator loop_cond_gsi;
685 gimple_stmt_iterator incr_gsi;
686 bool insert_after;
687 tree init = build_int_cst (TREE_TYPE (niters), 0);
688 tree step = build_int_cst (TREE_TYPE (niters), 1);
36f39b2e 689 source_location loop_loc;
fb85abff 690 enum tree_code code;
691
692 orig_cond = get_loop_exit_condition (loop);
693 gcc_assert (orig_cond);
694 loop_cond_gsi = gsi_for_stmt (orig_cond);
695
696 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
697 create_iv (init, step, NULL_TREE, loop,
698 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
699
700 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
701 true, NULL_TREE, true,
702 GSI_SAME_STMT);
703 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
704 true, GSI_SAME_STMT);
705
706 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
707 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
708 NULL_TREE);
709
710 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
711
712 /* Remove old loop exit test: */
713 gsi_remove (&loop_cond_gsi, true);
706567b8 714 free_stmt_vec_info (orig_cond);
fb85abff 715
716 loop_loc = find_loop_location (loop);
6d8fb6cf 717 if (dump_enabled_p ())
fb85abff 718 {
36f39b2e 719 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
720 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
721 LOCATION_LINE (loop_loc));
7bd765d4 722 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
78bb46f5 723 dump_printf (MSG_NOTE, "\n");
fb85abff 724 }
fb85abff 725 loop->nb_iterations = niters;
726}
727
c71d3c24 728/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
729 For all PHI arguments in FROM->dest and TO->dest from those
730 edges ensure that TO->dest PHI arguments have current_def
731 to that in from. */
732
733static void
734slpeel_duplicate_current_defs_from_edges (edge from, edge to)
735{
736 gimple_stmt_iterator gsi_from, gsi_to;
737
738 for (gsi_from = gsi_start_phis (from->dest),
739 gsi_to = gsi_start_phis (to->dest);
740 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);
741 gsi_next (&gsi_from), gsi_next (&gsi_to))
742 {
743 gimple from_phi = gsi_stmt (gsi_from);
744 gimple to_phi = gsi_stmt (gsi_to);
745 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
746 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
747 if (TREE_CODE (from_arg) == SSA_NAME
748 && TREE_CODE (to_arg) == SSA_NAME
749 && get_current_def (to_arg) == NULL_TREE)
750 set_current_def (to_arg, get_current_def (from_arg));
751 }
752}
753
fb85abff 754
48e1416a 755/* Given LOOP this function generates a new copy of it and puts it
c71d3c24 756 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
757 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
758 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
759 entry or exit of LOOP. */
fb85abff 760
761struct loop *
c71d3c24 762slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
763 struct loop *scalar_loop, edge e)
fb85abff 764{
765 struct loop *new_loop;
766 basic_block *new_bbs, *bbs;
767 bool at_exit;
768 bool was_imm_dom;
48e1416a 769 basic_block exit_dest;
fb85abff 770 edge exit, new_exit;
fb85abff 771
c9b2c569 772 exit = single_exit (loop);
773 at_exit = (e == exit);
fb85abff 774 if (!at_exit && e != loop_preheader_edge (loop))
775 return NULL;
776
c71d3c24 777 if (scalar_loop == NULL)
778 scalar_loop = loop;
779
780 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
781 get_loop_body_with_size (scalar_loop, bbs, scalar_loop->num_nodes);
fb85abff 782
783 /* Check whether duplication is possible. */
c71d3c24 784 if (!can_copy_bbs_p (bbs, scalar_loop->num_nodes))
fb85abff 785 {
786 free (bbs);
787 return NULL;
788 }
789
790 /* Generate new loop structure. */
c71d3c24 791 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
792 duplicate_subloops (scalar_loop, new_loop);
fb85abff 793
c9b2c569 794 exit_dest = exit->dest;
48e1416a 795 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
796 exit_dest) == loop->header ?
fb85abff 797 true : false);
798
c9b2c569 799 /* Also copy the pre-header, this avoids jumping through hoops to
800 duplicate the loop entry PHI arguments. Create an empty
801 pre-header unconditionally for this. */
c71d3c24 802 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
c9b2c569 803 edge entry_e = single_pred_edge (preheader);
c71d3c24 804 bbs[scalar_loop->num_nodes] = preheader;
805 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
fb85abff 806
c71d3c24 807 exit = single_exit (scalar_loop);
808 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
fb85abff 809 &exit, 1, &new_exit, NULL,
d99f53b2 810 e->src, true);
c71d3c24 811 exit = single_exit (loop);
812 basic_block new_preheader = new_bbs[scalar_loop->num_nodes];
fb85abff 813
c71d3c24 814 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
fb85abff 815
c71d3c24 816 if (scalar_loop != loop)
817 {
818 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
819 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
820 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
821 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
822 header) to have current_def set, so copy them over. */
823 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
824 exit);
825 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
826 0),
827 EDGE_SUCC (loop->latch, 0));
828 }
48e1416a 829
fb85abff 830 if (at_exit) /* Add the loop copy at exit. */
831 {
c71d3c24 832 if (scalar_loop != loop)
833 {
834 gimple_stmt_iterator gsi;
835 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
836
837 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
838 gsi_next (&gsi))
839 {
840 gimple phi = gsi_stmt (gsi);
841 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
842 location_t orig_locus
843 = gimple_phi_arg_location_from_edge (phi, e);
844
845 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
846 }
847 }
c9b2c569 848 redirect_edge_and_branch_force (e, new_preheader);
849 flush_pending_stmts (e);
850 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
fb85abff 851 if (was_imm_dom)
c71d3c24 852 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
c9b2c569 853
854 /* And remove the non-necessary forwarder again. Keep the other
855 one so we have a proper pre-header for the loop at the exit edge. */
c71d3c24 856 redirect_edge_pred (single_succ_edge (preheader),
857 single_pred (preheader));
c9b2c569 858 delete_basic_block (preheader);
c71d3c24 859 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
860 loop_preheader_edge (scalar_loop)->src);
fb85abff 861 }
862 else /* Add the copy at entry. */
863 {
c71d3c24 864 if (scalar_loop != loop)
865 {
866 /* Remove the non-necessary forwarder of scalar_loop again. */
867 redirect_edge_pred (single_succ_edge (preheader),
868 single_pred (preheader));
869 delete_basic_block (preheader);
870 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
871 loop_preheader_edge (scalar_loop)->src);
872 preheader = split_edge (loop_preheader_edge (loop));
873 entry_e = single_pred_edge (preheader);
874 }
875
c9b2c569 876 redirect_edge_and_branch_force (entry_e, new_preheader);
877 flush_pending_stmts (entry_e);
878 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
879
880 redirect_edge_and_branch_force (new_exit, preheader);
881 flush_pending_stmts (new_exit);
882 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
883
884 /* And remove the non-necessary forwarder again. Keep the other
885 one so we have a proper pre-header for the loop at the exit edge. */
c71d3c24 886 redirect_edge_pred (single_succ_edge (new_preheader),
887 single_pred (new_preheader));
c9b2c569 888 delete_basic_block (new_preheader);
889 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
890 loop_preheader_edge (new_loop)->src);
fb85abff 891 }
892
c71d3c24 893 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
c9b2c569 894 rename_variables_in_bb (new_bbs[i]);
895
c71d3c24 896 if (scalar_loop != loop)
897 {
898 /* Update new_loop->header PHIs, so that on the preheader
899 edge they are the ones from loop rather than scalar_loop. */
900 gimple_stmt_iterator gsi_orig, gsi_new;
901 edge orig_e = loop_preheader_edge (loop);
902 edge new_e = loop_preheader_edge (new_loop);
903
904 for (gsi_orig = gsi_start_phis (loop->header),
905 gsi_new = gsi_start_phis (new_loop->header);
906 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
907 gsi_next (&gsi_orig), gsi_next (&gsi_new))
908 {
909 gimple orig_phi = gsi_stmt (gsi_orig);
910 gimple new_phi = gsi_stmt (gsi_new);
911 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
912 location_t orig_locus
913 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
914
915 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
916 }
917 }
918
fb85abff 919 free (new_bbs);
920 free (bbs);
921
c9b2c569 922#ifdef ENABLE_CHECKING
923 verify_dominators (CDI_DOMINATORS);
924#endif
925
fb85abff 926 return new_loop;
927}
928
929
930/* Given the condition statement COND, put it as the last statement
931 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
48e1416a 932 Assumes that this is the single exit of the guarded loop.
23a3430d 933 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
fb85abff 934
935static edge
23a3430d 936slpeel_add_loop_guard (basic_block guard_bb, tree cond,
937 gimple_seq cond_expr_stmt_list,
877584e4 938 basic_block exit_bb, basic_block dom_bb,
939 int probability)
fb85abff 940{
941 gimple_stmt_iterator gsi;
942 edge new_e, enter_e;
943 gimple cond_stmt;
944 gimple_seq gimplify_stmt_list = NULL;
945
946 enter_e = EDGE_SUCC (guard_bb, 0);
947 enter_e->flags &= ~EDGE_FALLTHRU;
948 enter_e->flags |= EDGE_FALSE_VALUE;
949 gsi = gsi_last_bb (guard_bb);
950
87ae3989 951 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
952 NULL_TREE);
23a3430d 953 if (gimplify_stmt_list)
954 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
87ae3989 955 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
23a3430d 956 if (cond_expr_stmt_list)
957 gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
fb85abff 958
959 gsi = gsi_last_bb (guard_bb);
960 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
961
962 /* Add new edge to connect guard block to the merge/loop-exit block. */
963 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
877584e4 964
965 new_e->count = guard_bb->count;
966 new_e->probability = probability;
967 new_e->count = apply_probability (enter_e->count, probability);
968 enter_e->count -= new_e->count;
969 enter_e->probability = inverse_probability (probability);
fb85abff 970 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
971 return new_e;
972}
973
974
975/* This function verifies that the following restrictions apply to LOOP:
976 (1) it is innermost
977 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
978 (3) it is single entry, single exit
979 (4) its exit condition is the last stmt in the header
980 (5) E is the entry/exit edge of LOOP.
981 */
982
983bool
984slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
985{
986 edge exit_e = single_exit (loop);
987 edge entry_e = loop_preheader_edge (loop);
988 gimple orig_cond = get_loop_exit_condition (loop);
989 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
990
fb85abff 991 if (loop->inner
992 /* All loops have an outer scope; the only case loop->outer is NULL is for
993 the function itself. */
994 || !loop_outer (loop)
995 || loop->num_nodes != 2
996 || !empty_block_p (loop->latch)
997 || !single_exit (loop)
998 /* Verify that new loop exit condition can be trivially modified. */
999 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1000 || (e != exit_e && e != entry_e))
1001 return false;
1002
1003 return true;
1004}
1005
1006#ifdef ENABLE_CHECKING
1007static void
1008slpeel_verify_cfg_after_peeling (struct loop *first_loop,
1009 struct loop *second_loop)
1010{
1011 basic_block loop1_exit_bb = single_exit (first_loop)->dest;
1012 basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
1013 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1014
1015 /* A guard that controls whether the second_loop is to be executed or skipped
1016 is placed in first_loop->exit. first_loop->exit therefore has two
1017 successors - one is the preheader of second_loop, and the other is a bb
1018 after second_loop.
1019 */
1020 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
48e1416a 1021
fb85abff 1022 /* 1. Verify that one of the successors of first_loop->exit is the preheader
1023 of second_loop. */
48e1416a 1024
fb85abff 1025 /* The preheader of new_loop is expected to have two predecessors:
1026 first_loop->exit and the block that precedes first_loop. */
1027
48e1416a 1028 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
fb85abff 1029 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1030 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1031 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
1032 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
48e1416a 1033
fb85abff 1034 /* Verify that the other successor of first_loop->exit is after the
1035 second_loop. */
1036 /* TODO */
1037}
1038#endif
1039
1040/* If the run time cost model check determines that vectorization is
1041 not profitable and hence scalar loop should be generated then set
1042 FIRST_NITERS to prologue peeled iterations. This will allow all the
1043 iterations to be executed in the prologue peeled scalar loop. */
1044
1045static void
1046set_prologue_iterations (basic_block bb_before_first_loop,
2f630015 1047 tree *first_niters,
fb85abff 1048 struct loop *loop,
877584e4 1049 unsigned int th,
1050 int probability)
fb85abff 1051{
1052 edge e;
1053 basic_block cond_bb, then_bb;
1054 tree var, prologue_after_cost_adjust_name;
1055 gimple_stmt_iterator gsi;
1056 gimple newphi;
1057 edge e_true, e_false, e_fallthru;
1058 gimple cond_stmt;
87ae3989 1059 gimple_seq stmts = NULL;
fb85abff 1060 tree cost_pre_condition = NULL_TREE;
48e1416a 1061 tree scalar_loop_iters =
fb85abff 1062 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
1063
1064 e = single_pred_edge (bb_before_first_loop);
9af5ce0c 1065 cond_bb = split_edge (e);
fb85abff 1066
1067 e = single_pred_edge (bb_before_first_loop);
9af5ce0c 1068 then_bb = split_edge (e);
fb85abff 1069 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1070
1071 e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1072 EDGE_FALSE_VALUE);
1073 set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1074
1075 e_true = EDGE_PRED (then_bb, 0);
1076 e_true->flags &= ~EDGE_FALLTHRU;
1077 e_true->flags |= EDGE_TRUE_VALUE;
1078
877584e4 1079 e_true->probability = probability;
1080 e_false->probability = inverse_probability (probability);
1081 e_true->count = apply_probability (cond_bb->count, probability);
1082 e_false->count = cond_bb->count - e_true->count;
1083 then_bb->frequency = EDGE_FREQUENCY (e_true);
1084 then_bb->count = e_true->count;
1085
fb85abff 1086 e_fallthru = EDGE_SUCC (then_bb, 0);
877584e4 1087 e_fallthru->count = then_bb->count;
fb85abff 1088
87ae3989 1089 gsi = gsi_last_bb (cond_bb);
fb85abff 1090 cost_pre_condition =
48e1416a 1091 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
fb85abff 1092 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1093 cost_pre_condition =
87ae3989 1094 force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
1095 NULL_TREE, false, GSI_CONTINUE_LINKING);
1096 cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
1097 NULL_TREE, NULL_TREE);
fb85abff 1098 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
48e1416a 1099
fb85abff 1100 var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1101 "prologue_after_cost_adjust");
48e1416a 1102 prologue_after_cost_adjust_name =
fb85abff 1103 force_gimple_operand (scalar_loop_iters, &stmts, false, var);
1104
1105 gsi = gsi_last_bb (then_bb);
1106 if (stmts)
1107 gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
1108
1109 newphi = create_phi_node (var, bb_before_first_loop);
48e1416a 1110 add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
60d535d2 1111 UNKNOWN_LOCATION);
1112 add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
fb85abff 1113
2f630015 1114 *first_niters = PHI_RESULT (newphi);
fb85abff 1115}
1116
fb85abff 1117/* Function slpeel_tree_peel_loop_to_edge.
1118
1119 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1120 that is placed on the entry (exit) edge E of LOOP. After this transformation
1121 we have two loops one after the other - first-loop iterates FIRST_NITERS
1122 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
48e1416a 1123 If the cost model indicates that it is profitable to emit a scalar
fb85abff 1124 loop instead of the vector one, then the prolog (epilog) loop will iterate
1125 for the entire unchanged scalar iterations of the loop.
1126
1127 Input:
1128 - LOOP: the loop to be peeled.
c71d3c24 1129 - SCALAR_LOOP: if non-NULL, the alternate loop from which basic blocks
1130 should be copied.
fb85abff 1131 - E: the exit or entry edge of LOOP.
1132 If it is the entry edge, we peel the first iterations of LOOP. In this
1133 case first-loop is LOOP, and second-loop is the newly created loop.
1134 If it is the exit edge, we peel the last iterations of LOOP. In this
1135 case, first-loop is the newly created loop, and second-loop is LOOP.
1136 - NITERS: the number of iterations that LOOP iterates.
1137 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1138 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1139 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1140 is false, the caller of this function may want to take care of this
1141 (this can be useful if we don't want new stmts added to first-loop).
1142 - TH: cost model profitability threshold of iterations for vectorization.
1143 - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1144 during versioning and hence needs to occur during
48e1416a 1145 prologue generation or whether cost model check
fb85abff 1146 has not occurred during prologue generation and hence
1147 needs to occur during epilogue generation.
877584e4 1148 - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1149 - BOUND2 is the upper bound on number of iterations of the second loop (if known)
48e1416a 1150
fb85abff 1151
1152 Output:
1153 The function returns a pointer to the new loop-copy, or NULL if it failed
1154 to perform the transformation.
1155
1156 The function generates two if-then-else guards: one before the first loop,
1157 and the other before the second loop:
1158 The first guard is:
1159 if (FIRST_NITERS == 0) then skip the first loop,
1160 and go directly to the second loop.
1161 The second guard is:
1162 if (FIRST_NITERS == NITERS) then skip the second loop.
1163
23a3430d 1164 If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1165 then the generated condition is combined with COND_EXPR and the
1166 statements in COND_EXPR_STMT_LIST are emitted together with it.
1167
fb85abff 1168 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1169 FORNOW the resulting code will not be in loop-closed-ssa form.
1170*/
1171
c71d3c24 1172static struct loop *
1173slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loop *scalar_loop,
2f630015 1174 edge e, tree *first_niters,
fb85abff 1175 tree niters, bool update_first_loop_count,
23a3430d 1176 unsigned int th, bool check_profitability,
877584e4 1177 tree cond_expr, gimple_seq cond_expr_stmt_list,
1178 int bound1, int bound2)
fb85abff 1179{
1180 struct loop *new_loop = NULL, *first_loop, *second_loop;
1181 edge skip_e;
1182 tree pre_condition = NULL_TREE;
fb85abff 1183 basic_block bb_before_second_loop, bb_after_second_loop;
1184 basic_block bb_before_first_loop;
1185 basic_block bb_between_loops;
1186 basic_block new_exit_bb;
38091110 1187 gimple_stmt_iterator gsi;
fb85abff 1188 edge exit_e = single_exit (loop);
36f39b2e 1189 source_location loop_loc;
877584e4 1190 /* There are many aspects to how likely the first loop is going to be executed.
1191 Without histogram we can't really do good job. Simply set it to
1192 2/3, so the first loop is not reordered to the end of function and
1193 the hot path through stays short. */
1194 int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1195 int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1196 int probability_of_second_loop;
48e1416a 1197
fb85abff 1198 if (!slpeel_can_duplicate_loop_p (loop, e))
1199 return NULL;
48e1416a 1200
f68f7ce8 1201 /* We might have a queued need to update virtual SSA form. As we
1202 delete the update SSA machinery below after doing a regular
1203 incremental SSA update during loop copying make sure we don't
1204 lose that fact.
1205 ??? Needing to update virtual SSA form by renaming is unfortunate
1206 but not all of the vectorizer code inserting new loads / stores
1207 properly assigns virtual operands to those statements. */
1208 update_ssa (TODO_update_ssa_only_virtuals);
1209
38091110 1210 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1211 in the exit bb and rename all the uses after the loop. This simplifies
1212 the *guard[12] routines, which assume loop closed SSA form for all PHIs
1213 (but normally loop closed SSA form doesn't require virtual PHIs to be
1214 in the same form). Doing this early simplifies the checking what
1215 uses should be renamed. */
1216 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
7c782c9b 1217 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
38091110 1218 {
1219 gimple phi = gsi_stmt (gsi);
1220 for (gsi = gsi_start_phis (exit_e->dest);
1221 !gsi_end_p (gsi); gsi_next (&gsi))
7c782c9b 1222 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
38091110 1223 break;
1224 if (gsi_end_p (gsi))
1225 {
874117c8 1226 tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
1227 gimple new_phi = create_phi_node (new_vop, exit_e->dest);
38091110 1228 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1229 imm_use_iterator imm_iter;
1230 gimple stmt;
38091110 1231 use_operand_p use_p;
1232
60d535d2 1233 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
38091110 1234 gimple_phi_set_result (new_phi, new_vop);
1235 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1236 if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1237 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1238 SET_USE (use_p, new_vop);
1239 }
1240 break;
1241 }
fb85abff 1242
1243 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1244 Resulting CFG would be:
1245
1246 first_loop:
1247 do {
1248 } while ...
1249
1250 second_loop:
1251 do {
1252 } while ...
1253
1254 orig_exit_bb:
1255 */
48e1416a 1256
c71d3c24 1257 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop,
1258 e)))
fb85abff 1259 {
1260 loop_loc = find_loop_location (loop);
7bd765d4 1261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1262 "tree_duplicate_loop_to_edge_cfg failed.\n");
fb85abff 1263 return NULL;
1264 }
48e1416a 1265
b123eaab 1266 if (MAY_HAVE_DEBUG_STMTS)
1267 {
f1f41a6c 1268 gcc_assert (!adjust_vec.exists ());
d70aebca 1269 adjust_vec.create (32);
b123eaab 1270 }
1271
fb85abff 1272 if (e == exit_e)
1273 {
1274 /* NEW_LOOP was placed after LOOP. */
1275 first_loop = loop;
1276 second_loop = new_loop;
1277 }
1278 else
1279 {
1280 /* NEW_LOOP was placed before LOOP. */
1281 first_loop = new_loop;
1282 second_loop = loop;
1283 }
1284
fb85abff 1285 /* 2. Add the guard code in one of the following ways:
1286
1287 2.a Add the guard that controls whether the first loop is executed.
1288 This occurs when this function is invoked for prologue or epilogue
1289 generation and when the cost model check can be done at compile time.
1290
1291 Resulting CFG would be:
1292
1293 bb_before_first_loop:
1294 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1295 GOTO first-loop
1296
1297 first_loop:
1298 do {
1299 } while ...
1300
1301 bb_before_second_loop:
1302
1303 second_loop:
1304 do {
1305 } while ...
1306
1307 orig_exit_bb:
1308
1309 2.b Add the cost model check that allows the prologue
1310 to iterate for the entire unchanged scalar
1311 iterations of the loop in the event that the cost
1312 model indicates that the scalar loop is more
1313 profitable than the vector one. This occurs when
1314 this function is invoked for prologue generation
1315 and the cost model check needs to be done at run
1316 time.
1317
1318 Resulting CFG after prologue peeling would be:
1319
1320 if (scalar_loop_iterations <= th)
1321 FIRST_NITERS = scalar_loop_iterations
1322
1323 bb_before_first_loop:
1324 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1325 GOTO first-loop
1326
1327 first_loop:
1328 do {
1329 } while ...
1330
1331 bb_before_second_loop:
1332
1333 second_loop:
1334 do {
1335 } while ...
1336
1337 orig_exit_bb:
1338
1339 2.c Add the cost model check that allows the epilogue
1340 to iterate for the entire unchanged scalar
1341 iterations of the loop in the event that the cost
1342 model indicates that the scalar loop is more
1343 profitable than the vector one. This occurs when
1344 this function is invoked for epilogue generation
1345 and the cost model check needs to be done at run
23a3430d 1346 time. This check is combined with any pre-existing
1347 check in COND_EXPR to avoid versioning.
fb85abff 1348
1349 Resulting CFG after prologue peeling would be:
1350
1351 bb_before_first_loop:
1352 if ((scalar_loop_iterations <= th)
1353 ||
1354 FIRST_NITERS == 0) GOTO bb_before_second_loop
1355 GOTO first-loop
1356
1357 first_loop:
1358 do {
1359 } while ...
1360
1361 bb_before_second_loop:
1362
1363 second_loop:
1364 do {
1365 } while ...
1366
1367 orig_exit_bb:
1368 */
1369
1370 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
c9b2c569 1371 /* Loop copying insterted a forwarder block for us here. */
1372 bb_before_second_loop = single_exit (first_loop)->dest;
fb85abff 1373
877584e4 1374 probability_of_second_loop = (inverse_probability (first_guard_probability)
1375 + combine_probabilities (second_guard_probability,
1376 first_guard_probability));
1377 /* Theoretically preheader edge of first loop and exit edge should have
1378 same frequencies. Loop exit probablities are however easy to get wrong.
1379 It is safer to copy value from original loop entry. */
1380 bb_before_second_loop->frequency
f9d4b7f4 1381 = combine_probabilities (bb_before_first_loop->frequency,
1382 probability_of_second_loop);
877584e4 1383 bb_before_second_loop->count
1384 = apply_probability (bb_before_first_loop->count,
1385 probability_of_second_loop);
1386 single_succ_edge (bb_before_second_loop)->count
1387 = bb_before_second_loop->count;
1388
fb85abff 1389 /* Epilogue peeling. */
1390 if (!update_first_loop_count)
1391 {
796f6cba 1392 loop_vec_info loop_vinfo = loop_vec_info_for_loop (loop);
1393 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
1394 unsigned limit = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
1395 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1396 limit = limit + 1;
1397 if (check_profitability
1398 && th > limit)
1399 limit = th;
fb85abff 1400 pre_condition =
796f6cba 1401 fold_build2 (LT_EXPR, boolean_type_node, scalar_loop_iters,
1402 build_int_cst (TREE_TYPE (scalar_loop_iters), limit));
23a3430d 1403 if (cond_expr)
1404 {
1405 pre_condition =
1406 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1407 pre_condition,
1408 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1409 cond_expr));
1410 }
fb85abff 1411 }
1412
48e1416a 1413 /* Prologue peeling. */
fb85abff 1414 else
1415 {
1416 if (check_profitability)
1417 set_prologue_iterations (bb_before_first_loop, first_niters,
877584e4 1418 loop, th, first_guard_probability);
fb85abff 1419
1420 pre_condition =
2f630015 1421 fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1422 build_int_cst (TREE_TYPE (*first_niters), 0));
fb85abff 1423 }
1424
1425 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
23a3430d 1426 cond_expr_stmt_list,
877584e4 1427 bb_before_second_loop, bb_before_first_loop,
1428 inverse_probability (first_guard_probability));
1429 scale_loop_profile (first_loop, first_guard_probability,
1430 check_profitability && (int)th > bound1 ? th : bound1);
fb85abff 1431 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1432 first_loop == new_loop,
1e8cf080 1433 &new_exit_bb);
fb85abff 1434
1435
1436 /* 3. Add the guard that controls whether the second loop is executed.
1437 Resulting CFG would be:
1438
1439 bb_before_first_loop:
1440 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1441 GOTO first-loop
1442
1443 first_loop:
1444 do {
1445 } while ...
1446
1447 bb_between_loops:
1448 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1449 GOTO bb_before_second_loop
1450
1451 bb_before_second_loop:
1452
1453 second_loop:
1454 do {
1455 } while ...
1456
1457 bb_after_second_loop:
1458
1459 orig_exit_bb:
1460 */
1461
1462 bb_between_loops = new_exit_bb;
1463 bb_after_second_loop = split_edge (single_exit (second_loop));
1464
48e1416a 1465 pre_condition =
2f630015 1466 fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
23a3430d 1467 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
877584e4 1468 bb_after_second_loop, bb_before_first_loop,
1469 inverse_probability (second_guard_probability));
1470 scale_loop_profile (second_loop, probability_of_second_loop, bound2);
fb85abff 1471 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1472 second_loop == new_loop, &new_exit_bb);
1473
1474 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1475 */
1476 if (update_first_loop_count)
2f630015 1477 slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
fb85abff 1478
5d206f11 1479 delete_update_ssa ();
1480
b123eaab 1481 adjust_vec_debug_stmts ();
1482
fb85abff 1483 return new_loop;
1484}
1485
1486/* Function vect_get_loop_location.
1487
1488 Extract the location of the loop in the source code.
1489 If the loop is not well formed for vectorization, an estimated
1490 location is calculated.
1491 Return the loop location if succeed and NULL if not. */
1492
36f39b2e 1493source_location
fb85abff 1494find_loop_location (struct loop *loop)
1495{
1496 gimple stmt = NULL;
1497 basic_block bb;
1498 gimple_stmt_iterator si;
1499
1500 if (!loop)
36f39b2e 1501 return UNKNOWN_LOCATION;
fb85abff 1502
1503 stmt = get_loop_exit_condition (loop);
1504
b2d978a6 1505 if (stmt
1506 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
fb85abff 1507 return gimple_location (stmt);
1508
1509 /* If we got here the loop is probably not "well formed",
1510 try to estimate the loop location */
1511
1512 if (!loop->header)
36f39b2e 1513 return UNKNOWN_LOCATION;
fb85abff 1514
1515 bb = loop->header;
1516
1517 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1518 {
1519 stmt = gsi_stmt (si);
b2d978a6 1520 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
fb85abff 1521 return gimple_location (stmt);
1522 }
1523
36f39b2e 1524 return UNKNOWN_LOCATION;
fb85abff 1525}
1526
1527
fb85abff 1528/* Function vect_can_advance_ivs_p
1529
48e1416a 1530 In case the number of iterations that LOOP iterates is unknown at compile
1531 time, an epilog loop will be generated, and the loop induction variables
1532 (IVs) will be "advanced" to the value they are supposed to take just before
fb85abff 1533 the epilog loop. Here we check that the access function of the loop IVs
1534 and the expression that represents the loop bound are simple enough.
1535 These restrictions will be relaxed in the future. */
1536
48e1416a 1537bool
fb85abff 1538vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1539{
1540 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1541 basic_block bb = loop->header;
1542 gimple phi;
1543 gimple_stmt_iterator gsi;
1544
1545 /* Analyze phi functions of the loop header. */
1546
6d8fb6cf 1547 if (dump_enabled_p ())
78bb46f5 1548 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
fb85abff 1549 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1550 {
fb85abff 1551 tree evolution_part;
1552
1553 phi = gsi_stmt (gsi);
6d8fb6cf 1554 if (dump_enabled_p ())
fb85abff 1555 {
7bd765d4 1556 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1557 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
78bb46f5 1558 dump_printf (MSG_NOTE, "\n");
fb85abff 1559 }
1560
1561 /* Skip virtual phi's. The data dependences that are associated with
1562 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1563
7c782c9b 1564 if (virtual_operand_p (PHI_RESULT (phi)))
fb85abff 1565 {
6d8fb6cf 1566 if (dump_enabled_p ())
7bd765d4 1567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 1568 "virtual phi. skip.\n");
fb85abff 1569 continue;
1570 }
1571
1572 /* Skip reduction phis. */
1573
1574 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1575 {
6d8fb6cf 1576 if (dump_enabled_p ())
7bd765d4 1577 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 1578 "reduc phi. skip.\n");
fb85abff 1579 continue;
1580 }
1581
1582 /* Analyze the evolution function. */
1583
2cd0995e 1584 evolution_part
1585 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
fb85abff 1586 if (evolution_part == NULL_TREE)
1587 {
6d8fb6cf 1588 if (dump_enabled_p ())
2cd0995e 1589 dump_printf (MSG_MISSED_OPTIMIZATION,
78bb46f5 1590 "No access function or evolution.\n");
fb85abff 1591 return false;
1592 }
48e1416a 1593
1594 /* FORNOW: We do not transform initial conditions of IVs
fb85abff 1595 which evolution functions are a polynomial of degree >= 2. */
1596
1597 if (tree_is_chrec (evolution_part))
48e1416a 1598 return false;
fb85abff 1599 }
1600
1601 return true;
1602}
1603
1604
1605/* Function vect_update_ivs_after_vectorizer.
1606
1607 "Advance" the induction variables of LOOP to the value they should take
1608 after the execution of LOOP. This is currently necessary because the
1609 vectorizer does not handle induction variables that are used after the
1610 loop. Such a situation occurs when the last iterations of LOOP are
1611 peeled, because:
1612 1. We introduced new uses after LOOP for IVs that were not originally used
1613 after LOOP: the IVs of LOOP are now used by an epilog loop.
1614 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1615 times, whereas the loop IVs should be bumped N times.
1616
1617 Input:
1618 - LOOP - a loop that is going to be vectorized. The last few iterations
1619 of LOOP were peeled.
1620 - NITERS - the number of iterations that LOOP executes (before it is
1621 vectorized). i.e, the number of times the ivs should be bumped.
1622 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1623 coming out from LOOP on which there are uses of the LOOP ivs
1624 (this is the path from LOOP->exit to epilog_loop->preheader).
1625
1626 The new definitions of the ivs are placed in LOOP->exit.
1627 The phi args associated with the edge UPDATE_E in the bb
1628 UPDATE_E->dest are updated accordingly.
1629
1630 Assumption 1: Like the rest of the vectorizer, this function assumes
1631 a single loop exit that has a single predecessor.
1632
1633 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1634 organized in the same order.
1635
1636 Assumption 3: The access function of the ivs is simple enough (see
1637 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1638
1639 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
48e1416a 1640 coming out of LOOP on which the ivs of LOOP are used (this is the path
fb85abff 1641 that leads to the epilog loop; other paths skip the epilog loop). This
1642 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1643 needs to have its phis updated.
1644 */
1645
1646static void
48e1416a 1647vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
fb85abff 1648 edge update_e)
1649{
1650 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1651 basic_block exit_bb = single_exit (loop)->dest;
1652 gimple phi, phi1;
1653 gimple_stmt_iterator gsi, gsi1;
1654 basic_block update_bb = update_e->dest;
1655
f3d6d99e 1656 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
fb85abff 1657
1658 /* Make sure there exists a single-predecessor exit bb: */
1659 gcc_assert (single_pred_p (exit_bb));
1660
1661 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1662 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1663 gsi_next (&gsi), gsi_next (&gsi1))
1664 {
fb85abff 1665 tree init_expr;
1efcacec 1666 tree step_expr, off;
1667 tree type;
fb85abff 1668 tree var, ni, ni_name;
1669 gimple_stmt_iterator last_gsi;
86faead7 1670 stmt_vec_info stmt_info;
fb85abff 1671
1672 phi = gsi_stmt (gsi);
1673 phi1 = gsi_stmt (gsi1);
6d8fb6cf 1674 if (dump_enabled_p ())
fb85abff 1675 {
7bd765d4 1676 dump_printf_loc (MSG_NOTE, vect_location,
1677 "vect_update_ivs_after_vectorizer: phi: ");
1678 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
78bb46f5 1679 dump_printf (MSG_NOTE, "\n");
fb85abff 1680 }
1681
1682 /* Skip virtual phi's. */
7c782c9b 1683 if (virtual_operand_p (PHI_RESULT (phi)))
fb85abff 1684 {
6d8fb6cf 1685 if (dump_enabled_p ())
7bd765d4 1686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 1687 "virtual phi. skip.\n");
fb85abff 1688 continue;
1689 }
1690
1691 /* Skip reduction phis. */
86faead7 1692 stmt_info = vinfo_for_stmt (phi);
1693 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
48e1416a 1694 {
6d8fb6cf 1695 if (dump_enabled_p ())
7bd765d4 1696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 1697 "reduc phi. skip.\n");
fb85abff 1698 continue;
48e1416a 1699 }
fb85abff 1700
86faead7 1701 type = TREE_TYPE (gimple_phi_result (phi));
1702 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1703 step_expr = unshare_expr (step_expr);
48e1416a 1704
fb85abff 1705 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1706 of degree >= 2 or exponential. */
86faead7 1707 gcc_assert (!tree_is_chrec (step_expr));
fb85abff 1708
86faead7 1709 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
fb85abff 1710
1efcacec 1711 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1712 fold_convert (TREE_TYPE (step_expr), niters),
1713 step_expr);
86faead7 1714 if (POINTER_TYPE_P (type))
2cc66f2a 1715 ni = fold_build_pointer_plus (init_expr, off);
fb85abff 1716 else
86faead7 1717 ni = fold_build2 (PLUS_EXPR, type,
1718 init_expr, fold_convert (type, off));
fb85abff 1719
86faead7 1720 var = create_tmp_var (type, "tmp");
fb85abff 1721
1722 last_gsi = gsi_last_bb (exit_bb);
1723 ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1724 true, GSI_SAME_STMT);
48e1416a 1725
fb85abff 1726 /* Fix phi expressions in the successor bb. */
b123eaab 1727 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
fb85abff 1728 }
1729}
1730
fb85abff 1731/* Function vect_do_peeling_for_loop_bound
1732
1733 Peel the last iterations of the loop represented by LOOP_VINFO.
48e1416a 1734 The peeled iterations form a new epilog loop. Given that the loop now
fb85abff 1735 iterates NITERS times, the new epilog loop iterates
1736 NITERS % VECTORIZATION_FACTOR times.
48e1416a 1737
1738 The original loop will later be made to iterate
23a3430d 1739 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1740
1741 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1742 test. */
fb85abff 1743
48e1416a 1744void
782fd1d1 1745vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1746 tree ni_name, tree ratio_mult_vf_name,
e7430948 1747 unsigned int th, bool check_profitability)
fb85abff 1748{
fb85abff 1749 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
c71d3c24 1750 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
fb85abff 1751 struct loop *new_loop;
1752 edge update_e;
1753 basic_block preheader;
1754 int loop_num;
2afdcbed 1755 int max_iter;
e7430948 1756 tree cond_expr = NULL_TREE;
1757 gimple_seq cond_expr_stmt_list = NULL;
fb85abff 1758
6d8fb6cf 1759 if (dump_enabled_p ())
b055bc88 1760 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1761 "=== vect_do_peeling_for_loop_bound ===\n");
fb85abff 1762
1763 initialize_original_copy_tables ();
1764
48e1416a 1765 loop_num = loop->num;
fb85abff 1766
c71d3c24 1767 new_loop
1768 = slpeel_tree_peel_loop_to_edge (loop, scalar_loop, single_exit (loop),
1769 &ratio_mult_vf_name, ni_name, false,
1770 th, check_profitability,
1771 cond_expr, cond_expr_stmt_list,
1772 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
fb85abff 1773 gcc_assert (new_loop);
1774 gcc_assert (loop_num == loop->num);
1775#ifdef ENABLE_CHECKING
1776 slpeel_verify_cfg_after_peeling (loop, new_loop);
1777#endif
1778
1779 /* A guard that controls whether the new_loop is to be executed or skipped
1780 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1781 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1782 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1783 is on the path where the LOOP IVs are used and need to be updated. */
1784
1785 preheader = loop_preheader_edge (new_loop)->src;
1786 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1787 update_e = EDGE_PRED (preheader, 0);
1788 else
1789 update_e = EDGE_PRED (preheader, 1);
1790
48e1416a 1791 /* Update IVs of original loop as if they were advanced
fb85abff 1792 by ratio_mult_vf_name steps. */
48e1416a 1793 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
fb85abff 1794
d3f1934c 1795 /* For vectorization factor N, we need to copy last N-1 values in epilogue
1796 and this means N-2 loopback edge executions.
1797
1798 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1799 will execute at least LOOP_VINFO_VECT_FACTOR times. */
1800 max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1801 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1802 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
e7430948 1803 if (check_profitability)
d3f1934c 1804 max_iter = MAX (max_iter, (int) th - 1);
e913b5cd 1805 record_niter_bound (new_loop, max_iter, false, true);
b055bc88 1806 dump_printf (MSG_NOTE,
7bd765d4 1807 "Setting upper bound of nb iterations for epilogue "
1808 "loop to %d\n", max_iter);
15fa0281 1809
fb85abff 1810 /* After peeling we have to reset scalar evolution analyzer. */
1811 scev_reset ();
1812
1813 free_original_copy_tables ();
1814}
1815
1816
1817/* Function vect_gen_niters_for_prolog_loop
1818
1819 Set the number of iterations for the loop represented by LOOP_VINFO
1820 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1821 and the misalignment of DR - the data reference recorded in
48e1416a 1822 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
fb85abff 1823 this loop, the data reference DR will refer to an aligned location.
1824
1825 The following computation is generated:
1826
1827 If the misalignment of DR is known at compile time:
1828 addr_mis = int mis = DR_MISALIGNMENT (dr);
1829 Else, compute address misalignment in bytes:
482a44fa 1830 addr_mis = addr & (vectype_align - 1)
fb85abff 1831
1832 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1833
1834 (elem_size = element type size; an element is the scalar element whose type
1835 is the inner type of the vectype)
1836
1837 When the step of the data-ref in the loop is not 1 (as in interleaved data
1838 and SLP), the number of iterations of the prolog must be divided by the step
1839 (which is equal to the size of interleaved group).
1840
1841 The above formulas assume that VF == number of elements in the vector. This
1842 may not hold when there are multiple-types in the loop.
1843 In this case, for some data-references in the loop the VF does not represent
1844 the number of elements that fit in the vector. Therefore, instead of VF we
1845 use TYPE_VECTOR_SUBPARTS. */
1846
48e1416a 1847static tree
877584e4 1848vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
fb85abff 1849{
1850 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1851 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1852 tree var;
1853 gimple_seq stmts;
1854 tree iters, iters_name;
1855 edge pe;
1856 basic_block new_bb;
1857 gimple dr_stmt = DR_STMT (dr);
1858 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1859 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1860 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1861 tree niters_type = TREE_TYPE (loop_niters);
fb85abff 1862 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1863
48e1416a 1864 pe = loop_preheader_edge (loop);
fb85abff 1865
313a5120 1866 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
fb85abff 1867 {
313a5120 1868 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
fb85abff 1869
6d8fb6cf 1870 if (dump_enabled_p ())
b055bc88 1871 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1872 "known peeling = %d.\n", npeel);
fb85abff 1873
0822b158 1874 iters = build_int_cst (niters_type, npeel);
313a5120 1875 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
fb85abff 1876 }
1877 else
1878 {
1879 gimple_seq new_stmts = NULL;
f1b8c740 1880 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1881 tree offset = negative
1882 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
48e1416a 1883 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
f1b8c740 1884 &new_stmts, offset, loop);
3cea8318 1885 tree type = unsigned_type_for (TREE_TYPE (start_addr));
482a44fa 1886 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1887 HOST_WIDE_INT elem_size =
1888 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1889 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
fb85abff 1890 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1891 tree nelements_tree = build_int_cst (type, nelements);
1892 tree byte_misalign;
1893 tree elem_misalign;
1894
1895 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1896 gcc_assert (!new_bb);
48e1416a 1897
482a44fa 1898 /* Create: byte_misalign = addr & (vectype_align - 1) */
48e1416a 1899 byte_misalign =
0822b158 1900 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
482a44fa 1901 vectype_align_minus_1);
48e1416a 1902
fb85abff 1903 /* Create: elem_misalign = byte_misalign / element_size */
1904 elem_misalign =
1905 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1906
1907 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
f1b8c740 1908 if (negative)
1909 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1910 else
1911 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
fb85abff 1912 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1913 iters = fold_convert (niters_type, iters);
877584e4 1914 *bound = nelements;
fb85abff 1915 }
1916
1917 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1918 /* If the loop bound is known at compile time we already verified that it is
1919 greater than vf; since the misalignment ('iters') is at most vf, there's
1920 no need to generate the MIN_EXPR in this case. */
1921 if (TREE_CODE (loop_niters) != INTEGER_CST)
1922 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1923
6d8fb6cf 1924 if (dump_enabled_p ())
fb85abff 1925 {
b055bc88 1926 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 1927 "niters for prolog loop: ");
b055bc88 1928 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
78bb46f5 1929 dump_printf (MSG_NOTE, "\n");
fb85abff 1930 }
1931
1932 var = create_tmp_var (niters_type, "prolog_loop_niters");
fb85abff 1933 stmts = NULL;
1934 iters_name = force_gimple_operand (iters, &stmts, false, var);
1935
1936 /* Insert stmt on loop preheader edge. */
1937 if (stmts)
1938 {
1939 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1940 gcc_assert (!new_bb);
1941 }
1942
48e1416a 1943 return iters_name;
fb85abff 1944}
1945
1946
1947/* Function vect_update_init_of_dr
1948
1949 NITERS iterations were peeled from LOOP. DR represents a data reference
1950 in LOOP. This function updates the information recorded in DR to
48e1416a 1951 account for the fact that the first NITERS iterations had already been
fb85abff 1952 executed. Specifically, it updates the OFFSET field of DR. */
1953
1954static void
1955vect_update_init_of_dr (struct data_reference *dr, tree niters)
1956{
1957 tree offset = DR_OFFSET (dr);
48e1416a 1958
fb85abff 1959 niters = fold_build2 (MULT_EXPR, sizetype,
1960 fold_convert (sizetype, niters),
1961 fold_convert (sizetype, DR_STEP (dr)));
87f9ffa4 1962 offset = fold_build2 (PLUS_EXPR, sizetype,
1963 fold_convert (sizetype, offset), niters);
fb85abff 1964 DR_OFFSET (dr) = offset;
1965}
1966
1967
1968/* Function vect_update_inits_of_drs
1969
48e1416a 1970 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1971 This function updates the information recorded for the data references in
1972 the loop to account for the fact that the first NITERS iterations had
fb85abff 1973 already been executed. Specifically, it updates the initial_condition of
1974 the access_function of all the data_references in the loop. */
1975
1976static void
1977vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1978{
1979 unsigned int i;
f1f41a6c 1980 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
fb85abff 1981 struct data_reference *dr;
7bd765d4 1982
6d8fb6cf 1983 if (dump_enabled_p ())
b055bc88 1984 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1985 "=== vect_update_inits_of_dr ===\n");
fb85abff 1986
f1f41a6c 1987 FOR_EACH_VEC_ELT (datarefs, i, dr)
fb85abff 1988 vect_update_init_of_dr (dr, niters);
1989}
1990
1991
1992/* Function vect_do_peeling_for_alignment
1993
1994 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1995 'niters' is set to the misalignment of one of the data references in the
1996 loop, thereby forcing it to refer to an aligned location at the beginning
1997 of the execution of this loop. The data reference for which we are
1998 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
1999
2000void
782fd1d1 2001vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
e7430948 2002 unsigned int th, bool check_profitability)
fb85abff 2003{
2004 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
c71d3c24 2005 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
782fd1d1 2006 tree niters_of_prolog_loop;
be2e5c02 2007 tree wide_prolog_niters;
fb85abff 2008 struct loop *new_loop;
b6556916 2009 int max_iter;
877584e4 2010 int bound = 0;
fb85abff 2011
6d8fb6cf 2012 if (dump_enabled_p ())
6ee2edad 2013 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2014 "loop peeled for vectorization to enhance"
2015 " alignment\n");
fb85abff 2016
2017 initialize_original_copy_tables ();
2018
782fd1d1 2019 gimple_seq stmts = NULL;
2020 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2f630015 2021 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
877584e4 2022 ni_name,
2023 &bound);
fb85abff 2024
fb85abff 2025 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2026 new_loop =
c71d3c24 2027 slpeel_tree_peel_loop_to_edge (loop, scalar_loop,
2028 loop_preheader_edge (loop),
2f630015 2029 &niters_of_prolog_loop, ni_name, true,
877584e4 2030 th, check_profitability, NULL_TREE, NULL,
c71d3c24 2031 bound, 0);
fb85abff 2032
2033 gcc_assert (new_loop);
2034#ifdef ENABLE_CHECKING
2035 slpeel_verify_cfg_after_peeling (new_loop, loop);
2036#endif
d3f1934c 2037 /* For vectorization factor N, we need to copy at most N-1 values
2038 for alignment and this means N-2 loopback edge executions. */
2039 max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
e7430948 2040 if (check_profitability)
d3f1934c 2041 max_iter = MAX (max_iter, (int) th - 1);
e913b5cd 2042 record_niter_bound (new_loop, max_iter, false, true);
b055bc88 2043 dump_printf (MSG_NOTE,
7bd765d4 2044 "Setting upper bound of nb iterations for prologue "
2045 "loop to %d\n", max_iter);
fb85abff 2046
2047 /* Update number of times loop executes. */
fb85abff 2048 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
313a5120 2049 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
796f6cba 2050 LOOP_VINFO_NITERSM1 (loop_vinfo) = fold_build2 (MINUS_EXPR,
2051 TREE_TYPE (ni_name),
2052 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_of_prolog_loop);
fb85abff 2053
2f630015 2054 if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
2055 wide_prolog_niters = niters_of_prolog_loop;
2056 else
2057 {
2058 gimple_seq seq = NULL;
2059 edge pe = loop_preheader_edge (loop);
2060 tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
2061 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2f630015 2062 wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
2063 var);
2064 if (seq)
2065 {
2066 /* Insert stmt on loop preheader edge. */
2067 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
2068 gcc_assert (!new_bb);
2069 }
2070 }
2071
fb85abff 2072 /* Update the init conditions of the access functions of all data refs. */
be2e5c02 2073 vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
fb85abff 2074
2075 /* After peeling we have to reset scalar evolution analyzer. */
2076 scev_reset ();
2077
2078 free_original_copy_tables ();
2079}
2080
2081
2082/* Function vect_create_cond_for_align_checks.
2083
2084 Create a conditional expression that represents the alignment checks for
2085 all of data references (array element references) whose alignment must be
2086 checked at runtime.
2087
2088 Input:
2089 COND_EXPR - input conditional expression. New conditions will be chained
2090 with logical AND operation.
2091 LOOP_VINFO - two fields of the loop information are used.
2092 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2093 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2094
2095 Output:
2096 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2097 expression.
2098 The returned value is the conditional expression to be used in the if
2099 statement that controls which version of the loop gets executed at runtime.
2100
2101 The algorithm makes two assumptions:
2102 1) The number of bytes "n" in a vector is a power of 2.
2103 2) An address "a" is aligned if a%n is zero and that this
2104 test can be done as a&(n-1) == 0. For example, for 16
2105 byte vectors the test is a&0xf == 0. */
2106
2107static void
2108vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2109 tree *cond_expr,
2110 gimple_seq *cond_expr_stmt_list)
2111{
2112 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
f1f41a6c 2113 vec<gimple> may_misalign_stmts
fb85abff 2114 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2115 gimple ref_stmt;
2116 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2117 tree mask_cst;
2118 unsigned int i;
fb85abff 2119 tree int_ptrsize_type;
2120 char tmp_name[20];
2121 tree or_tmp_name = NULL_TREE;
03d37e4e 2122 tree and_tmp_name;
fb85abff 2123 gimple and_stmt;
2124 tree ptrsize_zero;
2125 tree part_cond_expr;
2126
2127 /* Check that mask is one less than a power of 2, i.e., mask is
2128 all zeros followed by all ones. */
2129 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2130
3cea8318 2131 int_ptrsize_type = signed_type_for (ptr_type_node);
fb85abff 2132
2133 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2134 of the first vector of the i'th data reference. */
2135
f1f41a6c 2136 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
fb85abff 2137 {
2138 gimple_seq new_stmt_list = NULL;
2139 tree addr_base;
03d37e4e 2140 tree addr_tmp_name;
2141 tree new_or_tmp_name;
fb85abff 2142 gimple addr_stmt, or_stmt;
f1b8c740 2143 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2144 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2145 bool negative = tree_int_cst_compare
2146 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2147 tree offset = negative
2148 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
fb85abff 2149
2150 /* create: addr_tmp = (int)(address_of_first_vector) */
2151 addr_base =
2152 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
f1b8c740 2153 offset, loop);
fb85abff 2154 if (new_stmt_list != NULL)
2155 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2156
03d37e4e 2157 sprintf (tmp_name, "addr2int%d", i);
2158 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
fb85abff 2159 addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
806413d2 2160 addr_base);
fb85abff 2161 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2162
2163 /* The addresses are OR together. */
2164
2165 if (or_tmp_name != NULL_TREE)
2166 {
2167 /* create: or_tmp = or_tmp | addr_tmp */
03d37e4e 2168 sprintf (tmp_name, "orptrs%d", i);
2169 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
fb85abff 2170 or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2171 new_or_tmp_name,
2172 or_tmp_name, addr_tmp_name);
fb85abff 2173 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2174 or_tmp_name = new_or_tmp_name;
2175 }
2176 else
2177 or_tmp_name = addr_tmp_name;
2178
2179 } /* end for i */
2180
2181 mask_cst = build_int_cst (int_ptrsize_type, mask);
2182
2183 /* create: and_tmp = or_tmp & mask */
03d37e4e 2184 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
fb85abff 2185
2186 and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2187 or_tmp_name, mask_cst);
fb85abff 2188 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2189
2190 /* Make and_tmp the left operand of the conditional test against zero.
2191 if and_tmp has a nonzero bit then some address is unaligned. */
2192 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2193 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2194 and_tmp_name, ptrsize_zero);
2195 if (*cond_expr)
2196 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2197 *cond_expr, part_cond_expr);
2198 else
2199 *cond_expr = part_cond_expr;
2200}
2201
fb85abff 2202/* Function vect_create_cond_for_alias_checks.
2203
2204 Create a conditional expression that represents the run-time checks for
2205 overlapping of address ranges represented by a list of data references
2206 relations passed as input.
2207
2208 Input:
2209 COND_EXPR - input conditional expression. New conditions will be chained
8a7b0f48 2210 with logical AND operation. If it is NULL, then the function
2211 is used to return the number of alias checks.
fb85abff 2212 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2213 to be checked.
2214
2215 Output:
2216 COND_EXPR - conditional expression.
fb85abff 2217
8a7b0f48 2218 The returned COND_EXPR is the conditional expression to be used in the if
fb85abff 2219 statement that controls which version of the loop gets executed at runtime.
2220*/
2221
8a7b0f48 2222void
90d4c4af 2223vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
fb85abff 2224{
43d14b66 2225 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
8a7b0f48 2226 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2227 tree part_cond_expr;
fb85abff 2228
2229 /* Create expression
33767455 2230 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2231 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
48e1416a 2232 &&
fb85abff 2233 ...
2234 &&
33767455 2235 ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2236 || (load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
fb85abff 2237
8a7b0f48 2238 if (comp_alias_ddrs.is_empty ())
fb85abff 2239 return;
2240
8a7b0f48 2241 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
fb85abff 2242 {
43d14b66 2243 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2244 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
8a7b0f48 2245 tree segment_length_a = dr_a.seg_len;
2246 tree segment_length_b = dr_b.seg_len;
fb85abff 2247
8a7b0f48 2248 tree addr_base_a
43d14b66 2249 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
8a7b0f48 2250 tree addr_base_b
43d14b66 2251 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
fb85abff 2252
6d8fb6cf 2253 if (dump_enabled_p ())
fb85abff 2254 {
b055bc88 2255 dump_printf_loc (MSG_NOTE, vect_location,
8a7b0f48 2256 "create runtime check for data references ");
2257 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
b055bc88 2258 dump_printf (MSG_NOTE, " and ");
8a7b0f48 2259 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2260 dump_printf (MSG_NOTE, "\n");
fb85abff 2261 }
2262
8a7b0f48 2263 tree seg_a_min = addr_base_a;
2264 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
6e984e6f 2265 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
2266 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
2267 [a, a+12) */
8a7b0f48 2268 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
6e984e6f 2269 {
2270 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_a.dr)));
2271 seg_a_min = fold_build_pointer_plus (seg_a_max, unit_size);
2272 seg_a_max = fold_build_pointer_plus (addr_base_a, unit_size);
2273 }
f1b8c740 2274
8a7b0f48 2275 tree seg_b_min = addr_base_b;
2276 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2277 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
6e984e6f 2278 {
2279 tree unit_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_b.dr)));
2280 seg_b_min = fold_build_pointer_plus (seg_b_max, unit_size);
2281 seg_b_max = fold_build_pointer_plus (addr_base_b, unit_size);
2282 }
fb85abff 2283
48e1416a 2284 part_cond_expr =
fb85abff 2285 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
33767455 2286 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2287 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
48e1416a 2288
fb85abff 2289 if (*cond_expr)
2290 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2291 *cond_expr, part_cond_expr);
2292 else
2293 *cond_expr = part_cond_expr;
2294 }
fb85abff 2295
6d8fb6cf 2296 if (dump_enabled_p ())
b055bc88 2297 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 2298 "created %u versioning for alias checks.\n",
8a7b0f48 2299 comp_alias_ddrs.length ());
2300
2301 comp_alias_ddrs.release ();
fb85abff 2302}
2303
2304
2305/* Function vect_loop_versioning.
48e1416a 2306
fb85abff 2307 If the loop has data references that may or may not be aligned or/and
2308 has data reference relations whose independence was not proven then
2309 two versions of the loop need to be generated, one which is vectorized
2310 and one which isn't. A test is then generated to control which of the
2311 loops is executed. The test checks for the alignment of all of the
2312 data references that may or may not be aligned. An additional
2313 sequence of runtime tests is generated for each pairs of DDRs whose
48e1416a 2314 independence was not proven. The vectorized version of loop is
2315 executed only if both alias and alignment tests are passed.
2316
fb85abff 2317 The test generated to check which version of loop is executed
48e1416a 2318 is modified to also check for profitability as indicated by the
23a3430d 2319 cost model initially.
2320
2321 The versioning precondition(s) are placed in *COND_EXPR and
2afdcbed 2322 *COND_EXPR_STMT_LIST. */
fb85abff 2323
2324void
e7430948 2325vect_loop_versioning (loop_vec_info loop_vinfo,
2326 unsigned int th, bool check_profitability)
fb85abff 2327{
2328 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
c71d3c24 2329 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
fb85abff 2330 basic_block condition_bb;
2331 gimple_stmt_iterator gsi, cond_exp_gsi;
2332 basic_block merge_bb;
2333 basic_block new_exit_bb;
2334 edge new_exit_e, e;
2335 gimple orig_phi, new_phi;
e7430948 2336 tree cond_expr = NULL_TREE;
2afdcbed 2337 gimple_seq cond_expr_stmt_list = NULL;
fb85abff 2338 tree arg;
2339 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2340 gimple_seq gimplify_stmt_list = NULL;
2341 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
6ee2edad 2342 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2343 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
fb85abff 2344
e7430948 2345 if (check_profitability)
2346 {
2347 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2348 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2349 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2350 is_gimple_condexpr, NULL_TREE);
2351 }
fb85abff 2352
6ee2edad 2353 if (version_align)
2afdcbed 2354 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2355 &cond_expr_stmt_list);
fb85abff 2356
6ee2edad 2357 if (version_alias)
90d4c4af 2358 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
23a3430d 2359
2afdcbed 2360 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2361 is_gimple_condexpr, NULL_TREE);
2362 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
fb85abff 2363
2364 initialize_original_copy_tables ();
c71d3c24 2365 if (scalar_loop)
2366 {
2367 edge scalar_e;
2368 basic_block preheader, scalar_preheader;
2369
2370 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2371 scale LOOP's frequencies instead. */
2372 loop_version (scalar_loop, cond_expr, &condition_bb,
2373 prob, REG_BR_PROB_BASE, REG_BR_PROB_BASE - prob, true);
2374 scale_loop_frequencies (loop, prob, REG_BR_PROB_BASE);
2375 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2376 while we need to move it above LOOP's preheader. */
2377 e = loop_preheader_edge (loop);
2378 scalar_e = loop_preheader_edge (scalar_loop);
2379 gcc_assert (empty_block_p (e->src)
2380 && single_pred_p (e->src));
2381 gcc_assert (empty_block_p (scalar_e->src)
2382 && single_pred_p (scalar_e->src));
2383 gcc_assert (single_pred_p (condition_bb));
2384 preheader = e->src;
2385 scalar_preheader = scalar_e->src;
2386 scalar_e = find_edge (condition_bb, scalar_preheader);
2387 e = single_pred_edge (preheader);
2388 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2389 scalar_preheader);
2390 redirect_edge_and_branch_force (scalar_e, preheader);
2391 redirect_edge_and_branch_force (e, condition_bb);
2392 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2393 single_pred (condition_bb));
2394 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2395 single_pred (scalar_preheader));
2396 set_immediate_dominator (CDI_DOMINATORS, preheader,
2397 condition_bb);
2398 }
2399 else
2400 loop_version (loop, cond_expr, &condition_bb,
2401 prob, prob, REG_BR_PROB_BASE - prob, true);
6ee2edad 2402
36f39b2e 2403 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
6ee2edad 2404 && dump_enabled_p ())
2405 {
2406 if (version_alias)
2407 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2408 "loop versioned for vectorization because of "
2409 "possible aliasing\n");
2410 if (version_align)
2411 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2412 "loop versioned for vectorization to enhance "
2413 "alignment\n");
2414
2415 }
9af5ce0c 2416 free_original_copy_tables ();
fb85abff 2417
48e1416a 2418 /* Loop versioning violates an assumption we try to maintain during
fb85abff 2419 vectorization - that the loop exit block has a single predecessor.
2420 After versioning, the exit block of both loop versions is the same
2421 basic block (i.e. it has two predecessors). Just in order to simplify
2422 following transformations in the vectorizer, we fix this situation
2423 here by adding a new (empty) block on the exit-edge of the loop,
c71d3c24 2424 with the proper loop-exit phis to maintain loop-closed-form.
2425 If loop versioning wasn't done from loop, but scalar_loop instead,
2426 merge_bb will have already just a single successor. */
48e1416a 2427
fb85abff 2428 merge_bb = single_exit (loop)->dest;
c71d3c24 2429 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
fb85abff 2430 {
c71d3c24 2431 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2432 new_exit_bb = split_edge (single_exit (loop));
2433 new_exit_e = single_exit (loop);
2434 e = EDGE_SUCC (new_exit_bb, 0);
2435
2436 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2437 {
2438 tree new_res;
2439 orig_phi = gsi_stmt (gsi);
2440 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
2441 new_phi = create_phi_node (new_res, new_exit_bb);
2442 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2443 add_phi_arg (new_phi, arg, new_exit_e,
2444 gimple_phi_arg_location_from_edge (orig_phi, e));
2445 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2446 }
48e1416a 2447 }
fb85abff 2448
2449 /* End loop-exit-fixes after versioning. */
2450
2afdcbed 2451 if (cond_expr_stmt_list)
fb85abff 2452 {
2453 cond_exp_gsi = gsi_last_bb (condition_bb);
2afdcbed 2454 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
23a3430d 2455 GSI_SAME_STMT);
fb85abff 2456 }
95e19962 2457 update_ssa (TODO_update_ssa);
fb85abff 2458}