]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-loop-manip.c
* gimple.h: Remove all includes.
[thirdparty/gcc.git] / gcc / tree-vect-loop-manip.c
CommitLineData
48e1416a 1/* Vectorizer Specific Loop Manipulations
711789cc 2 Copyright (C) 2003-2013 Free Software Foundation, Inc.
48e1416a 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
7bd765d4 25#include "dumpfile.h"
fb85abff 26#include "tm.h"
fb85abff 27#include "tree.h"
28#include "basic-block.h"
ce084dfc 29#include "gimple-pretty-print.h"
bc61cadb 30#include "tree-ssa-alias.h"
31#include "internal-fn.h"
32#include "gimple-expr.h"
33#include "is-a.h"
e795d6e1 34#include "gimple.h"
a8783bee 35#include "gimplify.h"
dcf1a1ec 36#include "gimple-iterator.h"
e795d6e1 37#include "gimplify-me.h"
073c1fd5 38#include "gimple-ssa.h"
39#include "tree-cfg.h"
40#include "tree-phinodes.h"
41#include "ssa-iterators.h"
9ed99284 42#include "stringpool.h"
073c1fd5 43#include "tree-ssanames.h"
05d9c18a 44#include "tree-ssa-loop-manip.h"
073c1fd5 45#include "tree-into-ssa.h"
69ee5dbb 46#include "tree-ssa.h"
b9ed1410 47#include "tree-pass.h"
fb85abff 48#include "cfgloop.h"
0b205f4c 49#include "diagnostic-core.h"
fb85abff 50#include "tree-scalar-evolution.h"
51#include "tree-vectorizer.h"
52#include "langhooks.h"
53
54/*************************************************************************
55 Simple Loop Peeling Utilities
56
57 Utilities to support loop peeling for vectorization purposes.
58 *************************************************************************/
59
60
61/* Renames the use *OP_P. */
62
63static void
64rename_use_op (use_operand_p op_p)
65{
66 tree new_name;
67
68 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
69 return;
70
71 new_name = get_current_def (USE_FROM_PTR (op_p));
72
73 /* Something defined outside of the loop. */
74 if (!new_name)
75 return;
76
77 /* An ordinary ssa name defined in the loop. */
78
79 SET_USE (op_p, new_name);
80}
81
82
83/* Renames the variables in basic block BB. */
84
c9b2c569 85static void
fb85abff 86rename_variables_in_bb (basic_block bb)
87{
88 gimple_stmt_iterator gsi;
89 gimple stmt;
90 use_operand_p use_p;
91 ssa_op_iter iter;
92 edge e;
93 edge_iterator ei;
94 struct loop *loop = bb->loop_father;
95
96 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
97 {
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
101 }
102
c9b2c569 103 FOR_EACH_EDGE (e, ei, bb->preds)
fb85abff 104 {
c9b2c569 105 if (!flow_bb_inside_loop_p (loop, e->src))
fb85abff 106 continue;
c9b2c569 107 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
fb85abff 108 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
109 }
110}
111
112
b123eaab 113typedef struct
114{
115 tree from, to;
116 basic_block bb;
117} adjust_info;
118
b123eaab 119/* A stack of values to be adjusted in debug stmts. We have to
120 process them LIFO, so that the closest substitution applies. If we
121 processed them FIFO, without the stack, we might substitute uses
122 with a PHI DEF that would soon become non-dominant, and when we got
123 to the suitable one, it wouldn't have anything to substitute any
124 more. */
d70aebca 125static vec<adjust_info, va_heap> adjust_vec;
b123eaab 126
127/* Adjust any debug stmts that referenced AI->from values to use the
128 loop-closed AI->to, if the references are dominated by AI->bb and
129 not by the definition of AI->from. */
130
131static void
132adjust_debug_stmts_now (adjust_info *ai)
133{
134 basic_block bbphi = ai->bb;
135 tree orig_def = ai->from;
136 tree new_def = ai->to;
137 imm_use_iterator imm_iter;
138 gimple stmt;
139 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
140
141 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
142
143 /* Adjust any debug stmts that held onto non-loop-closed
144 references. */
145 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
146 {
147 use_operand_p use_p;
148 basic_block bbuse;
149
150 if (!is_gimple_debug (stmt))
151 continue;
152
153 gcc_assert (gimple_debug_bind_p (stmt));
154
155 bbuse = gimple_bb (stmt);
156
157 if ((bbuse == bbphi
158 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
159 && !(bbuse == bbdef
160 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
161 {
162 if (new_def)
163 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
164 SET_USE (use_p, new_def);
165 else
166 {
167 gimple_debug_bind_reset_value (stmt);
168 update_stmt (stmt);
169 }
170 }
171 }
172}
173
174/* Adjust debug stmts as scheduled before. */
175
176static void
177adjust_vec_debug_stmts (void)
178{
179 if (!MAY_HAVE_DEBUG_STMTS)
180 return;
181
f1f41a6c 182 gcc_assert (adjust_vec.exists ());
b123eaab 183
f1f41a6c 184 while (!adjust_vec.is_empty ())
b123eaab 185 {
f1f41a6c 186 adjust_debug_stmts_now (&adjust_vec.last ());
187 adjust_vec.pop ();
b123eaab 188 }
189
f1f41a6c 190 adjust_vec.release ();
b123eaab 191}
192
193/* Adjust any debug stmts that referenced FROM values to use the
194 loop-closed TO, if the references are dominated by BB and not by
195 the definition of FROM. If adjust_vec is non-NULL, adjustments
196 will be postponed until adjust_vec_debug_stmts is called. */
197
198static void
199adjust_debug_stmts (tree from, tree to, basic_block bb)
200{
201 adjust_info ai;
202
0087edc6 203 if (MAY_HAVE_DEBUG_STMTS
204 && TREE_CODE (from) == SSA_NAME
2510e5cd 205 && ! SSA_NAME_IS_DEFAULT_DEF (from)
0087edc6 206 && ! virtual_operand_p (from))
b123eaab 207 {
208 ai.from = from;
209 ai.to = to;
210 ai.bb = bb;
211
f1f41a6c 212 if (adjust_vec.exists ())
213 adjust_vec.safe_push (ai);
b123eaab 214 else
215 adjust_debug_stmts_now (&ai);
216 }
217}
218
219/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
220 to adjust any debug stmts that referenced the old phi arg,
221 presumably non-loop-closed references left over from other
222 transformations. */
223
224static void
225adjust_phi_and_debug_stmts (gimple update_phi, edge e, tree new_def)
226{
227 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
228
229 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
230
231 if (MAY_HAVE_DEBUG_STMTS)
232 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
233 gimple_bb (update_phi));
234}
235
fb85abff 236
fb85abff 237/* Update PHI nodes for a guard of the LOOP.
238
239 Input:
240 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
241 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
242 originates from the guard-bb, skips LOOP and reaches the (unique) exit
243 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
244 We denote this bb NEW_MERGE_BB because before the guard code was added
245 it had a single predecessor (the LOOP header), and now it became a merge
246 point of two paths - the path that ends with the LOOP exit-edge, and
247 the path that ends with GUARD_EDGE.
248 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
249 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
250
251 ===> The CFG before the guard-code was added:
252 LOOP_header_bb:
253 loop_body
254 if (exit_loop) goto update_bb
255 else goto LOOP_header_bb
256 update_bb:
257
258 ==> The CFG after the guard-code was added:
259 guard_bb:
260 if (LOOP_guard_condition) goto new_merge_bb
261 else goto LOOP_header_bb
262 LOOP_header_bb:
263 loop_body
264 if (exit_loop_condition) goto new_merge_bb
265 else goto LOOP_header_bb
266 new_merge_bb:
267 goto update_bb
268 update_bb:
269
270 ==> The CFG after this function:
271 guard_bb:
272 if (LOOP_guard_condition) goto new_merge_bb
273 else goto LOOP_header_bb
274 LOOP_header_bb:
275 loop_body
276 if (exit_loop_condition) goto new_exit_bb
277 else goto LOOP_header_bb
278 new_exit_bb:
279 new_merge_bb:
280 goto update_bb
281 update_bb:
282
283 This function:
284 1. creates and updates the relevant phi nodes to account for the new
285 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
286 1.1. Create phi nodes at NEW_MERGE_BB.
287 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
288 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
289 2. preserves loop-closed-ssa-form by creating the required phi nodes
290 at the exit of LOOP (i.e, in NEW_EXIT_BB).
291
292 There are two flavors to this function:
293
294 slpeel_update_phi_nodes_for_guard1:
295 Here the guard controls whether we enter or skip LOOP, where LOOP is a
296 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
297 for variables that have phis in the loop header.
298
299 slpeel_update_phi_nodes_for_guard2:
300 Here the guard controls whether we enter or skip LOOP, where LOOP is an
301 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
302 for variables that have phis in the loop exit.
303
304 I.E., the overall structure is:
305
306 loop1_preheader_bb:
307 guard1 (goto loop1/merge1_bb)
308 loop1
309 loop1_exit_bb:
310 guard2 (goto merge1_bb/merge2_bb)
311 merge1_bb
312 loop2
313 loop2_exit_bb
314 merge2_bb
315 next_bb
316
317 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
318 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
319 that have phis in loop1->header).
320
321 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
322 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
323 that have phis in next_bb). It also adds some of these phis to
324 loop1_exit_bb.
325
326 slpeel_update_phi_nodes_for_guard1 is always called before
327 slpeel_update_phi_nodes_for_guard2. They are both needed in order
328 to create correct data-flow and loop-closed-ssa-form.
329
330 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
331 that change between iterations of a loop (and therefore have a phi-node
332 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
48e1416a 333 phis for variables that are used out of the loop (and therefore have
334 loop-closed exit phis). Some variables may be both updated between
fb85abff 335 iterations and used after the loop. This is why in loop1_exit_bb we
336 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
337 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
338
339 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
340 an original loop. i.e., we have:
341
342 orig_loop
343 guard_bb (goto LOOP/new_merge)
344 new_loop <-- LOOP
345 new_exit
346 new_merge
347 next_bb
348
349 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
350 have:
351
352 new_loop
353 guard_bb (goto LOOP/new_merge)
354 orig_loop <-- LOOP
355 new_exit
356 new_merge
357 next_bb
358
359 The SSA names defined in the original loop have a current
360 reaching definition that that records the corresponding new
361 ssa-name used in the new duplicated loop copy.
362 */
363
364/* Function slpeel_update_phi_nodes_for_guard1
48e1416a 365
fb85abff 366 Input:
367 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
368 - DEFS - a bitmap of ssa names to mark new names for which we recorded
48e1416a 369 information.
370
fb85abff 371 In the context of the overall structure, we have:
372
48e1416a 373 loop1_preheader_bb:
fb85abff 374 guard1 (goto loop1/merge1_bb)
375LOOP-> loop1
376 loop1_exit_bb:
377 guard2 (goto merge1_bb/merge2_bb)
378 merge1_bb
379 loop2
380 loop2_exit_bb
381 merge2_bb
382 next_bb
383
384 For each name updated between loop iterations (i.e - for each name that has
385 an entry (loop-header) phi in LOOP) we create a new phi in:
386 1. merge1_bb (to account for the edge from guard1)
387 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
388*/
389
390static void
391slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
1e8cf080 392 bool is_new_loop, basic_block *new_exit_bb)
fb85abff 393{
394 gimple orig_phi, new_phi;
395 gimple update_phi, update_phi2;
396 tree guard_arg, loop_arg;
397 basic_block new_merge_bb = guard_edge->dest;
398 edge e = EDGE_SUCC (new_merge_bb, 0);
399 basic_block update_bb = e->dest;
400 basic_block orig_bb = loop->header;
401 edge new_exit_e;
402 tree current_new_name;
fb85abff 403 gimple_stmt_iterator gsi_orig, gsi_update;
404
405 /* Create new bb between loop and new_merge_bb. */
406 *new_exit_bb = split_edge (single_exit (loop));
407
408 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
409
410 for (gsi_orig = gsi_start_phis (orig_bb),
411 gsi_update = gsi_start_phis (update_bb);
412 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
413 gsi_next (&gsi_orig), gsi_next (&gsi_update))
414 {
38091110 415 source_location loop_locus, guard_locus;
874117c8 416 tree new_res;
fb85abff 417 orig_phi = gsi_stmt (gsi_orig);
418 update_phi = gsi_stmt (gsi_update);
419
fb85abff 420 /** 1. Handle new-merge-point phis **/
421
422 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
874117c8 423 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
424 new_phi = create_phi_node (new_res, new_merge_bb);
fb85abff 425
426 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
427 of LOOP. Set the two phi args in NEW_PHI for these edges: */
428 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
48e1416a 429 loop_locus = gimple_phi_arg_location_from_edge (orig_phi,
430 EDGE_SUCC (loop->latch,
efbcb6de 431 0));
fb85abff 432 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
48e1416a 433 guard_locus
434 = gimple_phi_arg_location_from_edge (orig_phi,
efbcb6de 435 loop_preheader_edge (loop));
fb85abff 436
60d535d2 437 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus);
438 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus);
fb85abff 439
440 /* 1.3. Update phi in successor block. */
441 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
442 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
b123eaab 443 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
fb85abff 444 update_phi2 = new_phi;
445
446
447 /** 2. Handle loop-closed-ssa-form phis **/
448
7c782c9b 449 if (virtual_operand_p (PHI_RESULT (orig_phi)))
fb85abff 450 continue;
451
452 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
874117c8 453 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
454 new_phi = create_phi_node (new_res, *new_exit_bb);
fb85abff 455
456 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
60d535d2 457 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus);
fb85abff 458
459 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
460 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
b123eaab 461 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
462 PHI_RESULT (new_phi));
fb85abff 463
464 /* 2.4. Record the newly created name with set_current_def.
465 We want to find a name such that
466 name = get_current_def (orig_loop_name)
467 and to set its current definition as follows:
468 set_current_def (name, new_phi_name)
469
470 If LOOP is a new loop then loop_arg is already the name we're
471 looking for. If LOOP is the original loop, then loop_arg is
472 the orig_loop_name and the relevant name is recorded in its
473 current reaching definition. */
474 if (is_new_loop)
475 current_new_name = loop_arg;
476 else
477 {
478 current_new_name = get_current_def (loop_arg);
479 /* current_def is not available only if the variable does not
480 change inside the loop, in which case we also don't care
481 about recording a current_def for it because we won't be
482 trying to create loop-exit-phis for it. */
483 if (!current_new_name)
484 continue;
485 }
486 gcc_assert (get_current_def (current_new_name) == NULL_TREE);
487
488 set_current_def (current_new_name, PHI_RESULT (new_phi));
fb85abff 489 }
490}
491
492
493/* Function slpeel_update_phi_nodes_for_guard2
494
495 Input:
496 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
497
498 In the context of the overall structure, we have:
499
48e1416a 500 loop1_preheader_bb:
fb85abff 501 guard1 (goto loop1/merge1_bb)
502 loop1
48e1416a 503 loop1_exit_bb:
fb85abff 504 guard2 (goto merge1_bb/merge2_bb)
505 merge1_bb
506LOOP-> loop2
507 loop2_exit_bb
508 merge2_bb
509 next_bb
510
511 For each name used out side the loop (i.e - for each name that has an exit
512 phi in next_bb) we create a new phi in:
48e1416a 513 1. merge2_bb (to account for the edge from guard_bb)
fb85abff 514 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
515 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
516 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
517*/
518
519static void
520slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
521 bool is_new_loop, basic_block *new_exit_bb)
522{
523 gimple orig_phi, new_phi;
524 gimple update_phi, update_phi2;
525 tree guard_arg, loop_arg;
526 basic_block new_merge_bb = guard_edge->dest;
527 edge e = EDGE_SUCC (new_merge_bb, 0);
528 basic_block update_bb = e->dest;
529 edge new_exit_e;
530 tree orig_def, orig_def_new_name;
531 tree new_name, new_name2;
532 tree arg;
533 gimple_stmt_iterator gsi;
534
535 /* Create new bb between loop and new_merge_bb. */
536 *new_exit_bb = split_edge (single_exit (loop));
537
538 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
539
540 for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
541 {
874117c8 542 tree new_res;
fb85abff 543 update_phi = gsi_stmt (gsi);
544 orig_phi = update_phi;
545 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
546 /* This loop-closed-phi actually doesn't represent a use
48e1416a 547 out of the loop - the phi arg is a constant. */
fb85abff 548 if (TREE_CODE (orig_def) != SSA_NAME)
549 continue;
550 orig_def_new_name = get_current_def (orig_def);
551 arg = NULL_TREE;
552
553 /** 1. Handle new-merge-point phis **/
554
555 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
874117c8 556 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
557 new_phi = create_phi_node (new_res, new_merge_bb);
fb85abff 558
559 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
560 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
561 new_name = orig_def;
562 new_name2 = NULL_TREE;
563 if (orig_def_new_name)
564 {
565 new_name = orig_def_new_name;
566 /* Some variables have both loop-entry-phis and loop-exit-phis.
567 Such variables were given yet newer names by phis placed in
568 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
569 new_name2 = get_current_def (get_current_def (orig_name)). */
570 new_name2 = get_current_def (new_name);
571 }
48e1416a 572
fb85abff 573 if (is_new_loop)
574 {
575 guard_arg = orig_def;
576 loop_arg = new_name;
577 }
578 else
579 {
580 guard_arg = new_name;
581 loop_arg = orig_def;
582 }
583 if (new_name2)
584 guard_arg = new_name2;
48e1416a 585
60d535d2 586 add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
587 add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
fb85abff 588
589 /* 1.3. Update phi in successor block. */
590 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
b123eaab 591 adjust_phi_and_debug_stmts (update_phi, e, PHI_RESULT (new_phi));
fb85abff 592 update_phi2 = new_phi;
593
594
595 /** 2. Handle loop-closed-ssa-form phis **/
596
597 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
874117c8 598 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
599 new_phi = create_phi_node (new_res, *new_exit_bb);
fb85abff 600
601 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
60d535d2 602 add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);
fb85abff 603
604 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
605 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
b123eaab 606 adjust_phi_and_debug_stmts (update_phi2, new_exit_e,
607 PHI_RESULT (new_phi));
fb85abff 608
609
610 /** 3. Handle loop-closed-ssa-form phis for first loop **/
611
612 /* 3.1. Find the relevant names that need an exit-phi in
613 GUARD_BB, i.e. names for which
614 slpeel_update_phi_nodes_for_guard1 had not already created a
615 phi node. This is the case for names that are used outside
616 the loop (and therefore need an exit phi) but are not updated
617 across loop iterations (and therefore don't have a
618 loop-header-phi).
619
620 slpeel_update_phi_nodes_for_guard1 is responsible for
621 creating loop-exit phis in GUARD_BB for names that have a
622 loop-header-phi. When such a phi is created we also record
623 the new name in its current definition. If this new name
624 exists, then guard_arg was set to this new name (see 1.2
625 above). Therefore, if guard_arg is not this new name, this
626 is an indication that an exit-phi in GUARD_BB was not yet
627 created, so we take care of it here. */
628 if (guard_arg == new_name2)
629 continue;
630 arg = guard_arg;
631
632 /* 3.2. Generate new phi node in GUARD_BB: */
874117c8 633 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
634 new_phi = create_phi_node (new_res, guard_edge->src);
fb85abff 635
636 /* 3.3. GUARD_BB has one incoming edge: */
637 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
48e1416a 638 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
60d535d2 639 UNKNOWN_LOCATION);
fb85abff 640
641 /* 3.4. Update phi in successor of GUARD_BB: */
642 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
643 == guard_arg);
b123eaab 644 adjust_phi_and_debug_stmts (update_phi2, guard_edge,
645 PHI_RESULT (new_phi));
fb85abff 646 }
647}
648
649
650/* Make the LOOP iterate NITERS times. This is done by adding a new IV
651 that starts at zero, increases by one and its limit is NITERS.
652
653 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
654
655void
656slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
657{
658 tree indx_before_incr, indx_after_incr;
659 gimple cond_stmt;
660 gimple orig_cond;
661 edge exit_edge = single_exit (loop);
662 gimple_stmt_iterator loop_cond_gsi;
663 gimple_stmt_iterator incr_gsi;
664 bool insert_after;
665 tree init = build_int_cst (TREE_TYPE (niters), 0);
666 tree step = build_int_cst (TREE_TYPE (niters), 1);
36f39b2e 667 source_location loop_loc;
fb85abff 668 enum tree_code code;
669
670 orig_cond = get_loop_exit_condition (loop);
671 gcc_assert (orig_cond);
672 loop_cond_gsi = gsi_for_stmt (orig_cond);
673
674 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
675 create_iv (init, step, NULL_TREE, loop,
676 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
677
678 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
679 true, NULL_TREE, true,
680 GSI_SAME_STMT);
681 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
682 true, GSI_SAME_STMT);
683
684 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
685 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
686 NULL_TREE);
687
688 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
689
690 /* Remove old loop exit test: */
691 gsi_remove (&loop_cond_gsi, true);
706567b8 692 free_stmt_vec_info (orig_cond);
fb85abff 693
694 loop_loc = find_loop_location (loop);
6d8fb6cf 695 if (dump_enabled_p ())
fb85abff 696 {
36f39b2e 697 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
698 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
699 LOCATION_LINE (loop_loc));
7bd765d4 700 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
78bb46f5 701 dump_printf (MSG_NOTE, "\n");
fb85abff 702 }
fb85abff 703 loop->nb_iterations = niters;
704}
705
706
48e1416a 707/* Given LOOP this function generates a new copy of it and puts it
fb85abff 708 on E which is either the entry or exit of LOOP. */
709
710struct loop *
711slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
712{
713 struct loop *new_loop;
714 basic_block *new_bbs, *bbs;
715 bool at_exit;
716 bool was_imm_dom;
48e1416a 717 basic_block exit_dest;
fb85abff 718 edge exit, new_exit;
fb85abff 719
c9b2c569 720 exit = single_exit (loop);
721 at_exit = (e == exit);
fb85abff 722 if (!at_exit && e != loop_preheader_edge (loop))
723 return NULL;
724
c9b2c569 725 bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
726 get_loop_body_with_size (loop, bbs, loop->num_nodes);
fb85abff 727
728 /* Check whether duplication is possible. */
729 if (!can_copy_bbs_p (bbs, loop->num_nodes))
730 {
731 free (bbs);
732 return NULL;
733 }
734
735 /* Generate new loop structure. */
736 new_loop = duplicate_loop (loop, loop_outer (loop));
c9b2c569 737 duplicate_subloops (loop, new_loop);
fb85abff 738
c9b2c569 739 exit_dest = exit->dest;
48e1416a 740 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
741 exit_dest) == loop->header ?
fb85abff 742 true : false);
743
c9b2c569 744 /* Also copy the pre-header, this avoids jumping through hoops to
745 duplicate the loop entry PHI arguments. Create an empty
746 pre-header unconditionally for this. */
747 basic_block preheader = split_edge (loop_preheader_edge (loop));
748 edge entry_e = single_pred_edge (preheader);
749 bbs[loop->num_nodes] = preheader;
750 new_bbs = XNEWVEC (basic_block, loop->num_nodes + 1);
fb85abff 751
c9b2c569 752 copy_bbs (bbs, loop->num_nodes + 1, new_bbs,
fb85abff 753 &exit, 1, &new_exit, NULL,
d99f53b2 754 e->src, true);
c9b2c569 755 basic_block new_preheader = new_bbs[loop->num_nodes];
fb85abff 756
c9b2c569 757 add_phi_args_after_copy (new_bbs, loop->num_nodes + 1, NULL);
48e1416a 758
fb85abff 759 if (at_exit) /* Add the loop copy at exit. */
760 {
c9b2c569 761 redirect_edge_and_branch_force (e, new_preheader);
762 flush_pending_stmts (e);
763 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
fb85abff 764 if (was_imm_dom)
765 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
c9b2c569 766
767 /* And remove the non-necessary forwarder again. Keep the other
768 one so we have a proper pre-header for the loop at the exit edge. */
769 redirect_edge_pred (single_succ_edge (preheader), single_pred (preheader));
770 delete_basic_block (preheader);
771 set_immediate_dominator (CDI_DOMINATORS, loop->header,
772 loop_preheader_edge (loop)->src);
fb85abff 773 }
774 else /* Add the copy at entry. */
775 {
c9b2c569 776 redirect_edge_and_branch_force (entry_e, new_preheader);
777 flush_pending_stmts (entry_e);
778 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
779
780 redirect_edge_and_branch_force (new_exit, preheader);
781 flush_pending_stmts (new_exit);
782 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
783
784 /* And remove the non-necessary forwarder again. Keep the other
785 one so we have a proper pre-header for the loop at the exit edge. */
786 redirect_edge_pred (single_succ_edge (new_preheader), single_pred (new_preheader));
787 delete_basic_block (new_preheader);
788 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
789 loop_preheader_edge (new_loop)->src);
fb85abff 790 }
791
c9b2c569 792 for (unsigned i = 0; i < loop->num_nodes+1; i++)
793 rename_variables_in_bb (new_bbs[i]);
794
fb85abff 795 free (new_bbs);
796 free (bbs);
797
c9b2c569 798#ifdef ENABLE_CHECKING
799 verify_dominators (CDI_DOMINATORS);
800#endif
801
fb85abff 802 return new_loop;
803}
804
805
806/* Given the condition statement COND, put it as the last statement
807 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
48e1416a 808 Assumes that this is the single exit of the guarded loop.
23a3430d 809 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */
fb85abff 810
811static edge
23a3430d 812slpeel_add_loop_guard (basic_block guard_bb, tree cond,
813 gimple_seq cond_expr_stmt_list,
877584e4 814 basic_block exit_bb, basic_block dom_bb,
815 int probability)
fb85abff 816{
817 gimple_stmt_iterator gsi;
818 edge new_e, enter_e;
819 gimple cond_stmt;
820 gimple_seq gimplify_stmt_list = NULL;
821
822 enter_e = EDGE_SUCC (guard_bb, 0);
823 enter_e->flags &= ~EDGE_FALLTHRU;
824 enter_e->flags |= EDGE_FALSE_VALUE;
825 gsi = gsi_last_bb (guard_bb);
826
87ae3989 827 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
828 NULL_TREE);
23a3430d 829 if (gimplify_stmt_list)
830 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
87ae3989 831 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
23a3430d 832 if (cond_expr_stmt_list)
833 gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);
fb85abff 834
835 gsi = gsi_last_bb (guard_bb);
836 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
837
838 /* Add new edge to connect guard block to the merge/loop-exit block. */
839 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
877584e4 840
841 new_e->count = guard_bb->count;
842 new_e->probability = probability;
843 new_e->count = apply_probability (enter_e->count, probability);
844 enter_e->count -= new_e->count;
845 enter_e->probability = inverse_probability (probability);
fb85abff 846 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
847 return new_e;
848}
849
850
851/* This function verifies that the following restrictions apply to LOOP:
852 (1) it is innermost
853 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
854 (3) it is single entry, single exit
855 (4) its exit condition is the last stmt in the header
856 (5) E is the entry/exit edge of LOOP.
857 */
858
859bool
860slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
861{
862 edge exit_e = single_exit (loop);
863 edge entry_e = loop_preheader_edge (loop);
864 gimple orig_cond = get_loop_exit_condition (loop);
865 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
866
fb85abff 867 if (loop->inner
868 /* All loops have an outer scope; the only case loop->outer is NULL is for
869 the function itself. */
870 || !loop_outer (loop)
871 || loop->num_nodes != 2
872 || !empty_block_p (loop->latch)
873 || !single_exit (loop)
874 /* Verify that new loop exit condition can be trivially modified. */
875 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
876 || (e != exit_e && e != entry_e))
877 return false;
878
879 return true;
880}
881
882#ifdef ENABLE_CHECKING
883static void
884slpeel_verify_cfg_after_peeling (struct loop *first_loop,
885 struct loop *second_loop)
886{
887 basic_block loop1_exit_bb = single_exit (first_loop)->dest;
888 basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
889 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
890
891 /* A guard that controls whether the second_loop is to be executed or skipped
892 is placed in first_loop->exit. first_loop->exit therefore has two
893 successors - one is the preheader of second_loop, and the other is a bb
894 after second_loop.
895 */
896 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
48e1416a 897
fb85abff 898 /* 1. Verify that one of the successors of first_loop->exit is the preheader
899 of second_loop. */
48e1416a 900
fb85abff 901 /* The preheader of new_loop is expected to have two predecessors:
902 first_loop->exit and the block that precedes first_loop. */
903
48e1416a 904 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
fb85abff 905 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
906 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
907 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
908 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
48e1416a 909
fb85abff 910 /* Verify that the other successor of first_loop->exit is after the
911 second_loop. */
912 /* TODO */
913}
914#endif
915
916/* If the run time cost model check determines that vectorization is
917 not profitable and hence scalar loop should be generated then set
918 FIRST_NITERS to prologue peeled iterations. This will allow all the
919 iterations to be executed in the prologue peeled scalar loop. */
920
921static void
922set_prologue_iterations (basic_block bb_before_first_loop,
2f630015 923 tree *first_niters,
fb85abff 924 struct loop *loop,
877584e4 925 unsigned int th,
926 int probability)
fb85abff 927{
928 edge e;
929 basic_block cond_bb, then_bb;
930 tree var, prologue_after_cost_adjust_name;
931 gimple_stmt_iterator gsi;
932 gimple newphi;
933 edge e_true, e_false, e_fallthru;
934 gimple cond_stmt;
87ae3989 935 gimple_seq stmts = NULL;
fb85abff 936 tree cost_pre_condition = NULL_TREE;
48e1416a 937 tree scalar_loop_iters =
fb85abff 938 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
939
940 e = single_pred_edge (bb_before_first_loop);
9af5ce0c 941 cond_bb = split_edge (e);
fb85abff 942
943 e = single_pred_edge (bb_before_first_loop);
9af5ce0c 944 then_bb = split_edge (e);
fb85abff 945 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
946
947 e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
948 EDGE_FALSE_VALUE);
949 set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
950
951 e_true = EDGE_PRED (then_bb, 0);
952 e_true->flags &= ~EDGE_FALLTHRU;
953 e_true->flags |= EDGE_TRUE_VALUE;
954
877584e4 955 e_true->probability = probability;
956 e_false->probability = inverse_probability (probability);
957 e_true->count = apply_probability (cond_bb->count, probability);
958 e_false->count = cond_bb->count - e_true->count;
959 then_bb->frequency = EDGE_FREQUENCY (e_true);
960 then_bb->count = e_true->count;
961
fb85abff 962 e_fallthru = EDGE_SUCC (then_bb, 0);
877584e4 963 e_fallthru->count = then_bb->count;
fb85abff 964
87ae3989 965 gsi = gsi_last_bb (cond_bb);
fb85abff 966 cost_pre_condition =
48e1416a 967 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
fb85abff 968 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
969 cost_pre_condition =
87ae3989 970 force_gimple_operand_gsi_1 (&gsi, cost_pre_condition, is_gimple_condexpr,
971 NULL_TREE, false, GSI_CONTINUE_LINKING);
972 cond_stmt = gimple_build_cond_from_tree (cost_pre_condition,
973 NULL_TREE, NULL_TREE);
fb85abff 974 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
48e1416a 975
fb85abff 976 var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
977 "prologue_after_cost_adjust");
48e1416a 978 prologue_after_cost_adjust_name =
fb85abff 979 force_gimple_operand (scalar_loop_iters, &stmts, false, var);
980
981 gsi = gsi_last_bb (then_bb);
982 if (stmts)
983 gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
984
985 newphi = create_phi_node (var, bb_before_first_loop);
48e1416a 986 add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
60d535d2 987 UNKNOWN_LOCATION);
988 add_phi_arg (newphi, *first_niters, e_false, UNKNOWN_LOCATION);
fb85abff 989
2f630015 990 *first_niters = PHI_RESULT (newphi);
fb85abff 991}
992
fb85abff 993/* Function slpeel_tree_peel_loop_to_edge.
994
995 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
996 that is placed on the entry (exit) edge E of LOOP. After this transformation
997 we have two loops one after the other - first-loop iterates FIRST_NITERS
998 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
48e1416a 999 If the cost model indicates that it is profitable to emit a scalar
fb85abff 1000 loop instead of the vector one, then the prolog (epilog) loop will iterate
1001 for the entire unchanged scalar iterations of the loop.
1002
1003 Input:
1004 - LOOP: the loop to be peeled.
1005 - E: the exit or entry edge of LOOP.
1006 If it is the entry edge, we peel the first iterations of LOOP. In this
1007 case first-loop is LOOP, and second-loop is the newly created loop.
1008 If it is the exit edge, we peel the last iterations of LOOP. In this
1009 case, first-loop is the newly created loop, and second-loop is LOOP.
1010 - NITERS: the number of iterations that LOOP iterates.
1011 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
1012 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
1013 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1014 is false, the caller of this function may want to take care of this
1015 (this can be useful if we don't want new stmts added to first-loop).
1016 - TH: cost model profitability threshold of iterations for vectorization.
1017 - CHECK_PROFITABILITY: specify whether cost model check has not occurred
1018 during versioning and hence needs to occur during
48e1416a 1019 prologue generation or whether cost model check
fb85abff 1020 has not occurred during prologue generation and hence
1021 needs to occur during epilogue generation.
877584e4 1022 - BOUND1 is the upper bound on number of iterations of the first loop (if known)
1023 - BOUND2 is the upper bound on number of iterations of the second loop (if known)
48e1416a 1024
fb85abff 1025
1026 Output:
1027 The function returns a pointer to the new loop-copy, or NULL if it failed
1028 to perform the transformation.
1029
1030 The function generates two if-then-else guards: one before the first loop,
1031 and the other before the second loop:
1032 The first guard is:
1033 if (FIRST_NITERS == 0) then skip the first loop,
1034 and go directly to the second loop.
1035 The second guard is:
1036 if (FIRST_NITERS == NITERS) then skip the second loop.
1037
23a3430d 1038 If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given
1039 then the generated condition is combined with COND_EXPR and the
1040 statements in COND_EXPR_STMT_LIST are emitted together with it.
1041
fb85abff 1042 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1043 FORNOW the resulting code will not be in loop-closed-ssa form.
1044*/
1045
1046static struct loop*
48e1416a 1047slpeel_tree_peel_loop_to_edge (struct loop *loop,
2f630015 1048 edge e, tree *first_niters,
fb85abff 1049 tree niters, bool update_first_loop_count,
23a3430d 1050 unsigned int th, bool check_profitability,
877584e4 1051 tree cond_expr, gimple_seq cond_expr_stmt_list,
1052 int bound1, int bound2)
fb85abff 1053{
1054 struct loop *new_loop = NULL, *first_loop, *second_loop;
1055 edge skip_e;
1056 tree pre_condition = NULL_TREE;
fb85abff 1057 basic_block bb_before_second_loop, bb_after_second_loop;
1058 basic_block bb_before_first_loop;
1059 basic_block bb_between_loops;
1060 basic_block new_exit_bb;
38091110 1061 gimple_stmt_iterator gsi;
fb85abff 1062 edge exit_e = single_exit (loop);
36f39b2e 1063 source_location loop_loc;
fb85abff 1064 tree cost_pre_condition = NULL_TREE;
877584e4 1065 /* There are many aspects to how likely the first loop is going to be executed.
1066 Without histogram we can't really do good job. Simply set it to
1067 2/3, so the first loop is not reordered to the end of function and
1068 the hot path through stays short. */
1069 int first_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1070 int second_guard_probability = 2 * REG_BR_PROB_BASE / 3;
1071 int probability_of_second_loop;
48e1416a 1072
fb85abff 1073 if (!slpeel_can_duplicate_loop_p (loop, e))
1074 return NULL;
48e1416a 1075
f68f7ce8 1076 /* We might have a queued need to update virtual SSA form. As we
1077 delete the update SSA machinery below after doing a regular
1078 incremental SSA update during loop copying make sure we don't
1079 lose that fact.
1080 ??? Needing to update virtual SSA form by renaming is unfortunate
1081 but not all of the vectorizer code inserting new loads / stores
1082 properly assigns virtual operands to those statements. */
1083 update_ssa (TODO_update_ssa_only_virtuals);
1084
38091110 1085 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1086 in the exit bb and rename all the uses after the loop. This simplifies
1087 the *guard[12] routines, which assume loop closed SSA form for all PHIs
1088 (but normally loop closed SSA form doesn't require virtual PHIs to be
1089 in the same form). Doing this early simplifies the checking what
1090 uses should be renamed. */
1091 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
7c782c9b 1092 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
38091110 1093 {
1094 gimple phi = gsi_stmt (gsi);
1095 for (gsi = gsi_start_phis (exit_e->dest);
1096 !gsi_end_p (gsi); gsi_next (&gsi))
7c782c9b 1097 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
38091110 1098 break;
1099 if (gsi_end_p (gsi))
1100 {
874117c8 1101 tree new_vop = copy_ssa_name (PHI_RESULT (phi), NULL);
1102 gimple new_phi = create_phi_node (new_vop, exit_e->dest);
38091110 1103 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1104 imm_use_iterator imm_iter;
1105 gimple stmt;
38091110 1106 use_operand_p use_p;
1107
60d535d2 1108 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
38091110 1109 gimple_phi_set_result (new_phi, new_vop);
1110 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
1111 if (stmt != new_phi && gimple_bb (stmt) != loop->header)
1112 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1113 SET_USE (use_p, new_vop);
1114 }
1115 break;
1116 }
fb85abff 1117
1118 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1119 Resulting CFG would be:
1120
1121 first_loop:
1122 do {
1123 } while ...
1124
1125 second_loop:
1126 do {
1127 } while ...
1128
1129 orig_exit_bb:
1130 */
48e1416a 1131
fb85abff 1132 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
1133 {
1134 loop_loc = find_loop_location (loop);
7bd765d4 1135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1136 "tree_duplicate_loop_to_edge_cfg failed.\n");
fb85abff 1137 return NULL;
1138 }
48e1416a 1139
b123eaab 1140 if (MAY_HAVE_DEBUG_STMTS)
1141 {
f1f41a6c 1142 gcc_assert (!adjust_vec.exists ());
d70aebca 1143 adjust_vec.create (32);
b123eaab 1144 }
1145
fb85abff 1146 if (e == exit_e)
1147 {
1148 /* NEW_LOOP was placed after LOOP. */
1149 first_loop = loop;
1150 second_loop = new_loop;
1151 }
1152 else
1153 {
1154 /* NEW_LOOP was placed before LOOP. */
1155 first_loop = new_loop;
1156 second_loop = loop;
1157 }
1158
fb85abff 1159 /* 2. Add the guard code in one of the following ways:
1160
1161 2.a Add the guard that controls whether the first loop is executed.
1162 This occurs when this function is invoked for prologue or epilogue
1163 generation and when the cost model check can be done at compile time.
1164
1165 Resulting CFG would be:
1166
1167 bb_before_first_loop:
1168 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1169 GOTO first-loop
1170
1171 first_loop:
1172 do {
1173 } while ...
1174
1175 bb_before_second_loop:
1176
1177 second_loop:
1178 do {
1179 } while ...
1180
1181 orig_exit_bb:
1182
1183 2.b Add the cost model check that allows the prologue
1184 to iterate for the entire unchanged scalar
1185 iterations of the loop in the event that the cost
1186 model indicates that the scalar loop is more
1187 profitable than the vector one. This occurs when
1188 this function is invoked for prologue generation
1189 and the cost model check needs to be done at run
1190 time.
1191
1192 Resulting CFG after prologue peeling would be:
1193
1194 if (scalar_loop_iterations <= th)
1195 FIRST_NITERS = scalar_loop_iterations
1196
1197 bb_before_first_loop:
1198 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1199 GOTO first-loop
1200
1201 first_loop:
1202 do {
1203 } while ...
1204
1205 bb_before_second_loop:
1206
1207 second_loop:
1208 do {
1209 } while ...
1210
1211 orig_exit_bb:
1212
1213 2.c Add the cost model check that allows the epilogue
1214 to iterate for the entire unchanged scalar
1215 iterations of the loop in the event that the cost
1216 model indicates that the scalar loop is more
1217 profitable than the vector one. This occurs when
1218 this function is invoked for epilogue generation
1219 and the cost model check needs to be done at run
23a3430d 1220 time. This check is combined with any pre-existing
1221 check in COND_EXPR to avoid versioning.
fb85abff 1222
1223 Resulting CFG after prologue peeling would be:
1224
1225 bb_before_first_loop:
1226 if ((scalar_loop_iterations <= th)
1227 ||
1228 FIRST_NITERS == 0) GOTO bb_before_second_loop
1229 GOTO first-loop
1230
1231 first_loop:
1232 do {
1233 } while ...
1234
1235 bb_before_second_loop:
1236
1237 second_loop:
1238 do {
1239 } while ...
1240
1241 orig_exit_bb:
1242 */
1243
1244 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
c9b2c569 1245 /* Loop copying insterted a forwarder block for us here. */
1246 bb_before_second_loop = single_exit (first_loop)->dest;
fb85abff 1247
877584e4 1248 probability_of_second_loop = (inverse_probability (first_guard_probability)
1249 + combine_probabilities (second_guard_probability,
1250 first_guard_probability));
1251 /* Theoretically preheader edge of first loop and exit edge should have
1252 same frequencies. Loop exit probablities are however easy to get wrong.
1253 It is safer to copy value from original loop entry. */
1254 bb_before_second_loop->frequency
f9d4b7f4 1255 = combine_probabilities (bb_before_first_loop->frequency,
1256 probability_of_second_loop);
877584e4 1257 bb_before_second_loop->count
1258 = apply_probability (bb_before_first_loop->count,
1259 probability_of_second_loop);
1260 single_succ_edge (bb_before_second_loop)->count
1261 = bb_before_second_loop->count;
1262
fb85abff 1263 /* Epilogue peeling. */
1264 if (!update_first_loop_count)
1265 {
1266 pre_condition =
2f630015 1267 fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1268 build_int_cst (TREE_TYPE (*first_niters), 0));
fb85abff 1269 if (check_profitability)
1270 {
1271 tree scalar_loop_iters
1272 = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1273 (loop_vec_info_for_loop (loop)));
48e1416a 1274 cost_pre_condition =
1275 fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
fb85abff 1276 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1277
1278 pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1279 cost_pre_condition, pre_condition);
1280 }
23a3430d 1281 if (cond_expr)
1282 {
1283 pre_condition =
1284 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1285 pre_condition,
1286 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
1287 cond_expr));
1288 }
fb85abff 1289 }
1290
48e1416a 1291 /* Prologue peeling. */
fb85abff 1292 else
1293 {
1294 if (check_profitability)
1295 set_prologue_iterations (bb_before_first_loop, first_niters,
877584e4 1296 loop, th, first_guard_probability);
fb85abff 1297
1298 pre_condition =
2f630015 1299 fold_build2 (LE_EXPR, boolean_type_node, *first_niters,
1300 build_int_cst (TREE_TYPE (*first_niters), 0));
fb85abff 1301 }
1302
1303 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
23a3430d 1304 cond_expr_stmt_list,
877584e4 1305 bb_before_second_loop, bb_before_first_loop,
1306 inverse_probability (first_guard_probability));
1307 scale_loop_profile (first_loop, first_guard_probability,
1308 check_profitability && (int)th > bound1 ? th : bound1);
fb85abff 1309 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1310 first_loop == new_loop,
1e8cf080 1311 &new_exit_bb);
fb85abff 1312
1313
1314 /* 3. Add the guard that controls whether the second loop is executed.
1315 Resulting CFG would be:
1316
1317 bb_before_first_loop:
1318 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1319 GOTO first-loop
1320
1321 first_loop:
1322 do {
1323 } while ...
1324
1325 bb_between_loops:
1326 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1327 GOTO bb_before_second_loop
1328
1329 bb_before_second_loop:
1330
1331 second_loop:
1332 do {
1333 } while ...
1334
1335 bb_after_second_loop:
1336
1337 orig_exit_bb:
1338 */
1339
1340 bb_between_loops = new_exit_bb;
1341 bb_after_second_loop = split_edge (single_exit (second_loop));
1342
48e1416a 1343 pre_condition =
2f630015 1344 fold_build2 (EQ_EXPR, boolean_type_node, *first_niters, niters);
23a3430d 1345 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
877584e4 1346 bb_after_second_loop, bb_before_first_loop,
1347 inverse_probability (second_guard_probability));
1348 scale_loop_profile (second_loop, probability_of_second_loop, bound2);
fb85abff 1349 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1350 second_loop == new_loop, &new_exit_bb);
1351
1352 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1353 */
1354 if (update_first_loop_count)
2f630015 1355 slpeel_make_loop_iterate_ntimes (first_loop, *first_niters);
fb85abff 1356
5d206f11 1357 delete_update_ssa ();
1358
b123eaab 1359 adjust_vec_debug_stmts ();
1360
fb85abff 1361 return new_loop;
1362}
1363
1364/* Function vect_get_loop_location.
1365
1366 Extract the location of the loop in the source code.
1367 If the loop is not well formed for vectorization, an estimated
1368 location is calculated.
1369 Return the loop location if succeed and NULL if not. */
1370
36f39b2e 1371source_location
fb85abff 1372find_loop_location (struct loop *loop)
1373{
1374 gimple stmt = NULL;
1375 basic_block bb;
1376 gimple_stmt_iterator si;
1377
1378 if (!loop)
36f39b2e 1379 return UNKNOWN_LOCATION;
fb85abff 1380
1381 stmt = get_loop_exit_condition (loop);
1382
b2d978a6 1383 if (stmt
1384 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
fb85abff 1385 return gimple_location (stmt);
1386
1387 /* If we got here the loop is probably not "well formed",
1388 try to estimate the loop location */
1389
1390 if (!loop->header)
36f39b2e 1391 return UNKNOWN_LOCATION;
fb85abff 1392
1393 bb = loop->header;
1394
1395 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1396 {
1397 stmt = gsi_stmt (si);
b2d978a6 1398 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
fb85abff 1399 return gimple_location (stmt);
1400 }
1401
36f39b2e 1402 return UNKNOWN_LOCATION;
fb85abff 1403}
1404
1405
fb85abff 1406/* Function vect_can_advance_ivs_p
1407
48e1416a 1408 In case the number of iterations that LOOP iterates is unknown at compile
1409 time, an epilog loop will be generated, and the loop induction variables
1410 (IVs) will be "advanced" to the value they are supposed to take just before
fb85abff 1411 the epilog loop. Here we check that the access function of the loop IVs
1412 and the expression that represents the loop bound are simple enough.
1413 These restrictions will be relaxed in the future. */
1414
48e1416a 1415bool
fb85abff 1416vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1417{
1418 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1419 basic_block bb = loop->header;
1420 gimple phi;
1421 gimple_stmt_iterator gsi;
1422
1423 /* Analyze phi functions of the loop header. */
1424
6d8fb6cf 1425 if (dump_enabled_p ())
78bb46f5 1426 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
fb85abff 1427 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1428 {
fb85abff 1429 tree evolution_part;
1430
1431 phi = gsi_stmt (gsi);
6d8fb6cf 1432 if (dump_enabled_p ())
fb85abff 1433 {
7bd765d4 1434 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
1435 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
78bb46f5 1436 dump_printf (MSG_NOTE, "\n");
fb85abff 1437 }
1438
1439 /* Skip virtual phi's. The data dependences that are associated with
1440 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1441
7c782c9b 1442 if (virtual_operand_p (PHI_RESULT (phi)))
fb85abff 1443 {
6d8fb6cf 1444 if (dump_enabled_p ())
7bd765d4 1445 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 1446 "virtual phi. skip.\n");
fb85abff 1447 continue;
1448 }
1449
1450 /* Skip reduction phis. */
1451
1452 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1453 {
6d8fb6cf 1454 if (dump_enabled_p ())
7bd765d4 1455 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 1456 "reduc phi. skip.\n");
fb85abff 1457 continue;
1458 }
1459
1460 /* Analyze the evolution function. */
1461
2cd0995e 1462 evolution_part
1463 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
fb85abff 1464 if (evolution_part == NULL_TREE)
1465 {
6d8fb6cf 1466 if (dump_enabled_p ())
2cd0995e 1467 dump_printf (MSG_MISSED_OPTIMIZATION,
78bb46f5 1468 "No access function or evolution.\n");
fb85abff 1469 return false;
1470 }
48e1416a 1471
1472 /* FORNOW: We do not transform initial conditions of IVs
fb85abff 1473 which evolution functions are a polynomial of degree >= 2. */
1474
1475 if (tree_is_chrec (evolution_part))
48e1416a 1476 return false;
fb85abff 1477 }
1478
1479 return true;
1480}
1481
1482
1483/* Function vect_update_ivs_after_vectorizer.
1484
1485 "Advance" the induction variables of LOOP to the value they should take
1486 after the execution of LOOP. This is currently necessary because the
1487 vectorizer does not handle induction variables that are used after the
1488 loop. Such a situation occurs when the last iterations of LOOP are
1489 peeled, because:
1490 1. We introduced new uses after LOOP for IVs that were not originally used
1491 after LOOP: the IVs of LOOP are now used by an epilog loop.
1492 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1493 times, whereas the loop IVs should be bumped N times.
1494
1495 Input:
1496 - LOOP - a loop that is going to be vectorized. The last few iterations
1497 of LOOP were peeled.
1498 - NITERS - the number of iterations that LOOP executes (before it is
1499 vectorized). i.e, the number of times the ivs should be bumped.
1500 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1501 coming out from LOOP on which there are uses of the LOOP ivs
1502 (this is the path from LOOP->exit to epilog_loop->preheader).
1503
1504 The new definitions of the ivs are placed in LOOP->exit.
1505 The phi args associated with the edge UPDATE_E in the bb
1506 UPDATE_E->dest are updated accordingly.
1507
1508 Assumption 1: Like the rest of the vectorizer, this function assumes
1509 a single loop exit that has a single predecessor.
1510
1511 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1512 organized in the same order.
1513
1514 Assumption 3: The access function of the ivs is simple enough (see
1515 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1516
1517 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
48e1416a 1518 coming out of LOOP on which the ivs of LOOP are used (this is the path
fb85abff 1519 that leads to the epilog loop; other paths skip the epilog loop). This
1520 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1521 needs to have its phis updated.
1522 */
1523
1524static void
48e1416a 1525vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
fb85abff 1526 edge update_e)
1527{
1528 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1529 basic_block exit_bb = single_exit (loop)->dest;
1530 gimple phi, phi1;
1531 gimple_stmt_iterator gsi, gsi1;
1532 basic_block update_bb = update_e->dest;
1533
f3d6d99e 1534 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
fb85abff 1535
1536 /* Make sure there exists a single-predecessor exit bb: */
1537 gcc_assert (single_pred_p (exit_bb));
1538
1539 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1540 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1541 gsi_next (&gsi), gsi_next (&gsi1))
1542 {
fb85abff 1543 tree init_expr;
1efcacec 1544 tree step_expr, off;
1545 tree type;
fb85abff 1546 tree var, ni, ni_name;
1547 gimple_stmt_iterator last_gsi;
86faead7 1548 stmt_vec_info stmt_info;
fb85abff 1549
1550 phi = gsi_stmt (gsi);
1551 phi1 = gsi_stmt (gsi1);
6d8fb6cf 1552 if (dump_enabled_p ())
fb85abff 1553 {
7bd765d4 1554 dump_printf_loc (MSG_NOTE, vect_location,
1555 "vect_update_ivs_after_vectorizer: phi: ");
1556 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
78bb46f5 1557 dump_printf (MSG_NOTE, "\n");
fb85abff 1558 }
1559
1560 /* Skip virtual phi's. */
7c782c9b 1561 if (virtual_operand_p (PHI_RESULT (phi)))
fb85abff 1562 {
6d8fb6cf 1563 if (dump_enabled_p ())
7bd765d4 1564 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 1565 "virtual phi. skip.\n");
fb85abff 1566 continue;
1567 }
1568
1569 /* Skip reduction phis. */
86faead7 1570 stmt_info = vinfo_for_stmt (phi);
1571 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
48e1416a 1572 {
6d8fb6cf 1573 if (dump_enabled_p ())
7bd765d4 1574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 1575 "reduc phi. skip.\n");
fb85abff 1576 continue;
48e1416a 1577 }
fb85abff 1578
86faead7 1579 type = TREE_TYPE (gimple_phi_result (phi));
1580 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
1581 step_expr = unshare_expr (step_expr);
48e1416a 1582
fb85abff 1583 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1584 of degree >= 2 or exponential. */
86faead7 1585 gcc_assert (!tree_is_chrec (step_expr));
fb85abff 1586
86faead7 1587 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
fb85abff 1588
1efcacec 1589 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1590 fold_convert (TREE_TYPE (step_expr), niters),
1591 step_expr);
86faead7 1592 if (POINTER_TYPE_P (type))
2cc66f2a 1593 ni = fold_build_pointer_plus (init_expr, off);
fb85abff 1594 else
86faead7 1595 ni = fold_build2 (PLUS_EXPR, type,
1596 init_expr, fold_convert (type, off));
fb85abff 1597
86faead7 1598 var = create_tmp_var (type, "tmp");
fb85abff 1599
1600 last_gsi = gsi_last_bb (exit_bb);
1601 ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
1602 true, GSI_SAME_STMT);
48e1416a 1603
fb85abff 1604 /* Fix phi expressions in the successor bb. */
b123eaab 1605 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
fb85abff 1606 }
1607}
1608
fb85abff 1609/* Function vect_do_peeling_for_loop_bound
1610
1611 Peel the last iterations of the loop represented by LOOP_VINFO.
48e1416a 1612 The peeled iterations form a new epilog loop. Given that the loop now
fb85abff 1613 iterates NITERS times, the new epilog loop iterates
1614 NITERS % VECTORIZATION_FACTOR times.
48e1416a 1615
1616 The original loop will later be made to iterate
23a3430d 1617 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
1618
1619 COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
1620 test. */
fb85abff 1621
48e1416a 1622void
782fd1d1 1623vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo,
1624 tree ni_name, tree ratio_mult_vf_name,
e7430948 1625 unsigned int th, bool check_profitability)
fb85abff 1626{
fb85abff 1627 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1628 struct loop *new_loop;
1629 edge update_e;
1630 basic_block preheader;
1631 int loop_num;
2afdcbed 1632 int max_iter;
e7430948 1633 tree cond_expr = NULL_TREE;
1634 gimple_seq cond_expr_stmt_list = NULL;
fb85abff 1635
6d8fb6cf 1636 if (dump_enabled_p ())
b055bc88 1637 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1638 "=== vect_do_peeling_for_loop_bound ===\n");
fb85abff 1639
1640 initialize_original_copy_tables ();
1641
48e1416a 1642 loop_num = loop->num;
fb85abff 1643
fb85abff 1644 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
2f630015 1645 &ratio_mult_vf_name, ni_name, false,
23a3430d 1646 th, check_profitability,
877584e4 1647 cond_expr, cond_expr_stmt_list,
1648 0, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
fb85abff 1649 gcc_assert (new_loop);
1650 gcc_assert (loop_num == loop->num);
1651#ifdef ENABLE_CHECKING
1652 slpeel_verify_cfg_after_peeling (loop, new_loop);
1653#endif
1654
1655 /* A guard that controls whether the new_loop is to be executed or skipped
1656 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1657 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1658 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1659 is on the path where the LOOP IVs are used and need to be updated. */
1660
1661 preheader = loop_preheader_edge (new_loop)->src;
1662 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
1663 update_e = EDGE_PRED (preheader, 0);
1664 else
1665 update_e = EDGE_PRED (preheader, 1);
1666
48e1416a 1667 /* Update IVs of original loop as if they were advanced
fb85abff 1668 by ratio_mult_vf_name steps. */
48e1416a 1669 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
fb85abff 1670
d3f1934c 1671 /* For vectorization factor N, we need to copy last N-1 values in epilogue
1672 and this means N-2 loopback edge executions.
1673
1674 PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
1675 will execute at least LOOP_VINFO_VECT_FACTOR times. */
1676 max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1677 ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
1678 : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
e7430948 1679 if (check_profitability)
d3f1934c 1680 max_iter = MAX (max_iter, (int) th - 1);
cf8f0e63 1681 record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
b055bc88 1682 dump_printf (MSG_NOTE,
7bd765d4 1683 "Setting upper bound of nb iterations for epilogue "
1684 "loop to %d\n", max_iter);
15fa0281 1685
fb85abff 1686 /* After peeling we have to reset scalar evolution analyzer. */
1687 scev_reset ();
1688
1689 free_original_copy_tables ();
1690}
1691
1692
1693/* Function vect_gen_niters_for_prolog_loop
1694
1695 Set the number of iterations for the loop represented by LOOP_VINFO
1696 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1697 and the misalignment of DR - the data reference recorded in
48e1416a 1698 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
fb85abff 1699 this loop, the data reference DR will refer to an aligned location.
1700
1701 The following computation is generated:
1702
1703 If the misalignment of DR is known at compile time:
1704 addr_mis = int mis = DR_MISALIGNMENT (dr);
1705 Else, compute address misalignment in bytes:
482a44fa 1706 addr_mis = addr & (vectype_align - 1)
fb85abff 1707
1708 prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)
1709
1710 (elem_size = element type size; an element is the scalar element whose type
1711 is the inner type of the vectype)
1712
1713 When the step of the data-ref in the loop is not 1 (as in interleaved data
1714 and SLP), the number of iterations of the prolog must be divided by the step
1715 (which is equal to the size of interleaved group).
1716
1717 The above formulas assume that VF == number of elements in the vector. This
1718 may not hold when there are multiple-types in the loop.
1719 In this case, for some data-references in the loop the VF does not represent
1720 the number of elements that fit in the vector. Therefore, instead of VF we
1721 use TYPE_VECTOR_SUBPARTS. */
1722
48e1416a 1723static tree
877584e4 1724vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters, int *bound)
fb85abff 1725{
1726 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1727 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1728 tree var;
1729 gimple_seq stmts;
1730 tree iters, iters_name;
1731 edge pe;
1732 basic_block new_bb;
1733 gimple dr_stmt = DR_STMT (dr);
1734 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1735 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1736 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1737 tree niters_type = TREE_TYPE (loop_niters);
fb85abff 1738 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1739
48e1416a 1740 pe = loop_preheader_edge (loop);
fb85abff 1741
313a5120 1742 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
fb85abff 1743 {
313a5120 1744 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
fb85abff 1745
6d8fb6cf 1746 if (dump_enabled_p ())
b055bc88 1747 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1748 "known peeling = %d.\n", npeel);
fb85abff 1749
0822b158 1750 iters = build_int_cst (niters_type, npeel);
313a5120 1751 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
fb85abff 1752 }
1753 else
1754 {
1755 gimple_seq new_stmts = NULL;
f1b8c740 1756 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1757 tree offset = negative
1758 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
48e1416a 1759 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
f1b8c740 1760 &new_stmts, offset, loop);
3cea8318 1761 tree type = unsigned_type_for (TREE_TYPE (start_addr));
482a44fa 1762 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
1763 HOST_WIDE_INT elem_size =
1764 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1765 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
fb85abff 1766 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
1767 tree nelements_tree = build_int_cst (type, nelements);
1768 tree byte_misalign;
1769 tree elem_misalign;
1770
1771 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
1772 gcc_assert (!new_bb);
48e1416a 1773
482a44fa 1774 /* Create: byte_misalign = addr & (vectype_align - 1) */
48e1416a 1775 byte_misalign =
0822b158 1776 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
482a44fa 1777 vectype_align_minus_1);
48e1416a 1778
fb85abff 1779 /* Create: elem_misalign = byte_misalign / element_size */
1780 elem_misalign =
1781 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
1782
1783 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
f1b8c740 1784 if (negative)
1785 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1786 else
1787 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
fb85abff 1788 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1789 iters = fold_convert (niters_type, iters);
877584e4 1790 *bound = nelements;
fb85abff 1791 }
1792
1793 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1794 /* If the loop bound is known at compile time we already verified that it is
1795 greater than vf; since the misalignment ('iters') is at most vf, there's
1796 no need to generate the MIN_EXPR in this case. */
1797 if (TREE_CODE (loop_niters) != INTEGER_CST)
1798 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
1799
6d8fb6cf 1800 if (dump_enabled_p ())
fb85abff 1801 {
b055bc88 1802 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 1803 "niters for prolog loop: ");
b055bc88 1804 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
78bb46f5 1805 dump_printf (MSG_NOTE, "\n");
fb85abff 1806 }
1807
1808 var = create_tmp_var (niters_type, "prolog_loop_niters");
fb85abff 1809 stmts = NULL;
1810 iters_name = force_gimple_operand (iters, &stmts, false, var);
1811
1812 /* Insert stmt on loop preheader edge. */
1813 if (stmts)
1814 {
1815 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1816 gcc_assert (!new_bb);
1817 }
1818
48e1416a 1819 return iters_name;
fb85abff 1820}
1821
1822
1823/* Function vect_update_init_of_dr
1824
1825 NITERS iterations were peeled from LOOP. DR represents a data reference
1826 in LOOP. This function updates the information recorded in DR to
48e1416a 1827 account for the fact that the first NITERS iterations had already been
fb85abff 1828 executed. Specifically, it updates the OFFSET field of DR. */
1829
1830static void
1831vect_update_init_of_dr (struct data_reference *dr, tree niters)
1832{
1833 tree offset = DR_OFFSET (dr);
48e1416a 1834
fb85abff 1835 niters = fold_build2 (MULT_EXPR, sizetype,
1836 fold_convert (sizetype, niters),
1837 fold_convert (sizetype, DR_STEP (dr)));
87f9ffa4 1838 offset = fold_build2 (PLUS_EXPR, sizetype,
1839 fold_convert (sizetype, offset), niters);
fb85abff 1840 DR_OFFSET (dr) = offset;
1841}
1842
1843
1844/* Function vect_update_inits_of_drs
1845
48e1416a 1846 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1847 This function updates the information recorded for the data references in
1848 the loop to account for the fact that the first NITERS iterations had
fb85abff 1849 already been executed. Specifically, it updates the initial_condition of
1850 the access_function of all the data_references in the loop. */
1851
1852static void
1853vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1854{
1855 unsigned int i;
f1f41a6c 1856 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
fb85abff 1857 struct data_reference *dr;
7bd765d4 1858
6d8fb6cf 1859 if (dump_enabled_p ())
b055bc88 1860 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1861 "=== vect_update_inits_of_dr ===\n");
fb85abff 1862
f1f41a6c 1863 FOR_EACH_VEC_ELT (datarefs, i, dr)
fb85abff 1864 vect_update_init_of_dr (dr, niters);
1865}
1866
1867
1868/* Function vect_do_peeling_for_alignment
1869
1870 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1871 'niters' is set to the misalignment of one of the data references in the
1872 loop, thereby forcing it to refer to an aligned location at the beginning
1873 of the execution of this loop. The data reference for which we are
1874 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
1875
1876void
782fd1d1 1877vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, tree ni_name,
e7430948 1878 unsigned int th, bool check_profitability)
fb85abff 1879{
1880 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
782fd1d1 1881 tree niters_of_prolog_loop;
be2e5c02 1882 tree wide_prolog_niters;
fb85abff 1883 struct loop *new_loop;
b6556916 1884 int max_iter;
877584e4 1885 int bound = 0;
fb85abff 1886
6d8fb6cf 1887 if (dump_enabled_p ())
6ee2edad 1888 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1889 "loop peeled for vectorization to enhance"
1890 " alignment\n");
fb85abff 1891
1892 initialize_original_copy_tables ();
1893
782fd1d1 1894 gimple_seq stmts = NULL;
1895 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2f630015 1896 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo,
877584e4 1897 ni_name,
1898 &bound);
fb85abff 1899
fb85abff 1900 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
1901 new_loop =
1902 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
2f630015 1903 &niters_of_prolog_loop, ni_name, true,
877584e4 1904 th, check_profitability, NULL_TREE, NULL,
1905 bound,
1906 0);
fb85abff 1907
1908 gcc_assert (new_loop);
1909#ifdef ENABLE_CHECKING
1910 slpeel_verify_cfg_after_peeling (new_loop, loop);
1911#endif
d3f1934c 1912 /* For vectorization factor N, we need to copy at most N-1 values
1913 for alignment and this means N-2 loopback edge executions. */
1914 max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
e7430948 1915 if (check_profitability)
d3f1934c 1916 max_iter = MAX (max_iter, (int) th - 1);
cf8f0e63 1917 record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
b055bc88 1918 dump_printf (MSG_NOTE,
7bd765d4 1919 "Setting upper bound of nb iterations for prologue "
1920 "loop to %d\n", max_iter);
fb85abff 1921
1922 /* Update number of times loop executes. */
fb85abff 1923 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
313a5120 1924 TREE_TYPE (ni_name), ni_name, niters_of_prolog_loop);
fb85abff 1925
2f630015 1926 if (types_compatible_p (sizetype, TREE_TYPE (niters_of_prolog_loop)))
1927 wide_prolog_niters = niters_of_prolog_loop;
1928 else
1929 {
1930 gimple_seq seq = NULL;
1931 edge pe = loop_preheader_edge (loop);
1932 tree wide_iters = fold_convert (sizetype, niters_of_prolog_loop);
1933 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
2f630015 1934 wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
1935 var);
1936 if (seq)
1937 {
1938 /* Insert stmt on loop preheader edge. */
1939 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1940 gcc_assert (!new_bb);
1941 }
1942 }
1943
fb85abff 1944 /* Update the init conditions of the access functions of all data refs. */
be2e5c02 1945 vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
fb85abff 1946
1947 /* After peeling we have to reset scalar evolution analyzer. */
1948 scev_reset ();
1949
1950 free_original_copy_tables ();
1951}
1952
1953
1954/* Function vect_create_cond_for_align_checks.
1955
1956 Create a conditional expression that represents the alignment checks for
1957 all of data references (array element references) whose alignment must be
1958 checked at runtime.
1959
1960 Input:
1961 COND_EXPR - input conditional expression. New conditions will be chained
1962 with logical AND operation.
1963 LOOP_VINFO - two fields of the loop information are used.
1964 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1965 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1966
1967 Output:
1968 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1969 expression.
1970 The returned value is the conditional expression to be used in the if
1971 statement that controls which version of the loop gets executed at runtime.
1972
1973 The algorithm makes two assumptions:
1974 1) The number of bytes "n" in a vector is a power of 2.
1975 2) An address "a" is aligned if a%n is zero and that this
1976 test can be done as a&(n-1) == 0. For example, for 16
1977 byte vectors the test is a&0xf == 0. */
1978
1979static void
1980vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1981 tree *cond_expr,
1982 gimple_seq *cond_expr_stmt_list)
1983{
1984 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
f1f41a6c 1985 vec<gimple> may_misalign_stmts
fb85abff 1986 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1987 gimple ref_stmt;
1988 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1989 tree mask_cst;
1990 unsigned int i;
fb85abff 1991 tree int_ptrsize_type;
1992 char tmp_name[20];
1993 tree or_tmp_name = NULL_TREE;
03d37e4e 1994 tree and_tmp_name;
fb85abff 1995 gimple and_stmt;
1996 tree ptrsize_zero;
1997 tree part_cond_expr;
1998
1999 /* Check that mask is one less than a power of 2, i.e., mask is
2000 all zeros followed by all ones. */
2001 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2002
3cea8318 2003 int_ptrsize_type = signed_type_for (ptr_type_node);
fb85abff 2004
2005 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2006 of the first vector of the i'th data reference. */
2007
f1f41a6c 2008 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
fb85abff 2009 {
2010 gimple_seq new_stmt_list = NULL;
2011 tree addr_base;
03d37e4e 2012 tree addr_tmp_name;
2013 tree new_or_tmp_name;
fb85abff 2014 gimple addr_stmt, or_stmt;
f1b8c740 2015 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2016 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2017 bool negative = tree_int_cst_compare
2018 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2019 tree offset = negative
2020 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : NULL_TREE;
fb85abff 2021
2022 /* create: addr_tmp = (int)(address_of_first_vector) */
2023 addr_base =
2024 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
f1b8c740 2025 offset, loop);
fb85abff 2026 if (new_stmt_list != NULL)
2027 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2028
03d37e4e 2029 sprintf (tmp_name, "addr2int%d", i);
2030 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
fb85abff 2031 addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name,
2032 addr_base, NULL_TREE);
fb85abff 2033 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2034
2035 /* The addresses are OR together. */
2036
2037 if (or_tmp_name != NULL_TREE)
2038 {
2039 /* create: or_tmp = or_tmp | addr_tmp */
03d37e4e 2040 sprintf (tmp_name, "orptrs%d", i);
2041 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
fb85abff 2042 or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR,
2043 new_or_tmp_name,
2044 or_tmp_name, addr_tmp_name);
fb85abff 2045 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2046 or_tmp_name = new_or_tmp_name;
2047 }
2048 else
2049 or_tmp_name = addr_tmp_name;
2050
2051 } /* end for i */
2052
2053 mask_cst = build_int_cst (int_ptrsize_type, mask);
2054
2055 /* create: and_tmp = or_tmp & mask */
03d37e4e 2056 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
fb85abff 2057
2058 and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name,
2059 or_tmp_name, mask_cst);
fb85abff 2060 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2061
2062 /* Make and_tmp the left operand of the conditional test against zero.
2063 if and_tmp has a nonzero bit then some address is unaligned. */
2064 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2065 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2066 and_tmp_name, ptrsize_zero);
2067 if (*cond_expr)
2068 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2069 *cond_expr, part_cond_expr);
2070 else
2071 *cond_expr = part_cond_expr;
2072}
2073
fb85abff 2074/* Function vect_create_cond_for_alias_checks.
2075
2076 Create a conditional expression that represents the run-time checks for
2077 overlapping of address ranges represented by a list of data references
2078 relations passed as input.
2079
2080 Input:
2081 COND_EXPR - input conditional expression. New conditions will be chained
8a7b0f48 2082 with logical AND operation. If it is NULL, then the function
2083 is used to return the number of alias checks.
fb85abff 2084 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2085 to be checked.
2086
2087 Output:
2088 COND_EXPR - conditional expression.
fb85abff 2089
8a7b0f48 2090 The returned COND_EXPR is the conditional expression to be used in the if
fb85abff 2091 statement that controls which version of the loop gets executed at runtime.
2092*/
2093
8a7b0f48 2094void
90d4c4af 2095vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
fb85abff 2096{
43d14b66 2097 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
8a7b0f48 2098 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2099 tree part_cond_expr;
fb85abff 2100
2101 /* Create expression
33767455 2102 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2103 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
48e1416a 2104 &&
fb85abff 2105 ...
2106 &&
33767455 2107 ((store_ptr_n + store_segment_length_n) <= load_ptr_n)
2108 || (load_ptr_n + load_segment_length_n) <= store_ptr_n)) */
fb85abff 2109
8a7b0f48 2110 if (comp_alias_ddrs.is_empty ())
fb85abff 2111 return;
2112
8a7b0f48 2113 for (size_t i = 0, s = comp_alias_ddrs.length (); i < s; ++i)
fb85abff 2114 {
43d14b66 2115 const dr_with_seg_len& dr_a = comp_alias_ddrs[i].first;
2116 const dr_with_seg_len& dr_b = comp_alias_ddrs[i].second;
8a7b0f48 2117 tree segment_length_a = dr_a.seg_len;
2118 tree segment_length_b = dr_b.seg_len;
fb85abff 2119
8a7b0f48 2120 tree addr_base_a
43d14b66 2121 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), dr_a.offset);
8a7b0f48 2122 tree addr_base_b
43d14b66 2123 = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), dr_b.offset);
fb85abff 2124
6d8fb6cf 2125 if (dump_enabled_p ())
fb85abff 2126 {
b055bc88 2127 dump_printf_loc (MSG_NOTE, vect_location,
8a7b0f48 2128 "create runtime check for data references ");
2129 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_a.dr));
b055bc88 2130 dump_printf (MSG_NOTE, " and ");
8a7b0f48 2131 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr_b.dr));
2132 dump_printf (MSG_NOTE, "\n");
fb85abff 2133 }
2134
8a7b0f48 2135 tree seg_a_min = addr_base_a;
2136 tree seg_a_max = fold_build_pointer_plus (addr_base_a, segment_length_a);
2137 if (tree_int_cst_compare (DR_STEP (dr_a.dr), size_zero_node) < 0)
f1b8c740 2138 seg_a_min = seg_a_max, seg_a_max = addr_base_a;
2139
8a7b0f48 2140 tree seg_b_min = addr_base_b;
2141 tree seg_b_max = fold_build_pointer_plus (addr_base_b, segment_length_b);
2142 if (tree_int_cst_compare (DR_STEP (dr_b.dr), size_zero_node) < 0)
f1b8c740 2143 seg_b_min = seg_b_max, seg_b_max = addr_base_b;
fb85abff 2144
48e1416a 2145 part_cond_expr =
fb85abff 2146 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
33767455 2147 fold_build2 (LE_EXPR, boolean_type_node, seg_a_max, seg_b_min),
2148 fold_build2 (LE_EXPR, boolean_type_node, seg_b_max, seg_a_min));
48e1416a 2149
fb85abff 2150 if (*cond_expr)
2151 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2152 *cond_expr, part_cond_expr);
2153 else
2154 *cond_expr = part_cond_expr;
2155 }
fb85abff 2156
6d8fb6cf 2157 if (dump_enabled_p ())
b055bc88 2158 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 2159 "created %u versioning for alias checks.\n",
8a7b0f48 2160 comp_alias_ddrs.length ());
2161
2162 comp_alias_ddrs.release ();
fb85abff 2163}
2164
2165
2166/* Function vect_loop_versioning.
48e1416a 2167
fb85abff 2168 If the loop has data references that may or may not be aligned or/and
2169 has data reference relations whose independence was not proven then
2170 two versions of the loop need to be generated, one which is vectorized
2171 and one which isn't. A test is then generated to control which of the
2172 loops is executed. The test checks for the alignment of all of the
2173 data references that may or may not be aligned. An additional
2174 sequence of runtime tests is generated for each pairs of DDRs whose
48e1416a 2175 independence was not proven. The vectorized version of loop is
2176 executed only if both alias and alignment tests are passed.
2177
fb85abff 2178 The test generated to check which version of loop is executed
48e1416a 2179 is modified to also check for profitability as indicated by the
23a3430d 2180 cost model initially.
2181
2182 The versioning precondition(s) are placed in *COND_EXPR and
2afdcbed 2183 *COND_EXPR_STMT_LIST. */
fb85abff 2184
2185void
e7430948 2186vect_loop_versioning (loop_vec_info loop_vinfo,
2187 unsigned int th, bool check_profitability)
fb85abff 2188{
2189 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 2190 basic_block condition_bb;
2191 gimple_stmt_iterator gsi, cond_exp_gsi;
2192 basic_block merge_bb;
2193 basic_block new_exit_bb;
2194 edge new_exit_e, e;
2195 gimple orig_phi, new_phi;
e7430948 2196 tree cond_expr = NULL_TREE;
2afdcbed 2197 gimple_seq cond_expr_stmt_list = NULL;
fb85abff 2198 tree arg;
2199 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
2200 gimple_seq gimplify_stmt_list = NULL;
2201 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
6ee2edad 2202 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2203 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
fb85abff 2204
e7430948 2205 if (check_profitability)
2206 {
2207 cond_expr = fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
2208 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
2209 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2210 is_gimple_condexpr, NULL_TREE);
2211 }
fb85abff 2212
6ee2edad 2213 if (version_align)
2afdcbed 2214 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2215 &cond_expr_stmt_list);
fb85abff 2216
6ee2edad 2217 if (version_alias)
90d4c4af 2218 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
23a3430d 2219
2afdcbed 2220 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2221 is_gimple_condexpr, NULL_TREE);
2222 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
fb85abff 2223
2224 initialize_original_copy_tables ();
2afdcbed 2225 loop_version (loop, cond_expr, &condition_bb,
f018d957 2226 prob, prob, REG_BR_PROB_BASE - prob, true);
6ee2edad 2227
36f39b2e 2228 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
6ee2edad 2229 && dump_enabled_p ())
2230 {
2231 if (version_alias)
2232 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2233 "loop versioned for vectorization because of "
2234 "possible aliasing\n");
2235 if (version_align)
2236 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2237 "loop versioned for vectorization to enhance "
2238 "alignment\n");
2239
2240 }
9af5ce0c 2241 free_original_copy_tables ();
fb85abff 2242
48e1416a 2243 /* Loop versioning violates an assumption we try to maintain during
fb85abff 2244 vectorization - that the loop exit block has a single predecessor.
2245 After versioning, the exit block of both loop versions is the same
2246 basic block (i.e. it has two predecessors). Just in order to simplify
2247 following transformations in the vectorizer, we fix this situation
2248 here by adding a new (empty) block on the exit-edge of the loop,
2249 with the proper loop-exit phis to maintain loop-closed-form. */
48e1416a 2250
fb85abff 2251 merge_bb = single_exit (loop)->dest;
2252 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
2253 new_exit_bb = split_edge (single_exit (loop));
2254 new_exit_e = single_exit (loop);
2255 e = EDGE_SUCC (new_exit_bb, 0);
2256
2257 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2258 {
874117c8 2259 tree new_res;
fb85abff 2260 orig_phi = gsi_stmt (gsi);
874117c8 2261 new_res = copy_ssa_name (PHI_RESULT (orig_phi), NULL);
2262 new_phi = create_phi_node (new_res, new_exit_bb);
fb85abff 2263 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
48e1416a 2264 add_phi_arg (new_phi, arg, new_exit_e,
60d535d2 2265 gimple_phi_arg_location_from_edge (orig_phi, e));
b123eaab 2266 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
48e1416a 2267 }
fb85abff 2268
c7b85126 2269
2270 /* Extract load statements on memrefs with zero-stride accesses. */
2271
2272 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2273 {
2274 /* In the loop body, we iterate each statement to check if it is a load.
2275 Then we check the DR_STEP of the data reference. If DR_STEP is zero,
2276 then we will hoist the load statement to the loop preheader. */
2277
2278 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2279 int nbbs = loop->num_nodes;
2280
2281 for (int i = 0; i < nbbs; ++i)
2282 {
2283 for (gimple_stmt_iterator si = gsi_start_bb (bbs[i]);
2284 !gsi_end_p (si);)
2285 {
2286 gimple stmt = gsi_stmt (si);
2287 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2288 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2289
2290 if (is_gimple_assign (stmt)
2291 && (!dr
2292 || (DR_IS_READ (dr) && integer_zerop (DR_STEP (dr)))))
2293 {
2294 bool hoist = true;
2295 ssa_op_iter iter;
2296 tree var;
2297
2298 /* We hoist a statement if all SSA uses in it are defined
2299 outside of the loop. */
2300 FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE)
2301 {
2302 gimple def = SSA_NAME_DEF_STMT (var);
2303 if (!gimple_nop_p (def)
2304 && flow_bb_inside_loop_p (loop, gimple_bb (def)))
2305 {
2306 hoist = false;
2307 break;
2308 }
2309 }
2310
2311 if (hoist)
2312 {
2313 if (dr)
2314 gimple_set_vuse (stmt, NULL);
2315
2316 gsi_remove (&si, false);
2317 gsi_insert_on_edge_immediate (loop_preheader_edge (loop),
2318 stmt);
2319
2320 if (dump_enabled_p ())
2321 {
2322 dump_printf_loc
2323 (MSG_NOTE, vect_location,
2324 "hoisting out of the vectorized loop: ");
2325 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2326 dump_printf (MSG_NOTE, "\n");
2327 }
2328 continue;
2329 }
2330 }
2331 gsi_next (&si);
2332 }
2333 }
2334 }
2335
fb85abff 2336 /* End loop-exit-fixes after versioning. */
2337
2afdcbed 2338 if (cond_expr_stmt_list)
fb85abff 2339 {
2340 cond_exp_gsi = gsi_last_bb (condition_bb);
2afdcbed 2341 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
23a3430d 2342 GSI_SAME_STMT);
fb85abff 2343 }
95e19962 2344 update_ssa (TODO_update_ssa);
fb85abff 2345}