/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"

/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/


/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}

/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from the outer-loop header if RENAME_FROM_OUTER_LOOP
   is true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
        {
          if (!rename_from_outer_loop)
            continue;
          if (e->src != outer_loop->header)
            {
              if (outer_loop->inner->next)
                {
                  /* If outer_loop has 2 inner loops, allow there to
                     be an extra basic block which decides which of the
                     two loops to use using LOOP_VECTORIZED.  */
                  if (!single_pred_p (e->src)
                      || single_pred (e->src) != outer_loop->header)
                    continue;
                }
            }
        }
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}

struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;

/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}

/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}

/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}

/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}

/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
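
/* For instance, for a loop whose exit edge is taken when the exit
   condition is true, the rewritten exit test looks roughly like

       indx_after_incr = indx_before_incr + 1;
       if (indx_after_incr >= niters) goto exit_bb;

   This is an illustrative sketch only; the increment position and the
   comparison code (GE_EXPR vs. LT_EXPR) are chosen below based on
   standard_iv_increment_position and the exit edge's flags.  */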

void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  source_location loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  loop_loc = find_loop_location (loop);
  if (dump_enabled_p ())
    {
      if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
        dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
                     LOCATION_LINE (loop_loc));
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }

  /* Record the number of latch iterations.  */
  loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
                                     build_int_cst (TREE_TYPE (niters), 1));
}

/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For each pair of PHI arguments in FROM->dest and TO->dest coming in
   on those edges, ensure the TO->dest argument has its current_def
   set to that of the corresponding FROM->dest argument.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
        {
          gsi_next (&gsi_from);
          continue;
        }
      if (virtual_operand_p (to_arg))
        {
          gsi_next (&gsi_to);
          continue;
        }
      if (TREE_CODE (from_arg) != SSA_NAME)
        gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
        {
          if (get_current_def (to_arg) == NULL_TREE)
            set_current_def (to_arg, get_current_def (from_arg));
        }
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
                     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}

/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */
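
/* Illustrative sketch only (the temporary forwarder blocks created and
   removed below are omitted).  Peeling a copy at the exit edge turns

       preheader --> LOOP --> exit

   into

       preheader --> LOOP --> new_preheader --> NEW_LOOP --> exit

   whereas peeling at the preheader edge places NEW_LOOP before LOOP.  */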

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
                                        struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
         SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
         but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
         the LOOP SSA_NAMEs (on the exit edge and edge from latch to
         header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
                                                exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
                                                           0),
                                                EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
        {
          gphi_iterator gsi;
          new_exit = redirect_edge_and_branch (new_exit, exit_dest);

          for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
              location_t orig_locus
                = gimple_phi_arg_location_from_edge (phi, e);

              add_phi_arg (phi, orig_arg, new_exit, orig_locus);
            }
        }
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                               loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
        {
          /* Remove the non-necessary forwarder of scalar_loop again.  */
          redirect_edge_pred (single_succ_edge (preheader),
                              single_pred (preheader));
          delete_basic_block (preheader);
          set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                                   loop_preheader_edge (scalar_loop)->src);
          preheader = split_edge (loop_preheader_edge (loop));
          entry_e = single_pred_edge (preheader);
        }

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  /* Skip new preheader since it's deleted if copy loop is added at entry.  */
  for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
         edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
           gsi_new = gsi_start_phis (new_loop->header);
           !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
           gsi_next (&gsi_orig), gsi_next (&gsi_new))
        {
          gphi *orig_phi = gsi_orig.phi ();
          gphi *new_phi = gsi_new.phi ();
          tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
          location_t orig_locus
            = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

          add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
        }
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}

/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
   new edge as irreducible if IRREDUCIBLE_P is true.  */
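
/* Illustrative sketch: after this call GUARD_BB ends in

       if (COND) goto GUARD_TO;   <- new edge, taken with PROBABILITY
       else fall through;         <- enter_e, probability inverted

   COND is gimplified first below if it is not already a valid gimple
   condexpr.  */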

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       basic_block guard_to, basic_block dom_bb,
                       profile_probability probability, bool irreducible_p)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
                                 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->probability = probability;
  if (irreducible_p)
    new_e->flags |= EDGE_IRREDUCIBLE_LOOP;

  enter_e->probability = probability.invert ();
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);

  /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
  if (enter_e->dest->loop_father->header == enter_e->dest)
    split_edge (enter_e);

  return new_e;
}

/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - a header and an empty latch -
       if it is innermost, or of exactly 5 basic blocks if it is an
       outer loop;
   (2) it is single entry, single exit;
   (3) its exit condition is the last stmt in the header;
   (4) E is the entry/exit edge of LOOP.  */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner ? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}

/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies checking which uses
   should be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
        gphi *phi = gsi.phi ();
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
          if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
            tree new_vop = copy_ssa_name (PHI_RESULT (phi));
            gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple *stmt;
            use_operand_p use_p;

            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
            gimple_phi_set_result (new_phi, new_vop);
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
              if (stmt != new_phi
                  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                  SET_USE (use_p, new_vop);
          }
        break;
      }
}

/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if successful, and UNKNOWN_LOCATION otherwise.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}

/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}

/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
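
/* For example, an IV with evolution {init, +, step} where STEP is
   invariant in LOOP can later be advanced to init + niters * step,
   whereas an IV whose evolution part is itself a chrec (a polynomial
   of degree >= 2) is rejected below.  (Illustrative only.)  */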

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

         Skip reduction phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi.  skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are not invariant in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution not invariant in loop.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are polynomials of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution is chrec.\n");
          return false;
        }
    }

  return true;
}

/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e., the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

   The new definitions of the ivs are placed in LOOP->exit.
   The phi args associated with the edge UPDATE_E in the bb
   UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.  */
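
/* Concretely, for each IV the code below computes

       ni = init_expr + NITERS * step_expr

   (using a POINTER_PLUS_EXPR for pointer-typed IVs); e.g. an IV starting
   at 0 with step 4 is advanced to 400 for NITERS == 100.  (Illustrative
   numbers only.)  */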

static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
                                  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi.  skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
        gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}

/* Function vect_gen_prolog_loop_niters

   Generate the number of iterations which should be peeled as prolog for the
   loop represented by LOOP_VINFO.  It is calculated as the misalignment of
   DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
   As a result, after the execution of this loop, the data reference DR will
   refer to an aligned location.  The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_align - 1)

   prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   The computations will be emitted at the end of BB.  We also compute and
   store the upper bound (inclusive) of the result in BOUND.

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */
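
/* As an illustrative example: with a target alignment of 16 bytes,
   4-byte elements (align_in_elems == 4) and a runtime start address
   whose low bits are 0x8, misalign_in_bytes == 8, misalign_in_elems == 2,
   and the prolog executes (4 - 2) & 3 == 2 iterations to reach an
   aligned address.  (Illustrative numbers only.)  */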

static tree
vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
                             basic_block bb, int *bound)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  tree var;
  tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
  gimple_seq stmts = NULL, new_stmts = NULL;
  tree iters, iters_name;
  gimple *dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int target_align = DR_TARGET_ALIGNMENT (dr);

  if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "known peeling = %d.\n", npeel);

      iters = build_int_cst (niters_type, npeel);
      *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
    }
  else
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      tree offset = negative
          ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
                                                              &stmts, offset);
      tree type = unsigned_type_for (TREE_TYPE (start_addr));
      tree target_align_minus_1 = build_int_cst (type, target_align - 1);
      HOST_WIDE_INT elem_size
        = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
      HOST_WIDE_INT align_in_elems = target_align / elem_size;
      tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
      tree align_in_elems_tree = build_int_cst (type, align_in_elems);
      tree misalign_in_bytes;
      tree misalign_in_elems;

      /* Create:  misalign_in_bytes = addr & (target_align - 1).  */
      misalign_in_bytes
        = fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
                       target_align_minus_1);

      /* Create:  misalign_in_elems = misalign_in_bytes / element_size.  */
      misalign_in_elems
        = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes, elem_size_log);

      /* Create:  (niters_type) ((align_in_elems - misalign_in_elems)
                                 & (align_in_elems - 1)).  */
      if (negative)
        iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
                             align_in_elems_tree);
      else
        iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
                             misalign_in_elems);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
      iters = fold_convert (niters_type, iters);
      *bound = align_in_elems - 1;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "niters for prolog loop: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
      dump_printf (MSG_NOTE, "\n");
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  iters_name = force_gimple_operand (iters, &new_stmts, false, var);

  if (new_stmts)
    gimple_seq_add_seq (&stmts, new_stmts);
  if (stmts)
    {
      gcc_assert (single_succ_p (bb));
      gimple_stmt_iterator gsi = gsi_last_bb (bb);
      if (gsi_end_p (gsi))
        gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
    }
  return iters_name;
}

/* Function vect_update_init_of_dr

   NITERS iterations were peeled from LOOP.  DR represents a data reference
   in LOOP.  This function updates the information recorded in DR to
   account for the fact that the first NITERS iterations had already been
   executed.  Specifically, it updates the OFFSET field of DR.  */
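
/* E.g., if NITERS == 3 iterations were peeled and DR_STEP is 4 bytes,
   DR_OFFSET below is advanced by 3 * 4 == 12 bytes.  (Illustrative
   numbers only.)  */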

static void
vect_update_init_of_dr (struct data_reference *dr, tree niters)
{
  tree offset = DR_OFFSET (dr);

  niters = fold_build2 (MULT_EXPR, sizetype,
                        fold_convert (sizetype, niters),
                        fold_convert (sizetype, DR_STEP (dr)));
  offset = fold_build2 (PLUS_EXPR, sizetype,
                        fold_convert (sizetype, offset), niters);
  DR_OFFSET (dr) = offset;
}


/* Function vect_update_inits_of_drs

   NITERS iterations were peeled from the loop represented by LOOP_VINFO.
   This function updates the information recorded for the data references in
   the loop to account for the fact that the first NITERS iterations had
   already been executed.  Specifically, it updates the initial_condition of
   the access_function of all the data_references in the loop.  */

static void
vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
{
  unsigned int i;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_update_inits_of_dr ===\n");

  /* Adjust niters to sizetype and insert stmts on loop preheader edge.  */
  if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
    {
      gimple_seq seq;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
      tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");

      niters = fold_convert (sizetype, niters);
      niters = force_gimple_operand (niters, &seq, false, var);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
    }

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    vect_update_init_of_dr (dr, niters);
}

/* This function builds ni_name = number of iterations.  Statements
   are emitted on the loop preheader edge.  If NEW_VAR_P is not NULL,
   set *NEW_VAR_P to TRUE if a new ssa_var is generated.  */

tree
vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
{
  tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
  if (TREE_CODE (ni) == INTEGER_CST)
    return ni;
  else
    {
      tree ni_name, var;
      gimple_seq stmts = NULL;
      edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));

      var = create_tmp_var (TREE_TYPE (ni), "niters");
      ni_name = force_gimple_operand (ni, &stmts, false, var);
      if (stmts)
        {
          gsi_insert_seq_on_edge_immediate (pe, stmts);
          if (new_var_p != NULL)
            *new_var_p = true;
        }

      return ni_name;
    }
}

/* Calculate the number of iterations above which the vectorized loop will
   be preferred over the scalar loop.  NITERS_PROLOG is the number of
   iterations of the prolog loop.  If it's an integer const, the integer
   number is also passed in INT_NITERS_PROLOG.  BOUND_PROLOG is the upper
   bound (inclusive) of the number of iterations of the prolog loop.  VFM1
   is the vector factor minus one.  If CHECK_PROFITABILITY is true, TH is
   the threshold below which the scalar (rather than vectorized) loop will
   be executed.  This function stores the upper bound (inclusive) of the
   result in BOUND_SCALAR.  */
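
/* Illustrative example: with TH == 10, VFM1 == 3 and a constant
   NITERS_PROLOG == 2, TH is first decremented to 9 (the maximum niters
   of the scalar loop); since 2 + 3 < 9, both the returned niters and
   BOUND_SCALAR become 9.  */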

static tree
vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
                             int bound_prolog, int vfm1, int th,
                             int *bound_scalar, bool check_profitability)
{
  tree type = TREE_TYPE (niters_prolog);
  tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
                             build_int_cst (type, vfm1));

  *bound_scalar = vfm1 + bound_prolog;
  if (check_profitability)
    {
      /* TH indicates the minimum niters of vectorized loop, while we
         compute the maximum niters of scalar loop.  */
      th--;
      /* Peeling for constant times.  */
      if (int_niters_prolog >= 0)
        {
          *bound_scalar = (int_niters_prolog + vfm1 < th
                            ? th
                            : vfm1 + int_niters_prolog);
          return build_int_cst (type, *bound_scalar);
        }
      /* Peeling for unknown times.  Note BOUND_PROLOG is the upper
         bound (inclusive) of niters of prolog loop.  */
      if (th >= vfm1 + bound_prolog)
        {
          *bound_scalar = th;
          return build_int_cst (type, th);
        }
      /* Need to do runtime comparison, but BOUND_SCALAR remains the same.  */
      else if (th > vfm1)
        return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
    }
  return niters;
}

/* This function generates the following statements:

   niters = number of iterations loop executes (after peeling)
   niters_vector = niters / vf

   and places them on the loop preheader edge.  NITERS_NO_OVERFLOW is
   true if NITERS doesn't overflow.  */
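
/* For instance, with vf == 8 and NITERS known not to overflow, the code
   below emits niters_vector = niters >> 3; otherwise it emits
   ((niters - 8) >> 3) + 1, which yields the same result for any niters
   of at least one full vector iteration without relying on the "+ 1"
   in NITERS wrapping around.  (Illustrative.)  */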

void
vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
                             tree *niters_vector_ptr, bool niters_no_overflow)
{
  tree ni_minus_gap, var;
  tree niters_vector, type = TREE_TYPE (niters);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
  tree log_vf = build_int_cst (type, exact_log2 (vf));

  /* If epilogue loop is required because of data accesses with gaps, we
     subtract one iteration from the total number of iterations here for
     correct calculation of RATIO.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    {
      ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
                                  build_one_cst (type));
      if (!is_gimple_val (ni_minus_gap))
        {
          var = create_tmp_var (type, "ni_gap");
          gimple *stmts = NULL;
          ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
                                               true, var);
          gsi_insert_seq_on_edge_immediate (pe, stmts);
        }
    }
  else
    ni_minus_gap = niters;

  /* Create: niters >> log2(vf).  */
  /* If it's known that niters == number of latch executions + 1 doesn't
     overflow, we can generate niters >> log2(vf); otherwise we generate
     (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
     will be at least one.  */
  if (niters_no_overflow)
    niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
  else
    niters_vector
      = fold_build2 (PLUS_EXPR, type,
                     fold_build2 (RSHIFT_EXPR, type,
                                  fold_build2 (MINUS_EXPR, type, ni_minus_gap,
                                               build_int_cst (type, vf)),
                                  log_vf),
                     build_int_cst (type, 1));

  if (!is_gimple_val (niters_vector))
    {
      var = create_tmp_var (type, "bnd");
      gimple_seq stmts = NULL;
      niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
      gsi_insert_seq_on_edge_immediate (pe, stmts);
      /* Peeling algorithm guarantees that vector loop bound is at least ONE,
         we set range information to make niters analyzer's life easier.  */
      if (stmts != NULL)
        set_range_info (niters_vector, VR_RANGE,
                        wi::to_wide (build_int_cst (type, 1)),
                        wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
                                                  TYPE_MAX_VALUE (type),
                                                  log_vf)));
    }
  *niters_vector_ptr = niters_vector;

  return;
}

/* Given NITERS_VECTOR which is the number of iterations for vectorized
   loop specified by LOOP_VINFO after vectorization, compute the number
   of iterations before vectorization (niters_vector * vf) and store it
   to NITERS_VECTOR_MULT_VF_PTR.  */

static void
vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
                                     tree niters_vector,
                                     tree *niters_vector_mult_vf_ptr)
{
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree type = TREE_TYPE (niters_vector);
  tree log_vf = build_int_cst (type, exact_log2 (vf));
  basic_block exit_bb = single_exit (loop)->dest;

  gcc_assert (niters_vector_mult_vf_ptr != NULL);
  tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
                                            niters_vector, log_vf);
  if (!is_gimple_val (niters_vector_mult_vf))
    {
      tree var = create_tmp_var (type, "niters_vector_mult_vf");
      gimple_seq stmts = NULL;
      niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
                                                    &stmts, true, var);
      gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
      gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
    }
  *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
}

/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
   from SECOND/FIRST and puts it at the original loop's preheader/exit
   edge, the two loops are arranged as below:

       preheader_a:
     first_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto between_bb;
       latch_a:
         goto header_a;

       between_bb:
         ;; i_x = PHI<i_2>;   ;; LCSSA phi node to be created for FIRST,

     second_loop:
       header_b:
         i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
                                 or with i_2 if no LCSSA phi is created
                                 under condition of CREATE_LCSSA_FOR_IV_PHIS.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

       exit_bb:

   This function creates loop closed SSA for the first loop; updates the
   second loop's PHI nodes by replacing the argument on its incoming edge
   with the result of newly created lcssa PHI nodes.  If
   CREATE_LCSSA_FOR_IV_PHIS is false, loop closed ssa phis will only be
   created for non-iv phis of the first loop.

   This function assumes exit bb of the first loop is preheader bb of the
   second loop, i.e., between_bb in the example code.  With PHIs updated,
   the second loop will execute the remaining iterations of the first.  */

static void
slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
                                   struct loop *first, struct loop *second,
                                   bool create_lcssa_for_iv_phis)
{
  gphi_iterator gsi_update, gsi_orig;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  edge first_latch_e = EDGE_SUCC (first->latch, 0);
  edge second_preheader_e = loop_preheader_edge (second);
  basic_block between_bb = single_exit (first)->dest;

  gcc_assert (between_bb == second_preheader_e->src);
  gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
  /* Either the first loop or the second is the loop to be vectorized.  */
  gcc_assert (loop == first || loop == second);

  for (gsi_orig = gsi_start_phis (first->header),
       gsi_update = gsi_start_phis (second->header);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
      /* Generate lcssa PHI node for the first loop.  */
      gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
      if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
        {
          tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
          gphi *lcssa_phi = create_phi_node (new_res, between_bb);
          add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
          arg = new_res;
        }

      /* Update PHI node in the second loop by replacing arg on the loop's
         incoming edge.  */
      adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
    }
}

/* Function slpeel_add_loop_guard adds guard skipping from the beginning
   of SKIP_LOOP to the beginning of UPDATE_LOOP.  GUARD_EDGE and MERGE_EDGE
   are two pred edges of the merge point before UPDATE_LOOP.  The two loops
   appear like below:

     guard_bb:
       if (cond)
         goto merge_bb;
       else
         goto skip_loop;

     skip_loop:
       header_a:
         i_1 = PHI<i_0, i_2>;
         ...
         i_2 = i_1 + 1;
         if (cond_a)
           goto latch_a;
         else
           goto exit_a;
       latch_a:
         goto header_a;

       exit_a:
         i_5 = PHI<i_2>;

     merge_bb:
       ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.

     update_loop:
       header_b:
         i_3 = PHI<i_5, i_4>;  ;; Use of i_5 to be replaced with i_x.
         ...
         i_4 = i_3 + 1;
         if (cond_b)
           goto latch_b;
         else
           goto exit_bb;
       latch_b:
         goto header_b;

     exit_bb:

   This function creates PHI nodes at merge_bb and replaces the use of i_5
   in the update_loop's PHI node with the new PHI's result.  */

static void
slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
                                    struct loop *update_loop,
                                    edge guard_edge, edge merge_edge)
{
  source_location merge_loc, guard_loc;
  edge orig_e = loop_preheader_edge (skip_loop);
  edge update_e = loop_preheader_edge (update_loop);
  gphi_iterator gsi_orig, gsi_update;

  for ((gsi_orig = gsi_start_phis (skip_loop->header),
        gsi_update = gsi_start_phis (update_loop->header));
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      gphi *orig_phi = gsi_orig.phi ();
      gphi *update_phi = gsi_update.phi ();

      /* Generate new phi node at merge bb of the guard.  */
      tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
      gphi *new_phi = create_phi_node (new_res, guard_edge->dest);

      /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set the
         args in NEW_PHI for these edges.  */
      tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
      tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
      merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
      guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
      add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
      add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);

      /* Update phi in UPDATE_PHI.  */
      adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
    }
}

/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
   this function searches for the corresponding lcssa phi node in exit
   bb of LOOP.  If it is found, return the phi result; otherwise return
   NULL.  */

static tree
find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
                gphi *lcssa_phi)
{
  gphi_iterator gsi;
  edge e = single_exit (loop);

  gcc_assert (single_pred_p (e->dest));
  for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      if (operand_equal_p (PHI_ARG_DEF (phi, 0),
                           PHI_ARG_DEF (lcssa_phi, 0), 0))
        return PHI_RESULT (phi);
    }
  return NULL_TREE;
}
1467/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1468 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1469 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1470 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1471 The CFG looks like:
1472
1473 loop:
1474 header_a:
1475 i_1 = PHI<i_0, i_2>;
1476 ...
1477 i_2 = i_1 + 1;
1478 if (cond_a)
1479 goto latch_a;
1480 else
1481 goto exit_a;
1482 latch_a:
1483 goto header_a;
1484
1485 exit_a:
1486
1487 guard_bb:
1488 if (cond)
1489 goto merge_bb;
1490 else
1491 goto epilog_loop;
1492
1493 ;; fall_through_bb
1494
1495 epilog_loop:
1496 header_b:
1497 i_3 = PHI<i_2, i_4>;
1498 ...
1499 i_4 = i_3 + 1;
1500 if (cond_b)
1501 goto latch_b;
1502 else
1503 goto merge_bb;
1504 latch_b:
1505 goto header_b;
1506
1507 merge_bb:
1508 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1509
1510 exit_bb:
1511 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
1512
1513 For each name used out side EPILOG (i.e - for each name that has a lcssa
1514 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1515 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
1516 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1517 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
1518 in exit_bb will also be updated. */

static void
slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
				    edge guard_edge, edge merge_edge)
{
  gphi_iterator gsi;
  basic_block merge_bb = guard_edge->dest;

  gcc_assert (single_succ_p (merge_bb));
  edge e = single_succ_edge (merge_bb);
  basic_block exit_bb = e->dest;
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);

  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *update_phi = gsi.phi ();
      tree old_arg = PHI_ARG_DEF (update_phi, 0);
      /* This loop-closed-phi actually doesn't represent a use out of the
	 loop - the phi arg is a constant.  */
      if (TREE_CODE (old_arg) != SSA_NAME)
	continue;

      tree merge_arg = get_current_def (old_arg);
      if (!merge_arg)
	merge_arg = old_arg;

      tree guard_arg = find_guard_arg (loop, epilog, update_phi);
      /* If the var is live after the loop but not a reduction, we simply
	 use the old arg.  */
      if (!guard_arg)
	guard_arg = old_arg;

      /* Create new phi node in MERGE_BB:  */
      tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
      gphi *merge_phi = create_phi_node (new_res, merge_bb);

      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
	 the two PHI args in merge_phi for these edges.  */
      add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
      add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* Update the original phi in exit_bb.  */
      adjust_phi_and_debug_stmts (update_phi, e, new_res);
    }
}

/* The EPILOG loop is duplicated from the original loop for vectorizing;
   the arg of its loop-closed ssa PHI needs to be updated.  */

static void
slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
{
  gphi_iterator gsi;
  basic_block exit_bb = single_exit (epilog)->dest;

  gcc_assert (single_pred_p (exit_bb));
  edge e = EDGE_PRED (exit_bb, 0);
  for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
}

/* Function vect_do_peeling.

   Input:
   - LOOP_VINFO: Represents a loop to be vectorized, which looks like:

       preheader:
     LOOP:
       header_bb:
	 loop_body
	 if (exit_loop_cond) goto exit_bb
	 else                goto header_bb
       exit_bb:

   - NITERS: The number of iterations of the loop.
   - NITERSM1: The number of iterations of the loop's latch.
   - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
   - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize the loop if
			      CHECK_PROFITABILITY is true.
   Output:
   - NITERS_VECTOR: The number of iterations of the loop after vectorization.

   This function peels prolog and epilog from the loop, and adds guards
   skipping PROLOG and EPILOG for various conditions.  As a result, the
   changed CFG would look like:

       guard_bb_1:
	 if (prefer_scalar_loop) goto merge_bb_1
	 else                    goto guard_bb_2

       guard_bb_2:
	 if (skip_prolog) goto merge_bb_2
	 else             goto prolog_preheader

       prolog_preheader:
     PROLOG:
       prolog_header_bb:
	 prolog_body
	 if (exit_prolog_cond) goto prolog_exit_bb
	 else                  goto prolog_header_bb
       prolog_exit_bb:

       merge_bb_2:

       vector_preheader:
     VECTOR LOOP:
       vector_header_bb:
	 vector_body
	 if (exit_vector_cond) goto vector_exit_bb
	 else                  goto vector_header_bb
       vector_exit_bb:

       guard_bb_3:
	 if (skip_epilog) goto merge_bb_3
	 else             goto epilog_preheader

       merge_bb_1:

       epilog_preheader:
     EPILOG:
       epilog_header_bb:
	 epilog_body
	 if (exit_epilog_cond) goto merge_bb_3
	 else                  goto epilog_header_bb

       merge_bb_3:

   Note this function peels the prolog and epilog, and adds the guards,
   only when necessary.
   Returns the created epilogue, or NULL.

   TODO: The guard for prefer_scalar_loop should be emitted along with
   versioning conditions if loop versioning is needed.  */
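
/* Illustration (added for exposition; the concrete numbers are made up):
   with NITERS = 103, a prolog that peels 3 iterations for alignment and
   vf = 8, the prolog covers iterations 1-3, the vector loop executes
   (103 - 3) / 8 = 12 vector iterations covering the next 96 scalar
   iterations, and the epilog covers the remaining 4.  */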

struct loop *
vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
		 tree *niters_vector, int th, bool check_profitability,
		 bool niters_no_overflow)
{
  edge e, guard_e;
  tree type = TREE_TYPE (niters), guard_cond;
  basic_block guard_bb, guard_to;
  profile_probability prob_prolog, prob_vector, prob_epilog;
  int bound_prolog = 0, bound_scalar = 0, bound = 0;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
  bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
			 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));

  if (!prolog_peeling && !epilog_peeling)
    return NULL;

  prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
  if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
    vf = 3;
  prob_prolog = prob_epilog = profile_probability::guessed_always ()
				.apply_scale (vf - 1, vf);
  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *first_loop = loop;
  bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
  create_lcssa_for_virtual_phi (loop);
  update_ssa (TODO_update_ssa_only_virtuals);

  if (MAY_HAVE_DEBUG_STMTS)
    {
      gcc_assert (!adjust_vec.exists ());
      adjust_vec.create (32);
    }
  initialize_original_copy_tables ();

  /* The prolog loop may be skipped.  */
  bool skip_prolog = (prolog_peeling != 0);
  /* Skip to the epilog if the scalar loop may be preferred.  This is only
     needed when we peel for the epilog loop and the profitability check
     hasn't already been done by loop versioning.  */
  bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
		      && !LOOP_REQUIRES_VERSIONING (loop_vinfo));
  /* The epilog loop must be executed if the number of iterations for the
     epilog loop is known at compile time, otherwise we need to add a check
     at the end of the vector loop and skip to the end of the epilog loop.  */
  bool skip_epilog = (prolog_peeling < 0
		      || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  /* PEELING_FOR_GAPS is special because the epilog loop must be executed.  */
  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
    skip_epilog = false;
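
  /* Added note (illustrative): with unknown NITERS, no loop versioning and
     prolog peeling enabled, all three guards are emitted: skip the prolog
     when niters_prolog == 0, skip the vector loop when the scalar loop is
     preferred, and skip the epilog when the vector loop handles all
     iterations.  With a compile-time constant NITERS, any guard that can
     be decided statically is omitted.  */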

  /* Record the anchor bb at which guard should be placed if scalar loop
     may be preferred.  */
  basic_block anchor = loop_preheader_edge (loop)->src;
  if (skip_vector)
    {
      split_edge (loop_preheader_edge (loop));

      /* Due to the order in which we peel prolog and epilog, we first
	 propagate probability to the whole loop.  The purpose is to
	 avoid adjusting probabilities of both prolog and vector loops
	 separately.  Note in this case, the probability of epilog loop
	 needs to be scaled back later.  */
      basic_block bb_before_loop = loop_preheader_edge (loop)->src;
      if (prob_vector.initialized_p ())
	scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
      scale_loop_profile (loop, prob_vector, bound);
    }

  tree niters_prolog = build_int_cst (type, 0);
  source_location loop_loc = find_loop_location (loop);
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  if (prolog_peeling)
    {
      e = loop_preheader_edge (loop);
      if (!slpeel_can_duplicate_loop_p (loop, e))
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
			   "loop can't be duplicated to preheader edge.\n");
	  gcc_unreachable ();
	}
      /* Peel prolog and put it on preheader edge of loop.  */
      prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
      if (!prolog)
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
			   "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
	  gcc_unreachable ();
	}
      slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
      first_loop = prolog;
      reset_original_copy_tables ();

      /* Generate and update the number of iterations for prolog loop.  */
      niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
						   &bound_prolog);
      slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);

      /* Skip the prolog loop.  */
      if (skip_prolog)
	{
	  guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
				    niters_prolog, build_int_cst (type, 0));
	  guard_bb = loop_preheader_edge (prolog)->src;
	  basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
	  guard_to = split_edge (loop_preheader_edge (loop));
	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
					   guard_to, guard_bb,
					   prob_prolog.invert (),
					   irred_flag);
	  e = EDGE_PRED (guard_to, 0);
	  e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
	  slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);

	  scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
	  scale_loop_profile (prolog, prob_prolog, bound_prolog);
	}
      /* Update init address of DRs.  */
      vect_update_inits_of_drs (loop_vinfo, niters_prolog);
      /* Update niters for vector loop.  */
      LOOP_VINFO_NITERS (loop_vinfo)
	= fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
      LOOP_VINFO_NITERSM1 (loop_vinfo)
	= fold_build2 (MINUS_EXPR, type,
		       LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
      bool new_var_p = false;
      niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
      /* It's guaranteed that the vector loop bound before vectorization is
	 at least VF, so set range information for the newly generated
	 var.  */
      if (new_var_p)
	set_range_info (niters, VR_RANGE,
			wi::to_wide (build_int_cst (type, vf)),
			wi::to_wide (TYPE_MAX_VALUE (type)));
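
      /* Added note: e.g. for vf = 8 and a 32-bit unsigned niters type this
	 records the range [8, 4294967295] for the new niters variable.  */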

      /* Prolog iterates at most bound_prolog times, latch iterates at
	 most bound_prolog - 1 times.  */
      record_niter_bound (prolog, bound_prolog - 1, false, true);
      delete_update_ssa ();
      adjust_vec_debug_stmts ();
      scev_reset ();
    }

  if (epilog_peeling)
    {
      e = single_exit (loop);
      if (!slpeel_can_duplicate_loop_p (loop, e))
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
			   "loop can't be duplicated to exit edge.\n");
	  gcc_unreachable ();
	}
      /* Peel epilog and put it on exit edge of loop.  */
      epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
      if (!epilog)
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
			   "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
	  gcc_unreachable ();
	}
      slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);

      /* The scalar version loop may be preferred.  In this case, add the
	 guard and skip to the epilog.  Note this only happens when the
	 number of iterations of the loop is unknown at compile time,
	 otherwise the loop wouldn't be vectorized.  */
      if (skip_vector)
	{
	  /* An additional epilogue iteration is peeled if a gap exists.  */
	  bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
	  tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
						bound_prolog,
						peel_for_gaps ? vf : vf - 1,
						th, &bound_scalar,
						check_profitability);
	  /* Build the guard against NITERSM1 since NITERS may overflow.  */
	  guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
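
	  /* Added note: NITERSM1 is the latch count, which cannot wrap even
	     when the loop runs for exactly TYPE_MAX + 1 iterations; e.g. for
	     a 32-bit unsigned type, 2^32 iterations make NITERS read 0 while
	     NITERSM1 correctly reads 4294967295.  */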
	  guard_bb = anchor;
	  guard_to = split_edge (loop_preheader_edge (epilog));
	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
					   guard_to, guard_bb,
					   prob_vector.invert (),
					   irred_flag);
	  e = EDGE_PRED (guard_to, 0);
	  e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
	  slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);

	  /* Simply propagate profile info from guard_bb to guard_to which is
	     a merge point of control flow.  */
	  guard_to->count = guard_bb->count;
	  /* Scale probability of epilog loop back.
	     FIXME: We should avoid scaling down and back up.  Profile may
	     get lost if we scale down to 0.  */
	  int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE
			 / prob_vector.to_reg_br_prob_base ();
	  basic_block *bbs = get_loop_body (epilog);
	  scale_bbs_frequencies_int (bbs, epilog->num_nodes, scale_up,
				     REG_BR_PROB_BASE);
	  free (bbs);
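
	  /* Added note: with prob_vector = 9/10 this computes scale_up =
	     REG_BR_PROB_BASE * 10 / 9, i.e. it undoes the earlier scaling
	     of the (not yet split off) epilog body by 9/10.  */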
	}

      basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
      tree niters_vector_mult_vf;
      /* If the loop is peeled a non-zero constant number of times, NITERS
	 now refers to orig_niters - prolog_peeling and won't overflow even
	 if orig_niters overflows.  */
      niters_no_overflow |= (prolog_peeling > 0);
      vect_gen_vector_loop_niters (loop_vinfo, niters,
				   niters_vector, niters_no_overflow);
      vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
					   &niters_vector_mult_vf);
      /* Update IVs of the original loop as if they were advanced by
	 niters_vector_mult_vf steps.  */
      gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
      edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
      vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
					update_e);

      if (skip_epilog)
	{
	  guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
				    niters, niters_vector_mult_vf);
	  guard_bb = single_exit (loop)->dest;
	  guard_to = split_edge (single_exit (epilog));
	  guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
					   skip_vector ? anchor : guard_bb,
					   prob_epilog.invert (),
					   irred_flag);
	  slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
					      single_exit (epilog));
	  /* Only need to handle the basic block before the epilog loop if
	     it's not the guard_bb, which is the case when skip_vector is
	     true.  */
	  if (guard_bb != bb_before_epilog)
	    {
	      prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();

	      scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
	    }
	  scale_loop_profile (epilog, prob_epilog, bound);
	}
      else
	slpeel_update_phi_nodes_for_lcssa (epilog);

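      /* Added note: when not peeling for gaps, the epilog executes at most
	 vf - 1 iterations, so its latch executes at most vf - 2 times; with
	 an extra iteration peeled for gaps the latch bound is vf - 1.  */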
      bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
      /* We share the epilog loop with the scalar version loop.  */
      bound = MAX (bound, bound_scalar - 1);
      record_niter_bound (epilog, bound, false, true);

      delete_update_ssa ();
      adjust_vec_debug_stmts ();
      scev_reset ();
    }
  adjust_vec.release ();
  free_original_copy_tables ();

  return epilog;
}

/* Function vect_create_cond_for_niters_checks.

   Create a conditional expression that represents the run-time checks for
   the loop's niter.  The loop is guaranteed to terminate if the run-time
   checks hold.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.
   LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the assumptions
		under which the niter information is valid.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the
   if statement that controls which version of the loop gets executed at
   runtime.  */

static void
vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
{
  tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);

  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}

/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
   and PART_COND_EXPR are true.  Treat a null *COND_EXPR as "true".  */

static void
chain_cond_expr (tree *cond_expr, tree part_cond_expr)
{
  if (*cond_expr)
    *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
			      *cond_expr, part_cond_expr);
  else
    *cond_expr = part_cond_expr;
}

/* Function vect_create_cond_for_align_checks.

   Create a conditional expression that represents the alignment checks for
   all of the data references (array element references) whose alignment
   must be checked at runtime.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.
   LOOP_VINFO - two fields of the loop information are used.
		LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
		LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.

   Output:
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
			 expression.
   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.

   The algorithm makes two assumptions:
     1) The number of bytes "n" in a vector is a power of 2.
     2) An address "a" is aligned if a%n is zero and this test can be done
	as a&(n-1) == 0.  For example, for 16 byte vectors the test is
	a&0xf == 0.  */
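
/* Added illustration: for two data refs with 16-byte vectors, the emitted
   condition is equivalent to

     (((intptr_t) &a[i] | (intptr_t) &b[i]) & 0xf) == 0

   i.e. the addresses are ORed together first, so a single AND and compare
   suffice for any number of references.  */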

static void
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
				   tree *cond_expr,
				   gimple_seq *cond_expr_stmt_list)
{
  vec<gimple *> may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  gimple *ref_stmt;
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree mask_cst;
  unsigned int i;
  tree int_ptrsize_type;
  char tmp_name[20];
  tree or_tmp_name = NULL_TREE;
  tree and_tmp_name;
  gimple *and_stmt;
  tree ptrsize_zero;
  tree part_cond_expr;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));

  int_ptrsize_type = signed_type_for (ptr_type_node);

  /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
     of the first vector of the i'th data reference.  */

  FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
    {
      gimple_seq new_stmt_list = NULL;
      tree addr_base;
      tree addr_tmp_name;
      tree new_or_tmp_name;
      gimple *addr_stmt, *or_stmt;
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
      tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      bool negative = tree_int_cst_compare
	(DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
      tree offset = negative
	? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base =
	vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
					      offset);
      if (new_stmt_list != NULL)
	gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);

      sprintf (tmp_name, "addr2int%d", i);
      addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
      addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
      gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);

      /* The addresses are ORed together.  */

      if (or_tmp_name != NULL_TREE)
	{
	  /* create: or_tmp = or_tmp | addr_tmp */
	  sprintf (tmp_name, "orptrs%d", i);
	  new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
	  or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
					 or_tmp_name, addr_tmp_name);
	  gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
	  or_tmp_name = new_or_tmp_name;
	}
      else
	or_tmp_name = addr_tmp_name;

    } /* end for i */

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask */
  and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");

  and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
				  or_tmp_name, mask_cst);
  gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);

  /* Make and_tmp the left operand of the conditional test against zero.
     If and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
				and_tmp_name, ptrsize_zero);
  chain_cond_expr (cond_expr, part_cond_expr);
}

/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
   create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
   Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
   and this new condition are true.  Treat a null *COND_EXPR as "true".  */

static void
vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
{
  vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
  unsigned int i;
  vec_object_pair *pair;
  FOR_EACH_VEC_ELT (pairs, i, pair)
    {
      tree addr1 = build_fold_addr_expr (pair->first);
      tree addr2 = build_fold_addr_expr (pair->second);
      tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
					 addr1, addr2);
      chain_cond_expr (cond_expr, part_cond_expr);
    }
}

/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   relations passed as input.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
		with logical AND operation.  If it is NULL, then the function
		is used to return the number of alias checks.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
		to be checked.

   Output:
   COND_EXPR - conditional expression.

   The returned COND_EXPR is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at
   runtime.  */

void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree *cond_expr)
{
  vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
    LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);

  if (comp_alias_ddrs.is_empty ())
    return;

  create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
			       &comp_alias_ddrs, cond_expr);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created %u versioning for alias checks.\n",
		     comp_alias_ddrs.length ());
}


/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned and/or
   has data reference relations whose independence was not proven, then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pair of DDRs whose
   independence was not proven.  The vectorized version of the loop is
   executed only if both the alias and alignment tests pass.

   The test generated to check which version of the loop is executed
   is also modified to check for profitability as indicated by the
   cost model threshold TH.

   The versioning precondition(s) are placed in *COND_EXPR and
   *COND_EXPR_STMT_LIST.  */
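
/* Added sketch (illustrative only) of the control flow produced below:

     if (alignment_checks && alias_checks && niters_checks && niters >= TH)
       VECTORIZED LOOP
     else
       SCALAR LOOP
*/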

void
vect_loop_versioning (loop_vec_info loop_vinfo,
		      unsigned int th, bool check_profitability)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
  struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
  basic_block condition_bb;
  gphi_iterator gsi;
  gimple_stmt_iterator cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gphi *orig_phi, *new_phi;
  tree cond_expr = NULL_TREE;
  gimple_seq cond_expr_stmt_list = NULL;
  tree arg;
  profile_probability prob = profile_probability::likely ();
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
  bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
  bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
  bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);

  if (check_profitability)
    cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
			     build_int_cst (TREE_TYPE (scalar_loop_iters),
					    th - 1));
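
  /* Added note: scalar_loop_iters is NITERSM1 (the latch count), so the
     test NITERSM1 >= TH - 1 is equivalent to NITERS >= TH without risking
     overflow in computing NITERS.  */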

  if (version_niter)
    vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);

  if (cond_expr)
    cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
					is_gimple_condexpr, NULL_TREE);

  if (version_align)
    vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
				       &cond_expr_stmt_list);

  if (version_alias)
    {
      vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
      vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
    }

  cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
				      is_gimple_condexpr, NULL_TREE);
  gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);

  initialize_original_copy_tables ();
  if (scalar_loop)
    {
      edge scalar_e;
      basic_block preheader, scalar_preheader;

      /* We don't want to scale SCALAR_LOOP's frequencies; we need to
	 scale LOOP's frequencies instead.  */
      nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
			    prob, prob.invert (), prob, prob.invert (), true);
      scale_loop_frequencies (loop, prob);
      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
	 while we need to move it above LOOP's preheader.  */
      e = loop_preheader_edge (loop);
      scalar_e = loop_preheader_edge (scalar_loop);
      gcc_assert (empty_block_p (e->src)
		  && single_pred_p (e->src));
      gcc_assert (empty_block_p (scalar_e->src)
		  && single_pred_p (scalar_e->src));
      gcc_assert (single_pred_p (condition_bb));
      preheader = e->src;
      scalar_preheader = scalar_e->src;
      scalar_e = find_edge (condition_bb, scalar_preheader);
      e = single_pred_edge (preheader);
      redirect_edge_and_branch_force (single_pred_edge (condition_bb),
				      scalar_preheader);
      redirect_edge_and_branch_force (scalar_e, preheader);
      redirect_edge_and_branch_force (e, condition_bb);
      set_immediate_dominator (CDI_DOMINATORS, condition_bb,
			       single_pred (condition_bb));
      set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
			       single_pred (scalar_preheader));
      set_immediate_dominator (CDI_DOMINATORS, preheader,
			       condition_bb);
    }
  else
    nloop = loop_version (loop, cond_expr, &condition_bb,
			  prob, prob.invert (), prob, prob.invert (), true);

  if (version_niter)
    {
      /* The versioned loop could be infinite, so we need to clear existing
	 niter information, which was copied from the original loop.  */
      gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
      vect_free_loop_info_assumptions (nloop);
      /* And set constraint LOOP_C_INFINITE for niter analyzer.  */
      loop_constraint_set (loop, LOOP_C_INFINITE);
    }

  if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
      && dump_enabled_p ())
    {
      if (version_alias)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop versioned for vectorization because of "
			 "possible aliasing\n");
      if (version_align)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "loop versioned for vectorization to enhance "
			 "alignment\n");
    }
  free_original_copy_tables ();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors).  Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.
     If loop versioning wasn't done from LOOP but from SCALAR_LOOP instead,
     merge_bb will already have just a single predecessor.  */

  merge_bb = single_exit (loop)->dest;
  if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
    {
      gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
      new_exit_bb = split_edge (single_exit (loop));
      new_exit_e = single_exit (loop);
      e = EDGE_SUCC (new_exit_bb, 0);

      for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  tree new_res;
	  orig_phi = gsi.phi ();
	  new_res = copy_ssa_name (PHI_RESULT (orig_phi));
	  new_phi = create_phi_node (new_res, new_exit_bb);
	  arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
	  add_phi_arg (new_phi, arg, new_exit_e,
		       gimple_phi_arg_location_from_edge (orig_phi, e));
	  adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
	}
    }

  /* End loop-exit-fixes after versioning.  */

  if (cond_expr_stmt_list)
    {
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
			     GSI_SAME_STMT);
    }
  update_ssa (TODO_update_ssa);
}