]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-loop-manip.c
Handle data dependence relations with different bases
[thirdparty/gcc.git] / gcc / tree-vect-loop-manip.c
CommitLineData
b8698a0f 1/* Vectorizer Specific Loop Manipulations
cbe34bb5 2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
40e23961 26#include "tree.h"
c7131fb2 27#include "gimple.h"
957060b5
AM
28#include "cfghooks.h"
29#include "tree-pass.h"
c7131fb2 30#include "ssa.h"
c7131fb2 31#include "fold-const.h"
60393bbc 32#include "cfganal.h"
45b0be94 33#include "gimplify.h"
5be5c238 34#include "gimple-iterator.h"
18f429e2 35#include "gimplify-me.h"
442b4905 36#include "tree-cfg.h"
e28030cf 37#include "tree-ssa-loop-manip.h"
442b4905 38#include "tree-into-ssa.h"
7a300452 39#include "tree-ssa.h"
ebfd146a 40#include "cfgloop.h"
ebfd146a
IR
41#include "tree-scalar-evolution.h"
42#include "tree-vectorizer.h"
2a93954e 43#include "tree-ssa-loop-ivopts.h"
ebfd146a
IR
44
45/*************************************************************************
46 Simple Loop Peeling Utilities
47
48 Utilities to support loop peeling for vectorization purposes.
49 *************************************************************************/
50
51
52/* Renames the use *OP_P. */
53
54static void
55rename_use_op (use_operand_p op_p)
56{
57 tree new_name;
58
59 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
60 return;
61
62 new_name = get_current_def (USE_FROM_PTR (op_p));
63
64 /* Something defined outside of the loop. */
65 if (!new_name)
66 return;
67
68 /* An ordinary ssa name defined in the loop. */
69
70 SET_USE (op_p, new_name);
71}
72
73
cb330ba5 74/* Renames the variables in basic block BB. Allow renaming of PHI arguments
a6c51a12
YR
75 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
76 true. */
ebfd146a 77
2cfc56b9 78static void
a6c51a12 79rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
ebfd146a 80{
355fe088 81 gimple *stmt;
ebfd146a
IR
82 use_operand_p use_p;
83 ssa_op_iter iter;
84 edge e;
85 edge_iterator ei;
86 struct loop *loop = bb->loop_father;
a6c51a12
YR
87 struct loop *outer_loop = NULL;
88
89 if (rename_from_outer_loop)
90 {
91 gcc_assert (loop);
92 outer_loop = loop_outer (loop);
93 }
ebfd146a 94
538dd0b7
DM
95 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
96 gsi_next (&gsi))
ebfd146a
IR
97 {
98 stmt = gsi_stmt (gsi);
99 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
100 rename_use_op (use_p);
101 }
102
2cfc56b9 103 FOR_EACH_EDGE (e, ei, bb->preds)
ebfd146a 104 {
cb330ba5
JJ
105 if (!flow_bb_inside_loop_p (loop, e->src))
106 {
107 if (!rename_from_outer_loop)
108 continue;
109 if (e->src != outer_loop->header)
110 {
111 if (outer_loop->inner->next)
112 {
113 /* If outer_loop has 2 inner loops, allow there to
114 be an extra basic block which decides which of the
115 two loops to use using LOOP_VECTORIZED. */
116 if (!single_pred_p (e->src)
117 || single_pred (e->src) != outer_loop->header)
118 continue;
119 }
120 else
121 continue;
122 }
123 }
538dd0b7
DM
124 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
125 gsi_next (&gsi))
126 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
ebfd146a
IR
127 }
128}
129
130
a79683d5 131struct adjust_info
684f25f4
AO
132{
133 tree from, to;
134 basic_block bb;
a79683d5 135};
684f25f4 136
684f25f4
AO
137/* A stack of values to be adjusted in debug stmts. We have to
138 process them LIFO, so that the closest substitution applies. If we
139 processed them FIFO, without the stack, we might substitute uses
140 with a PHI DEF that would soon become non-dominant, and when we got
141 to the suitable one, it wouldn't have anything to substitute any
142 more. */
ff4c81cc 143static vec<adjust_info, va_heap> adjust_vec;
684f25f4
AO
144
145/* Adjust any debug stmts that referenced AI->from values to use the
146 loop-closed AI->to, if the references are dominated by AI->bb and
147 not by the definition of AI->from. */
148
149static void
150adjust_debug_stmts_now (adjust_info *ai)
151{
152 basic_block bbphi = ai->bb;
153 tree orig_def = ai->from;
154 tree new_def = ai->to;
155 imm_use_iterator imm_iter;
355fe088 156 gimple *stmt;
684f25f4
AO
157 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
158
159 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
160
161 /* Adjust any debug stmts that held onto non-loop-closed
162 references. */
163 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
164 {
165 use_operand_p use_p;
166 basic_block bbuse;
167
168 if (!is_gimple_debug (stmt))
169 continue;
170
171 gcc_assert (gimple_debug_bind_p (stmt));
172
173 bbuse = gimple_bb (stmt);
174
175 if ((bbuse == bbphi
176 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
177 && !(bbuse == bbdef
178 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
179 {
180 if (new_def)
181 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
182 SET_USE (use_p, new_def);
183 else
184 {
185 gimple_debug_bind_reset_value (stmt);
186 update_stmt (stmt);
187 }
188 }
189 }
190}
191
192/* Adjust debug stmts as scheduled before. */
193
194static void
195adjust_vec_debug_stmts (void)
196{
197 if (!MAY_HAVE_DEBUG_STMTS)
198 return;
199
9771b263 200 gcc_assert (adjust_vec.exists ());
684f25f4 201
9771b263 202 while (!adjust_vec.is_empty ())
684f25f4 203 {
9771b263
DN
204 adjust_debug_stmts_now (&adjust_vec.last ());
205 adjust_vec.pop ();
684f25f4 206 }
684f25f4
AO
207}
208
209/* Adjust any debug stmts that referenced FROM values to use the
210 loop-closed TO, if the references are dominated by BB and not by
211 the definition of FROM. If adjust_vec is non-NULL, adjustments
212 will be postponed until adjust_vec_debug_stmts is called. */
213
214static void
215adjust_debug_stmts (tree from, tree to, basic_block bb)
216{
217 adjust_info ai;
218
a471762f
RG
219 if (MAY_HAVE_DEBUG_STMTS
220 && TREE_CODE (from) == SSA_NAME
a52ca739 221 && ! SSA_NAME_IS_DEFAULT_DEF (from)
a471762f 222 && ! virtual_operand_p (from))
684f25f4
AO
223 {
224 ai.from = from;
225 ai.to = to;
226 ai.bb = bb;
227
9771b263
DN
228 if (adjust_vec.exists ())
229 adjust_vec.safe_push (ai);
684f25f4
AO
230 else
231 adjust_debug_stmts_now (&ai);
232 }
233}
234
235/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
236 to adjust any debug stmts that referenced the old phi arg,
237 presumably non-loop-closed references left over from other
238 transformations. */
239
240static void
355fe088 241adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
684f25f4
AO
242{
243 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
244
245 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
246
247 if (MAY_HAVE_DEBUG_STMTS)
248 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
249 gimple_bb (update_phi));
250}
251
ebfd146a
IR
252/* Make the LOOP iterate NITERS times. This is done by adding a new IV
253 that starts at zero, increases by one and its limit is NITERS.
254
255 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
256
257void
258slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
259{
260 tree indx_before_incr, indx_after_incr;
538dd0b7
DM
261 gcond *cond_stmt;
262 gcond *orig_cond;
ebfd146a
IR
263 edge exit_edge = single_exit (loop);
264 gimple_stmt_iterator loop_cond_gsi;
265 gimple_stmt_iterator incr_gsi;
266 bool insert_after;
267 tree init = build_int_cst (TREE_TYPE (niters), 0);
268 tree step = build_int_cst (TREE_TYPE (niters), 1);
b05e0233 269 source_location loop_loc;
ebfd146a
IR
270 enum tree_code code;
271
272 orig_cond = get_loop_exit_condition (loop);
273 gcc_assert (orig_cond);
274 loop_cond_gsi = gsi_for_stmt (orig_cond);
275
276 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
277 create_iv (init, step, NULL_TREE, loop,
278 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
279
280 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
281 true, NULL_TREE, true,
282 GSI_SAME_STMT);
283 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
284 true, GSI_SAME_STMT);
285
286 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
287 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
288 NULL_TREE);
289
290 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
291
292 /* Remove old loop exit test: */
293 gsi_remove (&loop_cond_gsi, true);
6f723d33 294 free_stmt_vec_info (orig_cond);
ebfd146a
IR
295
296 loop_loc = find_loop_location (loop);
73fbfcad 297 if (dump_enabled_p ())
ebfd146a 298 {
b05e0233
RB
299 if (LOCATION_LOCUS (loop_loc) != UNKNOWN_LOCATION)
300 dump_printf (MSG_NOTE, "\nloop at %s:%d: ", LOCATION_FILE (loop_loc),
301 LOCATION_LINE (loop_loc));
78c60e3d 302 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
ebfd146a 303 }
71118889
RS
304
305 /* Record the number of latch iterations. */
306 loop->nb_iterations = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), niters,
307 build_int_cst (TREE_TYPE (niters), 1));
ebfd146a
IR
308}
309
5ce9450f
JJ
310/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
311 For all PHI arguments in FROM->dest and TO->dest from those
312 edges ensure that TO->dest PHI arguments have current_def
313 to that in from. */
314
315static void
316slpeel_duplicate_current_defs_from_edges (edge from, edge to)
317{
318 gimple_stmt_iterator gsi_from, gsi_to;
319
320 for (gsi_from = gsi_start_phis (from->dest),
321 gsi_to = gsi_start_phis (to->dest);
14ba8d6d 322 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
5ce9450f 323 {
355fe088
TS
324 gimple *from_phi = gsi_stmt (gsi_from);
325 gimple *to_phi = gsi_stmt (gsi_to);
5ce9450f 326 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
1a5da5b6
RB
327 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
328 if (virtual_operand_p (from_arg))
329 {
14ba8d6d
RB
330 gsi_next (&gsi_from);
331 continue;
332 }
1a5da5b6
RB
333 if (virtual_operand_p (to_arg))
334 {
14ba8d6d
RB
335 gsi_next (&gsi_to);
336 continue;
337 }
1a5da5b6
RB
338 if (TREE_CODE (from_arg) != SSA_NAME)
339 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
340 else
341 {
342 if (get_current_def (to_arg) == NULL_TREE)
343 set_current_def (to_arg, get_current_def (from_arg));
344 }
14ba8d6d
RB
345 gsi_next (&gsi_from);
346 gsi_next (&gsi_to);
5ce9450f 347 }
1a5da5b6
RB
348
349 gphi *from_phi = get_virtual_phi (from->dest);
350 gphi *to_phi = get_virtual_phi (to->dest);
351 if (from_phi)
352 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
353 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
5ce9450f
JJ
354}
355
ebfd146a 356
b8698a0f 357/* Given LOOP this function generates a new copy of it and puts it
5ce9450f
JJ
358 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
359 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
360 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
361 entry or exit of LOOP. */
ebfd146a
IR
362
363struct loop *
5ce9450f
JJ
364slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
365 struct loop *scalar_loop, edge e)
ebfd146a
IR
366{
367 struct loop *new_loop;
3884da6f 368 basic_block *new_bbs, *bbs, *pbbs;
ebfd146a
IR
369 bool at_exit;
370 bool was_imm_dom;
b8698a0f 371 basic_block exit_dest;
ebfd146a 372 edge exit, new_exit;
a6c51a12 373 bool duplicate_outer_loop = false;
ebfd146a 374
2cfc56b9
RB
375 exit = single_exit (loop);
376 at_exit = (e == exit);
ebfd146a
IR
377 if (!at_exit && e != loop_preheader_edge (loop))
378 return NULL;
379
5ce9450f
JJ
380 if (scalar_loop == NULL)
381 scalar_loop = loop;
382
383 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
3884da6f
BC
384 pbbs = bbs + 1;
385 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
a6c51a12
YR
386 /* Allow duplication of outer loops. */
387 if (scalar_loop->inner)
388 duplicate_outer_loop = true;
ebfd146a 389 /* Check whether duplication is possible. */
3884da6f 390 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
ebfd146a
IR
391 {
392 free (bbs);
393 return NULL;
394 }
395
396 /* Generate new loop structure. */
5ce9450f
JJ
397 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
398 duplicate_subloops (scalar_loop, new_loop);
ebfd146a 399
2cfc56b9 400 exit_dest = exit->dest;
b8698a0f
L
401 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
402 exit_dest) == loop->header ?
ebfd146a
IR
403 true : false);
404
2cfc56b9
RB
405 /* Also copy the pre-header, this avoids jumping through hoops to
406 duplicate the loop entry PHI arguments. Create an empty
407 pre-header unconditionally for this. */
5ce9450f 408 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
2cfc56b9 409 edge entry_e = single_pred_edge (preheader);
3884da6f 410 bbs[0] = preheader;
5ce9450f 411 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
ebfd146a 412
5ce9450f
JJ
413 exit = single_exit (scalar_loop);
414 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
ebfd146a 415 &exit, 1, &new_exit, NULL,
3884da6f 416 at_exit ? loop->latch : e->src, true);
5ce9450f 417 exit = single_exit (loop);
3884da6f 418 basic_block new_preheader = new_bbs[0];
ebfd146a 419
5ce9450f
JJ
420 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
421
422 if (scalar_loop != loop)
423 {
424 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
425 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
426 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
427 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
428 header) to have current_def set, so copy them over. */
429 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
430 exit);
431 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
432 0),
433 EDGE_SUCC (loop->latch, 0));
434 }
b8698a0f 435
ebfd146a
IR
436 if (at_exit) /* Add the loop copy at exit. */
437 {
5ce9450f
JJ
438 if (scalar_loop != loop)
439 {
538dd0b7 440 gphi_iterator gsi;
5ce9450f
JJ
441 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
442
443 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
444 gsi_next (&gsi))
445 {
538dd0b7 446 gphi *phi = gsi.phi ();
5ce9450f
JJ
447 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
448 location_t orig_locus
449 = gimple_phi_arg_location_from_edge (phi, e);
450
451 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
452 }
453 }
2cfc56b9
RB
454 redirect_edge_and_branch_force (e, new_preheader);
455 flush_pending_stmts (e);
456 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
a6c51a12 457 if (was_imm_dom || duplicate_outer_loop)
5ce9450f 458 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
2cfc56b9
RB
459
460 /* And remove the non-necessary forwarder again. Keep the other
461 one so we have a proper pre-header for the loop at the exit edge. */
5ce9450f
JJ
462 redirect_edge_pred (single_succ_edge (preheader),
463 single_pred (preheader));
2cfc56b9 464 delete_basic_block (preheader);
5ce9450f
JJ
465 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
466 loop_preheader_edge (scalar_loop)->src);
ebfd146a
IR
467 }
468 else /* Add the copy at entry. */
469 {
5ce9450f
JJ
470 if (scalar_loop != loop)
471 {
472 /* Remove the non-necessary forwarder of scalar_loop again. */
473 redirect_edge_pred (single_succ_edge (preheader),
474 single_pred (preheader));
475 delete_basic_block (preheader);
476 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
477 loop_preheader_edge (scalar_loop)->src);
478 preheader = split_edge (loop_preheader_edge (loop));
479 entry_e = single_pred_edge (preheader);
480 }
481
2cfc56b9
RB
482 redirect_edge_and_branch_force (entry_e, new_preheader);
483 flush_pending_stmts (entry_e);
484 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
485
486 redirect_edge_and_branch_force (new_exit, preheader);
487 flush_pending_stmts (new_exit);
488 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
489
490 /* And remove the non-necessary forwarder again. Keep the other
491 one so we have a proper pre-header for the loop at the exit edge. */
5ce9450f
JJ
492 redirect_edge_pred (single_succ_edge (new_preheader),
493 single_pred (new_preheader));
2cfc56b9
RB
494 delete_basic_block (new_preheader);
495 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
496 loop_preheader_edge (new_loop)->src);
ebfd146a
IR
497 }
498
5ce9450f 499 for (unsigned i = 0; i < scalar_loop->num_nodes + 1; i++)
a6c51a12 500 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
2cfc56b9 501
5ce9450f
JJ
502 if (scalar_loop != loop)
503 {
504 /* Update new_loop->header PHIs, so that on the preheader
505 edge they are the ones from loop rather than scalar_loop. */
538dd0b7 506 gphi_iterator gsi_orig, gsi_new;
5ce9450f
JJ
507 edge orig_e = loop_preheader_edge (loop);
508 edge new_e = loop_preheader_edge (new_loop);
509
510 for (gsi_orig = gsi_start_phis (loop->header),
511 gsi_new = gsi_start_phis (new_loop->header);
512 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
513 gsi_next (&gsi_orig), gsi_next (&gsi_new))
514 {
538dd0b7
DM
515 gphi *orig_phi = gsi_orig.phi ();
516 gphi *new_phi = gsi_new.phi ();
5ce9450f
JJ
517 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
518 location_t orig_locus
519 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
520
521 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
522 }
523 }
524
ebfd146a
IR
525 free (new_bbs);
526 free (bbs);
527
b2b29377 528 checking_verify_dominators (CDI_DOMINATORS);
2cfc56b9 529
ebfd146a
IR
530 return new_loop;
531}
532
533
a5e3d614
BC
534/* Given the condition expression COND, put it as the last statement of
535 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
536 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
3e907b90
BC
537 skip the loop. PROBABILITY is the skip edge's probability. Mark the
538 new edge as irreducible if IRREDUCIBLE_P is true. */
ebfd146a
IR
539
540static edge
86290011 541slpeel_add_loop_guard (basic_block guard_bb, tree cond,
a5e3d614 542 basic_block guard_to, basic_block dom_bb,
357067f2 543 profile_probability probability, bool irreducible_p)
ebfd146a
IR
544{
545 gimple_stmt_iterator gsi;
546 edge new_e, enter_e;
538dd0b7 547 gcond *cond_stmt;
ebfd146a
IR
548 gimple_seq gimplify_stmt_list = NULL;
549
550 enter_e = EDGE_SUCC (guard_bb, 0);
551 enter_e->flags &= ~EDGE_FALLTHRU;
552 enter_e->flags |= EDGE_FALSE_VALUE;
553 gsi = gsi_last_bb (guard_bb);
554
f7a06a98
RG
555 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
556 NULL_TREE);
86290011 557 if (gimplify_stmt_list)
a5e3d614 558 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
ebfd146a 559
a5e3d614 560 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
ebfd146a
IR
561 gsi = gsi_last_bb (guard_bb);
562 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
563
564 /* Add new edge to connect guard block to the merge/loop-exit block. */
a5e3d614 565 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
e78410bf
JH
566
567 new_e->count = guard_bb->count;
568 new_e->probability = probability;
3995f3a2 569 new_e->count = enter_e->count.apply_probability (probability);
3e907b90
BC
570 if (irreducible_p)
571 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
572
e78410bf 573 enter_e->count -= new_e->count;
357067f2 574 enter_e->probability = probability.invert ();
a5e3d614 575 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
5281a167
RB
576
577 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
578 if (enter_e->dest->loop_father->header == enter_e->dest)
579 split_edge (enter_e);
580
ebfd146a
IR
581 return new_e;
582}
583
584
585/* This function verifies that the following restrictions apply to LOOP:
a6c51a12
YR
586 (1) it consists of exactly 2 basic blocks - header, and an empty latch
587 for innermost loop and 5 basic blocks for outer-loop.
588 (2) it is single entry, single exit
589 (3) its exit condition is the last stmt in the header
590 (4) E is the entry/exit edge of LOOP.
ebfd146a
IR
591 */
592
593bool
594slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
595{
596 edge exit_e = single_exit (loop);
597 edge entry_e = loop_preheader_edge (loop);
538dd0b7 598 gcond *orig_cond = get_loop_exit_condition (loop);
ebfd146a 599 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
a6c51a12 600 unsigned int num_bb = loop->inner? 5 : 2;
ebfd146a 601
62fdbf29
BC
602 /* All loops have an outer scope; the only case loop->outer is NULL is for
603 the function itself. */
604 if (!loop_outer (loop)
a6c51a12 605 || loop->num_nodes != num_bb
ebfd146a
IR
606 || !empty_block_p (loop->latch)
607 || !single_exit (loop)
608 /* Verify that new loop exit condition can be trivially modified. */
609 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
610 || (e != exit_e && e != entry_e))
611 return false;
612
613 return true;
614}
615
a5e3d614
BC
616/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
617 in the exit bb and rename all the uses after the loop. This simplifies
618 the *guard[12] routines, which assume loop closed SSA form for all PHIs
619 (but normally loop closed SSA form doesn't require virtual PHIs to be
620 in the same form). Doing this early simplifies the checking what
621 uses should be renamed. */
ebfd146a
IR
622
623static void
a5e3d614 624create_lcssa_for_virtual_phi (struct loop *loop)
ebfd146a 625{
538dd0b7 626 gphi_iterator gsi;
ebfd146a 627 edge exit_e = single_exit (loop);
b8698a0f 628
e20f6b4b 629 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
ea057359 630 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
e20f6b4b 631 {
538dd0b7 632 gphi *phi = gsi.phi ();
e20f6b4b
JJ
633 for (gsi = gsi_start_phis (exit_e->dest);
634 !gsi_end_p (gsi); gsi_next (&gsi))
ea057359 635 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
e20f6b4b
JJ
636 break;
637 if (gsi_end_p (gsi))
638 {
b731b390 639 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
538dd0b7 640 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
e20f6b4b
JJ
641 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
642 imm_use_iterator imm_iter;
355fe088 643 gimple *stmt;
e20f6b4b
JJ
644 use_operand_p use_p;
645
9e227d60 646 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
e20f6b4b
JJ
647 gimple_phi_set_result (new_phi, new_vop);
648 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
b5fd0440
YR
649 if (stmt != new_phi
650 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
e20f6b4b
JJ
651 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
652 SET_USE (use_p, new_vop);
653 }
654 break;
655 }
ebfd146a 656
ebfd146a
IR
657}
658
659/* Function vect_get_loop_location.
660
661 Extract the location of the loop in the source code.
662 If the loop is not well formed for vectorization, an estimated
663 location is calculated.
664 Return the loop location if succeed and NULL if not. */
665
b05e0233 666source_location
ebfd146a
IR
667find_loop_location (struct loop *loop)
668{
355fe088 669 gimple *stmt = NULL;
ebfd146a
IR
670 basic_block bb;
671 gimple_stmt_iterator si;
672
673 if (!loop)
b05e0233 674 return UNKNOWN_LOCATION;
ebfd146a
IR
675
676 stmt = get_loop_exit_condition (loop);
677
502498d5
JJ
678 if (stmt
679 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
ebfd146a
IR
680 return gimple_location (stmt);
681
682 /* If we got here the loop is probably not "well formed",
683 try to estimate the loop location */
684
685 if (!loop->header)
b05e0233 686 return UNKNOWN_LOCATION;
ebfd146a
IR
687
688 bb = loop->header;
689
690 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
691 {
692 stmt = gsi_stmt (si);
502498d5 693 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
ebfd146a
IR
694 return gimple_location (stmt);
695 }
696
b05e0233 697 return UNKNOWN_LOCATION;
ebfd146a
IR
698}
699
a5e3d614
BC
700/* Return true if PHI defines an IV of the loop to be vectorized. */
701
702static bool
703iv_phi_p (gphi *phi)
704{
705 if (virtual_operand_p (PHI_RESULT (phi)))
706 return false;
707
708 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
709 gcc_assert (stmt_info != NULL);
710 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
711 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
712 return false;
713
714 return true;
715}
ebfd146a 716
ebfd146a
IR
717/* Function vect_can_advance_ivs_p
718
b8698a0f
L
719 In case the number of iterations that LOOP iterates is unknown at compile
720 time, an epilog loop will be generated, and the loop induction variables
721 (IVs) will be "advanced" to the value they are supposed to take just before
ebfd146a
IR
722 the epilog loop. Here we check that the access function of the loop IVs
723 and the expression that represents the loop bound are simple enough.
724 These restrictions will be relaxed in the future. */
725
b8698a0f 726bool
ebfd146a
IR
727vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
728{
729 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
730 basic_block bb = loop->header;
538dd0b7 731 gphi_iterator gsi;
ebfd146a
IR
732
733 /* Analyze phi functions of the loop header. */
734
73fbfcad 735 if (dump_enabled_p ())
e645e942 736 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
ebfd146a
IR
737 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
738 {
ebfd146a
IR
739 tree evolution_part;
740
a5e3d614 741 gphi *phi = gsi.phi ();
73fbfcad 742 if (dump_enabled_p ())
ebfd146a 743 {
78c60e3d
SS
744 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
745 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
ebfd146a
IR
746 }
747
748 /* Skip virtual phi's. The data dependences that are associated with
a5e3d614 749 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
ebfd146a 750
a5e3d614
BC
751 Skip reduction phis. */
752 if (!iv_phi_p (phi))
ebfd146a 753 {
73fbfcad 754 if (dump_enabled_p ())
a5e3d614
BC
755 dump_printf_loc (MSG_NOTE, vect_location,
756 "reduc or virtual phi. skip.\n");
ebfd146a
IR
757 continue;
758 }
759
ebfd146a
IR
760 /* Analyze the evolution function. */
761
afb119be
RB
762 evolution_part
763 = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
ebfd146a
IR
764 if (evolution_part == NULL_TREE)
765 {
73fbfcad 766 if (dump_enabled_p ())
afb119be 767 dump_printf (MSG_MISSED_OPTIMIZATION,
e645e942 768 "No access function or evolution.\n");
ebfd146a
IR
769 return false;
770 }
b8698a0f 771
2a93954e
AH
772 /* FORNOW: We do not transform initial conditions of IVs
773 which evolution functions are not invariants in the loop. */
774
775 if (!expr_invariant_in_loop_p (loop, evolution_part))
776 {
777 if (dump_enabled_p ())
778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
779 "evolution not invariant in loop.\n");
780 return false;
781 }
782
b8698a0f 783 /* FORNOW: We do not transform initial conditions of IVs
ebfd146a
IR
784 which evolution functions are a polynomial of degree >= 2. */
785
786 if (tree_is_chrec (evolution_part))
2a93954e
AH
787 {
788 if (dump_enabled_p ())
789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
790 "evolution is chrec.\n");
791 return false;
792 }
ebfd146a
IR
793 }
794
795 return true;
796}
797
798
799/* Function vect_update_ivs_after_vectorizer.
800
801 "Advance" the induction variables of LOOP to the value they should take
802 after the execution of LOOP. This is currently necessary because the
803 vectorizer does not handle induction variables that are used after the
804 loop. Such a situation occurs when the last iterations of LOOP are
805 peeled, because:
806 1. We introduced new uses after LOOP for IVs that were not originally used
807 after LOOP: the IVs of LOOP are now used by an epilog loop.
808 2. LOOP is going to be vectorized; this means that it will iterate N/VF
809 times, whereas the loop IVs should be bumped N times.
810
811 Input:
812 - LOOP - a loop that is going to be vectorized. The last few iterations
813 of LOOP were peeled.
814 - NITERS - the number of iterations that LOOP executes (before it is
815 vectorized). i.e, the number of times the ivs should be bumped.
816 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
817 coming out from LOOP on which there are uses of the LOOP ivs
818 (this is the path from LOOP->exit to epilog_loop->preheader).
819
820 The new definitions of the ivs are placed in LOOP->exit.
821 The phi args associated with the edge UPDATE_E in the bb
822 UPDATE_E->dest are updated accordingly.
823
824 Assumption 1: Like the rest of the vectorizer, this function assumes
825 a single loop exit that has a single predecessor.
826
827 Assumption 2: The phi nodes in the LOOP header and in update_bb are
828 organized in the same order.
829
830 Assumption 3: The access function of the ivs is simple enough (see
831 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
832
833 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
b8698a0f 834 coming out of LOOP on which the ivs of LOOP are used (this is the path
ebfd146a
IR
835 that leads to the epilog loop; other paths skip the epilog loop). This
836 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
837 needs to have its phis updated.
838 */
839
840static void
a5e3d614
BC
841vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
842 tree niters, edge update_e)
ebfd146a 843{
538dd0b7 844 gphi_iterator gsi, gsi1;
a5e3d614 845 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a 846 basic_block update_bb = update_e->dest;
a5e3d614 847 basic_block exit_bb = single_exit (loop)->dest;
ebfd146a
IR
848
849 /* Make sure there exists a single-predecessor exit bb: */
850 gcc_assert (single_pred_p (exit_bb));
a5e3d614 851 gcc_assert (single_succ_edge (exit_bb) == update_e);
ebfd146a
IR
852
853 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
854 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
855 gsi_next (&gsi), gsi_next (&gsi1))
856 {
ebfd146a 857 tree init_expr;
550918ca
RG
858 tree step_expr, off;
859 tree type;
ebfd146a
IR
860 tree var, ni, ni_name;
861 gimple_stmt_iterator last_gsi;
862
a5e3d614
BC
863 gphi *phi = gsi.phi ();
864 gphi *phi1 = gsi1.phi ();
73fbfcad 865 if (dump_enabled_p ())
a5e3d614
BC
866 {
867 dump_printf_loc (MSG_NOTE, vect_location,
868 "vect_update_ivs_after_vectorizer: phi: ");
78c60e3d 869 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
a5e3d614 870 }
ebfd146a 871
a5e3d614
BC
872 /* Skip reduction and virtual phis. */
873 if (!iv_phi_p (phi))
ebfd146a 874 {
73fbfcad 875 if (dump_enabled_p ())
a5e3d614
BC
876 dump_printf_loc (MSG_NOTE, vect_location,
877 "reduc or virtual phi. skip.\n");
ebfd146a
IR
878 continue;
879 }
880
0ac168a1 881 type = TREE_TYPE (gimple_phi_result (phi));
a5e3d614 882 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
0ac168a1 883 step_expr = unshare_expr (step_expr);
b8698a0f 884
ebfd146a
IR
885 /* FORNOW: We do not support IVs whose evolution function is a polynomial
886 of degree >= 2 or exponential. */
0ac168a1 887 gcc_assert (!tree_is_chrec (step_expr));
ebfd146a 888
0ac168a1 889 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
ebfd146a 890
550918ca
RG
891 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
892 fold_convert (TREE_TYPE (step_expr), niters),
893 step_expr);
0ac168a1 894 if (POINTER_TYPE_P (type))
5d49b6a7 895 ni = fold_build_pointer_plus (init_expr, off);
ebfd146a 896 else
0ac168a1
RG
897 ni = fold_build2 (PLUS_EXPR, type,
898 init_expr, fold_convert (type, off));
ebfd146a 899
0ac168a1 900 var = create_tmp_var (type, "tmp");
ebfd146a
IR
901
902 last_gsi = gsi_last_bb (exit_bb);
a5e3d614
BC
903 gimple_seq new_stmts = NULL;
904 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
905 /* Exit_bb shouldn't be empty. */
906 if (!gsi_end_p (last_gsi))
907 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
908 else
909 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
b8698a0f 910
ebfd146a 911 /* Fix phi expressions in the successor bb. */
684f25f4 912 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
ebfd146a
IR
913 }
914}
915
a5e3d614 916/* Function vect_gen_prolog_loop_niters
d9157f15 917
a5e3d614
BC
918 Generate the number of iterations which should be peeled as prolog for the
919 loop represented by LOOP_VINFO. It is calculated as the misalignment of
920 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
921 As a result, after the execution of this loop, the data reference DR will
922 refer to an aligned location. The following computation is generated:
ebfd146a
IR
923
924 If the misalignment of DR is known at compile time:
925 addr_mis = int mis = DR_MISALIGNMENT (dr);
926 Else, compute address misalignment in bytes:
5aea1e76 927 addr_mis = addr & (vectype_align - 1)
ebfd146a 928
a5e3d614 929 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
ebfd146a
IR
930
931 (elem_size = element type size; an element is the scalar element whose type
932 is the inner type of the vectype)
933
a5e3d614 934 The computations will be emitted at the end of BB. We also compute and
cbb22e61 935 store upper bound (included) of the result in BOUND.
a5e3d614 936
ebfd146a
IR
937 When the step of the data-ref in the loop is not 1 (as in interleaved data
938 and SLP), the number of iterations of the prolog must be divided by the step
939 (which is equal to the size of interleaved group).
940
941 The above formulas assume that VF == number of elements in the vector. This
942 may not hold when there are multiple-types in the loop.
943 In this case, for some data-references in the loop the VF does not represent
944 the number of elements that fit in the vector. Therefore, instead of VF we
945 use TYPE_VECTOR_SUBPARTS. */
946
b8698a0f 947static tree
a5e3d614
BC
948vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
949 basic_block bb, int *bound)
ebfd146a
IR
950{
951 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
ebfd146a 952 tree var;
a5e3d614
BC
953 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
954 gimple_seq stmts = NULL, new_stmts = NULL;
ebfd146a 955 tree iters, iters_name;
355fe088 956 gimple *dr_stmt = DR_STMT (dr);
ebfd146a
IR
957 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
958 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
959 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
ebfd146a
IR
960 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
961
15e693cc 962 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
ebfd146a 963 {
15e693cc 964 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
ebfd146a 965
73fbfcad 966 if (dump_enabled_p ())
ccb3ad87 967 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 968 "known peeling = %d.\n", npeel);
ebfd146a 969
720f5239 970 iters = build_int_cst (niters_type, npeel);
cbb22e61 971 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
ebfd146a
IR
972 }
973 else
974 {
d8ba5b19
RG
975 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
976 tree offset = negative
5fa79de8 977 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
b8698a0f 978 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
3f5e8a76 979 &stmts, offset);
96f9265a 980 tree type = unsigned_type_for (TREE_TYPE (start_addr));
5aea1e76
UW
981 tree vectype_align_minus_1 = build_int_cst (type, vectype_align - 1);
982 HOST_WIDE_INT elem_size =
983 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
984 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
ebfd146a
IR
985 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
986 tree nelements_tree = build_int_cst (type, nelements);
987 tree byte_misalign;
988 tree elem_misalign;
989
5aea1e76 990 /* Create: byte_misalign = addr & (vectype_align - 1) */
b8698a0f 991 byte_misalign =
a5e3d614
BC
992 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr),
993 vectype_align_minus_1);
b8698a0f 994
ebfd146a
IR
995 /* Create: elem_misalign = byte_misalign / element_size */
996 elem_misalign =
a5e3d614 997 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
ebfd146a
IR
998
999 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
d8ba5b19
RG
1000 if (negative)
1001 iters = fold_build2 (MINUS_EXPR, type, elem_misalign, nelements_tree);
1002 else
1003 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
ebfd146a
IR
1004 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
1005 iters = fold_convert (niters_type, iters);
cbb22e61 1006 *bound = nelements - 1;
ebfd146a
IR
1007 }
1008
73fbfcad 1009 if (dump_enabled_p ())
ebfd146a 1010 {
ccb3ad87 1011 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1012 "niters for prolog loop: ");
ccb3ad87 1013 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
e645e942 1014 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1015 }
1016
1017 var = create_tmp_var (niters_type, "prolog_loop_niters");
a5e3d614 1018 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
ebfd146a 1019
a5e3d614
BC
1020 if (new_stmts)
1021 gimple_seq_add_seq (&stmts, new_stmts);
ebfd146a
IR
1022 if (stmts)
1023 {
a5e3d614
BC
1024 gcc_assert (single_succ_p (bb));
1025 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1026 if (gsi_end_p (gsi))
1027 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1028 else
1029 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
ebfd146a 1030 }
b8698a0f 1031 return iters_name;
ebfd146a
IR
1032}
1033
1034
1035/* Function vect_update_init_of_dr
1036
1037 NITERS iterations were peeled from LOOP. DR represents a data reference
1038 in LOOP. This function updates the information recorded in DR to
b8698a0f 1039 account for the fact that the first NITERS iterations had already been
ebfd146a
IR
1040 executed. Specifically, it updates the OFFSET field of DR. */
1041
1042static void
1043vect_update_init_of_dr (struct data_reference *dr, tree niters)
1044{
1045 tree offset = DR_OFFSET (dr);
b8698a0f 1046
ebfd146a
IR
1047 niters = fold_build2 (MULT_EXPR, sizetype,
1048 fold_convert (sizetype, niters),
1049 fold_convert (sizetype, DR_STEP (dr)));
587aa063
RG
1050 offset = fold_build2 (PLUS_EXPR, sizetype,
1051 fold_convert (sizetype, offset), niters);
ebfd146a
IR
1052 DR_OFFSET (dr) = offset;
1053}
1054
1055
1056/* Function vect_update_inits_of_drs
1057
b8698a0f
L
1058 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1059 This function updates the information recorded for the data references in
1060 the loop to account for the fact that the first NITERS iterations had
ebfd146a
IR
1061 already been executed. Specifically, it updates the initial_condition of
1062 the access_function of all the data_references in the loop. */
1063
1064static void
1065vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1066{
1067 unsigned int i;
9771b263 1068 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
ebfd146a 1069 struct data_reference *dr;
a5e3d614
BC
1070
1071 if (dump_enabled_p ())
ccb3ad87 1072 dump_printf_loc (MSG_NOTE, vect_location,
a5e3d614
BC
1073 "=== vect_update_inits_of_dr ===\n");
1074
1075 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1076 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1077 {
1078 gimple_seq seq;
1079 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1080 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1081
1082 niters = fold_convert (sizetype, niters);
1083 niters = force_gimple_operand (niters, &seq, false, var);
1084 if (seq)
1085 {
1086 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1087 gcc_assert (!new_bb);
1088 }
1089 }
ebfd146a 1090
9771b263 1091 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
1092 vect_update_init_of_dr (dr, niters);
1093}
1094
1095
a5e3d614 1096/* This function builds ni_name = number of iterations. Statements
7078979b
BC
1097 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
1098 it to TRUE if new ssa_var is generated. */
a5e3d614
BC
1099
1100tree
7078979b 1101vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
a5e3d614
BC
1102{
1103 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1104 if (TREE_CODE (ni) == INTEGER_CST)
1105 return ni;
1106 else
1107 {
1108 tree ni_name, var;
1109 gimple_seq stmts = NULL;
1110 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1111
1112 var = create_tmp_var (TREE_TYPE (ni), "niters");
1113 ni_name = force_gimple_operand (ni, &stmts, false, var);
1114 if (stmts)
7078979b
BC
1115 {
1116 gsi_insert_seq_on_edge_immediate (pe, stmts);
1117 if (new_var_p != NULL)
1118 *new_var_p = true;
1119 }
a5e3d614
BC
1120
1121 return ni_name;
1122 }
1123}
1124
cbb22e61
BC
1125/* Calculate the number of iterations above which vectorized loop will be
1126 preferred than scalar loop. NITERS_PROLOG is the number of iterations
1127 of prolog loop. If it's integer const, the integer number is also passed
1128 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (included) of
1129 number of iterations of prolog loop. VFM1 is vector factor minus one.
1130 If CHECK_PROFITABILITY is true, TH is the threshold below which scalar
1131 (rather than vectorized) loop will be executed. This function stores
1132 upper bound (included) of the result in BOUND_SCALAR. */
a5e3d614
BC
1133
1134static tree
1135vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
cbb22e61
BC
1136 int bound_prolog, int vfm1, int th,
1137 int *bound_scalar, bool check_profitability)
a5e3d614
BC
1138{
1139 tree type = TREE_TYPE (niters_prolog);
1140 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
cbb22e61 1141 build_int_cst (type, vfm1));
a5e3d614 1142
cbb22e61 1143 *bound_scalar = vfm1 + bound_prolog;
a5e3d614
BC
1144 if (check_profitability)
1145 {
cbb22e61
BC
1146 /* TH indicates the minimum niters of vectorized loop, while we
1147 compute the maximum niters of scalar loop. */
1148 th--;
a5e3d614
BC
1149 /* Peeling for constant times. */
1150 if (int_niters_prolog >= 0)
1151 {
cbb22e61
BC
1152 *bound_scalar = (int_niters_prolog + vfm1 < th
1153 ? th
1154 : vfm1 + int_niters_prolog);
1155 return build_int_cst (type, *bound_scalar);
a5e3d614 1156 }
cbb22e61
BC
1157 /* Peeling for unknown times. Note BOUND_PROLOG is the upper
1158 bound (inlcuded) of niters of prolog loop. */
1159 if (th >= vfm1 + bound_prolog)
a5e3d614 1160 {
cbb22e61 1161 *bound_scalar = th;
a5e3d614
BC
1162 return build_int_cst (type, th);
1163 }
cbb22e61
BC
1164 /* Need to do runtime comparison, but BOUND_SCALAR remains the same. */
1165 else if (th > vfm1)
a5e3d614
BC
1166 return fold_build2 (MAX_EXPR, type, build_int_cst (type, th), niters);
1167 }
1168 return niters;
1169}
1170
1171/* This function generates the following statements:
ebfd146a 1172
a5e3d614
BC
1173 niters = number of iterations loop executes (after peeling)
1174 niters_vector = niters / vf
d9157f15 1175
a5e3d614
BC
1176 and places them on the loop preheader edge. NITERS_NO_OVERFLOW is
1177 true if NITERS doesn't overflow. */
ebfd146a
IR
1178
1179void
a5e3d614
BC
1180vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
1181 tree *niters_vector_ptr, bool niters_no_overflow)
ebfd146a 1182{
a5e3d614 1183 tree ni_minus_gap, var;
7078979b 1184 tree niters_vector, type = TREE_TYPE (niters);
a5e3d614
BC
1185 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1186 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
7078979b 1187 tree log_vf = build_int_cst (type, exact_log2 (vf));
a5e3d614
BC
1188
1189 /* If epilogue loop is required because of data accesses with gaps, we
1190 subtract one iteration from the total number of iterations here for
1191 correct calculation of RATIO. */
1192 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1193 {
7078979b
BC
1194 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
1195 build_one_cst (type));
a5e3d614
BC
1196 if (!is_gimple_val (ni_minus_gap))
1197 {
7078979b 1198 var = create_tmp_var (type, "ni_gap");
a5e3d614
BC
1199 gimple *stmts = NULL;
1200 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1201 true, var);
1202 gsi_insert_seq_on_edge_immediate (pe, stmts);
1203 }
1204 }
1205 else
1206 ni_minus_gap = niters;
1207
1208 /* Create: niters >> log2(vf) */
1209 /* If it's known that niters == number of latch executions + 1 doesn't
1210 overflow, we can generate niters >> log2(vf); otherwise we generate
1211 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1212 will be at least one. */
1213 if (niters_no_overflow)
7078979b 1214 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
a5e3d614
BC
1215 else
1216 niters_vector
7078979b
BC
1217 = fold_build2 (PLUS_EXPR, type,
1218 fold_build2 (RSHIFT_EXPR, type,
1219 fold_build2 (MINUS_EXPR, type, ni_minus_gap,
1220 build_int_cst (type, vf)),
a5e3d614 1221 log_vf),
7078979b 1222 build_int_cst (type, 1));
a5e3d614
BC
1223
1224 if (!is_gimple_val (niters_vector))
1225 {
7078979b
BC
1226 var = create_tmp_var (type, "bnd");
1227 gimple_seq stmts = NULL;
a5e3d614
BC
1228 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1229 gsi_insert_seq_on_edge_immediate (pe, stmts);
7078979b
BC
1230 /* Peeling algorithm guarantees that vector loop bound is at least ONE,
1231 we set range information to make niters analyzer's life easier. */
1232 if (stmts != NULL)
1233 set_range_info (niters_vector, VR_RANGE, build_int_cst (type, 1),
1234 fold_build2 (RSHIFT_EXPR, type,
1235 TYPE_MAX_VALUE (type), log_vf));
a5e3d614
BC
1236 }
1237 *niters_vector_ptr = niters_vector;
1238
1239 return;
1240}
1241
1242/* Given NITERS_VECTOR which is the number of iterations for vectorized
1243 loop specified by LOOP_VINFO after vectorization, compute the number
1244 of iterations before vectorization (niters_vector * vf) and store it
1245 to NITERS_VECTOR_MULT_VF_PTR. */
1246
1247static void
1248vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1249 tree niters_vector,
1250 tree *niters_vector_mult_vf_ptr)
1251{
1252 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
ebfd146a 1253 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
a5e3d614
BC
1254 tree type = TREE_TYPE (niters_vector);
1255 tree log_vf = build_int_cst (type, exact_log2 (vf));
1256 basic_block exit_bb = single_exit (loop)->dest;
ebfd146a 1257
a5e3d614
BC
1258 gcc_assert (niters_vector_mult_vf_ptr != NULL);
1259 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
1260 niters_vector, log_vf);
1261 if (!is_gimple_val (niters_vector_mult_vf))
1262 {
1263 tree var = create_tmp_var (type, "niters_vector_mult_vf");
1264 gimple_seq stmts = NULL;
1265 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
1266 &stmts, true, var);
1267 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
1268 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1269 }
1270 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
1271}
1272
1273/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND
1274 from SECOND/FIRST and puts it at the original loop's preheader/exit
1275 edge, the two loops are arranged as below:
1276
1277 preheader_a:
1278 first_loop:
1279 header_a:
1280 i_1 = PHI<i_0, i_2>;
1281 ...
1282 i_2 = i_1 + 1;
1283 if (cond_a)
1284 goto latch_a;
1285 else
1286 goto between_bb;
1287 latch_a:
1288 goto header_a;
1289
1290 between_bb:
1291 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
1292
1293 second_loop:
1294 header_b:
1295 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
1296 or with i_2 if no LCSSA phi is created
1297 under condition of CREATE_LCSSA_FOR_IV_PHIS.
1298 ...
1299 i_4 = i_3 + 1;
1300 if (cond_b)
1301 goto latch_b;
1302 else
1303 goto exit_bb;
1304 latch_b:
1305 goto header_b;
1306
1307 exit_bb:
1308
1309 This function creates loop closed SSA for the first loop; update the
1310 second loop's PHI nodes by replacing argument on incoming edge with the
1311 result of newly created lcssa PHI nodes. IF CREATE_LCSSA_FOR_IV_PHIS
1312 is false, Loop closed ssa phis will only be created for non-iv phis for
1313 the first loop.
1314
1315 This function assumes exit bb of the first loop is preheader bb of the
1316 second loop, i.e, between_bb in the example code. With PHIs updated,
1317 the second loop will execute rest iterations of the first. */
ebfd146a 1318
a5e3d614
BC
1319static void
1320slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
1321 struct loop *first, struct loop *second,
1322 bool create_lcssa_for_iv_phis)
1323{
1324 gphi_iterator gsi_update, gsi_orig;
1325 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1326
1327 edge first_latch_e = EDGE_SUCC (first->latch, 0);
1328 edge second_preheader_e = loop_preheader_edge (second);
1329 basic_block between_bb = single_exit (first)->dest;
1330
1331 gcc_assert (between_bb == second_preheader_e->src);
1332 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
1333 /* Either the first loop or the second is the loop to be vectorized. */
1334 gcc_assert (loop == first || loop == second);
1335
1336 for (gsi_orig = gsi_start_phis (first->header),
1337 gsi_update = gsi_start_phis (second->header);
1338 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1339 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1340 {
1341 gphi *orig_phi = gsi_orig.phi ();
1342 gphi *update_phi = gsi_update.phi ();
1343
1344 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
1345 /* Generate lcssa PHI node for the first loop. */
1346 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
1347 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
1348 {
1349 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1350 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
1351 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
1352 arg = new_res;
1353 }
1354
1355 /* Update PHI node in the second loop by replacing arg on the loop's
1356 incoming edge. */
1357 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
1358 }
1359}
1360
1361/* Function slpeel_add_loop_guard adds guard skipping from the beginning
1362 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
1363 are two pred edges of the merge point before UPDATE_LOOP. The two loops
1364 appear like below:
1365
1366 guard_bb:
1367 if (cond)
1368 goto merge_bb;
1369 else
1370 goto skip_loop;
1371
1372 skip_loop:
1373 header_a:
1374 i_1 = PHI<i_0, i_2>;
1375 ...
1376 i_2 = i_1 + 1;
1377 if (cond_a)
1378 goto latch_a;
1379 else
1380 goto exit_a;
1381 latch_a:
1382 goto header_a;
1383
1384 exit_a:
1385 i_5 = PHI<i_2>;
1386
1387 merge_bb:
1388 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
1389
1390 update_loop:
1391 header_b:
1392 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
1393 ...
1394 i_4 = i_3 + 1;
1395 if (cond_b)
1396 goto latch_b;
1397 else
1398 goto exit_bb;
1399 latch_b:
1400 goto header_b;
1401
1402 exit_bb:
1403
1404 This function creates PHI nodes at merge_bb and replaces the use of i_5
1405 in the update_loop's PHI node with the result of new PHI result. */
1406
1407static void
1408slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
1409 struct loop *update_loop,
1410 edge guard_edge, edge merge_edge)
1411{
1412 source_location merge_loc, guard_loc;
1413 edge orig_e = loop_preheader_edge (skip_loop);
1414 edge update_e = loop_preheader_edge (update_loop);
1415 gphi_iterator gsi_orig, gsi_update;
1416
1417 for ((gsi_orig = gsi_start_phis (skip_loop->header),
1418 gsi_update = gsi_start_phis (update_loop->header));
1419 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
1420 gsi_next (&gsi_orig), gsi_next (&gsi_update))
1421 {
1422 gphi *orig_phi = gsi_orig.phi ();
1423 gphi *update_phi = gsi_update.phi ();
1424
1425 /* Generate new phi node at merge bb of the guard. */
1426 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
1427 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
1428
1429 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
1430 args in NEW_PHI for these edges. */
1431 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
1432 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1433 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
1434 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1435 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
1436 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
1437
1438 /* Update phi in UPDATE_PHI. */
1439 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
1440 }
1441}
1442
1443/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
1444 this function searches for the corresponding lcssa phi node in exit
1445 bb of LOOP. If it is found, return the phi result; otherwise return
1446 NULL. */
1447
1448static tree
1449find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
1450 gphi *lcssa_phi)
1451{
1452 gphi_iterator gsi;
1453 edge e = single_exit (loop);
1454
1455 gcc_assert (single_pred_p (e->dest));
1456 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
1457 {
1458 gphi *phi = gsi.phi ();
1459 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
1460 PHI_ARG_DEF (lcssa_phi, 0), 0))
1461 return PHI_RESULT (phi);
1462 }
1463 return NULL_TREE;
1464}
1465
1466/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
1467 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
1468 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
1469 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
1470 The CFG looks like:
1471
1472 loop:
1473 header_a:
1474 i_1 = PHI<i_0, i_2>;
1475 ...
1476 i_2 = i_1 + 1;
1477 if (cond_a)
1478 goto latch_a;
1479 else
1480 goto exit_a;
1481 latch_a:
1482 goto header_a;
1483
1484 exit_a:
1485
1486 guard_bb:
1487 if (cond)
1488 goto merge_bb;
1489 else
1490 goto epilog_loop;
1491
1492 ;; fall_through_bb
1493
1494 epilog_loop:
1495 header_b:
1496 i_3 = PHI<i_2, i_4>;
1497 ...
1498 i_4 = i_3 + 1;
1499 if (cond_b)
1500 goto latch_b;
1501 else
1502 goto merge_bb;
1503 latch_b:
1504 goto header_b;
1505
1506 merge_bb:
1507 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
1508
1509 exit_bb:
1510 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
1511
1512 For each name used out side EPILOG (i.e - for each name that has a lcssa
1513 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
1514 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
1515 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
1516 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
1517 in exit_bb will also be updated. */
1518
1519static void
1520slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
1521 edge guard_edge, edge merge_edge)
1522{
1523 gphi_iterator gsi;
1524 basic_block merge_bb = guard_edge->dest;
1525
1526 gcc_assert (single_succ_p (merge_bb));
1527 edge e = single_succ_edge (merge_bb);
1528 basic_block exit_bb = e->dest;
1529 gcc_assert (single_pred_p (exit_bb));
1530 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
1531
1532 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1533 {
1534 gphi *update_phi = gsi.phi ();
1535 tree old_arg = PHI_ARG_DEF (update_phi, 0);
1536 /* This loop-closed-phi actually doesn't represent a use out of the
1537 loop - the phi arg is a constant. */
1538 if (TREE_CODE (old_arg) != SSA_NAME)
1539 continue;
1540
1541 tree merge_arg = get_current_def (old_arg);
1542 if (!merge_arg)
1543 merge_arg = old_arg;
1544
1545 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
1546 /* If the var is live after loop but not a reduction, we simply
1547 use the old arg. */
1548 if (!guard_arg)
1549 guard_arg = old_arg;
1550
1551 /* Create new phi node in MERGE_BB: */
1552 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
1553 gphi *merge_phi = create_phi_node (new_res, merge_bb);
1554
1555 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
1556 the two PHI args in merge_phi for these edges. */
1557 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
1558 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
1559
1560 /* Update the original phi in exit_bb. */
1561 adjust_phi_and_debug_stmts (update_phi, e, new_res);
1562 }
1563}
1564
1565/* EPILOG loop is duplicated from the original loop for vectorizing,
1566 the arg of its loop closed ssa PHI needs to be updated. */
1567
1568static void
1569slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
1570{
1571 gphi_iterator gsi;
1572 basic_block exit_bb = single_exit (epilog)->dest;
1573
1574 gcc_assert (single_pred_p (exit_bb));
1575 edge e = EDGE_PRED (exit_bb, 0);
1576 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1577 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
1578}
1579
1580/* Function vect_do_peeling.
1581
1582 Input:
1583 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
1584
1585 preheader:
1586 LOOP:
1587 header_bb:
1588 loop_body
1589 if (exit_loop_cond) goto exit_bb
1590 else goto header_bb
1591 exit_bb:
1592
1593 - NITERS: The number of iterations of the loop.
1594 - NITERSM1: The number of iterations of the loop's latch.
1595 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
1596 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
1597 CHECK_PROFITABILITY is true.
1598 Output:
1599 - NITERS_VECTOR: The number of iterations of loop after vectorization.
1600
1601 This function peels prolog and epilog from the loop, adds guards skipping
1602 PROLOG and EPILOG for various conditions. As a result, the changed CFG
1603 would look like:
1604
1605 guard_bb_1:
1606 if (prefer_scalar_loop) goto merge_bb_1
1607 else goto guard_bb_2
1608
1609 guard_bb_2:
1610 if (skip_prolog) goto merge_bb_2
1611 else goto prolog_preheader
1612
1613 prolog_preheader:
1614 PROLOG:
1615 prolog_header_bb:
1616 prolog_body
1617 if (exit_prolog_cond) goto prolog_exit_bb
1618 else goto prolog_header_bb
1619 prolog_exit_bb:
1620
1621 merge_bb_2:
1622
1623 vector_preheader:
1624 VECTOR LOOP:
1625 vector_header_bb:
1626 vector_body
1627 if (exit_vector_cond) goto vector_exit_bb
1628 else goto vector_header_bb
1629 vector_exit_bb:
1630
1631 guard_bb_3:
1632 if (skip_epilog) goto merge_bb_3
1633 else goto epilog_preheader
1634
1635 merge_bb_1:
1636
1637 epilog_preheader:
1638 EPILOG:
1639 epilog_header_bb:
1640 epilog_body
1641 if (exit_epilog_cond) goto merge_bb_3
1642 else goto epilog_header_bb
1643
1644 merge_bb_3:
1645
1646 Note this function peels prolog and epilog only if it's necessary,
1647 as well as guards.
598eaaa2 1648 Returns created epilogue or NULL.
a5e3d614
BC
1649
1650 TODO: Guard for prefer_scalar_loop should be emitted along with
1651 versioning conditions if loop versioning is needed. */
1652
598eaaa2
YR
1653
1654struct loop *
a5e3d614
BC
1655vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
1656 tree *niters_vector, int th, bool check_profitability,
1657 bool niters_no_overflow)
1658{
1659 edge e, guard_e;
1660 tree type = TREE_TYPE (niters), guard_cond;
1661 basic_block guard_bb, guard_to;
357067f2 1662 profile_probability prob_prolog, prob_vector, prob_epilog;
cbb22e61 1663 int bound_prolog = 0, bound_scalar = 0, bound = 0;
a5e3d614
BC
1664 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1665 int prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
1666 bool epilog_peeling = (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
1667 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo));
1668
1669 if (!prolog_peeling && !epilog_peeling)
598eaaa2 1670 return NULL;
a5e3d614 1671
357067f2 1672 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
a5e3d614
BC
1673 if ((vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo)) == 2)
1674 vf = 3;
357067f2
JH
1675 prob_prolog = prob_epilog = profile_probability::guessed_always ()
1676 .apply_scale (vf - 1, vf);
a5e3d614
BC
1677 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1678
598eaaa2 1679 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
a5e3d614 1680 struct loop *first_loop = loop;
3e907b90 1681 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
a5e3d614
BC
1682 create_lcssa_for_virtual_phi (loop);
1683 update_ssa (TODO_update_ssa_only_virtuals);
1684
1685 if (MAY_HAVE_DEBUG_STMTS)
1686 {
1687 gcc_assert (!adjust_vec.exists ());
1688 adjust_vec.create (32);
1689 }
ebfd146a
IR
1690 initialize_original_copy_tables ();
1691
a5e3d614
BC
1692 /* Prolog loop may be skipped. */
1693 bool skip_prolog = (prolog_peeling != 0);
704c28ee
BC
1694 /* Skip to epilog if scalar loop may be preferred. It's only needed
1695 when we peel for epilog loop and when it hasn't been checked with
1696 loop versioning. */
1697 bool skip_vector = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1698 && !LOOP_REQUIRES_VERSIONING (loop_vinfo));
a5e3d614
BC
1699 /* Epilog loop must be executed if the number of iterations for epilog
1700 loop is known at compile time, otherwise we need to add a check at
1701 the end of vector loop and skip to the end of epilog loop. */
1702 bool skip_epilog = (prolog_peeling < 0
1703 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1704 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
1705 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1706 skip_epilog = false;
1707
1708 /* Record the anchor bb at which guard should be placed if scalar loop
1709 may be preferred. */
1710 basic_block anchor = loop_preheader_edge (loop)->src;
1711 if (skip_vector)
25c99850
BC
1712 {
1713 split_edge (loop_preheader_edge (loop));
1714
1715 /* Due to the order in which we peel prolog and epilog, we first
1716 propagate probability to the whole loop. The purpose is to
1717 avoid adjusting probabilities of both prolog and vector loops
1718 separately. Note in this case, the probability of epilog loop
1719 needs to be scaled back later. */
1720 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
357067f2 1721 if (prob_vector.initialized_p ())
af2bbc51
JH
1722 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
1723 scale_loop_profile (loop, prob_vector, bound);
25c99850 1724 }
a5e3d614
BC
1725
1726 tree niters_prolog = build_int_cst (type, 0);
1727 source_location loop_loc = find_loop_location (loop);
1728 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
1729 if (prolog_peeling)
5d2eb24b 1730 {
a5e3d614
BC
1731 e = loop_preheader_edge (loop);
1732 if (!slpeel_can_duplicate_loop_p (loop, e))
5d2eb24b 1733 {
a5e3d614
BC
1734 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1735 "loop can't be duplicated to preheader edge.\n");
1736 gcc_unreachable ();
1737 }
1738 /* Peel prolog and put it on preheader edge of loop. */
1739 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1740 if (!prolog)
1741 {
1742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1743 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1744 gcc_unreachable ();
1745 }
1746 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
1747 first_loop = prolog;
1748 reset_original_copy_tables ();
1749
1750 /* Generate and update the number of iterations for prolog loop. */
1751 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
1752 &bound_prolog);
1753 slpeel_make_loop_iterate_ntimes (prolog, niters_prolog);
1754
1755 /* Skip the prolog loop. */
1756 if (skip_prolog)
1757 {
1758 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1759 niters_prolog, build_int_cst (type, 0));
1760 guard_bb = loop_preheader_edge (prolog)->src;
25c99850 1761 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
a5e3d614
BC
1762 guard_to = split_edge (loop_preheader_edge (loop));
1763 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1764 guard_to, guard_bb,
357067f2 1765 prob_prolog.invert (),
3e907b90 1766 irred_flag);
a5e3d614
BC
1767 e = EDGE_PRED (guard_to, 0);
1768 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1769 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
25c99850 1770
af2bbc51
JH
1771 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
1772 scale_loop_profile (prolog, prob_prolog, bound_prolog);
a5e3d614
BC
1773 }
1774 /* Update init address of DRs. */
1775 vect_update_inits_of_drs (loop_vinfo, niters_prolog);
1776 /* Update niters for vector loop. */
1777 LOOP_VINFO_NITERS (loop_vinfo)
1778 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
1779 LOOP_VINFO_NITERSM1 (loop_vinfo)
1780 = fold_build2 (MINUS_EXPR, type,
1781 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
7078979b
BC
1782 bool new_var_p = false;
1783 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
1784 /* It's guaranteed that vector loop bound before vectorization is at
1785 least VF, so set range information for newly generated var. */
1786 if (new_var_p)
1787 set_range_info (niters, VR_RANGE,
1788 build_int_cst (type, vf), TYPE_MAX_VALUE (type));
a5e3d614 1789
cbb22e61
BC
1790 /* Prolog iterates at most bound_prolog times, latch iterates at
1791 most bound_prolog - 1 times. */
1792 record_niter_bound (prolog, bound_prolog - 1, false, true);
a5e3d614
BC
1793 delete_update_ssa ();
1794 adjust_vec_debug_stmts ();
1795 scev_reset ();
5d2eb24b
IR
1796 }
1797
a5e3d614
BC
1798 if (epilog_peeling)
1799 {
1800 e = single_exit (loop);
1801 if (!slpeel_can_duplicate_loop_p (loop, e))
1802 {
1803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1804 "loop can't be duplicated to exit edge.\n");
1805 gcc_unreachable ();
1806 }
1807 /* Peel epilog and put it on exit edge of loop. */
1808 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
1809 if (!epilog)
1810 {
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
1812 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
1813 gcc_unreachable ();
1814 }
1815 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
1816
1817 /* Scalar version loop may be preferred. In this case, add guard
1818 and skip to epilog. Note this only happens when the number of
1819 iterations of loop is unknown at compile time, otherwise this
1820 won't be vectorized. */
1821 if (skip_vector)
1822 {
cbb22e61
BC
1823 /* Additional epilogue iteration is peeled if gap exists. */
1824 bool peel_for_gaps = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo);
a5e3d614 1825 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
cbb22e61
BC
1826 bound_prolog,
1827 peel_for_gaps ? vf : vf - 1,
1828 th, &bound_scalar,
a5e3d614 1829 check_profitability);
cbb22e61
BC
1830 /* Build guard against NITERSM1 since NITERS may overflow. */
1831 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
a5e3d614
BC
1832 guard_bb = anchor;
1833 guard_to = split_edge (loop_preheader_edge (epilog));
1834 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
1835 guard_to, guard_bb,
357067f2 1836 prob_vector.invert (),
3e907b90 1837 irred_flag);
a5e3d614
BC
1838 e = EDGE_PRED (guard_to, 0);
1839 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
1840 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
25c99850
BC
1841
1842 /* Simply propagate profile info from guard_bb to guard_to which is
1843 a merge point of control flow. */
1844 guard_to->frequency = guard_bb->frequency;
1845 guard_to->count = guard_bb->count;
1846 single_succ_edge (guard_to)->count = guard_to->count;
af2bbc51
JH
1847 /* Scale probability of epilog loop back.
1848 FIXME: We should avoid scaling down and back up. Profile may
1849 get lost if we scale down to 0. */
357067f2
JH
1850 int scale_up = REG_BR_PROB_BASE * REG_BR_PROB_BASE
1851 / prob_vector.to_reg_br_prob_base ();
10ea2672
JH
1852 basic_block *bbs = get_loop_body (epilog);
1853 scale_bbs_frequencies_int (bbs, epilog->num_nodes, scale_up,
af2bbc51
JH
1854 REG_BR_PROB_BASE);
1855 free (bbs);
a5e3d614 1856 }
ebfd146a 1857
25c99850 1858 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
a5e3d614
BC
1859 tree niters_vector_mult_vf;
1860 /* If loop is peeled for non-zero constant times, now niters refers to
1861 orig_niters - prolog_peeling, it won't overflow even the orig_niters
1862 overflows. */
1863 niters_no_overflow |= (prolog_peeling > 0);
1864 vect_gen_vector_loop_niters (loop_vinfo, niters,
1865 niters_vector, niters_no_overflow);
1866 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
1867 &niters_vector_mult_vf);
1868 /* Update IVs of original loop as if they were advanced by
1869 niters_vector_mult_vf steps. */
1870 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
1871 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
1872 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
1873 update_e);
1874
1875 if (skip_epilog)
1876 {
1877 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
1878 niters, niters_vector_mult_vf);
1879 guard_bb = single_exit (loop)->dest;
1880 guard_to = split_edge (single_exit (epilog));
1881 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
1882 skip_vector ? anchor : guard_bb,
357067f2 1883 prob_epilog.invert (),
3e907b90 1884 irred_flag);
a5e3d614
BC
1885 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
1886 single_exit (epilog));
25c99850
BC
1887 /* Only need to handle basic block before epilog loop if it's not
1888 the guard_bb, which is the case when skip_vector is true. */
1889 if (guard_bb != bb_before_epilog)
1890 {
357067f2 1891 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
25c99850 1892
af2bbc51 1893 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
25c99850 1894 }
af2bbc51 1895 scale_loop_profile (epilog, prob_epilog, bound);
a5e3d614
BC
1896 }
1897 else
1898 slpeel_update_phi_nodes_for_lcssa (epilog);
ebfd146a 1899
cbb22e61 1900 bound = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? vf - 1 : vf - 2;
a5e3d614 1901 /* We share epilog loop with scalar version loop. */
cbb22e61
BC
1902 bound = MAX (bound, bound_scalar - 1);
1903 record_niter_bound (epilog, bound, false, true);
a5e3d614
BC
1904
1905 delete_update_ssa ();
1906 adjust_vec_debug_stmts ();
1907 scev_reset ();
1908 }
1909 adjust_vec.release ();
ebfd146a 1910 free_original_copy_tables ();
598eaaa2
YR
1911
1912 return epilog;
ebfd146a
IR
1913}
1914
01d32b2b
BC
1915/* Function vect_create_cond_for_niters_checks.
1916
1917 Create a conditional expression that represents the run-time checks for
1918 loop's niter. The loop is guaranteed to to terminate if the run-time
1919 checks hold.
1920
1921 Input:
1922 COND_EXPR - input conditional expression. New conditions will be chained
1923 with logical AND operation. If it is NULL, then the function
1924 is used to return the number of alias checks.
1925 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
1926 to be checked.
1927
1928 Output:
1929 COND_EXPR - conditional expression.
1930
1931 The returned COND_EXPR is the conditional expression to be used in the
1932 if statement that controls which version of the loop gets executed at
1933 runtime. */
1934
1935static void
1936vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
1937{
1938 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
1939
1940 if (*cond_expr)
1941 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1942 *cond_expr, part_cond_expr);
1943 else
1944 *cond_expr = part_cond_expr;
1945}
ebfd146a
IR
1946
1947/* Function vect_create_cond_for_align_checks.
1948
1949 Create a conditional expression that represents the alignment checks for
1950 all of data references (array element references) whose alignment must be
1951 checked at runtime.
1952
1953 Input:
1954 COND_EXPR - input conditional expression. New conditions will be chained
1955 with logical AND operation.
1956 LOOP_VINFO - two fields of the loop information are used.
1957 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
1958 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
1959
1960 Output:
1961 COND_EXPR_STMT_LIST - statements needed to construct the conditional
1962 expression.
1963 The returned value is the conditional expression to be used in the if
1964 statement that controls which version of the loop gets executed at runtime.
1965
1966 The algorithm makes two assumptions:
1967 1) The number of bytes "n" in a vector is a power of 2.
1968 2) An address "a" is aligned if a%n is zero and that this
1969 test can be done as a&(n-1) == 0. For example, for 16
1970 byte vectors the test is a&0xf == 0. */
1971
1972static void
1973vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
1974 tree *cond_expr,
1975 gimple_seq *cond_expr_stmt_list)
1976{
355fe088 1977 vec<gimple *> may_misalign_stmts
ebfd146a 1978 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
355fe088 1979 gimple *ref_stmt;
ebfd146a
IR
1980 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
1981 tree mask_cst;
1982 unsigned int i;
ebfd146a
IR
1983 tree int_ptrsize_type;
1984 char tmp_name[20];
1985 tree or_tmp_name = NULL_TREE;
83d5977e 1986 tree and_tmp_name;
355fe088 1987 gimple *and_stmt;
ebfd146a
IR
1988 tree ptrsize_zero;
1989 tree part_cond_expr;
1990
1991 /* Check that mask is one less than a power of 2, i.e., mask is
1992 all zeros followed by all ones. */
1993 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
1994
96f9265a 1995 int_ptrsize_type = signed_type_for (ptr_type_node);
ebfd146a
IR
1996
1997 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
1998 of the first vector of the i'th data reference. */
1999
9771b263 2000 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
ebfd146a
IR
2001 {
2002 gimple_seq new_stmt_list = NULL;
2003 tree addr_base;
83d5977e
RG
2004 tree addr_tmp_name;
2005 tree new_or_tmp_name;
355fe088 2006 gimple *addr_stmt, *or_stmt;
d8ba5b19
RG
2007 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2008 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2009 bool negative = tree_int_cst_compare
2010 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2011 tree offset = negative
aad83b7c 2012 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
ebfd146a
IR
2013
2014 /* create: addr_tmp = (int)(address_of_first_vector) */
2015 addr_base =
2016 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
3f5e8a76 2017 offset);
ebfd146a
IR
2018 if (new_stmt_list != NULL)
2019 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2020
83d5977e
RG
2021 sprintf (tmp_name, "addr2int%d", i);
2022 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
0d0e4a03 2023 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
ebfd146a
IR
2024 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2025
2026 /* The addresses are OR together. */
2027
2028 if (or_tmp_name != NULL_TREE)
2029 {
2030 /* create: or_tmp = or_tmp | addr_tmp */
83d5977e
RG
2031 sprintf (tmp_name, "orptrs%d", i);
2032 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
0d0e4a03
JJ
2033 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2034 or_tmp_name, addr_tmp_name);
ebfd146a
IR
2035 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2036 or_tmp_name = new_or_tmp_name;
2037 }
2038 else
2039 or_tmp_name = addr_tmp_name;
2040
2041 } /* end for i */
2042
2043 mask_cst = build_int_cst (int_ptrsize_type, mask);
2044
2045 /* create: and_tmp = or_tmp & mask */
83d5977e 2046 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
ebfd146a 2047
0d0e4a03
JJ
2048 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2049 or_tmp_name, mask_cst);
ebfd146a
IR
2050 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2051
2052 /* Make and_tmp the left operand of the conditional test against zero.
2053 if and_tmp has a nonzero bit then some address is unaligned. */
2054 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2055 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2056 and_tmp_name, ptrsize_zero);
2057 if (*cond_expr)
2058 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2059 *cond_expr, part_cond_expr);
2060 else
2061 *cond_expr = part_cond_expr;
2062}
2063
ebfd146a
IR
2064/* Function vect_create_cond_for_alias_checks.
2065
2066 Create a conditional expression that represents the run-time checks for
2067 overlapping of address ranges represented by a list of data references
2068 relations passed as input.
2069
2070 Input:
2071 COND_EXPR - input conditional expression. New conditions will be chained
a05a89fa
CH
2072 with logical AND operation. If it is NULL, then the function
2073 is used to return the number of alias checks.
ebfd146a
IR
2074 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2075 to be checked.
2076
2077 Output:
2078 COND_EXPR - conditional expression.
ebfd146a 2079
a05a89fa 2080 The returned COND_EXPR is the conditional expression to be used in the if
ebfd146a
IR
2081 statement that controls which version of the loop gets executed at runtime.
2082*/
2083
a05a89fa 2084void
4bdd44c4 2085vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
ebfd146a 2086{
93bdc3ed 2087 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
a05a89fa 2088 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
ebfd146a 2089
a05a89fa 2090 if (comp_alias_ddrs.is_empty ())
ebfd146a
IR
2091 return;
2092
9cbd2d97
BC
2093 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
2094 &comp_alias_ddrs, cond_expr);
73fbfcad 2095 if (dump_enabled_p ())
ccb3ad87 2096 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2097 "created %u versioning for alias checks.\n",
a05a89fa 2098 comp_alias_ddrs.length ());
ebfd146a
IR
2099}
2100
2101
2102/* Function vect_loop_versioning.
b8698a0f 2103
ebfd146a
IR
2104 If the loop has data references that may or may not be aligned or/and
2105 has data reference relations whose independence was not proven then
2106 two versions of the loop need to be generated, one which is vectorized
2107 and one which isn't. A test is then generated to control which of the
2108 loops is executed. The test checks for the alignment of all of the
2109 data references that may or may not be aligned. An additional
2110 sequence of runtime tests is generated for each pairs of DDRs whose
b8698a0f
L
2111 independence was not proven. The vectorized version of loop is
2112 executed only if both alias and alignment tests are passed.
2113
ebfd146a 2114 The test generated to check which version of loop is executed
b8698a0f 2115 is modified to also check for profitability as indicated by the
d9157f15 2116 cost model threshold TH.
86290011
RG
2117
2118 The versioning precondition(s) are placed in *COND_EXPR and
d68d56b5 2119 *COND_EXPR_STMT_LIST. */
ebfd146a
IR
2120
2121void
368117e8
RG
2122vect_loop_versioning (loop_vec_info loop_vinfo,
2123 unsigned int th, bool check_profitability)
ebfd146a 2124{
01d32b2b 2125 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
5ce9450f 2126 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
ebfd146a 2127 basic_block condition_bb;
538dd0b7
DM
2128 gphi_iterator gsi;
2129 gimple_stmt_iterator cond_exp_gsi;
ebfd146a
IR
2130 basic_block merge_bb;
2131 basic_block new_exit_bb;
2132 edge new_exit_e, e;
538dd0b7 2133 gphi *orig_phi, *new_phi;
368117e8 2134 tree cond_expr = NULL_TREE;
d68d56b5 2135 gimple_seq cond_expr_stmt_list = NULL;
ebfd146a 2136 tree arg;
af2bbc51 2137 profile_probability prob = profile_probability::likely ();
ebfd146a 2138 gimple_seq gimplify_stmt_list = NULL;
fdd29374 2139 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
9cc1fb4b
XDL
2140 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2141 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
01d32b2b 2142 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
ebfd146a 2143
368117e8 2144 if (check_profitability)
80be3333 2145 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
01d32b2b 2146 build_int_cst (TREE_TYPE (scalar_loop_iters),
fdd29374 2147 th - 1));
01d32b2b
BC
2148
2149 if (version_niter)
2150 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
2151
2152 if (cond_expr)
2153 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
2154 is_gimple_condexpr, NULL_TREE);
ebfd146a 2155
9cc1fb4b 2156 if (version_align)
d68d56b5
RG
2157 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
2158 &cond_expr_stmt_list);
ebfd146a 2159
9cc1fb4b 2160 if (version_alias)
4bdd44c4 2161 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
86290011 2162
d68d56b5
RG
2163 cond_expr = force_gimple_operand_1 (cond_expr, &gimplify_stmt_list,
2164 is_gimple_condexpr, NULL_TREE);
2165 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
ebfd146a
IR
2166
2167 initialize_original_copy_tables ();
5ce9450f
JJ
2168 if (scalar_loop)
2169 {
2170 edge scalar_e;
2171 basic_block preheader, scalar_preheader;
2172
2173 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
2174 scale LOOP's frequencies instead. */
5d3ebb71 2175 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
af2bbc51
JH
2176 prob, prob.invert (), prob, prob.invert (), true);
2177 scale_loop_frequencies (loop, prob);
5ce9450f
JJ
2178 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
2179 while we need to move it above LOOP's preheader. */
2180 e = loop_preheader_edge (loop);
2181 scalar_e = loop_preheader_edge (scalar_loop);
2182 gcc_assert (empty_block_p (e->src)
2183 && single_pred_p (e->src));
2184 gcc_assert (empty_block_p (scalar_e->src)
2185 && single_pred_p (scalar_e->src));
2186 gcc_assert (single_pred_p (condition_bb));
2187 preheader = e->src;
2188 scalar_preheader = scalar_e->src;
2189 scalar_e = find_edge (condition_bb, scalar_preheader);
2190 e = single_pred_edge (preheader);
2191 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
2192 scalar_preheader);
2193 redirect_edge_and_branch_force (scalar_e, preheader);
2194 redirect_edge_and_branch_force (e, condition_bb);
2195 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
2196 single_pred (condition_bb));
2197 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
2198 single_pred (scalar_preheader));
2199 set_immediate_dominator (CDI_DOMINATORS, preheader,
2200 condition_bb);
2201 }
2202 else
01d32b2b 2203 nloop = loop_version (loop, cond_expr, &condition_bb,
af2bbc51 2204 prob, prob.invert (), prob, prob.invert (), true);
01d32b2b
BC
2205
2206 if (version_niter)
2207 {
2208 /* The versioned loop could be infinite, we need to clear existing
2209 niter information which is copied from the original loop. */
2210 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
2211 vect_free_loop_info_assumptions (nloop);
2212 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
2213 loop_constraint_set (loop, LOOP_C_INFINITE);
2214 }
9cc1fb4b 2215
b05e0233 2216 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
9cc1fb4b
XDL
2217 && dump_enabled_p ())
2218 {
2219 if (version_alias)
2220 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2221 "loop versioned for vectorization because of "
2222 "possible aliasing\n");
2223 if (version_align)
2224 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2225 "loop versioned for vectorization to enhance "
2226 "alignment\n");
2227
2228 }
c3284718 2229 free_original_copy_tables ();
ebfd146a 2230
b8698a0f 2231 /* Loop versioning violates an assumption we try to maintain during
ebfd146a
IR
2232 vectorization - that the loop exit block has a single predecessor.
2233 After versioning, the exit block of both loop versions is the same
2234 basic block (i.e. it has two predecessors). Just in order to simplify
2235 following transformations in the vectorizer, we fix this situation
2236 here by adding a new (empty) block on the exit-edge of the loop,
5ce9450f
JJ
2237 with the proper loop-exit phis to maintain loop-closed-form.
2238 If loop versioning wasn't done from loop, but scalar_loop instead,
2239 merge_bb will have already just a single successor. */
b8698a0f 2240
ebfd146a 2241 merge_bb = single_exit (loop)->dest;
5ce9450f 2242 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
ebfd146a 2243 {
5ce9450f
JJ
2244 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
2245 new_exit_bb = split_edge (single_exit (loop));
2246 new_exit_e = single_exit (loop);
2247 e = EDGE_SUCC (new_exit_bb, 0);
2248
2249 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2250 {
2251 tree new_res;
538dd0b7 2252 orig_phi = gsi.phi ();
b731b390 2253 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
5ce9450f
JJ
2254 new_phi = create_phi_node (new_res, new_exit_bb);
2255 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
2256 add_phi_arg (new_phi, arg, new_exit_e,
2257 gimple_phi_arg_location_from_edge (orig_phi, e));
2258 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
2259 }
b8698a0f 2260 }
ebfd146a
IR
2261
2262 /* End loop-exit-fixes after versioning. */
2263
d68d56b5 2264 if (cond_expr_stmt_list)
ebfd146a
IR
2265 {
2266 cond_exp_gsi = gsi_last_bb (condition_bb);
d68d56b5 2267 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
86290011 2268 GSI_SAME_STMT);
ebfd146a 2269 }
90eb75f2 2270 update_ssa (TODO_update_ssa);
ebfd146a 2271}