Restore correct iv step for fully-masked loops
[thirdparty/gcc.git] / gcc / tree-vect-loop-manip.c
48e1416a 1/* Vectorizer Specific Loop Manipulations
fbd26352 2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
48e1416a 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
9ef16211 25#include "backend.h"
b20a8bb4 26#include "tree.h"
9ef16211 27#include "gimple.h"
7c29e30e 28#include "cfghooks.h"
29#include "tree-pass.h"
9ef16211 30#include "ssa.h"
9ef16211 31#include "fold-const.h"
94ea8568 32#include "cfganal.h"
a8783bee 33#include "gimplify.h"
dcf1a1ec 34#include "gimple-iterator.h"
e795d6e1 35#include "gimplify-me.h"
073c1fd5 36#include "tree-cfg.h"
05d9c18a 37#include "tree-ssa-loop-manip.h"
073c1fd5 38#include "tree-into-ssa.h"
69ee5dbb 39#include "tree-ssa.h"
fb85abff 40#include "cfgloop.h"
fb85abff 41#include "tree-scalar-evolution.h"
42#include "tree-vectorizer.h"
12117f39 43#include "tree-ssa-loop-ivopts.h"
cde959e7 44#include "gimple-fold.h"
60b29a7e 45#include "tree-ssa-loop-niter.h"
46#include "internal-fn.h"
47#include "stor-layout.h"
48#include "optabs-query.h"
49#include "vec-perm-indices.h"
fb85abff 50
51/*************************************************************************
52 Simple Loop Peeling Utilities
53
54 Utilities to support loop peeling for vectorization purposes.
55 *************************************************************************/
56
57
58/* Renames the use *OP_P. */
59
60static void
61rename_use_op (use_operand_p op_p)
62{
63 tree new_name;
64
65 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
66 return;
67
68 new_name = get_current_def (USE_FROM_PTR (op_p));
69
70 /* Something defined outside of the loop. */
71 if (!new_name)
72 return;
73
74 /* An ordinary ssa name defined in the loop. */
75
76 SET_USE (op_p, new_name);
77}
78
79
ccd0a9f9 80/* Renames the variables in basic block BB. Allow renaming of PHI arguments
5ee742c4 81 on edges incoming from the outer-loop header if RENAME_FROM_OUTER_LOOP is
82 true. */
fb85abff 83
c9b2c569 84static void
5ee742c4 85rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
fb85abff 86{
42acab1c 87 gimple *stmt;
fb85abff 88 use_operand_p use_p;
89 ssa_op_iter iter;
90 edge e;
91 edge_iterator ei;
92 struct loop *loop = bb->loop_father;
5ee742c4 93 struct loop *outer_loop = NULL;
94
95 if (rename_from_outer_loop)
96 {
97 gcc_assert (loop);
98 outer_loop = loop_outer (loop);
99 }
fb85abff 100
1a91d914 101 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
102 gsi_next (&gsi))
fb85abff 103 {
104 stmt = gsi_stmt (gsi);
105 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
106 rename_use_op (use_p);
107 }
108
c9b2c569 109 FOR_EACH_EDGE (e, ei, bb->preds)
fb85abff 110 {
ccd0a9f9 111 if (!flow_bb_inside_loop_p (loop, e->src))
112 {
113 if (!rename_from_outer_loop)
114 continue;
115 if (e->src != outer_loop->header)
116 {
117 if (outer_loop->inner->next)
118 {
 119 /* If outer_loop has 2 inner loops, allow there to
 120 be an extra basic block which uses LOOP_VECTORIZED to
 121 decide which of the two loops to execute. */
122 if (!single_pred_p (e->src)
123 || single_pred (e->src) != outer_loop->header)
124 continue;
125 }
ccd0a9f9 126 }
127 }
1a91d914 128 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
129 gsi_next (&gsi))
130 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
fb85abff 131 }
132}
133
134
6dc50383 135struct adjust_info
b123eaab 136{
137 tree from, to;
138 basic_block bb;
6dc50383 139};
b123eaab 140
b123eaab 141/* A stack of values to be adjusted in debug stmts. We have to
142 process them LIFO, so that the closest substitution applies. If we
143 processed them FIFO, without the stack, we might substitute uses
144 with a PHI DEF that would soon become non-dominant, and when we got
145 to the suitable one, it wouldn't have anything to substitute any
146 more. */
d70aebca 147static vec<adjust_info, va_heap> adjust_vec;
b123eaab 148
149/* Adjust any debug stmts that referenced AI->from values to use the
150 loop-closed AI->to, if the references are dominated by AI->bb and
151 not by the definition of AI->from. */
152
153static void
154adjust_debug_stmts_now (adjust_info *ai)
155{
156 basic_block bbphi = ai->bb;
157 tree orig_def = ai->from;
158 tree new_def = ai->to;
159 imm_use_iterator imm_iter;
42acab1c 160 gimple *stmt;
b123eaab 161 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
162
163 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
164
165 /* Adjust any debug stmts that held onto non-loop-closed
166 references. */
167 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
168 {
169 use_operand_p use_p;
170 basic_block bbuse;
171
172 if (!is_gimple_debug (stmt))
173 continue;
174
175 gcc_assert (gimple_debug_bind_p (stmt));
176
177 bbuse = gimple_bb (stmt);
178
179 if ((bbuse == bbphi
180 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
181 && !(bbuse == bbdef
182 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
183 {
184 if (new_def)
185 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
186 SET_USE (use_p, new_def);
187 else
188 {
189 gimple_debug_bind_reset_value (stmt);
190 update_stmt (stmt);
191 }
192 }
193 }
194}
195
196/* Adjust debug stmts as scheduled before. */
197
198static void
199adjust_vec_debug_stmts (void)
200{
c64f38bf 201 if (!MAY_HAVE_DEBUG_BIND_STMTS)
b123eaab 202 return;
203
f1f41a6c 204 gcc_assert (adjust_vec.exists ());
b123eaab 205
f1f41a6c 206 while (!adjust_vec.is_empty ())
b123eaab 207 {
f1f41a6c 208 adjust_debug_stmts_now (&adjust_vec.last ());
209 adjust_vec.pop ();
b123eaab 210 }
b123eaab 211}
212
213/* Adjust any debug stmts that referenced FROM values to use the
214 loop-closed TO, if the references are dominated by BB and not by
215 the definition of FROM. If adjust_vec is non-NULL, adjustments
216 will be postponed until adjust_vec_debug_stmts is called. */
217
218static void
219adjust_debug_stmts (tree from, tree to, basic_block bb)
220{
221 adjust_info ai;
222
c64f38bf 223 if (MAY_HAVE_DEBUG_BIND_STMTS
0087edc6 224 && TREE_CODE (from) == SSA_NAME
2510e5cd 225 && ! SSA_NAME_IS_DEFAULT_DEF (from)
0087edc6 226 && ! virtual_operand_p (from))
b123eaab 227 {
228 ai.from = from;
229 ai.to = to;
230 ai.bb = bb;
231
f1f41a6c 232 if (adjust_vec.exists ())
233 adjust_vec.safe_push (ai);
b123eaab 234 else
235 adjust_debug_stmts_now (&ai);
236 }
237}
238
239/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
240 to adjust any debug stmts that referenced the old phi arg,
241 presumably non-loop-closed references left over from other
242 transformations. */
243
244static void
42acab1c 245adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
b123eaab 246{
247 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
248
249 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
250
c64f38bf 251 if (MAY_HAVE_DEBUG_BIND_STMTS)
b123eaab 252 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
253 gimple_bb (update_phi));
254}
255
60b29a7e 256/* Define one loop mask MASK from loop LOOP. INIT_MASK is the value that
257 the mask should have during the first iteration and NEXT_MASK is the
258 value that it should have on subsequent iterations. */
cde959e7 259
60b29a7e 260static void
261vect_set_loop_mask (struct loop *loop, tree mask, tree init_mask,
262 tree next_mask)
263{
264 gphi *phi = create_phi_node (mask, loop->header);
265 add_phi_arg (phi, init_mask, loop_preheader_edge (loop), UNKNOWN_LOCATION);
266 add_phi_arg (phi, next_mask, loop_latch_edge (loop), UNKNOWN_LOCATION);
267}
cde959e7 268
60b29a7e 269/* Add SEQ to the end of LOOP's preheader block. */
fb85abff 270
60b29a7e 271static void
272add_preheader_seq (struct loop *loop, gimple_seq seq)
273{
274 if (seq)
275 {
276 edge pe = loop_preheader_edge (loop);
277 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
278 gcc_assert (!new_bb);
279 }
280}
fb85abff 281
60b29a7e 282/* Add SEQ to the beginning of LOOP's header block. */
283
284static void
285add_header_seq (struct loop *loop, gimple_seq seq)
286{
287 if (seq)
288 {
289 gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
290 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
291 }
292}
293
294/* Return true if the target can interleave elements of two vectors.
295 OFFSET is 0 if the first half of the vectors should be interleaved
296 or 1 if the second half should. When returning true, store the
297 associated permutation in INDICES. */
298
299static bool
300interleave_supported_p (vec_perm_indices *indices, tree vectype,
301 unsigned int offset)
302{
303 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
304 poly_uint64 base = exact_div (nelts, 2) * offset;
305 vec_perm_builder sel (nelts, 2, 3);
306 for (unsigned int i = 0; i < 3; ++i)
307 {
308 sel.quick_push (base + i);
309 sel.quick_push (base + i + nelts);
310 }
311 indices->new_vector (sel, 2, nelts);
312 return can_vec_perm_const_p (TYPE_MODE (vectype), *indices);
313}
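/* Illustrative example (not part of the original source): for mask vectors
   with 8 elements, the selector built above encodes two interleaved series
   of three elements each, which vec_perm_indices extends to the full
   permutation.  With OFFSET == 0 the expanded selector is
   { 0, 8, 1, 9, 2, 10, 3, 11 }, interleaving the low halves of the two
   inputs; with OFFSET == 1 it is { 4, 12, 5, 13, 6, 14, 7, 15 },
   interleaving the high halves.  */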
314
315/* Try to use permutes to define the masks in DEST_RGM using the masks
316 in SRC_RGM, given that the former has twice as many masks as the
317 latter. Return true on success, adding any new statements to SEQ. */
318
319static bool
320vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
321 rgroup_masks *src_rgm)
322{
323 tree src_masktype = src_rgm->mask_type;
324 tree dest_masktype = dest_rgm->mask_type;
325 machine_mode src_mode = TYPE_MODE (src_masktype);
326 if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
327 && optab_handler (vec_unpacku_hi_optab, src_mode) != CODE_FOR_nothing
328 && optab_handler (vec_unpacku_lo_optab, src_mode) != CODE_FOR_nothing)
329 {
330 /* Unpacking the source masks gives at least as many mask bits as
331 we need. We can then VIEW_CONVERT any excess bits away. */
332 tree unpack_masktype = vect_halve_mask_nunits (src_masktype);
333 for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
334 {
335 tree src = src_rgm->masks[i / 2];
336 tree dest = dest_rgm->masks[i];
88e81b08 337 tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
338 ? VEC_UNPACK_HI_EXPR
60b29a7e 339 : VEC_UNPACK_LO_EXPR);
340 gassign *stmt;
341 if (dest_masktype == unpack_masktype)
342 stmt = gimple_build_assign (dest, code, src);
343 else
344 {
345 tree temp = make_ssa_name (unpack_masktype);
346 stmt = gimple_build_assign (temp, code, src);
347 gimple_seq_add_stmt (seq, stmt);
348 stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
349 build1 (VIEW_CONVERT_EXPR,
350 dest_masktype, temp));
351 }
352 gimple_seq_add_stmt (seq, stmt);
353 }
354 return true;
355 }
356 vec_perm_indices indices[2];
357 if (dest_masktype == src_masktype
358 && interleave_supported_p (&indices[0], src_masktype, 0)
359 && interleave_supported_p (&indices[1], src_masktype, 1))
360 {
361 /* The destination requires twice as many mask bits as the source, so
362 we can use interleaving permutes to double up the number of bits. */
363 tree masks[2];
364 for (unsigned int i = 0; i < 2; ++i)
365 masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
366 for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
367 {
368 tree src = src_rgm->masks[i / 2];
369 tree dest = dest_rgm->masks[i];
370 gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
371 src, src, masks[i & 1]);
372 gimple_seq_add_stmt (seq, stmt);
373 }
374 return true;
375 }
376 return false;
377}
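/* Illustrative example of the interleave strategy above (not part of the
   original source): suppose SRC_RGM has one mask per iteration with
   elements { a, b, c, d } and DEST_RGM needs two masks of the same type.
   Permuting the source mask with itself through the low-half interleave
   gives { a, a, b, b } and through the high-half interleave gives
   { c, c, d, d }, so each source element is duplicated and the pair of
   destination masks controls two scalars for every scalar controlled by
   the source mask.  */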
378
379/* Helper for vect_set_loop_condition_masked. Generate definitions for
380 all the masks in RGM and return a mask that is nonzero when the loop
381 needs to iterate. Add any new preheader statements to PREHEADER_SEQ.
382 Use LOOP_COND_GSI to insert code before the exit gcond.
383
384 RGM belongs to loop LOOP. The loop originally iterated NITERS
8ac88374 385 times and has been vectorized according to LOOP_VINFO.
60b29a7e 386
6753a4bf 387 If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
388 starts with NITERS_SKIP dummy iterations of the scalar loop before
389 the real work starts. The mask elements for these dummy iterations
390 must be 0, to ensure that the extra iterations do not have an effect.
391
60b29a7e 392 It is known that:
393
394 NITERS * RGM->max_nscalars_per_iter
395
396 does not overflow. However, MIGHT_WRAP_P says whether an induction
397 variable that starts at 0 and has step:
398
399 VF * RGM->max_nscalars_per_iter
400
401 might overflow before hitting a value above:
402
6753a4bf 403 (NITERS + NITERS_SKIP) * RGM->max_nscalars_per_iter
60b29a7e 404
405 This means that we cannot guarantee that such an induction variable
406 would ever hit a value that produces a set of all-false masks for RGM. */
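/* Illustrative example of the MIGHT_WRAP_P case (assumed values, not part
   of the original source): with a 16-bit unsigned COMPARE_TYPE, VF == 16
   and RGM->max_nscalars_per_iter == 2, the value tested against the limit
   advances by 32 scalars per vector iteration.  If
   (NITERS + NITERS_SKIP) * 2 were 65534, that value would reach 65504 and
   then alias back to 0 in COMPARE_TYPE, so it would never compare as
   greater than or equal to the limit; the function below copes by testing
   the IV before the increment against a limit reduced by one step.  */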
407
408static tree
409vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
410 gimple_seq *preheader_seq,
411 gimple_stmt_iterator loop_cond_gsi,
8ac88374 412 rgroup_masks *rgm, tree niters, tree niters_skip,
6753a4bf 413 bool might_wrap_p)
60b29a7e 414{
415 tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
ef871d99 416 tree iv_type = LOOP_VINFO_MASK_IV_TYPE (loop_vinfo);
60b29a7e 417 tree mask_type = rgm->mask_type;
418 unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
419 poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
8ac88374 420 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
60b29a7e 421
422 /* Calculate the maximum number of scalar values that the rgroup
6753a4bf 423 handles in total, the number that it handles for each iteration
424 of the vector loop, and the number that it should skip during the
425 first iteration of the vector loop. */
60b29a7e 426 tree nscalars_total = niters;
8ac88374 427 tree nscalars_step = build_int_cst (iv_type, vf);
6753a4bf 428 tree nscalars_skip = niters_skip;
60b29a7e 429 if (nscalars_per_iter != 1)
430 {
431 /* We checked before choosing to use a fully-masked loop that these
432 multiplications don't overflow. */
8ac88374 433 tree compare_factor = build_int_cst (compare_type, nscalars_per_iter);
434 tree iv_factor = build_int_cst (iv_type, nscalars_per_iter);
60b29a7e 435 nscalars_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
8ac88374 436 nscalars_total, compare_factor);
437 nscalars_step = gimple_build (preheader_seq, MULT_EXPR, iv_type,
438 nscalars_step, iv_factor);
6753a4bf 439 if (nscalars_skip)
440 nscalars_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
8ac88374 441 nscalars_skip, compare_factor);
60b29a7e 442 }
443
444 /* Create an induction variable that counts the number of scalars
445 processed. */
446 tree index_before_incr, index_after_incr;
447 gimple_stmt_iterator incr_gsi;
448 bool insert_after;
60b29a7e 449 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
8ac88374 450 create_iv (build_int_cst (iv_type, 0), nscalars_step, NULL_TREE, loop,
451 &incr_gsi, insert_after, &index_before_incr, &index_after_incr);
ef871d99 452
8ac88374 453 tree zero_index = build_int_cst (compare_type, 0);
6753a4bf 454 tree test_index, test_limit, first_limit;
60b29a7e 455 gimple_stmt_iterator *test_gsi;
456 if (might_wrap_p)
457 {
458 /* In principle the loop should stop iterating once the incremented
6753a4bf 459 IV reaches a value greater than or equal to:
460
461 NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP
462
463 However, there's no guarantee that this addition doesn't overflow
464 the comparison type, or that the IV hits a value above it before
465 wrapping around. We therefore adjust the limit down by one
466 IV step:
60b29a7e 467
6753a4bf 468 (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
469 -[infinite-prec] NSCALARS_STEP
60b29a7e 470
471 and compare the IV against this limit _before_ incrementing it.
472 Since the comparison type is unsigned, we actually want the
473 subtraction to saturate at zero:
474
6753a4bf 475 (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
476 -[sat] NSCALARS_STEP
477
478 And since NSCALARS_SKIP < NSCALARS_STEP, we can reassociate this as:
479
480 NSCALARS_TOTAL -[sat] (NSCALARS_STEP - NSCALARS_SKIP)
481
482 where the rightmost subtraction can be done directly in
483 COMPARE_TYPE. */
60b29a7e 484 test_index = index_before_incr;
8ac88374 485 tree adjust = gimple_convert (preheader_seq, compare_type,
486 nscalars_step);
6753a4bf 487 if (nscalars_skip)
488 adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
489 adjust, nscalars_skip);
60b29a7e 490 test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
6753a4bf 491 nscalars_total, adjust);
60b29a7e 492 test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
6753a4bf 493 test_limit, adjust);
60b29a7e 494 test_gsi = &incr_gsi;
6753a4bf 495
496 /* Get a safe limit for the first iteration. */
497 if (nscalars_skip)
498 {
499 /* The first vector iteration can handle at most NSCALARS_STEP
500 scalars. NSCALARS_STEP <= CONST_LIMIT, and adding
501 NSCALARS_SKIP to that cannot overflow. */
502 tree const_limit = build_int_cst (compare_type,
503 LOOP_VINFO_VECT_FACTOR (loop_vinfo)
504 * nscalars_per_iter);
505 first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
506 nscalars_total, const_limit);
507 first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
508 first_limit, nscalars_skip);
509 }
510 else
511 /* For the first iteration it doesn't matter whether the IV hits
512 a value above NSCALARS_TOTAL. That only matters for the latch
513 condition. */
514 first_limit = nscalars_total;
60b29a7e 515 }
516 else
517 {
518 /* Test the incremented IV, which will always hit a value above
519 the bound before wrapping. */
520 test_index = index_after_incr;
521 test_limit = nscalars_total;
6753a4bf 522 if (nscalars_skip)
523 test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
524 test_limit, nscalars_skip);
60b29a7e 525 test_gsi = &loop_cond_gsi;
6753a4bf 526
527 first_limit = test_limit;
60b29a7e 528 }
529
8ac88374 530 /* Convert the IV value to the comparison type (either a no-op or
531 a demotion). */
ef871d99 532 gimple_seq test_seq = NULL;
533 test_index = gimple_convert (&test_seq, compare_type, test_index);
534 gsi_insert_seq_before (test_gsi, test_seq, GSI_SAME_STMT);
535
8ac88374 536 /* Provide a definition of each mask in the group. */
537 tree next_mask = NULL_TREE;
538 tree mask;
539 unsigned int i;
60b29a7e 540 FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask)
541 {
542 /* Previous masks will cover BIAS scalars. This mask covers the
543 next batch. */
544 poly_uint64 bias = nscalars_per_mask * i;
545 tree bias_tree = build_int_cst (compare_type, bias);
546 gimple *tmp_stmt;
547
548 /* See whether the first iteration of the vector loop is known
549 to have a full mask. */
550 poly_uint64 const_limit;
551 bool first_iteration_full
6753a4bf 552 = (poly_int_tree_p (first_limit, &const_limit)
60b29a7e 553 && known_ge (const_limit, (i + 1) * nscalars_per_mask));
554
555 /* Rather than have a new IV that starts at BIAS and goes up to
556 TEST_LIMIT, prefer to use the same 0-based IV for each mask
557 and adjust the bound down by BIAS. */
558 tree this_test_limit = test_limit;
559 if (i != 0)
560 {
561 this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
562 compare_type, this_test_limit,
563 bias_tree);
564 this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
565 compare_type, this_test_limit,
566 bias_tree);
567 }
568
6753a4bf 569 /* Create the initial mask. First include all scalars that
570 are within the loop limit. */
60b29a7e 571 tree init_mask = NULL_TREE;
572 if (!first_iteration_full)
573 {
574 tree start, end;
6753a4bf 575 if (first_limit == test_limit)
60b29a7e 576 {
577 /* Use a natural test between zero (the initial IV value)
578 and the loop limit. The "else" block would be valid too,
579 but this choice can avoid the need to load BIAS_TREE into
580 a register. */
581 start = zero_index;
582 end = this_test_limit;
583 }
584 else
585 {
6753a4bf 586 /* FIRST_LIMIT is the maximum number of scalars handled by the
587 first iteration of the vector loop. Test the portion
588 associated with this mask. */
60b29a7e 589 start = bias_tree;
6753a4bf 590 end = first_limit;
60b29a7e 591 }
592
593 init_mask = make_temp_ssa_name (mask_type, NULL, "max_mask");
594 tmp_stmt = vect_gen_while (init_mask, start, end);
595 gimple_seq_add_stmt (preheader_seq, tmp_stmt);
596 }
597
6753a4bf 598 /* Now AND out the bits that are within the number of skipped
599 scalars. */
600 poly_uint64 const_skip;
601 if (nscalars_skip
602 && !(poly_int_tree_p (nscalars_skip, &const_skip)
603 && known_le (const_skip, bias)))
604 {
605 tree unskipped_mask = vect_gen_while_not (preheader_seq, mask_type,
606 bias_tree, nscalars_skip);
607 if (init_mask)
608 init_mask = gimple_build (preheader_seq, BIT_AND_EXPR, mask_type,
609 init_mask, unskipped_mask);
610 else
611 init_mask = unskipped_mask;
612 }
613
60b29a7e 614 if (!init_mask)
615 /* First iteration is full. */
616 init_mask = build_minus_one_cst (mask_type);
617
618 /* Get the mask value for the next iteration of the loop. */
619 next_mask = make_temp_ssa_name (mask_type, NULL, "next_mask");
620 gcall *call = vect_gen_while (next_mask, test_index, this_test_limit);
621 gsi_insert_before (test_gsi, call, GSI_SAME_STMT);
622
623 vect_set_loop_mask (loop, mask, init_mask, next_mask);
624 }
625 return next_mask;
626}
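/* Sketch of the structure generated by this function together with its
   caller for the simplest case (a single mask, NSCALARS_PER_ITER == 1,
   no skipped iterations, no risk of wrapping); the SSA names and the
   WHILE_ULT spelling are illustrative only:

     preheader:
       max_mask_0 = .WHILE_ULT (0, nscalars_total);

     loop:
       mask = PHI <max_mask_0 (preheader), next_mask_1 (latch)>;
       ... statements predicated on MASK ...
       index_after_incr = index_before_incr + nscalars_step;
       next_mask_1 = .WHILE_ULT (index_after_incr, nscalars_total);
       if (next_mask_1 != { 0, ... })
         goto loop;
       else
         exit;  */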
627
628/* Make LOOP iterate NITERS times using masking and WHILE_ULT calls.
629 LOOP_VINFO describes the vectorization of LOOP. NITERS is the
53771608 630 number of iterations of the original scalar loop that should be
631 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are
632 as for vect_set_loop_condition.
60b29a7e 633
634 Insert the branch-back condition before LOOP_COND_GSI and return the
635 final gcond. */
636
637static gcond *
638vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo,
639 tree niters, tree final_iv,
640 bool niters_maybe_zero,
641 gimple_stmt_iterator loop_cond_gsi)
642{
643 gimple_seq preheader_seq = NULL;
644 gimple_seq header_seq = NULL;
645
646 tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
647 unsigned int compare_precision = TYPE_PRECISION (compare_type);
60b29a7e 648 tree orig_niters = niters;
649
650 /* Type of the initial value of NITERS. */
651 tree ni_actual_type = TREE_TYPE (niters);
652 unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
ef871d99 653 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
60b29a7e 654
655 /* Convert NITERS to the same size as the compare. */
656 if (compare_precision > ni_actual_precision
657 && niters_maybe_zero)
658 {
659 /* We know that there is always at least one iteration, so if the
660 count is zero then it must have wrapped. Cope with this by
661 subtracting 1 before the conversion and adding 1 to the result. */
662 gcc_assert (TYPE_UNSIGNED (ni_actual_type));
663 niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
664 niters, build_minus_one_cst (ni_actual_type));
665 niters = gimple_convert (&preheader_seq, compare_type, niters);
666 niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
667 niters, build_one_cst (compare_type));
668 }
669 else
670 niters = gimple_convert (&preheader_seq, compare_type, niters);
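/* Worked example of the conversion above (assumed widths, not part of the
   original source): if NITERS has an 8-bit unsigned type and holds 0
   because the real count of 256 wrapped, then with a 16-bit COMPARE_TYPE
   the sequence computes 0 + 255 == 255 in 8 bits, widens it to 255 in
   16 bits and adds 1 to recover 256, whereas a plain conversion would
   have produced 0.  */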
671
ef871d99 672 widest_int iv_limit = vect_iv_limit_for_full_masking (loop_vinfo);
60b29a7e 673
674 /* Iterate over all the rgroups and fill in their masks. We could use
675 the first mask from any rgroup for the loop condition; here we
676 arbitrarily pick the last. */
677 tree test_mask = NULL_TREE;
678 rgroup_masks *rgm;
679 unsigned int i;
680 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
681 FOR_EACH_VEC_ELT (*masks, i, rgm)
682 if (!rgm->masks.is_empty ())
683 {
684 /* First try using permutes. This adds a single vector
685 instruction to the loop for each mask, but needs no extra
686 loop invariants or IVs. */
687 unsigned int nmasks = i + 1;
688 if ((nmasks & 1) == 0)
689 {
690 rgroup_masks *half_rgm = &(*masks)[nmasks / 2 - 1];
691 if (!half_rgm->masks.is_empty ()
692 && vect_maybe_permute_loop_masks (&header_seq, rgm, half_rgm))
693 continue;
694 }
695
696 /* See whether zero-based IV would ever generate all-false masks
697 before wrapping around. */
698 bool might_wrap_p
ef871d99 699 = (iv_limit == -1
60b29a7e 700 || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter,
701 UNSIGNED)
702 > compare_precision));
703
704 /* Set up all masks for this group. */
705 test_mask = vect_set_loop_masks_directly (loop, loop_vinfo,
706 &preheader_seq,
8ac88374 707 loop_cond_gsi, rgm,
6753a4bf 708 niters, niters_skip,
709 might_wrap_p);
60b29a7e 710 }
711
712 /* Emit all accumulated statements. */
713 add_preheader_seq (loop, preheader_seq);
714 add_header_seq (loop, header_seq);
715
716 /* Get a boolean result that tells us whether to iterate. */
717 edge exit_edge = single_exit (loop);
718 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
719 tree zero_mask = build_zero_cst (TREE_TYPE (test_mask));
720 gcond *cond_stmt = gimple_build_cond (code, test_mask, zero_mask,
721 NULL_TREE, NULL_TREE);
722 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
723
724 /* The loop iterates (NITERS - 1) / VF + 1 times.
725 Subtract one from this to get the latch count. */
726 tree step = build_int_cst (compare_type,
727 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
728 tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
729 build_minus_one_cst (compare_type));
730 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
731 niters_minus_one, step);
732
733 if (final_iv)
734 {
735 gassign *assign = gimple_build_assign (final_iv, orig_niters);
736 gsi_insert_on_edge_immediate (single_exit (loop), assign);
737 }
738
739 return cond_stmt;
740}
741
742/* Like vect_set_loop_condition, but handle the case in which there
743 are no loop masks. */
744
745static gcond *
746vect_set_loop_condition_unmasked (struct loop *loop, tree niters,
747 tree step, tree final_iv,
748 bool niters_maybe_zero,
749 gimple_stmt_iterator loop_cond_gsi)
fb85abff 750{
751 tree indx_before_incr, indx_after_incr;
1a91d914 752 gcond *cond_stmt;
753 gcond *orig_cond;
cde959e7 754 edge pe = loop_preheader_edge (loop);
fb85abff 755 edge exit_edge = single_exit (loop);
fb85abff 756 gimple_stmt_iterator incr_gsi;
757 bool insert_after;
fb85abff 758 enum tree_code code;
cde959e7 759 tree niters_type = TREE_TYPE (niters);
fb85abff 760
761 orig_cond = get_loop_exit_condition (loop);
762 gcc_assert (orig_cond);
763 loop_cond_gsi = gsi_for_stmt (orig_cond);
764
cde959e7 765 tree init, limit;
766 if (!niters_maybe_zero && integer_onep (step))
767 {
768 /* In this case we can use a simple 0-based IV:
769
770 A:
771 x = 0;
772 do
773 {
774 ...
775 x += 1;
776 }
777 while (x < NITERS); */
778 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
779 init = build_zero_cst (niters_type);
780 limit = niters;
781 }
782 else
783 {
784 /* The following works for all values of NITERS except 0:
785
786 B:
787 x = 0;
788 do
789 {
790 ...
791 x += STEP;
792 }
793 while (x <= NITERS - STEP);
794
795 so that the loop continues to iterate if x + STEP - 1 < NITERS
796 but stops if x + STEP - 1 >= NITERS.
797
798 However, if NITERS is zero, x never hits a value above NITERS - STEP
799 before wrapping around. There are two obvious ways of dealing with
800 this:
801
802 - start at STEP - 1 and compare x before incrementing it
803 - start at -1 and compare x after incrementing it
804
805 The latter is simpler and is what we use. The loop in this case
806 looks like:
807
808 C:
809 x = -1;
810 do
811 {
812 ...
813 x += STEP;
814 }
815 while (x < NITERS - STEP);
816
817 In both cases the loop limit is NITERS - STEP. */
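/* Worked example of why case C is used (assumed an 8-bit unsigned IV,
   STEP == 4 and NITERS == 0, i.e. 256 scalar iterations; not part of the
   original source): the limit NITERS - STEP is 252.  Under scheme B the
   IV takes the values 4, 8, ..., 252, 0, 4, ... and never exceeds 252,
   so the loop would not terminate.  Under scheme C the IV starts at 255,
   wraps to 3 after the first increment and then takes 7, 11, ..., 251,
   255; the value 255 fails the "x < 252" test after exactly
   256 / 4 == 64 iterations, as required.  */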
818 gimple_seq seq = NULL;
819 limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
820 limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
821 if (seq)
822 {
823 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
824 gcc_assert (!new_bb);
825 }
826 if (niters_maybe_zero)
827 {
828 /* Case C. */
829 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
830 init = build_all_ones_cst (niters_type);
831 }
832 else
833 {
834 /* Case B. */
835 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
836 init = build_zero_cst (niters_type);
837 }
838 }
839
fb85abff 840 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
841 create_iv (init, step, NULL_TREE, loop,
842 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
fb85abff 843 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
844 true, NULL_TREE, true,
845 GSI_SAME_STMT);
cde959e7 846 limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
fb85abff 847 true, GSI_SAME_STMT);
848
cde959e7 849 cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
fb85abff 850 NULL_TREE);
851
852 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
853
56c0f85a 854 /* Record the number of latch iterations. */
cde959e7 855 if (limit == niters)
856 /* Case A: the loop iterates NITERS times. Subtract one to get the
857 latch count. */
858 loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
859 build_int_cst (niters_type, 1));
860 else
861 /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
862 Subtract one from this to get the latch count. */
863 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
864 limit, step);
865
866 if (final_iv)
867 {
868 gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
869 indx_after_incr, init);
870 gsi_insert_on_edge_immediate (single_exit (loop), assign);
871 }
60b29a7e 872
873 return cond_stmt;
874}
875
876/* If we're using fully-masked loops, make LOOP iterate:
877
878 N == (NITERS - 1) / STEP + 1
879
880 times. When NITERS is zero, this is equivalent to making the loop
881 execute (1 << M) / STEP times, where M is the precision of NITERS.
882 NITERS_MAYBE_ZERO is true if this last case might occur.
883
884 If we're not using fully-masked loops, make LOOP iterate:
885
886 N == (NITERS - STEP) / STEP + 1
887
888 times, where NITERS is known to be outside the range [1, STEP - 1].
889 This is equivalent to making the loop execute NITERS / STEP times
890 when NITERS is nonzero and (1 << M) / STEP times otherwise.
891 NITERS_MAYBE_ZERO again indicates whether this last case might occur.
892
893 If FINAL_IV is nonnull, it is an SSA name that should be set to
894 N * STEP on exit from the loop.
895
896 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
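/* Worked example of the two formulas above (assumed values, not part of
   the original source): with NITERS == 10 and STEP == 4, a fully-masked
   loop runs (10 - 1) / 4 + 1 == 3 times, the last time with a partial
   mask.  Without masking, NITERS is known to lie outside [1, STEP - 1];
   for NITERS == 12 the loop runs (12 - 4) / 4 + 1 == 3 times, which is
   simply 12 / 4.  */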
897
898void
899vect_set_loop_condition (struct loop *loop, loop_vec_info loop_vinfo,
900 tree niters, tree step, tree final_iv,
901 bool niters_maybe_zero)
902{
903 gcond *cond_stmt;
904 gcond *orig_cond = get_loop_exit_condition (loop);
905 gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
906
907 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
908 cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
909 final_iv, niters_maybe_zero,
910 loop_cond_gsi);
911 else
912 cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
913 final_iv, niters_maybe_zero,
914 loop_cond_gsi);
915
916 /* Remove old loop exit test. */
f525c1af 917 stmt_vec_info orig_cond_info;
918 if (loop_vinfo
919 && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
920 loop_vinfo->remove_stmt (orig_cond_info);
921 else
922 gsi_remove (&loop_cond_gsi, true);
60b29a7e 923
924 if (dump_enabled_p ())
a4e972e3 925 dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
926 cond_stmt);
fb85abff 927}
928
c71d3c24 929/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
 930 For each PHI argument on edge FROM in FROM->dest and the corresponding
 931 PHI argument on edge TO in TO->dest, ensure that the TO argument's
 932 current_def is set to the current_def of the FROM argument. */
933
934static void
935slpeel_duplicate_current_defs_from_edges (edge from, edge to)
936{
937 gimple_stmt_iterator gsi_from, gsi_to;
938
939 for (gsi_from = gsi_start_phis (from->dest),
940 gsi_to = gsi_start_phis (to->dest);
e87502d6 941 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
c71d3c24 942 {
42acab1c 943 gimple *from_phi = gsi_stmt (gsi_from);
944 gimple *to_phi = gsi_stmt (gsi_to);
c71d3c24 945 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
e3243c77 946 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
947 if (virtual_operand_p (from_arg))
948 {
e87502d6 949 gsi_next (&gsi_from);
950 continue;
951 }
e3243c77 952 if (virtual_operand_p (to_arg))
953 {
e87502d6 954 gsi_next (&gsi_to);
955 continue;
956 }
e3243c77 957 if (TREE_CODE (from_arg) != SSA_NAME)
958 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
68675984 959 else if (TREE_CODE (to_arg) == SSA_NAME
960 && from_arg != to_arg)
e3243c77 961 {
962 if (get_current_def (to_arg) == NULL_TREE)
6bae816f 963 {
964 gcc_assert (types_compatible_p (TREE_TYPE (to_arg),
965 TREE_TYPE (get_current_def
966 (from_arg))));
967 set_current_def (to_arg, get_current_def (from_arg));
968 }
e3243c77 969 }
e87502d6 970 gsi_next (&gsi_from);
971 gsi_next (&gsi_to);
c71d3c24 972 }
e3243c77 973
974 gphi *from_phi = get_virtual_phi (from->dest);
975 gphi *to_phi = get_virtual_phi (to->dest);
976 if (from_phi)
977 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
978 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
c71d3c24 979}
980
fb85abff 981
48e1416a 982/* Given LOOP this function generates a new copy of it and puts it
c71d3c24 983 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
984 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
985 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
986 entry or exit of LOOP. */
fb85abff 987
988struct loop *
c71d3c24 989slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
990 struct loop *scalar_loop, edge e)
fb85abff 991{
992 struct loop *new_loop;
8a61cfb8 993 basic_block *new_bbs, *bbs, *pbbs;
fb85abff 994 bool at_exit;
995 bool was_imm_dom;
48e1416a 996 basic_block exit_dest;
fb85abff 997 edge exit, new_exit;
5ee742c4 998 bool duplicate_outer_loop = false;
fb85abff 999
c9b2c569 1000 exit = single_exit (loop);
1001 at_exit = (e == exit);
fb85abff 1002 if (!at_exit && e != loop_preheader_edge (loop))
1003 return NULL;
1004
c71d3c24 1005 if (scalar_loop == NULL)
1006 scalar_loop = loop;
1007
1008 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
8a61cfb8 1009 pbbs = bbs + 1;
1010 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
5ee742c4 1011 /* Allow duplication of outer loops. */
1012 if (scalar_loop->inner)
1013 duplicate_outer_loop = true;
fb85abff 1014 /* Check whether duplication is possible. */
8a61cfb8 1015 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
fb85abff 1016 {
1017 free (bbs);
1018 return NULL;
1019 }
1020
1021 /* Generate new loop structure. */
c71d3c24 1022 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
1023 duplicate_subloops (scalar_loop, new_loop);
fb85abff 1024
c9b2c569 1025 exit_dest = exit->dest;
48e1416a 1026 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
1027 exit_dest) == loop->header ?
fb85abff 1028 true : false);
1029
c9b2c569 1030 /* Also copy the pre-header, this avoids jumping through hoops to
1031 duplicate the loop entry PHI arguments. Create an empty
1032 pre-header unconditionally for this. */
c71d3c24 1033 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
c9b2c569 1034 edge entry_e = single_pred_edge (preheader);
8a61cfb8 1035 bbs[0] = preheader;
c71d3c24 1036 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
fb85abff 1037
c71d3c24 1038 exit = single_exit (scalar_loop);
1039 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
fb85abff 1040 &exit, 1, &new_exit, NULL,
8a61cfb8 1041 at_exit ? loop->latch : e->src, true);
c71d3c24 1042 exit = single_exit (loop);
8a61cfb8 1043 basic_block new_preheader = new_bbs[0];
fb85abff 1044
c71d3c24 1045 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
fb85abff 1046
c71d3c24 1047 if (scalar_loop != loop)
1048 {
1049 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
1050 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
1051 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
1052 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
1053 header) to have current_def set, so copy them over. */
1054 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
1055 exit);
1056 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
1057 0),
1058 EDGE_SUCC (loop->latch, 0));
1059 }
48e1416a 1060
fb85abff 1061 if (at_exit) /* Add the loop copy at exit. */
1062 {
c71d3c24 1063 if (scalar_loop != loop)
1064 {
1a91d914 1065 gphi_iterator gsi;
c71d3c24 1066 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
1067
1068 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
1069 gsi_next (&gsi))
1070 {
1a91d914 1071 gphi *phi = gsi.phi ();
c71d3c24 1072 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
1073 location_t orig_locus
1074 = gimple_phi_arg_location_from_edge (phi, e);
1075
1076 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
1077 }
1078 }
c9b2c569 1079 redirect_edge_and_branch_force (e, new_preheader);
1080 flush_pending_stmts (e);
1081 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
5ee742c4 1082 if (was_imm_dom || duplicate_outer_loop)
c71d3c24 1083 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
c9b2c569 1084
 1085 /* And remove the now-unnecessary forwarder again. Keep the other
1086 one so we have a proper pre-header for the loop at the exit edge. */
c71d3c24 1087 redirect_edge_pred (single_succ_edge (preheader),
1088 single_pred (preheader));
c9b2c569 1089 delete_basic_block (preheader);
c71d3c24 1090 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1091 loop_preheader_edge (scalar_loop)->src);
fb85abff 1092 }
1093 else /* Add the copy at entry. */
1094 {
c71d3c24 1095 if (scalar_loop != loop)
1096 {
 1097 /* Remove the now-unnecessary forwarder of scalar_loop again. */
1098 redirect_edge_pred (single_succ_edge (preheader),
1099 single_pred (preheader));
1100 delete_basic_block (preheader);
1101 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1102 loop_preheader_edge (scalar_loop)->src);
1103 preheader = split_edge (loop_preheader_edge (loop));
1104 entry_e = single_pred_edge (preheader);
1105 }
1106
c9b2c569 1107 redirect_edge_and_branch_force (entry_e, new_preheader);
1108 flush_pending_stmts (entry_e);
1109 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
1110
1111 redirect_edge_and_branch_force (new_exit, preheader);
1112 flush_pending_stmts (new_exit);
1113 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
1114
 1115 /* And remove the now-unnecessary forwarder again. Keep the other
1116 one so we have a proper pre-header for the loop at the exit edge. */
c71d3c24 1117 redirect_edge_pred (single_succ_edge (new_preheader),
1118 single_pred (new_preheader));
c9b2c569 1119 delete_basic_block (new_preheader);
1120 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
1121 loop_preheader_edge (new_loop)->src);
fb85abff 1122 }
1123
6e429c5c 1124 /* Skip new preheader since it's deleted if copy loop is added at entry. */
1125 for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
5ee742c4 1126 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
c9b2c569 1127
c71d3c24 1128 if (scalar_loop != loop)
1129 {
1130 /* Update new_loop->header PHIs, so that on the preheader
1131 edge they are the ones from loop rather than scalar_loop. */
1a91d914 1132 gphi_iterator gsi_orig, gsi_new;
c71d3c24 1133 edge orig_e = loop_preheader_edge (loop);
1134 edge new_e = loop_preheader_edge (new_loop);
1135
1136 for (gsi_orig = gsi_start_phis (loop->header),
1137 gsi_new = gsi_start_phis (new_loop->header);
1138 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
1139 gsi_next (&gsi_orig), gsi_next (&gsi_new))
1140 {
1a91d914 1141 gphi *orig_phi = gsi_orig.phi ();
1142 gphi *new_phi = gsi_new.phi ();
c71d3c24 1143 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1144 location_t orig_locus
1145 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1146
1147 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
1148 }
1149 }
1150
fb85abff 1151 free (new_bbs);
1152 free (bbs);
1153
382ecba7 1154 checking_verify_dominators (CDI_DOMINATORS);
c9b2c569 1155
fb85abff 1156 return new_loop;
1157}
1158
1159
6c6a3430 1160/* Given the condition expression COND, put it as the last statement of
1161 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
1162 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
15492f79 1163 skip the loop. PROBABILITY is the skip edge's probability. Mark the
1164 new edge as irreducible if IRREDUCIBLE_P is true. */
fb85abff 1165
1166static edge
23a3430d 1167slpeel_add_loop_guard (basic_block guard_bb, tree cond,
6c6a3430 1168 basic_block guard_to, basic_block dom_bb,
720cfc43 1169 profile_probability probability, bool irreducible_p)
fb85abff 1170{
1171 gimple_stmt_iterator gsi;
1172 edge new_e, enter_e;
1a91d914 1173 gcond *cond_stmt;
fb85abff 1174 gimple_seq gimplify_stmt_list = NULL;
1175
1176 enter_e = EDGE_SUCC (guard_bb, 0);
1177 enter_e->flags &= ~EDGE_FALLTHRU;
1178 enter_e->flags |= EDGE_FALSE_VALUE;
1179 gsi = gsi_last_bb (guard_bb);
1180
87ae3989 1181 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
1182 NULL_TREE);
23a3430d 1183 if (gimplify_stmt_list)
6c6a3430 1184 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
fb85abff 1185
6c6a3430 1186 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
fb85abff 1187 gsi = gsi_last_bb (guard_bb);
1188 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1189
1190 /* Add new edge to connect guard block to the merge/loop-exit block. */
6c6a3430 1191 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
877584e4 1192
877584e4 1193 new_e->probability = probability;
15492f79 1194 if (irreducible_p)
1195 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
1196
720cfc43 1197 enter_e->probability = probability.invert ();
6c6a3430 1198 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
a62a4a78 1199
1200 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
1201 if (enter_e->dest->loop_father->header == enter_e->dest)
1202 split_edge (enter_e);
1203
fb85abff 1204 return new_e;
1205}
1206
1207
1208/* This function verifies that the following restrictions apply to LOOP:
5ee742c4 1209 (1) it consists of exactly 2 basic blocks (a header and an empty latch)
 1210 for an innermost loop, or of 5 basic blocks for an outer loop.
1211 (2) it is single entry, single exit
1212 (3) its exit condition is the last stmt in the header
1213 (4) E is the entry/exit edge of LOOP.
fb85abff 1214 */
1215
1216bool
1217slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
1218{
1219 edge exit_e = single_exit (loop);
1220 edge entry_e = loop_preheader_edge (loop);
1a91d914 1221 gcond *orig_cond = get_loop_exit_condition (loop);
fb85abff 1222 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
5ee742c4 1223 unsigned int num_bb = loop->inner? 5 : 2;
fb85abff 1224
51eb70a9 1225 /* All loops have an outer scope; the only case loop->outer is NULL is for
1226 the function itself. */
1227 if (!loop_outer (loop)
5ee742c4 1228 || loop->num_nodes != num_bb
fb85abff 1229 || !empty_block_p (loop->latch)
1230 || !single_exit (loop)
1231 /* Verify that new loop exit condition can be trivially modified. */
1232 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1233 || (e != exit_e && e != entry_e))
1234 return false;
1235
1236 return true;
1237}
1238
6c6a3430 1239/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1240 in the exit bb and rename all the uses after the loop. This simplifies
1241 the *guard[12] routines, which assume loop closed SSA form for all PHIs
1242 (but normally loop closed SSA form doesn't require virtual PHIs to be
 1243 in the same form). Doing this early simplifies checking which
 1244 uses should be renamed. */
fb85abff 1245
1246static void
6c6a3430 1247create_lcssa_for_virtual_phi (struct loop *loop)
fb85abff 1248{
1a91d914 1249 gphi_iterator gsi;
fb85abff 1250 edge exit_e = single_exit (loop);
48e1416a 1251
38091110 1252 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
7c782c9b 1253 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
38091110 1254 {
1a91d914 1255 gphi *phi = gsi.phi ();
38091110 1256 for (gsi = gsi_start_phis (exit_e->dest);
1257 !gsi_end_p (gsi); gsi_next (&gsi))
7c782c9b 1258 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
38091110 1259 break;
1260 if (gsi_end_p (gsi))
1261 {
f9e245b2 1262 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
1a91d914 1263 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
38091110 1264 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1265 imm_use_iterator imm_iter;
42acab1c 1266 gimple *stmt;
38091110 1267 use_operand_p use_p;
1268
6c99cfac 1269 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
1270 = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
60d535d2 1271 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
38091110 1272 gimple_phi_set_result (new_phi, new_vop);
1273 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
480233e7 1274 if (stmt != new_phi
1275 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
38091110 1276 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1277 SET_USE (use_p, new_vop);
1278 }
1279 break;
1280 }
fb85abff 1281
fb85abff 1282}
1283
1284/* Function vect_get_loop_location.
1285
1286 Extract the location of the loop in the source code.
1287 If the loop is not well formed for vectorization, an estimated
1288 location is calculated.
 1289 Return the loop location if it can be determined, and an unknown location otherwise. */
1290
c309657f 1291dump_user_location_t
fb85abff 1292find_loop_location (struct loop *loop)
1293{
42acab1c 1294 gimple *stmt = NULL;
fb85abff 1295 basic_block bb;
1296 gimple_stmt_iterator si;
1297
1298 if (!loop)
c309657f 1299 return dump_user_location_t ();
fb85abff 1300
1301 stmt = get_loop_exit_condition (loop);
1302
b2d978a6 1303 if (stmt
1304 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
c309657f 1305 return stmt;
fb85abff 1306
1307 /* If we got here the loop is probably not "well formed",
1308 try to estimate the loop location */
1309
1310 if (!loop->header)
c309657f 1311 return dump_user_location_t ();
fb85abff 1312
1313 bb = loop->header;
1314
1315 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1316 {
1317 stmt = gsi_stmt (si);
b2d978a6 1318 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
c309657f 1319 return stmt;
fb85abff 1320 }
1321
c309657f 1322 return dump_user_location_t ();
fb85abff 1323}
1324
e068828a 1325/* Return true if the phi described by STMT_INFO defines an IV of the
1326 loop to be vectorized. */
6c6a3430 1327
1328static bool
e068828a 1329iv_phi_p (stmt_vec_info stmt_info)
6c6a3430 1330{
e068828a 1331 gphi *phi = as_a <gphi *> (stmt_info->stmt);
6c6a3430 1332 if (virtual_operand_p (PHI_RESULT (phi)))
1333 return false;
1334
6c6a3430 1335 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1336 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
1337 return false;
1338
1339 return true;
1340}
fb85abff 1341
fb85abff 1342/* Function vect_can_advance_ivs_p
1343
48e1416a 1344 In case the number of iterations that LOOP iterates is unknown at compile
1345 time, an epilog loop will be generated, and the loop induction variables
1346 (IVs) will be "advanced" to the value they are supposed to take just before
fb85abff 1347 the epilog loop. Here we check that the access function of the loop IVs
1348 and the expression that represents the loop bound are simple enough.
1349 These restrictions will be relaxed in the future. */
1350
48e1416a 1351bool
fb85abff 1352vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1353{
1354 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1355 basic_block bb = loop->header;
1a91d914 1356 gphi_iterator gsi;
fb85abff 1357
1358 /* Analyze phi functions of the loop header. */
1359
6d8fb6cf 1360 if (dump_enabled_p ())
78bb46f5 1361 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
fb85abff 1362 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1363 {
fb85abff 1364 tree evolution_part;
1365
6c6a3430 1366 gphi *phi = gsi.phi ();
1c2fef9a 1367 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
6d8fb6cf 1368 if (dump_enabled_p ())
a4e972e3 1369 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
1370 phi_info->stmt);
fb85abff 1371
1372 /* Skip virtual phi's. The data dependences that are associated with
6c6a3430 1373 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
fb85abff 1374
6c6a3430 1375 Skip reduction phis. */
e068828a 1376 if (!iv_phi_p (phi_info))
fb85abff 1377 {
6d8fb6cf 1378 if (dump_enabled_p ())
6c6a3430 1379 dump_printf_loc (MSG_NOTE, vect_location,
1380 "reduc or virtual phi. skip.\n");
fb85abff 1381 continue;
1382 }
1383
fb85abff 1384 /* Analyze the evolution function. */
1385
1c2fef9a 1386 evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
fb85abff 1387 if (evolution_part == NULL_TREE)
1388 {
6d8fb6cf 1389 if (dump_enabled_p ())
2cd0995e 1390 dump_printf (MSG_MISSED_OPTIMIZATION,
78bb46f5 1391 "No access function or evolution.\n");
fb85abff 1392 return false;
1393 }
48e1416a 1394
12117f39 1395 /* FORNOW: We do not transform initial conditions of IVs
1396 which evolution functions are not invariants in the loop. */
1397
1398 if (!expr_invariant_in_loop_p (loop, evolution_part))
1399 {
1400 if (dump_enabled_p ())
1401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1402 "evolution not invariant in loop.\n");
1403 return false;
1404 }
1405
48e1416a 1406 /* FORNOW: We do not transform initial conditions of IVs
fb85abff 1407 which evolution functions are a polynomial of degree >= 2. */
1408
1409 if (tree_is_chrec (evolution_part))
12117f39 1410 {
1411 if (dump_enabled_p ())
1412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1413 "evolution is chrec.\n");
1414 return false;
1415 }
fb85abff 1416 }
1417
1418 return true;
1419}
1420
1421
1422/* Function vect_update_ivs_after_vectorizer.
1423
1424 "Advance" the induction variables of LOOP to the value they should take
1425 after the execution of LOOP. This is currently necessary because the
1426 vectorizer does not handle induction variables that are used after the
1427 loop. Such a situation occurs when the last iterations of LOOP are
1428 peeled, because:
1429 1. We introduced new uses after LOOP for IVs that were not originally used
1430 after LOOP: the IVs of LOOP are now used by an epilog loop.
1431 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1432 times, whereas the loop IVs should be bumped N times.
1433
1434 Input:
1435 - LOOP - a loop that is going to be vectorized. The last few iterations
1436 of LOOP were peeled.
1437 - NITERS - the number of iterations that LOOP executes (before it is
1438 vectorized). i.e, the number of times the ivs should be bumped.
1439 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1440 coming out from LOOP on which there are uses of the LOOP ivs
1441 (this is the path from LOOP->exit to epilog_loop->preheader).
1442
1443 The new definitions of the ivs are placed in LOOP->exit.
1444 The phi args associated with the edge UPDATE_E in the bb
1445 UPDATE_E->dest are updated accordingly.
1446
1447 Assumption 1: Like the rest of the vectorizer, this function assumes
1448 a single loop exit that has a single predecessor.
1449
1450 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1451 organized in the same order.
1452
1453 Assumption 3: The access function of the ivs is simple enough (see
1454 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1455
1456 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
48e1416a 1457 coming out of LOOP on which the ivs of LOOP are used (this is the path
fb85abff 1458 that leads to the epilog loop; other paths skip the epilog loop). This
1459 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1460 needs to have its phis updated.
1461 */
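/* Worked example (assumed values, not part of the original source): for an
   IV with initial value 3 and step 2, and NITERS == 100 scalar iterations
   handled by the vectorized loop, the value the epilog loop must see is
   3 + 100 * 2 == 203.  This is exactly the expression
   init_expr + niters * step_expr built below and used to rewrite the PHI
   argument on UPDATE_E.  */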
1462
1463static void
6c6a3430 1464vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
1465 tree niters, edge update_e)
fb85abff 1466{
1a91d914 1467 gphi_iterator gsi, gsi1;
6c6a3430 1468 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 1469 basic_block update_bb = update_e->dest;
6c6a3430 1470 basic_block exit_bb = single_exit (loop)->dest;
fb85abff 1471
1472 /* Make sure there exists a single-predecessor exit bb: */
1473 gcc_assert (single_pred_p (exit_bb));
6c6a3430 1474 gcc_assert (single_succ_edge (exit_bb) == update_e);
fb85abff 1475
1476 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1477 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1478 gsi_next (&gsi), gsi_next (&gsi1))
1479 {
fb85abff 1480 tree init_expr;
1efcacec 1481 tree step_expr, off;
1482 tree type;
fb85abff 1483 tree var, ni, ni_name;
1484 gimple_stmt_iterator last_gsi;
1485
6c6a3430 1486 gphi *phi = gsi.phi ();
1487 gphi *phi1 = gsi1.phi ();
1c2fef9a 1488 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
6d8fb6cf 1489 if (dump_enabled_p ())
a4e972e3 1490 dump_printf_loc (MSG_NOTE, vect_location,
1491 "vect_update_ivs_after_vectorizer: phi: %G", phi);
fb85abff 1492
6c6a3430 1493 /* Skip reduction and virtual phis. */
e068828a 1494 if (!iv_phi_p (phi_info))
fb85abff 1495 {
6d8fb6cf 1496 if (dump_enabled_p ())
6c6a3430 1497 dump_printf_loc (MSG_NOTE, vect_location,
1498 "reduc or virtual phi. skip.\n");
fb85abff 1499 continue;
1500 }
1501
86faead7 1502 type = TREE_TYPE (gimple_phi_result (phi));
1c2fef9a 1503 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
86faead7 1504 step_expr = unshare_expr (step_expr);
48e1416a 1505
fb85abff 1506 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1507 of degree >= 2 or exponential. */
86faead7 1508 gcc_assert (!tree_is_chrec (step_expr));
fb85abff 1509
86faead7 1510 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
fb85abff 1511
1efcacec 1512 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1513 fold_convert (TREE_TYPE (step_expr), niters),
1514 step_expr);
86faead7 1515 if (POINTER_TYPE_P (type))
2cc66f2a 1516 ni = fold_build_pointer_plus (init_expr, off);
fb85abff 1517 else
86faead7 1518 ni = fold_build2 (PLUS_EXPR, type,
1519 init_expr, fold_convert (type, off));
fb85abff 1520
86faead7 1521 var = create_tmp_var (type, "tmp");
fb85abff 1522
1523 last_gsi = gsi_last_bb (exit_bb);
6c6a3430 1524 gimple_seq new_stmts = NULL;
1525 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
1526 /* Exit_bb shouldn't be empty. */
1527 if (!gsi_end_p (last_gsi))
1528 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
1529 else
1530 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
48e1416a 1531
fb85abff 1532 /* Fix phi expressions in the successor bb. */
b123eaab 1533 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
fb85abff 1534 }
1535}
1536
6753a4bf 1537/* Return a gimple value containing the misalignment (measured in vector
1538 elements) for the loop described by LOOP_VINFO, i.e. how many elements
1539 it is away from a perfectly aligned address. Add any new statements
1540 to SEQ. */
1541
1542static tree
1543get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
1544{
ec5bf0fb 1545 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
abc9513d 1546 stmt_vec_info stmt_info = dr_info->stmt;
6753a4bf 1547 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1548
e092c20e 1549 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
1550 unsigned HOST_WIDE_INT target_align_c;
1551 tree target_align_minus_1;
6753a4bf 1552
abc9513d 1553 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
1554 size_zero_node) < 0;
6753a4bf 1555 tree offset = (negative
1556 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1557 : size_zero_node);
0219dc42 1558 tree start_addr = vect_create_addr_base_for_vector_ref (stmt_info, seq,
6753a4bf 1559 offset);
1560 tree type = unsigned_type_for (TREE_TYPE (start_addr));
e092c20e 1561 if (target_align.is_constant (&target_align_c))
1562 target_align_minus_1 = build_int_cst (type, target_align_c - 1);
1563 else
1564 {
1565 tree vla = build_int_cst (type, target_align);
1566 tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
1567 fold_build2 (MINUS_EXPR, type,
1568 build_int_cst (type, 0), vla));
1569 target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
1570 build_int_cst (type, 1));
1571 }
1572
6753a4bf 1573 HOST_WIDE_INT elem_size
1574 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1575 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1576
1577 /* Create: misalign_in_bytes = addr & (target_align - 1). */
1578 tree int_start_addr = fold_convert (type, start_addr);
1579 tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
1580 target_align_minus_1);
1581
1582 /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
1583 tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
1584 elem_size_log);
1585
1586 return misalign_in_elems;
1587}
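
/* Worked example (illustrative only): with a 16-byte target alignment
   and 4-byte elements, a start address whose low bits are 0x8 gives
   misalign_in_bytes = 0x8 & 0xf = 8 and misalign_in_elems = 8 >> 2 = 2.  */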
1588
6c6a3430 1589/* Function vect_gen_prolog_loop_niters
97fe80a6 1590
6c6a3430 1591 Generate the number of iterations which should be peeled as prolog for the
1592 loop represented by LOOP_VINFO. It is calculated as the misalignment of
1593 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
1594 As a result, after the execution of this loop, the data reference DR will
1595 refer to an aligned location. The following computation is generated:
fb85abff 1596
1597 If the misalignment of DR is known at compile time:
1598 addr_mis = int mis = DR_MISALIGNMENT (dr);
1599 Else, compute address misalignment in bytes:
6753a4bf 1600 addr_mis = addr & (target_align - 1)
fb85abff 1601
6c6a3430 1602 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
fb85abff 1603
1604 (elem_size = element type size; an element is the scalar element whose type
1605 is the inner type of the vectype)
1606
6c6a3430 1607 The computations will be emitted at the end of BB. We also compute and
82112bf2 1608   store the upper bound (inclusive) of the result in BOUND.
6c6a3430 1609
fb85abff 1610 When the step of the data-ref in the loop is not 1 (as in interleaved data
1611 and SLP), the number of iterations of the prolog must be divided by the step
1612 (which is equal to the size of interleaved group).
1613
1614 The above formulas assume that VF == number of elements in the vector. This
1615 may not hold when there are multiple-types in the loop.
1616 In this case, for some data-references in the loop the VF does not represent
1617 the number of elements that fit in the vector. Therefore, instead of VF we
1618 use TYPE_VECTOR_SUBPARTS. */
1619
48e1416a 1620static tree
6c6a3430 1621vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
1622 basic_block bb, int *bound)
fb85abff 1623{
ec5bf0fb 1624 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
fb85abff 1625 tree var;
6c6a3430 1626 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
1627 gimple_seq stmts = NULL, new_stmts = NULL;
fb85abff 1628 tree iters, iters_name;
abc9513d 1629 stmt_vec_info stmt_info = dr_info->stmt;
fb85abff 1630 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
e092c20e 1631 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
fb85abff 1632
313a5120 1633 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
fb85abff 1634 {
313a5120 1635 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
fb85abff 1636
6d8fb6cf 1637 if (dump_enabled_p ())
b055bc88 1638 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1639 "known peeling = %d.\n", npeel);
fb85abff 1640
0822b158 1641 iters = build_int_cst (niters_type, npeel);
82112bf2 1642 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
fb85abff 1643 }
1644 else
1645 {
6753a4bf 1646 tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
1647 tree type = TREE_TYPE (misalign_in_elems);
aec313e5 1648 HOST_WIDE_INT elem_size
1649 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
e092c20e 1650 /* We only do prolog peeling if the target alignment is known at compile
1651 time. */
1652 poly_uint64 align_in_elems =
1653 exact_div (target_align, elem_size);
1654 tree align_in_elems_minus_1 =
1655 build_int_cst (type, align_in_elems - 1);
aec313e5 1656 tree align_in_elems_tree = build_int_cst (type, align_in_elems);
aec313e5 1657
1658 /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
1659 & (align_in_elems - 1)). */
abc9513d 1660 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
1661 size_zero_node) < 0;
f1b8c740 1662 if (negative)
aec313e5 1663 iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
1664 align_in_elems_tree);
f1b8c740 1665 else
aec313e5 1666 iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
1667 misalign_in_elems);
1668 iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
fb85abff 1669 iters = fold_convert (niters_type, iters);
e092c20e 1670 unsigned HOST_WIDE_INT align_in_elems_c;
1671 if (align_in_elems.is_constant (&align_in_elems_c))
1672 *bound = align_in_elems_c - 1;
1673 else
1674 *bound = -1;
fb85abff 1675 }
1676
6d8fb6cf 1677 if (dump_enabled_p ())
a4e972e3 1678 dump_printf_loc (MSG_NOTE, vect_location,
1679 "niters for prolog loop: %T\n", iters);
fb85abff 1680
1681 var = create_tmp_var (niters_type, "prolog_loop_niters");
6c6a3430 1682 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
fb85abff 1683
6c6a3430 1684 if (new_stmts)
1685 gimple_seq_add_seq (&stmts, new_stmts);
fb85abff 1686 if (stmts)
1687 {
6c6a3430 1688 gcc_assert (single_succ_p (bb));
1689 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1690 if (gsi_end_p (gsi))
1691 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1692 else
1693 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
fb85abff 1694 }
48e1416a 1695 return iters_name;
fb85abff 1696}
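
/* Continuing the example above (illustrative only): with
   align_in_elems = 16 / 4 = 4 and misalign_in_elems = 2, a positive-step
   access needs iters = (4 - 2) & 3 = 2 peeled iterations before the data
   reference becomes 16-byte aligned.  */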
1697
1698
1699/* Function vect_update_init_of_dr
1700
6753a4bf 1701 If CODE is PLUS, the vector loop starts NITERS iterations after the
1702 scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
1703 iterations before the scalar one (using masking to skip inactive
1704 elements). This function updates the information recorded in DR to
1705 account for the difference. Specifically, it updates the OFFSET
1706 field of DR. */
fb85abff 1707
1708static void
6753a4bf 1709vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code)
fb85abff 1710{
1711 tree offset = DR_OFFSET (dr);
48e1416a 1712
fb85abff 1713 niters = fold_build2 (MULT_EXPR, sizetype,
1714 fold_convert (sizetype, niters),
1715 fold_convert (sizetype, DR_STEP (dr)));
6753a4bf 1716 offset = fold_build2 (code, sizetype,
87f9ffa4 1717 fold_convert (sizetype, offset), niters);
fb85abff 1718 DR_OFFSET (dr) = offset;
1719}
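
/* For example (illustrative only): peeling NITERS = 2 iterations of a
   data reference with DR_STEP 4 adds 2 * 4 = 8 bytes to DR_OFFSET when
   CODE is PLUS_EXPR, and subtracts 8 bytes when CODE is MINUS_EXPR
   (the masked-peeling case).  */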
1720
1721
1722/* Function vect_update_inits_of_drs
1723
6753a4bf 1724   Apply vect_update_init_of_dr to all accesses in LOOP_VINFO.
         1725   CODE and NITERS are as for vect_update_init_of_dr. */
fb85abff 1726
1727static void
6753a4bf 1728vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
1729 tree_code code)
fb85abff 1730{
1731 unsigned int i;
f1f41a6c 1732 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
fb85abff 1733 struct data_reference *dr;
6c6a3430 1734
88f6eb8f 1735 DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
6c6a3430 1736
1737 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1738 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1739 {
1740 gimple_seq seq;
1741 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1742 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1743
1744 niters = fold_convert (sizetype, niters);
1745 niters = force_gimple_operand (niters, &seq, false, var);
1746 if (seq)
1747 {
1748 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1749 gcc_assert (!new_bb);
1750 }
1751 }
fb85abff 1752
f1f41a6c 1753 FOR_EACH_VEC_ELT (datarefs, i, dr)
fa681b45 1754 {
db72d3bf 1755 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
1756 if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt))
fa681b45 1757 vect_update_init_of_dr (dr, niters, code);
1758 }
fb85abff 1759}
1760
6753a4bf 1761/* For the information recorded in LOOP_VINFO prepare the loop for peeling
1762 by masking. This involves calculating the number of iterations to
1763 be peeled and then aligning all memory references appropriately. */
1764
1765void
1766vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
1767{
1768 tree misalign_in_elems;
1769 tree type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
1770
1771 gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
1772
1773 /* From the information recorded in LOOP_VINFO get the number of iterations
1774 that need to be skipped via masking. */
1775 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1776 {
1777 poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1778 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
1779 misalign_in_elems = build_int_cst (type, misalign);
1780 }
1781 else
1782 {
1783 gimple_seq seq1 = NULL, seq2 = NULL;
1784 misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
1785 misalign_in_elems = fold_convert (type, misalign_in_elems);
1786 misalign_in_elems = force_gimple_operand (misalign_in_elems,
1787 &seq2, true, NULL_TREE);
1788 gimple_seq_add_seq (&seq1, seq2);
1789 if (seq1)
1790 {
1791 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1792 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
1793 gcc_assert (!new_bb);
1794 }
1795 }
1796
1797 if (dump_enabled_p ())
a4e972e3 1798 dump_printf_loc (MSG_NOTE, vect_location,
1799 "misalignment for fully-masked loop: %T\n",
1800 misalign_in_elems);
6753a4bf 1801
1802 LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
1803
1804 vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
1805}
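
/* For example (illustrative only): with VF = 8 and
   LOOP_VINFO_PEELING_FOR_ALIGNMENT = 5, the number of elements to skip
   by masking is 8 - 5 = 3; the DR offsets are moved back by 3 elements
   and the loop masks generated elsewhere deactivate those 3 leading
   elements on the first iteration.  */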
fb85abff 1806
6c6a3430 1807/* This function builds ni_name = number of iterations. Statements
3a815241 1808 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
         1809   it to TRUE if a new ssa_var is generated. */
6c6a3430 1810
1811tree
3a815241 1812vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
6c6a3430 1813{
1814 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1815 if (TREE_CODE (ni) == INTEGER_CST)
1816 return ni;
1817 else
1818 {
1819 tree ni_name, var;
1820 gimple_seq stmts = NULL;
1821 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1822
1823 var = create_tmp_var (TREE_TYPE (ni), "niters");
1824 ni_name = force_gimple_operand (ni, &stmts, false, var);
1825 if (stmts)
3a815241 1826 {
1827 gsi_insert_seq_on_edge_immediate (pe, stmts);
1828 if (new_var_p != NULL)
1829 *new_var_p = true;
1830 }
6c6a3430 1831
1832 return ni_name;
1833 }
1834}
1835
82112bf2 1836/* Calculate the number of iterations above which the vectorized loop is
         1837   preferred over the scalar loop.  NITERS_PROLOG is the number of iterations
         1838   of the prolog loop.  If it's an integer const, the integer number is also passed
53771608 1839 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the
1840 number of iterations of the prolog loop. BOUND_EPILOG is the corresponding
1841 value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the
1842 threshold below which the scalar (rather than vectorized) loop will be
1843 executed. This function stores the upper bound (inclusive) of the result
1844 in BOUND_SCALAR. */
6c6a3430 1845
1846static tree
1847vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
53771608 1848 int bound_prolog, poly_int64 bound_epilog, int th,
d75596cd 1849 poly_uint64 *bound_scalar,
1850 bool check_profitability)
6c6a3430 1851{
1852 tree type = TREE_TYPE (niters_prolog);
1853 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
53771608 1854 build_int_cst (type, bound_epilog));
6c6a3430 1855
53771608 1856 *bound_scalar = bound_prolog + bound_epilog;
6c6a3430 1857 if (check_profitability)
1858 {
82112bf2 1859 /* TH indicates the minimum niters of vectorized loop, while we
1860 compute the maximum niters of scalar loop. */
1861 th--;
6c6a3430 1862 /* Peeling for constant times. */
1863 if (int_niters_prolog >= 0)
1864 {
53771608 1865 *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
82112bf2 1866 return build_int_cst (type, *bound_scalar);
6c6a3430 1867 }
53771608 1868 /* Peeling an unknown number of times. Note that both BOUND_PROLOG
1869 and BOUND_EPILOG are inclusive upper bounds. */
1870 if (known_ge (th, bound_prolog + bound_epilog))
6c6a3430 1871 {
82112bf2 1872 *bound_scalar = th;
6c6a3430 1873 return build_int_cst (type, th);
1874 }
d75596cd 1875 /* Need to do runtime comparison. */
53771608 1876 else if (maybe_gt (th, bound_epilog))
d75596cd 1877 {
1878 *bound_scalar = upper_bound (*bound_scalar, th);
1879 return fold_build2 (MAX_EXPR, type,
1880 build_int_cst (type, th), niters);
1881 }
6c6a3430 1882 }
1883 return niters;
1884}
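
/* For example (illustrative only): with BOUND_PROLOG = 3, BOUND_EPILOG = 4,
   TH = 8, CHECK_PROFITABILITY true and a non-constant NITERS_PROLOG,
   TH is first decremented to 7; since 7 >= 3 + 4 the function returns the
   constant 7 and sets *BOUND_SCALAR to 7, i.e. the scalar path runs at
   most 7 iterations.  */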
1885
cde959e7 1886/* NITERS is the number of times that the original scalar loop executes
1887 after peeling. Work out the maximum number of iterations N that can
1888 be handled by the vectorized form of the loop and then either:
fb85abff 1889
cde959e7 1890 a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
97fe80a6 1891
cde959e7 1892 niters_vector = N
1893
1894 b) set *STEP_VECTOR_PTR to one and generate:
1895
1896 niters_vector = N / vf
1897
1898 In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
1899 any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
1900 is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */
fb85abff 1901
1902void
6c6a3430 1903vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
cde959e7 1904 tree *niters_vector_ptr, tree *step_vector_ptr,
1905 bool niters_no_overflow)
fb85abff 1906{
6c6a3430 1907 tree ni_minus_gap, var;
cde959e7 1908 tree niters_vector, step_vector, type = TREE_TYPE (niters);
d75596cd 1909 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6c6a3430 1910 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
cde959e7 1911 tree log_vf = NULL_TREE;
6c6a3430 1912
1913 /* If epilogue loop is required because of data accesses with gaps, we
1914 subtract one iteration from the total number of iterations here for
1915 correct calculation of RATIO. */
1916 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1917 {
3a815241 1918 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
1919 build_one_cst (type));
6c6a3430 1920 if (!is_gimple_val (ni_minus_gap))
1921 {
3a815241 1922 var = create_tmp_var (type, "ni_gap");
6c6a3430 1923 gimple *stmts = NULL;
1924 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1925 true, var);
1926 gsi_insert_seq_on_edge_immediate (pe, stmts);
1927 }
1928 }
1929 else
1930 ni_minus_gap = niters;
1931
d75596cd 1932 unsigned HOST_WIDE_INT const_vf;
60b29a7e 1933 if (vf.is_constant (&const_vf)
1934 && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
cde959e7 1935 {
1936 /* Create: niters >> log2(vf) */
1937 /* If it's known that niters == number of latch executions + 1 doesn't
1938 overflow, we can generate niters >> log2(vf); otherwise we generate
1939 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1940 will be at least one. */
d75596cd 1941 log_vf = build_int_cst (type, exact_log2 (const_vf));
cde959e7 1942 if (niters_no_overflow)
1943 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
1944 else
1945 niters_vector
1946 = fold_build2 (PLUS_EXPR, type,
1947 fold_build2 (RSHIFT_EXPR, type,
1948 fold_build2 (MINUS_EXPR, type,
1949 ni_minus_gap,
1950 build_int_cst (type, vf)),
1951 log_vf),
1952 build_int_cst (type, 1));
1953 step_vector = build_one_cst (type);
1954 }
6c6a3430 1955 else
cde959e7 1956 {
1957 niters_vector = ni_minus_gap;
1958 step_vector = build_int_cst (type, vf);
1959 }
6c6a3430 1960
1961 if (!is_gimple_val (niters_vector))
1962 {
3a815241 1963 var = create_tmp_var (type, "bnd");
1964 gimple_seq stmts = NULL;
6c6a3430 1965 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1966 gsi_insert_seq_on_edge_immediate (pe, stmts);
3a815241 1967 /* Peeling algorithm guarantees that vector loop bound is at least ONE,
1968 we set range information to make niters analyzer's life easier. */
cde959e7 1969 if (stmts != NULL && log_vf)
e3d0f65c 1970 set_range_info (niters_vector, VR_RANGE,
1971 wi::to_wide (build_int_cst (type, 1)),
1972 wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
1973 TYPE_MAX_VALUE (type),
1974 log_vf)));
6c6a3430 1975 }
1976 *niters_vector_ptr = niters_vector;
cde959e7 1977 *step_vector_ptr = step_vector;
6c6a3430 1978
1979 return;
1980}
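
/* Two illustrative cases (not tied to any particular target): for
   NITERS = 103, VF = 4, no peeling for gaps, a non-masked loop and
   NITERS_NO_OVERFLOW set, the code emits niters_vector = 103 >> 2 = 25
   with *STEP_VECTOR_PTR = 1; for a fully-masked loop it instead keeps
   niters_vector = 103 and sets *STEP_VECTOR_PTR = 4.  */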
1981
1982/* Given NITERS_VECTOR which is the number of iterations for vectorized
1983 loop specified by LOOP_VINFO after vectorization, compute the number
1984 of iterations before vectorization (niters_vector * vf) and store it
1985 to NITERS_VECTOR_MULT_VF_PTR. */
1986
1987static void
1988vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1989 tree niters_vector,
1990 tree *niters_vector_mult_vf_ptr)
1991{
d75596cd 1992 /* We should be using a step_vector of VF if VF is variable. */
1993 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
fb85abff 1994 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6c6a3430 1995 tree type = TREE_TYPE (niters_vector);
1996 tree log_vf = build_int_cst (type, exact_log2 (vf));
1997 basic_block exit_bb = single_exit (loop)->dest;
fb85abff 1998
6c6a3430 1999 gcc_assert (niters_vector_mult_vf_ptr != NULL);
2000 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
2001 niters_vector, log_vf);
2002 if (!is_gimple_val (niters_vector_mult_vf))
2003 {
2004 tree var = create_tmp_var (type, "niters_vector_mult_vf");
2005 gimple_seq stmts = NULL;
2006 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
2007 &stmts, true, var);
2008 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
2009 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2010 }
2011 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
2012}
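
/* For instance (illustrative only): with niters_vector = 25 and VF = 4
   this computes niters_vector_mult_vf = 25 << 2 = 100, emitting the shift
   in the exit block when niters_vector is not a compile-time constant.  */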
2013
2014/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND
2015 from SECOND/FIRST and puts it at the original loop's preheader/exit
2016 edge, the two loops are arranged as below:
2017
2018 preheader_a:
2019 first_loop:
2020 header_a:
2021 i_1 = PHI<i_0, i_2>;
2022 ...
2023 i_2 = i_1 + 1;
2024 if (cond_a)
2025 goto latch_a;
2026 else
2027 goto between_bb;
2028 latch_a:
2029 goto header_a;
2030
2031 between_bb:
2032 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
2033
2034 second_loop:
2035 header_b:
2036 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
2037 or with i_2 if no LCSSA phi is created
2038 under condition of CREATE_LCSSA_FOR_IV_PHIS.
2039 ...
2040 i_4 = i_3 + 1;
2041 if (cond_b)
2042 goto latch_b;
2043 else
2044 goto exit_bb;
2045 latch_b:
2046 goto header_b;
2047
2048 exit_bb:
2049
2050 This function creates loop closed SSA for the first loop; update the
2051 second loop's PHI nodes by replacing argument on incoming edge with the
         2052   result of the newly created lcssa PHI nodes.  If CREATE_LCSSA_FOR_IV_PHIS
         2053   is false, loop closed ssa phis will only be created for non-iv phis of
         2054   the first loop.
2055
         2056   This function assumes the exit bb of the first loop is the preheader bb of
         2057   the second loop, i.e., between_bb in the example code.  With PHIs updated,
         2058   the second loop will execute the remaining iterations of the first.  */
fb85abff 2059
6c6a3430 2060static void
2061slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
2062 struct loop *first, struct loop *second,
2063 bool create_lcssa_for_iv_phis)
2064{
2065 gphi_iterator gsi_update, gsi_orig;
2066 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2067
2068 edge first_latch_e = EDGE_SUCC (first->latch, 0);
2069 edge second_preheader_e = loop_preheader_edge (second);
2070 basic_block between_bb = single_exit (first)->dest;
2071
2072 gcc_assert (between_bb == second_preheader_e->src);
2073 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
2074 /* Either the first loop or the second is the loop to be vectorized. */
2075 gcc_assert (loop == first || loop == second);
2076
2077 for (gsi_orig = gsi_start_phis (first->header),
2078 gsi_update = gsi_start_phis (second->header);
2079 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2080 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2081 {
2082 gphi *orig_phi = gsi_orig.phi ();
2083 gphi *update_phi = gsi_update.phi ();
2084
2085 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
2086 /* Generate lcssa PHI node for the first loop. */
2087 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
e068828a 2088 stmt_vec_info vect_phi_info = loop_vinfo->lookup_stmt (vect_phi);
2089 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi_info))
6c6a3430 2090 {
2091 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2092 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
2093 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
2094 arg = new_res;
2095 }
2096
2097 /* Update PHI node in the second loop by replacing arg on the loop's
2098 incoming edge. */
2099 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
2100 }
2101}
2102
2103/* Function slpeel_add_loop_guard adds guard skipping from the beginning
2104 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
2105 are two pred edges of the merge point before UPDATE_LOOP. The two loops
2106 appear like below:
2107
2108 guard_bb:
2109 if (cond)
2110 goto merge_bb;
2111 else
2112 goto skip_loop;
2113
2114 skip_loop:
2115 header_a:
2116 i_1 = PHI<i_0, i_2>;
2117 ...
2118 i_2 = i_1 + 1;
2119 if (cond_a)
2120 goto latch_a;
2121 else
2122 goto exit_a;
2123 latch_a:
2124 goto header_a;
2125
2126 exit_a:
2127 i_5 = PHI<i_2>;
2128
2129 merge_bb:
2130 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
2131
2132 update_loop:
2133 header_b:
2134 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
2135 ...
2136 i_4 = i_3 + 1;
2137 if (cond_b)
2138 goto latch_b;
2139 else
2140 goto exit_bb;
2141 latch_b:
2142 goto header_b;
2143
2144 exit_bb:
2145
         2146   This function creates PHI nodes at merge_bb and replaces the use of i_5
         2147   in the update_loop's PHI node with the new PHI result.  */
2148
2149static void
2150slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
2151 struct loop *update_loop,
2152 edge guard_edge, edge merge_edge)
2153{
be1e7283 2154 location_t merge_loc, guard_loc;
6c6a3430 2155 edge orig_e = loop_preheader_edge (skip_loop);
2156 edge update_e = loop_preheader_edge (update_loop);
2157 gphi_iterator gsi_orig, gsi_update;
2158
2159 for ((gsi_orig = gsi_start_phis (skip_loop->header),
2160 gsi_update = gsi_start_phis (update_loop->header));
2161 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2162 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2163 {
2164 gphi *orig_phi = gsi_orig.phi ();
2165 gphi *update_phi = gsi_update.phi ();
2166
2167 /* Generate new phi node at merge bb of the guard. */
2168 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2169 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
2170
2171 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
2172 args in NEW_PHI for these edges. */
2173 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
2174 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
2175 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
2176 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
2177 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
2178 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
2179
2180 /* Update phi in UPDATE_PHI. */
2181 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
2182 }
2183}
2184
         2185/* LCSSA_PHI is an lcssa phi of the EPILOG loop, which is copied from LOOP;
         2186   this function searches for the corresponding lcssa phi node in the exit
         2187   bb of LOOP.  If it is found, return the phi result; otherwise return
2188 NULL. */
2189
2190static tree
2191find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
2192 gphi *lcssa_phi)
2193{
2194 gphi_iterator gsi;
2195 edge e = single_exit (loop);
2196
2197 gcc_assert (single_pred_p (e->dest));
2198 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2199 {
2200 gphi *phi = gsi.phi ();
2201 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
2202 PHI_ARG_DEF (lcssa_phi, 0), 0))
2203 return PHI_RESULT (phi);
2204 }
2205 return NULL_TREE;
2206}
2207
2208/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
2209 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
2210 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
2211 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
2212 The CFG looks like:
2213
2214 loop:
2215 header_a:
2216 i_1 = PHI<i_0, i_2>;
2217 ...
2218 i_2 = i_1 + 1;
2219 if (cond_a)
2220 goto latch_a;
2221 else
2222 goto exit_a;
2223 latch_a:
2224 goto header_a;
2225
2226 exit_a:
2227
2228 guard_bb:
2229 if (cond)
2230 goto merge_bb;
2231 else
2232 goto epilog_loop;
2233
2234 ;; fall_through_bb
2235
2236 epilog_loop:
2237 header_b:
2238 i_3 = PHI<i_2, i_4>;
2239 ...
2240 i_4 = i_3 + 1;
2241 if (cond_b)
2242 goto latch_b;
2243 else
2244 goto merge_bb;
2245 latch_b:
2246 goto header_b;
2247
2248 merge_bb:
2249 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
2250
2251 exit_bb:
2252 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
2253
         2254   For each name used outside EPILOG (i.e. for each name that has an lcssa
2255 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
2256 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
2257 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
2258 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
2259 in exit_bb will also be updated. */
2260
2261static void
2262slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
2263 edge guard_edge, edge merge_edge)
2264{
2265 gphi_iterator gsi;
2266 basic_block merge_bb = guard_edge->dest;
2267
2268 gcc_assert (single_succ_p (merge_bb));
2269 edge e = single_succ_edge (merge_bb);
2270 basic_block exit_bb = e->dest;
2271 gcc_assert (single_pred_p (exit_bb));
2272 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
2273
2274 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2275 {
2276 gphi *update_phi = gsi.phi ();
2277 tree old_arg = PHI_ARG_DEF (update_phi, 0);
2278 /* This loop-closed-phi actually doesn't represent a use out of the
2279 loop - the phi arg is a constant. */
2280 if (TREE_CODE (old_arg) != SSA_NAME)
2281 continue;
2282
2283 tree merge_arg = get_current_def (old_arg);
2284 if (!merge_arg)
2285 merge_arg = old_arg;
2286
2287 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
2288 /* If the var is live after loop but not a reduction, we simply
2289 use the old arg. */
2290 if (!guard_arg)
2291 guard_arg = old_arg;
2292
2293 /* Create new phi node in MERGE_BB: */
2294 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
2295 gphi *merge_phi = create_phi_node (new_res, merge_bb);
2296
2297 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
2298 the two PHI args in merge_phi for these edges. */
2299 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
2300 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
2301
2302 /* Update the original phi in exit_bb. */
2303 adjust_phi_and_debug_stmts (update_phi, e, new_res);
2304 }
2305}
2306
         2307/* The EPILOG loop is duplicated from the original loop for vectorizing;
         2308   the arg of its loop closed ssa PHI needs to be updated.  */
2309
2310static void
2311slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
2312{
2313 gphi_iterator gsi;
2314 basic_block exit_bb = single_exit (epilog)->dest;
2315
2316 gcc_assert (single_pred_p (exit_bb));
2317 edge e = EDGE_PRED (exit_bb, 0);
2318 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2319 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
2320}
2321
2322/* Function vect_do_peeling.
2323
2324 Input:
2325 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
2326
2327 preheader:
2328 LOOP:
2329 header_bb:
2330 loop_body
2331 if (exit_loop_cond) goto exit_bb
2332 else goto header_bb
2333 exit_bb:
2334
2335 - NITERS: The number of iterations of the loop.
2336 - NITERSM1: The number of iterations of the loop's latch.
2337 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
2338 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
2339 CHECK_PROFITABILITY is true.
2340 Output:
cde959e7 2341 - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
60b29a7e 2342 iterate after vectorization; see vect_set_loop_condition for details.
cde959e7 2343 - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
2344 should be set to the number of scalar iterations handled by the
2345 vector loop. The SSA name is only used on exit from the loop.
6c6a3430 2346
2347 This function peels prolog and epilog from the loop, adds guards skipping
2348 PROLOG and EPILOG for various conditions. As a result, the changed CFG
2349 would look like:
2350
2351 guard_bb_1:
2352 if (prefer_scalar_loop) goto merge_bb_1
2353 else goto guard_bb_2
2354
2355 guard_bb_2:
2356 if (skip_prolog) goto merge_bb_2
2357 else goto prolog_preheader
2358
2359 prolog_preheader:
2360 PROLOG:
2361 prolog_header_bb:
2362 prolog_body
2363 if (exit_prolog_cond) goto prolog_exit_bb
2364 else goto prolog_header_bb
2365 prolog_exit_bb:
2366
2367 merge_bb_2:
2368
2369 vector_preheader:
2370 VECTOR LOOP:
2371 vector_header_bb:
2372 vector_body
2373 if (exit_vector_cond) goto vector_exit_bb
2374 else goto vector_header_bb
2375 vector_exit_bb:
2376
2377 guard_bb_3:
2378 if (skip_epilog) goto merge_bb_3
2379 else goto epilog_preheader
2380
2381 merge_bb_1:
2382
2383 epilog_preheader:
2384 EPILOG:
2385 epilog_header_bb:
2386 epilog_body
2387 if (exit_epilog_cond) goto merge_bb_3
2388 else goto epilog_header_bb
2389
2390 merge_bb_3:
2391
2392 Note this function peels prolog and epilog only if it's necessary,
2393 as well as guards.
5b631e09 2394 Returns created epilogue or NULL.
6c6a3430 2395
2396 TODO: Guard for prefer_scalar_loop should be emitted along with
2397 versioning conditions if loop versioning is needed. */
2398
5b631e09 2399
2400struct loop *
6c6a3430 2401vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
cde959e7 2402 tree *niters_vector, tree *step_vector,
2403 tree *niters_vector_mult_vf_var, int th,
2404 bool check_profitability, bool niters_no_overflow)
6c6a3430 2405{
2406 edge e, guard_e;
2407 tree type = TREE_TYPE (niters), guard_cond;
2408 basic_block guard_bb, guard_to;
720cfc43 2409 profile_probability prob_prolog, prob_vector, prob_epilog;
d75596cd 2410 int estimated_vf;
6753a4bf 2411 int prolog_peeling = 0;
e092c20e 2412 /* We currently do not support prolog peeling if the target alignment is not
2413 known at compile time. 'vect_gen_prolog_loop_niters' depends on the
2414 target alignment being constant. */
2415 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2416 if (dr_info && !DR_TARGET_ALIGNMENT (dr_info).is_constant ())
2417 return NULL;
2418
6753a4bf 2419 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
2420 prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
53771608 2421
2422 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2423 poly_uint64 bound_epilog = 0;
2424 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
2425 && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2426 bound_epilog += vf - 1;
2427 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2428 bound_epilog += 1;
2429 bool epilog_peeling = maybe_ne (bound_epilog, 0U);
2430 poly_uint64 bound_scalar = bound_epilog;
6c6a3430 2431
2432 if (!prolog_peeling && !epilog_peeling)
5b631e09 2433 return NULL;
6c6a3430 2434
720cfc43 2435 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
d75596cd 2436 estimated_vf = vect_vf_for_cost (loop_vinfo);
2437 if (estimated_vf == 2)
2438 estimated_vf = 3;
720cfc43 2439 prob_prolog = prob_epilog = profile_probability::guessed_always ()
d75596cd 2440 .apply_scale (estimated_vf - 1, estimated_vf);
6c6a3430 2441
5b631e09 2442 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
6c6a3430 2443 struct loop *first_loop = loop;
15492f79 2444 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
6c6a3430 2445 create_lcssa_for_virtual_phi (loop);
2446 update_ssa (TODO_update_ssa_only_virtuals);
2447
c64f38bf 2448 if (MAY_HAVE_DEBUG_BIND_STMTS)
6c6a3430 2449 {
2450 gcc_assert (!adjust_vec.exists ());
2451 adjust_vec.create (32);
2452 }
fb85abff 2453 initialize_original_copy_tables ();
2454
53771608 2455 /* Record the anchor bb at which the guard should be placed if the scalar
2456 loop might be preferred. */
2457 basic_block anchor = loop_preheader_edge (loop)->src;
2458
2459 /* Generate the number of iterations for the prolog loop. We do this here
2460 so that we can also get the upper bound on the number of iterations. */
2461 tree niters_prolog;
2462 int bound_prolog = 0;
2463 if (prolog_peeling)
2464 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
2465 &bound_prolog);
2466 else
2467 niters_prolog = build_int_cst (type, 0);
2468
6c6a3430 2469 /* Prolog loop may be skipped. */
2470 bool skip_prolog = (prolog_peeling != 0);
32236f80 2471 /* Skip to epilog if scalar loop may be preferred. It's only needed
2472 when we peel for epilog loop and when it hasn't been checked with
2473 loop versioning. */
53771608 2474 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2475 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
2476 bound_prolog + bound_epilog)
2477 : !LOOP_REQUIRES_VERSIONING (loop_vinfo));
6c6a3430 2478 /* Epilog loop must be executed if the number of iterations for epilog
2479 loop is known at compile time, otherwise we need to add a check at
2480 the end of vector loop and skip to the end of epilog loop. */
2481 bool skip_epilog = (prolog_peeling < 0
d75596cd 2482 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2483 || !vf.is_constant ());
6c6a3430 2484 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
2485 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2486 skip_epilog = false;
2487
6c6a3430 2488 if (skip_vector)
c214c858 2489 {
2490 split_edge (loop_preheader_edge (loop));
2491
2492 /* Due to the order in which we peel prolog and epilog, we first
2493 propagate probability to the whole loop. The purpose is to
2494 avoid adjusting probabilities of both prolog and vector loops
2495 separately. Note in this case, the probability of epilog loop
2496 needs to be scaled back later. */
2497 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
720cfc43 2498 if (prob_vector.initialized_p ())
d75596cd 2499 {
2500 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
2501 scale_loop_profile (loop, prob_vector, 0);
2502 }
c214c858 2503 }
6c6a3430 2504
c309657f 2505 dump_user_location_t loop_loc = find_loop_location (loop);
6c6a3430 2506 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2507 if (prolog_peeling)
2f630015 2508 {
6c6a3430 2509 e = loop_preheader_edge (loop);
2510 if (!slpeel_can_duplicate_loop_p (loop, e))
2f630015 2511 {
6c6a3430 2512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2513 "loop can't be duplicated to preheader edge.\n");
2514 gcc_unreachable ();
2515 }
2516 /* Peel prolog and put it on preheader edge of loop. */
2517 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
2518 if (!prolog)
2519 {
2520 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2521 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
2522 gcc_unreachable ();
2523 }
44245620 2524 prolog->force_vectorize = false;
6c6a3430 2525 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
2526 first_loop = prolog;
2527 reset_original_copy_tables ();
2528
53771608 2529 /* Update the number of iterations for prolog loop. */
cde959e7 2530 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
60b29a7e 2531 vect_set_loop_condition (prolog, NULL, niters_prolog,
2532 step_prolog, NULL_TREE, false);
6c6a3430 2533
2534 /* Skip the prolog loop. */
2535 if (skip_prolog)
2536 {
2537 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
2538 niters_prolog, build_int_cst (type, 0));
2539 guard_bb = loop_preheader_edge (prolog)->src;
c214c858 2540 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
6c6a3430 2541 guard_to = split_edge (loop_preheader_edge (loop));
2542 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
2543 guard_to, guard_bb,
720cfc43 2544 prob_prolog.invert (),
15492f79 2545 irred_flag);
6c6a3430 2546 e = EDGE_PRED (guard_to, 0);
2547 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
2548 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
c214c858 2549
ca69b069 2550 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
2551 scale_loop_profile (prolog, prob_prolog, bound_prolog);
6c6a3430 2552 }
2553 /* Update init address of DRs. */
6753a4bf 2554 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
6c6a3430 2555 /* Update niters for vector loop. */
2556 LOOP_VINFO_NITERS (loop_vinfo)
2557 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
2558 LOOP_VINFO_NITERSM1 (loop_vinfo)
2559 = fold_build2 (MINUS_EXPR, type,
2560 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
3a815241 2561 bool new_var_p = false;
2562 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
2563 /* It's guaranteed that vector loop bound before vectorization is at
2564 least VF, so set range information for newly generated var. */
2565 if (new_var_p)
2566 set_range_info (niters, VR_RANGE,
e3d0f65c 2567 wi::to_wide (build_int_cst (type, vf)),
2568 wi::to_wide (TYPE_MAX_VALUE (type)));
6c6a3430 2569
82112bf2 2570 /* Prolog iterates at most bound_prolog times, latch iterates at
2571 most bound_prolog - 1 times. */
2572 record_niter_bound (prolog, bound_prolog - 1, false, true);
6c6a3430 2573 delete_update_ssa ();
2574 adjust_vec_debug_stmts ();
2575 scev_reset ();
2f630015 2576 }
2577
6c6a3430 2578 if (epilog_peeling)
2579 {
2580 e = single_exit (loop);
2581 if (!slpeel_can_duplicate_loop_p (loop, e))
2582 {
2583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2584 "loop can't be duplicated to exit edge.\n");
2585 gcc_unreachable ();
2586 }
2587 /* Peel epilog and put it on exit edge of loop. */
2588 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
2589 if (!epilog)
2590 {
2591 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2592 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
2593 gcc_unreachable ();
2594 }
44245620 2595 epilog->force_vectorize = false;
6c6a3430 2596 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
2597
         2598      /* The scalar version of the loop may be preferred.  In this case, add a
         2599	 guard and skip to the epilog.  Note this only happens when the number
         2600	 of iterations of the loop is unknown at compile time; otherwise the
         2601	 loop wouldn't be vectorized.  */
2602 if (skip_vector)
2603 {
82112bf2 2604 /* Additional epilogue iteration is peeled if gap exists. */
6c6a3430 2605 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
53771608 2606 bound_prolog, bound_epilog,
82112bf2 2607 th, &bound_scalar,
6c6a3430 2608 check_profitability);
82112bf2 2609 /* Build guard against NITERSM1 since NITERS may overflow. */
2610 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
6c6a3430 2611 guard_bb = anchor;
2612 guard_to = split_edge (loop_preheader_edge (epilog));
2613 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
2614 guard_to, guard_bb,
720cfc43 2615 prob_vector.invert (),
15492f79 2616 irred_flag);
6c6a3430 2617 e = EDGE_PRED (guard_to, 0);
2618 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
2619 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
c214c858 2620
2621 /* Simply propagate profile info from guard_bb to guard_to which is
2622 a merge point of control flow. */
c214c858 2623 guard_to->count = guard_bb->count;
dcc86e2d 2624
ca69b069 2625 /* Scale probability of epilog loop back.
2626 FIXME: We should avoid scaling down and back up. Profile may
2627 get lost if we scale down to 0. */
12b55cc8 2628 basic_block *bbs = get_loop_body (epilog);
dcc86e2d 2629 for (unsigned int i = 0; i < epilog->num_nodes; i++)
2630 bbs[i]->count = bbs[i]->count.apply_scale
2631 (bbs[i]->count,
2632 bbs[i]->count.apply_probability
2633 (prob_vector));
ca69b069 2634 free (bbs);
6c6a3430 2635 }
fb85abff 2636
c214c858 2637 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
6c6a3430 2638 tree niters_vector_mult_vf;
         2639      /* If the loop is peeled a non-zero constant number of times, niters now
         2640	 refers to orig_niters - prolog_peeling; it won't overflow even if
         2641	 orig_niters overflows.  */
2642 niters_no_overflow |= (prolog_peeling > 0);
2643 vect_gen_vector_loop_niters (loop_vinfo, niters,
cde959e7 2644 niters_vector, step_vector,
2645 niters_no_overflow);
2646 if (!integer_onep (*step_vector))
2647 {
         2648	  /* On exit from the loop we will have an easy way of calculating
2649 NITERS_VECTOR / STEP * STEP. Install a dummy definition
2650 until then. */
2651 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
2652 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
2653 *niters_vector_mult_vf_var = niters_vector_mult_vf;
2654 }
2655 else
2656 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
2657 &niters_vector_mult_vf);
6c6a3430 2658 /* Update IVs of original loop as if they were advanced by
2659 niters_vector_mult_vf steps. */
2660 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
2661 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
2662 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
2663 update_e);
2664
2665 if (skip_epilog)
2666 {
2667 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
2668 niters, niters_vector_mult_vf);
2669 guard_bb = single_exit (loop)->dest;
2670 guard_to = split_edge (single_exit (epilog));
2671 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
2672 skip_vector ? anchor : guard_bb,
720cfc43 2673 prob_epilog.invert (),
15492f79 2674 irred_flag);
6c6a3430 2675 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
2676 single_exit (epilog));
c214c858 2677 /* Only need to handle basic block before epilog loop if it's not
2678 the guard_bb, which is the case when skip_vector is true. */
2679 if (guard_bb != bb_before_epilog)
2680 {
720cfc43 2681 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
c214c858 2682
ca69b069 2683 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
c214c858 2684 }
d75596cd 2685 scale_loop_profile (epilog, prob_epilog, 0);
6c6a3430 2686 }
2687 else
2688 slpeel_update_phi_nodes_for_lcssa (epilog);
fb85abff 2689
53771608 2690 unsigned HOST_WIDE_INT bound;
2691 if (bound_scalar.is_constant (&bound))
d75596cd 2692 {
53771608 2693 gcc_assert (bound != 0);
2694 /* -1 to convert loop iterations to latch iterations. */
2695 record_niter_bound (epilog, bound - 1, false, true);
d75596cd 2696 }
6c6a3430 2697
2698 delete_update_ssa ();
2699 adjust_vec_debug_stmts ();
2700 scev_reset ();
2701 }
2702 adjust_vec.release ();
fb85abff 2703 free_original_copy_tables ();
5b631e09 2704
2705 return epilog;
fb85abff 2706}
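
/* End-to-end illustration (all numbers are made up): for a loop with
   NITERS = 103, VF = 4 and a data reference that needs 2 alignment
   iterations, the prolog executes 2 iterations, the vector loop then sees
   niters = 101 and runs 101 >> 2 = 25 vector iterations covering 100
   scalar iterations, and the epilogue handles the remaining
   103 - 2 - 100 = 1 iteration.  */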
2707
d5e80d93 2708/* Function vect_create_cond_for_niters_checks.
2709
2710 Create a conditional expression that represents the run-time checks for
f9936b7c 2711   the loop's niter.  The loop is guaranteed to terminate if the run-time
d5e80d93 2712 checks hold.
2713
2714 Input:
         2715   COND_EXPR  - input conditional expression.  The loop's niter assumptions
         2716		 are chained to it with a logical AND operation; if it is
         2717		 NULL, the assumptions alone form the condition.
         2718   LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the assumptions
         2719		 to be checked.
2720
2721 Output:
2722 COND_EXPR - conditional expression.
2723
2724 The returned COND_EXPR is the conditional expression to be used in the
2725 if statement that controls which version of the loop gets executed at
2726 runtime. */
2727
2728static void
2729vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2730{
2731 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
2732
2733 if (*cond_expr)
2734 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2735 *cond_expr, part_cond_expr);
2736 else
2737 *cond_expr = part_cond_expr;
2738}
fb85abff 2739
f68a7726 2740/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2741 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
2742
2743static void
2744chain_cond_expr (tree *cond_expr, tree part_cond_expr)
2745{
2746 if (*cond_expr)
2747 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2748 *cond_expr, part_cond_expr);
2749 else
2750 *cond_expr = part_cond_expr;
2751}
2752
fb85abff 2753/* Function vect_create_cond_for_align_checks.
2754
2755 Create a conditional expression that represents the alignment checks for
2756 all of data references (array element references) whose alignment must be
2757 checked at runtime.
2758
2759 Input:
2760 COND_EXPR - input conditional expression. New conditions will be chained
2761 with logical AND operation.
2762 LOOP_VINFO - two fields of the loop information are used.
2763 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2764 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2765
2766 Output:
2767 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2768 expression.
2769 The returned value is the conditional expression to be used in the if
2770 statement that controls which version of the loop gets executed at runtime.
2771
2772 The algorithm makes two assumptions:
2773 1) The number of bytes "n" in a vector is a power of 2.
2774 2) An address "a" is aligned if a%n is zero and that this
2775 test can be done as a&(n-1) == 0. For example, for 16
2776 byte vectors the test is a&0xf == 0. */
2777
2778static void
2779vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2780 tree *cond_expr,
2781 gimple_seq *cond_expr_stmt_list)
2782{
ab98e625 2783 vec<stmt_vec_info> may_misalign_stmts
fb85abff 2784 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
ab98e625 2785 stmt_vec_info stmt_info;
fb85abff 2786 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2787 tree mask_cst;
2788 unsigned int i;
fb85abff 2789 tree int_ptrsize_type;
2790 char tmp_name[20];
2791 tree or_tmp_name = NULL_TREE;
03d37e4e 2792 tree and_tmp_name;
42acab1c 2793 gimple *and_stmt;
fb85abff 2794 tree ptrsize_zero;
2795 tree part_cond_expr;
2796
2797 /* Check that mask is one less than a power of 2, i.e., mask is
2798 all zeros followed by all ones. */
2799 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2800
3cea8318 2801 int_ptrsize_type = signed_type_for (ptr_type_node);
fb85abff 2802
2803 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2804 of the first vector of the i'th data reference. */
2805
ab98e625 2806 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
fb85abff 2807 {
2808 gimple_seq new_stmt_list = NULL;
2809 tree addr_base;
03d37e4e 2810 tree addr_tmp_name;
2811 tree new_or_tmp_name;
42acab1c 2812 gimple *addr_stmt, *or_stmt;
ab98e625 2813 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
f1b8c740 2814 bool negative = tree_int_cst_compare
ab98e625 2815 (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
f1b8c740 2816 tree offset = negative
80b8a97a 2817 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
fb85abff 2818
2819 /* create: addr_tmp = (int)(address_of_first_vector) */
2820 addr_base =
ab98e625 2821 vect_create_addr_base_for_vector_ref (stmt_info, &new_stmt_list,
9e879814 2822 offset);
fb85abff 2823 if (new_stmt_list != NULL)
2824 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2825
03d37e4e 2826 sprintf (tmp_name, "addr2int%d", i);
2827 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
e9cf809e 2828 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
fb85abff 2829 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2830
         2831      /* The addresses are ORed together.  */
2832
2833 if (or_tmp_name != NULL_TREE)
2834 {
2835 /* create: or_tmp = or_tmp | addr_tmp */
03d37e4e 2836 sprintf (tmp_name, "orptrs%d", i);
2837 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
e9cf809e 2838 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2839 or_tmp_name, addr_tmp_name);
fb85abff 2840 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2841 or_tmp_name = new_or_tmp_name;
2842 }
2843 else
2844 or_tmp_name = addr_tmp_name;
2845
2846 } /* end for i */
2847
2848 mask_cst = build_int_cst (int_ptrsize_type, mask);
2849
2850 /* create: and_tmp = or_tmp & mask */
03d37e4e 2851 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
fb85abff 2852
e9cf809e 2853 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2854 or_tmp_name, mask_cst);
fb85abff 2855 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2856
2857 /* Make and_tmp the left operand of the conditional test against zero.
2858 if and_tmp has a nonzero bit then some address is unaligned. */
2859 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2860 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2861 and_tmp_name, ptrsize_zero);
f68a7726 2862 chain_cond_expr (cond_expr, part_cond_expr);
2863}
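
/* For instance (illustrative only): with 16-byte vectors
   LOOP_VINFO_PTR_MASK is 15, and for two data references the versioning
   test built here is ((addr2int0 | addr2int1) & 15) == 0, i.e. both
   first-vector addresses must be 16-byte aligned.  */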
2864
2865/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
2866 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
2867 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2868 and this new condition are true. Treat a null *COND_EXPR as "true". */
2869
2870static void
2871vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
2872{
2873 vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
2874 unsigned int i;
2875 vec_object_pair *pair;
2876 FOR_EACH_VEC_ELT (pairs, i, pair)
2877 {
2878 tree addr1 = build_fold_addr_expr (pair->first);
2879 tree addr2 = build_fold_addr_expr (pair->second);
2880 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
2881 addr1, addr2);
2882 chain_cond_expr (cond_expr, part_cond_expr);
2883 }
fb85abff 2884}
2885
e85b4a5e 2886/* Create an expression that is true when all lower-bound conditions for
2887 the vectorized loop are met. Chain this condition with *COND_EXPR. */
2888
2889static void
2890vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
2891{
2892 vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
2893 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
2894 {
2895 tree expr = lower_bounds[i].expr;
2896 tree type = unsigned_type_for (TREE_TYPE (expr));
2897 expr = fold_convert (type, expr);
2898 poly_uint64 bound = lower_bounds[i].min_value;
2899 if (!lower_bounds[i].unsigned_p)
2900 {
2901 expr = fold_build2 (PLUS_EXPR, type, expr,
2902 build_int_cstu (type, bound - 1));
2903 bound += bound - 1;
2904 }
2905 tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
2906 build_int_cstu (type, bound));
2907 chain_cond_expr (cond_expr, part_cond_expr);
2908 }
2909}
2910
fb85abff 2911/* Function vect_create_cond_for_alias_checks.
2912
2913 Create a conditional expression that represents the run-time checks for
2914 overlapping of address ranges represented by a list of data references
2915 relations passed as input.
2916
2917 Input:
2918 COND_EXPR - input conditional expression. New conditions will be chained
8a7b0f48 2919 with logical AND operation. If it is NULL, then the function
2920 is used to return the number of alias checks.
fb85abff 2921 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2922 to be checked.
2923
2924 Output:
2925 COND_EXPR - conditional expression.
fb85abff 2926
8a7b0f48 2927 The returned COND_EXPR is the conditional expression to be used in the if
fb85abff 2928 statement that controls which version of the loop gets executed at runtime.
2929*/
2930
8a7b0f48 2931void
90d4c4af 2932vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
fb85abff 2933{
43d14b66 2934 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
8a7b0f48 2935 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
fb85abff 2936
8a7b0f48 2937 if (comp_alias_ddrs.is_empty ())
fb85abff 2938 return;
2939
49ce332c 2940 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
2941 &comp_alias_ddrs, cond_expr);
6d8fb6cf 2942 if (dump_enabled_p ())
b055bc88 2943 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 2944 "created %u versioning for alias checks.\n",
8a7b0f48 2945 comp_alias_ddrs.length ());
fb85abff 2946}
2947
2948
2949/* Function vect_loop_versioning.
48e1416a 2950
fb85abff 2951   If the loop has data references that may or may not be aligned and/or
2952 has data reference relations whose independence was not proven then
2953 two versions of the loop need to be generated, one which is vectorized
2954 and one which isn't. A test is then generated to control which of the
2955 loops is executed. The test checks for the alignment of all of the
         2956   data references that may or may not be aligned.  An additional
         2957   sequence of runtime tests is generated for each pair of DDRs whose
48e1416a 2958   independence was not proven.  The vectorized version of the loop is
2959 executed only if both alias and alignment tests are passed.
2960
fb85abff 2961   The test generated to check which version of the loop is executed
48e1416a 2962 is modified to also check for profitability as indicated by the
97fe80a6 2963 cost model threshold TH.
23a3430d 2964
2965 The versioning precondition(s) are placed in *COND_EXPR and
2afdcbed 2966 *COND_EXPR_STMT_LIST. */
fb85abff 2967
44245620 2968struct loop *
e7430948 2969vect_loop_versioning (loop_vec_info loop_vinfo,
7456a7ea 2970 unsigned int th, bool check_profitability,
2971 poly_uint64 versioning_threshold)
fb85abff 2972{
d5e80d93 2973 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
c71d3c24 2974 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
fb85abff 2975 basic_block condition_bb;
1a91d914 2976 gphi_iterator gsi;
2977 gimple_stmt_iterator cond_exp_gsi;
fb85abff 2978 basic_block merge_bb;
2979 basic_block new_exit_bb;
2980 edge new_exit_e, e;
1a91d914 2981 gphi *orig_phi, *new_phi;
e7430948 2982 tree cond_expr = NULL_TREE;
2afdcbed 2983 gimple_seq cond_expr_stmt_list = NULL;
fb85abff 2984 tree arg;
ca69b069 2985 profile_probability prob = profile_probability::likely ();
fb85abff 2986 gimple_seq gimplify_stmt_list = NULL;
b622227e 2987 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
6ee2edad 2988 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2989 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
d5e80d93 2990 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
1d86b8dc 2991 tree version_simd_if_cond
2992 = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo);
fb85abff 2993
e7430948 2994 if (check_profitability)
ba12948e 2995 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
d5e80d93 2996 build_int_cst (TREE_TYPE (scalar_loop_iters),
b622227e 2997 th - 1));
7456a7ea 2998 if (maybe_ne (versioning_threshold, 0U))
2999 {
3000 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
3001 build_int_cst (TREE_TYPE (scalar_loop_iters),
3002 versioning_threshold - 1));
3003 if (cond_expr)
3004 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
3005 expr, cond_expr);
3006 else
3007 cond_expr = expr;
3008 }
d5e80d93 3009
3010 if (version_niter)
3011 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
3012
3013 if (cond_expr)
0decb676 3014 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3015 &cond_expr_stmt_list,
d5e80d93 3016 is_gimple_condexpr, NULL_TREE);
fb85abff 3017
6ee2edad 3018 if (version_align)
2afdcbed 3019 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
3020 &cond_expr_stmt_list);
fb85abff 3021
6ee2edad 3022 if (version_alias)
f68a7726 3023 {
3024 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
e85b4a5e 3025 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
f68a7726 3026 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
3027 }
23a3430d 3028
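  /* If versioning was requested for a simd `if' condition, only enter
     the vectorized version when that condition evaluates to non-zero.  */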
1d86b8dc 3029 if (version_simd_if_cond)
3030 {
3031 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
3032 if (flag_checking)
3033 if (basic_block bb
3034 = gimple_bb (SSA_NAME_DEF_STMT (version_simd_if_cond)))
3035 gcc_assert (bb != loop->header
3036 && dominated_by_p (CDI_DOMINATORS, loop->header, bb)
3037 && (scalar_loop == NULL
3038 || (bb != scalar_loop->header
3039 && dominated_by_p (CDI_DOMINATORS,
3040 scalar_loop->header, bb))));
3041 tree zero = build_zero_cst (TREE_TYPE (version_simd_if_cond));
3042 tree c = fold_build2 (NE_EXPR, boolean_type_node,
3043 version_simd_if_cond, zero);
3044 if (cond_expr)
3045 cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
3046 c, cond_expr);
3047 else
3048 cond_expr = c;
3049 if (dump_enabled_p ())
3050 dump_printf_loc (MSG_NOTE, vect_location,
3051 "created versioning for simd if condition check.\n");
3052 }
3053
f404501a 3054 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3055 &gimplify_stmt_list,
2afdcbed 3056 is_gimple_condexpr, NULL_TREE);
3057 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
fb85abff 3058
0decb676 3059   /* Compute the outermost loop that cond_expr and cond_expr_stmt_list are
3060 invariant in. */
3061 struct loop *outermost = outermost_invariant_loop_for_expr (loop, cond_expr);
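  /* Narrow OUTERMOST whenever one of the generated condition statements
     uses an SSA name defined inside it, so that the loop we finally
     version is one in which the whole condition is invariant.  */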
3062 for (gimple_stmt_iterator gsi = gsi_start (cond_expr_stmt_list);
3063 !gsi_end_p (gsi); gsi_next (&gsi))
c71d3c24 3064 {
0decb676 3065 gimple *stmt = gsi_stmt (gsi);
3066 update_stmt (stmt);
3067 ssa_op_iter iter;
3068 use_operand_p use_p;
3069 basic_block def_bb;
3070 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
3071 if ((def_bb = gimple_bb (SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p))))
3072 && flow_bb_inside_loop_p (outermost, def_bb))
3073 outermost = superloop_at_depth (loop, bb_loop_depth (def_bb) + 1);
3074 }
c71d3c24 3075
0decb676 3076 /* Search for the outermost loop we can version. Avoid versioning of
3077      non-perfect nests, but allow loops versioned by if-conversion inside.  */
3078 struct loop *loop_to_version = loop;
3079 if (flow_loop_nested_p (outermost, loop))
3080 {
3081 if (dump_enabled_p ())
3082 dump_printf_loc (MSG_NOTE, vect_location,
3083 "trying to apply versioning to outer loop %d\n",
3084 outermost->num);
3085 if (outermost->num == 0)
3086 outermost = superloop_at_depth (loop, 1);
3087 /* And avoid applying versioning on non-perfect nests. */
3088 while (loop_to_version != outermost
3089 && single_exit (loop_outer (loop_to_version))
3090 && (!loop_outer (loop_to_version)->inner->next
3091 || vect_loop_vectorized_call (loop_to_version))
3092 && (!loop_outer (loop_to_version)->inner->next
3093 || !loop_outer (loop_to_version)->inner->next->next))
3094 loop_to_version = loop_outer (loop_to_version);
3095 }
3096
3097 /* Apply versioning. If there is already a scalar version created by
3098      if-conversion, re-use it.  */
3099 gcond *cond;
3100 if (gimple *call = vect_loop_vectorized_call (loop_to_version, &cond))
3101 {
3102 gcc_assert (scalar_loop);
3103 condition_bb = gimple_bb (cond);
3104 gimple_cond_set_condition_from_tree (cond, cond_expr);
3105 update_stmt (cond);
3106
3107 if (cond_expr_stmt_list)
3108 {
3109 cond_exp_gsi = gsi_for_stmt (call);
3110 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
3111 GSI_SAME_STMT);
3112 }
3113
3114 /* ??? if-conversion uses profile_probability::always () but
3115 prob below is profile_probability::likely (). */
3116 nloop = scalar_loop;
3117 if (dump_enabled_p ())
3118 dump_printf_loc (MSG_NOTE, vect_location,
3119 "reusing %sloop version created by if conversion\n",
3120 loop_to_version != loop ? "outer " : "");
c71d3c24 3121 }
3122 else
0decb676 3123 {
3124 if (loop_to_version != loop
3125 && dump_enabled_p ())
3126 dump_printf_loc (MSG_NOTE, vect_location,
3127 "applying loop versioning to outer loop %d\n",
3128 loop_to_version->num);
3129
3130 initialize_original_copy_tables ();
3131 nloop = loop_version (loop_to_version, cond_expr, &condition_bb,
3132 prob, prob.invert (), prob, prob.invert (), true);
3133 gcc_assert (nloop);
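      /* LOOP_VERSION returned the copy of LOOP_TO_VERSION; what we want
         as NLOOP is the corresponding copy of LOOP itself, as recorded
         in the original/copy tables.  */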
3134 nloop = get_loop_copy (loop);
3135
3136      /* Kill off IFN_LOOP_VECTORIZED calls in the copy; nobody will
3137	 reap those otherwise, and they also refer to the original
3138	 loops.  */
3139 struct loop *l = loop;
3140 while (gimple *call = vect_loop_vectorized_call (l))
3141 {
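	  /* The call found is the one in the original nest; the copy's LHS
	     is the current definition of the original LHS, so look it up
	     to reach the copied call statement.  */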
3142 call = SSA_NAME_DEF_STMT (get_current_def (gimple_call_lhs (call)));
3143 fold_loop_internal_call (call, boolean_false_node);
3144 l = loop_outer (l);
3145 }
3146 free_original_copy_tables ();
3147
3148 if (cond_expr_stmt_list)
3149 {
3150 cond_exp_gsi = gsi_last_bb (condition_bb);
3151 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
3152 GSI_SAME_STMT);
3153 }
3154
3155 /* Loop versioning violates an assumption we try to maintain during
3156 vectorization - that the loop exit block has a single predecessor.
3157 After versioning, the exit block of both loop versions is the same
3158	 basic block (i.e. it has two predecessors).  In order to simplify
3159	 the following transformations in the vectorizer, we fix this situation
3160 here by adding a new (empty) block on the exit-edge of the loop,
3161 with the proper loop-exit phis to maintain loop-closed-form.
3162	 If loop versioning wasn't done from loop, but from scalar_loop instead,
3163	 merge_bb will already have just a single predecessor.  */
3164
3165 merge_bb = single_exit (loop_to_version)->dest;
3166 if (EDGE_COUNT (merge_bb->preds) >= 2)
3167 {
3168 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
3169 new_exit_bb = split_edge (single_exit (loop_to_version));
3170 new_exit_e = single_exit (loop_to_version);
3171 e = EDGE_SUCC (new_exit_bb, 0);
3172
3173 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi);
3174 gsi_next (&gsi))
3175 {
3176 tree new_res;
3177 orig_phi = gsi.phi ();
3178 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
3179 new_phi = create_phi_node (new_res, new_exit_bb);
3180 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
3181 add_phi_arg (new_phi, arg, new_exit_e,
3182 gimple_phi_arg_location_from_edge (orig_phi, e));
3183 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
3184 }
3185 }
3186
3187 update_ssa (TODO_update_ssa);
3188 }
d5e80d93 3189
3190 if (version_niter)
3191 {
3192      /* The versioned loop could be infinite, so we need to clear the existing
3193	 niter information, which was copied from the original loop.  */
3194 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
3195 vect_free_loop_info_assumptions (nloop);
3196      /* And set constraint LOOP_C_INFINITE for the niter analyzer.  */
3197 loop_constraint_set (loop, LOOP_C_INFINITE);
3198 }
6ee2edad 3199
c309657f 3200 if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
6ee2edad 3201 && dump_enabled_p ())
3202 {
3203 if (version_alias)
9ddd8fa7 3204 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
3205 vect_location,
6ee2edad 3206 "loop versioned for vectorization because of "
3207 "possible aliasing\n");
3208 if (version_align)
9ddd8fa7 3209 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
3210 vect_location,
6ee2edad 3211 "loop versioned for vectorization to enhance "
3212 "alignment\n");
3213
3214 }
44245620 3215
3216 return nloop;
fb85abff 3217}