48e1416a 1/* Vectorizer Specific Loop Manipulations
fbd26352 2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
48e1416a 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
9ef16211 25#include "backend.h"
b20a8bb4 26#include "tree.h"
9ef16211 27#include "gimple.h"
7c29e30e 28#include "cfghooks.h"
29#include "tree-pass.h"
9ef16211 30#include "ssa.h"
9ef16211 31#include "fold-const.h"
94ea8568 32#include "cfganal.h"
a8783bee 33#include "gimplify.h"
dcf1a1ec 34#include "gimple-iterator.h"
e795d6e1 35#include "gimplify-me.h"
073c1fd5 36#include "tree-cfg.h"
05d9c18a 37#include "tree-ssa-loop-manip.h"
073c1fd5 38#include "tree-into-ssa.h"
69ee5dbb 39#include "tree-ssa.h"
fb85abff 40#include "cfgloop.h"
fb85abff 41#include "tree-scalar-evolution.h"
42#include "tree-vectorizer.h"
12117f39 43#include "tree-ssa-loop-ivopts.h"
cde959e7 44#include "gimple-fold.h"
60b29a7e 45#include "tree-ssa-loop-niter.h"
46#include "internal-fn.h"
47#include "stor-layout.h"
48#include "optabs-query.h"
49#include "vec-perm-indices.h"
fb85abff 50
51/*************************************************************************
52 Simple Loop Peeling Utilities
53
54 Utilities to support loop peeling for vectorization purposes.
55 *************************************************************************/
56
57
58/* Renames the use *OP_P. */
59
60static void
61rename_use_op (use_operand_p op_p)
62{
63 tree new_name;
64
65 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
66 return;
67
68 new_name = get_current_def (USE_FROM_PTR (op_p));
69
70 /* Something defined outside of the loop. */
71 if (!new_name)
72 return;
73
74 /* An ordinary ssa name defined in the loop. */
75
76 SET_USE (op_p, new_name);
77}
78
79
ccd0a9f9 80/* Renames the variables in basic block BB. Allow renaming of PHI arguments
5ee742c4 81 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
82 true. */
fb85abff 83
c9b2c569 84static void
5ee742c4 85rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
fb85abff 86{
42acab1c 87 gimple *stmt;
fb85abff 88 use_operand_p use_p;
89 ssa_op_iter iter;
90 edge e;
91 edge_iterator ei;
92 struct loop *loop = bb->loop_father;
5ee742c4 93 struct loop *outer_loop = NULL;
94
95 if (rename_from_outer_loop)
96 {
97 gcc_assert (loop);
98 outer_loop = loop_outer (loop);
99 }
fb85abff 100
1a91d914 101 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
102 gsi_next (&gsi))
fb85abff 103 {
104 stmt = gsi_stmt (gsi);
105 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
106 rename_use_op (use_p);
107 }
108
c9b2c569 109 FOR_EACH_EDGE (e, ei, bb->preds)
fb85abff 110 {
ccd0a9f9 111 if (!flow_bb_inside_loop_p (loop, e->src))
112 {
113 if (!rename_from_outer_loop)
114 continue;
115 if (e->src != outer_loop->header)
116 {
117 if (outer_loop->inner->next)
118 {
 119		      /* If outer_loop has 2 inner loops, allow there to
 120			 be an extra basic block which decides, via
 121			 LOOP_VECTORIZED, which of the two loops to use.  */
122 if (!single_pred_p (e->src)
123 || single_pred (e->src) != outer_loop->header)
124 continue;
125 }
ccd0a9f9 126 }
127 }
1a91d914 128 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
129 gsi_next (&gsi))
130 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
fb85abff 131 }
132}
133
134
6dc50383 135struct adjust_info
b123eaab 136{
137 tree from, to;
138 basic_block bb;
6dc50383 139};
b123eaab 140
b123eaab 141/* A stack of values to be adjusted in debug stmts. We have to
142 process them LIFO, so that the closest substitution applies. If we
143 processed them FIFO, without the stack, we might substitute uses
144 with a PHI DEF that would soon become non-dominant, and when we got
145 to the suitable one, it wouldn't have anything to substitute any
146 more. */
d70aebca 147static vec<adjust_info, va_heap> adjust_vec;
b123eaab 148
149/* Adjust any debug stmts that referenced AI->from values to use the
150 loop-closed AI->to, if the references are dominated by AI->bb and
151 not by the definition of AI->from. */
152
153static void
154adjust_debug_stmts_now (adjust_info *ai)
155{
156 basic_block bbphi = ai->bb;
157 tree orig_def = ai->from;
158 tree new_def = ai->to;
159 imm_use_iterator imm_iter;
42acab1c 160 gimple *stmt;
b123eaab 161 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
162
163 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
164
165 /* Adjust any debug stmts that held onto non-loop-closed
166 references. */
167 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
168 {
169 use_operand_p use_p;
170 basic_block bbuse;
171
172 if (!is_gimple_debug (stmt))
173 continue;
174
175 gcc_assert (gimple_debug_bind_p (stmt));
176
177 bbuse = gimple_bb (stmt);
178
179 if ((bbuse == bbphi
180 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
181 && !(bbuse == bbdef
182 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
183 {
184 if (new_def)
185 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
186 SET_USE (use_p, new_def);
187 else
188 {
189 gimple_debug_bind_reset_value (stmt);
190 update_stmt (stmt);
191 }
192 }
193 }
194}
195
196/* Adjust debug stmts as scheduled before. */
197
198static void
199adjust_vec_debug_stmts (void)
200{
c64f38bf 201 if (!MAY_HAVE_DEBUG_BIND_STMTS)
b123eaab 202 return;
203
f1f41a6c 204 gcc_assert (adjust_vec.exists ());
b123eaab 205
f1f41a6c 206 while (!adjust_vec.is_empty ())
b123eaab 207 {
f1f41a6c 208 adjust_debug_stmts_now (&adjust_vec.last ());
209 adjust_vec.pop ();
b123eaab 210 }
b123eaab 211}
212
213/* Adjust any debug stmts that referenced FROM values to use the
214 loop-closed TO, if the references are dominated by BB and not by
215 the definition of FROM. If adjust_vec is non-NULL, adjustments
216 will be postponed until adjust_vec_debug_stmts is called. */
217
218static void
219adjust_debug_stmts (tree from, tree to, basic_block bb)
220{
221 adjust_info ai;
222
c64f38bf 223 if (MAY_HAVE_DEBUG_BIND_STMTS
0087edc6 224 && TREE_CODE (from) == SSA_NAME
2510e5cd 225 && ! SSA_NAME_IS_DEFAULT_DEF (from)
0087edc6 226 && ! virtual_operand_p (from))
b123eaab 227 {
228 ai.from = from;
229 ai.to = to;
230 ai.bb = bb;
231
f1f41a6c 232 if (adjust_vec.exists ())
233 adjust_vec.safe_push (ai);
b123eaab 234 else
235 adjust_debug_stmts_now (&ai);
236 }
237}
238
239/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
240 to adjust any debug stmts that referenced the old phi arg,
241 presumably non-loop-closed references left over from other
242 transformations. */
243
244static void
42acab1c 245adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
b123eaab 246{
247 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
248
249 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
250
c64f38bf 251 if (MAY_HAVE_DEBUG_BIND_STMTS)
b123eaab 252 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
253 gimple_bb (update_phi));
254}
255
60b29a7e 256/* Define one loop mask MASK from loop LOOP. INIT_MASK is the value that
257 the mask should have during the first iteration and NEXT_MASK is the
258 value that it should have on subsequent iterations. */
cde959e7 259
60b29a7e 260static void
261vect_set_loop_mask (struct loop *loop, tree mask, tree init_mask,
262 tree next_mask)
263{
264 gphi *phi = create_phi_node (mask, loop->header);
265 add_phi_arg (phi, init_mask, loop_preheader_edge (loop), UNKNOWN_LOCATION);
266 add_phi_arg (phi, next_mask, loop_latch_edge (loop), UNKNOWN_LOCATION);
267}
cde959e7 268
60b29a7e 269/* Add SEQ to the end of LOOP's preheader block. */
fb85abff 270
60b29a7e 271static void
272add_preheader_seq (struct loop *loop, gimple_seq seq)
273{
274 if (seq)
275 {
276 edge pe = loop_preheader_edge (loop);
277 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
278 gcc_assert (!new_bb);
279 }
280}
fb85abff 281
60b29a7e 282/* Add SEQ to the beginning of LOOP's header block. */
283
284static void
285add_header_seq (struct loop *loop, gimple_seq seq)
286{
287 if (seq)
288 {
289 gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
290 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
291 }
292}
293
294/* Return true if the target can interleave elements of two vectors.
295 OFFSET is 0 if the first half of the vectors should be interleaved
296 or 1 if the second half should. When returning true, store the
297 associated permutation in INDICES. */
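/* For example, with an eight-element vector: OFFSET == 0 asks for the
   "low" interleave { 0, 8, 1, 9, 2, 10, ... } and OFFSET == 1 for the
   "high" interleave { 4, 12, 5, 13, 6, 14, ... }.  Only the first three
   pairs are recorded below; vec_perm_indices extends the stepped pattern
   to the full vector.  */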
298
299static bool
300interleave_supported_p (vec_perm_indices *indices, tree vectype,
301 unsigned int offset)
302{
303 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
304 poly_uint64 base = exact_div (nelts, 2) * offset;
305 vec_perm_builder sel (nelts, 2, 3);
306 for (unsigned int i = 0; i < 3; ++i)
307 {
308 sel.quick_push (base + i);
309 sel.quick_push (base + i + nelts);
310 }
311 indices->new_vector (sel, 2, nelts);
312 return can_vec_perm_const_p (TYPE_MODE (vectype), *indices);
313}
314
315/* Try to use permutes to define the masks in DEST_RGM using the masks
316 in SRC_RGM, given that the former has twice as many masks as the
317 latter. Return true on success, adding any new statements to SEQ. */
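/* For example, if SRC_RGM has masks M1 and M2 and DEST_RGM needs four
   masks of the same type, interleaving each source mask with itself
   duplicates every source element into two adjacent destination elements,
   so the low and high interleaves of M1 give the first two destination
   masks and those of M2 give the last two.  When the target instead
   provides vec_unpacku_{hi,lo} for the source mask mode, each source mask
   can be unpacked into a pair of wider masks and VIEW_CONVERTed to the
   destination type if necessary.  */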
318
319static bool
320vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
321 rgroup_masks *src_rgm)
322{
323 tree src_masktype = src_rgm->mask_type;
324 tree dest_masktype = dest_rgm->mask_type;
325 machine_mode src_mode = TYPE_MODE (src_masktype);
326 if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
327 && optab_handler (vec_unpacku_hi_optab, src_mode) != CODE_FOR_nothing
328 && optab_handler (vec_unpacku_lo_optab, src_mode) != CODE_FOR_nothing)
329 {
330 /* Unpacking the source masks gives at least as many mask bits as
331 we need. We can then VIEW_CONVERT any excess bits away. */
332 tree unpack_masktype = vect_halve_mask_nunits (src_masktype);
333 for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
334 {
335 tree src = src_rgm->masks[i / 2];
336 tree dest = dest_rgm->masks[i];
88e81b08 337 tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
338 ? VEC_UNPACK_HI_EXPR
60b29a7e 339 : VEC_UNPACK_LO_EXPR);
340 gassign *stmt;
341 if (dest_masktype == unpack_masktype)
342 stmt = gimple_build_assign (dest, code, src);
343 else
344 {
345 tree temp = make_ssa_name (unpack_masktype);
346 stmt = gimple_build_assign (temp, code, src);
347 gimple_seq_add_stmt (seq, stmt);
348 stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
349 build1 (VIEW_CONVERT_EXPR,
350 dest_masktype, temp));
351 }
352 gimple_seq_add_stmt (seq, stmt);
353 }
354 return true;
355 }
356 vec_perm_indices indices[2];
357 if (dest_masktype == src_masktype
358 && interleave_supported_p (&indices[0], src_masktype, 0)
359 && interleave_supported_p (&indices[1], src_masktype, 1))
360 {
361 /* The destination requires twice as many mask bits as the source, so
362 we can use interleaving permutes to double up the number of bits. */
363 tree masks[2];
364 for (unsigned int i = 0; i < 2; ++i)
365 masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
366 for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
367 {
368 tree src = src_rgm->masks[i / 2];
369 tree dest = dest_rgm->masks[i];
370 gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
371 src, src, masks[i & 1]);
372 gimple_seq_add_stmt (seq, stmt);
373 }
374 return true;
375 }
376 return false;
377}
378
379/* Helper for vect_set_loop_condition_masked. Generate definitions for
380 all the masks in RGM and return a mask that is nonzero when the loop
381 needs to iterate. Add any new preheader statements to PREHEADER_SEQ.
382 Use LOOP_COND_GSI to insert code before the exit gcond.
383
384 RGM belongs to loop LOOP. The loop originally iterated NITERS
385 times and has been vectorized according to LOOP_VINFO. Each iteration
386 of the vectorized loop handles VF iterations of the scalar loop.
387
6753a4bf 388 If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
389 starts with NITERS_SKIP dummy iterations of the scalar loop before
390 the real work starts. The mask elements for these dummy iterations
391 must be 0, to ensure that the extra iterations do not have an effect.
392
60b29a7e 393 It is known that:
394
395 NITERS * RGM->max_nscalars_per_iter
396
397 does not overflow. However, MIGHT_WRAP_P says whether an induction
398 variable that starts at 0 and has step:
399
400 VF * RGM->max_nscalars_per_iter
401
402 might overflow before hitting a value above:
403
6753a4bf 404 (NITERS + NITERS_SKIP) * RGM->max_nscalars_per_iter
60b29a7e 405
406 This means that we cannot guarantee that such an induction variable
407 would ever hit a value that produces a set of all-false masks for RGM. */
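/* For example (illustrative values): with a 16-bit COMPARE_TYPE, a step
   of VF * RGM->max_nscalars_per_iter == 8 and a limit of 65534 scalars,
   the IV takes the values 0, 8, ..., 65528 and then wraps back to 0, so
   it never reaches a value >= 65534.  In that case MIGHT_WRAP_P would be
   true and the code below tests the IV against an adjusted, saturating
   limit before incrementing it.  */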
408
409static tree
410vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
411 gimple_seq *preheader_seq,
412 gimple_stmt_iterator loop_cond_gsi,
413 rgroup_masks *rgm, tree vf,
6753a4bf 414 tree niters, tree niters_skip,
415 bool might_wrap_p)
60b29a7e 416{
417 tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
418 tree mask_type = rgm->mask_type;
419 unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
420 poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
421
422 /* Calculate the maximum number of scalar values that the rgroup
6753a4bf 423 handles in total, the number that it handles for each iteration
424 of the vector loop, and the number that it should skip during the
425 first iteration of the vector loop. */
60b29a7e 426 tree nscalars_total = niters;
427 tree nscalars_step = vf;
6753a4bf 428 tree nscalars_skip = niters_skip;
60b29a7e 429 if (nscalars_per_iter != 1)
430 {
431 /* We checked before choosing to use a fully-masked loop that these
432 multiplications don't overflow. */
433 tree factor = build_int_cst (compare_type, nscalars_per_iter);
434 nscalars_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
435 nscalars_total, factor);
436 nscalars_step = gimple_build (preheader_seq, MULT_EXPR, compare_type,
437 nscalars_step, factor);
6753a4bf 438 if (nscalars_skip)
439 nscalars_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
440 nscalars_skip, factor);
60b29a7e 441 }
442
443 /* Create an induction variable that counts the number of scalars
444 processed. */
445 tree index_before_incr, index_after_incr;
446 gimple_stmt_iterator incr_gsi;
447 bool insert_after;
448 tree zero_index = build_int_cst (compare_type, 0);
449 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
450 create_iv (zero_index, nscalars_step, NULL_TREE, loop, &incr_gsi,
451 insert_after, &index_before_incr, &index_after_incr);
452
6753a4bf 453 tree test_index, test_limit, first_limit;
60b29a7e 454 gimple_stmt_iterator *test_gsi;
455 if (might_wrap_p)
456 {
457 /* In principle the loop should stop iterating once the incremented
6753a4bf 458 IV reaches a value greater than or equal to:
459
460 NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP
461
462 However, there's no guarantee that this addition doesn't overflow
463 the comparison type, or that the IV hits a value above it before
464 wrapping around. We therefore adjust the limit down by one
465 IV step:
60b29a7e 466
6753a4bf 467 (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
468 -[infinite-prec] NSCALARS_STEP
60b29a7e 469
470 and compare the IV against this limit _before_ incrementing it.
471 Since the comparison type is unsigned, we actually want the
472 subtraction to saturate at zero:
473
6753a4bf 474 (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
475 -[sat] NSCALARS_STEP
476
477 And since NSCALARS_SKIP < NSCALARS_STEP, we can reassociate this as:
478
479 NSCALARS_TOTAL -[sat] (NSCALARS_STEP - NSCALARS_SKIP)
480
481 where the rightmost subtraction can be done directly in
482 COMPARE_TYPE. */
60b29a7e 483 test_index = index_before_incr;
6753a4bf 484 tree adjust = nscalars_step;
485 if (nscalars_skip)
486 adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
487 adjust, nscalars_skip);
60b29a7e 488 test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
6753a4bf 489 nscalars_total, adjust);
60b29a7e 490 test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
6753a4bf 491 test_limit, adjust);
60b29a7e 492 test_gsi = &incr_gsi;
6753a4bf 493
494 /* Get a safe limit for the first iteration. */
495 if (nscalars_skip)
496 {
497 /* The first vector iteration can handle at most NSCALARS_STEP
498 scalars. NSCALARS_STEP <= CONST_LIMIT, and adding
499 NSCALARS_SKIP to that cannot overflow. */
500 tree const_limit = build_int_cst (compare_type,
501 LOOP_VINFO_VECT_FACTOR (loop_vinfo)
502 * nscalars_per_iter);
503 first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
504 nscalars_total, const_limit);
505 first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
506 first_limit, nscalars_skip);
507 }
508 else
509 /* For the first iteration it doesn't matter whether the IV hits
510 a value above NSCALARS_TOTAL. That only matters for the latch
511 condition. */
512 first_limit = nscalars_total;
60b29a7e 513 }
514 else
515 {
516 /* Test the incremented IV, which will always hit a value above
517 the bound before wrapping. */
518 test_index = index_after_incr;
519 test_limit = nscalars_total;
6753a4bf 520 if (nscalars_skip)
521 test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
522 test_limit, nscalars_skip);
60b29a7e 523 test_gsi = &loop_cond_gsi;
6753a4bf 524
525 first_limit = test_limit;
60b29a7e 526 }
527
528 /* Provide a definition of each mask in the group. */
529 tree next_mask = NULL_TREE;
530 tree mask;
531 unsigned int i;
532 FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask)
533 {
534 /* Previous masks will cover BIAS scalars. This mask covers the
535 next batch. */
536 poly_uint64 bias = nscalars_per_mask * i;
537 tree bias_tree = build_int_cst (compare_type, bias);
538 gimple *tmp_stmt;
539
540 /* See whether the first iteration of the vector loop is known
541 to have a full mask. */
542 poly_uint64 const_limit;
543 bool first_iteration_full
6753a4bf 544 = (poly_int_tree_p (first_limit, &const_limit)
60b29a7e 545 && known_ge (const_limit, (i + 1) * nscalars_per_mask));
546
547 /* Rather than have a new IV that starts at BIAS and goes up to
548 TEST_LIMIT, prefer to use the same 0-based IV for each mask
549 and adjust the bound down by BIAS. */
550 tree this_test_limit = test_limit;
551 if (i != 0)
552 {
553 this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
554 compare_type, this_test_limit,
555 bias_tree);
556 this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
557 compare_type, this_test_limit,
558 bias_tree);
559 }
560
6753a4bf 561 /* Create the initial mask. First include all scalars that
562 are within the loop limit. */
60b29a7e 563 tree init_mask = NULL_TREE;
564 if (!first_iteration_full)
565 {
566 tree start, end;
6753a4bf 567 if (first_limit == test_limit)
60b29a7e 568 {
569 /* Use a natural test between zero (the initial IV value)
570 and the loop limit. The "else" block would be valid too,
571 but this choice can avoid the need to load BIAS_TREE into
572 a register. */
573 start = zero_index;
574 end = this_test_limit;
575 }
576 else
577 {
6753a4bf 578 /* FIRST_LIMIT is the maximum number of scalars handled by the
579 first iteration of the vector loop. Test the portion
580 associated with this mask. */
60b29a7e 581 start = bias_tree;
6753a4bf 582 end = first_limit;
60b29a7e 583 }
584
585 init_mask = make_temp_ssa_name (mask_type, NULL, "max_mask");
586 tmp_stmt = vect_gen_while (init_mask, start, end);
587 gimple_seq_add_stmt (preheader_seq, tmp_stmt);
588 }
589
6753a4bf 590 /* Now AND out the bits that are within the number of skipped
591 scalars. */
592 poly_uint64 const_skip;
593 if (nscalars_skip
594 && !(poly_int_tree_p (nscalars_skip, &const_skip)
595 && known_le (const_skip, bias)))
596 {
597 tree unskipped_mask = vect_gen_while_not (preheader_seq, mask_type,
598 bias_tree, nscalars_skip);
599 if (init_mask)
600 init_mask = gimple_build (preheader_seq, BIT_AND_EXPR, mask_type,
601 init_mask, unskipped_mask);
602 else
603 init_mask = unskipped_mask;
604 }
605
60b29a7e 606 if (!init_mask)
607 /* First iteration is full. */
608 init_mask = build_minus_one_cst (mask_type);
609
610 /* Get the mask value for the next iteration of the loop. */
611 next_mask = make_temp_ssa_name (mask_type, NULL, "next_mask");
612 gcall *call = vect_gen_while (next_mask, test_index, this_test_limit);
613 gsi_insert_before (test_gsi, call, GSI_SAME_STMT);
614
615 vect_set_loop_mask (loop, mask, init_mask, next_mask);
616 }
617 return next_mask;
618}
619
620/* Make LOOP iterate NITERS times using masking and WHILE_ULT calls.
621 LOOP_VINFO describes the vectorization of LOOP. NITERS is the
53771608 622 number of iterations of the original scalar loop that should be
623 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are
624 as for vect_set_loop_condition.
60b29a7e 625
626 Insert the branch-back condition before LOOP_COND_GSI and return the
627 final gcond. */
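/* As a rough sketch (for a single rgroup, ignoring the case in which the
   IV might wrap), the generated form is:

	preheader:
	  init_mask = WHILE_ULT (0, limit);
	loop:
	  mask = PHI <init_mask (preheader), next_mask (latch)>;
	  ... statements predicated on MASK ...
	  ivtmp = ivtmp + VF;
	  next_mask = WHILE_ULT (ivtmp, limit);
	  if (next_mask != { 0, ... })
	    goto loop;  */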
628
629static gcond *
630vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo,
631 tree niters, tree final_iv,
632 bool niters_maybe_zero,
633 gimple_stmt_iterator loop_cond_gsi)
634{
635 gimple_seq preheader_seq = NULL;
636 gimple_seq header_seq = NULL;
637
638 tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
639 unsigned int compare_precision = TYPE_PRECISION (compare_type);
640 unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo);
641 tree orig_niters = niters;
642
643 /* Type of the initial value of NITERS. */
644 tree ni_actual_type = TREE_TYPE (niters);
645 unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
646
647 /* Convert NITERS to the same size as the compare. */
648 if (compare_precision > ni_actual_precision
649 && niters_maybe_zero)
650 {
651 /* We know that there is always at least one iteration, so if the
652 count is zero then it must have wrapped. Cope with this by
653 subtracting 1 before the conversion and adding 1 to the result. */
654 gcc_assert (TYPE_UNSIGNED (ni_actual_type));
655 niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
656 niters, build_minus_one_cst (ni_actual_type));
657 niters = gimple_convert (&preheader_seq, compare_type, niters);
658 niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
659 niters, build_one_cst (compare_type));
660 }
661 else
662 niters = gimple_convert (&preheader_seq, compare_type, niters);
663
6753a4bf 664 /* Convert skip_niters to the right type. */
665 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
666
60b29a7e 667 /* Now calculate the value that the induction variable must be able
668 to hit in order to ensure that we end the loop with an all-false mask.
669 This involves adding the maximum number of inactive trailing scalar
670 iterations. */
671 widest_int iv_limit;
672 bool known_max_iters = max_loop_iterations (loop, &iv_limit);
673 if (known_max_iters)
674 {
6753a4bf 675 if (niters_skip)
676 {
677 /* Add the maximum number of skipped iterations to the
678 maximum iteration count. */
679 if (TREE_CODE (niters_skip) == INTEGER_CST)
680 iv_limit += wi::to_widest (niters_skip);
681 else
682 iv_limit += max_vf - 1;
683 }
60b29a7e 684 /* IV_LIMIT is the maximum number of latch iterations, which is also
685 the maximum in-range IV value. Round this value down to the previous
686 vector alignment boundary and then add an extra full iteration. */
687 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
688 iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf;
689 }
690
691 /* Get the vectorization factor in tree form. */
692 tree vf = build_int_cst (compare_type,
693 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
694
695 /* Iterate over all the rgroups and fill in their masks. We could use
696 the first mask from any rgroup for the loop condition; here we
697 arbitrarily pick the last. */
698 tree test_mask = NULL_TREE;
699 rgroup_masks *rgm;
700 unsigned int i;
701 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
702 FOR_EACH_VEC_ELT (*masks, i, rgm)
703 if (!rgm->masks.is_empty ())
704 {
705 /* First try using permutes. This adds a single vector
706 instruction to the loop for each mask, but needs no extra
707 loop invariants or IVs. */
708 unsigned int nmasks = i + 1;
709 if ((nmasks & 1) == 0)
710 {
711 rgroup_masks *half_rgm = &(*masks)[nmasks / 2 - 1];
712 if (!half_rgm->masks.is_empty ()
713 && vect_maybe_permute_loop_masks (&header_seq, rgm, half_rgm))
714 continue;
715 }
716
717 /* See whether zero-based IV would ever generate all-false masks
718 before wrapping around. */
719 bool might_wrap_p
720 = (!known_max_iters
721 || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter,
722 UNSIGNED)
723 > compare_precision));
724
725 /* Set up all masks for this group. */
726 test_mask = vect_set_loop_masks_directly (loop, loop_vinfo,
727 &preheader_seq,
728 loop_cond_gsi, rgm, vf,
6753a4bf 729 niters, niters_skip,
730 might_wrap_p);
60b29a7e 731 }
732
733 /* Emit all accumulated statements. */
734 add_preheader_seq (loop, preheader_seq);
735 add_header_seq (loop, header_seq);
736
737 /* Get a boolean result that tells us whether to iterate. */
738 edge exit_edge = single_exit (loop);
739 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
740 tree zero_mask = build_zero_cst (TREE_TYPE (test_mask));
741 gcond *cond_stmt = gimple_build_cond (code, test_mask, zero_mask,
742 NULL_TREE, NULL_TREE);
743 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
744
745 /* The loop iterates (NITERS - 1) / VF + 1 times.
746 Subtract one from this to get the latch count. */
747 tree step = build_int_cst (compare_type,
748 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
749 tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
750 build_minus_one_cst (compare_type));
751 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
752 niters_minus_one, step);
753
754 if (final_iv)
755 {
756 gassign *assign = gimple_build_assign (final_iv, orig_niters);
757 gsi_insert_on_edge_immediate (single_exit (loop), assign);
758 }
759
760 return cond_stmt;
761}
762
763/* Like vect_set_loop_condition, but handle the case in which there
764 are no loop masks. */
765
766static gcond *
767vect_set_loop_condition_unmasked (struct loop *loop, tree niters,
768 tree step, tree final_iv,
769 bool niters_maybe_zero,
770 gimple_stmt_iterator loop_cond_gsi)
fb85abff 771{
772 tree indx_before_incr, indx_after_incr;
1a91d914 773 gcond *cond_stmt;
774 gcond *orig_cond;
cde959e7 775 edge pe = loop_preheader_edge (loop);
fb85abff 776 edge exit_edge = single_exit (loop);
fb85abff 777 gimple_stmt_iterator incr_gsi;
778 bool insert_after;
fb85abff 779 enum tree_code code;
cde959e7 780 tree niters_type = TREE_TYPE (niters);
fb85abff 781
782 orig_cond = get_loop_exit_condition (loop);
783 gcc_assert (orig_cond);
784 loop_cond_gsi = gsi_for_stmt (orig_cond);
785
cde959e7 786 tree init, limit;
787 if (!niters_maybe_zero && integer_onep (step))
788 {
789 /* In this case we can use a simple 0-based IV:
790
791 A:
792 x = 0;
793 do
794 {
795 ...
796 x += 1;
797 }
798 while (x < NITERS); */
799 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
800 init = build_zero_cst (niters_type);
801 limit = niters;
802 }
803 else
804 {
805 /* The following works for all values of NITERS except 0:
806
807 B:
808 x = 0;
809 do
810 {
811 ...
812 x += STEP;
813 }
814 while (x <= NITERS - STEP);
815
816 so that the loop continues to iterate if x + STEP - 1 < NITERS
817 but stops if x + STEP - 1 >= NITERS.
818
819 However, if NITERS is zero, x never hits a value above NITERS - STEP
820 before wrapping around. There are two obvious ways of dealing with
821 this:
822
823 - start at STEP - 1 and compare x before incrementing it
824 - start at -1 and compare x after incrementing it
825
826 The latter is simpler and is what we use. The loop in this case
827 looks like:
828
829 C:
830 x = -1;
831 do
832 {
833 ...
834 x += STEP;
835 }
836 while (x < NITERS - STEP);
837
838 In both cases the loop limit is NITERS - STEP. */
839 gimple_seq seq = NULL;
840 limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
841 limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
842 if (seq)
843 {
844 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
845 gcc_assert (!new_bb);
846 }
847 if (niters_maybe_zero)
848 {
849 /* Case C. */
850 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
851 init = build_all_ones_cst (niters_type);
852 }
853 else
854 {
855 /* Case B. */
856 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
857 init = build_zero_cst (niters_type);
858 }
859 }
860
fb85abff 861 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
862 create_iv (init, step, NULL_TREE, loop,
863 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
fb85abff 864 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
865 true, NULL_TREE, true,
866 GSI_SAME_STMT);
cde959e7 867 limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
fb85abff 868 true, GSI_SAME_STMT);
869
cde959e7 870 cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
fb85abff 871 NULL_TREE);
872
873 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
874
56c0f85a 875 /* Record the number of latch iterations. */
cde959e7 876 if (limit == niters)
877 /* Case A: the loop iterates NITERS times. Subtract one to get the
878 latch count. */
879 loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
880 build_int_cst (niters_type, 1));
881 else
882 /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
883 Subtract one from this to get the latch count. */
884 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
885 limit, step);
886
887 if (final_iv)
888 {
889 gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
890 indx_after_incr, init);
891 gsi_insert_on_edge_immediate (single_exit (loop), assign);
892 }
60b29a7e 893
894 return cond_stmt;
895}
896
897/* If we're using fully-masked loops, make LOOP iterate:
898
899 N == (NITERS - 1) / STEP + 1
900
901 times. When NITERS is zero, this is equivalent to making the loop
902 execute (1 << M) / STEP times, where M is the precision of NITERS.
903 NITERS_MAYBE_ZERO is true if this last case might occur.
904
905 If we're not using fully-masked loops, make LOOP iterate:
906
907 N == (NITERS - STEP) / STEP + 1
908
909 times, where NITERS is known to be outside the range [1, STEP - 1].
910 This is equivalent to making the loop execute NITERS / STEP times
911 when NITERS is nonzero and (1 << M) / STEP times otherwise.
912 NITERS_MAYBE_ZERO again indicates whether this last case might occur.
913
914 If FINAL_IV is nonnull, it is an SSA name that should be set to
915 N * STEP on exit from the loop.
916
917 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
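/* For example, with NITERS == 10 and STEP == 4 a fully-masked loop
   iterates (10 - 1) / 4 + 1 == 3 times, the last time under a partial
   mask, whereas a non-fully-masked loop iterates
   (10 - 4) / 4 + 1 == 10 / 4 == 2 times and leaves the remaining scalar
   iterations to the peeled epilog loop.  */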
918
919void
920vect_set_loop_condition (struct loop *loop, loop_vec_info loop_vinfo,
921 tree niters, tree step, tree final_iv,
922 bool niters_maybe_zero)
923{
924 gcond *cond_stmt;
925 gcond *orig_cond = get_loop_exit_condition (loop);
926 gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
927
928 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
929 cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
930 final_iv, niters_maybe_zero,
931 loop_cond_gsi);
932 else
933 cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
934 final_iv, niters_maybe_zero,
935 loop_cond_gsi);
936
937 /* Remove old loop exit test. */
f525c1af 938 stmt_vec_info orig_cond_info;
939 if (loop_vinfo
940 && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
941 loop_vinfo->remove_stmt (orig_cond_info);
942 else
943 gsi_remove (&loop_cond_gsi, true);
60b29a7e 944
945 if (dump_enabled_p ())
a4e972e3 946 dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
947 cond_stmt);
fb85abff 948}
949
c71d3c24 950/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
 951	   For all PHI arguments in FROM->dest and TO->dest from those
 952	   edges ensure that the TO->dest PHI arguments have their current_def
 953	   set to that of the corresponding FROM->dest arguments.  */
954
955static void
956slpeel_duplicate_current_defs_from_edges (edge from, edge to)
957{
958 gimple_stmt_iterator gsi_from, gsi_to;
959
960 for (gsi_from = gsi_start_phis (from->dest),
961 gsi_to = gsi_start_phis (to->dest);
e87502d6 962 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
c71d3c24 963 {
42acab1c 964 gimple *from_phi = gsi_stmt (gsi_from);
965 gimple *to_phi = gsi_stmt (gsi_to);
c71d3c24 966 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
e3243c77 967 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
968 if (virtual_operand_p (from_arg))
969 {
e87502d6 970 gsi_next (&gsi_from);
971 continue;
972 }
e3243c77 973 if (virtual_operand_p (to_arg))
974 {
e87502d6 975 gsi_next (&gsi_to);
976 continue;
977 }
e3243c77 978 if (TREE_CODE (from_arg) != SSA_NAME)
979 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
68675984 980 else if (TREE_CODE (to_arg) == SSA_NAME
981 && from_arg != to_arg)
e3243c77 982 {
983 if (get_current_def (to_arg) == NULL_TREE)
6bae816f 984 {
985 gcc_assert (types_compatible_p (TREE_TYPE (to_arg),
986 TREE_TYPE (get_current_def
987 (from_arg))));
988 set_current_def (to_arg, get_current_def (from_arg));
989 }
e3243c77 990 }
e87502d6 991 gsi_next (&gsi_from);
992 gsi_next (&gsi_to);
c71d3c24 993 }
e3243c77 994
995 gphi *from_phi = get_virtual_phi (from->dest);
996 gphi *to_phi = get_virtual_phi (to->dest);
997 if (from_phi)
998 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
999 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
c71d3c24 1000}
1001
fb85abff 1002
48e1416a 1003/* Given LOOP this function generates a new copy of it and puts it
c71d3c24 1004 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
1005 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
1006 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
1007 entry or exit of LOOP. */
fb85abff 1008
1009struct loop *
c71d3c24 1010slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
1011 struct loop *scalar_loop, edge e)
fb85abff 1012{
1013 struct loop *new_loop;
8a61cfb8 1014 basic_block *new_bbs, *bbs, *pbbs;
fb85abff 1015 bool at_exit;
1016 bool was_imm_dom;
48e1416a 1017 basic_block exit_dest;
fb85abff 1018 edge exit, new_exit;
5ee742c4 1019 bool duplicate_outer_loop = false;
fb85abff 1020
c9b2c569 1021 exit = single_exit (loop);
1022 at_exit = (e == exit);
fb85abff 1023 if (!at_exit && e != loop_preheader_edge (loop))
1024 return NULL;
1025
c71d3c24 1026 if (scalar_loop == NULL)
1027 scalar_loop = loop;
1028
1029 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
8a61cfb8 1030 pbbs = bbs + 1;
1031 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
5ee742c4 1032 /* Allow duplication of outer loops. */
1033 if (scalar_loop->inner)
1034 duplicate_outer_loop = true;
fb85abff 1035 /* Check whether duplication is possible. */
8a61cfb8 1036 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
fb85abff 1037 {
1038 free (bbs);
1039 return NULL;
1040 }
1041
1042 /* Generate new loop structure. */
c71d3c24 1043 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
1044 duplicate_subloops (scalar_loop, new_loop);
fb85abff 1045
c9b2c569 1046 exit_dest = exit->dest;
48e1416a 1047 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
1048 exit_dest) == loop->header ?
fb85abff 1049 true : false);
1050
c9b2c569 1051 /* Also copy the pre-header, this avoids jumping through hoops to
1052 duplicate the loop entry PHI arguments. Create an empty
1053 pre-header unconditionally for this. */
c71d3c24 1054 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
c9b2c569 1055 edge entry_e = single_pred_edge (preheader);
8a61cfb8 1056 bbs[0] = preheader;
c71d3c24 1057 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
fb85abff 1058
c71d3c24 1059 exit = single_exit (scalar_loop);
1060 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
fb85abff 1061 &exit, 1, &new_exit, NULL,
8a61cfb8 1062 at_exit ? loop->latch : e->src, true);
c71d3c24 1063 exit = single_exit (loop);
8a61cfb8 1064 basic_block new_preheader = new_bbs[0];
fb85abff 1065
c71d3c24 1066 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
fb85abff 1067
c71d3c24 1068 if (scalar_loop != loop)
1069 {
1070 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
1071 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
1072 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
1073 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
1074 header) to have current_def set, so copy them over. */
1075 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
1076 exit);
1077 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
1078 0),
1079 EDGE_SUCC (loop->latch, 0));
1080 }
48e1416a 1081
fb85abff 1082 if (at_exit) /* Add the loop copy at exit. */
1083 {
c71d3c24 1084 if (scalar_loop != loop)
1085 {
1a91d914 1086 gphi_iterator gsi;
c71d3c24 1087 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
1088
1089 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
1090 gsi_next (&gsi))
1091 {
1a91d914 1092 gphi *phi = gsi.phi ();
c71d3c24 1093 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
1094 location_t orig_locus
1095 = gimple_phi_arg_location_from_edge (phi, e);
1096
1097 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
1098 }
1099 }
c9b2c569 1100 redirect_edge_and_branch_force (e, new_preheader);
1101 flush_pending_stmts (e);
1102 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
5ee742c4 1103 if (was_imm_dom || duplicate_outer_loop)
c71d3c24 1104 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
c9b2c569 1105
 1106	      /* And remove the unnecessary forwarder again.  Keep the other
 1107	         one so we have a proper pre-header for the loop at the exit edge.  */
c71d3c24 1108 redirect_edge_pred (single_succ_edge (preheader),
1109 single_pred (preheader));
c9b2c569 1110 delete_basic_block (preheader);
c71d3c24 1111 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1112 loop_preheader_edge (scalar_loop)->src);
fb85abff 1113 }
1114 else /* Add the copy at entry. */
1115 {
c71d3c24 1116 if (scalar_loop != loop)
1117 {
 1118	  /* Remove the unnecessary forwarder of scalar_loop again.  */
1119 redirect_edge_pred (single_succ_edge (preheader),
1120 single_pred (preheader));
1121 delete_basic_block (preheader);
1122 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
1123 loop_preheader_edge (scalar_loop)->src);
1124 preheader = split_edge (loop_preheader_edge (loop));
1125 entry_e = single_pred_edge (preheader);
1126 }
1127
c9b2c569 1128 redirect_edge_and_branch_force (entry_e, new_preheader);
1129 flush_pending_stmts (entry_e);
1130 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
1131
1132 redirect_edge_and_branch_force (new_exit, preheader);
1133 flush_pending_stmts (new_exit);
1134 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
1135
 1136	      /* And remove the unnecessary forwarder again.  Keep the other
 1137	         one so we have a proper pre-header for the loop at the exit edge.  */
c71d3c24 1138 redirect_edge_pred (single_succ_edge (new_preheader),
1139 single_pred (new_preheader));
c9b2c569 1140 delete_basic_block (new_preheader);
1141 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
1142 loop_preheader_edge (new_loop)->src);
fb85abff 1143 }
1144
6e429c5c 1145	  /* Skip the new preheader; it's deleted if the loop copy is added at entry.  */
1146 for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
5ee742c4 1147 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
c9b2c569 1148
c71d3c24 1149 if (scalar_loop != loop)
1150 {
1151 /* Update new_loop->header PHIs, so that on the preheader
1152 edge they are the ones from loop rather than scalar_loop. */
1a91d914 1153 gphi_iterator gsi_orig, gsi_new;
c71d3c24 1154 edge orig_e = loop_preheader_edge (loop);
1155 edge new_e = loop_preheader_edge (new_loop);
1156
1157 for (gsi_orig = gsi_start_phis (loop->header),
1158 gsi_new = gsi_start_phis (new_loop->header);
1159 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
1160 gsi_next (&gsi_orig), gsi_next (&gsi_new))
1161 {
1a91d914 1162 gphi *orig_phi = gsi_orig.phi ();
1163 gphi *new_phi = gsi_new.phi ();
c71d3c24 1164 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
1165 location_t orig_locus
1166 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
1167
1168 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
1169 }
1170 }
1171
fb85abff 1172 free (new_bbs);
1173 free (bbs);
1174
382ecba7 1175 checking_verify_dominators (CDI_DOMINATORS);
c9b2c569 1176
fb85abff 1177 return new_loop;
1178}
1179
1180
6c6a3430 1181/* Given the condition expression COND, put it as the last statement of
1182 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
1183 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
15492f79 1184 skip the loop. PROBABILITY is the skip edge's probability. Mark the
1185 new edge as irreducible if IRREDUCIBLE_P is true. */
fb85abff 1186
1187static edge
23a3430d 1188slpeel_add_loop_guard (basic_block guard_bb, tree cond,
6c6a3430 1189 basic_block guard_to, basic_block dom_bb,
720cfc43 1190 profile_probability probability, bool irreducible_p)
fb85abff 1191{
1192 gimple_stmt_iterator gsi;
1193 edge new_e, enter_e;
1a91d914 1194 gcond *cond_stmt;
fb85abff 1195 gimple_seq gimplify_stmt_list = NULL;
1196
1197 enter_e = EDGE_SUCC (guard_bb, 0);
1198 enter_e->flags &= ~EDGE_FALLTHRU;
1199 enter_e->flags |= EDGE_FALSE_VALUE;
1200 gsi = gsi_last_bb (guard_bb);
1201
87ae3989 1202 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
1203 NULL_TREE);
23a3430d 1204 if (gimplify_stmt_list)
6c6a3430 1205 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
fb85abff 1206
6c6a3430 1207 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
fb85abff 1208 gsi = gsi_last_bb (guard_bb);
1209 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
1210
1211 /* Add new edge to connect guard block to the merge/loop-exit block. */
6c6a3430 1212 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
877584e4 1213
877584e4 1214 new_e->probability = probability;
15492f79 1215 if (irreducible_p)
1216 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
1217
720cfc43 1218 enter_e->probability = probability.invert ();
6c6a3430 1219 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
a62a4a78 1220
1221 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
1222 if (enter_e->dest->loop_father->header == enter_e->dest)
1223 split_edge (enter_e);
1224
fb85abff 1225 return new_e;
1226}
1227
1228
1229/* This function verifies that the following restrictions apply to LOOP:
5ee742c4 1230	   (1) it consists of exactly 2 basic blocks - a header and an empty latch -
 1231	       for an innermost loop, or of 5 basic blocks for an outer loop.
1232 (2) it is single entry, single exit
1233 (3) its exit condition is the last stmt in the header
1234 (4) E is the entry/exit edge of LOOP.
fb85abff 1235 */
1236
1237bool
1238slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
1239{
1240 edge exit_e = single_exit (loop);
1241 edge entry_e = loop_preheader_edge (loop);
1a91d914 1242 gcond *orig_cond = get_loop_exit_condition (loop);
fb85abff 1243 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
5ee742c4 1244 unsigned int num_bb = loop->inner? 5 : 2;
fb85abff 1245
51eb70a9 1246	  /* All loops have an outer scope; the only case in which loop->outer is
 1247	     NULL is for the function itself.  */
1248 if (!loop_outer (loop)
5ee742c4 1249 || loop->num_nodes != num_bb
fb85abff 1250 || !empty_block_p (loop->latch)
1251 || !single_exit (loop)
1252 /* Verify that new loop exit condition can be trivially modified. */
1253 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
1254 || (e != exit_e && e != entry_e))
1255 return false;
1256
1257 return true;
1258}
1259
6c6a3430 1260/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
1261 in the exit bb and rename all the uses after the loop. This simplifies
1262 the *guard[12] routines, which assume loop closed SSA form for all PHIs
1263 (but normally loop closed SSA form doesn't require virtual PHIs to be
 1264	   in the same form).  Doing this early simplifies checking which
 1265	   uses should be renamed.  */
fb85abff 1266
1267static void
6c6a3430 1268create_lcssa_for_virtual_phi (struct loop *loop)
fb85abff 1269{
1a91d914 1270 gphi_iterator gsi;
fb85abff 1271 edge exit_e = single_exit (loop);
48e1416a 1272
38091110 1273 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
7c782c9b 1274 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
38091110 1275 {
1a91d914 1276 gphi *phi = gsi.phi ();
38091110 1277 for (gsi = gsi_start_phis (exit_e->dest);
1278 !gsi_end_p (gsi); gsi_next (&gsi))
7c782c9b 1279 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
38091110 1280 break;
1281 if (gsi_end_p (gsi))
1282 {
f9e245b2 1283 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
1a91d914 1284 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
38091110 1285 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
1286 imm_use_iterator imm_iter;
42acab1c 1287 gimple *stmt;
38091110 1288 use_operand_p use_p;
1289
6c99cfac 1290 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
1291 = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
60d535d2 1292 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
38091110 1293 gimple_phi_set_result (new_phi, new_vop);
1294 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
480233e7 1295 if (stmt != new_phi
1296 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
38091110 1297 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1298 SET_USE (use_p, new_vop);
1299 }
1300 break;
1301 }
fb85abff 1302
fb85abff 1303}
1304
1305/* Function vect_get_loop_location.
1306
1307 Extract the location of the loop in the source code.
1308 If the loop is not well formed for vectorization, an estimated
1309 location is calculated.
1310 Return the loop location if succeed and NULL if not. */
1311
c309657f 1312dump_user_location_t
fb85abff 1313find_loop_location (struct loop *loop)
1314{
42acab1c 1315 gimple *stmt = NULL;
fb85abff 1316 basic_block bb;
1317 gimple_stmt_iterator si;
1318
1319 if (!loop)
c309657f 1320 return dump_user_location_t ();
fb85abff 1321
1322 stmt = get_loop_exit_condition (loop);
1323
b2d978a6 1324 if (stmt
1325 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
c309657f 1326 return stmt;
fb85abff 1327
 1328	  /* If we got here the loop is probably not "well formed";
 1329	     try to estimate the loop location.  */
1330
1331 if (!loop->header)
c309657f 1332 return dump_user_location_t ();
fb85abff 1333
1334 bb = loop->header;
1335
1336 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1337 {
1338 stmt = gsi_stmt (si);
b2d978a6 1339 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
c309657f 1340 return stmt;
fb85abff 1341 }
1342
c309657f 1343 return dump_user_location_t ();
fb85abff 1344}
1345
e068828a 1346/* Return true if the phi described by STMT_INFO defines an IV of the
1347 loop to be vectorized. */
6c6a3430 1348
1349static bool
e068828a 1350iv_phi_p (stmt_vec_info stmt_info)
6c6a3430 1351{
e068828a 1352 gphi *phi = as_a <gphi *> (stmt_info->stmt);
6c6a3430 1353 if (virtual_operand_p (PHI_RESULT (phi)))
1354 return false;
1355
6c6a3430 1356 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1357 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
1358 return false;
1359
1360 return true;
1361}
fb85abff 1362
fb85abff 1363/* Function vect_can_advance_ivs_p
1364
48e1416a 1365 In case the number of iterations that LOOP iterates is unknown at compile
1366 time, an epilog loop will be generated, and the loop induction variables
1367 (IVs) will be "advanced" to the value they are supposed to take just before
fb85abff 1368 the epilog loop. Here we check that the access function of the loop IVs
1369 and the expression that represents the loop bound are simple enough.
1370 These restrictions will be relaxed in the future. */
1371
48e1416a 1372bool
fb85abff 1373vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1374{
1375 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1376 basic_block bb = loop->header;
1a91d914 1377 gphi_iterator gsi;
fb85abff 1378
1379 /* Analyze phi functions of the loop header. */
1380
6d8fb6cf 1381 if (dump_enabled_p ())
78bb46f5 1382 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
fb85abff 1383 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1384 {
fb85abff 1385 tree evolution_part;
1386
6c6a3430 1387 gphi *phi = gsi.phi ();
1c2fef9a 1388 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
6d8fb6cf 1389 if (dump_enabled_p ())
a4e972e3 1390 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
1391 phi_info->stmt);
fb85abff 1392
1393 /* Skip virtual phi's. The data dependences that are associated with
6c6a3430 1394 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
fb85abff 1395
6c6a3430 1396 Skip reduction phis. */
e068828a 1397 if (!iv_phi_p (phi_info))
fb85abff 1398 {
6d8fb6cf 1399 if (dump_enabled_p ())
6c6a3430 1400 dump_printf_loc (MSG_NOTE, vect_location,
1401 "reduc or virtual phi. skip.\n");
fb85abff 1402 continue;
1403 }
1404
fb85abff 1405 /* Analyze the evolution function. */
1406
1c2fef9a 1407 evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
fb85abff 1408 if (evolution_part == NULL_TREE)
1409 {
6d8fb6cf 1410 if (dump_enabled_p ())
2cd0995e 1411 dump_printf (MSG_MISSED_OPTIMIZATION,
78bb46f5 1412 "No access function or evolution.\n");
fb85abff 1413 return false;
1414 }
48e1416a 1415
12117f39 1416 /* FORNOW: We do not transform initial conditions of IVs
1417 which evolution functions are not invariants in the loop. */
1418
1419 if (!expr_invariant_in_loop_p (loop, evolution_part))
1420 {
1421 if (dump_enabled_p ())
1422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1423 "evolution not invariant in loop.\n");
1424 return false;
1425 }
1426
48e1416a 1427 /* FORNOW: We do not transform initial conditions of IVs
fb85abff 1428 which evolution functions are a polynomial of degree >= 2. */
1429
1430 if (tree_is_chrec (evolution_part))
12117f39 1431 {
1432 if (dump_enabled_p ())
1433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1434 "evolution is chrec.\n");
1435 return false;
1436 }
fb85abff 1437 }
1438
1439 return true;
1440}
1441
1442
1443/* Function vect_update_ivs_after_vectorizer.
1444
1445 "Advance" the induction variables of LOOP to the value they should take
1446 after the execution of LOOP. This is currently necessary because the
1447 vectorizer does not handle induction variables that are used after the
1448 loop. Such a situation occurs when the last iterations of LOOP are
1449 peeled, because:
1450 1. We introduced new uses after LOOP for IVs that were not originally used
1451 after LOOP: the IVs of LOOP are now used by an epilog loop.
1452 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1453 times, whereas the loop IVs should be bumped N times.
1454
1455 Input:
1456 - LOOP - a loop that is going to be vectorized. The last few iterations
1457 of LOOP were peeled.
1458 - NITERS - the number of iterations that LOOP executes (before it is
1459 vectorized). i.e, the number of times the ivs should be bumped.
1460 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1461 coming out from LOOP on which there are uses of the LOOP ivs
1462 (this is the path from LOOP->exit to epilog_loop->preheader).
1463
1464 The new definitions of the ivs are placed in LOOP->exit.
1465 The phi args associated with the edge UPDATE_E in the bb
1466 UPDATE_E->dest are updated accordingly.
1467
1468 Assumption 1: Like the rest of the vectorizer, this function assumes
1469 a single loop exit that has a single predecessor.
1470
1471 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1472 organized in the same order.
1473
1474 Assumption 3: The access function of the ivs is simple enough (see
1475 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1476
1477 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
48e1416a 1478 coming out of LOOP on which the ivs of LOOP are used (this is the path
fb85abff 1479 that leads to the epilog loop; other paths skip the epilog loop). This
1480 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1481 needs to have its phis updated.
1482 */
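/* For example, an IV with initial value INIT == 0 and step 1 in a loop
   that originally executed NITERS == 12 iterations receives the new
   definition ni = 0 + 12 * 1 == 12 in the exit block, which is the value
   the epilog loop (and any other use after LOOP) expects.  */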
1483
1484static void
6c6a3430 1485vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
1486 tree niters, edge update_e)
fb85abff 1487{
1a91d914 1488 gphi_iterator gsi, gsi1;
6c6a3430 1489 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 1490 basic_block update_bb = update_e->dest;
6c6a3430 1491 basic_block exit_bb = single_exit (loop)->dest;
fb85abff 1492
1493 /* Make sure there exists a single-predecessor exit bb: */
1494 gcc_assert (single_pred_p (exit_bb));
6c6a3430 1495 gcc_assert (single_succ_edge (exit_bb) == update_e);
fb85abff 1496
1497 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
1498 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
1499 gsi_next (&gsi), gsi_next (&gsi1))
1500 {
fb85abff 1501 tree init_expr;
1efcacec 1502 tree step_expr, off;
1503 tree type;
fb85abff 1504 tree var, ni, ni_name;
1505 gimple_stmt_iterator last_gsi;
1506
6c6a3430 1507 gphi *phi = gsi.phi ();
1508 gphi *phi1 = gsi1.phi ();
1c2fef9a 1509 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
6d8fb6cf 1510 if (dump_enabled_p ())
a4e972e3 1511 dump_printf_loc (MSG_NOTE, vect_location,
1512 "vect_update_ivs_after_vectorizer: phi: %G", phi);
fb85abff 1513
6c6a3430 1514 /* Skip reduction and virtual phis. */
e068828a 1515 if (!iv_phi_p (phi_info))
fb85abff 1516 {
6d8fb6cf 1517 if (dump_enabled_p ())
6c6a3430 1518 dump_printf_loc (MSG_NOTE, vect_location,
1519 "reduc or virtual phi. skip.\n");
fb85abff 1520 continue;
1521 }
1522
86faead7 1523 type = TREE_TYPE (gimple_phi_result (phi));
1c2fef9a 1524 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
86faead7 1525 step_expr = unshare_expr (step_expr);
48e1416a 1526
fb85abff 1527 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1528 of degree >= 2 or exponential. */
86faead7 1529 gcc_assert (!tree_is_chrec (step_expr));
fb85abff 1530
86faead7 1531 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
fb85abff 1532
1efcacec 1533 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
1534 fold_convert (TREE_TYPE (step_expr), niters),
1535 step_expr);
86faead7 1536 if (POINTER_TYPE_P (type))
2cc66f2a 1537 ni = fold_build_pointer_plus (init_expr, off);
fb85abff 1538 else
86faead7 1539 ni = fold_build2 (PLUS_EXPR, type,
1540 init_expr, fold_convert (type, off));
fb85abff 1541
86faead7 1542 var = create_tmp_var (type, "tmp");
fb85abff 1543
1544 last_gsi = gsi_last_bb (exit_bb);
6c6a3430 1545 gimple_seq new_stmts = NULL;
1546 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
1547 /* Exit_bb shouldn't be empty. */
1548 if (!gsi_end_p (last_gsi))
1549 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
1550 else
1551 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
48e1416a 1552
fb85abff 1553 /* Fix phi expressions in the successor bb. */
b123eaab 1554 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
fb85abff 1555 }
1556}
1557
6753a4bf 1558/* Return a gimple value containing the misalignment (measured in vector
1559 elements) for the loop described by LOOP_VINFO, i.e. how many elements
1560 it is away from a perfectly aligned address. Add any new statements
1561 to SEQ. */
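/* For example, with a 16-byte target alignment, 4-byte elements and a
   start address whose low bits are 0x8, the result is
   (addr & 15) >> log2 (4) == 2 elements.  */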
1562
1563static tree
1564get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
1565{
ec5bf0fb 1566 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
abc9513d 1567 stmt_vec_info stmt_info = dr_info->stmt;
6753a4bf 1568 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1569
e092c20e 1570 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
1571 unsigned HOST_WIDE_INT target_align_c;
1572 tree target_align_minus_1;
6753a4bf 1573
abc9513d 1574 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
1575 size_zero_node) < 0;
6753a4bf 1576 tree offset = (negative
1577 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1578 : size_zero_node);
0219dc42 1579 tree start_addr = vect_create_addr_base_for_vector_ref (stmt_info, seq,
6753a4bf 1580 offset);
1581 tree type = unsigned_type_for (TREE_TYPE (start_addr));
e092c20e 1582 if (target_align.is_constant (&target_align_c))
1583 target_align_minus_1 = build_int_cst (type, target_align_c - 1);
1584 else
1585 {
1586 tree vla = build_int_cst (type, target_align);
1587 tree vla_align = fold_build2 (BIT_AND_EXPR, type, vla,
1588 fold_build2 (MINUS_EXPR, type,
1589 build_int_cst (type, 0), vla));
1590 target_align_minus_1 = fold_build2 (MINUS_EXPR, type, vla_align,
1591 build_int_cst (type, 1));
1592 }
1593
6753a4bf 1594 HOST_WIDE_INT elem_size
1595 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1596 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1597
1598 /* Create: misalign_in_bytes = addr & (target_align - 1). */
1599 tree int_start_addr = fold_convert (type, start_addr);
1600 tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
1601 target_align_minus_1);
1602
1603 /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
1604 tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
1605 elem_size_log);
1606
1607 return misalign_in_elems;
1608}
1609
6c6a3430 1610/* Function vect_gen_prolog_loop_niters
97fe80a6 1611
6c6a3430 1612 Generate the number of iterations which should be peeled as prolog for the
1613 loop represented by LOOP_VINFO. It is calculated as the misalignment of
1614 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
1615 As a result, after the execution of this loop, the data reference DR will
1616 refer to an aligned location. The following computation is generated:
fb85abff 1617
1618 If the misalignment of DR is known at compile time:
 1619     addr_mis = DR_MISALIGNMENT (dr);
1620 Else, compute address misalignment in bytes:
6753a4bf 1621 addr_mis = addr & (target_align - 1)
fb85abff 1622
6c6a3430 1623 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
fb85abff 1624
1625 (elem_size = element type size; an element is the scalar element whose type
1626 is the inner type of the vectype)
1627
6c6a3430 1628 The computations will be emitted at the end of BB. We also compute and
82112bf2 1629   store the upper bound (inclusive) of the result in BOUND.
6c6a3430 1630
fb85abff 1631 When the step of the data-ref in the loop is not 1 (as in interleaved data
1632 and SLP), the number of iterations of the prolog must be divided by the step
1633 (which is equal to the size of interleaved group).
1634
1635 The above formulas assume that VF == number of elements in the vector. This
1636 may not hold when there are multiple-types in the loop.
1637 In this case, for some data-references in the loop the VF does not represent
1638 the number of elements that fit in the vector. Therefore, instead of VF we
1639 use TYPE_VECTOR_SUBPARTS. */
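/* Worked example of the formula above (assumed values): with a 16-byte
   target alignment, VF == 4, elem_size == 4, step == 1 and
   addr_mis == 8 bytes,

     prolog_niters = ((4 - 8/4) & (4 - 1)) / 1 = 2

   so two scalar iterations are peeled and DR is aligned when the vector
   loop starts.  */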
1640
48e1416a 1641static tree
6c6a3430 1642vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
1643 basic_block bb, int *bound)
fb85abff 1644{
ec5bf0fb 1645 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
fb85abff 1646 tree var;
6c6a3430 1647 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
1648 gimple_seq stmts = NULL, new_stmts = NULL;
fb85abff 1649 tree iters, iters_name;
abc9513d 1650 stmt_vec_info stmt_info = dr_info->stmt;
fb85abff 1651 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
e092c20e 1652 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr_info);
fb85abff 1653
313a5120 1654 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
fb85abff 1655 {
313a5120 1656 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
fb85abff 1657
6d8fb6cf 1658 if (dump_enabled_p ())
b055bc88 1659 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1660 "known peeling = %d.\n", npeel);
fb85abff 1661
0822b158 1662 iters = build_int_cst (niters_type, npeel);
82112bf2 1663 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
fb85abff 1664 }
1665 else
1666 {
6753a4bf 1667 tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
1668 tree type = TREE_TYPE (misalign_in_elems);
aec313e5 1669 HOST_WIDE_INT elem_size
1670 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
e092c20e 1671 /* We only do prolog peeling if the target alignment is known at compile
1672 time. */
1673 poly_uint64 align_in_elems =
1674 exact_div (target_align, elem_size);
1675 tree align_in_elems_minus_1 =
1676 build_int_cst (type, align_in_elems - 1);
aec313e5 1677 tree align_in_elems_tree = build_int_cst (type, align_in_elems);
aec313e5 1678
1679 /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
1680 & (align_in_elems - 1)). */
abc9513d 1681 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
1682 size_zero_node) < 0;
f1b8c740 1683 if (negative)
aec313e5 1684 iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
1685 align_in_elems_tree);
f1b8c740 1686 else
aec313e5 1687 iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
1688 misalign_in_elems);
1689 iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
fb85abff 1690 iters = fold_convert (niters_type, iters);
e092c20e 1691 unsigned HOST_WIDE_INT align_in_elems_c;
1692 if (align_in_elems.is_constant (&align_in_elems_c))
1693 *bound = align_in_elems_c - 1;
1694 else
1695 *bound = -1;
fb85abff 1696 }
1697
6d8fb6cf 1698 if (dump_enabled_p ())
a4e972e3 1699 dump_printf_loc (MSG_NOTE, vect_location,
1700 "niters for prolog loop: %T\n", iters);
fb85abff 1701
1702 var = create_tmp_var (niters_type, "prolog_loop_niters");
6c6a3430 1703 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
fb85abff 1704
6c6a3430 1705 if (new_stmts)
1706 gimple_seq_add_seq (&stmts, new_stmts);
fb85abff 1707 if (stmts)
1708 {
6c6a3430 1709 gcc_assert (single_succ_p (bb));
1710 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1711 if (gsi_end_p (gsi))
1712 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1713 else
1714 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
fb85abff 1715 }
48e1416a 1716 return iters_name;
fb85abff 1717}
1718
1719
1720/* Function vect_update_init_of_dr
1721
6753a4bf 1722 If CODE is PLUS, the vector loop starts NITERS iterations after the
1723 scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
1724 iterations before the scalar one (using masking to skip inactive
1725 elements). This function updates the information recorded in DR to
1726 account for the difference. Specifically, it updates the OFFSET
1727 field of DR. */
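/* For illustration (hypothetical values): if NITERS is the prolog count
   n_prolog and DR_STEP (DR) is 4 bytes, then for CODE == PLUS_EXPR the
   update below amounts to

     DR_OFFSET (DR) += (sizetype) n_prolog * 4

   so the vector loop's first access starts just after the iterations
   handled by the prolog.  */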
fb85abff 1728
1729static void
6753a4bf 1730vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code)
fb85abff 1731{
1732 tree offset = DR_OFFSET (dr);
48e1416a 1733
fb85abff 1734 niters = fold_build2 (MULT_EXPR, sizetype,
1735 fold_convert (sizetype, niters),
1736 fold_convert (sizetype, DR_STEP (dr)));
6753a4bf 1737 offset = fold_build2 (code, sizetype,
87f9ffa4 1738 fold_convert (sizetype, offset), niters);
fb85abff 1739 DR_OFFSET (dr) = offset;
1740}
1741
1742
1743/* Function vect_update_inits_of_drs
1744
6753a4bf 1745   Apply vect_update_init_of_dr to all accesses in LOOP_VINFO.
 1746   CODE and NITERS are as for vect_update_init_of_dr.  */
fb85abff 1747
1748static void
6753a4bf 1749vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
1750 tree_code code)
fb85abff 1751{
1752 unsigned int i;
f1f41a6c 1753 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
fb85abff 1754 struct data_reference *dr;
6c6a3430 1755
88f6eb8f 1756  DUMP_VECT_SCOPE ("vect_update_inits_of_drs");
6c6a3430 1757
1758 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1759 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1760 {
1761 gimple_seq seq;
1762 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1763 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1764
1765 niters = fold_convert (sizetype, niters);
1766 niters = force_gimple_operand (niters, &seq, false, var);
1767 if (seq)
1768 {
1769 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1770 gcc_assert (!new_bb);
1771 }
1772 }
fb85abff 1773
f1f41a6c 1774 FOR_EACH_VEC_ELT (datarefs, i, dr)
fa681b45 1775 {
db72d3bf 1776 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
1777 if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt))
fa681b45 1778 vect_update_init_of_dr (dr, niters, code);
1779 }
fb85abff 1780}
1781
6753a4bf 1782/* For the information recorded in LOOP_VINFO prepare the loop for peeling
1783 by masking. This involves calculating the number of iterations to
1784 be peeled and then aligning all memory references appropriately. */
1785
1786void
1787vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
1788{
1789 tree misalign_in_elems;
1790 tree type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
1791
1792 gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
1793
1794 /* From the information recorded in LOOP_VINFO get the number of iterations
1795 that need to be skipped via masking. */
1796 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1797 {
1798 poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1799 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
1800 misalign_in_elems = build_int_cst (type, misalign);
1801 }
1802 else
1803 {
1804 gimple_seq seq1 = NULL, seq2 = NULL;
1805 misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
1806 misalign_in_elems = fold_convert (type, misalign_in_elems);
1807 misalign_in_elems = force_gimple_operand (misalign_in_elems,
1808 &seq2, true, NULL_TREE);
1809 gimple_seq_add_seq (&seq1, seq2);
1810 if (seq1)
1811 {
1812 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1813 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
1814 gcc_assert (!new_bb);
1815 }
1816 }
1817
1818 if (dump_enabled_p ())
a4e972e3 1819 dump_printf_loc (MSG_NOTE, vect_location,
1820 "misalignment for fully-masked loop: %T\n",
1821 misalign_in_elems);
6753a4bf 1822
1823 LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
1824
1825 vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
1826}
fb85abff 1827
6c6a3430 1828/* This function builds ni_name = number of iterations. Statements
3a815241 1829 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
 1830   it to TRUE if a new ssa_var is generated.
6c6a3430 1831
1832tree
3a815241 1833vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
6c6a3430 1834{
1835 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1836 if (TREE_CODE (ni) == INTEGER_CST)
1837 return ni;
1838 else
1839 {
1840 tree ni_name, var;
1841 gimple_seq stmts = NULL;
1842 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1843
1844 var = create_tmp_var (TREE_TYPE (ni), "niters");
1845 ni_name = force_gimple_operand (ni, &stmts, false, var);
1846 if (stmts)
3a815241 1847 {
1848 gsi_insert_seq_on_edge_immediate (pe, stmts);
1849 if (new_var_p != NULL)
1850 *new_var_p = true;
1851 }
6c6a3430 1852
1853 return ni_name;
1854 }
1855}
1856
82112bf2 1857/* Calculate the number of iterations above which the vectorized loop is
 1858   preferred over the scalar loop.  NITERS_PROLOG is the number of iterations
 1859   of the prolog loop.  If it is an integer constant, its value is also passed
53771608 1860 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the
1861 number of iterations of the prolog loop. BOUND_EPILOG is the corresponding
1862 value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the
1863 threshold below which the scalar (rather than vectorized) loop will be
1864 executed. This function stores the upper bound (inclusive) of the result
1865 in BOUND_SCALAR. */
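/* Simple example (assumed bounds, CHECK_PROFITABILITY false): with
   BOUND_PROLOG == 3 and BOUND_EPILOG == 4 the scalar loop runs at most
   3 + 4 == 7 iterations, so *BOUND_SCALAR is set to 7 and the returned
   tree is

     niters_prolog + 4

   When CHECK_PROFITABILITY is true the result is further combined with
   TH as in the code below.  */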
6c6a3430 1866
1867static tree
1868vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
53771608 1869 int bound_prolog, poly_int64 bound_epilog, int th,
d75596cd 1870 poly_uint64 *bound_scalar,
1871 bool check_profitability)
6c6a3430 1872{
1873 tree type = TREE_TYPE (niters_prolog);
1874 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
53771608 1875 build_int_cst (type, bound_epilog));
6c6a3430 1876
53771608 1877 *bound_scalar = bound_prolog + bound_epilog;
6c6a3430 1878 if (check_profitability)
1879 {
82112bf2 1880 /* TH indicates the minimum niters of vectorized loop, while we
1881 compute the maximum niters of scalar loop. */
1882 th--;
6c6a3430 1883 /* Peeling for constant times. */
1884 if (int_niters_prolog >= 0)
1885 {
53771608 1886 *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
82112bf2 1887 return build_int_cst (type, *bound_scalar);
6c6a3430 1888 }
53771608 1889 /* Peeling an unknown number of times. Note that both BOUND_PROLOG
1890 and BOUND_EPILOG are inclusive upper bounds. */
1891 if (known_ge (th, bound_prolog + bound_epilog))
6c6a3430 1892 {
82112bf2 1893 *bound_scalar = th;
6c6a3430 1894 return build_int_cst (type, th);
1895 }
d75596cd 1896 /* Need to do runtime comparison. */
53771608 1897 else if (maybe_gt (th, bound_epilog))
d75596cd 1898 {
1899 *bound_scalar = upper_bound (*bound_scalar, th);
1900 return fold_build2 (MAX_EXPR, type,
1901 build_int_cst (type, th), niters);
1902 }
6c6a3430 1903 }
1904 return niters;
1905}
1906
cde959e7 1907/* NITERS is the number of times that the original scalar loop executes
1908 after peeling. Work out the maximum number of iterations N that can
1909 be handled by the vectorized form of the loop and then either:
fb85abff 1910
cde959e7 1911 a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
97fe80a6 1912
cde959e7 1913 niters_vector = N
1914
1915 b) set *STEP_VECTOR_PTR to one and generate:
1916
1917 niters_vector = N / vf
1918
1919 In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
1920 any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
1921 is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */
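/* Sketch of the two cases (VF assumed for illustration, ignoring the
   PEELING_FOR_GAPS adjustment): for a constant VF == 8, a loop that is
   not fully masked and NITERS known not to overflow, the code below
   generates

     niters_vector = niters >> 3;   step_vector = 1;

   whereas for a variable VF or a fully-masked loop it keeps

     niters_vector = niters;        step_vector = VF;  */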
fb85abff 1922
1923void
6c6a3430 1924vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
cde959e7 1925 tree *niters_vector_ptr, tree *step_vector_ptr,
1926 bool niters_no_overflow)
fb85abff 1927{
6c6a3430 1928 tree ni_minus_gap, var;
cde959e7 1929 tree niters_vector, step_vector, type = TREE_TYPE (niters);
d75596cd 1930 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6c6a3430 1931 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
cde959e7 1932 tree log_vf = NULL_TREE;
6c6a3430 1933
1934 /* If epilogue loop is required because of data accesses with gaps, we
1935 subtract one iteration from the total number of iterations here for
 1936      correct calculation of the number of vector iterations.  */
1937 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1938 {
3a815241 1939 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
1940 build_one_cst (type));
6c6a3430 1941 if (!is_gimple_val (ni_minus_gap))
1942 {
3a815241 1943 var = create_tmp_var (type, "ni_gap");
6c6a3430 1944 gimple *stmts = NULL;
1945 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1946 true, var);
1947 gsi_insert_seq_on_edge_immediate (pe, stmts);
1948 }
1949 }
1950 else
1951 ni_minus_gap = niters;
1952
d75596cd 1953 unsigned HOST_WIDE_INT const_vf;
60b29a7e 1954 if (vf.is_constant (&const_vf)
1955 && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
cde959e7 1956 {
1957 /* Create: niters >> log2(vf) */
1958 /* If it's known that niters == number of latch executions + 1 doesn't
1959 overflow, we can generate niters >> log2(vf); otherwise we generate
1960 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1961 will be at least one. */
d75596cd 1962 log_vf = build_int_cst (type, exact_log2 (const_vf));
cde959e7 1963 if (niters_no_overflow)
1964 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
1965 else
1966 niters_vector
1967 = fold_build2 (PLUS_EXPR, type,
1968 fold_build2 (RSHIFT_EXPR, type,
1969 fold_build2 (MINUS_EXPR, type,
1970 ni_minus_gap,
1971 build_int_cst (type, vf)),
1972 log_vf),
1973 build_int_cst (type, 1));
1974 step_vector = build_one_cst (type);
1975 }
6c6a3430 1976 else
cde959e7 1977 {
1978 niters_vector = ni_minus_gap;
1979 step_vector = build_int_cst (type, vf);
1980 }
6c6a3430 1981
1982 if (!is_gimple_val (niters_vector))
1983 {
3a815241 1984 var = create_tmp_var (type, "bnd");
1985 gimple_seq stmts = NULL;
6c6a3430 1986 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1987 gsi_insert_seq_on_edge_immediate (pe, stmts);
3a815241 1988      /* Peeling algorithm guarantees that vector loop bound is at least ONE,
 1989         so we set range information to make niters analyzer's life easier.  */
cde959e7 1990 if (stmts != NULL && log_vf)
e3d0f65c 1991 set_range_info (niters_vector, VR_RANGE,
1992 wi::to_wide (build_int_cst (type, 1)),
1993 wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
1994 TYPE_MAX_VALUE (type),
1995 log_vf)));
6c6a3430 1996 }
1997 *niters_vector_ptr = niters_vector;
cde959e7 1998 *step_vector_ptr = step_vector;
6c6a3430 1999
2000 return;
2001}
2002
 2003/* Given NITERS_VECTOR, the number of iterations of the vectorized loop
 2004   described by LOOP_VINFO, compute the corresponding number of scalar
 2005   iterations before vectorization (niters_vector * vf) and store it in
 2006   *NITERS_VECTOR_MULT_VF_PTR.  */
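/* For example (VF assumed for illustration): with VF == 4 (log_vf == 2)
   and NITERS_VECTOR == 10, the value computed below is 10 << 2 == 40
   scalar iterations.  */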
2007
2008static void
2009vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
2010 tree niters_vector,
2011 tree *niters_vector_mult_vf_ptr)
2012{
d75596cd 2013 /* We should be using a step_vector of VF if VF is variable. */
2014 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
fb85abff 2015 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6c6a3430 2016 tree type = TREE_TYPE (niters_vector);
2017 tree log_vf = build_int_cst (type, exact_log2 (vf));
2018 basic_block exit_bb = single_exit (loop)->dest;
fb85abff 2019
6c6a3430 2020 gcc_assert (niters_vector_mult_vf_ptr != NULL);
2021 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
2022 niters_vector, log_vf);
2023 if (!is_gimple_val (niters_vector_mult_vf))
2024 {
2025 tree var = create_tmp_var (type, "niters_vector_mult_vf");
2026 gimple_seq stmts = NULL;
2027 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
2028 &stmts, true, var);
2029 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
2030 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2031 }
2032 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
2033}
2034
 2035/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
 2036   from SECOND/FIRST and puts it at the original loop's preheader/exit
 2037   edge; the two loops are arranged as below:
2038
2039 preheader_a:
2040 first_loop:
2041 header_a:
2042 i_1 = PHI<i_0, i_2>;
2043 ...
2044 i_2 = i_1 + 1;
2045 if (cond_a)
2046 goto latch_a;
2047 else
2048 goto between_bb;
2049 latch_a:
2050 goto header_a;
2051
2052 between_bb:
2053 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
2054
2055 second_loop:
2056 header_b:
2057 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
2058 or with i_2 if no LCSSA phi is created
2059 under condition of CREATE_LCSSA_FOR_IV_PHIS.
2060 ...
2061 i_4 = i_3 + 1;
2062 if (cond_b)
2063 goto latch_b;
2064 else
2065 goto exit_bb;
2066 latch_b:
2067 goto header_b;
2068
2069 exit_bb:
2070
 2071   This function creates loop-closed SSA for the first loop and updates the
 2072   second loop's PHI nodes by replacing the argument on the incoming edge
 2073   with the result of the newly created lcssa PHI nodes.  If
 2074   CREATE_LCSSA_FOR_IV_PHIS is false, loop-closed SSA PHIs will only be
 2075   created for non-IV PHIs of the first loop.
2076
2077 This function assumes exit bb of the first loop is preheader bb of the
 2078   second loop, i.e., between_bb in the example code.  With PHIs updated,
 2079   the second loop will execute the remaining iterations of the first.  */
fb85abff 2080
6c6a3430 2081static void
2082slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
2083 struct loop *first, struct loop *second,
2084 bool create_lcssa_for_iv_phis)
2085{
2086 gphi_iterator gsi_update, gsi_orig;
2087 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2088
2089 edge first_latch_e = EDGE_SUCC (first->latch, 0);
2090 edge second_preheader_e = loop_preheader_edge (second);
2091 basic_block between_bb = single_exit (first)->dest;
2092
2093 gcc_assert (between_bb == second_preheader_e->src);
2094 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
2095 /* Either the first loop or the second is the loop to be vectorized. */
2096 gcc_assert (loop == first || loop == second);
2097
2098 for (gsi_orig = gsi_start_phis (first->header),
2099 gsi_update = gsi_start_phis (second->header);
2100 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2101 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2102 {
2103 gphi *orig_phi = gsi_orig.phi ();
2104 gphi *update_phi = gsi_update.phi ();
2105
2106 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
2107 /* Generate lcssa PHI node for the first loop. */
2108 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
e068828a 2109 stmt_vec_info vect_phi_info = loop_vinfo->lookup_stmt (vect_phi);
2110 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi_info))
6c6a3430 2111 {
2112 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2113 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
2114 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
2115 arg = new_res;
2116 }
2117
2118 /* Update PHI node in the second loop by replacing arg on the loop's
2119 incoming edge. */
2120 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
2121 }
2122}
2123
2124/* Function slpeel_add_loop_guard adds guard skipping from the beginning
2125 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
2126 are two pred edges of the merge point before UPDATE_LOOP. The two loops
2127 appear like below:
2128
2129 guard_bb:
2130 if (cond)
2131 goto merge_bb;
2132 else
2133 goto skip_loop;
2134
2135 skip_loop:
2136 header_a:
2137 i_1 = PHI<i_0, i_2>;
2138 ...
2139 i_2 = i_1 + 1;
2140 if (cond_a)
2141 goto latch_a;
2142 else
2143 goto exit_a;
2144 latch_a:
2145 goto header_a;
2146
2147 exit_a:
2148 i_5 = PHI<i_2>;
2149
2150 merge_bb:
2151 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
2152
2153 update_loop:
2154 header_b:
2155 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
2156 ...
2157 i_4 = i_3 + 1;
2158 if (cond_b)
2159 goto latch_b;
2160 else
2161 goto exit_bb;
2162 latch_b:
2163 goto header_b;
2164
2165 exit_bb:
2166
2167 This function creates PHI nodes at merge_bb and replaces the use of i_5
 2168   in the update_loop's PHI node with the new PHI result.  */
2169
2170static void
2171slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
2172 struct loop *update_loop,
2173 edge guard_edge, edge merge_edge)
2174{
be1e7283 2175 location_t merge_loc, guard_loc;
6c6a3430 2176 edge orig_e = loop_preheader_edge (skip_loop);
2177 edge update_e = loop_preheader_edge (update_loop);
2178 gphi_iterator gsi_orig, gsi_update;
2179
2180 for ((gsi_orig = gsi_start_phis (skip_loop->header),
2181 gsi_update = gsi_start_phis (update_loop->header));
2182 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2183 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2184 {
2185 gphi *orig_phi = gsi_orig.phi ();
2186 gphi *update_phi = gsi_update.phi ();
2187
2188 /* Generate new phi node at merge bb of the guard. */
2189 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2190 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
2191
2192 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
2193 args in NEW_PHI for these edges. */
2194 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
2195 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
2196 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
2197 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
2198 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
2199 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
2200
2201 /* Update phi in UPDATE_PHI. */
2202 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
2203 }
2204}
2205
 2206/* LCSSA_PHI is an lcssa phi of the EPILOG loop, which is copied from LOOP.
 2207   This function searches for the corresponding lcssa phi node in the exit
 2208   bb of LOOP.  If it is found, return the phi result; otherwise return
2209 NULL. */
2210
2211static tree
2212find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
2213 gphi *lcssa_phi)
2214{
2215 gphi_iterator gsi;
2216 edge e = single_exit (loop);
2217
2218 gcc_assert (single_pred_p (e->dest));
2219 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2220 {
2221 gphi *phi = gsi.phi ();
2222 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
2223 PHI_ARG_DEF (lcssa_phi, 0), 0))
2224 return PHI_RESULT (phi);
2225 }
2226 return NULL_TREE;
2227}
2228
2229/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
2230 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
2231 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
2232 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
2233 The CFG looks like:
2234
2235 loop:
2236 header_a:
2237 i_1 = PHI<i_0, i_2>;
2238 ...
2239 i_2 = i_1 + 1;
2240 if (cond_a)
2241 goto latch_a;
2242 else
2243 goto exit_a;
2244 latch_a:
2245 goto header_a;
2246
2247 exit_a:
2248
2249 guard_bb:
2250 if (cond)
2251 goto merge_bb;
2252 else
2253 goto epilog_loop;
2254
2255 ;; fall_through_bb
2256
2257 epilog_loop:
2258 header_b:
2259 i_3 = PHI<i_2, i_4>;
2260 ...
2261 i_4 = i_3 + 1;
2262 if (cond_b)
2263 goto latch_b;
2264 else
2265 goto merge_bb;
2266 latch_b:
2267 goto header_b;
2268
2269 merge_bb:
2270 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
2271
2272 exit_bb:
2273 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
2274
 2275   For each name used outside EPILOG (i.e. for each name that has an lcssa
2276 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
2277 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
2278 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
2279 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
2280 in exit_bb will also be updated. */
2281
2282static void
2283slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
2284 edge guard_edge, edge merge_edge)
2285{
2286 gphi_iterator gsi;
2287 basic_block merge_bb = guard_edge->dest;
2288
2289 gcc_assert (single_succ_p (merge_bb));
2290 edge e = single_succ_edge (merge_bb);
2291 basic_block exit_bb = e->dest;
2292 gcc_assert (single_pred_p (exit_bb));
2293 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
2294
2295 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2296 {
2297 gphi *update_phi = gsi.phi ();
2298 tree old_arg = PHI_ARG_DEF (update_phi, 0);
2299 /* This loop-closed-phi actually doesn't represent a use out of the
2300 loop - the phi arg is a constant. */
2301 if (TREE_CODE (old_arg) != SSA_NAME)
2302 continue;
2303
2304 tree merge_arg = get_current_def (old_arg);
2305 if (!merge_arg)
2306 merge_arg = old_arg;
2307
2308 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
2309 /* If the var is live after loop but not a reduction, we simply
2310 use the old arg. */
2311 if (!guard_arg)
2312 guard_arg = old_arg;
2313
2314 /* Create new phi node in MERGE_BB: */
2315 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
2316 gphi *merge_phi = create_phi_node (new_res, merge_bb);
2317
 2318      /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE.  Set
2319 the two PHI args in merge_phi for these edges. */
2320 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
2321 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
2322
2323 /* Update the original phi in exit_bb. */
2324 adjust_phi_and_debug_stmts (update_phi, e, new_res);
2325 }
2326}
2327
 2328/* The EPILOG loop is duplicated from the original loop for vectorizing;
 2329   the args of its loop-closed SSA PHIs need to be updated.  */
2330
2331static void
2332slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
2333{
2334 gphi_iterator gsi;
2335 basic_block exit_bb = single_exit (epilog)->dest;
2336
2337 gcc_assert (single_pred_p (exit_bb));
2338 edge e = EDGE_PRED (exit_bb, 0);
2339 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2340 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
2341}
2342
2343/* Function vect_do_peeling.
2344
2345 Input:
2346 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
2347
2348 preheader:
2349 LOOP:
2350 header_bb:
2351 loop_body
2352 if (exit_loop_cond) goto exit_bb
2353 else goto header_bb
2354 exit_bb:
2355
2356 - NITERS: The number of iterations of the loop.
2357 - NITERSM1: The number of iterations of the loop's latch.
2358 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
2359 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
2360 CHECK_PROFITABILITY is true.
2361 Output:
cde959e7 2362 - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
60b29a7e 2363 iterate after vectorization; see vect_set_loop_condition for details.
cde959e7 2364 - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
2365 should be set to the number of scalar iterations handled by the
2366 vector loop. The SSA name is only used on exit from the loop.
6c6a3430 2367
2368 This function peels prolog and epilog from the loop, adds guards skipping
2369 PROLOG and EPILOG for various conditions. As a result, the changed CFG
2370 would look like:
2371
2372 guard_bb_1:
2373 if (prefer_scalar_loop) goto merge_bb_1
2374 else goto guard_bb_2
2375
2376 guard_bb_2:
2377 if (skip_prolog) goto merge_bb_2
2378 else goto prolog_preheader
2379
2380 prolog_preheader:
2381 PROLOG:
2382 prolog_header_bb:
2383 prolog_body
2384 if (exit_prolog_cond) goto prolog_exit_bb
2385 else goto prolog_header_bb
2386 prolog_exit_bb:
2387
2388 merge_bb_2:
2389
2390 vector_preheader:
2391 VECTOR LOOP:
2392 vector_header_bb:
2393 vector_body
2394 if (exit_vector_cond) goto vector_exit_bb
2395 else goto vector_header_bb
2396 vector_exit_bb:
2397
2398 guard_bb_3:
2399 if (skip_epilog) goto merge_bb_3
2400 else goto epilog_preheader
2401
2402 merge_bb_1:
2403
2404 epilog_preheader:
2405 EPILOG:
2406 epilog_header_bb:
2407 epilog_body
2408 if (exit_epilog_cond) goto merge_bb_3
2409 else goto epilog_header_bb
2410
2411 merge_bb_3:
2412
 2413   Note this function peels the prolog and the epilog, and adds the
 2414   corresponding guards, only when necessary.
5b631e09 2415   Returns the created epilogue loop or NULL.
6c6a3430 2416
2417 TODO: Guard for prefer_scalar_loop should be emitted along with
2418 versioning conditions if loop versioning is needed. */
2419
5b631e09 2420
2421struct loop *
6c6a3430 2422vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
cde959e7 2423 tree *niters_vector, tree *step_vector,
2424 tree *niters_vector_mult_vf_var, int th,
2425 bool check_profitability, bool niters_no_overflow)
6c6a3430 2426{
2427 edge e, guard_e;
2428 tree type = TREE_TYPE (niters), guard_cond;
2429 basic_block guard_bb, guard_to;
720cfc43 2430 profile_probability prob_prolog, prob_vector, prob_epilog;
d75596cd 2431 int estimated_vf;
6753a4bf 2432 int prolog_peeling = 0;
e092c20e 2433 /* We currently do not support prolog peeling if the target alignment is not
2434 known at compile time. 'vect_gen_prolog_loop_niters' depends on the
2435 target alignment being constant. */
2436 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2437 if (dr_info && !DR_TARGET_ALIGNMENT (dr_info).is_constant ())
2438 return NULL;
2439
6753a4bf 2440 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
2441 prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
53771608 2442
2443 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2444 poly_uint64 bound_epilog = 0;
2445 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
2446 && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2447 bound_epilog += vf - 1;
2448 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2449 bound_epilog += 1;
2450 bool epilog_peeling = maybe_ne (bound_epilog, 0U);
2451 poly_uint64 bound_scalar = bound_epilog;
6c6a3430 2452
2453 if (!prolog_peeling && !epilog_peeling)
5b631e09 2454 return NULL;
6c6a3430 2455
720cfc43 2456 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
d75596cd 2457 estimated_vf = vect_vf_for_cost (loop_vinfo);
2458 if (estimated_vf == 2)
2459 estimated_vf = 3;
720cfc43 2460 prob_prolog = prob_epilog = profile_probability::guessed_always ()
d75596cd 2461 .apply_scale (estimated_vf - 1, estimated_vf);
6c6a3430 2462
5b631e09 2463 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
6c6a3430 2464 struct loop *first_loop = loop;
15492f79 2465 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
6c6a3430 2466 create_lcssa_for_virtual_phi (loop);
2467 update_ssa (TODO_update_ssa_only_virtuals);
2468
c64f38bf 2469 if (MAY_HAVE_DEBUG_BIND_STMTS)
6c6a3430 2470 {
2471 gcc_assert (!adjust_vec.exists ());
2472 adjust_vec.create (32);
2473 }
fb85abff 2474 initialize_original_copy_tables ();
2475
53771608 2476 /* Record the anchor bb at which the guard should be placed if the scalar
2477 loop might be preferred. */
2478 basic_block anchor = loop_preheader_edge (loop)->src;
2479
2480 /* Generate the number of iterations for the prolog loop. We do this here
2481 so that we can also get the upper bound on the number of iterations. */
2482 tree niters_prolog;
2483 int bound_prolog = 0;
2484 if (prolog_peeling)
2485 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
2486 &bound_prolog);
2487 else
2488 niters_prolog = build_int_cst (type, 0);
2489
6c6a3430 2490 /* Prolog loop may be skipped. */
2491 bool skip_prolog = (prolog_peeling != 0);
32236f80 2492  /* Skip to the epilog if the scalar loop may be preferred.  This is only
 2493     needed when we peel for the epilog loop and when the check hasn't
 2494     already been done by loop versioning.  */
53771608 2495 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2496 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
2497 bound_prolog + bound_epilog)
2498 : !LOOP_REQUIRES_VERSIONING (loop_vinfo));
6c6a3430 2499 /* Epilog loop must be executed if the number of iterations for epilog
2500 loop is known at compile time, otherwise we need to add a check at
2501 the end of vector loop and skip to the end of epilog loop. */
2502 bool skip_epilog = (prolog_peeling < 0
d75596cd 2503 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2504 || !vf.is_constant ());
6c6a3430 2505 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
2506 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2507 skip_epilog = false;
2508
6c6a3430 2509 if (skip_vector)
c214c858 2510 {
2511 split_edge (loop_preheader_edge (loop));
2512
2513 /* Due to the order in which we peel prolog and epilog, we first
2514 propagate probability to the whole loop. The purpose is to
2515 avoid adjusting probabilities of both prolog and vector loops
2516 separately. Note in this case, the probability of epilog loop
2517 needs to be scaled back later. */
2518 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
720cfc43 2519 if (prob_vector.initialized_p ())
d75596cd 2520 {
2521 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
2522 scale_loop_profile (loop, prob_vector, 0);
2523 }
c214c858 2524 }
6c6a3430 2525
c309657f 2526 dump_user_location_t loop_loc = find_loop_location (loop);
6c6a3430 2527 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2528 if (prolog_peeling)
2f630015 2529 {
6c6a3430 2530 e = loop_preheader_edge (loop);
2531 if (!slpeel_can_duplicate_loop_p (loop, e))
2f630015 2532 {
6c6a3430 2533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2534 "loop can't be duplicated to preheader edge.\n");
2535 gcc_unreachable ();
2536 }
2537 /* Peel prolog and put it on preheader edge of loop. */
2538 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
2539 if (!prolog)
2540 {
2541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2542 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
2543 gcc_unreachable ();
2544 }
2545 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
2546 first_loop = prolog;
2547 reset_original_copy_tables ();
2548
53771608 2549 /* Update the number of iterations for prolog loop. */
cde959e7 2550 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
60b29a7e 2551 vect_set_loop_condition (prolog, NULL, niters_prolog,
2552 step_prolog, NULL_TREE, false);
6c6a3430 2553
2554 /* Skip the prolog loop. */
2555 if (skip_prolog)
2556 {
2557 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
2558 niters_prolog, build_int_cst (type, 0));
2559 guard_bb = loop_preheader_edge (prolog)->src;
c214c858 2560 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
6c6a3430 2561 guard_to = split_edge (loop_preheader_edge (loop));
2562 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
2563 guard_to, guard_bb,
720cfc43 2564 prob_prolog.invert (),
15492f79 2565 irred_flag);
6c6a3430 2566 e = EDGE_PRED (guard_to, 0);
2567 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
2568 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
c214c858 2569
ca69b069 2570 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
2571 scale_loop_profile (prolog, prob_prolog, bound_prolog);
6c6a3430 2572 }
2573 /* Update init address of DRs. */
6753a4bf 2574 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
6c6a3430 2575 /* Update niters for vector loop. */
2576 LOOP_VINFO_NITERS (loop_vinfo)
2577 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
2578 LOOP_VINFO_NITERSM1 (loop_vinfo)
2579 = fold_build2 (MINUS_EXPR, type,
2580 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
3a815241 2581 bool new_var_p = false;
2582 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
 2583      /* It's guaranteed that the vector loop bound before vectorization is at
 2584         least VF, so set range information for the newly generated var.  */
2585 if (new_var_p)
2586 set_range_info (niters, VR_RANGE,
e3d0f65c 2587 wi::to_wide (build_int_cst (type, vf)),
2588 wi::to_wide (TYPE_MAX_VALUE (type)));
6c6a3430 2589
82112bf2 2590 /* Prolog iterates at most bound_prolog times, latch iterates at
2591 most bound_prolog - 1 times. */
2592 record_niter_bound (prolog, bound_prolog - 1, false, true);
6c6a3430 2593 delete_update_ssa ();
2594 adjust_vec_debug_stmts ();
2595 scev_reset ();
2f630015 2596 }
2597
6c6a3430 2598 if (epilog_peeling)
2599 {
2600 e = single_exit (loop);
2601 if (!slpeel_can_duplicate_loop_p (loop, e))
2602 {
2603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2604 "loop can't be duplicated to exit edge.\n");
2605 gcc_unreachable ();
2606 }
2607 /* Peel epilog and put it on exit edge of loop. */
2608 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
2609 if (!epilog)
2610 {
2611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2612 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
2613 gcc_unreachable ();
2614 }
2615 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
2616
2617 /* Scalar version loop may be preferred. In this case, add guard
2618 and skip to epilog. Note this only happens when the number of
2619 iterations of loop is unknown at compile time, otherwise this
2620 won't be vectorized. */
2621 if (skip_vector)
2622 {
82112bf2 2623 /* Additional epilogue iteration is peeled if gap exists. */
6c6a3430 2624 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
53771608 2625 bound_prolog, bound_epilog,
82112bf2 2626 th, &bound_scalar,
6c6a3430 2627 check_profitability);
82112bf2 2628 /* Build guard against NITERSM1 since NITERS may overflow. */
2629 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
6c6a3430 2630 guard_bb = anchor;
2631 guard_to = split_edge (loop_preheader_edge (epilog));
2632 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
2633 guard_to, guard_bb,
720cfc43 2634 prob_vector.invert (),
15492f79 2635 irred_flag);
6c6a3430 2636 e = EDGE_PRED (guard_to, 0);
2637 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
2638 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
c214c858 2639
2640 /* Simply propagate profile info from guard_bb to guard_to which is
2641 a merge point of control flow. */
c214c858 2642 guard_to->count = guard_bb->count;
dcc86e2d 2643
ca69b069 2644 /* Scale probability of epilog loop back.
2645 FIXME: We should avoid scaling down and back up. Profile may
2646 get lost if we scale down to 0. */
12b55cc8 2647 basic_block *bbs = get_loop_body (epilog);
dcc86e2d 2648 for (unsigned int i = 0; i < epilog->num_nodes; i++)
2649 bbs[i]->count = bbs[i]->count.apply_scale
2650 (bbs[i]->count,
2651 bbs[i]->count.apply_probability
2652 (prob_vector));
ca69b069 2653 free (bbs);
6c6a3430 2654 }
fb85abff 2655
c214c858 2656 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
6c6a3430 2657 tree niters_vector_mult_vf;
 2658      /* If the loop is peeled a non-zero constant number of times, NITERS now
 2659         refers to orig_niters - prolog_peeling; it won't overflow even if
 2660         orig_niters overflows.  */
2661 niters_no_overflow |= (prolog_peeling > 0);
2662 vect_gen_vector_loop_niters (loop_vinfo, niters,
cde959e7 2663 niters_vector, step_vector,
2664 niters_no_overflow);
2665 if (!integer_onep (*step_vector))
2666 {
 2667          /* On exit from the loop we will have an easy way of calculating
2668 NITERS_VECTOR / STEP * STEP. Install a dummy definition
2669 until then. */
2670 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
2671 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
2672 *niters_vector_mult_vf_var = niters_vector_mult_vf;
2673 }
2674 else
2675 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
2676 &niters_vector_mult_vf);
6c6a3430 2677 /* Update IVs of original loop as if they were advanced by
2678 niters_vector_mult_vf steps. */
2679 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
2680 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
2681 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
2682 update_e);
2683
2684 if (skip_epilog)
2685 {
2686 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
2687 niters, niters_vector_mult_vf);
2688 guard_bb = single_exit (loop)->dest;
2689 guard_to = split_edge (single_exit (epilog));
2690 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
2691 skip_vector ? anchor : guard_bb,
720cfc43 2692 prob_epilog.invert (),
15492f79 2693 irred_flag);
6c6a3430 2694 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
2695 single_exit (epilog));
c214c858 2696 /* Only need to handle basic block before epilog loop if it's not
2697 the guard_bb, which is the case when skip_vector is true. */
2698 if (guard_bb != bb_before_epilog)
2699 {
720cfc43 2700 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
c214c858 2701
ca69b069 2702 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
c214c858 2703 }
d75596cd 2704 scale_loop_profile (epilog, prob_epilog, 0);
6c6a3430 2705 }
2706 else
2707 slpeel_update_phi_nodes_for_lcssa (epilog);
fb85abff 2708
53771608 2709 unsigned HOST_WIDE_INT bound;
2710 if (bound_scalar.is_constant (&bound))
d75596cd 2711 {
53771608 2712 gcc_assert (bound != 0);
2713 /* -1 to convert loop iterations to latch iterations. */
2714 record_niter_bound (epilog, bound - 1, false, true);
d75596cd 2715 }
6c6a3430 2716
2717 delete_update_ssa ();
2718 adjust_vec_debug_stmts ();
2719 scev_reset ();
2720 }
2721 adjust_vec.release ();
fb85abff 2722 free_original_copy_tables ();
5b631e09 2723
2724 return epilog;
fb85abff 2725}
2726
d5e80d93 2727/* Function vect_create_cond_for_niters_checks.
2728
2729 Create a conditional expression that represents the run-time checks for
f9936b7c 2730   the loop's niter.  The loop is guaranteed to terminate if the run-time
d5e80d93 2731 checks hold.
2732
2733 Input:
 2734   COND_EXPR - input conditional expression.  New conditions will be chained
 2735               with logical AND operation.
 2736   LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the list of
 2737                assumptions under which the loop's niter information is
 2738                valid and the loop is guaranteed to terminate.
2739
2740 Output:
2741 COND_EXPR - conditional expression.
2742
2743 The returned COND_EXPR is the conditional expression to be used in the
2744 if statement that controls which version of the loop gets executed at
2745 runtime. */
2746
2747static void
2748vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2749{
2750 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
2751
2752 if (*cond_expr)
2753 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2754 *cond_expr, part_cond_expr);
2755 else
2756 *cond_expr = part_cond_expr;
2757}
fb85abff 2758
f68a7726 2759/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2760 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
2761
2762static void
2763chain_cond_expr (tree *cond_expr, tree part_cond_expr)
2764{
2765 if (*cond_expr)
2766 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2767 *cond_expr, part_cond_expr);
2768 else
2769 *cond_expr = part_cond_expr;
2770}
2771
fb85abff 2772/* Function vect_create_cond_for_align_checks.
2773
2774 Create a conditional expression that represents the alignment checks for
2775 all of data references (array element references) whose alignment must be
2776 checked at runtime.
2777
2778 Input:
2779 COND_EXPR - input conditional expression. New conditions will be chained
2780 with logical AND operation.
2781 LOOP_VINFO - two fields of the loop information are used.
2782 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2783 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2784
2785 Output:
2786 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2787 expression.
2788 The returned value is the conditional expression to be used in the if
2789 statement that controls which version of the loop gets executed at runtime.
2790
2791 The algorithm makes two assumptions:
2792 1) The number of bytes "n" in a vector is a power of 2.
2793 2) An address "a" is aligned if a%n is zero and that this
2794 test can be done as a&(n-1) == 0. For example, for 16
2795 byte vectors the test is a&0xf == 0. */
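/* Illustrative sketch of the sequence built below for two data references
   and 16-byte vectors (the data references and the mask value are
   assumptions; the temporary names follow the patterns used below):

     addr2int0 = (signed ptr-sized int) &<first vector of dr 0>;
     addr2int1 = (signed ptr-sized int) &<first vector of dr 1>;
     orptrs1   = addr2int0 | addr2int1;
     andmask   = orptrs1 & 0xf;

   and the condition chained into *COND_EXPR is (andmask == 0).  */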
2796
2797static void
2798vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2799 tree *cond_expr,
2800 gimple_seq *cond_expr_stmt_list)
2801{
ab98e625 2802 vec<stmt_vec_info> may_misalign_stmts
fb85abff 2803 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
ab98e625 2804 stmt_vec_info stmt_info;
fb85abff 2805 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2806 tree mask_cst;
2807 unsigned int i;
fb85abff 2808 tree int_ptrsize_type;
2809 char tmp_name[20];
2810 tree or_tmp_name = NULL_TREE;
03d37e4e 2811 tree and_tmp_name;
42acab1c 2812 gimple *and_stmt;
fb85abff 2813 tree ptrsize_zero;
2814 tree part_cond_expr;
2815
2816 /* Check that mask is one less than a power of 2, i.e., mask is
2817 all zeros followed by all ones. */
2818 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2819
3cea8318 2820 int_ptrsize_type = signed_type_for (ptr_type_node);
fb85abff 2821
 2822  /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
2823 of the first vector of the i'th data reference. */
2824
ab98e625 2825 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
fb85abff 2826 {
2827 gimple_seq new_stmt_list = NULL;
2828 tree addr_base;
03d37e4e 2829 tree addr_tmp_name;
2830 tree new_or_tmp_name;
42acab1c 2831 gimple *addr_stmt, *or_stmt;
ab98e625 2832 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
f1b8c740 2833 bool negative = tree_int_cst_compare
ab98e625 2834 (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
f1b8c740 2835 tree offset = negative
80b8a97a 2836 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
fb85abff 2837
2838 /* create: addr_tmp = (int)(address_of_first_vector) */
2839 addr_base =
ab98e625 2840 vect_create_addr_base_for_vector_ref (stmt_info, &new_stmt_list,
9e879814 2841 offset);
fb85abff 2842 if (new_stmt_list != NULL)
2843 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2844
03d37e4e 2845 sprintf (tmp_name, "addr2int%d", i);
2846 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
e9cf809e 2847 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
fb85abff 2848 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2849
 2850      /* The addresses are ORed together.  */
2851
2852 if (or_tmp_name != NULL_TREE)
2853 {
2854 /* create: or_tmp = or_tmp | addr_tmp */
03d37e4e 2855 sprintf (tmp_name, "orptrs%d", i);
2856 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
e9cf809e 2857 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2858 or_tmp_name, addr_tmp_name);
fb85abff 2859 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2860 or_tmp_name = new_or_tmp_name;
2861 }
2862 else
2863 or_tmp_name = addr_tmp_name;
2864
2865 } /* end for i */
2866
2867 mask_cst = build_int_cst (int_ptrsize_type, mask);
2868
2869 /* create: and_tmp = or_tmp & mask */
03d37e4e 2870 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
fb85abff 2871
e9cf809e 2872 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2873 or_tmp_name, mask_cst);
fb85abff 2874 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2875
2876 /* Make and_tmp the left operand of the conditional test against zero.
 2877     If and_tmp has a nonzero bit then some address is unaligned.  */
2878 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2879 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2880 and_tmp_name, ptrsize_zero);
f68a7726 2881 chain_cond_expr (cond_expr, part_cond_expr);
2882}
2883
2884/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
2885 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
2886 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2887 and this new condition are true. Treat a null *COND_EXPR as "true". */
2888
2889static void
2890vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
2891{
2892 vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
2893 unsigned int i;
2894 vec_object_pair *pair;
2895 FOR_EACH_VEC_ELT (pairs, i, pair)
2896 {
2897 tree addr1 = build_fold_addr_expr (pair->first);
2898 tree addr2 = build_fold_addr_expr (pair->second);
2899 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
2900 addr1, addr2);
2901 chain_cond_expr (cond_expr, part_cond_expr);
2902 }
fb85abff 2903}
2904
e85b4a5e 2905/* Create an expression that is true when all lower-bound conditions for
2906 the vectorized loop are met. Chain this condition with *COND_EXPR. */
2907
2908static void
2909vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
2910{
2911 vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
2912 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
2913 {
2914 tree expr = lower_bounds[i].expr;
2915 tree type = unsigned_type_for (TREE_TYPE (expr));
2916 expr = fold_convert (type, expr);
2917 poly_uint64 bound = lower_bounds[i].min_value;
2918 if (!lower_bounds[i].unsigned_p)
2919 {
2920 expr = fold_build2 (PLUS_EXPR, type, expr,
2921 build_int_cstu (type, bound - 1));
2922 bound += bound - 1;
2923 }
2924 tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
2925 build_int_cstu (type, bound));
2926 chain_cond_expr (cond_expr, part_cond_expr);
2927 }
2928}
2929
fb85abff 2930/* Function vect_create_cond_for_alias_checks.
2931
2932 Create a conditional expression that represents the run-time checks for
2933 overlapping of address ranges represented by a list of data references
2934 relations passed as input.
2935
2936 Input:
2937 COND_EXPR - input conditional expression. New conditions will be chained
8a7b0f48 2938 with logical AND operation. If it is NULL, then the function
2939 is used to return the number of alias checks.
fb85abff 2940 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2941 to be checked.
2942
2943 Output:
2944 COND_EXPR - conditional expression.
fb85abff 2945
8a7b0f48 2946 The returned COND_EXPR is the conditional expression to be used in the if
fb85abff 2947 statement that controls which version of the loop gets executed at runtime.
2948*/
2949
8a7b0f48 2950void
90d4c4af 2951vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
fb85abff 2952{
43d14b66 2953 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
8a7b0f48 2954 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
fb85abff 2955
8a7b0f48 2956 if (comp_alias_ddrs.is_empty ())
fb85abff 2957 return;
2958
49ce332c 2959 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
2960 &comp_alias_ddrs, cond_expr);
6d8fb6cf 2961 if (dump_enabled_p ())
b055bc88 2962 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 2963 "created %u versioning for alias checks.\n",
8a7b0f48 2964 comp_alias_ddrs.length ());
fb85abff 2965}
2966
2967
2968/* Function vect_loop_versioning.
48e1416a 2969
fb85abff 2970   If the loop has data references that may or may not be aligned and/or
2971 has data reference relations whose independence was not proven then
2972 two versions of the loop need to be generated, one which is vectorized
2973 and one which isn't. A test is then generated to control which of the
2974 loops is executed. The test checks for the alignment of all of the
2975 data references that may or may not be aligned. An additional
 2976   sequence of runtime tests is generated for each pair of DDRs whose
48e1416a 2977   independence was not proven.  The vectorized version of the loop is
 2978   executed only if both the alias and alignment tests pass.
2979
fb85abff 2980 The test generated to check which version of loop is executed
48e1416a 2981 is modified to also check for profitability as indicated by the
97fe80a6 2982 cost model threshold TH.
23a3430d 2983
2984 The versioning precondition(s) are placed in *COND_EXPR and
2afdcbed 2985 *COND_EXPR_STMT_LIST. */
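/* Conceptual example of the versioning condition (all values assumed):
   with a cost-model threshold TH == 7, an alignment mask of 0xf and one
   pair of objects whose addresses must be distinct, the guard is roughly

     if (nitersm1 >= 6
         && ((addr_a | addr_b) & 0xf) == 0
         && &obj_a != &obj_b)
       ... vectorized loop ...
     else
       ... scalar loop ...

   where the individual terms are built by the helper functions above.  */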
fb85abff 2986
2987void
e7430948 2988vect_loop_versioning (loop_vec_info loop_vinfo,
7456a7ea 2989 unsigned int th, bool check_profitability,
2990 poly_uint64 versioning_threshold)
fb85abff 2991{
d5e80d93 2992 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
c71d3c24 2993 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
fb85abff 2994 basic_block condition_bb;
1a91d914 2995 gphi_iterator gsi;
2996 gimple_stmt_iterator cond_exp_gsi;
fb85abff 2997 basic_block merge_bb;
2998 basic_block new_exit_bb;
2999 edge new_exit_e, e;
1a91d914 3000 gphi *orig_phi, *new_phi;
e7430948 3001 tree cond_expr = NULL_TREE;
2afdcbed 3002 gimple_seq cond_expr_stmt_list = NULL;
fb85abff 3003 tree arg;
ca69b069 3004 profile_probability prob = profile_probability::likely ();
fb85abff 3005 gimple_seq gimplify_stmt_list = NULL;
b622227e 3006 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
6ee2edad 3007 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
3008 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
d5e80d93 3009 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
fb85abff 3010
e7430948 3011 if (check_profitability)
ba12948e 3012 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
d5e80d93 3013 build_int_cst (TREE_TYPE (scalar_loop_iters),
b622227e 3014 th - 1));
7456a7ea 3015 if (maybe_ne (versioning_threshold, 0U))
3016 {
3017 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
3018 build_int_cst (TREE_TYPE (scalar_loop_iters),
3019 versioning_threshold - 1));
3020 if (cond_expr)
3021 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
3022 expr, cond_expr);
3023 else
3024 cond_expr = expr;
3025 }
d5e80d93 3026
3027 if (version_niter)
3028 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
3029
3030 if (cond_expr)
3031 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
3032 is_gimple_condexpr, NULL_TREE);
fb85abff 3033
6ee2edad 3034 if (version_align)
2afdcbed 3035 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
3036 &cond_expr_stmt_list);
fb85abff 3037
6ee2edad 3038 if (version_alias)
f68a7726 3039 {
3040 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
e85b4a5e 3041 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
f68a7726 3042 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
3043 }
23a3430d 3044
f404501a 3045 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3046 &gimplify_stmt_list,
2afdcbed 3047 is_gimple_condexpr, NULL_TREE);
3048 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
fb85abff 3049
3050 initialize_original_copy_tables ();
c71d3c24 3051 if (scalar_loop)
3052 {
3053 edge scalar_e;
3054 basic_block preheader, scalar_preheader;
3055
3056 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
3057 scale LOOP's frequencies instead. */
b6863ffa 3058 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
ca69b069 3059 prob, prob.invert (), prob, prob.invert (), true);
3060 scale_loop_frequencies (loop, prob);
c71d3c24 3061 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
3062 but we need to move it above LOOP's preheader. */
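 /* A sketch of the intended rewiring (assuming the shape produced by
    loop_version above): the old predecessor of CONDITION_BB falls through
    to SCALAR_LOOP's preheader as it did before versioning, while
    CONDITION_BB is re-inserted in front of LOOP's preheader so that it
    chooses between LOOP and NLOOP, the fresh copy of SCALAR_LOOP.  */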
3063 e = loop_preheader_edge (loop);
3064 scalar_e = loop_preheader_edge (scalar_loop);
57abb697 3065 /* The vector loop preheader might not be empty, since new
3066 invariants could have been created while analyzing the loop. */
3067 gcc_assert (single_pred_p (e->src));
c71d3c24 3068 gcc_assert (empty_block_p (scalar_e->src)
3069 && single_pred_p (scalar_e->src));
3070 gcc_assert (single_pred_p (condition_bb));
3071 preheader = e->src;
3072 scalar_preheader = scalar_e->src;
3073 scalar_e = find_edge (condition_bb, scalar_preheader);
3074 e = single_pred_edge (preheader);
3075 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
3076 scalar_preheader);
3077 redirect_edge_and_branch_force (scalar_e, preheader);
3078 redirect_edge_and_branch_force (e, condition_bb);
3079 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
3080 single_pred (condition_bb));
3081 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
3082 single_pred (scalar_preheader));
3083 set_immediate_dominator (CDI_DOMINATORS, preheader,
3084 condition_bb);
3085 }
3086 else
d5e80d93 3087 nloop = loop_version (loop, cond_expr, &condition_bb,
ca69b069 3088 prob, prob.invert (), prob, prob.invert (), true);
d5e80d93 3089
3090 if (version_niter)
3091 {
3092 /* The versioned loop could be infinite, so we need to clear the existing
 3093 niter information, which was copied from the original loop. */
3094 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
3095 vect_free_loop_info_assumptions (nloop);
3096 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
3097 loop_constraint_set (loop, LOOP_C_INFINITE);
3098 }
6ee2edad 3099
c309657f 3100 if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
6ee2edad 3101 && dump_enabled_p ())
3102 {
3103 if (version_alias)
9ddd8fa7 3104 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
3105 vect_location,
6ee2edad 3106 "loop versioned for vectorization because of "
3107 "possible aliasing\n");
3108 if (version_align)
9ddd8fa7 3109 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
3110 vect_location,
6ee2edad 3111 "loop versioned for vectorization to enhance "
3112 "alignment\n");
3113
3114 }
9af5ce0c 3115 free_original_copy_tables ();
fb85abff 3116
48e1416a 3117 /* Loop versioning violates an assumption we try to maintain during
fb85abff 3118 vectorization - that the loop exit block has a single predecessor.
3119 After versioning, the exit block of both loop versions is the same
3120 basic block (i.e. it has two predecessors). In order to simplify the
3121 following transformations in the vectorizer, we fix this situation
3122 here by adding a new (empty) block on the exit-edge of the loop,
c71d3c24 3123 with the proper loop-exit phis to maintain loop-closed-form.
3124 If loop versioning wasn't done from loop, but from scalar_loop instead,
 3125 merge_bb will already have just a single predecessor. */
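 /* A rough picture (illustrative, two-predecessor case only):

      before:  exit(LOOP) ---------------> merge_bb <--- exit(other version)

      after:   exit(LOOP) -> new_exit_bb -> merge_bb <--- exit(other version)

    NEW_EXIT_BB receives copies of MERGE_BB's loop-exit phis, keeping the
    loop in loop-closed SSA form.  */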
48e1416a 3126
fb85abff 3127 merge_bb = single_exit (loop)->dest;
c71d3c24 3128 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
fb85abff 3129 {
c71d3c24 3130 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
3131 new_exit_bb = split_edge (single_exit (loop));
3132 new_exit_e = single_exit (loop);
3133 e = EDGE_SUCC (new_exit_bb, 0);
3134
3135 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
3136 {
3137 tree new_res;
1a91d914 3138 orig_phi = gsi.phi ();
f9e245b2 3139 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
c71d3c24 3140 new_phi = create_phi_node (new_res, new_exit_bb);
3141 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
3142 add_phi_arg (new_phi, arg, new_exit_e,
3143 gimple_phi_arg_location_from_edge (orig_phi, e));
3144 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
3145 }
48e1416a 3146 }
fb85abff 3147
3148 /* End loop-exit-fixes after versioning. */
3149
2afdcbed 3150 if (cond_expr_stmt_list)
fb85abff 3151 {
3152 cond_exp_gsi = gsi_last_bb (condition_bb);
2afdcbed 3153 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
23a3430d 3154 GSI_SAME_STMT);
fb85abff 3155 }
95e19962 3156 update_ssa (TODO_update_ssa);
fb85abff 3157}