/* Vectorizer Specific Loop Manipulations
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "fold-const.h"
#include "cfganal.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-manip.h"
#include "tree-into-ssa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-ssa-loop-ivopts.h"
#include "gimple-fold.h"
#include "tree-ssa-loop-niter.h"
#include "internal-fn.h"
#include "stor-layout.h"
#include "optabs-query.h"
#include "vec-perm-indices.h"

/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/


/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}

/* Renames the variables in basic block BB.  Allow renaming of PHI arguments
   on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
   true.  */

static void
rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
{
  gimple *stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;
  struct loop *outer_loop = NULL;

  if (rename_from_outer_loop)
    {
      gcc_assert (loop);
      outer_loop = loop_outer (loop);
    }

  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->preds)
    {
      if (!flow_bb_inside_loop_p (loop, e->src))
        {
          if (!rename_from_outer_loop)
            continue;
          if (e->src != outer_loop->header)
            {
              if (outer_loop->inner->next)
                {
                  /* If outer_loop has 2 inner loops, allow there to
                     be an extra basic block which decides which of the
                     two loops to use using LOOP_VECTORIZED.  */
                  if (!single_pred_p (e->src)
                      || single_pred (e->src) != outer_loop->header)
                    continue;
                }
            }
        }
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
    }
}


struct adjust_info
{
  tree from, to;
  basic_block bb;
};

/* A stack of values to be adjusted in debug stmts.  We have to
   process them LIFO, so that the closest substitution applies.  If we
   processed them FIFO, without the stack, we might substitute uses
   with a PHI DEF that would soon become non-dominant, and when we got
   to the suitable one, it wouldn't have anything to substitute any
   more.  */
static vec<adjust_info, va_heap> adjust_vec;

/* Adjust any debug stmts that referenced AI->from values to use the
   loop-closed AI->to, if the references are dominated by AI->bb and
   not by the definition of AI->from.  */

static void
adjust_debug_stmts_now (adjust_info *ai)
{
  basic_block bbphi = ai->bb;
  tree orig_def = ai->from;
  tree new_def = ai->to;
  imm_use_iterator imm_iter;
  gimple *stmt;
  basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));

  gcc_assert (dom_info_available_p (CDI_DOMINATORS));

  /* Adjust any debug stmts that held onto non-loop-closed
     references.  */
  FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
    {
      use_operand_p use_p;
      basic_block bbuse;

      if (!is_gimple_debug (stmt))
        continue;

      gcc_assert (gimple_debug_bind_p (stmt));

      bbuse = gimple_bb (stmt);

      if ((bbuse == bbphi
           || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
          && !(bbuse == bbdef
               || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
        {
          if (new_def)
            FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
              SET_USE (use_p, new_def);
          else
            {
              gimple_debug_bind_reset_value (stmt);
              update_stmt (stmt);
            }
        }
    }
}

/* Adjust debug stmts as scheduled before.  */

static void
adjust_vec_debug_stmts (void)
{
  if (!MAY_HAVE_DEBUG_BIND_STMTS)
    return;

  gcc_assert (adjust_vec.exists ());

  while (!adjust_vec.is_empty ())
    {
      adjust_debug_stmts_now (&adjust_vec.last ());
      adjust_vec.pop ();
    }
}

/* Adjust any debug stmts that referenced FROM values to use the
   loop-closed TO, if the references are dominated by BB and not by
   the definition of FROM.  If adjust_vec is non-NULL, adjustments
   will be postponed until adjust_vec_debug_stmts is called.  */

static void
adjust_debug_stmts (tree from, tree to, basic_block bb)
{
  adjust_info ai;

  if (MAY_HAVE_DEBUG_BIND_STMTS
      && TREE_CODE (from) == SSA_NAME
      && ! SSA_NAME_IS_DEFAULT_DEF (from)
      && ! virtual_operand_p (from))
    {
      ai.from = from;
      ai.to = to;
      ai.bb = bb;

      if (adjust_vec.exists ())
        adjust_vec.safe_push (ai);
      else
        adjust_debug_stmts_now (&ai);
    }
}

/* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
   to adjust any debug stmts that referenced the old phi arg,
   presumably non-loop-closed references left over from other
   transformations.  */

static void
adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
{
  tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);

  SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);

  if (MAY_HAVE_DEBUG_BIND_STMTS)
    adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
                        gimple_bb (update_phi));
}

/* Define one loop mask MASK from loop LOOP.  INIT_MASK is the value that
   the mask should have during the first iteration and NEXT_MASK is the
   value that it should have on subsequent iterations.  */

static void
vect_set_loop_mask (struct loop *loop, tree mask, tree init_mask,
                    tree next_mask)
{
  gphi *phi = create_phi_node (mask, loop->header);
  add_phi_arg (phi, init_mask, loop_preheader_edge (loop), UNKNOWN_LOCATION);
  add_phi_arg (phi, next_mask, loop_latch_edge (loop), UNKNOWN_LOCATION);
}

/* Add SEQ to the end of LOOP's preheader block.  */

static void
add_preheader_seq (struct loop *loop, gimple_seq seq)
{
  if (seq)
    {
      edge pe = loop_preheader_edge (loop);
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
      gcc_assert (!new_bb);
    }
}

/* Add SEQ to the beginning of LOOP's header block.  */

static void
add_header_seq (struct loop *loop, gimple_seq seq)
{
  if (seq)
    {
      gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
      gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
    }
}

/* Return true if the target can interleave elements of two vectors.
   OFFSET is 0 if the first half of the vectors should be interleaved
   or 1 if the second half should.  When returning true, store the
   associated permutation in INDICES.  */

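/* Added illustration (not part of the original sources): for a 4-element
   VECTYPE, OFFSET 0 builds the selector { 0, 4, 1, 5 }, i.e. the
   "interleave low" permutation of the two inputs, while OFFSET 1 builds
   { 2, 6, 3, 7 }, the "interleave high" permutation.  */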
static bool
interleave_supported_p (vec_perm_indices *indices, tree vectype,
                        unsigned int offset)
{
  poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 base = exact_div (nelts, 2) * offset;
  vec_perm_builder sel (nelts, 2, 3);
  for (unsigned int i = 0; i < 3; ++i)
    {
      sel.quick_push (base + i);
      sel.quick_push (base + i + nelts);
    }
  indices->new_vector (sel, 2, nelts);
  return can_vec_perm_const_p (TYPE_MODE (vectype), *indices);
}

/* Try to use permutes to define the masks in DEST_RGM using the masks
   in SRC_RGM, given that the former has twice as many masks as the
   latter.  Return true on success, adding any new statements to SEQ.  */

static bool
vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
                               rgroup_masks *src_rgm)
{
  tree src_masktype = src_rgm->mask_type;
  tree dest_masktype = dest_rgm->mask_type;
  machine_mode src_mode = TYPE_MODE (src_masktype);
  if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
      && optab_handler (vec_unpacku_hi_optab, src_mode) != CODE_FOR_nothing
      && optab_handler (vec_unpacku_lo_optab, src_mode) != CODE_FOR_nothing)
    {
      /* Unpacking the source masks gives at least as many mask bits as
         we need.  We can then VIEW_CONVERT any excess bits away.  */
      tree unpack_masktype = vect_halve_mask_nunits (src_masktype);
      for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
        {
          tree src = src_rgm->masks[i / 2];
          tree dest = dest_rgm->masks[i];
          tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
                            ? VEC_UNPACK_HI_EXPR
                            : VEC_UNPACK_LO_EXPR);
          gassign *stmt;
          if (dest_masktype == unpack_masktype)
            stmt = gimple_build_assign (dest, code, src);
          else
            {
              tree temp = make_ssa_name (unpack_masktype);
              stmt = gimple_build_assign (temp, code, src);
              gimple_seq_add_stmt (seq, stmt);
              stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
                                          build1 (VIEW_CONVERT_EXPR,
                                                  dest_masktype, temp));
            }
          gimple_seq_add_stmt (seq, stmt);
        }
      return true;
    }
  vec_perm_indices indices[2];
  if (dest_masktype == src_masktype
      && interleave_supported_p (&indices[0], src_masktype, 0)
      && interleave_supported_p (&indices[1], src_masktype, 1))
    {
      /* The destination requires twice as many mask bits as the source, so
         we can use interleaving permutes to double up the number of bits.  */
      tree masks[2];
      for (unsigned int i = 0; i < 2; ++i)
        masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
      for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
        {
          tree src = src_rgm->masks[i / 2];
          tree dest = dest_rgm->masks[i];
          gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
                                              src, src, masks[i & 1]);
          gimple_seq_add_stmt (seq, stmt);
        }
      return true;
    }
  return false;
}

/* Helper for vect_set_loop_condition_masked.  Generate definitions for
   all the masks in RGM and return a mask that is nonzero when the loop
   needs to iterate.  Add any new preheader statements to PREHEADER_SEQ.
   Use LOOP_COND_GSI to insert code before the exit gcond.

   RGM belongs to loop LOOP.  The loop originally iterated NITERS
   times and has been vectorized according to LOOP_VINFO.  Each iteration
   of the vectorized loop handles VF iterations of the scalar loop.

   If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
   starts with NITERS_SKIP dummy iterations of the scalar loop before
   the real work starts.  The mask elements for these dummy iterations
   must be 0, to ensure that the extra iterations do not have an effect.

   It is known that:

     NITERS * RGM->max_nscalars_per_iter

   does not overflow.  However, MIGHT_WRAP_P says whether an induction
   variable that starts at 0 and has step:

     VF * RGM->max_nscalars_per_iter

   might overflow before hitting a value above:

     (NITERS + NITERS_SKIP) * RGM->max_nscalars_per_iter

   This means that we cannot guarantee that such an induction variable
   would ever hit a value that produces a set of all-false masks for RGM.  */

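/* Added illustration (not from the original sources): with a 32-bit
   COMPARE_TYPE, an rgroup whose max_nscalars_per_iter is 2 and NITERS
   close to 2^31, an IV stepping by VF * 2 per iteration could wrap
   around before exceeding (NITERS + NITERS_SKIP) * 2; MIGHT_WRAP_P is
   then true and the IV is compared against an adjusted limit before the
   increment (see the might_wrap_p handling below).  */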
static tree
vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
                              gimple_seq *preheader_seq,
                              gimple_stmt_iterator loop_cond_gsi,
                              rgroup_masks *rgm, tree vf,
                              tree niters, tree niters_skip,
                              bool might_wrap_p)
{
  tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
  tree mask_type = rgm->mask_type;
  unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
  poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);

  /* Calculate the maximum number of scalar values that the rgroup
     handles in total, the number that it handles for each iteration
     of the vector loop, and the number that it should skip during the
     first iteration of the vector loop.  */
  tree nscalars_total = niters;
  tree nscalars_step = vf;
  tree nscalars_skip = niters_skip;
  if (nscalars_per_iter != 1)
    {
      /* We checked before choosing to use a fully-masked loop that these
         multiplications don't overflow.  */
      tree factor = build_int_cst (compare_type, nscalars_per_iter);
      nscalars_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
                                     nscalars_total, factor);
      nscalars_step = gimple_build (preheader_seq, MULT_EXPR, compare_type,
                                    nscalars_step, factor);
      if (nscalars_skip)
        nscalars_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
                                      nscalars_skip, factor);
    }

  /* Create an induction variable that counts the number of scalars
     processed.  */
  tree index_before_incr, index_after_incr;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree zero_index = build_int_cst (compare_type, 0);
  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (zero_index, nscalars_step, NULL_TREE, loop, &incr_gsi,
             insert_after, &index_before_incr, &index_after_incr);

  tree test_index, test_limit, first_limit;
  gimple_stmt_iterator *test_gsi;
  if (might_wrap_p)
    {
      /* In principle the loop should stop iterating once the incremented
         IV reaches a value greater than or equal to:

           NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP

         However, there's no guarantee that this addition doesn't overflow
         the comparison type, or that the IV hits a value above it before
         wrapping around.  We therefore adjust the limit down by one
         IV step:

           (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
           -[infinite-prec] NSCALARS_STEP

         and compare the IV against this limit _before_ incrementing it.
         Since the comparison type is unsigned, we actually want the
         subtraction to saturate at zero:

           (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
           -[sat] NSCALARS_STEP

         And since NSCALARS_SKIP < NSCALARS_STEP, we can reassociate this as:

           NSCALARS_TOTAL -[sat] (NSCALARS_STEP - NSCALARS_SKIP)

         where the rightmost subtraction can be done directly in
         COMPARE_TYPE.  */
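      /* Illustrative numbers (an added example, not from the original
         sources): NSCALARS_TOTAL = 1000, NSCALARS_STEP = 16 and
         NSCALARS_SKIP = 3 give an adjustment of 13 and a limit of
         MAX (1000, 13) - 13 = 987, matching the infinite-precision
         value 1000 + 3 - 16.  */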
      test_index = index_before_incr;
      tree adjust = nscalars_step;
      if (nscalars_skip)
        adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
                               adjust, nscalars_skip);
      test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
                                 nscalars_total, adjust);
      test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
                                 test_limit, adjust);
      test_gsi = &incr_gsi;

      /* Get a safe limit for the first iteration.  */
      if (nscalars_skip)
        {
          /* The first vector iteration can handle at most NSCALARS_STEP
             scalars.  NSCALARS_STEP <= CONST_LIMIT, and adding
             NSCALARS_SKIP to that cannot overflow.  */
          tree const_limit = build_int_cst (compare_type,
                                            LOOP_VINFO_VECT_FACTOR (loop_vinfo)
                                            * nscalars_per_iter);
          first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
                                      nscalars_total, const_limit);
          first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
                                      first_limit, nscalars_skip);
        }
      else
        /* For the first iteration it doesn't matter whether the IV hits
           a value above NSCALARS_TOTAL.  That only matters for the latch
           condition.  */
        first_limit = nscalars_total;
    }
  else
    {
      /* Test the incremented IV, which will always hit a value above
         the bound before wrapping.  */
      test_index = index_after_incr;
      test_limit = nscalars_total;
      if (nscalars_skip)
        test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
                                   test_limit, nscalars_skip);
      test_gsi = &loop_cond_gsi;

      first_limit = test_limit;
    }

  /* Provide a definition of each mask in the group.  */
  tree next_mask = NULL_TREE;
  tree mask;
  unsigned int i;
  FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask)
    {
      /* Previous masks will cover BIAS scalars.  This mask covers the
         next batch.  */
      poly_uint64 bias = nscalars_per_mask * i;
      tree bias_tree = build_int_cst (compare_type, bias);
      gimple *tmp_stmt;

      /* See whether the first iteration of the vector loop is known
         to have a full mask.  */
      poly_uint64 const_limit;
      bool first_iteration_full
        = (poly_int_tree_p (first_limit, &const_limit)
           && known_ge (const_limit, (i + 1) * nscalars_per_mask));

      /* Rather than have a new IV that starts at BIAS and goes up to
         TEST_LIMIT, prefer to use the same 0-based IV for each mask
         and adjust the bound down by BIAS.  */
      tree this_test_limit = test_limit;
      if (i != 0)
        {
          this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
                                          compare_type, this_test_limit,
                                          bias_tree);
          this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
                                          compare_type, this_test_limit,
                                          bias_tree);
        }

      /* Create the initial mask.  First include all scalars that
         are within the loop limit.  */
      tree init_mask = NULL_TREE;
      if (!first_iteration_full)
        {
          tree start, end;
          if (first_limit == test_limit)
            {
              /* Use a natural test between zero (the initial IV value)
                 and the loop limit.  The "else" block would be valid too,
                 but this choice can avoid the need to load BIAS_TREE into
                 a register.  */
              start = zero_index;
              end = this_test_limit;
            }
          else
            {
              /* FIRST_LIMIT is the maximum number of scalars handled by the
                 first iteration of the vector loop.  Test the portion
                 associated with this mask.  */
              start = bias_tree;
              end = first_limit;
            }

          init_mask = make_temp_ssa_name (mask_type, NULL, "max_mask");
          tmp_stmt = vect_gen_while (init_mask, start, end);
          gimple_seq_add_stmt (preheader_seq, tmp_stmt);
        }

      /* Now AND out the bits that are within the number of skipped
         scalars.  */
      poly_uint64 const_skip;
      if (nscalars_skip
          && !(poly_int_tree_p (nscalars_skip, &const_skip)
               && known_le (const_skip, bias)))
        {
          tree unskipped_mask = vect_gen_while_not (preheader_seq, mask_type,
                                                    bias_tree, nscalars_skip);
          if (init_mask)
            init_mask = gimple_build (preheader_seq, BIT_AND_EXPR, mask_type,
                                      init_mask, unskipped_mask);
          else
            init_mask = unskipped_mask;
        }

      if (!init_mask)
        /* First iteration is full.  */
        init_mask = build_minus_one_cst (mask_type);

      /* Get the mask value for the next iteration of the loop.  */
      next_mask = make_temp_ssa_name (mask_type, NULL, "next_mask");
      gcall *call = vect_gen_while (next_mask, test_index, this_test_limit);
      gsi_insert_before (test_gsi, call, GSI_SAME_STMT);

      vect_set_loop_mask (loop, mask, init_mask, next_mask);
    }
  return next_mask;
}

/* Make LOOP iterate NITERS times using masking and WHILE_ULT calls.
   LOOP_VINFO describes the vectorization of LOOP.  NITERS is the
   number of iterations of the original scalar loop that should be
   handled by the vector loop.  NITERS_MAYBE_ZERO and FINAL_IV are
   as for vect_set_loop_condition.

   Insert the branch-back condition before LOOP_COND_GSI and return the
   final gcond.  */

static gcond *
vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo,
                                tree niters, tree final_iv,
                                bool niters_maybe_zero,
                                gimple_stmt_iterator loop_cond_gsi)
{
  gimple_seq preheader_seq = NULL;
  gimple_seq header_seq = NULL;

  tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
  unsigned int compare_precision = TYPE_PRECISION (compare_type);
  unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo);
  tree orig_niters = niters;

  /* Type of the initial value of NITERS.  */
  tree ni_actual_type = TREE_TYPE (niters);
  unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);

  /* Convert NITERS to the same size as the compare.  */
  if (compare_precision > ni_actual_precision
      && niters_maybe_zero)
    {
      /* We know that there is always at least one iteration, so if the
         count is zero then it must have wrapped.  Cope with this by
         subtracting 1 before the conversion and adding 1 to the result.  */
      gcc_assert (TYPE_UNSIGNED (ni_actual_type));
      niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
                             niters, build_minus_one_cst (ni_actual_type));
      niters = gimple_convert (&preheader_seq, compare_type, niters);
      niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
                             niters, build_one_cst (compare_type));
    }
  else
    niters = gimple_convert (&preheader_seq, compare_type, niters);

  /* Convert skip_niters to the right type.  */
  tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);

  /* Now calculate the value that the induction variable must be able
     to hit in order to ensure that we end the loop with an all-false mask.
     This involves adding the maximum number of inactive trailing scalar
     iterations.  */
  widest_int iv_limit;
  bool known_max_iters = max_loop_iterations (loop, &iv_limit);
  if (known_max_iters)
    {
      if (niters_skip)
        {
          /* Add the maximum number of skipped iterations to the
             maximum iteration count.  */
          if (TREE_CODE (niters_skip) == INTEGER_CST)
            iv_limit += wi::to_widest (niters_skip);
          else
            iv_limit += max_vf - 1;
        }
      /* IV_LIMIT is the maximum number of latch iterations, which is also
         the maximum in-range IV value.  Round this value down to the previous
         vector alignment boundary and then add an extra full iteration.  */
      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf;
    }

  /* Get the vectorization factor in tree form.  */
  tree vf = build_int_cst (compare_type,
                           LOOP_VINFO_VECT_FACTOR (loop_vinfo));

  /* Iterate over all the rgroups and fill in their masks.  We could use
     the first mask from any rgroup for the loop condition; here we
     arbitrarily pick the last.  */
  tree test_mask = NULL_TREE;
  rgroup_masks *rgm;
  unsigned int i;
  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  FOR_EACH_VEC_ELT (*masks, i, rgm)
    if (!rgm->masks.is_empty ())
      {
        /* First try using permutes.  This adds a single vector
           instruction to the loop for each mask, but needs no extra
           loop invariants or IVs.  */
        unsigned int nmasks = i + 1;
        if ((nmasks & 1) == 0)
          {
            rgroup_masks *half_rgm = &(*masks)[nmasks / 2 - 1];
            if (!half_rgm->masks.is_empty ()
                && vect_maybe_permute_loop_masks (&header_seq, rgm, half_rgm))
              continue;
          }

        /* See whether zero-based IV would ever generate all-false masks
           before wrapping around.  */
        bool might_wrap_p
          = (!known_max_iters
             || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter,
                                    UNSIGNED)
                 > compare_precision));

        /* Set up all masks for this group.  */
        test_mask = vect_set_loop_masks_directly (loop, loop_vinfo,
                                                  &preheader_seq,
                                                  loop_cond_gsi, rgm, vf,
                                                  niters, niters_skip,
                                                  might_wrap_p);
      }

  /* Emit all accumulated statements.  */
  add_preheader_seq (loop, preheader_seq);
  add_header_seq (loop, header_seq);

  /* Get a boolean result that tells us whether to iterate.  */
  edge exit_edge = single_exit (loop);
  tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
  tree zero_mask = build_zero_cst (TREE_TYPE (test_mask));
  gcond *cond_stmt = gimple_build_cond (code, test_mask, zero_mask,
                                        NULL_TREE, NULL_TREE);
  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* The loop iterates (NITERS - 1) / VF + 1 times.
     Subtract one from this to get the latch count.  */
  tree step = build_int_cst (compare_type,
                             LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
                                       build_minus_one_cst (compare_type));
  loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
                                     niters_minus_one, step);

  if (final_iv)
    {
      gassign *assign = gimple_build_assign (final_iv, orig_niters);
      gsi_insert_on_edge_immediate (single_exit (loop), assign);
    }

  return cond_stmt;
}

/* Like vect_set_loop_condition, but handle the case in which there
   are no loop masks.  */

static gcond *
vect_set_loop_condition_unmasked (struct loop *loop, tree niters,
                                  tree step, tree final_iv,
                                  bool niters_maybe_zero,
                                  gimple_stmt_iterator loop_cond_gsi)
{
  tree indx_before_incr, indx_after_incr;
  gcond *cond_stmt;
  gcond *orig_cond;
  edge pe = loop_preheader_edge (loop);
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  enum tree_code code;
  tree niters_type = TREE_TYPE (niters);

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  tree init, limit;
  if (!niters_maybe_zero && integer_onep (step))
    {
      /* In this case we can use a simple 0-based IV:

         A:
           x = 0;
           do
             {
               ...
               x += 1;
             }
           while (x < NITERS);  */
      code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
      init = build_zero_cst (niters_type);
      limit = niters;
    }
  else
    {
      /* The following works for all values of NITERS except 0:

         B:
           x = 0;
           do
             {
               ...
               x += STEP;
             }
           while (x <= NITERS - STEP);

         so that the loop continues to iterate if x + STEP - 1 < NITERS
         but stops if x + STEP - 1 >= NITERS.

         However, if NITERS is zero, x never hits a value above NITERS - STEP
         before wrapping around.  There are two obvious ways of dealing with
         this:

         - start at STEP - 1 and compare x before incrementing it
         - start at -1 and compare x after incrementing it

         The latter is simpler and is what we use.  The loop in this case
         looks like:

         C:
           x = -1;
           do
             {
               ...
               x += STEP;
             }
           while (x < NITERS - STEP);

         In both cases the loop limit is NITERS - STEP.  */
      gimple_seq seq = NULL;
      limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
      limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
      if (seq)
        {
          basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
          gcc_assert (!new_bb);
        }
      if (niters_maybe_zero)
        {
          /* Case C.  */
          code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
          init = build_all_ones_cst (niters_type);
        }
      else
        {
          /* Case B.  */
          code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
          init = build_zero_cst (niters_type);
        }
    }

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
                                    true, GSI_SAME_STMT);

  cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Record the number of latch iterations.  */
  if (limit == niters)
    /* Case A: the loop iterates NITERS times.  Subtract one to get the
       latch count.  */
    loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
                                       build_int_cst (niters_type, 1));
  else
    /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
       Subtract one from this to get the latch count.  */
    loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
                                       limit, step);

  if (final_iv)
    {
      gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
                                             indx_after_incr, init);
      gsi_insert_on_edge_immediate (single_exit (loop), assign);
    }

  return cond_stmt;
}

/* If we're using fully-masked loops, make LOOP iterate:

      N == (NITERS - 1) / STEP + 1

   times.  When NITERS is zero, this is equivalent to making the loop
   execute (1 << M) / STEP times, where M is the precision of NITERS.
   NITERS_MAYBE_ZERO is true if this last case might occur.

   If we're not using fully-masked loops, make LOOP iterate:

      N == (NITERS - STEP) / STEP + 1

   times, where NITERS is known to be outside the range [1, STEP - 1].
   This is equivalent to making the loop execute NITERS / STEP times
   when NITERS is nonzero and (1 << M) / STEP times otherwise.
   NITERS_MAYBE_ZERO again indicates whether this last case might occur.

   If FINAL_IV is nonnull, it is an SSA name that should be set to
   N * STEP on exit from the loop.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

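/* Worked example (added illustration, not from the original sources):
   for NITERS = 10 and STEP = 4, a fully-masked loop runs
   (10 - 1) / 4 + 1 = 3 times, with the last iteration only partially
   active; in the unmasked case, NITERS = 12 and STEP = 4 give
   (12 - 4) / 4 + 1 = 3 iterations, i.e. NITERS / STEP.  */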
void
vect_set_loop_condition (struct loop *loop, loop_vec_info loop_vinfo,
                         tree niters, tree step, tree final_iv,
                         bool niters_maybe_zero)
{
  gcond *cond_stmt;
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);

  if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
    cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
                                                final_iv, niters_maybe_zero,
                                                loop_cond_gsi);
  else
    cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
                                                  final_iv, niters_maybe_zero,
                                                  loop_cond_gsi);

  /* Remove old loop exit test.  */
  gsi_remove (&loop_cond_gsi, true);
  free_stmt_vec_info (orig_cond);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, cond_stmt, 0);
    }
}

/* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
   For all PHI arguments in FROM->dest and TO->dest from those
   edges ensure that TO->dest PHI arguments have current_def
   to that in from.  */

static void
slpeel_duplicate_current_defs_from_edges (edge from, edge to)
{
  gimple_stmt_iterator gsi_from, gsi_to;

  for (gsi_from = gsi_start_phis (from->dest),
       gsi_to = gsi_start_phis (to->dest);
       !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
    {
      gimple *from_phi = gsi_stmt (gsi_from);
      gimple *to_phi = gsi_stmt (gsi_to);
      tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
      tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
      if (virtual_operand_p (from_arg))
        {
          gsi_next (&gsi_from);
          continue;
        }
      if (virtual_operand_p (to_arg))
        {
          gsi_next (&gsi_to);
          continue;
        }
      if (TREE_CODE (from_arg) != SSA_NAME)
        gcc_assert (operand_equal_p (from_arg, to_arg, 0));
      else
        {
          if (get_current_def (to_arg) == NULL_TREE)
            set_current_def (to_arg, get_current_def (from_arg));
        }
      gsi_next (&gsi_from);
      gsi_next (&gsi_to);
    }

  gphi *from_phi = get_virtual_phi (from->dest);
  gphi *to_phi = get_virtual_phi (to->dest);
  if (from_phi)
    set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
                     get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
}


/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  If SCALAR_LOOP is
   non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
   basic blocks from SCALAR_LOOP instead of LOOP, but to either the
   entry or exit of LOOP.  */

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
                                        struct loop *scalar_loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs, *pbbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  edge exit, new_exit;
  bool duplicate_outer_loop = false;

  exit = single_exit (loop);
  at_exit = (e == exit);
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  if (scalar_loop == NULL)
    scalar_loop = loop;

  bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
  pbbs = bbs + 1;
  get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
  /* Allow duplication of outer loops.  */
  if (scalar_loop->inner)
    duplicate_outer_loop = true;
  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
  duplicate_subloops (scalar_loop, new_loop);

  exit_dest = exit->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  /* Also copy the pre-header, this avoids jumping through hoops to
     duplicate the loop entry PHI arguments.  Create an empty
     pre-header unconditionally for this.  */
  basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
  edge entry_e = single_pred_edge (preheader);
  bbs[0] = preheader;
  new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);

  exit = single_exit (scalar_loop);
  copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
            &exit, 1, &new_exit, NULL,
            at_exit ? loop->latch : e->src, true);
  exit = single_exit (loop);
  basic_block new_preheader = new_bbs[0];

  add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);

  if (scalar_loop != loop)
    {
      /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
         SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
         but LOOP will not.  slpeel_update_phi_nodes_for_guard{1,2} expects
         the LOOP SSA_NAMEs (on the exit edge and edge from latch to
         header) to have current_def set, so copy them over.  */
      slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
                                                exit);
      slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
                                                           0),
                                                EDGE_SUCC (loop->latch, 0));
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      if (scalar_loop != loop)
        {
          gphi_iterator gsi;
          new_exit = redirect_edge_and_branch (new_exit, exit_dest);

          for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
               gsi_next (&gsi))
            {
              gphi *phi = gsi.phi ();
              tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
              location_t orig_locus
                = gimple_phi_arg_location_from_edge (phi, e);

              add_phi_arg (phi, orig_arg, new_exit, orig_locus);
            }
        }
      redirect_edge_and_branch_force (e, new_preheader);
      flush_pending_stmts (e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
      if (was_imm_dom || duplicate_outer_loop)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (preheader),
                          single_pred (preheader));
      delete_basic_block (preheader);
      set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                               loop_preheader_edge (scalar_loop)->src);
    }
  else /* Add the copy at entry.  */
    {
      if (scalar_loop != loop)
        {
          /* Remove the non-necessary forwarder of scalar_loop again.  */
          redirect_edge_pred (single_succ_edge (preheader),
                              single_pred (preheader));
          delete_basic_block (preheader);
          set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
                                   loop_preheader_edge (scalar_loop)->src);
          preheader = split_edge (loop_preheader_edge (loop));
          entry_e = single_pred_edge (preheader);
        }

      redirect_edge_and_branch_force (entry_e, new_preheader);
      flush_pending_stmts (entry_e);
      set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);

      redirect_edge_and_branch_force (new_exit, preheader);
      flush_pending_stmts (new_exit);
      set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);

      /* And remove the non-necessary forwarder again.  Keep the other
         one so we have a proper pre-header for the loop at the exit edge.  */
      redirect_edge_pred (single_succ_edge (new_preheader),
                          single_pred (new_preheader));
      delete_basic_block (new_preheader);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
                               loop_preheader_edge (new_loop)->src);
    }

  /* Skip new preheader since it's deleted if copy loop is added at entry.  */
  for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
    rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);

  if (scalar_loop != loop)
    {
      /* Update new_loop->header PHIs, so that on the preheader
         edge they are the ones from loop rather than scalar_loop.  */
      gphi_iterator gsi_orig, gsi_new;
      edge orig_e = loop_preheader_edge (loop);
      edge new_e = loop_preheader_edge (new_loop);

      for (gsi_orig = gsi_start_phis (loop->header),
           gsi_new = gsi_start_phis (new_loop->header);
           !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
           gsi_next (&gsi_orig), gsi_next (&gsi_new))
        {
          gphi *orig_phi = gsi_orig.phi ();
          gphi *new_phi = gsi_new.phi ();
          tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
          location_t orig_locus
            = gimple_phi_arg_location_from_edge (orig_phi, orig_e);

          add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
        }
    }

  free (new_bbs);
  free (bbs);

  checking_verify_dominators (CDI_DOMINATORS);

  return new_loop;
}


/* Given the condition expression COND, put it as the last statement of
   GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
   DOM_BB; return the skip edge.  GUARD_TO is the target basic block to
   skip the loop.  PROBABILITY is the skip edge's probability.  Mark the
   new edge as irreducible if IRREDUCIBLE_P is true.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       basic_block guard_to, basic_block dom_bb,
                       profile_probability probability, bool irreducible_p)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gcond *cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
                                 NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);

  new_e->probability = probability;
  if (irreducible_p)
    new_e->flags |= EDGE_IRREDUCIBLE_LOOP;

  enter_e->probability = probability.invert ();
  set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);

  /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS.  */
  if (enter_e->dest->loop_father->header == enter_e->dest)
    split_edge (enter_e);

  return new_e;
}


/* This function verifies that the following restrictions apply to LOOP:
   (1) it consists of exactly 2 basic blocks - header, and an empty latch
       for innermost loop and 5 basic blocks for outer-loop.
   (2) it is single entry, single exit
   (3) its exit condition is the last stmt in the header
   (4) E is the entry/exit edge of LOOP.
 */

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gcond *orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
  unsigned int num_bb = loop->inner? 5 : 2;

  /* All loops have an outer scope; the only case loop->outer is NULL is for
     the function itself.  */
  if (!loop_outer (loop)
      || loop->num_nodes != num_bb
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}

/* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
   in the exit bb and rename all the uses after the loop.  This simplifies
   the *guard[12] routines, which assume loop closed SSA form for all PHIs
   (but normally loop closed SSA form doesn't require virtual PHIs to be
   in the same form).  Doing this early simplifies the checking what
   uses should be renamed.  */

static void
create_lcssa_for_virtual_phi (struct loop *loop)
{
  gphi_iterator gsi;
  edge exit_e = single_exit (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
      {
        gphi *phi = gsi.phi ();
        for (gsi = gsi_start_phis (exit_e->dest);
             !gsi_end_p (gsi); gsi_next (&gsi))
          if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
            break;
        if (gsi_end_p (gsi))
          {
            tree new_vop = copy_ssa_name (PHI_RESULT (phi));
            gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
            tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
            imm_use_iterator imm_iter;
            gimple *stmt;
            use_operand_p use_p;

            SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
              = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
            add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
            gimple_phi_set_result (new_phi, new_vop);
            FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
              if (stmt != new_phi
                  && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
                FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                  SET_USE (use_p, new_vop);
          }
        break;
      }

}

/* Function vect_get_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if succeed and NULL if not.  */

source_location
find_loop_location (struct loop *loop)
{
  gimple *stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOCATION;

  stmt = get_loop_exit_condition (loop);

  if (stmt
      && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed",
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOCATION;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOCATION;
}

/* Return true if PHI defines an IV of the loop to be vectorized.  */

static bool
iv_phi_p (gphi *phi)
{
  if (virtual_operand_p (PHI_RESULT (phi)))
    return false;

  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  gcc_assert (stmt_info != NULL);
  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
      || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
    return false;

  return true;
}

/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at compile
   time, an epilog loop will be generated, and the loop induction variables
   (IVs) will be "advanced" to the value they are supposed to take just before
   the epilog loop.  Here we check that the access function of the loop IVs
   and the expression that represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */

bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gphi_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree evolution_part;

      gphi *phi = gsi.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.

         Skip reduction phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      /* Analyze the evolution function.  */

      evolution_part
        = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      if (evolution_part == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf (MSG_MISSED_OPTIMIZATION,
                         "No access function or evolution.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are not invariants in the loop.  */

      if (!expr_invariant_in_loop_p (loop, evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution not invariant in loop.\n");
          return false;
        }

      /* FORNOW: We do not transform initial conditions of IVs
         which evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "evolution is chrec.\n");
          return false;
        }
    }

  return true;
}


/* Function vect_update_ivs_after_vectorizer.

   "Advance" the induction variables of LOOP to the value they should take
   after the execution of LOOP.  This is currently necessary because the
   vectorizer does not handle induction variables that are used after the
   loop.  Such a situation occurs when the last iterations of LOOP are
   peeled, because:
   1. We introduced new uses after LOOP for IVs that were not originally used
      after LOOP: the IVs of LOOP are now used by an epilog loop.
   2. LOOP is going to be vectorized; this means that it will iterate N/VF
      times, whereas the loop IVs should be bumped N times.

   Input:
   - LOOP - a loop that is going to be vectorized.  The last few iterations
     of LOOP were peeled.
   - NITERS - the number of iterations that LOOP executes (before it is
     vectorized).  i.e, the number of times the ivs should be bumped.
   - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
     coming out from LOOP on which there are uses of the LOOP ivs
     (this is the path from LOOP->exit to epilog_loop->preheader).

     The new definitions of the ivs are placed in LOOP->exit.
     The phi args associated with the edge UPDATE_E in the bb
     UPDATE_E->dest are updated accordingly.

   Assumption 1: Like the rest of the vectorizer, this function assumes
   a single loop exit that has a single predecessor.

   Assumption 2: The phi nodes in the LOOP header and in update_bb are
   organized in the same order.

   Assumption 3: The access function of the ivs is simple enough (see
   vect_can_advance_ivs_p).  This assumption will be relaxed in the future.

   Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
   coming out of LOOP on which the ivs of LOOP are used (this is the path
   that leads to the epilog loop; other paths skip the epilog loop).  This
   path starts with the edge UPDATE_E, and its destination (denoted update_bb)
   needs to have its phis updated.
 */

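/* Added illustration (not from the original sources): for an IV with
   initial value 0 and step 4 in a loop with NITERS = 100, the value
   computed below and inserted in the exit block is 0 + 100 * 4 = 400,
   which then replaces the corresponding PHI argument on UPDATE_E so the
   epilog loop resumes from the right point.  */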
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
                                  tree niters, edge update_e)
{
  gphi_iterator gsi, gsi1;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block update_bb = update_e->dest;
  basic_block exit_bb = single_exit (loop)->dest;

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));
  gcc_assert (single_succ_edge (exit_bb) == update_e);

  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      gphi *phi = gsi.phi ();
      gphi *phi1 = gsi1.phi ();
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "vect_update_ivs_after_vectorizer: phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
        }

      /* Skip reduction and virtual phis.  */
      if (!iv_phi_p (phi))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "reduc or virtual phi. skip.\n");
          continue;
        }

      type = TREE_TYPE (gimple_phi_result (phi));
      step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (vinfo_for_stmt (phi));
      step_expr = unshare_expr (step_expr);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (step_expr));

      init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                         fold_convert (TREE_TYPE (step_expr), niters),
                         step_expr);
      if (POINTER_TYPE_P (type))
        ni = fold_build_pointer_plus (init_expr, off);
      else
        ni = fold_build2 (PLUS_EXPR, type,
                          init_expr, fold_convert (type, off));

      var = create_tmp_var (type, "tmp");

      last_gsi = gsi_last_bb (exit_bb);
      gimple_seq new_stmts = NULL;
      ni_name = force_gimple_operand (ni, &new_stmts, false, var);
      /* Exit_bb shouldn't be empty.  */
      if (!gsi_end_p (last_gsi))
        gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
      else
        gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
    }
}
1553
535e7c11
RS
1554/* Return a gimple value containing the misalignment (measured in vector
1555 elements) for the loop described by LOOP_VINFO, i.e. how many elements
1556 it is away from a perfectly aligned address. Add any new statements
1557 to SEQ. */
1558
1559static tree
1560get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
1561{
1562 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
57c454d2 1563 gimple *dr_stmt = vect_dr_stmt (dr);
535e7c11
RS
1564 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1565 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1566
1567 unsigned int target_align = DR_TARGET_ALIGNMENT (dr);
1568 gcc_assert (target_align != 0);
1569
1570 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1571 tree offset = (negative
1572 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1)
1573 : size_zero_node);
1574 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt, seq,
1575 offset);
1576 tree type = unsigned_type_for (TREE_TYPE (start_addr));
1577 tree target_align_minus_1 = build_int_cst (type, target_align - 1);
1578 HOST_WIDE_INT elem_size
1579 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1580 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
1581
1582 /* Create: misalign_in_bytes = addr & (target_align - 1). */
1583 tree int_start_addr = fold_convert (type, start_addr);
1584 tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
1585 target_align_minus_1);
1586
1587 /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
1588 tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
1589 elem_size_log);
1590
1591 return misalign_in_elems;
1592}
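/* For illustration (the numbers are assumptions for the example only):
   with a 16-byte target alignment, 4-byte elements and a start address
   whose low bits are 0x8, the computation above gives

       misalign_in_bytes = addr & (16 - 1) = 8
       misalign_in_elems = 8 >> log2 (4)   = 2

   i.e. the first access is two elements past an aligned boundary.  */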
1593
a5e3d614 1594/* Function vect_gen_prolog_loop_niters
d9157f15 1595
a5e3d614
BC
1596 Generate the number of iterations which should be peeled as prolog for the
1597 loop represented by LOOP_VINFO. It is calculated as the misalignment of
1598 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
1599 As a result, after the execution of this loop, the data reference DR will
1600 refer to an aligned location. The following computation is generated:
ebfd146a
IR
1601
1602 If the misalignment of DR is known at compile time:
1603 addr_mis = int mis = DR_MISALIGNMENT (dr);
1604 Else, compute address misalignment in bytes:
535e7c11 1605 addr_mis = addr & (target_align - 1)
ebfd146a 1606
a5e3d614 1607 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
ebfd146a
IR
1608
1609 (elem_size = element type size; an element is the scalar element whose type
1610 is the inner type of the vectype)
1611
a5e3d614 1612 The computations will be emitted at the end of BB. We also compute and
cbb22e61 1613 store upper bound (included) of the result in BOUND.
a5e3d614 1614
ebfd146a
IR
1615 When the step of the data-ref in the loop is not 1 (as in interleaved data
1616 and SLP), the number of iterations of the prolog must be divided by the step
1617 (which is equal to the size of interleaved group).
1618
1619 The above formulas assume that VF == number of elements in the vector. This
 1620    may not hold when there are multiple types in the loop.
1621 In this case, for some data-references in the loop the VF does not represent
1622 the number of elements that fit in the vector. Therefore, instead of VF we
1623 use TYPE_VECTOR_SUBPARTS. */
1624
b8698a0f 1625static tree
a5e3d614
BC
1626vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
1627 basic_block bb, int *bound)
ebfd146a
IR
1628{
1629 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
ebfd146a 1630 tree var;
a5e3d614
BC
1631 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
1632 gimple_seq stmts = NULL, new_stmts = NULL;
ebfd146a 1633 tree iters, iters_name;
57c454d2 1634 gimple *dr_stmt = vect_dr_stmt (dr);
ebfd146a
IR
1635 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1636 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
f702e7d4 1637 unsigned int target_align = DR_TARGET_ALIGNMENT (dr);
ebfd146a 1638
15e693cc 1639 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
ebfd146a 1640 {
15e693cc 1641 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
ebfd146a 1642
73fbfcad 1643 if (dump_enabled_p ())
ccb3ad87 1644 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1645 "known peeling = %d.\n", npeel);
ebfd146a 1646
720f5239 1647 iters = build_int_cst (niters_type, npeel);
cbb22e61 1648 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
ebfd146a
IR
1649 }
1650 else
1651 {
535e7c11
RS
1652 tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
1653 tree type = TREE_TYPE (misalign_in_elems);
f702e7d4
RS
1654 HOST_WIDE_INT elem_size
1655 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
f702e7d4
RS
1656 HOST_WIDE_INT align_in_elems = target_align / elem_size;
1657 tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
1658 tree align_in_elems_tree = build_int_cst (type, align_in_elems);
f702e7d4
RS
1659
1660 /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
1661 & (align_in_elems - 1)). */
535e7c11 1662 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
d8ba5b19 1663 if (negative)
f702e7d4
RS
1664 iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
1665 align_in_elems_tree);
d8ba5b19 1666 else
f702e7d4
RS
1667 iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
1668 misalign_in_elems);
1669 iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
ebfd146a 1670 iters = fold_convert (niters_type, iters);
f702e7d4 1671 *bound = align_in_elems - 1;
ebfd146a
IR
1672 }
1673
73fbfcad 1674 if (dump_enabled_p ())
ebfd146a 1675 {
ccb3ad87 1676 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1677 "niters for prolog loop: ");
ccb3ad87 1678 dump_generic_expr (MSG_NOTE, TDF_SLIM, iters);
e645e942 1679 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1680 }
1681
1682 var = create_tmp_var (niters_type, "prolog_loop_niters");
a5e3d614 1683 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
ebfd146a 1684
a5e3d614
BC
1685 if (new_stmts)
1686 gimple_seq_add_seq (&stmts, new_stmts);
ebfd146a
IR
1687 if (stmts)
1688 {
a5e3d614
BC
1689 gcc_assert (single_succ_p (bb));
1690 gimple_stmt_iterator gsi = gsi_last_bb (bb);
1691 if (gsi_end_p (gsi))
1692 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1693 else
1694 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
ebfd146a 1695 }
b8698a0f 1696 return iters_name;
ebfd146a
IR
1697}
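/* Worked example for the unknown-misalignment case above (assumed values,
   for illustration only): with a 16-byte target alignment, 4-byte elements
   (so align_in_elems = 4) and misalign_in_elems = 2 for a positive-step
   access, the generated expression evaluates to

       iters = (4 - 2) & (4 - 1) = 2

   so two scalar iterations are peeled into the prolog, and *BOUND is set
   to align_in_elems - 1 = 3.  */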
1698
1699
1700/* Function vect_update_init_of_dr
1701
535e7c11
RS
1702 If CODE is PLUS, the vector loop starts NITERS iterations after the
1703 scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
1704 iterations before the scalar one (using masking to skip inactive
1705 elements). This function updates the information recorded in DR to
1706 account for the difference. Specifically, it updates the OFFSET
1707 field of DR. */
ebfd146a
IR
1708
1709static void
535e7c11 1710vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code)
ebfd146a
IR
1711{
1712 tree offset = DR_OFFSET (dr);
b8698a0f 1713
ebfd146a
IR
1714 niters = fold_build2 (MULT_EXPR, sizetype,
1715 fold_convert (sizetype, niters),
1716 fold_convert (sizetype, DR_STEP (dr)));
535e7c11 1717 offset = fold_build2 (code, sizetype,
587aa063 1718 fold_convert (sizetype, offset), niters);
ebfd146a
IR
1719 DR_OFFSET (dr) = offset;
1720}
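/* For illustration (assumed values): if two scalar iterations are peeled
   into the prolog (NITERS = 2), the data reference has DR_STEP = 4 bytes
   and CODE is PLUS_EXPR, then DR_OFFSET is increased by 2 * 4 = 8 bytes,
   so the vector loop's first access starts just after the peeled
   iterations.  */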
1721
1722
1723/* Function vect_update_inits_of_drs
1724
535e7c11
RS
 1725    Apply vect_update_init_of_dr to all accesses in LOOP_VINFO.
 1726    CODE and NITERS are as for vect_update_init_of_dr.  */
ebfd146a
IR
1727
1728static void
535e7c11
RS
1729vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
1730 tree_code code)
ebfd146a
IR
1731{
1732 unsigned int i;
9771b263 1733 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
ebfd146a 1734 struct data_reference *dr;
a5e3d614 1735
adac3a68 1736 DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
a5e3d614
BC
1737
1738 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
1739 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
1740 {
1741 gimple_seq seq;
1742 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1743 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
1744
1745 niters = fold_convert (sizetype, niters);
1746 niters = force_gimple_operand (niters, &seq, false, var);
1747 if (seq)
1748 {
1749 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
1750 gcc_assert (!new_bb);
1751 }
1752 }
ebfd146a 1753
9771b263 1754 FOR_EACH_VEC_ELT (datarefs, i, dr)
535e7c11 1755 vect_update_init_of_dr (dr, niters, code);
ebfd146a
IR
1756}
1757
535e7c11
RS
1758/* For the information recorded in LOOP_VINFO prepare the loop for peeling
1759 by masking. This involves calculating the number of iterations to
1760 be peeled and then aligning all memory references appropriately. */
1761
1762void
1763vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
1764{
1765 tree misalign_in_elems;
1766 tree type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
1767
1768 gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
1769
1770 /* From the information recorded in LOOP_VINFO get the number of iterations
1771 that need to be skipped via masking. */
1772 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1773 {
1774 poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1775 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
1776 misalign_in_elems = build_int_cst (type, misalign);
1777 }
1778 else
1779 {
1780 gimple_seq seq1 = NULL, seq2 = NULL;
1781 misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
1782 misalign_in_elems = fold_convert (type, misalign_in_elems);
1783 misalign_in_elems = force_gimple_operand (misalign_in_elems,
1784 &seq2, true, NULL_TREE);
1785 gimple_seq_add_seq (&seq1, seq2);
1786 if (seq1)
1787 {
1788 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1789 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
1790 gcc_assert (!new_bb);
1791 }
1792 }
1793
1794 if (dump_enabled_p ())
1795 {
1796 dump_printf_loc (MSG_NOTE, vect_location,
1797 "misalignment for fully-masked loop: ");
1798 dump_generic_expr (MSG_NOTE, TDF_SLIM, misalign_in_elems);
1799 dump_printf (MSG_NOTE, "\n");
1800 }
1801
1802 LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
1803
1804 vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
1805}
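/* For illustration (assumed values): with a vectorization factor of 8 and
   LOOP_VINFO_PEELING_FOR_ALIGNMENT = 3, the constant path above records

       misalign_in_elems = 8 - 3 = 5

   in LOOP_VINFO_MASK_SKIP_NITERS, i.e. the first 5 elements of the first
   vector iteration are inactive under the mask, and the DR offsets are
   adjusted with MINUS_EXPR to match.  */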
ebfd146a 1806
a5e3d614 1807/* This function builds ni_name = number of iterations. Statements
7078979b
BC
1808 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
 1809    it to TRUE if a new ssa name is generated.  */
a5e3d614
BC
1810
1811tree
7078979b 1812vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
a5e3d614
BC
1813{
1814 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
1815 if (TREE_CODE (ni) == INTEGER_CST)
1816 return ni;
1817 else
1818 {
1819 tree ni_name, var;
1820 gimple_seq stmts = NULL;
1821 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
1822
1823 var = create_tmp_var (TREE_TYPE (ni), "niters");
1824 ni_name = force_gimple_operand (ni, &stmts, false, var);
1825 if (stmts)
7078979b
BC
1826 {
1827 gsi_insert_seq_on_edge_immediate (pe, stmts);
1828 if (new_var_p != NULL)
1829 *new_var_p = true;
1830 }
a5e3d614
BC
1831
1832 return ni_name;
1833 }
1834}
1835
cbb22e61
BC
 1836/* Calculate the number of iterations above which the vectorized loop will
 1837   be preferred over the scalar loop.  NITERS_PROLOG is the number of
 1838   iterations of the prolog loop.  If it is an integer constant, the value is also passed
d1d20a49
RS
1839 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the
1840 number of iterations of the prolog loop. BOUND_EPILOG is the corresponding
1841 value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the
1842 threshold below which the scalar (rather than vectorized) loop will be
1843 executed. This function stores the upper bound (inclusive) of the result
1844 in BOUND_SCALAR. */
a5e3d614
BC
1845
1846static tree
1847vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
d1d20a49 1848 int bound_prolog, poly_int64 bound_epilog, int th,
d9f21f6a
RS
1849 poly_uint64 *bound_scalar,
1850 bool check_profitability)
a5e3d614
BC
1851{
1852 tree type = TREE_TYPE (niters_prolog);
1853 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
d1d20a49 1854 build_int_cst (type, bound_epilog));
a5e3d614 1855
d1d20a49 1856 *bound_scalar = bound_prolog + bound_epilog;
a5e3d614
BC
1857 if (check_profitability)
1858 {
cbb22e61
BC
1859 /* TH indicates the minimum niters of vectorized loop, while we
1860 compute the maximum niters of scalar loop. */
1861 th--;
a5e3d614
BC
 1862      /* Peeling a constant number of times.  */
1863 if (int_niters_prolog >= 0)
1864 {
d1d20a49 1865 *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
cbb22e61 1866 return build_int_cst (type, *bound_scalar);
a5e3d614 1867 }
d1d20a49
RS
1868 /* Peeling an unknown number of times. Note that both BOUND_PROLOG
1869 and BOUND_EPILOG are inclusive upper bounds. */
1870 if (known_ge (th, bound_prolog + bound_epilog))
a5e3d614 1871 {
cbb22e61 1872 *bound_scalar = th;
a5e3d614
BC
1873 return build_int_cst (type, th);
1874 }
d9f21f6a 1875 /* Need to do runtime comparison. */
d1d20a49 1876 else if (maybe_gt (th, bound_epilog))
d9f21f6a
RS
1877 {
1878 *bound_scalar = upper_bound (*bound_scalar, th);
1879 return fold_build2 (MAX_EXPR, type,
1880 build_int_cst (type, th), niters);
1881 }
a5e3d614
BC
1882 }
1883 return niters;
1884}
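/* Worked example for the check_profitability path above (assumed values,
   for illustration only): with INT_NITERS_PROLOG = 2, BOUND_EPILOG = 4 and
   TH = 10, TH is first decremented to 9 and, since the prolog count is a
   known constant,

       *bound_scalar = MAX (2 + 4, 9) = 9

   so the function returns the constant 9 as the scalar-loop iteration
   bound.  */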
1885
0f26839a
RS
1886/* NITERS is the number of times that the original scalar loop executes
1887 after peeling. Work out the maximum number of iterations N that can
1888 be handled by the vectorized form of the loop and then either:
ebfd146a 1889
0f26839a 1890 a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
d9157f15 1891
0f26839a
RS
1892 niters_vector = N
1893
1894 b) set *STEP_VECTOR_PTR to one and generate:
1895
1896 niters_vector = N / vf
1897
1898 In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
1899 any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
1900 is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */
ebfd146a
IR
1901
1902void
a5e3d614 1903vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
0f26839a
RS
1904 tree *niters_vector_ptr, tree *step_vector_ptr,
1905 bool niters_no_overflow)
ebfd146a 1906{
a5e3d614 1907 tree ni_minus_gap, var;
0f26839a 1908 tree niters_vector, step_vector, type = TREE_TYPE (niters);
d9f21f6a 1909 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
a5e3d614 1910 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
0f26839a 1911 tree log_vf = NULL_TREE;
a5e3d614
BC
1912
1913 /* If epilogue loop is required because of data accesses with gaps, we
1914 subtract one iteration from the total number of iterations here for
1915 correct calculation of RATIO. */
1916 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
1917 {
7078979b
BC
1918 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
1919 build_one_cst (type));
a5e3d614
BC
1920 if (!is_gimple_val (ni_minus_gap))
1921 {
7078979b 1922 var = create_tmp_var (type, "ni_gap");
a5e3d614
BC
1923 gimple *stmts = NULL;
1924 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
1925 true, var);
1926 gsi_insert_seq_on_edge_immediate (pe, stmts);
1927 }
1928 }
1929 else
1930 ni_minus_gap = niters;
1931
d9f21f6a 1932 unsigned HOST_WIDE_INT const_vf;
7cfb4d93
RS
1933 if (vf.is_constant (&const_vf)
1934 && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
0f26839a
RS
1935 {
1936 /* Create: niters >> log2(vf) */
1937 /* If it's known that niters == number of latch executions + 1 doesn't
1938 overflow, we can generate niters >> log2(vf); otherwise we generate
1939 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
1940 will be at least one. */
d9f21f6a 1941 log_vf = build_int_cst (type, exact_log2 (const_vf));
0f26839a
RS
1942 if (niters_no_overflow)
1943 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
1944 else
1945 niters_vector
1946 = fold_build2 (PLUS_EXPR, type,
1947 fold_build2 (RSHIFT_EXPR, type,
1948 fold_build2 (MINUS_EXPR, type,
1949 ni_minus_gap,
1950 build_int_cst (type, vf)),
1951 log_vf),
1952 build_int_cst (type, 1));
1953 step_vector = build_one_cst (type);
1954 }
a5e3d614 1955 else
0f26839a
RS
1956 {
1957 niters_vector = ni_minus_gap;
1958 step_vector = build_int_cst (type, vf);
1959 }
a5e3d614
BC
1960
1961 if (!is_gimple_val (niters_vector))
1962 {
7078979b
BC
1963 var = create_tmp_var (type, "bnd");
1964 gimple_seq stmts = NULL;
a5e3d614
BC
1965 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
1966 gsi_insert_seq_on_edge_immediate (pe, stmts);
7078979b
BC
 1967      /* The peeling algorithm guarantees that the vector loop bound is at
 1968	 least ONE, so we set range information to make the niters analyzer's life easier.  */
0f26839a 1969 if (stmts != NULL && log_vf)
8e6cdc90
RS
1970 set_range_info (niters_vector, VR_RANGE,
1971 wi::to_wide (build_int_cst (type, 1)),
1972 wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
1973 TYPE_MAX_VALUE (type),
1974 log_vf)));
a5e3d614
BC
1975 }
1976 *niters_vector_ptr = niters_vector;
0f26839a 1977 *step_vector_ptr = step_vector;
a5e3d614
BC
1978
1979 return;
1980}
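/* For illustration (assumed values): with a constant VF of 8, no peeling
   for gaps, a loop that is not fully masked and NITERS_NO_OVERFLOW set,
   NITERS = 20 yields

       niters_vector = 20 >> log2 (8) = 2,  step_vector = 1

   i.e. two vector iterations.  If overflow of NITERS could not be ruled
   out, the ((niters - vf) >> log2 (vf)) + 1 form would be used instead,
   which also gives ((20 - 8) >> 3) + 1 = 2 here.  */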
1981
1982/* Given NITERS_VECTOR which is the number of iterations for vectorized
1983 loop specified by LOOP_VINFO after vectorization, compute the number
1984 of iterations before vectorization (niters_vector * vf) and store it
 1985    in *NITERS_VECTOR_MULT_VF_PTR.
1986
1987static void
1988vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
1989 tree niters_vector,
1990 tree *niters_vector_mult_vf_ptr)
1991{
d9f21f6a
RS
1992 /* We should be using a step_vector of VF if VF is variable. */
1993 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
ebfd146a 1994 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
a5e3d614
BC
1995 tree type = TREE_TYPE (niters_vector);
1996 tree log_vf = build_int_cst (type, exact_log2 (vf));
1997 basic_block exit_bb = single_exit (loop)->dest;
ebfd146a 1998
a5e3d614
BC
1999 gcc_assert (niters_vector_mult_vf_ptr != NULL);
2000 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
2001 niters_vector, log_vf);
2002 if (!is_gimple_val (niters_vector_mult_vf))
2003 {
2004 tree var = create_tmp_var (type, "niters_vector_mult_vf");
2005 gimple_seq stmts = NULL;
2006 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
2007 &stmts, true, var);
2008 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
2009 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
2010 }
2011 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
2012}
2013
 2014/* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
 2015   from SECOND/FIRST and puts it at the original loop's preheader/exit
 2016   edge; the two loops are arranged as below:
2017
2018 preheader_a:
2019 first_loop:
2020 header_a:
2021 i_1 = PHI<i_0, i_2>;
2022 ...
2023 i_2 = i_1 + 1;
2024 if (cond_a)
2025 goto latch_a;
2026 else
2027 goto between_bb;
2028 latch_a:
2029 goto header_a;
2030
2031 between_bb:
2032 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
2033
2034 second_loop:
2035 header_b:
2036 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
2037 or with i_2 if no LCSSA phi is created
2038 under condition of CREATE_LCSSA_FOR_IV_PHIS.
2039 ...
2040 i_4 = i_3 + 1;
2041 if (cond_b)
2042 goto latch_b;
2043 else
2044 goto exit_bb;
2045 latch_b:
2046 goto header_b;
2047
2048 exit_bb:
2049
 2050   This function creates loop-closed SSA for the first loop and updates the
 2051   second loop's PHI nodes by replacing the argument on the incoming edge
 2052   with the result of the newly created lcssa PHI nodes.  If
 2053   CREATE_LCSSA_FOR_IV_PHIS is false, loop-closed ssa phis will only be
 2054   created for non-iv phis of the first loop.
 2055
 2056   This function assumes the exit bb of the first loop is the preheader bb
 2057   of the second loop, i.e., between_bb in the example code.  With the PHIs
 2058   updated, the second loop will execute the remaining iterations of the first.  */
ebfd146a 2059
a5e3d614
BC
2060static void
2061slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
2062 struct loop *first, struct loop *second,
2063 bool create_lcssa_for_iv_phis)
2064{
2065 gphi_iterator gsi_update, gsi_orig;
2066 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2067
2068 edge first_latch_e = EDGE_SUCC (first->latch, 0);
2069 edge second_preheader_e = loop_preheader_edge (second);
2070 basic_block between_bb = single_exit (first)->dest;
2071
2072 gcc_assert (between_bb == second_preheader_e->src);
2073 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
2074 /* Either the first loop or the second is the loop to be vectorized. */
2075 gcc_assert (loop == first || loop == second);
2076
2077 for (gsi_orig = gsi_start_phis (first->header),
2078 gsi_update = gsi_start_phis (second->header);
2079 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2080 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2081 {
2082 gphi *orig_phi = gsi_orig.phi ();
2083 gphi *update_phi = gsi_update.phi ();
2084
2085 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
2086 /* Generate lcssa PHI node for the first loop. */
2087 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
2088 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi))
2089 {
2090 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2091 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
2092 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
2093 arg = new_res;
2094 }
2095
2096 /* Update PHI node in the second loop by replacing arg on the loop's
2097 incoming edge. */
2098 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
2099 }
2100}
2101
2102/* Function slpeel_add_loop_guard adds guard skipping from the beginning
2103 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
2104 are two pred edges of the merge point before UPDATE_LOOP. The two loops
 2105   appear as below:
2106
2107 guard_bb:
2108 if (cond)
2109 goto merge_bb;
2110 else
2111 goto skip_loop;
2112
2113 skip_loop:
2114 header_a:
2115 i_1 = PHI<i_0, i_2>;
2116 ...
2117 i_2 = i_1 + 1;
2118 if (cond_a)
2119 goto latch_a;
2120 else
2121 goto exit_a;
2122 latch_a:
2123 goto header_a;
2124
2125 exit_a:
2126 i_5 = PHI<i_2>;
2127
2128 merge_bb:
2129 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
2130
2131 update_loop:
2132 header_b:
2133 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
2134 ...
2135 i_4 = i_3 + 1;
2136 if (cond_b)
2137 goto latch_b;
2138 else
2139 goto exit_bb;
2140 latch_b:
2141 goto header_b;
2142
2143 exit_bb:
2144
2145 This function creates PHI nodes at merge_bb and replaces the use of i_5
 2146   in the update_loop's PHI node with the result of the new PHI node.  */
2147
2148static void
2149slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
2150 struct loop *update_loop,
2151 edge guard_edge, edge merge_edge)
2152{
2153 source_location merge_loc, guard_loc;
2154 edge orig_e = loop_preheader_edge (skip_loop);
2155 edge update_e = loop_preheader_edge (update_loop);
2156 gphi_iterator gsi_orig, gsi_update;
2157
2158 for ((gsi_orig = gsi_start_phis (skip_loop->header),
2159 gsi_update = gsi_start_phis (update_loop->header));
2160 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
2161 gsi_next (&gsi_orig), gsi_next (&gsi_update))
2162 {
2163 gphi *orig_phi = gsi_orig.phi ();
2164 gphi *update_phi = gsi_update.phi ();
2165
2166 /* Generate new phi node at merge bb of the guard. */
2167 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
2168 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
2169
2170 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
2171 args in NEW_PHI for these edges. */
2172 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
2173 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
2174 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
2175 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
2176 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
2177 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
2178
2179 /* Update phi in UPDATE_PHI. */
2180 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
2181 }
2182}
2183
2184/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
2185 this function searches for the corresponding lcssa phi node in exit
2186 bb of LOOP. If it is found, return the phi result; otherwise return
2187 NULL. */
2188
2189static tree
2190find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
2191 gphi *lcssa_phi)
2192{
2193 gphi_iterator gsi;
2194 edge e = single_exit (loop);
2195
2196 gcc_assert (single_pred_p (e->dest));
2197 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2198 {
2199 gphi *phi = gsi.phi ();
2200 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
2201 PHI_ARG_DEF (lcssa_phi, 0), 0))
2202 return PHI_RESULT (phi);
2203 }
2204 return NULL_TREE;
2205}
2206
2207/* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
2208 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
2209 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
2210 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
2211 The CFG looks like:
2212
2213 loop:
2214 header_a:
2215 i_1 = PHI<i_0, i_2>;
2216 ...
2217 i_2 = i_1 + 1;
2218 if (cond_a)
2219 goto latch_a;
2220 else
2221 goto exit_a;
2222 latch_a:
2223 goto header_a;
2224
2225 exit_a:
2226
2227 guard_bb:
2228 if (cond)
2229 goto merge_bb;
2230 else
2231 goto epilog_loop;
2232
2233 ;; fall_through_bb
2234
2235 epilog_loop:
2236 header_b:
2237 i_3 = PHI<i_2, i_4>;
2238 ...
2239 i_4 = i_3 + 1;
2240 if (cond_b)
2241 goto latch_b;
2242 else
2243 goto merge_bb;
2244 latch_b:
2245 goto header_b;
2246
2247 merge_bb:
2248 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
2249
2250 exit_bb:
2251 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
2252
 2253   For each name used outside EPILOG (i.e., for each name that has a lcssa
2254 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
2255 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
2256 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
2257 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
2258 in exit_bb will also be updated. */
2259
2260static void
2261slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
2262 edge guard_edge, edge merge_edge)
2263{
2264 gphi_iterator gsi;
2265 basic_block merge_bb = guard_edge->dest;
2266
2267 gcc_assert (single_succ_p (merge_bb));
2268 edge e = single_succ_edge (merge_bb);
2269 basic_block exit_bb = e->dest;
2270 gcc_assert (single_pred_p (exit_bb));
2271 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
2272
2273 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2274 {
2275 gphi *update_phi = gsi.phi ();
2276 tree old_arg = PHI_ARG_DEF (update_phi, 0);
2277 /* This loop-closed-phi actually doesn't represent a use out of the
2278 loop - the phi arg is a constant. */
2279 if (TREE_CODE (old_arg) != SSA_NAME)
2280 continue;
2281
2282 tree merge_arg = get_current_def (old_arg);
2283 if (!merge_arg)
2284 merge_arg = old_arg;
2285
2286 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
2287 /* If the var is live after loop but not a reduction, we simply
2288 use the old arg. */
2289 if (!guard_arg)
2290 guard_arg = old_arg;
2291
2292 /* Create new phi node in MERGE_BB: */
2293 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
2294 gphi *merge_phi = create_phi_node (new_res, merge_bb);
2295
2296 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
2297 the two PHI args in merge_phi for these edges. */
2298 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
2299 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
2300
2301 /* Update the original phi in exit_bb. */
2302 adjust_phi_and_debug_stmts (update_phi, e, new_res);
2303 }
2304}
2305
 2306/* The EPILOG loop is duplicated from the original loop for vectorization;
 2307   the args of its loop-closed ssa PHIs need to be updated.  */
2308
2309static void
2310slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
2311{
2312 gphi_iterator gsi;
2313 basic_block exit_bb = single_exit (epilog)->dest;
2314
2315 gcc_assert (single_pred_p (exit_bb));
2316 edge e = EDGE_PRED (exit_bb, 0);
2317 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
2318 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
2319}
2320
2321/* Function vect_do_peeling.
2322
2323 Input:
2324 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
2325
2326 preheader:
2327 LOOP:
2328 header_bb:
2329 loop_body
2330 if (exit_loop_cond) goto exit_bb
2331 else goto header_bb
2332 exit_bb:
2333
2334 - NITERS: The number of iterations of the loop.
2335 - NITERSM1: The number of iterations of the loop's latch.
2336 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
2337 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
2338 CHECK_PROFITABILITY is true.
2339 Output:
0f26839a 2340 - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
7cfb4d93 2341 iterate after vectorization; see vect_set_loop_condition for details.
0f26839a
RS
2342 - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
2343 should be set to the number of scalar iterations handled by the
2344 vector loop. The SSA name is only used on exit from the loop.
a5e3d614
BC
2345
2346 This function peels prolog and epilog from the loop, adds guards skipping
2347 PROLOG and EPILOG for various conditions. As a result, the changed CFG
2348 would look like:
2349
2350 guard_bb_1:
2351 if (prefer_scalar_loop) goto merge_bb_1
2352 else goto guard_bb_2
2353
2354 guard_bb_2:
2355 if (skip_prolog) goto merge_bb_2
2356 else goto prolog_preheader
2357
2358 prolog_preheader:
2359 PROLOG:
2360 prolog_header_bb:
2361 prolog_body
2362 if (exit_prolog_cond) goto prolog_exit_bb
2363 else goto prolog_header_bb
2364 prolog_exit_bb:
2365
2366 merge_bb_2:
2367
2368 vector_preheader:
2369 VECTOR LOOP:
2370 vector_header_bb:
2371 vector_body
2372 if (exit_vector_cond) goto vector_exit_bb
2373 else goto vector_header_bb
2374 vector_exit_bb:
2375
2376 guard_bb_3:
2377 if (skip_epilog) goto merge_bb_3
2378 else goto epilog_preheader
2379
2380 merge_bb_1:
2381
2382 epilog_preheader:
2383 EPILOG:
2384 epilog_header_bb:
2385 epilog_body
2386 if (exit_epilog_cond) goto merge_bb_3
2387 else goto epilog_header_bb
2388
2389 merge_bb_3:
2390
 2391   Note this function peels the prolog and epilog, and adds the guards,
 2392   only when necessary.
598eaaa2 2393 Returns created epilogue or NULL.
a5e3d614
BC
2394
2395 TODO: Guard for prefer_scalar_loop should be emitted along with
2396 versioning conditions if loop versioning is needed. */
2397
598eaaa2
YR
2398
2399struct loop *
a5e3d614 2400vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
0f26839a
RS
2401 tree *niters_vector, tree *step_vector,
2402 tree *niters_vector_mult_vf_var, int th,
2403 bool check_profitability, bool niters_no_overflow)
a5e3d614
BC
2404{
2405 edge e, guard_e;
2406 tree type = TREE_TYPE (niters), guard_cond;
2407 basic_block guard_bb, guard_to;
357067f2 2408 profile_probability prob_prolog, prob_vector, prob_epilog;
d9f21f6a 2409 int estimated_vf;
535e7c11
RS
2410 int prolog_peeling = 0;
2411 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
2412 prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
d1d20a49
RS
2413
2414 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2415 poly_uint64 bound_epilog = 0;
2416 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
2417 && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2418 bound_epilog += vf - 1;
2419 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2420 bound_epilog += 1;
2421 bool epilog_peeling = maybe_ne (bound_epilog, 0U);
2422 poly_uint64 bound_scalar = bound_epilog;
a5e3d614
BC
2423
2424 if (!prolog_peeling && !epilog_peeling)
598eaaa2 2425 return NULL;
a5e3d614 2426
357067f2 2427 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
d9f21f6a
RS
2428 estimated_vf = vect_vf_for_cost (loop_vinfo);
2429 if (estimated_vf == 2)
2430 estimated_vf = 3;
357067f2 2431 prob_prolog = prob_epilog = profile_probability::guessed_always ()
d9f21f6a 2432 .apply_scale (estimated_vf - 1, estimated_vf);
a5e3d614 2433
598eaaa2 2434 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
a5e3d614 2435 struct loop *first_loop = loop;
3e907b90 2436 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
a5e3d614
BC
2437 create_lcssa_for_virtual_phi (loop);
2438 update_ssa (TODO_update_ssa_only_virtuals);
2439
36f52e8f 2440 if (MAY_HAVE_DEBUG_BIND_STMTS)
a5e3d614
BC
2441 {
2442 gcc_assert (!adjust_vec.exists ());
2443 adjust_vec.create (32);
2444 }
ebfd146a
IR
2445 initialize_original_copy_tables ();
2446
d1d20a49
RS
2447 /* Record the anchor bb at which the guard should be placed if the scalar
2448 loop might be preferred. */
2449 basic_block anchor = loop_preheader_edge (loop)->src;
2450
2451 /* Generate the number of iterations for the prolog loop. We do this here
2452 so that we can also get the upper bound on the number of iterations. */
2453 tree niters_prolog;
2454 int bound_prolog = 0;
2455 if (prolog_peeling)
2456 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
2457 &bound_prolog);
2458 else
2459 niters_prolog = build_int_cst (type, 0);
2460
a5e3d614
BC
2461 /* Prolog loop may be skipped. */
2462 bool skip_prolog = (prolog_peeling != 0);
704c28ee
BC
2463 /* Skip to epilog if scalar loop may be preferred. It's only needed
2464 when we peel for epilog loop and when it hasn't been checked with
2465 loop versioning. */
d1d20a49
RS
2466 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2467 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
2468 bound_prolog + bound_epilog)
2469 : !LOOP_REQUIRES_VERSIONING (loop_vinfo));
a5e3d614
BC
2470 /* Epilog loop must be executed if the number of iterations for epilog
2471 loop is known at compile time, otherwise we need to add a check at
2472 the end of vector loop and skip to the end of epilog loop. */
2473 bool skip_epilog = (prolog_peeling < 0
d9f21f6a
RS
2474 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2475 || !vf.is_constant ());
a5e3d614
BC
2476 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
2477 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2478 skip_epilog = false;
2479
a5e3d614 2480 if (skip_vector)
25c99850
BC
2481 {
2482 split_edge (loop_preheader_edge (loop));
2483
2484 /* Due to the order in which we peel prolog and epilog, we first
2485 propagate probability to the whole loop. The purpose is to
2486 avoid adjusting probabilities of both prolog and vector loops
2487 separately. Note in this case, the probability of epilog loop
2488 needs to be scaled back later. */
2489 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
357067f2 2490 if (prob_vector.initialized_p ())
d9f21f6a
RS
2491 {
2492 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
2493 scale_loop_profile (loop, prob_vector, 0);
2494 }
25c99850 2495 }
a5e3d614 2496
a5e3d614
BC
2497 source_location loop_loc = find_loop_location (loop);
2498 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
2499 if (prolog_peeling)
5d2eb24b 2500 {
a5e3d614
BC
2501 e = loop_preheader_edge (loop);
2502 if (!slpeel_can_duplicate_loop_p (loop, e))
5d2eb24b 2503 {
a5e3d614
BC
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2505 "loop can't be duplicated to preheader edge.\n");
2506 gcc_unreachable ();
2507 }
2508 /* Peel prolog and put it on preheader edge of loop. */
2509 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
2510 if (!prolog)
2511 {
2512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2513 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
2514 gcc_unreachable ();
2515 }
2516 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
2517 first_loop = prolog;
2518 reset_original_copy_tables ();
2519
d1d20a49 2520 /* Update the number of iterations for prolog loop. */
0f26839a 2521 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
7cfb4d93
RS
2522 vect_set_loop_condition (prolog, NULL, niters_prolog,
2523 step_prolog, NULL_TREE, false);
a5e3d614
BC
2524
2525 /* Skip the prolog loop. */
2526 if (skip_prolog)
2527 {
2528 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
2529 niters_prolog, build_int_cst (type, 0));
2530 guard_bb = loop_preheader_edge (prolog)->src;
25c99850 2531 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
a5e3d614
BC
2532 guard_to = split_edge (loop_preheader_edge (loop));
2533 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
2534 guard_to, guard_bb,
357067f2 2535 prob_prolog.invert (),
3e907b90 2536 irred_flag);
a5e3d614
BC
2537 e = EDGE_PRED (guard_to, 0);
2538 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
2539 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
25c99850 2540
af2bbc51
JH
2541 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
2542 scale_loop_profile (prolog, prob_prolog, bound_prolog);
a5e3d614
BC
2543 }
2544 /* Update init address of DRs. */
535e7c11 2545 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
a5e3d614
BC
2546 /* Update niters for vector loop. */
2547 LOOP_VINFO_NITERS (loop_vinfo)
2548 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
2549 LOOP_VINFO_NITERSM1 (loop_vinfo)
2550 = fold_build2 (MINUS_EXPR, type,
2551 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
7078979b
BC
2552 bool new_var_p = false;
2553 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
2554 /* It's guaranteed that vector loop bound before vectorization is at
2555 least VF, so set range information for newly generated var. */
2556 if (new_var_p)
2557 set_range_info (niters, VR_RANGE,
8e6cdc90
RS
2558 wi::to_wide (build_int_cst (type, vf)),
2559 wi::to_wide (TYPE_MAX_VALUE (type)));
a5e3d614 2560
cbb22e61
BC
2561 /* Prolog iterates at most bound_prolog times, latch iterates at
2562 most bound_prolog - 1 times. */
2563 record_niter_bound (prolog, bound_prolog - 1, false, true);
a5e3d614
BC
2564 delete_update_ssa ();
2565 adjust_vec_debug_stmts ();
2566 scev_reset ();
5d2eb24b
IR
2567 }
2568
a5e3d614
BC
2569 if (epilog_peeling)
2570 {
2571 e = single_exit (loop);
2572 if (!slpeel_can_duplicate_loop_p (loop, e))
2573 {
2574 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2575 "loop can't be duplicated to exit edge.\n");
2576 gcc_unreachable ();
2577 }
2578 /* Peel epilog and put it on exit edge of loop. */
2579 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
2580 if (!epilog)
2581 {
2582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
2583 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
2584 gcc_unreachable ();
2585 }
2586 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
2587
 2588      /* The scalar version of the loop may be preferred.  In this case, add
 2589	 a guard and skip to the epilog.  Note this only happens when the
 2590	 number of iterations of the loop is unknown at compile time,
 2591	 otherwise this won't be vectorized.  */
2592 if (skip_vector)
2593 {
cbb22e61 2594 /* Additional epilogue iteration is peeled if gap exists. */
a5e3d614 2595 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
d1d20a49 2596 bound_prolog, bound_epilog,
cbb22e61 2597 th, &bound_scalar,
a5e3d614 2598 check_profitability);
cbb22e61
BC
2599 /* Build guard against NITERSM1 since NITERS may overflow. */
2600 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
a5e3d614
BC
2601 guard_bb = anchor;
2602 guard_to = split_edge (loop_preheader_edge (epilog));
2603 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
2604 guard_to, guard_bb,
357067f2 2605 prob_vector.invert (),
3e907b90 2606 irred_flag);
a5e3d614
BC
2607 e = EDGE_PRED (guard_to, 0);
2608 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
2609 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
25c99850
BC
2610
2611 /* Simply propagate profile info from guard_bb to guard_to which is
2612 a merge point of control flow. */
25c99850 2613 guard_to->count = guard_bb->count;
00fa28d1 2614
af2bbc51
JH
2615 /* Scale probability of epilog loop back.
2616 FIXME: We should avoid scaling down and back up. Profile may
2617 get lost if we scale down to 0. */
10ea2672 2618 basic_block *bbs = get_loop_body (epilog);
00fa28d1
JH
2619 for (unsigned int i = 0; i < epilog->num_nodes; i++)
2620 bbs[i]->count = bbs[i]->count.apply_scale
2621 (bbs[i]->count,
2622 bbs[i]->count.apply_probability
2623 (prob_vector));
af2bbc51 2624 free (bbs);
a5e3d614 2625 }
ebfd146a 2626
25c99850 2627 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
a5e3d614
BC
2628 tree niters_vector_mult_vf;
2629 /* If loop is peeled for non-zero constant times, now niters refers to
2630 orig_niters - prolog_peeling, it won't overflow even the orig_niters
2631 overflows. */
2632 niters_no_overflow |= (prolog_peeling > 0);
2633 vect_gen_vector_loop_niters (loop_vinfo, niters,
0f26839a
RS
2634 niters_vector, step_vector,
2635 niters_no_overflow);
2636 if (!integer_onep (*step_vector))
2637 {
2638 /* On exit from the loop we will have an easy way of calcalating
2639 NITERS_VECTOR / STEP * STEP. Install a dummy definition
2640 until then. */
2641 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
2642 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
2643 *niters_vector_mult_vf_var = niters_vector_mult_vf;
2644 }
2645 else
2646 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
2647 &niters_vector_mult_vf);
a5e3d614
BC
2648 /* Update IVs of original loop as if they were advanced by
2649 niters_vector_mult_vf steps. */
2650 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
2651 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
2652 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
2653 update_e);
2654
2655 if (skip_epilog)
2656 {
2657 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
2658 niters, niters_vector_mult_vf);
2659 guard_bb = single_exit (loop)->dest;
2660 guard_to = split_edge (single_exit (epilog));
2661 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
2662 skip_vector ? anchor : guard_bb,
357067f2 2663 prob_epilog.invert (),
3e907b90 2664 irred_flag);
a5e3d614
BC
2665 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
2666 single_exit (epilog));
25c99850
BC
2667 /* Only need to handle basic block before epilog loop if it's not
2668 the guard_bb, which is the case when skip_vector is true. */
2669 if (guard_bb != bb_before_epilog)
2670 {
357067f2 2671 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
25c99850 2672
af2bbc51 2673 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
25c99850 2674 }
d9f21f6a 2675 scale_loop_profile (epilog, prob_epilog, 0);
a5e3d614
BC
2676 }
2677 else
2678 slpeel_update_phi_nodes_for_lcssa (epilog);
ebfd146a 2679
d1d20a49
RS
2680 unsigned HOST_WIDE_INT bound;
2681 if (bound_scalar.is_constant (&bound))
d9f21f6a 2682 {
d1d20a49
RS
2683 gcc_assert (bound != 0);
2684 /* -1 to convert loop iterations to latch iterations. */
2685 record_niter_bound (epilog, bound - 1, false, true);
d9f21f6a 2686 }
a5e3d614
BC
2687
2688 delete_update_ssa ();
2689 adjust_vec_debug_stmts ();
2690 scev_reset ();
2691 }
2692 adjust_vec.release ();
ebfd146a 2693 free_original_copy_tables ();
598eaaa2
YR
2694
2695 return epilog;
ebfd146a
IR
2696}
2697
01d32b2b
BC
2698/* Function vect_create_cond_for_niters_checks.
2699
2700 Create a conditional expression that represents the run-time checks for
ce811fc4 2701   the loop's niter.  The loop is guaranteed to terminate if the run-time
01d32b2b
BC
2702 checks hold.
2703
2704 Input:
2705 COND_EXPR - input conditional expression. New conditions will be chained
 2706		with logical AND operation.  If it is NULL, the new
 2707		condition is stored into it directly.
 2708   LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the niter
 2709		 assumptions to be checked.
2710
2711 Output:
2712 COND_EXPR - conditional expression.
2713
2714 The returned COND_EXPR is the conditional expression to be used in the
2715 if statement that controls which version of the loop gets executed at
2716 runtime. */
2717
2718static void
2719vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
2720{
2721 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
2722
2723 if (*cond_expr)
2724 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2725 *cond_expr, part_cond_expr);
2726 else
2727 *cond_expr = part_cond_expr;
2728}
ebfd146a 2729
9adee305
RS
2730/* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2731 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
2732
2733static void
2734chain_cond_expr (tree *cond_expr, tree part_cond_expr)
2735{
2736 if (*cond_expr)
2737 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2738 *cond_expr, part_cond_expr);
2739 else
2740 *cond_expr = part_cond_expr;
2741}
2742
ebfd146a
IR
2743/* Function vect_create_cond_for_align_checks.
2744
2745 Create a conditional expression that represents the alignment checks for
2746 all of data references (array element references) whose alignment must be
2747 checked at runtime.
2748
2749 Input:
2750 COND_EXPR - input conditional expression. New conditions will be chained
2751 with logical AND operation.
2752 LOOP_VINFO - two fields of the loop information are used.
2753 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2754 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2755
2756 Output:
2757 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2758 expression.
2759 The returned value is the conditional expression to be used in the if
2760 statement that controls which version of the loop gets executed at runtime.
2761
2762 The algorithm makes two assumptions:
2763 1) The number of bytes "n" in a vector is a power of 2.
 2764      2) An address "a" is aligned if a%n is zero and this
2765 test can be done as a&(n-1) == 0. For example, for 16
2766 byte vectors the test is a&0xf == 0. */
2767
2768static void
2769vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2770 tree *cond_expr,
2771 gimple_seq *cond_expr_stmt_list)
2772{
355fe088 2773 vec<gimple *> may_misalign_stmts
ebfd146a 2774 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
355fe088 2775 gimple *ref_stmt;
ebfd146a
IR
2776 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2777 tree mask_cst;
2778 unsigned int i;
ebfd146a
IR
2779 tree int_ptrsize_type;
2780 char tmp_name[20];
2781 tree or_tmp_name = NULL_TREE;
83d5977e 2782 tree and_tmp_name;
355fe088 2783 gimple *and_stmt;
ebfd146a
IR
2784 tree ptrsize_zero;
2785 tree part_cond_expr;
2786
2787 /* Check that mask is one less than a power of 2, i.e., mask is
2788 all zeros followed by all ones. */
2789 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2790
96f9265a 2791 int_ptrsize_type = signed_type_for (ptr_type_node);
ebfd146a
IR
2792
2793 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2794 of the first vector of the i'th data reference. */
2795
9771b263 2796 FOR_EACH_VEC_ELT (may_misalign_stmts, i, ref_stmt)
ebfd146a
IR
2797 {
2798 gimple_seq new_stmt_list = NULL;
2799 tree addr_base;
83d5977e
RG
2800 tree addr_tmp_name;
2801 tree new_or_tmp_name;
355fe088 2802 gimple *addr_stmt, *or_stmt;
d8ba5b19
RG
2803 stmt_vec_info stmt_vinfo = vinfo_for_stmt (ref_stmt);
2804 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2805 bool negative = tree_int_cst_compare
2806 (DR_STEP (STMT_VINFO_DATA_REF (stmt_vinfo)), size_zero_node) < 0;
2807 tree offset = negative
aad83b7c 2808 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
ebfd146a
IR
2809
2810 /* create: addr_tmp = (int)(address_of_first_vector) */
2811 addr_base =
2812 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list,
3f5e8a76 2813 offset);
ebfd146a
IR
2814 if (new_stmt_list != NULL)
2815 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
2816
83d5977e
RG
2817 sprintf (tmp_name, "addr2int%d", i);
2818 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
0d0e4a03 2819 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
ebfd146a
IR
2820 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
2821
 2822      /* The addresses are ORed together.  */
2823
2824 if (or_tmp_name != NULL_TREE)
2825 {
2826 /* create: or_tmp = or_tmp | addr_tmp */
83d5977e
RG
2827 sprintf (tmp_name, "orptrs%d", i);
2828 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
0d0e4a03
JJ
2829 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
2830 or_tmp_name, addr_tmp_name);
ebfd146a
IR
2831 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
2832 or_tmp_name = new_or_tmp_name;
2833 }
2834 else
2835 or_tmp_name = addr_tmp_name;
2836
2837 } /* end for i */
2838
2839 mask_cst = build_int_cst (int_ptrsize_type, mask);
2840
2841 /* create: and_tmp = or_tmp & mask */
83d5977e 2842 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
ebfd146a 2843
0d0e4a03
JJ
2844 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
2845 or_tmp_name, mask_cst);
ebfd146a
IR
2846 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
2847
2848 /* Make and_tmp the left operand of the conditional test against zero.
 2849	 If and_tmp has a nonzero bit then some address is unaligned.  */
2850 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
2851 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
2852 and_tmp_name, ptrsize_zero);
9adee305
RS
2853 chain_cond_expr (cond_expr, part_cond_expr);
2854}
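/* Worked example for the mask test built above (assumed addresses, for
   illustration only): with 16-byte vectors MASK is 15.  For two data
   references starting at 0x1000 and 0x1004 the generated condition is

       ((0x1000 | 0x1004) & 15) == 0  -->  4 == 0  -->  false

   so the unvectorized version runs; if the second address were 0x1010
   instead, the OR would be 0x1010, the AND would be 0 and the vectorized
   version would be taken.  */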
2855
2856/* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
2857 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
2858 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
2859 and this new condition are true. Treat a null *COND_EXPR as "true". */
2860
2861static void
2862vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
2863{
2864 vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
2865 unsigned int i;
2866 vec_object_pair *pair;
2867 FOR_EACH_VEC_ELT (pairs, i, pair)
2868 {
2869 tree addr1 = build_fold_addr_expr (pair->first);
2870 tree addr2 = build_fold_addr_expr (pair->second);
2871 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
2872 addr1, addr2);
2873 chain_cond_expr (cond_expr, part_cond_expr);
2874 }
ebfd146a
IR
2875}
2876
a57776a1
RS
2877/* Create an expression that is true when all lower-bound conditions for
2878 the vectorized loop are met. Chain this condition with *COND_EXPR. */
2879
2880static void
2881vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
2882{
2883 vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
2884 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
2885 {
2886 tree expr = lower_bounds[i].expr;
2887 tree type = unsigned_type_for (TREE_TYPE (expr));
2888 expr = fold_convert (type, expr);
2889 poly_uint64 bound = lower_bounds[i].min_value;
2890 if (!lower_bounds[i].unsigned_p)
2891 {
2892 expr = fold_build2 (PLUS_EXPR, type, expr,
2893 build_int_cstu (type, bound - 1));
2894 bound += bound - 1;
2895 }
2896 tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
2897 build_int_cstu (type, bound));
2898 chain_cond_expr (cond_expr, part_cond_expr);
2899 }
2900}
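/* Worked example for the signed case above (assumed values, for
   illustration only): with MIN_VALUE = 4 and a signed EXPR, the code adds
   3 to the unsigned-converted EXPR and raises the bound to 7, so the
   emitted test is

       (unsigned) EXPR + 3 >= 7

   EXPR = 5 gives 8 >= 7 (check passes) while EXPR = 2 gives 5 >= 7
   (check fails), matching a direct EXPR >= 4 test for non-negative
   values while staying in unsigned arithmetic.  */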
2901
ebfd146a
IR
2902/* Function vect_create_cond_for_alias_checks.
2903
2904 Create a conditional expression that represents the run-time checks for
2905 overlapping of address ranges represented by a list of data references
2906 relations passed as input.
2907
2908 Input:
2909 COND_EXPR - input conditional expression. New conditions will be chained
a05a89fa
CH
2910 with logical AND operation. If it is NULL, then the function
2911 is used to return the number of alias checks.
ebfd146a
IR
2912 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
2913 to be checked.
2914
2915 Output:
2916 COND_EXPR - conditional expression.
ebfd146a 2917
a05a89fa 2918 The returned COND_EXPR is the conditional expression to be used in the if
ebfd146a
IR
2919 statement that controls which version of the loop gets executed at runtime.
2920*/
2921
a05a89fa 2922void
4bdd44c4 2923vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
ebfd146a 2924{
93bdc3ed 2925 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
a05a89fa 2926 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
ebfd146a 2927
a05a89fa 2928 if (comp_alias_ddrs.is_empty ())
ebfd146a
IR
2929 return;
2930
9cbd2d97
BC
2931 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
2932 &comp_alias_ddrs, cond_expr);
73fbfcad 2933 if (dump_enabled_p ())
ccb3ad87 2934 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2935 "created %u versioning for alias checks.\n",
a05a89fa 2936 comp_alias_ddrs.length ());
ebfd146a
IR
2937}
2938
2939
2940/* Function vect_loop_versioning.
b8698a0f 2941
ebfd146a
IR
 2942   If the loop has data references that may or may not be aligned and/or
2943 has data reference relations whose independence was not proven then
2944 two versions of the loop need to be generated, one which is vectorized
2945 and one which isn't. A test is then generated to control which of the
2946 loops is executed. The test checks for the alignment of all of the
2947 data references that may or may not be aligned. An additional
 2948   sequence of runtime tests is generated for each pair of DDRs whose
b8698a0f
L
2949 independence was not proven. The vectorized version of loop is
2950 executed only if both alias and alignment tests are passed.
2951
ebfd146a 2952 The test generated to check which version of loop is executed
b8698a0f 2953 is modified to also check for profitability as indicated by the
d9157f15 2954 cost model threshold TH.
86290011
RG
2955
2956 The versioning precondition(s) are placed in *COND_EXPR and
d68d56b5 2957 *COND_EXPR_STMT_LIST. */
ebfd146a
IR
2958
2959void
368117e8 2960vect_loop_versioning (loop_vec_info loop_vinfo,
a696bc4f
RS
2961 unsigned int th, bool check_profitability,
2962 poly_uint64 versioning_threshold)
ebfd146a 2963{
01d32b2b 2964 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
5ce9450f 2965 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
ebfd146a 2966 basic_block condition_bb;
538dd0b7
DM
2967 gphi_iterator gsi;
2968 gimple_stmt_iterator cond_exp_gsi;
ebfd146a
IR
2969 basic_block merge_bb;
2970 basic_block new_exit_bb;
2971 edge new_exit_e, e;
538dd0b7 2972 gphi *orig_phi, *new_phi;
368117e8 2973 tree cond_expr = NULL_TREE;
d68d56b5 2974 gimple_seq cond_expr_stmt_list = NULL;
ebfd146a 2975 tree arg;
af2bbc51 2976 profile_probability prob = profile_probability::likely ();
ebfd146a 2977 gimple_seq gimplify_stmt_list = NULL;
fdd29374 2978 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
9cc1fb4b
XDL
2979 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
2980 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
01d32b2b 2981 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
ebfd146a 2982
368117e8 2983 if (check_profitability)
80be3333 2984 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
01d32b2b 2985 build_int_cst (TREE_TYPE (scalar_loop_iters),
fdd29374 2986 th - 1));
a696bc4f
RS
2987 if (maybe_ne (versioning_threshold, 0U))
2988 {
2989 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
2990 build_int_cst (TREE_TYPE (scalar_loop_iters),
2991 versioning_threshold - 1));
2992 if (cond_expr)
2993 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
2994 expr, cond_expr);
2995 else
2996 cond_expr = expr;
2997 }
01d32b2b
BC
2998
2999 if (version_niter)
3000 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
3001
3002 if (cond_expr)
3003 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
3004 is_gimple_condexpr, NULL_TREE);
ebfd146a 3005
9cc1fb4b 3006 if (version_align)
d68d56b5
RG
3007 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
3008 &cond_expr_stmt_list);
ebfd146a 3009
9cc1fb4b 3010 if (version_alias)
9adee305
RS
3011 {
3012 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
a57776a1 3013 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
9adee305
RS
3014 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
3015 }
86290011 3016
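  /* Gimplify the combined condition; any statements generated here are
     collected in COND_EXPR_STMT_LIST and emitted into CONDITION_BB after
     the loop has been versioned.  */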
b210f45f
RB
3017 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
3018 &gimplify_stmt_list,
d68d56b5
RG
3019 is_gimple_condexpr, NULL_TREE);
3020 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
ebfd146a
IR
3021
3022 initialize_original_copy_tables ();
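  /* If a separate scalar version of the loop was preserved (typically the
     copy left by if-conversion), make the versioning copy from SCALAR_LOOP
     rather than from LOOP, and then rewire the preheader edges so that the
     runtime check still ends up above LOOP's preheader.  */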
5ce9450f
JJ
3023 if (scalar_loop)
3024 {
3025 edge scalar_e;
3026 basic_block preheader, scalar_preheader;
3027
3028      /* We don't want to scale SCALAR_LOOP's frequencies; we need to
3029         scale LOOP's frequencies instead.  */
5d3ebb71 3030 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
af2bbc51
JH
3031 prob, prob.invert (), prob, prob.invert (), true);
3032 scale_loop_frequencies (loop, prob);
5ce9450f
JJ
3033      /* CONDITION_BB was created above SCALAR_LOOP's preheader,
3034         but we need to move it above LOOP's preheader.  */
3035 e = loop_preheader_edge (loop);
3036 scalar_e = loop_preheader_edge (scalar_loop);
3037 gcc_assert (empty_block_p (e->src)
3038 && single_pred_p (e->src));
3039 gcc_assert (empty_block_p (scalar_e->src)
3040 && single_pred_p (scalar_e->src));
3041 gcc_assert (single_pred_p (condition_bb));
3042 preheader = e->src;
3043 scalar_preheader = scalar_e->src;
3044 scalar_e = find_edge (condition_bb, scalar_preheader);
3045 e = single_pred_edge (preheader);
3046 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
3047 scalar_preheader);
3048 redirect_edge_and_branch_force (scalar_e, preheader);
3049 redirect_edge_and_branch_force (e, condition_bb);
3050 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
3051 single_pred (condition_bb));
3052 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
3053 single_pred (scalar_preheader));
3054 set_immediate_dominator (CDI_DOMINATORS, preheader,
3055 condition_bb);
3056 }
3057 else
01d32b2b 3058 nloop = loop_version (loop, cond_expr, &condition_bb,
af2bbc51 3059 prob, prob.invert (), prob, prob.invert (), true);
01d32b2b
BC
3060
3061 if (version_niter)
3062 {
3063      /* The versioned loop could be infinite, so we need to clear the
3064         existing niter information, which was copied from the original loop.  */
3065 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
3066 vect_free_loop_info_assumptions (nloop);
3067 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
3068 loop_constraint_set (loop, LOOP_C_INFINITE);
3069 }
9cc1fb4b 3070
b05e0233 3071 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION
9cc1fb4b
XDL
3072 && dump_enabled_p ())
3073 {
3074 if (version_alias)
3075 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3076 "loop versioned for vectorization because of "
3077 "possible aliasing\n");
3078 if (version_align)
3079 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3080 "loop versioned for vectorization to enhance "
3081 "alignment\n");
3082
3083 }
c3284718 3084 free_original_copy_tables ();
ebfd146a 3085
b8698a0f 3086 /* Loop versioning violates an assumption we try to maintain during
ebfd146a
IR
3087 vectorization - that the loop exit block has a single predecessor.
3088 After versioning, the exit block of both loop versions is the same
3089 basic block (i.e. it has two predecessors).  In order to simplify
3090 the transformations that follow in the vectorizer, we fix this situation
3091 here by adding a new (empty) block on the exit-edge of the loop,
5ce9450f
JJ
3092 with the proper loop-exit phis to maintain loop-closed-form.
3093 If loop versioning wasn't done from loop, but from scalar_loop instead,
3094 merge_bb will already have just a single predecessor. */
b8698a0f 3095
ebfd146a 3096 merge_bb = single_exit (loop)->dest;
5ce9450f 3097 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
ebfd146a 3098 {
5ce9450f
JJ
3099 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
3100 new_exit_bb = split_edge (single_exit (loop));
3101 new_exit_e = single_exit (loop);
3102 e = EDGE_SUCC (new_exit_bb, 0);
3103
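      /* For every loop-closed PHI in MERGE_BB, create a PHI in NEW_EXIT_BB
	 carrying the value that leaves the to-be-vectorized loop, and make
	 the MERGE_BB PHI use that new result on the edge from NEW_EXIT_BB.  */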
3104 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
3105 {
3106 tree new_res;
538dd0b7 3107 orig_phi = gsi.phi ();
b731b390 3108 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
5ce9450f
JJ
3109 new_phi = create_phi_node (new_res, new_exit_bb);
3110 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
3111 add_phi_arg (new_phi, arg, new_exit_e,
3112 gimple_phi_arg_location_from_edge (orig_phi, e));
3113 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
3114 }
b8698a0f 3115 }
ebfd146a
IR
3116
3117 /* End loop-exit-fixes after versioning. */
3118
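  /* Emit the statements that compute the versioning condition at the end
     of CONDITION_BB, just before the GIMPLE_COND that tests it.  */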
d68d56b5 3119 if (cond_expr_stmt_list)
ebfd146a
IR
3120 {
3121 cond_exp_gsi = gsi_last_bb (condition_bb);
d68d56b5 3122 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
86290011 3123 GSI_SAME_STMT);
ebfd146a 3124 }
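  /* Finally, bring SSA form up to date with the blocks, edges and PHIs
     created above.  */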
90eb75f2 3125 update_ssa (TODO_update_ssa);
ebfd146a 3126}