/* Loop Vectorization
   Copyright (C) 2003-2015 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com> and
   Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "cfganal.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"
#include "insn-codes.h"
#include "optabs.h"
#include "params.h"
#include "diagnostic-core.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "target.h"

/* Loop Vectorization Pass.

   This pass tries to vectorize loops.

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it were manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N]; short b[N]; short c[N]; int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

   The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   had successfully passed the analysis phase.
   Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMEs), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFS whose base is an array DECL
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.
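
   For instance, the unit-stride access a[i] above matches this pattern,
   while an access such as a[2*i] or a[i*stride] does not (an illustration
   of what the consecutive-access restriction excludes, not an exhaustive
   list).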

   Analysis phase:
   ===============
   The driver for the analysis phase is vect_analyze_loop().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

   During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.
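
   For instance (a sketch using the accessors assumed throughout this file),
   the per-stmt record and the vector type stored in it are read back as:

       stmt_vec_info info = vinfo_for_stmt (stmt);
       tree vectype = STMT_VINFO_VECTYPE (info);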

   Transformation phase:
   =====================
   The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of
   following stmts which use the def of stmt S.  Stmt S is removed if it
   writes to memory; otherwise, we rely on dead code elimination for
   removing it.

   For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];       STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;          STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

   Operands that are not SSA_NAMEs are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   =================
   Currently the only target specific information that is used is the
   size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
   Targets that can support different sizes of vectors will, for now, need
   to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".  More
   flexibility will be added in the future.

   Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., optab_handler (add_optab, V8HImode)).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.
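
   For instance (a minimal sketch of that check, using the exact names
   mentioned above):

     if (optab_handler (add_optab, V8HImode) == CODE_FOR_nothing)
       return false;

   i.e., the target has no V8HI addition instruction, so a V8HI add stmt
   cannot be vectorized.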

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/

static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4byte elements,
   on a target with vector size (VS) 16byte, the VF is set to 4, since 4
   elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated upon
   are of the same size.  Therefore this function currently sets VF according to
   the size of the types operated upon, and fails if there are multiple sizes
   in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  unsigned int vectorization_factor = 0;
  tree scalar_type;
  gphi *phi;
  tree vectype;
  unsigned int nunits;
  stmt_vec_info stmt_info;
  int i;
  HOST_WIDE_INT dummy;
  gimple stmt, pattern_stmt = NULL;
  gimple_seq pattern_def_seq = NULL;
  gimple_stmt_iterator pattern_def_si = gsi_none ();
  bool analyze_pattern_stmt = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_determine_vectorization_factor ===\n");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
           gsi_next (&si))
        {
          phi = si.phi ();
          stmt_info = vinfo_for_stmt (phi);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "get vectype for scalar type: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
                  dump_printf (MSG_NOTE, "\n");
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: unsupported "
                                       "data-type ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         scalar_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
                  dump_printf (MSG_NOTE, "\n");
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
                                 nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (gimple_stmt_iterator si = gsi_start_bb (bb);
           !gsi_end_p (si) || analyze_pattern_stmt;)
        {
          tree vf_vectype;

          if (analyze_pattern_stmt)
            stmt = pattern_stmt;
          else
            stmt = gsi_stmt (si);

          stmt_info = vinfo_for_stmt (stmt);

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "==> examining statement: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }

          gcc_assert (stmt_info);

          /* Skip stmts which do not need to be vectorized.  */
          if ((!STMT_VINFO_RELEVANT_P (stmt_info)
               && !STMT_VINFO_LIVE_P (stmt_info))
              || gimple_clobber_p (stmt))
            {
              if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                  && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                  && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                      || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
                {
                  stmt = pattern_stmt;
                  stmt_info = vinfo_for_stmt (pattern_stmt);
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "==> examining pattern statement: ");
                      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
                      dump_printf (MSG_NOTE, "\n");
                    }
                }
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
                  gsi_next (&si);
                  continue;
                }
            }
          else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
                   && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
                   && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
                       || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
            analyze_pattern_stmt = true;

          /* If a pattern statement has def stmts, analyze them too.  */
          if (is_pattern_stmt_p (stmt_info))
            {
              if (pattern_def_seq == NULL)
                {
                  pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
                  pattern_def_si = gsi_start (pattern_def_seq);
                }
              else if (!gsi_end_p (pattern_def_si))
                gsi_next (&pattern_def_si);
              if (pattern_def_seq != NULL)
                {
                  gimple pattern_def_stmt = NULL;
                  stmt_vec_info pattern_def_stmt_info = NULL;

                  while (!gsi_end_p (pattern_def_si))
                    {
                      pattern_def_stmt = gsi_stmt (pattern_def_si);
                      pattern_def_stmt_info
                        = vinfo_for_stmt (pattern_def_stmt);
                      if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
                          || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
                        break;
                      gsi_next (&pattern_def_si);
                    }

                  if (!gsi_end_p (pattern_def_si))
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_NOTE, vect_location,
                                           "==> examining pattern def stmt: ");
                          dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                                            pattern_def_stmt, 0);
                          dump_printf (MSG_NOTE, "\n");
                        }

                      stmt = pattern_def_stmt;
                      stmt_info = pattern_def_stmt_info;
                    }
                  else
                    {
                      pattern_def_si = gsi_none ();
                      analyze_pattern_stmt = false;
                    }
                }
              else
                analyze_pattern_stmt = false;
            }

          if (gimple_get_lhs (stmt) == NULL_TREE
              /* MASK_STORE has no lhs, but is ok.  */
              && (!is_gimple_call (stmt)
                  || !gimple_call_internal_p (stmt)
                  || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
            {
              if (is_gimple_call (stmt))
                {
                  /* Ignore calls with no lhs.  These must be calls to
                     #pragma omp simd functions, and what vectorization factor
                     they really need can't be determined until
                     vectorizable_simd_clone_call.  */
                  if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
                    {
                      pattern_def_seq = NULL;
                      gsi_next (&si);
                    }
                  continue;
                }
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: irregular stmt.");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
                                    0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: vector stmt in loop:");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case in which a vectype has already been set is for
                 stmts that contain a dataref, or for "pattern-stmts" (stmts
                 generated by the vectorizer to represent/replace a certain
                 idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info)
                          || !gsi_end_p (pattern_def_si));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
              if (is_gimple_call (stmt)
                  && gimple_call_internal_p (stmt)
                  && gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
                scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
              else
                scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "get vectype for scalar type: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
                  dump_printf (MSG_NOTE, "\n");
                }
              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "not vectorized: unsupported "
                                       "data-type ");
                      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                         scalar_type);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
                  return false;
                }

              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
                  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
                  dump_printf (MSG_NOTE, "\n");
                }
            }

          /* The vectorization factor is according to the smallest
             scalar type (or the largest vector size, but we only
             support one vector size per loop).  */
          scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
                                                       &dummy);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "get vectype for scalar type: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
              dump_printf (MSG_NOTE, "\n");
            }
          vf_vectype = get_vectype_for_scalar_type (scalar_type);
          if (!vf_vectype)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: unsupported data-type ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     scalar_type);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if ((GET_MODE_SIZE (TYPE_MODE (vectype))
               != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "not vectorized: different sized vector "
                                   "types in statement, ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     vectype);
                  dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                     vf_vectype);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return false;
            }

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
              dump_printf (MSG_NOTE, "\n");
            }

          nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;

          if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
            {
              pattern_def_seq = NULL;
              gsi_next (&si);
            }
        }
    }

  /* TODO: Analyze cost.  Decide if worth while to vectorize.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n",
                     vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unsupported data-type\n");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}


/* Function vect_is_simple_iv_evolution.

   FORNOW: A simple evolution of an induction variable in the loop is
   considered a polynomial evolution.  */
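
/* For instance (a sketch of the scev chrec notation assumed here): an IV
   updated as i = i + 4 in loop number N has the access function
   {init, +, 4}_N; its evolution part in loop N is the step 4, and its
   initial condition is init.  */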

static bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
                             tree * step)
{
  tree init_expr;
  tree step_expr;
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
  basic_block bb;

  /* When there is no evolution in this loop, the evolution function
     is not "simple".  */
  if (evolution_part == NULL_TREE)
    return false;

  /* When the evolution is a polynomial of degree >= 2
     the evolution function is not "simple".  */
  if (tree_is_chrec (evolution_part))
    return false;

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "step: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
      dump_printf (MSG_NOTE, ", init: ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
      dump_printf (MSG_NOTE, "\n");
    }

  *init = init_expr;
  *step = step_expr;

  if (TREE_CODE (step_expr) != INTEGER_CST
      && (TREE_CODE (step_expr) != SSA_NAME
          || ((bb = gimple_bb (SSA_NAME_DEF_STMT (step_expr)))
              && flow_bb_inside_loop_p (get_loop (cfun, loop_nb), bb))
          || (!INTEGRAL_TYPE_P (TREE_TYPE (step_expr))
              && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))
                  || !flag_associative_math)))
      && (TREE_CODE (step_expr) != REAL_CST
          || !flag_associative_math))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "step unknown.\n");
      return false;
    }

  return true;
}

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree init, step;
  auto_vec<gimple, 64> worklist;
  gphi_iterator gsi;
  bool double_reduc;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_scalar_cycles ===\n");

  /* First - identify all inductions.  Reduction detection assumes that all the
     inductions have been identified, therefore, this order must not be
     changed.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (virtual_operand_p (def))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn)
        {
          STRIP_NOPS (access_fn);
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Access function of PHI: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
              dump_printf (MSG_NOTE, "\n");
            }
          STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
            = evolution_part_in_loop_num (access_fn, loop->num);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
          || (LOOP_VINFO_LOOP (loop_vinfo) != loop
              && TREE_CODE (step) != INTEGER_CST))
        {
          worklist.safe_push (phi);
          continue;
        }

      gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) != NULL_TREE);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Detected induction.\n");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }


  /* Second - identify all reductions and nested cycles.  */
  while (worklist.length () > 0)
    {
      gimple phi = worklist.pop ();
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple reduc_stmt;
      bool nested_cycle;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
          dump_printf (MSG_NOTE, "\n");
        }

      gcc_assert (!virtual_operand_p (def)
                  && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
      reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
                                                &double_reduc);
      if (reduc_stmt)
        {
          if (double_reduc)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "Detected double reduction.\n");

              STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
              STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                vect_double_reduction_def;
            }
          else
            {
              if (nested_cycle)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Detected vectorizable nested cycle.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                    vect_nested_cycle;
                }
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Detected reduction.\n");

                  STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
                  STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                    vect_reduction_def;
                  /* Store the reduction cycles for possible vectorization in
                     loop-aware SLP.  */
                  LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt);
                }
            }
        }
      else
        if (dump_enabled_p ())
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Unknown def-use cycle pattern.\n");
    }
}


/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables.  Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner loop, if one exists.
   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}


/* Function vect_get_loop_niters.

   Determine how many iterations the loop is executed and place it
   in NUMBER_OF_ITERATIONS.  Place the number of latch iterations
   in NUMBER_OF_ITERATIONSM1.

   Return the loop exit condition.  */


static gcond *
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations,
                      tree *number_of_iterationsm1)
{
  tree niters;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== get_loop_niters ===\n");

  niters = number_of_latch_executions (loop);
  *number_of_iterationsm1 = niters;

  /* We want the number of loop header executions which is the number
     of latch executions plus one.
     ??? For UINT_MAX latch executions this number overflows to zero
     for loops like do { n++; } while (n != 0);  */
  if (niters && !chrec_contains_undetermined (niters))
    niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters), unshare_expr (niters),
                          build_int_cst (TREE_TYPE (niters), 1));
  *number_of_iterations = niters;

  return get_loop_exit_condition (loop);
}
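
/* For instance (illustrative): a loop whose body runs N times takes its
   latch edge N-1 times, so number_of_latch_executions returns N-1 and the
   "+ 1" above yields the N header executions the vectorizer wants.  */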


/* Function bb_in_loop_p

   Used as predicate for dfs order traversal of the loop bbs.  */

static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
  const struct loop *const loop = (const struct loop *)data;
  if (flow_bb_inside_loop_p (loop, bb))
    return true;
  return false;
}


/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

static loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  gimple_stmt_iterator si;
  unsigned int i, nbbs;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      /* BBs in a nested inner-loop will have been already processed (because
         we will have called vect_analyze_loop_form for any nested inner-loop).
         Therefore, for stmts in an inner-loop we just want to update the
         STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
         loop_info of the outer-loop we are currently considering to vectorize
         (instead of the loop_info of the inner-loop).
         For stmts in other BBs we need to create a stmt_info from scratch.  */
      if (bb->loop_father != loop)
        {
          /* Inner-loop bb.  */
          gcc_assert (loop->inner && bb->loop_father == loop->inner);
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (phi);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
        }
      else
        {
          /* bb in current nest.  */
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              gimple_set_uid (phi, 0);
              set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
            }

          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              gimple_set_uid (stmt, 0);
              set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
            }
        }
    }

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would be the
     same as reversed postorder traversal, so we are safe.  */

  free (bbs);
  bbs = XCNEWVEC (basic_block, loop->num_nodes);
  nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
                             bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERSM1 (res) = NULL;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
  LOOP_VINFO_COST_MODEL_THRESHOLD (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_VINFO_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_LOOP_NEST (res).create (3);
  LOOP_VINFO_DATAREFS (res).create (10);
  LOOP_VINFO_DDRS (res).create (10 * 10);
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res).create (
             PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
  LOOP_VINFO_MAY_ALIAS_DDRS (res).create (
             PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
  LOOP_VINFO_GROUPED_STORES (res).create (10);
  LOOP_VINFO_REDUCTIONS (res).create (10);
  LOOP_VINFO_REDUCTION_CHAINS (res).create (10);
  LOOP_VINFO_SLP_INSTANCES (res).create (10);
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
  LOOP_VINFO_TARGET_COST_DATA (res) = init_cost (loop);
  LOOP_VINFO_PEELING_FOR_GAPS (res) = false;
  LOOP_VINFO_PEELING_FOR_NITER (res) = false;
  LOOP_VINFO_OPERANDS_SWAPPED (res) = false;

  return res;
}


/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the loop.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  gimple_stmt_iterator si;
  int j;
  vec<slp_instance> slp_instances;
  slp_instance instance;
  bool swapped;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = clean_stmts ? loop->num_nodes : 0;
  swapped = LOOP_VINFO_OPERANDS_SWAPPED (loop_vinfo);

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        free_stmt_vec_info (gsi_stmt (si));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); )
        {
          gimple stmt = gsi_stmt (si);

          /* We may have broken canonical form by moving a constant
             into RHS1 of a commutative op.  Fix such occurrences.  */
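          /* E.g. (illustrative): a stmt left as  x_1 = 3 + y_2  is put
             back into canonical form  x_1 = y_2 + 3, with the constant
             as the second operand.  */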
          if (swapped && is_gimple_assign (stmt))
            {
              enum tree_code code = gimple_assign_rhs_code (stmt);

              if ((code == PLUS_EXPR
                   || code == POINTER_PLUS_EXPR
                   || code == MULT_EXPR)
                  && CONSTANT_CLASS_P (gimple_assign_rhs1 (stmt)))
                swap_ssa_operands (stmt,
                                   gimple_assign_rhs1_ptr (stmt),
                                   gimple_assign_rhs2_ptr (stmt));
            }

          /* Free stmt_vec_info.  */
          free_stmt_vec_info (stmt);
          gsi_next (&si);
        }
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  vect_destroy_datarefs (loop_vinfo, NULL);
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  LOOP_VINFO_LOOP_NEST (loop_vinfo).release ();
  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).release ();
  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).release ();
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, j, instance)
    vect_free_slp_instance (instance);

  LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
  LOOP_VINFO_GROUPED_STORES (loop_vinfo).release ();
  LOOP_VINFO_REDUCTIONS (loop_vinfo).release ();
  LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).release ();

  delete LOOP_VINFO_PEELING_HTAB (loop_vinfo);
  LOOP_VINFO_PEELING_HTAB (loop_vinfo) = NULL;

  destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));

  free (loop_vinfo);
  loop->aux = NULL;
}


/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
vect_analyze_loop_1 (struct loop *loop)
{
  loop_vec_info loop_vinfo;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "===== analyze_loop_nest_1 =====\n");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "bad inner-loop form.\n");
      return NULL;
    }

  return loop_vinfo;
}


/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
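
/* For instance (illustrative): for (i = 0; i < n; i++) a[i] = 0; is a
   countable loop, since its iteration count is computable before the loop
   executes, whereas while (*p++ != 0) ...; is not, since its exit depends
   on loaded data.  */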

loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  gcond *loop_cond;
  tree number_of_iterations = NULL, number_of_iterationsm1 = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_loop_form ===\n");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
         exactly 2 (the header and latch).  Vectorizable inner-most loops
         look like this:

                     (pre-header)
                        |
                       header <--------+
                        | |            |
                        | +--> latch --+
                        |
                     (exit-bb)  */

      if (loop->num_nodes != 2)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: control flow in loop.\n");
          return NULL;
        }

      if (empty_block_p (loop->header))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: empty loop.\n");
          return NULL;
        }
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
         contains a single inner loop, and the number of BBs is exactly 5.
         Vectorizable outer-loops look like this:

                        (pre-header)
                           |
                          header <---+
                           |         |
                          inner-loop |
                           |         |
                          tail ------+
                           |
                        (exit-bb)

         The inner-loop has the properties expected of inner-most loops
         as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: multiple nested loops.\n");
          return NULL;
        }

      /* Analyze the inner-loop.  */
      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
      if (!inner_loop_vinfo)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: Bad inner loop.\n");
          return NULL;
        }

      if (!expr_invariant_in_loop_p (loop,
                                     LOOP_VINFO_NITERS (inner_loop_vinfo)))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: inner-loop count not"
                             " invariant.\n");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (loop->num_nodes != 5)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: control flow in loop.\n");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
      entryedge = EDGE_PRED (innerloop->header, 0);
      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
        entryedge = EDGE_PRED (innerloop->header, 1);

      if (entryedge->src != loop->header
          || !single_exit (innerloop)
          || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: unsupported outerloop form.\n");
          destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Considering outer-loop vectorization.\n");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (dump_enabled_p ())
        {
          if (!single_exit (loop))
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: multiple exits.\n");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: too many incoming edges.\n");
        }
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch)
      || !gimple_seq_empty_p (phi_nodes (loop->latch)))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: latch block not empty.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
        {
          split_loop_exit_edge (e);
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "split exit edge.\n");
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: abnormal loop exit edge.\n");
          if (inner_loop_vinfo)
            destroy_loop_vec_info (inner_loop_vinfo, true);
          return NULL;
        }
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations,
                                    &number_of_iterationsm1);
  if (!loop_cond)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: complicated exit condition.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!number_of_iterations
      || chrec_contains_undetermined (number_of_iterations))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: number of iterations cannot be "
                         "computed.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (integer_zerop (number_of_iterations))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: number of iterations = 0.\n");
      if (inner_loop_vinfo)
        destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Symbolic number of iterations is ");
          dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
          dump_printf (MSG_NOTE, "\n");
        }
    }

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}


/* Function vect_analyze_loop_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  unsigned int vectorization_factor = 0;
  int i;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true, ok;
  HOST_WIDE_INT max_niter;
  HOST_WIDE_INT estimated_niter;
  int min_profitable_estimate;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_loop_operations ===\n");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  if (slp)
    {
      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
         the vectorization factor of the loop is the unrolling factor required
         by the SLP instances.  If that unrolling factor is 1, we say that we
         perform pure SLP on the loop - cross iteration parallelism is not
         exploited.  */
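      /* For example (illustrative numbers): if the loop VF computed so far
         is 4 and some SLP instance requires an unrolling factor of 8, the
         updated VF is least_common_multiple (4, 8) == 8.  */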
1381 for (i = 0; i < nbbs; i++)
1382 {
1383 basic_block bb = bbs[i];
538dd0b7
DM
1384 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1385 gsi_next (&si))
437f4a00
IR
1386 {
1387 gimple stmt = gsi_stmt (si);
1388 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1389 gcc_assert (stmt_info);
1390 if ((STMT_VINFO_RELEVANT_P (stmt_info)
1391 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1392 && !PURE_SLP_STMT (stmt_info))
1393 /* STMT needs both SLP and loop-based vectorization. */
1394 only_slp_in_loop = false;
1395 }
1396 }
1397
1398 if (only_slp_in_loop)
1399 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1400 else
1401 vectorization_factor = least_common_multiple (vectorization_factor,
1402 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1403
1404 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
73fbfcad 1405 if (dump_enabled_p ())
78c60e3d 1406 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1407 "Updating vectorization factor to %d\n",
78c60e3d 1408 vectorization_factor);
437f4a00 1409 }
8644a673
IR
1410
1411 for (i = 0; i < nbbs; i++)
1412 {
1413 basic_block bb = bbs[i];
1414
538dd0b7
DM
1415 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
1416 gsi_next (&si))
8644a673 1417 {
538dd0b7 1418 gphi *phi = si.phi ();
8644a673
IR
1419 ok = true;
1420
1421 stmt_info = vinfo_for_stmt (phi);
73fbfcad 1422 if (dump_enabled_p ())
8644a673 1423 {
78c60e3d
SS
1424 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1425 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
e645e942 1426 dump_printf (MSG_NOTE, "\n");
8644a673
IR
1427 }
1428
86c8d1f6
IR
1429 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1430 (i.e., a phi in the tail of the outer-loop). */
8644a673
IR
1431 if (! is_loop_header_bb_p (bb))
1432 {
86c8d1f6 1433 /* FORNOW: we currently don't support the case that these phis
06066f92 1434 are not used in the outerloop (unless it is double reduction,
b8698a0f 1435 i.e., this phi is vect_reduction_def), cause this case
06066f92
IR
1436 requires to actually do something here. */
1437 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1438 || STMT_VINFO_LIVE_P (stmt_info))
b8698a0f 1439 && STMT_VINFO_DEF_TYPE (stmt_info)
06066f92 1440 != vect_double_reduction_def)
8644a673 1441 {
73fbfcad 1442 if (dump_enabled_p ())
e645e942 1443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 1444 "Unsupported loop-closed phi in "
e645e942 1445 "outer-loop.\n");
8644a673
IR
1446 return false;
1447 }
86c8d1f6
IR
1448
1449 /* If PHI is used in the outer loop, we check that its operand
1450 is defined in the inner loop. */
1451 if (STMT_VINFO_RELEVANT_P (stmt_info))
1452 {
1453 tree phi_op;
1454 gimple op_def_stmt;
1455
1456 if (gimple_phi_num_args (phi) != 1)
1457 return false;
1458
1459 phi_op = PHI_ARG_DEF (phi, 0);
1460 if (TREE_CODE (phi_op) != SSA_NAME)
1461 return false;
1462
1463 op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
c370d99f 1464 if (gimple_nop_p (op_def_stmt)
017a2020
JJ
1465 || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt))
1466 || !vinfo_for_stmt (op_def_stmt))
86c8d1f6
IR
1467 return false;
1468
1469 if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1470 != vect_used_in_outer
1471 && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1472 != vect_used_in_outer_by_reduction)
1473 return false;
1474 }
1475
8644a673
IR
1476 continue;
1477 }
1478
1479 gcc_assert (stmt_info);
1480
1481 if (STMT_VINFO_LIVE_P (stmt_info))
1482 {
1483 /* FORNOW: not yet supported. */
73fbfcad 1484 if (dump_enabled_p ())
78c60e3d 1485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1486 "not vectorized: value used after loop.\n");
8644a673
IR
1487 return false;
1488 }
1489
1490 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1491 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1492 {
1493 /* A scalar-dependence cycle that we don't support. */
73fbfcad 1494 if (dump_enabled_p ())
e645e942
TJ
1495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1496 "not vectorized: scalar dependence cycle.\n");
8644a673
IR
1497 return false;
1498 }
1499
1500 if (STMT_VINFO_RELEVANT_P (stmt_info))
1501 {
1502 need_to_vectorize = true;
1503 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1504 ok = vectorizable_induction (phi, NULL, NULL);
1505 }
1506
1507 if (!ok)
1508 {
73fbfcad 1509 if (dump_enabled_p ())
8644a673 1510 {
e645e942 1511 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
1512 "not vectorized: relevant phi not "
1513 "supported: ");
1514 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
e645e942 1515 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8644a673 1516 }
c3e7ee41 1517 return false;
8644a673
IR
1518 }
1519 }
1520
538dd0b7
DM
1521 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1522 gsi_next (&si))
8644a673
IR
1523 {
1524 gimple stmt = gsi_stmt (si);
fbd7e877
RB
1525 if (!gimple_clobber_p (stmt)
1526 && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
8644a673 1527 return false;
b8698a0f 1528 }
8644a673
IR
1529 } /* bbs */
1530
1531 /* All operations in the loop are either irrelevant (deal with loop
1532 control, or dead), or only used outside the loop and can be moved
1533 out of the loop (e.g. invariants, inductions). The loop can be
1534 optimized away by scalar optimizations. We're better off not
1535 touching this loop. */
1536 if (!need_to_vectorize)
1537 {
73fbfcad 1538 if (dump_enabled_p ())
78c60e3d 1539 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1540 "All the computation can be taken out of the loop.\n");
73fbfcad 1541 if (dump_enabled_p ())
e645e942 1542 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 1543 "not vectorized: redundant loop. no profit to "
e645e942 1544 "vectorize.\n");
8644a673
IR
1545 return false;
1546 }
1547
73fbfcad 1548 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
78c60e3d
SS
1549 dump_printf_loc (MSG_NOTE, vect_location,
1550 "vectorization_factor = %d, niters = "
e645e942 1551 HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor,
78c60e3d 1552 LOOP_VINFO_INT_NITERS (loop_vinfo));
8644a673 1553
552fd8e6
RG
1554 if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1555 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1556 || ((max_niter = max_stmt_executions_int (loop)) != -1
e1b435ad 1557 && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
8644a673 1558 {
73fbfcad 1559 if (dump_enabled_p ())
78c60e3d 1560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1561 "not vectorized: iteration count too small.\n");
73fbfcad 1562 if (dump_enabled_p ())
78c60e3d
SS
1563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1564 "not vectorized: iteration count smaller than "
e645e942 1565 "vectorization factor.\n");
8644a673
IR
1566 return false;
1567 }
1568
ff802fa1 1569 /* Analyze cost. Decide if worth while to vectorize. */
8644a673
IR
1570
1571 /* Once VF is set, SLP costs should be updated since the number of created
1572 vector stmts depends on VF. */
1573 vect_update_slp_costs_according_to_vf (loop_vinfo);
1574
9ed313cc
JH
1575 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1576 &min_profitable_estimate);
8644a673
IR
1577 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1578
1579 if (min_profitable_iters < 0)
1580 {
73fbfcad 1581 if (dump_enabled_p ())
78c60e3d 1582 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1583 "not vectorized: vectorization not profitable.\n");
73fbfcad 1584 if (dump_enabled_p ())
e645e942 1585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 1586 "not vectorized: vector version will never be "
e645e942 1587 "profitable.\n");
8644a673
IR
1588 return false;
1589 }
1590
1591 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1592 * vectorization_factor) - 1);
1593
9ed313cc 1594
8644a673
IR
1595 /* Use the cost model only if it is more conservative than user specified
1596 threshold. */
1597
1598 th = (unsigned) min_scalar_loop_bound;
1599 if (min_profitable_iters
1600 && (!min_scalar_loop_bound
1601 || min_profitable_iters > min_scalar_loop_bound))
1602 th = (unsigned) min_profitable_iters;
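/* Illustration with hypothetical numbers: --param min-vect-loop-bound=2
   and a vectorization factor of 4 give min_scalar_loop_bound = 2*4 - 1 = 7.
   If the cost model computed min_profitable_iters = 10, TH becomes 10;
   had it computed 5, the user-specified bound 7 would win instead.  */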
1603
090cd8dc
CH
1604 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
1605
8644a673
IR
1606 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1607 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1608 {
73fbfcad 1609 if (dump_enabled_p ())
78c60e3d 1610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1611 "not vectorized: vectorization not profitable.\n");
73fbfcad 1612 if (dump_enabled_p ())
78c60e3d
SS
1613 dump_printf_loc (MSG_NOTE, vect_location,
1614 "not vectorized: iteration count smaller than user "
1615 "specified loop bound parameter or minimum profitable "
e645e942 1616 "iterations (whichever is more conservative).\n");
8644a673
IR
1617 return false;
1618 }
1619
9ed313cc
JH
1620 if ((estimated_niter = estimated_stmt_executions_int (loop)) != -1
1621 && ((unsigned HOST_WIDE_INT) estimated_niter
1622 <= MAX (th, (unsigned)min_profitable_estimate)))
1623 {
73fbfcad 1624 if (dump_enabled_p ())
9ed313cc
JH
1625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1626 "not vectorized: estimated iteration count too "
e645e942 1627 "small.\n");
73fbfcad 1628 if (dump_enabled_p ())
9ed313cc
JH
1629 dump_printf_loc (MSG_NOTE, vect_location,
1630 "not vectorized: estimated iteration count smaller "
1631 "than specified loop bound parameter or minimum "
1632 "profitable iterations (whichever is more "
e645e942 1633 "conservative).\n");
9ed313cc
JH
1634 return false;
1635 }
1636
8644a673
IR
1637 return true;
1638}
1639
1640
bb67d9c7 1641/* Function vect_analyze_loop_2.
ebfd146a
IR
1642
1643 Apply a set of analyses on LOOP, and create a loop_vec_info struct
ff802fa1 1644 for it. The different analyses will record information in the
ebfd146a 1645 loop_vec_info struct. */
bb67d9c7
RG
1646static bool
1647vect_analyze_loop_2 (loop_vec_info loop_vinfo)
ebfd146a 1648{
5bfdb7d8 1649 bool ok, slp = false;
777e1f09
RG
1650 int max_vf = MAX_VECTORIZATION_FACTOR;
1651 int min_vf = 2;
090cd8dc 1652 unsigned int th;
1428105c 1653 unsigned int n_stmts = 0;
ebfd146a 1654
ebfd146a 1655 /* Find all data references in the loop (which correspond to vdefs/vuses)
777e1f09
RG
1656 and analyze their evolution in the loop. Also adjust the minimal
1657 vectorization factor according to the loads and stores.
ebfd146a
IR
1658
 1659 FORNOW: Handle only simple array references whose
 1660 alignment can be forced, and aligned pointer-references. */
1661
1428105c 1662 ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf, &n_stmts);
ebfd146a
IR
1663 if (!ok)
1664 {
73fbfcad 1665 if (dump_enabled_p ())
78c60e3d 1666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1667 "bad data references.\n");
bb67d9c7 1668 return false;
ebfd146a
IR
1669 }
1670
e8c504f1
RB
1671 /* Classify all cross-iteration scalar data-flow cycles.
1672 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1673
1674 vect_analyze_scalar_cycles (loop_vinfo);
1675
1676 vect_pattern_recog (loop_vinfo, NULL);
1677
5abe1e05
RB
1678 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1679 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1680
1681 ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1682 if (!ok)
1683 {
1684 if (dump_enabled_p ())
1685 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1686 "bad data access.\n");
5abe1e05
RB
1687 return false;
1688 }
1689
ebfd146a
IR
1690 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1691
1692 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1693 if (!ok)
1694 {
73fbfcad 1695 if (dump_enabled_p ())
78c60e3d 1696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1697 "unexpected pattern.\n");
bb67d9c7 1698 return false;
ebfd146a
IR
1699 }
1700
777e1f09
RG
1701 /* Analyze data dependences between the data-refs in the loop
1702 and adjust the maximum vectorization factor according to
1703 the dependences.
1704 FORNOW: fail at the first data dependence that we encounter. */
ebfd146a 1705
5abe1e05 1706 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
777e1f09
RG
1707 if (!ok
1708 || max_vf < min_vf)
ebfd146a 1709 {
73fbfcad 1710 if (dump_enabled_p ())
78c60e3d 1711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1712 "bad data dependence.\n");
bb67d9c7 1713 return false;
ebfd146a
IR
1714 }
1715
1716 ok = vect_determine_vectorization_factor (loop_vinfo);
1717 if (!ok)
1718 {
73fbfcad 1719 if (dump_enabled_p ())
78c60e3d 1720 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1721 "can't determine vectorization factor.\n");
bb67d9c7 1722 return false;
ebfd146a 1723 }
777e1f09
RG
1724 if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1725 {
73fbfcad 1726 if (dump_enabled_p ())
78c60e3d 1727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1728 "bad data dependence.\n");
bb67d9c7 1729 return false;
777e1f09 1730 }
ebfd146a 1731
777e1f09
RG
1732 /* Analyze the alignment of the data-refs in the loop.
1733 Fail if a data reference is found that cannot be vectorized. */
ebfd146a 1734
777e1f09 1735 ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
ebfd146a
IR
1736 if (!ok)
1737 {
73fbfcad 1738 if (dump_enabled_p ())
78c60e3d 1739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1740 "bad data alignment.\n");
bb67d9c7 1741 return false;
ebfd146a
IR
1742 }
1743
ebfd146a
IR
1744 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1745 It is important to call pruning after vect_analyze_data_ref_accesses,
1746 since we use grouping information gathered by interleaving analysis. */
1747 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1748 if (!ok)
1749 {
73fbfcad 1750 if (dump_enabled_p ())
78c60e3d 1751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
d55d9ed0
RB
1752 "number of versioning for alias "
1753 "run-time tests exceeds %d "
1754 "(--param vect-max-version-for-alias-checks)\n",
1755 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
bb67d9c7 1756 return false;
ebfd146a
IR
1757 }
1758
ebfd146a
IR
1759 /* This pass will decide on using loop versioning and/or loop peeling in
1760 order to enhance the alignment of data references in the loop. */
1761
1762 ok = vect_enhance_data_refs_alignment (loop_vinfo);
1763 if (!ok)
1764 {
73fbfcad 1765 if (dump_enabled_p ())
78c60e3d 1766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1767 "bad data alignment.\n");
bb67d9c7 1768 return false;
ebfd146a
IR
1769 }
1770
720f5239 1771 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
1428105c 1772 ok = vect_analyze_slp (loop_vinfo, NULL, n_stmts);
720f5239
IR
1773 if (ok)
1774 {
1775 /* Decide which possible SLP instances to SLP. */
437f4a00 1776 slp = vect_make_slp_decision (loop_vinfo);
720f5239
IR
1777
1778 /* Find stmts that need to be both vectorized and SLPed. */
1779 vect_detect_hybrid_slp (loop_vinfo);
1780 }
b010117a
IR
1781 else
1782 return false;
720f5239 1783
ebfd146a
IR
1784 /* Scan all the operations in the loop and make sure they are
1785 vectorizable. */
1786
437f4a00 1787 ok = vect_analyze_loop_operations (loop_vinfo, slp);
ebfd146a
IR
1788 if (!ok)
1789 {
73fbfcad 1790 if (dump_enabled_p ())
78c60e3d 1791 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1792 "bad operation or unsupported loop bound.\n");
bb67d9c7
RG
1793 return false;
1794 }
1795
15e693cc
RB
1796 /* Decide whether we need to create an epilogue loop to handle
1797 remaining scalar iterations. */
090cd8dc
CH
1798 th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) + 1)
1799 / LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1800 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
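/* E.g. (hypothetical numbers): a cost-model threshold of 10 with VF = 4
   yields TH = ((10 + 1) / 4) * 4 = 8, i.e. the threshold rounded down to
   a whole number of vector iterations.  */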
1801
15e693cc
RB
1802 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1803 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1804 {
1805 if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo)
1806 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
1807 < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
1808 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
1809 }
1810 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1811 || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
090cd8dc
CH
1812 < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1813 /* In case of versioning, check if the maximum number of
1814 iterations is greater than th. If they are identical,
1815 the epilogue is unnecessary. */
1816 && ((!LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
1817 && !LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1818 || (unsigned HOST_WIDE_INT)max_stmt_executions_int
1819 (LOOP_VINFO_LOOP (loop_vinfo)) > th)))
15e693cc
RB
1820 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
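/* A sketch of the known-NITERS test above, with hypothetical numbers:
   NITERS = 20, prologue peeling = 2, VF = 4: ctz (20 - 2) = ctz (18) = 1
   is smaller than log2 (4) = 2, so 18 iterations cannot be a multiple of
   4 and an epilogue is required.  With NITERS = 18 instead,
   ctz (16) = 4 >= 2 and the vector loop consumes all iterations.  */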
1821
1822 /* If an epilogue loop is required make sure we can create one. */
1823 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
1824 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
1825 {
1826 if (dump_enabled_p ())
1827 dump_printf_loc (MSG_NOTE, vect_location, "epilog loop required\n");
1828 if (!vect_can_advance_ivs_p (loop_vinfo)
1829 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
1830 single_exit (LOOP_VINFO_LOOP
1831 (loop_vinfo))))
1832 {
1833 if (dump_enabled_p ())
1834 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1835 "not vectorized: can't create required "
1836 "epilog loop\n");
1837 return false;
1838 }
1839 }
1840
bb67d9c7
RG
1841 return true;
1842}
1843
1844/* Function vect_analyze_loop.
1845
1846 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1847 for it. The different analyses will record information in the
1848 loop_vec_info struct. */
1849loop_vec_info
1850vect_analyze_loop (struct loop *loop)
1851{
1852 loop_vec_info loop_vinfo;
1853 unsigned int vector_sizes;
1854
1855 /* Autodetect first vector size we try. */
1856 current_vector_size = 0;
1857 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1858
73fbfcad 1859 if (dump_enabled_p ())
78c60e3d 1860 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1861 "===== analyze_loop_nest =====\n");
bb67d9c7
RG
1862
1863 if (loop_outer (loop)
1864 && loop_vec_info_for_loop (loop_outer (loop))
1865 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1866 {
73fbfcad 1867 if (dump_enabled_p ())
78c60e3d 1868 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1869 "outer-loop already vectorized.\n");
ebfd146a
IR
1870 return NULL;
1871 }
1872
bb67d9c7
RG
1873 while (1)
1874 {
1875 /* Check the CFG characteristics of the loop (nesting, entry/exit). */
1876 loop_vinfo = vect_analyze_loop_form (loop);
1877 if (!loop_vinfo)
1878 {
73fbfcad 1879 if (dump_enabled_p ())
78c60e3d 1880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1881 "bad loop form.\n");
bb67d9c7
RG
1882 return NULL;
1883 }
ebfd146a 1884
bb67d9c7
RG
1885 if (vect_analyze_loop_2 (loop_vinfo))
1886 {
1887 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1888
1889 return loop_vinfo;
1890 }
1891
1892 destroy_loop_vec_info (loop_vinfo, true);
1893
1894 vector_sizes &= ~current_vector_size;
1895 if (vector_sizes == 0
1896 || current_vector_size == 0)
1897 return NULL;
1898
1899 /* Try the next biggest vector size. */
1900 current_vector_size = 1 << floor_log2 (vector_sizes);
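/* For instance, on a hypothetical target whose autovectorize_vector_sizes
   hook returns the mask 16 | 8: after a failed analysis with
   current_vector_size == 16 the mask becomes 8, and the retry uses
   1 << floor_log2 (8) == 8-byte vectors.  */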
73fbfcad 1901 if (dump_enabled_p ())
78c60e3d
SS
1902 dump_printf_loc (MSG_NOTE, vect_location,
1903 "***** Re-trying analysis with "
1904 "vector size %d\n", current_vector_size);
bb67d9c7 1905 }
ebfd146a
IR
1906}
1907
1908
1909/* Function reduction_code_for_scalar_code
1910
1911 Input:
 1912 CODE - tree_code of a reduction operation.
1913
1914 Output:
1915 REDUC_CODE - the corresponding tree-code to be used to reduce the
99f76d9b
AL
1916 vector of partial results into a single scalar result, or ERROR_MARK
1917 if the operation is a supported reduction operation, but does not have
1918 such a tree-code.
ebfd146a 1919
06066f92 1920 Return FALSE if CODE currently cannot be vectorized as a reduction. */
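/* For example, a sum reduction (PLUS_EXPR) maps to REDUC_PLUS_EXPR,
   whereas a multiply reduction returns TRUE with *REDUC_CODE set to
   ERROR_MARK: the caller must then reduce the final vector by other
   means, e.g. the whole-vector shifts costed in
   vect_model_reduction_cost below.  */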
ebfd146a
IR
1921
1922static bool
1923reduction_code_for_scalar_code (enum tree_code code,
1924 enum tree_code *reduc_code)
1925{
1926 switch (code)
06066f92
IR
1927 {
1928 case MAX_EXPR:
1929 *reduc_code = REDUC_MAX_EXPR;
1930 return true;
ebfd146a 1931
06066f92
IR
1932 case MIN_EXPR:
1933 *reduc_code = REDUC_MIN_EXPR;
1934 return true;
ebfd146a 1935
06066f92
IR
1936 case PLUS_EXPR:
1937 *reduc_code = REDUC_PLUS_EXPR;
1938 return true;
ebfd146a 1939
06066f92
IR
1940 case MULT_EXPR:
1941 case MINUS_EXPR:
1942 case BIT_IOR_EXPR:
1943 case BIT_XOR_EXPR:
1944 case BIT_AND_EXPR:
1945 *reduc_code = ERROR_MARK;
1946 return true;
1947
1948 default:
1949 return false;
1950 }
ebfd146a
IR
1951}
1952
1953
ff802fa1 1954/* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
ebfd146a
IR
1955 STMT is printed with a message MSG. */
1956
1957static void
78c60e3d 1958report_vect_op (int msg_type, gimple stmt, const char *msg)
ebfd146a 1959{
78c60e3d
SS
1960 dump_printf_loc (msg_type, vect_location, "%s", msg);
1961 dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
e645e942 1962 dump_printf (msg_type, "\n");
ebfd146a
IR
1963}
1964
1965
b010117a
IR
1966/* Detect SLP reduction of the form:
1967
1968 #a1 = phi <a5, a0>
1969 a2 = operation (a1)
1970 a3 = operation (a2)
1971 a4 = operation (a3)
1972 a5 = operation (a4)
1973
1974 #a = phi <a5>
1975
1976 PHI is the reduction phi node (#a1 = phi <a5, a0> above)
1977 FIRST_STMT is the first reduction stmt in the chain
1978 (a2 = operation (a1)).
1979
1980 Return TRUE if a reduction chain was detected. */
1981
1982static bool
1983vect_is_slp_reduction (loop_vec_info loop_info, gimple phi, gimple first_stmt)
1984{
1985 struct loop *loop = (gimple_bb (phi))->loop_father;
1986 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1987 enum tree_code code;
29f85237 1988 gimple current_stmt = NULL, loop_use_stmt = NULL, first, next_stmt;
b010117a
IR
1989 stmt_vec_info use_stmt_info, current_stmt_info;
1990 tree lhs;
1991 imm_use_iterator imm_iter;
1992 use_operand_p use_p;
67f2d54f 1993 int nloop_uses, size = 0, n_out_of_loop_uses;
b010117a
IR
1994 bool found = false;
1995
1996 if (loop != vect_loop)
1997 return false;
1998
1999 lhs = PHI_RESULT (phi);
2000 code = gimple_assign_rhs_code (first_stmt);
2001 while (1)
2002 {
2003 nloop_uses = 0;
67f2d54f 2004 n_out_of_loop_uses = 0;
b010117a
IR
2005 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
2006 {
29f85237 2007 gimple use_stmt = USE_STMT (use_p);
b010d601
JJ
2008 if (is_gimple_debug (use_stmt))
2009 continue;
29f85237 2010
b010117a 2011 /* Check if we got back to the reduction phi. */
29f85237 2012 if (use_stmt == phi)
b010117a 2013 {
29f85237 2014 loop_use_stmt = use_stmt;
b010117a
IR
2015 found = true;
2016 break;
2017 }
2018
67f2d54f
IR
2019 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2020 {
2021 if (vinfo_for_stmt (use_stmt)
2022 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
2023 {
2024 loop_use_stmt = use_stmt;
2025 nloop_uses++;
2026 }
2027 }
2028 else
2029 n_out_of_loop_uses++;
b010117a 2030
67f2d54f
IR
 2031 /* There can be either a single use in the loop or two uses in
2032 phi nodes. */
2033 if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses))
2034 return false;
b010117a
IR
2035 }
2036
2037 if (found)
2038 break;
2039
29f85237
JJ
2040 /* We reached a statement with no loop uses. */
2041 if (nloop_uses == 0)
2042 return false;
2043
b010117a 2044 /* This is a loop exit phi, and we haven't reached the reduction phi. */
29f85237 2045 if (gimple_code (loop_use_stmt) == GIMPLE_PHI)
b010117a
IR
2046 return false;
2047
29f85237
JJ
2048 if (!is_gimple_assign (loop_use_stmt)
2049 || code != gimple_assign_rhs_code (loop_use_stmt)
2050 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
b010117a
IR
2051 return false;
2052
2053 /* Insert USE_STMT into reduction chain. */
29f85237 2054 use_stmt_info = vinfo_for_stmt (loop_use_stmt);
b010117a
IR
2055 if (current_stmt)
2056 {
2057 current_stmt_info = vinfo_for_stmt (current_stmt);
29f85237 2058 GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt;
b010117a
IR
2059 GROUP_FIRST_ELEMENT (use_stmt_info)
2060 = GROUP_FIRST_ELEMENT (current_stmt_info);
2061 }
2062 else
29f85237 2063 GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt;
b010117a 2064
29f85237
JJ
2065 lhs = gimple_assign_lhs (loop_use_stmt);
2066 current_stmt = loop_use_stmt;
b010117a
IR
2067 size++;
2068 }
2069
29f85237 2070 if (!found || loop_use_stmt != phi || size < 2)
b010117a
IR
2071 return false;
2072
b010117a
IR
2073 /* Swap the operands, if needed, to make the reduction operand be the second
2074 operand. */
2075 lhs = PHI_RESULT (phi);
69940d4d
IR
2076 next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
2077 while (next_stmt)
b010117a 2078 {
29f85237 2079 if (gimple_assign_rhs2 (next_stmt) == lhs)
69940d4d 2080 {
29f85237
JJ
2081 tree op = gimple_assign_rhs1 (next_stmt);
2082 gimple def_stmt = NULL;
2083
2084 if (TREE_CODE (op) == SSA_NAME)
2085 def_stmt = SSA_NAME_DEF_STMT (op);
2086
2087 /* Check that the other def is either defined in the loop
2088 ("vect_internal_def"), or it's an induction (defined by a
2089 loop-header phi-node). */
2090 if (def_stmt
b5066d8b 2091 && gimple_bb (def_stmt)
29f85237
JJ
2092 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2093 && (is_gimple_assign (def_stmt)
2094 || is_gimple_call (def_stmt)
2095 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2096 == vect_induction_def
2097 || (gimple_code (def_stmt) == GIMPLE_PHI
2098 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2099 == vect_internal_def
2100 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
69940d4d 2101 {
29f85237
JJ
2102 lhs = gimple_assign_lhs (next_stmt);
2103 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
2104 continue;
2105 }
2106
2107 return false;
2108 }
2109 else
2110 {
2111 tree op = gimple_assign_rhs2 (next_stmt);
2112 gimple def_stmt = NULL;
2113
2114 if (TREE_CODE (op) == SSA_NAME)
2115 def_stmt = SSA_NAME_DEF_STMT (op);
2116
2117 /* Check that the other def is either defined in the loop
2118 ("vect_internal_def"), or it's an induction (defined by a
2119 loop-header phi-node). */
2120 if (def_stmt
b5066d8b 2121 && gimple_bb (def_stmt)
29f85237
JJ
2122 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2123 && (is_gimple_assign (def_stmt)
2124 || is_gimple_call (def_stmt)
2125 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
69940d4d 2126 == vect_induction_def
29f85237
JJ
2127 || (gimple_code (def_stmt) == GIMPLE_PHI
2128 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
69940d4d 2129 == vect_internal_def
29f85237
JJ
2130 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2131 {
73fbfcad 2132 if (dump_enabled_p ())
69940d4d 2133 {
78c60e3d
SS
2134 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: ");
2135 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
e645e942 2136 dump_printf (MSG_NOTE, "\n");
69940d4d
IR
2137 }
2138
80560f95
AM
2139 swap_ssa_operands (next_stmt,
2140 gimple_assign_rhs1_ptr (next_stmt),
2141 gimple_assign_rhs2_ptr (next_stmt));
6ba84010 2142 update_stmt (next_stmt);
3393a711
BS
2143
2144 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt)))
2145 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
69940d4d
IR
2146 }
2147 else
29f85237 2148 return false;
b010117a
IR
2149 }
2150
69940d4d
IR
2151 lhs = gimple_assign_lhs (next_stmt);
2152 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
b010117a
IR
2153 }
2154
69940d4d
IR
2155 /* Save the chain for further analysis in SLP detection. */
2156 first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt));
9771b263 2157 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first);
69940d4d
IR
2158 GROUP_SIZE (vinfo_for_stmt (first)) = size;
2159
b010117a
IR
2160 return true;
2161}
2162
2163
8a9ecffd 2164/* Function vect_is_simple_reduction_1
ebfd146a 2165
06066f92 2166 (1) Detect a cross-iteration def-use cycle that represents a simple
ff802fa1 2167 reduction computation. We look for the following pattern:
ebfd146a
IR
2168
2169 loop_header:
2170 a1 = phi < a0, a2 >
2171 a3 = ...
2172 a2 = operation (a3, a1)
b8698a0f 2173
69b8f2f9
CH
2174 or
2175
2176 a3 = ...
2177 loop_header:
2178 a1 = phi < a0, a2 >
2179 a2 = operation (a3, a1)
2180
ebfd146a 2181 such that:
b8698a0f 2182 1. operation is commutative and associative and it is safe to
7c5222ff 2183 change the order of the computation (if CHECK_REDUCTION is true)
ebfd146a 2184 2. no uses for a2 in the loop (a2 is used out of the loop)
210dedfe
IR
2185 3. no uses of a1 in the loop besides the reduction operation
2186 4. no uses of a1 outside the loop.
ebfd146a 2187
210dedfe 2188 Conditions 1,4 are tested here.
b8698a0f 2189 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
7c5222ff 2190
b8698a0f
L
2191 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
2192 nested cycles, if CHECK_REDUCTION is false.
06066f92
IR
2193
2194 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
2195 reductions:
2196
2197 a1 = phi < a0, a2 >
2198 inner loop (def of a3)
b8698a0f 2199 a2 = phi < a3 >
8a9ecffd
MM
2200
2201 If MODIFY is true it tries also to rework the code in-place to enable
2202 detection of more reduction patterns. For the time being we rewrite
2203 "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
06066f92 2204*/
ebfd146a 2205
8a9ecffd
MM
2206static gimple
2207vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
2208 bool check_reduction, bool *double_reduc,
2209 bool modify)
ebfd146a
IR
2210{
2211 struct loop *loop = (gimple_bb (phi))->loop_father;
2212 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2213 edge latch_e = loop_latch_edge (loop);
2214 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
4bbe8262 2215 gimple def_stmt, def1 = NULL, def2 = NULL;
8a9ecffd 2216 enum tree_code orig_code, code;
4bbe8262 2217 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
ebfd146a
IR
2218 tree type;
2219 int nloop_uses;
2220 tree name;
2221 imm_use_iterator imm_iter;
2222 use_operand_p use_p;
06066f92
IR
2223 bool phi_def;
2224
2225 *double_reduc = false;
ebfd146a 2226
7c5222ff
IR
2227 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
2228 otherwise, we assume outer loop vectorization. */
b8698a0f 2229 gcc_assert ((check_reduction && loop == vect_loop)
7c5222ff 2230 || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
ebfd146a
IR
2231
2232 name = PHI_RESULT (phi);
b75f28e1
RB
2233 /* ??? If there are no uses of the PHI result the inner loop reduction
2234 won't be detected as possibly double-reduction by vectorizable_reduction
2235 because that tries to walk the PHI arg from the preheader edge which
2236 can be constant. See PR60382. */
2237 if (has_zero_uses (name))
2238 return NULL;
ebfd146a
IR
2239 nloop_uses = 0;
2240 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2241 {
2242 gimple use_stmt = USE_STMT (use_p);
b5b8b0ac
AO
2243 if (is_gimple_debug (use_stmt))
2244 continue;
210dedfe
IR
2245
2246 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2247 {
73fbfcad 2248 if (dump_enabled_p ())
78c60e3d 2249 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2250 "intermediate value used outside loop.\n");
210dedfe
IR
2251
2252 return NULL;
2253 }
2254
2255 if (vinfo_for_stmt (use_stmt)
ebfd146a
IR
2256 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2257 nloop_uses++;
2258 if (nloop_uses > 1)
2259 {
73fbfcad 2260 if (dump_enabled_p ())
78c60e3d 2261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2262 "reduction used in loop.\n");
ebfd146a
IR
2263 return NULL;
2264 }
2265 }
2266
2267 if (TREE_CODE (loop_arg) != SSA_NAME)
2268 {
73fbfcad 2269 if (dump_enabled_p ())
ebfd146a 2270 {
78c60e3d
SS
2271 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2272 "reduction: not ssa_name: ");
2273 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
e645e942 2274 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
2275 }
2276 return NULL;
2277 }
2278
2279 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2280 if (!def_stmt)
2281 {
73fbfcad 2282 if (dump_enabled_p ())
78c60e3d 2283 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2284 "reduction: no def_stmt.\n");
ebfd146a
IR
2285 return NULL;
2286 }
2287
06066f92 2288 if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
ebfd146a 2289 {
73fbfcad 2290 if (dump_enabled_p ())
e645e942
TJ
2291 {
2292 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2293 dump_printf (MSG_NOTE, "\n");
2294 }
ebfd146a
IR
2295 return NULL;
2296 }
2297
06066f92
IR
2298 if (is_gimple_assign (def_stmt))
2299 {
2300 name = gimple_assign_lhs (def_stmt);
2301 phi_def = false;
2302 }
2303 else
2304 {
2305 name = PHI_RESULT (def_stmt);
2306 phi_def = true;
2307 }
2308
ebfd146a
IR
2309 nloop_uses = 0;
2310 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2311 {
2312 gimple use_stmt = USE_STMT (use_p);
b5b8b0ac
AO
2313 if (is_gimple_debug (use_stmt))
2314 continue;
ebfd146a
IR
2315 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
2316 && vinfo_for_stmt (use_stmt)
2317 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2318 nloop_uses++;
2319 if (nloop_uses > 1)
2320 {
73fbfcad 2321 if (dump_enabled_p ())
78c60e3d 2322 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2323 "reduction used in loop.\n");
ebfd146a
IR
2324 return NULL;
2325 }
2326 }
2327
06066f92
IR
2328 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2329 defined in the inner loop. */
2330 if (phi_def)
2331 {
2332 op1 = PHI_ARG_DEF (def_stmt, 0);
2333
2334 if (gimple_phi_num_args (def_stmt) != 1
2335 || TREE_CODE (op1) != SSA_NAME)
2336 {
73fbfcad 2337 if (dump_enabled_p ())
78c60e3d 2338 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 2339 "unsupported phi node definition.\n");
06066f92
IR
2340
2341 return NULL;
2342 }
2343
b8698a0f 2344 def1 = SSA_NAME_DEF_STMT (op1);
d4759fc6
FY
2345 if (gimple_bb (def1)
2346 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
06066f92
IR
2347 && loop->inner
2348 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2349 && is_gimple_assign (def1))
2350 {
73fbfcad 2351 if (dump_enabled_p ())
78c60e3d
SS
2352 report_vect_op (MSG_NOTE, def_stmt,
2353 "detected double reduction: ");
b8698a0f 2354
06066f92
IR
2355 *double_reduc = true;
2356 return def_stmt;
2357 }
2358
2359 return NULL;
2360 }
2361
8a9ecffd
MM
2362 code = orig_code = gimple_assign_rhs_code (def_stmt);
2363
2364 /* We can handle "res -= x[i]", which is non-associative by
2365 simply rewriting this into "res += -x[i]". Avoid changing
2366 gimple instruction for the first simple tests and only do this
2367 if we're allowed to change code at all. */
0532869d
IR
2368 if (code == MINUS_EXPR
2369 && modify
2370 && (op1 = gimple_assign_rhs1 (def_stmt))
2371 && TREE_CODE (op1) == SSA_NAME
2372 && SSA_NAME_DEF_STMT (op1) == phi)
8a9ecffd 2373 code = PLUS_EXPR;
ebfd146a 2374
b8698a0f 2375 if (check_reduction
7c5222ff 2376 && (!commutative_tree_code (code) || !associative_tree_code (code)))
ebfd146a 2377 {
73fbfcad 2378 if (dump_enabled_p ())
78c60e3d
SS
2379 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2380 "reduction: not commutative/associative: ");
ebfd146a
IR
2381 return NULL;
2382 }
2383
b8698a0f 2384 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
ebfd146a 2385 {
4bbe8262
IR
2386 if (code != COND_EXPR)
2387 {
73fbfcad 2388 if (dump_enabled_p ())
78c60e3d
SS
2389 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2390 "reduction: not binary operation: ");
ebfd146a 2391
4bbe8262
IR
2392 return NULL;
2393 }
2394
4e71066d 2395 op3 = gimple_assign_rhs1 (def_stmt);
6f4454fc
IR
2396 if (COMPARISON_CLASS_P (op3))
2397 {
2398 op4 = TREE_OPERAND (op3, 1);
2399 op3 = TREE_OPERAND (op3, 0);
b8698a0f
L
2400 }
2401
4e71066d
RG
2402 op1 = gimple_assign_rhs2 (def_stmt);
2403 op2 = gimple_assign_rhs3 (def_stmt);
4bbe8262
IR
2404
2405 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
2406 {
73fbfcad 2407 if (dump_enabled_p ())
78c60e3d
SS
2408 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2409 "reduction: uses not ssa_names: ");
4bbe8262
IR
2410
2411 return NULL;
2412 }
ebfd146a 2413 }
4bbe8262
IR
2414 else
2415 {
2416 op1 = gimple_assign_rhs1 (def_stmt);
2417 op2 = gimple_assign_rhs2 (def_stmt);
2418
107defbe 2419 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
4bbe8262 2420 {
73fbfcad 2421 if (dump_enabled_p ())
78c60e3d
SS
2422 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2423 "reduction: uses not ssa_names: ");
4bbe8262
IR
2424
2425 return NULL;
2426 }
2427 }
ebfd146a 2428
ebfd146a 2429 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
4bbe8262 2430 if ((TREE_CODE (op1) == SSA_NAME
9600efe1 2431 && !types_compatible_p (type,TREE_TYPE (op1)))
4bbe8262 2432 || (TREE_CODE (op2) == SSA_NAME
9600efe1 2433 && !types_compatible_p (type, TREE_TYPE (op2)))
4bbe8262 2434 || (op3 && TREE_CODE (op3) == SSA_NAME
9600efe1 2435 && !types_compatible_p (type, TREE_TYPE (op3)))
4bbe8262 2436 || (op4 && TREE_CODE (op4) == SSA_NAME
9600efe1 2437 && !types_compatible_p (type, TREE_TYPE (op4))))
ebfd146a 2438 {
73fbfcad 2439 if (dump_enabled_p ())
ebfd146a 2440 {
78c60e3d
SS
2441 dump_printf_loc (MSG_NOTE, vect_location,
2442 "reduction: multiple types: operation type: ");
2443 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
2444 dump_printf (MSG_NOTE, ", operands types: ");
2445 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2446 TREE_TYPE (op1));
2447 dump_printf (MSG_NOTE, ",");
2448 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2449 TREE_TYPE (op2));
6f4454fc 2450 if (op3)
4bbe8262 2451 {
78c60e3d
SS
2452 dump_printf (MSG_NOTE, ",");
2453 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2454 TREE_TYPE (op3));
6f4454fc
IR
2455 }
2456
2457 if (op4)
2458 {
78c60e3d
SS
2459 dump_printf (MSG_NOTE, ",");
2460 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2461 TREE_TYPE (op4));
4bbe8262 2462 }
e645e942 2463 dump_printf (MSG_NOTE, "\n");
ebfd146a 2464 }
4bbe8262 2465
ebfd146a
IR
2466 return NULL;
2467 }
2468
b8698a0f 2469 /* Check that it's ok to change the order of the computation.
7c5222ff 2470 Generally, when vectorizing a reduction we change the order of the
ebfd146a 2471 computation. This may change the behavior of the program in some
b8698a0f 2472 cases, so we need to check that this is ok. One exception is when
ebfd146a
IR
2473 vectorizing an outer-loop: the inner-loop is executed sequentially,
2474 and therefore vectorizing reductions in the inner-loop during
2475 outer-loop vectorization is safe. */
2476
2477 /* CHECKME: check for !flag_finite_math_only too? */
2478 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
b8698a0f 2479 && check_reduction)
ebfd146a
IR
2480 {
2481 /* Changing the order of operations changes the semantics. */
73fbfcad 2482 if (dump_enabled_p ())
78c60e3d
SS
2483 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2484 "reduction: unsafe fp math optimization: ");
ebfd146a
IR
2485 return NULL;
2486 }
2487 else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
7c5222ff 2488 && check_reduction)
ebfd146a
IR
2489 {
2490 /* Changing the order of operations changes the semantics. */
73fbfcad 2491 if (dump_enabled_p ())
78c60e3d
SS
2492 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2493 "reduction: unsafe int math optimization: ");
ebfd146a
IR
2494 return NULL;
2495 }
7c5222ff 2496 else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
ebfd146a
IR
2497 {
2498 /* Changing the order of operations changes the semantics. */
73fbfcad 2499 if (dump_enabled_p ())
78c60e3d 2500 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
ebfd146a
IR
2501 "reduction: unsafe fixed-point math optimization: ");
2502 return NULL;
2503 }
2504
8a9ecffd
MM
2505 /* If we detected "res -= x[i]" earlier, rewrite it into
2506 "res += -x[i]" now. If this turns out to be useless reassoc
2507 will clean it up again. */
2508 if (orig_code == MINUS_EXPR)
2509 {
2510 tree rhs = gimple_assign_rhs2 (def_stmt);
b731b390 2511 tree negrhs = make_ssa_name (TREE_TYPE (rhs));
0d0e4a03 2512 gimple negate_stmt = gimple_build_assign (negrhs, NEGATE_EXPR, rhs);
8a9ecffd
MM
2513 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
2514 set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
2515 loop_info, NULL));
2516 gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
2517 gimple_assign_set_rhs2 (def_stmt, negrhs);
2518 gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
2519 update_stmt (def_stmt);
2520 }
2521
7c5222ff 2522 /* Reduction is safe. We're dealing with one of the following:
ebfd146a 2523 1) integer arithmetic and no trapv
7c5222ff
IR
2524 2) floating point arithmetic, and special flags permit this optimization
2525 3) nested cycle (i.e., outer loop vectorization). */
4bbe8262
IR
2526 if (TREE_CODE (op1) == SSA_NAME)
2527 def1 = SSA_NAME_DEF_STMT (op1);
2528
2529 if (TREE_CODE (op2) == SSA_NAME)
2530 def2 = SSA_NAME_DEF_STMT (op2);
2531
b8698a0f 2532 if (code != COND_EXPR
107defbe 2533 && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
ebfd146a 2534 {
73fbfcad 2535 if (dump_enabled_p ())
78c60e3d 2536 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
ebfd146a
IR
2537 return NULL;
2538 }
2539
ebfd146a 2540 /* Check that one def is the reduction def, defined by PHI,
8644a673 2541 the other def is either defined in the loop ("vect_internal_def"),
ebfd146a
IR
2542 or it's an induction (defined by a loop-header phi-node). */
2543
4bbe8262
IR
2544 if (def2 && def2 == phi
2545 && (code == COND_EXPR
107defbe 2546 || !def1 || gimple_nop_p (def1)
69b8f2f9 2547 || !flow_bb_inside_loop_p (loop, gimple_bb (def1))
4bbe8262
IR
2548 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2549 && (is_gimple_assign (def1)
4fc7469a 2550 || is_gimple_call (def1)
b8698a0f 2551 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
4bbe8262
IR
2552 == vect_induction_def
2553 || (gimple_code (def1) == GIMPLE_PHI
b8698a0f 2554 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
4bbe8262
IR
2555 == vect_internal_def
2556 && !is_loop_header_bb_p (gimple_bb (def1)))))))
ebfd146a 2557 {
73fbfcad 2558 if (dump_enabled_p ())
78c60e3d 2559 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
ebfd146a
IR
2560 return def_stmt;
2561 }
b010117a
IR
2562
2563 if (def1 && def1 == phi
2564 && (code == COND_EXPR
107defbe 2565 || !def2 || gimple_nop_p (def2)
69b8f2f9 2566 || !flow_bb_inside_loop_p (loop, gimple_bb (def2))
b010117a
IR
2567 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2568 && (is_gimple_assign (def2)
2569 || is_gimple_call (def2)
2570 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2571 == vect_induction_def
2572 || (gimple_code (def2) == GIMPLE_PHI
2573 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2574 == vect_internal_def
2575 && !is_loop_header_bb_p (gimple_bb (def2)))))))
ebfd146a 2576 {
7c5222ff
IR
2577 if (check_reduction)
2578 {
2579 /* Swap operands (just for simplicity - so that the rest of the code
2580 can assume that the reduction variable is always the last (second)
2581 argument). */
73fbfcad 2582 if (dump_enabled_p ())
78c60e3d 2583 report_vect_op (MSG_NOTE, def_stmt,
7c5222ff
IR
2584 "detected reduction: need to swap operands: ");
2585
80560f95
AM
2586 swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2587 gimple_assign_rhs2_ptr (def_stmt));
3393a711
BS
2588
2589 if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt)))
2590 LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true;
7c5222ff
IR
2591 }
2592 else
2593 {
73fbfcad 2594 if (dump_enabled_p ())
78c60e3d 2595 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
7c5222ff
IR
2596 }
2597
ebfd146a
IR
2598 return def_stmt;
2599 }
b010117a
IR
2600
2601 /* Try to find SLP reduction chain. */
29f85237 2602 if (check_reduction && vect_is_slp_reduction (loop_info, phi, def_stmt))
ebfd146a 2603 {
73fbfcad 2604 if (dump_enabled_p ())
78c60e3d
SS
2605 report_vect_op (MSG_NOTE, def_stmt,
2606 "reduction: detected reduction chain: ");
7c5222ff 2607
b010117a 2608 return def_stmt;
ebfd146a 2609 }
b010117a 2610
73fbfcad 2611 if (dump_enabled_p ())
78c60e3d
SS
2612 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2613 "reduction: unknown pattern: ");
b010117a
IR
2614
2615 return NULL;
ebfd146a
IR
2616}
2617
8a9ecffd
MM
2618/* Wrapper around vect_is_simple_reduction_1, that won't modify code
2619 in-place. Arguments as there. */
2620
2621static gimple
2622vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2623 bool check_reduction, bool *double_reduc)
2624{
2625 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2626 double_reduc, false);
2627}
2628
2629/* Wrapper around vect_is_simple_reduction_1, which will modify code
2630 in-place if it enables detection of more reductions. Arguments
2631 as there. */
2632
2633gimple
2634vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2635 bool check_reduction, bool *double_reduc)
2636{
2637 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2638 double_reduc, true);
2639}
ebfd146a 2640
720f5239
IR
2641/* Calculate the cost of one scalar iteration of the loop. */
2642int
b6cef4e0 2643vect_get_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
720f5239
IR
2644{
2645 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2646 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2647 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2648 int innerloop_iters, i, stmt_cost;
2649
ff802fa1 2650 /* Count statements in scalar loop. Using this as scalar cost for a single
720f5239
IR
2651 iteration for now.
2652
2653 TODO: Add outer loop support.
2654
2655 TODO: Consider assigning different costs to different scalar
2656 statements. */
2657
2658 /* FORNOW. */
0b3f0088 2659 innerloop_iters = 1;
720f5239
IR
2660 if (loop->inner)
2661 innerloop_iters = 50; /* FIXME */
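/* Under this placeholder factor a statement in an inner-loop body is
   counted 50 times per outer iteration; e.g. a 3-statement inner body
   would contribute 3 * 50 = 150 to the scalar cost computed below.  */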
2662
2663 for (i = 0; i < nbbs; i++)
2664 {
2665 gimple_stmt_iterator si;
2666 basic_block bb = bbs[i];
2667
2668 if (bb->loop_father == loop->inner)
2669 factor = innerloop_iters;
2670 else
2671 factor = 1;
2672
2673 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2674 {
2675 gimple stmt = gsi_stmt (si);
9940b13c 2676 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
720f5239
IR
2677
2678 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2679 continue;
2680
9940b13c
IR
2681 /* Skip stmts that are not vectorized inside the loop. */
2682 if (stmt_info
2683 && !STMT_VINFO_RELEVANT_P (stmt_info)
2684 && (!STMT_VINFO_LIVE_P (stmt_info)
8bd37302
BS
2685 || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
2686 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
9940b13c
IR
2687 continue;
2688
720f5239
IR
2689 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2690 {
2691 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
b6cef4e0 2692 stmt_cost = vect_get_stmt_cost (scalar_load);
720f5239 2693 else
b6cef4e0 2694 stmt_cost = vect_get_stmt_cost (scalar_store);
720f5239
IR
2695 }
2696 else
b6cef4e0 2697 stmt_cost = vect_get_stmt_cost (scalar_stmt);
720f5239
IR
2698
2699 scalar_single_iter_cost += stmt_cost * factor;
2700 }
2701 }
2702 return scalar_single_iter_cost;
2703}
2704
2705/* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
2706int
2707vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2708 int *peel_iters_epilogue,
92345349
BS
2709 int scalar_single_iter_cost,
2710 stmt_vector_for_cost *prologue_cost_vec,
2711 stmt_vector_for_cost *epilogue_cost_vec)
720f5239 2712{
92345349 2713 int retval = 0;
720f5239
IR
2714 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2715
2716 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2717 {
2718 *peel_iters_epilogue = vf/2;
73fbfcad 2719 if (dump_enabled_p ())
78c60e3d
SS
2720 dump_printf_loc (MSG_NOTE, vect_location,
2721 "cost model: epilogue peel iters set to vf/2 "
e645e942 2722 "because loop iterations are unknown.\n");
720f5239
IR
2723
2724 /* If peeled iterations are known but number of scalar loop
2725 iterations are unknown, count a taken branch per peeled loop. */
92345349
BS
2726 retval = record_stmt_cost (prologue_cost_vec, 2, cond_branch_taken,
2727 NULL, 0, vect_prologue);
720f5239
IR
2728 }
2729 else
2730 {
2731 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2732 peel_iters_prologue = niters < peel_iters_prologue ?
2733 niters : peel_iters_prologue;
2734 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
48df3fa6
IR
2735 /* If we need to peel for gaps, but no peeling is required, we have to
2736 peel VF iterations. */
2737 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
2738 *peel_iters_epilogue = vf;
720f5239
IR
2739 }
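/* Worked example with hypothetical numbers: NITERS = 10, VF = 4 and a
   prologue of 2 give *peel_iters_epilogue = (10 - 2) % 4 = 0; if
   PEELING_FOR_GAPS is set, the empty epilogue is then bumped to a full
   VF = 4 iterations, costed below at SCALAR_SINGLE_ITER_COST each.  */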
2740
92345349
BS
2741 if (peel_iters_prologue)
2742 retval += record_stmt_cost (prologue_cost_vec,
2743 peel_iters_prologue * scalar_single_iter_cost,
2744 scalar_stmt, NULL, 0, vect_prologue);
2745 if (*peel_iters_epilogue)
2746 retval += record_stmt_cost (epilogue_cost_vec,
2747 *peel_iters_epilogue * scalar_single_iter_cost,
2748 scalar_stmt, NULL, 0, vect_epilogue);
2749 return retval;
720f5239
IR
2750}
2751
ebfd146a
IR
2752/* Function vect_estimate_min_profitable_iters
2753
2754 Return the number of iterations required for the vector version of the
2755 loop to be profitable relative to the cost of the scalar version of the
9ed313cc 2756 loop. */
ebfd146a 2757
9ed313cc
JH
2758static void
2759vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo,
2760 int *ret_min_profitable_niters,
2761 int *ret_min_profitable_estimate)
ebfd146a 2762{
ebfd146a 2763 int min_profitable_iters;
9ed313cc 2764 int min_profitable_estimate;
ebfd146a
IR
2765 int peel_iters_prologue;
2766 int peel_iters_epilogue;
92345349 2767 unsigned vec_inside_cost = 0;
ebfd146a 2768 int vec_outside_cost = 0;
92345349
BS
2769 unsigned vec_prologue_cost = 0;
2770 unsigned vec_epilogue_cost = 0;
ebfd146a
IR
2771 int scalar_single_iter_cost = 0;
2772 int scalar_outside_cost = 0;
2773 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
15e693cc 2774 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
92345349 2775 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
ebfd146a
IR
2776
2777 /* Cost model disabled. */
8b5e1202 2778 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
ebfd146a 2779 {
e645e942 2780 dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
9ed313cc
JH
2781 *ret_min_profitable_niters = 0;
2782 *ret_min_profitable_estimate = 0;
2783 return;
ebfd146a
IR
2784 }
2785
2786 /* Requires loop versioning tests to handle misalignment. */
e9dbe7bb 2787 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
ebfd146a
IR
2788 {
2789 /* FIXME: Make cost depend on complexity of individual check. */
9771b263 2790 unsigned len = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ();
92345349
BS
2791 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2792 vect_prologue);
78c60e3d
SS
2793 dump_printf (MSG_NOTE,
2794 "cost model: Adding cost of checks for loop "
2795 "versioning to treat misalignment.\n");
ebfd146a
IR
2796 }
2797
e9dbe7bb
IR
2798 /* Requires loop versioning with alias checks. */
2799 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
ebfd146a
IR
2800 {
2801 /* FIXME: Make cost depend on complexity of individual check. */
9771b263 2802 unsigned len = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).length ();
92345349
BS
2803 (void) add_stmt_cost (target_cost_data, len, vector_stmt, NULL, 0,
2804 vect_prologue);
78c60e3d
SS
2805 dump_printf (MSG_NOTE,
2806 "cost model: Adding cost of checks for loop "
2807 "versioning aliasing.\n");
ebfd146a
IR
2808 }
2809
e9dbe7bb
IR
2810 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2811 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
92345349
BS
2812 (void) add_stmt_cost (target_cost_data, 1, cond_branch_taken, NULL, 0,
2813 vect_prologue);
ebfd146a
IR
2814
2815 /* Count statements in scalar loop. Using this as scalar cost for a single
2816 iteration for now.
2817
2818 TODO: Add outer loop support.
2819
2820 TODO: Consider assigning different costs to different scalar
2821 statements. */
2822
b6cef4e0 2823 scalar_single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
720f5239 2824
ebfd146a
IR
2825 /* Add additional cost for the peeled instructions in prologue and epilogue
2826 loop.
2827
2828 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2829 at compile-time - we assume it's vf/2 (the worst would be vf-1).
2830
2831 TODO: Build an expression that represents peel_iters for prologue and
2832 epilogue to be used in a run-time test. */
2833
720f5239 2834 if (npeel < 0)
ebfd146a
IR
2835 {
2836 peel_iters_prologue = vf/2;
78c60e3d 2837 dump_printf (MSG_NOTE, "cost model: "
e645e942 2838 "prologue peel iters set to vf/2.\n");
ebfd146a
IR
2839
2840 /* If peeling for alignment is unknown, loop bound of main loop becomes
2841 unknown. */
2842 peel_iters_epilogue = vf/2;
78c60e3d
SS
2843 dump_printf (MSG_NOTE, "cost model: "
2844 "epilogue peel iters set to vf/2 because "
e645e942 2845 "peeling for alignment is unknown.\n");
ebfd146a
IR
2846
2847 /* If peeled iterations are unknown, count a taken branch and a not taken
2848 branch per peeled loop. Even if scalar loop iterations are known,
2849 vector iterations are not known since peeled prologue iterations are
2850 not known. Hence guards remain the same. */
92345349
BS
2851 (void) add_stmt_cost (target_cost_data, 2, cond_branch_taken,
2852 NULL, 0, vect_prologue);
2853 (void) add_stmt_cost (target_cost_data, 2, cond_branch_not_taken,
2854 NULL, 0, vect_prologue);
2855 /* FORNOW: Don't attempt to pass individual scalar instructions to
2856 the model; just assume linear cost for scalar iterations. */
2857 (void) add_stmt_cost (target_cost_data,
2858 peel_iters_prologue * scalar_single_iter_cost,
2859 scalar_stmt, NULL, 0, vect_prologue);
2860 (void) add_stmt_cost (target_cost_data,
2861 peel_iters_epilogue * scalar_single_iter_cost,
2862 scalar_stmt, NULL, 0, vect_epilogue);
ebfd146a 2863 }
b8698a0f 2864 else
ebfd146a 2865 {
92345349
BS
2866 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
2867 stmt_info_for_cost *si;
2868 int j;
2869 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
2870
9771b263
DN
2871 prologue_cost_vec.create (2);
2872 epilogue_cost_vec.create (2);
720f5239 2873 peel_iters_prologue = npeel;
92345349
BS
2874
2875 (void) vect_get_known_peeling_cost (loop_vinfo, peel_iters_prologue,
2876 &peel_iters_epilogue,
2877 scalar_single_iter_cost,
2878 &prologue_cost_vec,
2879 &epilogue_cost_vec);
2880
9771b263 2881 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
92345349
BS
2882 {
2883 struct _stmt_vec_info *stmt_info
2884 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2885 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2886 si->misalign, vect_prologue);
2887 }
2888
9771b263 2889 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
92345349
BS
2890 {
2891 struct _stmt_vec_info *stmt_info
2892 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
2893 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
2894 si->misalign, vect_epilogue);
2895 }
2896
9771b263
DN
2897 prologue_cost_vec.release ();
2898 epilogue_cost_vec.release ();
ebfd146a
IR
2899 }
2900
ebfd146a
IR
2901 /* FORNOW: The scalar outside cost is incremented in one of the
2902 following ways:
2903
2904 1. The vectorizer checks for alignment and aliasing and generates
2905 a condition that allows dynamic vectorization. A cost model
2906 check is ANDED with the versioning condition. Hence scalar code
2907 path now has the added cost of the versioning check.
2908
2909 if (cost > th & versioning_check)
2910 jmp to vector code
2911
2912 Hence run-time scalar is incremented by not-taken branch cost.
2913
2914 2. The vectorizer then checks if a prologue is required. If the
2915 cost model check was not done before during versioning, it has to
2916 be done before the prologue check.
2917
2918 if (cost <= th)
2919 prologue = scalar_iters
2920 if (prologue == 0)
2921 jmp to vector code
2922 else
2923 execute prologue
2924 if (prologue == num_iters)
2925 go to exit
2926
2927 Hence the run-time scalar cost is incremented by a taken branch,
2928 plus a not-taken branch, plus a taken branch cost.
2929
2930 3. The vectorizer then checks if an epilogue is required. If the
2931 cost model check was not done before during prologue check, it
2932 has to be done with the epilogue check.
2933
2934 if (prologue == 0)
2935 jmp to vector code
2936 else
2937 execute prologue
2938 if (prologue == num_iters)
2939 go to exit
2940 vector code:
2941 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2942 jmp to epilogue
2943
2944 Hence the run-time scalar cost should be incremented by 2 taken
2945 branches.
2946
2947 TODO: The back end may reorder the BBS's differently and reverse
2948 conditions/branch directions. Change the estimates below to
2949 something more reasonable. */
2950
2951 /* If the number of iterations is known and we do not do versioning, we can
ff802fa1 2952 decide whether to vectorize at compile time. Hence the scalar version
ebfd146a
IR
 2953 does not carry cost model guard costs. */
2954 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
e9dbe7bb
IR
2955 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2956 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
ebfd146a
IR
2957 {
2958 /* Cost model check occurs at versioning. */
e9dbe7bb
IR
2959 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2960 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
b6cef4e0 2961 scalar_outside_cost += vect_get_stmt_cost (cond_branch_not_taken);
ebfd146a
IR
2962 else
2963 {
2964 /* Cost model check occurs at prologue generation. */
15e693cc 2965 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
b6cef4e0
BS
2966 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken)
2967 + vect_get_stmt_cost (cond_branch_not_taken);
ebfd146a
IR
2968 /* Cost model check occurs at epilogue generation. */
2969 else
b6cef4e0 2970 scalar_outside_cost += 2 * vect_get_stmt_cost (cond_branch_taken);
ebfd146a
IR
2971 }
2972 }
2973
92345349
BS
2974 /* Complete the target-specific cost calculations. */
2975 finish_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), &vec_prologue_cost,
2976 &vec_inside_cost, &vec_epilogue_cost);
ebfd146a 2977
92345349 2978 vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost);
c3e7ee41 2979
b8698a0f 2980 /* Calculate number of iterations required to make the vector version
ff802fa1 2981 profitable, relative to the loop bodies only. The following condition
b8698a0f 2982 must hold true:
ebfd146a
IR
2983 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2984 where
2985 SIC = scalar iteration cost, VIC = vector iteration cost,
2986 VOC = vector outside cost, VF = vectorization factor,
2987 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2988 SOC = scalar outside cost for run time cost model check. */
2989
92345349 2990 if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost)
ebfd146a
IR
2991 {
2992 if (vec_outside_cost <= 0)
2993 min_profitable_iters = 1;
2994 else
2995 {
2996 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2997 - vec_inside_cost * peel_iters_prologue
2998 - vec_inside_cost * peel_iters_epilogue)
2999 / ((scalar_single_iter_cost * vf)
3000 - vec_inside_cost);
3001
3002 if ((scalar_single_iter_cost * vf * min_profitable_iters)
92345349
BS
3003 <= (((int) vec_inside_cost * min_profitable_iters)
3004 + (((int) vec_outside_cost - scalar_outside_cost) * vf)))
ebfd146a
IR
3005 min_profitable_iters++;
3006 }
3007 }
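/* A worked example of the branch above, with hypothetical costs
   SIC = 4, VIC = 8, VOC = 20, SOC = 6, VF = 4 and no peeling:
   min_profitable_iters = ((20 - 6) * 4) / (4 * 4 - 8) = 56 / 8 = 7;
   since 4 * 4 * 7 == 8 * 7 + (20 - 6) * 4 == 112, the <= adjustment
   bumps the result to 8 iterations.  */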
3008 /* vector version will never be profitable. */
3009 else
3010 {
b15b5979 3011 if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
8b5e1202
SO
3012 warning_at (vect_location, OPT_Wopenmp_simd, "vectorization "
3013 "did not happen for a simd loop");
3014
73fbfcad 3015 if (dump_enabled_p ())
78c60e3d
SS
3016 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3017 "cost model: the vector iteration cost = %d "
3018 "divided by the scalar iteration cost = %d "
e645e942
TJ
3019 "is greater or equal to the vectorization factor = %d"
3020 ".\n",
78c60e3d 3021 vec_inside_cost, scalar_single_iter_cost, vf);
9ed313cc
JH
3022 *ret_min_profitable_niters = -1;
3023 *ret_min_profitable_estimate = -1;
3024 return;
ebfd146a
IR
3025 }
3026
73fbfcad 3027 if (dump_enabled_p ())
78c60e3d
SS
3028 {
3029 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3030 dump_printf (MSG_NOTE, " Vector inside of loop cost: %d\n",
3031 vec_inside_cost);
3032 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n",
3033 vec_prologue_cost);
3034 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n",
3035 vec_epilogue_cost);
3036 dump_printf (MSG_NOTE, " Scalar iteration cost: %d\n",
3037 scalar_single_iter_cost);
3038 dump_printf (MSG_NOTE, " Scalar outside cost: %d\n",
3039 scalar_outside_cost);
9ed313cc
JH
3040 dump_printf (MSG_NOTE, " Vector outside cost: %d\n",
3041 vec_outside_cost);
78c60e3d
SS
3042 dump_printf (MSG_NOTE, " prologue iterations: %d\n",
3043 peel_iters_prologue);
3044 dump_printf (MSG_NOTE, " epilogue iterations: %d\n",
3045 peel_iters_epilogue);
e645e942 3046 dump_printf (MSG_NOTE,
78c60e3d
SS
3047 " Calculated minimum iters for profitability: %d\n",
3048 min_profitable_iters);
e645e942 3049 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
3050 }
3051
b8698a0f 3052 min_profitable_iters =
ebfd146a
IR
3053 min_profitable_iters < vf ? vf : min_profitable_iters;
3054
3055 /* Because the condition we create is:
3056 if (niters <= min_profitable_iters)
3057 then skip the vectorized loop. */
3058 min_profitable_iters--;
3059
73fbfcad 3060 if (dump_enabled_p ())
78c60e3d 3061 dump_printf_loc (MSG_NOTE, vect_location,
e645e942
TJ
3062 " Runtime profitability threshold = %d\n",
3063 min_profitable_iters);
9ed313cc
JH
3064
3065 *ret_min_profitable_niters = min_profitable_iters;
3066
3067 /* Calculate number of iterations required to make the vector version
3068 profitable, relative to the loop bodies only.
3069
3070 Non-vectorized variant is SIC * niters and it must win over vector
3071 variant on the expected loop trip count. The following condition must hold true:
3072 SIC * niters > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC + SOC */
3073
3074 if (vec_outside_cost <= 0)
3075 min_profitable_estimate = 1;
3076 else
3077 {
3078 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf
3079 - vec_inside_cost * peel_iters_prologue
3080 - vec_inside_cost * peel_iters_epilogue)
3081 / ((scalar_single_iter_cost * vf)
3082 - vec_inside_cost);
3083 }
 3084 min_profitable_estimate--;
3085 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
73fbfcad 3086 if (dump_enabled_p ())
9ed313cc
JH
3087 dump_printf_loc (MSG_NOTE, vect_location,
3088 " Static estimate profitability threshold = %d\n",
 3089 min_profitable_estimate);
b8698a0f 3090
9ed313cc 3091 *ret_min_profitable_estimate = min_profitable_estimate;
ebfd146a
IR
3092}
3093
cf7aa6a3
AL
3094/* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3095 vector elements (not bits) for a vector of mode MODE. */
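/* E.g. for a mode with GET_MODE_NUNITS == 8 and OFFSET == 2 the mask
   computed below is {2, 3, 4, 5, 6, 7, 8, 9}; elements 8 and 9 select
   from the second vec_perm operand, so pairing the mask with a zero
   second operand shifts zeros in from the top.  */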
3096static void
3097calc_vec_perm_mask_for_shift (enum machine_mode mode, unsigned int offset,
3098 unsigned char *sel)
3099{
3100 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3101
3102 for (i = 0; i < nelt; i++)
729ff76e 3103 sel[i] = (i + offset) & (2*nelt - 1);
cf7aa6a3
AL
3104}
3105
3106/* Checks whether the target supports whole-vector shifts for vectors of mode
3107 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_
3108 it supports vec_perm_const with masks for all necessary shift amounts. */
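/* For example, with 8-element vectors the loop below probes the shift
   amounts 4, 2 and 1 -- exactly the element counts needed by the
   successive halving steps of the reduction epilogue.  */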
3109static bool
3110have_whole_vector_shift (enum machine_mode mode)
3111{
3112 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3113 return true;
3114
3115 if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing)
3116 return false;
3117
3118 unsigned int i, nelt = GET_MODE_NUNITS (mode);
3119 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
3120
3121 for (i = nelt/2; i >= 1; i/=2)
3122 {
3123 calc_vec_perm_mask_for_shift (mode, i, sel);
3124 if (!can_vec_perm_p (mode, false, sel))
3125 return false;
3126 }
3127 return true;
3128}

/* TODO: Close dependency between vect_model_*_cost and vectorizable_*
   functions.  Design better to avoid maintenance issues.  */

/* Function vect_model_reduction_cost.

   Models cost for a reduction operation, including the vector ops
   generated within the strip-mine loop, the initial definition before
   the loop, and the epilogue code that must be generated.  */

static bool
vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
                           int ncopies)
{
  int prologue_cost = 0, epilogue_cost = 0;
  enum tree_code code;
  optab optab;
  tree vectype;
  gimple stmt, orig_stmt;
  tree reduction_op;
  machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);

  /* Cost of reduction op inside loop.  */
  unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
                                        stmt_info, 0, vect_body);
  stmt = STMT_VINFO_STMT (stmt_info);

  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
                  == ternary_op);
      reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
      break;
    case GIMPLE_UNARY_RHS:
      reduction_op = gimple_assign_rhs1 (stmt);
      break;
    case GIMPLE_BINARY_RHS:
      reduction_op = gimple_assign_rhs2 (stmt);
      break;
    case GIMPLE_TERNARY_RHS:
      reduction_op = gimple_assign_rhs3 (stmt);
      break;
    default:
      gcc_unreachable ();
    }

  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             TREE_TYPE (reduction_op));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return false;
    }

  mode = TYPE_MODE (vectype);
  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

  if (!orig_stmt)
    orig_stmt = STMT_VINFO_STMT (stmt_info);

  code = gimple_assign_rhs_code (orig_stmt);

  /* Add in cost for initial definition.  */
  prologue_cost += add_stmt_cost (target_cost_data, 1, scalar_to_vec,
                                  stmt_info, 0, vect_prologue);

  /* Determine cost of epilogue code.

     We have a reduction operator that will reduce the vector in one statement.
     Also requires scalar extract.  */

  if (!nested_in_vect_loop_p (loop, orig_stmt))
    {
      if (reduc_code != ERROR_MARK)
        {
          epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt,
                                          stmt_info, 0, vect_epilogue);
          epilogue_cost += add_stmt_cost (target_cost_data, 1, vec_to_scalar,
                                          stmt_info, 0, vect_epilogue);
        }
      else
        {
          int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
          tree bitsize =
            TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
          int element_bitsize = tree_to_uhwi (bitsize);
          int nelements = vec_size_in_bits / element_bitsize;

          optab = optab_for_tree_code (code, vectype, optab_default);

          /* We have a whole vector shift available.  */
          if (VECTOR_MODE_P (mode)
              && optab_handler (optab, mode) != CODE_FOR_nothing
              && have_whole_vector_shift (mode))
            {
              /* Final reduction via vector shifts and the reduction operator.
                 Also requires scalar extract.  */
              epilogue_cost += add_stmt_cost (target_cost_data,
                                              exact_log2 (nelements) * 2,
                                              vector_stmt, stmt_info, 0,
                                              vect_epilogue);
              epilogue_cost += add_stmt_cost (target_cost_data, 1,
                                              vec_to_scalar, stmt_info, 0,
                                              vect_epilogue);
            }
          else
            /* Use extracts and reduction op for final reduction.  For N
               elements, we have N extracts and N-1 reduction ops.  */
            epilogue_cost += add_stmt_cost (target_cost_data,
                                            nelements + nelements - 1,
                                            vector_stmt, stmt_info, 0,
                                            vect_epilogue);
        }
    }

  if (dump_enabled_p ())
    dump_printf (MSG_NOTE,
                 "vect_model_reduction_cost: inside_cost = %d, "
                 "prologue_cost = %d, epilogue_cost = %d .\n", inside_cost,
                 prologue_cost, epilogue_cost);

  return true;
}
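
/* To make the accounting above concrete (illustrative numbers): for a
   four-element vector reduced via whole-vector shifts, the epilogue is
   costed as exact_log2 (4) * 2 == 4 vector statements (two shifts plus two
   reduction ops) and one vec_to_scalar extract; the fallback would cost
   4 + 4 - 1 == 7 vector statements for the extracts and reduction ops.  */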


/* Function vect_model_induction_cost.

   Models cost for induction operations.  */

static void
vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
{
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
  unsigned inside_cost, prologue_cost;

  /* Loop cost for vec_loop.  */
  inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
                               stmt_info, 0, vect_body);

  /* Prologue cost for vec_init and vec_step.  */
  prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec,
                                 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_model_induction_cost: inside_cost = %d, "
                     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
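
/* E.g. (an illustrative reading of the above): with ncopies == 2 an
   induction is costed as two vector statements in the loop body plus two
   scalar_to_vec statements in the prologue, one for building vec_init and
   one for vec_step.  */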


/* Function get_initial_def_for_induction

   Input:
   STMT - a stmt that performs an induction operation in the loop.
   IV_PHI - the initial value of the induction variable

   Output:
   Return a vector variable, initialized with the first VF values of
   the induction variable.  E.g., for an iv with IV_PHI='X' and
   evolution S, for a vector of 4 units, we want to return:
   [X, X + S, X + 2*S, X + 3*S].  */
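
/* Concretely (an added illustration of the same scheme): with IV_PHI X = 0,
   step S = 1 and VF = 4, the returned vector is [0, 1, 2, 3], and the loop
   body advances it by the broadcast step [4, 4, 4, 4] on every iteration.  */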

static tree
get_initial_def_for_induction (gimple iv_phi)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree vectype;
  int nunits;
  edge pe = loop_preheader_edge (loop);
  struct loop *iv_loop;
  basic_block new_bb;
  tree new_vec, vec_init, vec_step, t;
  tree new_var;
  tree new_name;
  gimple init_stmt, new_stmt;
  gphi *induction_phi;
  tree induc_def, vec_def, vec_dest;
  tree init_expr, step_expr;
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int i;
  int ncopies;
  tree expr;
  stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
  bool nested_in_vect_loop = false;
  gimple_seq stmts = NULL;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  gimple exit_phi;
  edge latch_e;
  tree loop_arg;
  gimple_stmt_iterator si;
  basic_block bb = gimple_bb (iv_phi);
  tree stepvectype;
  tree resvectype;

  /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop?  */
  if (nested_in_vect_loop_p (loop, iv_phi))
    {
      nested_in_vect_loop = true;
      iv_loop = loop->inner;
    }
  else
    iv_loop = loop;
  gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);

  latch_e = loop_latch_edge (iv_loop);
  loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);

  step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
  gcc_assert (step_expr != NULL_TREE);

  pe = loop_preheader_edge (iv_loop);
  init_expr = PHI_ARG_DEF_FROM_EDGE (iv_phi,
                                     loop_preheader_edge (iv_loop));

  vectype = get_vectype_for_scalar_type (TREE_TYPE (init_expr));
  resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
  gcc_assert (vectype);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  ncopies = vf / nunits;

  gcc_assert (phi_info);
  gcc_assert (ncopies >= 1);

  /* Convert the step to the desired type.  */
  step_expr = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
                                                  step_expr),
                                    &stmts, true, NULL_TREE);
  if (stmts)
    {
      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  /* Find the first insertion point in the BB.  */
  si = gsi_after_labels (bb);

  /* Create the vector that holds the initial_value of the induction.  */
  if (nested_in_vect_loop)
    {
      /* iv_loop is nested in the loop to be vectorized.  init_expr had already
         been created during vectorization of previous stmts.  We obtain it
         from the STMT_VINFO_VEC_STMT of the defining stmt.  */
      vec_init = vect_get_vec_def_for_operand (init_expr, iv_phi, NULL);
      /* If the initial value is not of proper type, convert it.  */
      if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
        {
          new_stmt
            = gimple_build_assign (vect_get_new_vect_var (vectype,
                                                          vect_simple_var,
                                                          "vec_iv_"),
                                   VIEW_CONVERT_EXPR,
                                   build1 (VIEW_CONVERT_EXPR, vectype,
                                           vec_init));
          vec_init = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
          gimple_assign_set_lhs (new_stmt, vec_init);
          new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
                                                 new_stmt);
          gcc_assert (!new_bb);
          set_vinfo_for_stmt (new_stmt,
                              new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
        }
    }
  else
    {
      vec<constructor_elt, va_gc> *v;

      /* iv_loop is the loop to be vectorized.  Create:
         vec_init = [X, X+S, X+2*S, X+3*S]  (S = step_expr, X = init_expr)  */
      new_var = vect_get_new_vect_var (TREE_TYPE (vectype),
                                       vect_scalar_var, "var_");
      new_name = force_gimple_operand (fold_convert (TREE_TYPE (vectype),
                                                     init_expr),
                                       &stmts, false, new_var);
      if (stmts)
        {
          new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
          gcc_assert (!new_bb);
        }

      vec_alloc (v, nunits);
      bool constant_p = is_gimple_min_invariant (new_name);
      CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
      for (i = 1; i < nunits; i++)
        {
          /* Create: new_name_i = new_name + step_expr.  */
          new_name = fold_build2 (PLUS_EXPR, TREE_TYPE (new_name),
                                  new_name, step_expr);
          if (!is_gimple_min_invariant (new_name))
            {
              init_stmt = gimple_build_assign (new_var, new_name);
              new_name = make_ssa_name (new_var, init_stmt);
              gimple_assign_set_lhs (init_stmt, new_name);
              new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
              gcc_assert (!new_bb);
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "created new init_stmt: ");
                  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, init_stmt, 0);
                  dump_printf (MSG_NOTE, "\n");
                }
              constant_p = false;
            }
          CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, new_name);
        }
      /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1].  */
      if (constant_p)
        new_vec = build_vector_from_ctor (vectype, v);
      else
        new_vec = build_constructor (vectype, v);
      vec_init = vect_init_vector (iv_phi, new_vec, vectype, NULL);
    }


  /* Create the vector that holds the step of the induction.  */
  if (nested_in_vect_loop)
    /* iv_loop is nested in the loop to be vectorized.  Generate:
       vec_step = [S, S, S, S].  */
    new_name = step_expr;
  else
    {
      /* iv_loop is the loop to be vectorized.  Generate:
         vec_step = [VF*S, VF*S, VF*S, VF*S].  */
      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
        {
          expr = build_int_cst (integer_type_node, vf);
          expr = fold_convert (TREE_TYPE (step_expr), expr);
        }
      else
        expr = build_int_cst (TREE_TYPE (step_expr), vf);
      new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                              expr, step_expr);
      if (TREE_CODE (step_expr) == SSA_NAME)
        new_name = vect_init_vector (iv_phi, new_name,
                                     TREE_TYPE (step_expr), NULL);
    }

  t = unshare_expr (new_name);
  gcc_assert (CONSTANT_CLASS_P (new_name)
              || TREE_CODE (new_name) == SSA_NAME);
  stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
  gcc_assert (stepvectype);
  new_vec = build_vector_from_val (stepvectype, t);
  vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);


  /* Create the following def-use cycle:
     loop prolog:
         vec_init = ...
         vec_step = ...
     loop:
         vec_iv = PHI <vec_init, vec_loop>
         ...
         STMT
         ...
         vec_loop = vec_iv + vec_step;  */
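
  /* Unrolled case (illustrative): with VF = 8 but nunits = 4 (ncopies = 2),
     X = 0 and S = 1, the first copy starts from [0, 1, 2, 3] and the code
     below derives the second copy by adding [nunits*S, ...] = [4, 4, 4, 4],
     giving [4, 5, 6, 7], while the loop-carried update still adds
     vec_step = [VF*S, ...] = [8, 8, 8, 8].  */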

  /* Create the induction-phi that defines the induction-operand.  */
  vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
  induction_phi = create_phi_node (vec_dest, iv_loop->header);
  set_vinfo_for_stmt (induction_phi,
                      new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
  induc_def = PHI_RESULT (induction_phi);

  /* Create the iv update inside the loop.  */
  new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, induc_def, vec_step);
  vec_def = make_ssa_name (vec_dest, new_stmt);
  gimple_assign_set_lhs (new_stmt, vec_def);
  gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
  set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
                                                   NULL));

  /* Set the arguments of the phi node:  */
  add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
  add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
               UNKNOWN_LOCATION);


  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e. - we need to "unroll" the
     vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.  */

  if (ncopies > 1)
    {
      stmt_vec_info prev_stmt_vinfo;
      /* FORNOW.  This restriction should be relaxed.  */
      gcc_assert (!nested_in_vect_loop);

      /* Create the vector that holds the step of the induction.  */
      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
        {
          expr = build_int_cst (integer_type_node, nunits);
          expr = fold_convert (TREE_TYPE (step_expr), expr);
        }
      else
        expr = build_int_cst (TREE_TYPE (step_expr), nunits);
      new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
                              expr, step_expr);
      if (TREE_CODE (step_expr) == SSA_NAME)
        new_name = vect_init_vector (iv_phi, new_name,
                                     TREE_TYPE (step_expr), NULL);
      t = unshare_expr (new_name);
      gcc_assert (CONSTANT_CLASS_P (new_name)
                  || TREE_CODE (new_name) == SSA_NAME);
      new_vec = build_vector_from_val (stepvectype, t);
      vec_step = vect_init_vector (iv_phi, new_vec, stepvectype, NULL);

      vec_def = induc_def;
      prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
      for (i = 1; i < ncopies; i++)
        {
          /* vec_i = vec_prev + vec_step.  */
          new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
                                          vec_def, vec_step);
          vec_def = make_ssa_name (vec_dest, new_stmt);
          gimple_assign_set_lhs (new_stmt, vec_def);

          gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
          if (!useless_type_conversion_p (resvectype, vectype))
            {
              new_stmt
                = gimple_build_assign
                    (vect_get_new_vect_var (resvectype, vect_simple_var,
                                            "vec_iv_"),
                     VIEW_CONVERT_EXPR,
                     build1 (VIEW_CONVERT_EXPR, resvectype,
                             gimple_assign_lhs (new_stmt)));
              gimple_assign_set_lhs (new_stmt,
                                     make_ssa_name
                                       (gimple_assign_lhs (new_stmt),
                                        new_stmt));
              gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
            }
          set_vinfo_for_stmt (new_stmt,
                              new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
          STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
          prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
        }
    }

  if (nested_in_vect_loop)
    {
      /* Find the loop-closed exit-phi of the induction, and record
         the final vector of induction results:  */
      exit_phi = NULL;
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
        {
          gimple use_stmt = USE_STMT (use_p);
          if (is_gimple_debug (use_stmt))
            continue;

          if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (use_stmt)))
            {
              exit_phi = use_stmt;
              break;
            }
        }
      if (exit_phi)
        {
          stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
          /* FORNOW.  Currently not supporting the case that an inner-loop
             induction is not used in the outer-loop (i.e. only outside the
             outer-loop).  */
          gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
                      && !STMT_VINFO_LIVE_P (stmt_vinfo));

          STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "vector of inductions after inner-loop:");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
              dump_printf (MSG_NOTE, "\n");
            }
        }
    }


  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "transform induction: created def-use cycle: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
      dump_printf (MSG_NOTE, "\n");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                        SSA_NAME_DEF_STMT (vec_def), 0);
      dump_printf (MSG_NOTE, "\n");
    }

  STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
  if (!useless_type_conversion_p (resvectype, vectype))
    {
      new_stmt = gimple_build_assign (vect_get_new_vect_var (resvectype,
                                                             vect_simple_var,
                                                             "vec_iv_"),
                                      VIEW_CONVERT_EXPR,
                                      build1 (VIEW_CONVERT_EXPR, resvectype,
                                              induc_def));
      induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
      gimple_assign_set_lhs (new_stmt, induc_def);
      si = gsi_after_labels (bb);
      gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
      set_vinfo_for_stmt (new_stmt,
                          new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
        = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
    }

  return induc_def;
}


/* Function get_initial_def_for_reduction

   Input:
   STMT - a stmt that performs a reduction operation in the loop.
   INIT_VAL - the initial value of the reduction variable

   Output:
   ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
        of the reduction (used for adjusting the epilog - see below).
   Return a vector variable, initialized according to the operation that STMT
        performs.  This vector will be used as the initial value of the
        vector of partial results.

   Option1 (adjust in epilog): Initialize the vector as follows:
     add/bit or/xor:    [0,0,...,0,0]
     mult/bit and:      [1,1,...,1,1]
     min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
   and when necessary (e.g. add/mult case) let the caller know
   that it needs to adjust the result by init_val.

   Option2: Initialize the vector as follows:
     add/bit or/xor:    [init_val,0,0,...,0]
     mult/bit and:      [init_val,1,1,...,1]
     min/max/cond_expr: [init_val,init_val,...,init_val]
   and no adjustments are needed.

   For example, for the following code:

     s = init_val;
     for (i = 0; i < n; i++)
       s = s + a[i];

   STMT is 's = s + a[i]', and the reduction variable is 's'.
   For a vector of 4 units, we want to return either [0,0,0,init_val],
   or [0,0,0,0] and let the caller know that it needs to adjust
   the result at the end by 'init_val'.

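   Similarly (an added illustration using the same convention): for
   s = s * a[i] and a vector of 4 units, Option2 returns [init_val,1,1,1],
   while Option1 returns [1,1,1,1] and lets the caller multiply the final
   scalar result by 'init_val'.
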
   FORNOW, we use Option1 (adjust in epilog) whenever ADJUSTMENT_DEF is not
   NULL, because its initialization vector is simpler (the same element in
   all entries), and Option2 otherwise.

   A cost model should help decide between these two schemes.  */

tree
get_initial_def_for_reduction (gimple stmt, tree init_val,
                               tree *adjustment_def)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree scalar_type = TREE_TYPE (init_val);
  tree vectype = get_vectype_for_scalar_type (scalar_type);
  int nunits;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree def_for_init;
  tree init_def;
  tree *elts;
  int i;
  bool nested_in_vect_loop = false;
  tree init_value;
  REAL_VALUE_TYPE real_init_val = dconst0;
  int int_init_val = 0;
  gimple def_stmt = NULL;

  gcc_assert (vectype);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
              || SCALAR_FLOAT_TYPE_P (scalar_type));

  if (nested_in_vect_loop_p (loop, stmt))
    nested_in_vect_loop = true;
  else
    gcc_assert (loop == (gimple_bb (stmt))->loop_father);

  /* In case of double reduction we only create a vector variable to be put
     in the reduction phi node.  The actual statement creation is done in
     vect_create_epilog_for_reduction.  */
  if (adjustment_def && nested_in_vect_loop
      && TREE_CODE (init_val) == SSA_NAME
      && (def_stmt = SSA_NAME_DEF_STMT (init_val))
      && gimple_code (def_stmt) == GIMPLE_PHI
      && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      && vinfo_for_stmt (def_stmt)
      && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
         == vect_double_reduction_def)
    {
      *adjustment_def = NULL;
      return vect_create_destination_var (init_val, vectype);
    }

  if (TREE_CONSTANT (init_val))
    {
      if (SCALAR_FLOAT_TYPE_P (scalar_type))
        init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
      else
        init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
    }
  else
    init_value = init_val;

  switch (code)
    {
    case WIDEN_SUM_EXPR:
    case DOT_PROD_EXPR:
    case SAD_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case MULT_EXPR:
    case BIT_AND_EXPR:
      /* ADJUSTMENT_DEF is NULL when called from
         vect_create_epilog_for_reduction to vectorize double reduction.  */
      if (adjustment_def)
        {
          if (nested_in_vect_loop)
            *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
                                                            NULL);
          else
            *adjustment_def = init_val;
        }

      if (code == MULT_EXPR)
        {
          real_init_val = dconst1;
          int_init_val = 1;
        }

      if (code == BIT_AND_EXPR)
        int_init_val = -1;

      if (SCALAR_FLOAT_TYPE_P (scalar_type))
        def_for_init = build_real (scalar_type, real_init_val);
      else
        def_for_init = build_int_cst (scalar_type, int_init_val);

      /* Create a vector of '0' or '1', except for the first element.  */
      elts = XALLOCAVEC (tree, nunits);
      for (i = nunits - 2; i >= 0; --i)
        elts[i + 1] = def_for_init;

      /* Option1: the first element is '0' or '1' as well.  */
      if (adjustment_def)
        {
          elts[0] = def_for_init;
          init_def = build_vector (vectype, elts);
          break;
        }

      /* Option2: the first element is INIT_VAL.  */
      elts[0] = init_val;
      if (TREE_CONSTANT (init_val))
        init_def = build_vector (vectype, elts);
      else
        {
          vec<constructor_elt, va_gc> *v;
          vec_alloc (v, nunits);
          CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, init_val);
          for (i = 1; i < nunits; ++i)
            CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[i]);
          init_def = build_constructor (vectype, v);
        }

      break;

    case MIN_EXPR:
    case MAX_EXPR:
    case COND_EXPR:
      if (adjustment_def)
        {
          *adjustment_def = NULL_TREE;
          init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
          break;
        }

      init_def = build_vector_from_val (vectype, init_value);
      break;

    default:
      gcc_unreachable ();
    }

  return init_def;
}

/* Function vect_create_epilog_for_reduction

   Create code at the loop-epilog to finalize the result of a reduction
   computation.

   VECT_DEFS is the list of vectors of partial results, i.e., the lhs's of
     vector reduction statements.
   STMT is the scalar reduction stmt that is being vectorized.
   NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
     number of elements that we can fit in a vectype (nunits).  In this case
     we have to generate more than one vector stmt - i.e. - we need to "unroll"
     the vector stmt by a factor VF/nunits.  For more details see documentation
     in vectorizable_operation.
   REDUC_CODE is the tree-code for the epilog reduction.
   REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
     computation.
   REDUC_INDEX is the index of the operand in the right hand side of the
     statement that is defined by REDUCTION_PHI.
   DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
   SLP_NODE is an SLP node containing a group of reduction statements.  The
     first one in this group is STMT.

   This function:
   1. Creates the reduction def-use cycles: sets the arguments for
      REDUCTION_PHIS:
      The loop-entry argument is the vectorized initial-value of the reduction.
      The loop-latch argument is taken from VECT_DEFS - the vector of partial
      sums.
   2. "Reduces" each vector of partial results VECT_DEFS into a single result,
      by applying the operation specified by REDUC_CODE if available, or by
      other means (whole-vector shifts or a scalar loop).
      The function also creates a new phi node at the loop exit to preserve
      loop-closed form, as illustrated below.

   The flow at the entry to this function:

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          use <s_out0>
          use <s_out0>

   The above is transformed by this function into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          s_loop = scalar_stmt                  # (scalar) STMT
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>
*/

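/* Step 1 above, traced on one four-element vector v_out1 = {a, b, c, d}
   for a plus-reduction (an illustration, not target-specific):
     scheme 1: v_out2 = reduc_expr <v_out1>, then extract element 0;
     scheme 2: shift down by two elements (zero-filling) and add, giving
               {a+c, b+d, c, d}; shift by one and add, giving
               {a+b+c+d, ...}; extract element 0;
     scheme 3: extract all four elements and combine them with three
               scalar adds.  */
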
static void
vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple stmt,
                                  int ncopies, enum tree_code reduc_code,
                                  vec<gimple> reduction_phis,
                                  int reduc_index, bool double_reduc,
                                  slp_tree slp_node)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  stmt_vec_info prev_phi_info;
  tree vectype;
  machine_mode mode;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
  basic_block exit_bb;
  tree scalar_dest;
  tree scalar_type;
  gimple new_phi = NULL, phi;
  gimple_stmt_iterator exit_gsi;
  tree vec_dest;
  tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
  gimple epilog_stmt = NULL;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  gimple exit_phi;
  tree bitsize;
  tree adjustment_def = NULL;
  tree vec_initial_def = NULL;
  tree reduction_op, expr, def;
  tree orig_name, scalar_result;
  imm_use_iterator imm_iter, phi_imm_iter;
  use_operand_p use_p, phi_use_p;
  gimple use_stmt, orig_stmt, reduction_phi = NULL;
  bool nested_in_vect_loop = false;
  auto_vec<gimple> new_phis;
  auto_vec<gimple> inner_phis;
  enum vect_def_type dt = vect_unknown_def_type;
  int j, i;
  auto_vec<tree> scalar_results;
  unsigned int group_size = 1, k, ratio;
  auto_vec<tree> vec_initial_defs;
  auto_vec<gimple> phis;
  bool slp_reduc = false;
  tree new_phi_result;
  gimple inner_phi = NULL;

  if (slp_node)
    group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();

  if (nested_in_vect_loop_p (loop, stmt))
    {
      outer_loop = loop;
      loop = loop->inner;
      nested_in_vect_loop = true;
      gcc_assert (!slp_node);
    }

  switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
    {
    case GIMPLE_SINGLE_RHS:
      gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
                  == ternary_op);
      reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
      break;
    case GIMPLE_UNARY_RHS:
      reduction_op = gimple_assign_rhs1 (stmt);
      break;
    case GIMPLE_BINARY_RHS:
      reduction_op = reduc_index ?
                     gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
      break;
    case GIMPLE_TERNARY_RHS:
      reduction_op = gimple_op (stmt, reduc_index + 1);
      break;
    default:
      gcc_unreachable ();
    }

  vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
  gcc_assert (vectype);
  mode = TYPE_MODE (vectype);

  /* 1. Create the reduction def-use cycle:
     Set the arguments of REDUCTION_PHIS, i.e., transform

        loop:
          vec_def = phi <null, null>            # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          ...

     into:

        loop:
          vec_def = phi <vec_init, VECT_DEF>    # REDUCTION_PHI
          VECT_DEF = vector_stmt                # vectorized form of STMT
          ...

     (in case of SLP, do it for all the phis).  */

  /* Get the loop-entry arguments.  */
  if (slp_node)
    vect_get_vec_defs (reduction_op, NULL_TREE, stmt, &vec_initial_defs,
                       NULL, slp_node, reduc_index);
  else
    {
      vec_initial_defs.create (1);
      /* For the case of reduction, vect_get_vec_def_for_operand returns
         the scalar def before the loop, that defines the initial value
         of the reduction variable.  */
      vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
                                                      &adjustment_def);
      vec_initial_defs.quick_push (vec_initial_def);
    }

  /* Set phi nodes arguments.  */
  FOR_EACH_VEC_ELT (reduction_phis, i, phi)
    {
      tree vec_init_def, def;
      gimple_seq stmts;
      vec_init_def = force_gimple_operand (vec_initial_defs[i], &stmts,
                                           true, NULL_TREE);
      gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
      def = vect_defs[i];
      for (j = 0; j < ncopies; j++)
        {
          /* Set the loop-entry arg of the reduction-phi.  */
          add_phi_arg (as_a <gphi *> (phi), vec_init_def,
                       loop_preheader_edge (loop), UNKNOWN_LOCATION);

          /* Set the loop-latch arg for the reduction-phi.  */
          if (j > 0)
            def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);

          add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop),
                       UNKNOWN_LOCATION);

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "transform reduction: created def-use cycle: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
              dump_printf (MSG_NOTE, "\n");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
              dump_printf (MSG_NOTE, "\n");
            }

          phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
        }
    }

  /* 2. Create epilog code.
        The reduction epilog code operates across the elements of the vector
        of partial results computed by the vectorized loop.
        The reduction epilog code consists of:

        step 1: compute the scalar result in a vector (v_out2)
        step 2: extract the scalar result (s_out3) from the vector (v_out2)
        step 3: adjust the scalar result (s_out3) if needed.

        Step 1 can be accomplished using one of the following three schemes:
          (scheme 1) using reduc_code, if available.
          (scheme 2) using whole-vector shifts, if available.
          (scheme 3) using a scalar loop.  In this case steps 1+2 above are
                     combined.

        The overall epilog code looks like this:

          s_out0 = phi <s_loop>                 # original EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>              # step 1
          s_out3 = extract_field <v_out2, 0>    # step 2
          s_out4 = adjust_result <s_out3>       # step 3

        (step 3 is optional, and steps 1 and 2 may be combined).
        Lastly, the uses of s_out0 are replaced by s_out4.  */


  /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
         v_out1 = phi <VECT_DEF>
         Store them in NEW_PHIS.  */

  exit_bb = single_exit (loop)->dest;
  prev_phi_info = NULL;
  new_phis.create (vect_defs.length ());
  FOR_EACH_VEC_ELT (vect_defs, i, def)
    {
      for (j = 0; j < ncopies; j++)
        {
          tree new_def = copy_ssa_name (def);
          phi = create_phi_node (new_def, exit_bb);
          set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
          if (j == 0)
            new_phis.quick_push (phi);
          else
            {
              def = vect_get_vec_def_for_stmt_copy (dt, def);
              STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
            }

          SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
          prev_phi_info = vinfo_for_stmt (phi);
        }
    }

  /* The epilogue is created for the outer-loop, i.e., for the loop being
     vectorized.  Create exit phis for the outer loop.  */
  if (double_reduc)
    {
      loop = outer_loop;
      exit_bb = single_exit (loop)->dest;
      inner_phis.create (vect_defs.length ());
      FOR_EACH_VEC_ELT (new_phis, i, phi)
        {
          tree new_result = copy_ssa_name (PHI_RESULT (phi));
          gphi *outer_phi = create_phi_node (new_result, exit_bb);
          SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
                           PHI_RESULT (phi));
          set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
                                                            loop_vinfo, NULL));
          inner_phis.quick_push (phi);
          new_phis[i] = outer_phi;
          prev_phi_info = vinfo_for_stmt (outer_phi);
          while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
            {
              phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
              new_result = copy_ssa_name (PHI_RESULT (phi));
              outer_phi = create_phi_node (new_result, exit_bb);
              SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
                               PHI_RESULT (phi));
              set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi,
                                                                loop_vinfo,
                                                                NULL));
              STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi;
              prev_phi_info = vinfo_for_stmt (outer_phi);
            }
        }
    }

  exit_gsi = gsi_after_labels (exit_bb);

  /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
         (i.e. when reduc_code is not available) and in the final adjustment
         code (if needed).  Also get the original scalar reduction variable as
         defined in the loop.  In case STMT is a "pattern-stmt" (i.e. - it
         represents a reduction pattern), the tree-code and scalar-def are
         taken from the original stmt that the pattern-stmt (STMT) replaces.
         Otherwise (it is a regular reduction) - the tree-code and scalar-def
         are taken from STMT.  */

  orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
  if (!orig_stmt)
    {
      /* Regular reduction.  */
      orig_stmt = stmt;
    }
  else
    {
      /* Reduction pattern.  */
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
      gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
    }

  code = gimple_assign_rhs_code (orig_stmt);
  /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
     partial results are added and not subtracted.  */
  if (code == MINUS_EXPR)
    code = PLUS_EXPR;

  scalar_dest = gimple_assign_lhs (orig_stmt);
  scalar_type = TREE_TYPE (scalar_dest);
  scalar_results.create (group_size);
  new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
  bitsize = TYPE_SIZE (scalar_type);

  /* In case this is a reduction in an inner-loop while vectorizing an outer
     loop - we don't need to extract a single scalar result at the end of the
     inner-loop (unless it is double reduction, i.e., the use of reduction is
     outside the outer-loop).  The final vector of partial results will be used
     in the vectorized outer-loop, or reduced to a scalar result at the end of
     the outer-loop.  */
  if (nested_in_vect_loop && !double_reduc)
    goto vect_finalize_reduction;

  /* SLP reduction without reduction chain, e.g.,
     # a1 = phi <a2, a0>
     # b1 = phi <b2, b0>
     a2 = operation (a1)
     b2 = operation (b1)  */
  slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)));

  /* In case of reduction chain, e.g.,
     # a1 = phi <a3, a0>
     a2 = operation (a1)
     a3 = operation (a2),

     we may end up with more than one vector result.  Here we reduce them to
     one vector.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      tree first_vect = PHI_RESULT (new_phis[0]);
      tree tmp;
      gassign *new_vec_stmt = NULL;

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      for (k = 1; k < new_phis.length (); k++)
        {
          gimple next_phi = new_phis[k];
          tree second_vect = PHI_RESULT (next_phi);

          tmp = build2 (code, vectype, first_vect, second_vect);
          new_vec_stmt = gimple_build_assign (vec_dest, tmp);
          first_vect = make_ssa_name (vec_dest, new_vec_stmt);
          gimple_assign_set_lhs (new_vec_stmt, first_vect);
          gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
        }

      new_phi_result = first_vect;
      if (new_vec_stmt)
        {
          new_phis.truncate (0);
          new_phis.safe_push (new_vec_stmt);
        }
    }
  else
    new_phi_result = PHI_RESULT (new_phis[0]);

  /* 2.3 Create the reduction code, using one of the three schemes described
         above.  In SLP we simply need to extract all the elements from the
         vector (without reducing them), so we use scalar extracts.  */
  if (reduc_code != ERROR_MARK && !slp_reduc)
    {
      tree tmp;
      tree vec_elem_type;

      /*** Case 1:  Create:
           v_out2 = reduc_expr <v_out1>  */

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Reduce using direct vector reduction.\n");

      vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
      if (!useless_type_conversion_p (scalar_type, vec_elem_type))
        {
          tree tmp_dest =
              vect_create_destination_var (scalar_dest, vec_elem_type);
          tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
          epilog_stmt = gimple_build_assign (tmp_dest, tmp);
          new_temp = make_ssa_name (tmp_dest, epilog_stmt);
          gimple_assign_set_lhs (epilog_stmt, new_temp);
          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

          tmp = build1 (NOP_EXPR, scalar_type, new_temp);
        }
      else
        tmp = build1 (reduc_code, scalar_type, new_phi_result);
      epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
      scalar_results.safe_push (new_temp);
    }
  else
    {
      bool reduce_with_shift = have_whole_vector_shift (mode);
      int element_bitsize = tree_to_uhwi (bitsize);
      int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
      tree vec_temp;

      /* Regardless of whether we have a whole vector shift, if we're
         emulating the operation via tree-vect-generic, we don't want
         to use it.  Only the first round of the reduction is likely
         to still be profitable via emulation.  */
      /* ??? It might be better to emit a reduction tree code here, so that
         tree-vect-generic can expand the first round via bit tricks.  */
      if (!VECTOR_MODE_P (mode))
        reduce_with_shift = false;
      else
        {
          optab optab = optab_for_tree_code (code, vectype, optab_default);
          if (optab_handler (optab, mode) == CODE_FOR_nothing)
            reduce_with_shift = false;
        }

      if (reduce_with_shift && !slp_reduc)
        {
          int nelements = vec_size_in_bits / element_bitsize;
          unsigned char *sel = XALLOCAVEC (unsigned char, nelements);

          int elt_offset;

          tree zero_vec = build_zero_cst (vectype);
          /*** Case 2: Create:
               for (offset = nelements/2; offset >= 1; offset/=2)
                 {
                   Create:  va' = vec_shift <va, offset>
                   Create:  va = vop <va, va'>
                 }  */

          tree rhs;

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Reduce using vector shifts\n");

          vec_dest = vect_create_destination_var (scalar_dest, vectype);
          new_temp = new_phi_result;
          for (elt_offset = nelements / 2;
               elt_offset >= 1;
               elt_offset /= 2)
            {
              calc_vec_perm_mask_for_shift (mode, elt_offset, sel);
              tree mask = vect_gen_perm_mask_any (vectype, sel);
              epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
                                                 new_temp, zero_vec, mask);
              new_name = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_name);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              epilog_stmt = gimple_build_assign (vec_dest, code, new_name,
                                                 new_temp);
              new_temp = make_ssa_name (vec_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
            }

          /* 2.4  Extract the final scalar result.  Create:
             s_out3 = extract_field <v_out2, bitpos>  */

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "extract scalar result\n");

          rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp,
                        bitsize, bitsize_zero_node);
          epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
          new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
          gimple_assign_set_lhs (epilog_stmt, new_temp);
          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
          scalar_results.safe_push (new_temp);
        }
      else
        {
          /*** Case 3: Create:
               s = extract_field <v_out2, 0>
               for (offset = element_size;
                    offset < vector_size;
                    offset += element_size;)
                 {
                   Create:  s' = extract_field <v_out2, offset>
                   Create:  s = op <s, s'>  // For non SLP cases
                 }  */

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Reduce using scalar code.\n");

          vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
          FOR_EACH_VEC_ELT (new_phis, i, new_phi)
            {
              int bit_offset;
              if (gimple_code (new_phi) == GIMPLE_PHI)
                vec_temp = PHI_RESULT (new_phi);
              else
                vec_temp = gimple_assign_lhs (new_phi);
              tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
                                 bitsize_zero_node);
              epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
              new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
              gimple_assign_set_lhs (epilog_stmt, new_temp);
              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

              /* In SLP we don't need to apply reduction operation, so we just
                 collect s' values in SCALAR_RESULTS.  */
              if (slp_reduc)
                scalar_results.safe_push (new_temp);

              for (bit_offset = element_bitsize;
                   bit_offset < vec_size_in_bits;
                   bit_offset += element_bitsize)
                {
                  tree bitpos = bitsize_int (bit_offset);
                  tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
                                     bitsize, bitpos);

                  epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
                  new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
                  gimple_assign_set_lhs (epilog_stmt, new_name);
                  gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);

                  if (slp_reduc)
                    {
                      /* In SLP we don't need to apply reduction operation, so
                         we just collect s' values in SCALAR_RESULTS.  */
                      new_temp = new_name;
                      scalar_results.safe_push (new_name);
                    }
                  else
                    {
                      epilog_stmt = gimple_build_assign (new_scalar_dest, code,
                                                         new_name, new_temp);
                      new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
                      gimple_assign_set_lhs (epilog_stmt, new_temp);
                      gsi_insert_before (&exit_gsi, epilog_stmt,
                                         GSI_SAME_STMT);
                    }
                }
            }

          /* The only case where we need to reduce scalar results in SLP, is
             unrolling.  If the size of SCALAR_RESULTS is greater than
             GROUP_SIZE, we reduce them combining elements modulo
             GROUP_SIZE.  */
          if (slp_reduc)
            {
              tree res, first_res, new_res;
              gimple new_stmt;

              /* Reduce multiple scalar results in case of SLP unrolling.  */
              for (j = group_size; scalar_results.iterate (j, &res);
                   j++)
                {
                  first_res = scalar_results[j % group_size];
                  new_stmt = gimple_build_assign (new_scalar_dest, code,
                                                  first_res, res);
                  new_res = make_ssa_name (new_scalar_dest, new_stmt);
                  gimple_assign_set_lhs (new_stmt, new_res);
                  gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
                  scalar_results[j % group_size] = new_res;
                }
            }
          else
            /* Not SLP - we have one scalar to keep in SCALAR_RESULTS.  */
            scalar_results.safe_push (new_temp);
        }
    }

vect_finalize_reduction:

  if (double_reduc)
    loop = loop->inner;

  /* 2.5 Adjust the final result by the initial value of the reduction
         variable.  (When such adjustment is not needed, then
         'adjustment_def' is zero).  For example, if code is PLUS we create:
         new_temp = loop_exit_def + adjustment_def  */

  if (adjustment_def)
    {
      gcc_assert (!slp_reduc);
      if (nested_in_vect_loop)
        {
          new_phi = new_phis[0];
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
          expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, vectype);
        }
      else
        {
          new_temp = scalar_results[0];
          gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
          expr = build2 (code, scalar_type, new_temp, adjustment_def);
          new_dest = vect_create_destination_var (scalar_dest, scalar_type);
        }

      epilog_stmt = gimple_build_assign (new_dest, expr);
      new_temp = make_ssa_name (new_dest, epilog_stmt);
      gimple_assign_set_lhs (epilog_stmt, new_temp);
      gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
      if (nested_in_vect_loop)
        {
          set_vinfo_for_stmt (epilog_stmt,
                              new_stmt_vec_info (epilog_stmt, loop_vinfo,
                                                 NULL));
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
            STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));

          if (!double_reduc)
            scalar_results.quick_push (new_temp);
          else
            scalar_results[0] = new_temp;
        }
      else
        scalar_results[0] = new_temp;

      new_phis[0] = epilog_stmt;
    }

  /* 2.6  Handle the loop-exit phis.  Replace the uses of scalar loop-exit
          phis with new adjusted scalar results, i.e., replace use <s_out0>
          with use <s_out4>.

     Transform:
        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out0>
          use <s_out0>

     into:

        loop_exit:
          s_out0 = phi <s_loop>                 # (scalar) EXIT_PHI
          v_out1 = phi <VECT_DEF>               # NEW_EXIT_PHI
          v_out2 = reduce <v_out1>
          s_out3 = extract_field <v_out2, 0>
          s_out4 = adjust_result <s_out3>
          use <s_out4>
          use <s_out4>  */


  /* In SLP reduction chain we reduce vector results into one vector if
     necessary, hence we set here GROUP_SIZE to 1.  SCALAR_DEST is the LHS of
     the last stmt in the reduction chain, since we are looking for the loop
     exit phi node.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      scalar_dest = gimple_assign_lhs (
                        SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
      group_size = 1;
    }

  /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
     case that GROUP_SIZE is greater than vectorization factor).  Therefore, we
     need to match SCALAR_RESULTS with corresponding statements.  The first
     (GROUP_SIZE / number of new vector stmts) scalar results correspond to
     the first vector stmt, etc.
     (RATIO is equal to (GROUP_SIZE / number of new vector stmts)).  */
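  /* For instance (numbers for illustration only): with GROUP_SIZE == 4 and
     two entries in NEW_PHIS, RATIO is 2: scalar results 0 and 1 are matched
     with the first vector stmt and results 2 and 3 with the second, via the
     K / RATIO indexing below.  */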
9771b263 4529 if (group_size > new_phis.length ())
74500b3e 4530 {
9771b263
DN
4531 ratio = group_size / new_phis.length ();
4532 gcc_assert (!(group_size % new_phis.length ()));
74500b3e
IR
4533 }
4534 else
4535 ratio = 1;
b5aeb3bb
IR
4536
4537 for (k = 0; k < group_size; k++)
ebfd146a 4538 {
b5aeb3bb
IR
4539 if (k % ratio == 0)
4540 {
9771b263
DN
4541 epilog_stmt = new_phis[k / ratio];
4542 reduction_phi = reduction_phis[k / ratio];
64d94959 4543 if (double_reduc)
9771b263 4544 inner_phi = inner_phis[k / ratio];
b5aeb3bb 4545 }
06066f92 4546
b010117a 4547 if (slp_reduc)
b5aeb3bb 4548 {
9771b263 4549 gimple current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k];
ebfd146a 4550
b5aeb3bb
IR
4551 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
4552 /* SLP statements can't participate in patterns. */
4553 gcc_assert (!orig_stmt);
4554 scalar_dest = gimple_assign_lhs (current_stmt);
4555 }
4556
9771b263 4557 phis.create (3);
b5aeb3bb 4558 /* Find the loop-closed-use at the loop exit of the original scalar
ff802fa1 4559 result. (The reduction result is expected to have two immediate uses -
b5aeb3bb
IR
4560 one at the latch block, and one at the loop exit). */
4561 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
46e950db
RB
4562 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p)))
4563 && !is_gimple_debug (USE_STMT (use_p)))
9771b263 4564 phis.safe_push (USE_STMT (use_p));
b5aeb3bb 4565
7a764c60
RB
4566 /* While we expect to have found an exit_phi because of loop-closed-ssa
4567 form we can end up without one if the scalar cycle is dead. */
b5aeb3bb 4568
9771b263 4569 FOR_EACH_VEC_ELT (phis, i, exit_phi)
b5aeb3bb
IR
4570 {
4571 if (outer_loop)
06066f92 4572 {
b5aeb3bb 4573 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
538dd0b7 4574 gphi *vect_phi;
b5aeb3bb
IR
4575
4576 /* FORNOW. Currently not supporting the case that an inner-loop
4577 reduction is not used in the outer-loop (but only outside the
4578 outer-loop), unless it is double reduction. */
4579 gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
4580 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
4581 || double_reduc);
4582
4583 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
4584 if (!double_reduc
4585 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
4586 != vect_double_reduction_def)
06066f92
IR
4587 continue;
4588
b5aeb3bb 4589 /* Handle double reduction:
06066f92 4590
b5aeb3bb
IR
4591 stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
4592 stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
4593 stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
4594 stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
06066f92 4595
b5aeb3bb
IR
4596 At that point the regular reduction (stmt2 and stmt3) is
4597 already vectorized, as well as the exit phi node, stmt4.
4598 Here we vectorize the phi node of double reduction, stmt1, and
4599 update all relevant statements. */
06066f92 4600
b5aeb3bb
IR
4601 /* Go through all the uses of s2 to find double reduction phi
4602 node, i.e., stmt1 above. */
4603 orig_name = PHI_RESULT (exit_phi);
4604 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
06066f92 4605 {
1fa0c180 4606 stmt_vec_info use_stmt_vinfo;
b5aeb3bb
IR
4607 stmt_vec_info new_phi_vinfo;
4608 tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
4609 basic_block bb = gimple_bb (use_stmt);
4610 gimple use;
4611
4612 /* Check that USE_STMT is really a double reduction phi
4613 node. */
4614 if (gimple_code (use_stmt) != GIMPLE_PHI
4615 || gimple_phi_num_args (use_stmt) != 2
b5aeb3bb
IR
4616 || bb->loop_father != outer_loop)
4617 continue;
1fa0c180
RG
4618 use_stmt_vinfo = vinfo_for_stmt (use_stmt);
4619 if (!use_stmt_vinfo
4620 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
4621 != vect_double_reduction_def)
4622 continue;
b5aeb3bb
IR
4623
4624 /* Create vector phi node for double reduction:
4625 vs1 = phi <vs0, vs2>
4626 vs1 was created previously in this function by a call to
4627 vect_get_vec_def_for_operand and is stored in
4628 vec_initial_def;
64d94959 4629 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
b5aeb3bb
IR
4630 vs0 is created here. */
4631
4632 /* Create vector phi node. */
4633 vect_phi = create_phi_node (vec_initial_def, bb);
4634 new_phi_vinfo = new_stmt_vec_info (vect_phi,
4635 loop_vec_info_for_loop (outer_loop), NULL);
4636 set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
4637
4638 /* Create vs0 - initial def of the double reduction phi. */
4639 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
4640 loop_preheader_edge (outer_loop));
4641 init_def = get_initial_def_for_reduction (stmt,
4642 preheader_arg, NULL);
4643 vect_phi_init = vect_init_vector (use_stmt, init_def,
4644 vectype, NULL);
4645
4646 /* Update phi node arguments with vs0 and vs2. */
4647 add_phi_arg (vect_phi, vect_phi_init,
4648 loop_preheader_edge (outer_loop),
9e227d60 4649 UNKNOWN_LOCATION);
64d94959 4650 add_phi_arg (vect_phi, PHI_RESULT (inner_phi),
9e227d60 4651 loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
73fbfcad 4652 if (dump_enabled_p ())
b5aeb3bb 4653 {
78c60e3d
SS
4654 dump_printf_loc (MSG_NOTE, vect_location,
4655 "created double reduction phi node: ");
4656 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
e645e942 4657 dump_printf (MSG_NOTE, "\n");
b5aeb3bb
IR
4658 }
4659
4660 vect_phi_res = PHI_RESULT (vect_phi);
4661
4662 /* Replace the use, i.e., set the correct vs1 in the regular
ff802fa1 4663 reduction phi node. FORNOW, NCOPIES is always 1, so the
b5aeb3bb
IR
4664 loop is redundant. */
4665 use = reduction_phi;
4666 for (j = 0; j < ncopies; j++)
4667 {
4668 edge pr_edge = loop_preheader_edge (loop);
4669 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
4670 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
4671 }
06066f92
IR
4672 }
4673 }
61869cc9
IR
4674 }
4675
9771b263 4676 phis.release ();
61869cc9
IR
4677 if (nested_in_vect_loop)
4678 {
4679 if (double_reduc)
4680 loop = outer_loop;
4681 else
4682 continue;
4683 }
4684
9771b263 4685 phis.create (3);
61869cc9 4686 /* Find the loop-closed-use at the loop exit of the original scalar
ff802fa1
IR
4687 result. (The reduction result is expected to have two immediate uses,
4688 one at the latch block, and one at the loop exit). For double
61869cc9
IR
4689 reductions we are looking for exit phis of the outer loop. */
4690 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
4691 {
4692 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
46e950db
RB
4693 {
4694 if (!is_gimple_debug (USE_STMT (use_p)))
4695 phis.safe_push (USE_STMT (use_p));
4696 }
61869cc9
IR
4697 else
4698 {
4699 if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
4700 {
4701 tree phi_res = PHI_RESULT (USE_STMT (use_p));
4702
4703 FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
4704 {
4705 if (!flow_bb_inside_loop_p (loop,
46e950db
RB
4706 gimple_bb (USE_STMT (phi_use_p)))
4707 && !is_gimple_debug (USE_STMT (phi_use_p)))
9771b263 4708 phis.safe_push (USE_STMT (phi_use_p));
61869cc9
IR
4709 }
4710 }
4711 }
4712 }
ebfd146a 4713
9771b263 4714 FOR_EACH_VEC_ELT (phis, i, exit_phi)
61869cc9 4715 {
b5aeb3bb
IR
4716 /* Replace the uses: */
4717 orig_name = PHI_RESULT (exit_phi);
9771b263 4718 scalar_result = scalar_results[k];
b5aeb3bb
IR
4719 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
4720 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
4721 SET_USE (use_p, scalar_result);
4722 }
4723
9771b263 4724 phis.release ();
ebfd146a 4725 }
93675444 4726}
ebfd146a
IR
4727
4728
4729/* Function vectorizable_reduction.
4730
4731 Check if STMT performs a reduction operation that can be vectorized.
4732 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
7c5222ff 4733 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
ebfd146a
IR
4734 Return FALSE if not a vectorizable STMT, TRUE otherwise.
4735
b8698a0f 4736 This function also handles reduction idioms (patterns) that have been
ff802fa1 4737 recognized in advance during vect_pattern_recog. In this case, STMT may be
ebfd146a
IR
4738 of this form:
4739 X = pattern_expr (arg0, arg1, ..., X)
4740 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
4741 sequence that had been detected and replaced by the pattern-stmt (STMT).
b8698a0f 4742
ebfd146a
IR
4743 In some cases of reduction patterns, the type of the reduction variable X is
4744 different from the type of the other arguments of STMT.
4745 In such cases, the vectype that is used when transforming STMT into a vector
4746 stmt is different from the vectype that is used to determine the
b8698a0f 4747 vectorization factor, because it consists of a different number of elements
ebfd146a
IR
4748 than the actual number of elements that are being operated upon in parallel.
4749
4750 For example, consider an accumulation of shorts into an int accumulator.
4751 On some targets it's possible to vectorize this pattern operating on 8
4752 shorts at a time (hence, the vectype for purposes of determining the
4753 vectorization factor should be V8HI); on the other hand, the vectype that
4754 is used to create the vector form is actually V4SI (the type of the result).
4755
4756 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
4757 indicates the actual level of parallelism (V8HI in the example), so
ff802fa1 4758 that the right vectorization factor can be derived. This vectype
ebfd146a 4759 corresponds to the type of arguments to the reduction stmt, and should *NOT*
ff802fa1 4760 be used to create the vectorized stmt. The right vectype for the vectorized
ebfd146a
IR
4761 stmt is obtained from the type of the result X:
4762 get_vectype_for_scalar_type (TREE_TYPE (X))
4763
4764 This means that, contrary to "regular" reductions (or "regular" stmts in
4765 general), the following equation:
4766 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
4767 does *NOT* necessarily hold for reduction patterns. */
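/* A minimal source-level sketch of the shorts-into-int case described
   above (illustrative only):

     short b[N]; int sum = 0;
     for (i = 0; i < N; i++)
       sum += b[i];    <-- recognized as sum = widen_sum <b[i], sum>

   Here the vectype recorded for the vectorization factor is V8HI
   (eight shorts per vector iteration), while the vectorized stmt
   itself produces V4SI results.  */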
4768
4769bool
4770vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
b5aeb3bb 4771 gimple *vec_stmt, slp_tree slp_node)
ebfd146a
IR
4772{
4773 tree vec_dest;
4774 tree scalar_dest;
4775 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
4776 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
b690cc0f
RG
4777 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
4778 tree vectype_in = NULL_TREE;
ebfd146a
IR
4779 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4780 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
32e8bb8e 4781 enum tree_code code, orig_code, epilog_reduc_code;
ef4bddc2 4782 machine_mode vec_mode;
ebfd146a
IR
4783 int op_type;
4784 optab optab, reduc_optab;
4785 tree new_temp = NULL_TREE;
4786 tree def;
4787 gimple def_stmt;
4788 enum vect_def_type dt;
538dd0b7 4789 gphi *new_phi = NULL;
ebfd146a
IR
4790 tree scalar_type;
4791 bool is_simple_use;
4792 gimple orig_stmt;
4793 stmt_vec_info orig_stmt_info;
4794 tree expr = NULL_TREE;
4795 int i;
b690cc0f 4796 int ncopies;
ebfd146a
IR
4797 int epilog_copies;
4798 stmt_vec_info prev_stmt_info, prev_phi_info;
ebfd146a 4799 bool single_defuse_cycle = false;
4bbe8262 4800 tree reduc_def = NULL_TREE;
ebfd146a
IR
4801 gimple new_stmt = NULL;
4802 int j;
4803 tree ops[3];
7c5222ff
IR
4804 bool nested_cycle = false, found_nested_cycle_def = false;
4805 gimple reduc_def_stmt = NULL;
4806 /* The default is that the reduction variable is the last operand in the statement. */
4807 int reduc_index = 2;
06066f92
IR
4808 bool double_reduc = false, dummy;
4809 basic_block def_bb;
2f3e235b 4810 struct loop * def_stmt_loop, *outer_loop = NULL;
06066f92 4811 tree def_arg;
2f3e235b 4812 gimple def_arg_stmt;
ef062b13
TS
4813 auto_vec<tree> vec_oprnds0;
4814 auto_vec<tree> vec_oprnds1;
4815 auto_vec<tree> vect_defs;
4816 auto_vec<gimple> phis;
b5aeb3bb 4817 int vec_num;
1684ca6b 4818 tree def0, def1, tem, op0, op1 = NULL_TREE;
ebfd146a 4819
b010117a
IR
4820 /* In case of a reduction chain we switch to the first stmt in the chain, but
4821 we don't update STMT_INFO, since only the last stmt is marked as reduction
4822 and has reduction properties. */
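/* A reduction chain in this sense is, e.g. (an illustrative sketch):

     for (i = 0; i < N; i++)
       { s += a[i]; s += b[i]; }

   where the two statements form one chain and only the last one
   carries the reduction bookkeeping.  */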
4823 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
4824 stmt = GROUP_FIRST_ELEMENT (stmt_info);
4825
ebfd146a 4826 if (nested_in_vect_loop_p (loop, stmt))
7c5222ff 4827 {
2f3e235b 4828 outer_loop = loop;
7c5222ff
IR
4829 loop = loop->inner;
4830 nested_cycle = true;
4831 }
ebfd146a 4832
ebfd146a 4833 /* 1. Is vectorizable reduction? */
b010117a
IR
4834 /* Not supportable if the reduction variable is used in the loop, unless
4835 it's a reduction chain. */
4836 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
4837 && !GROUP_FIRST_ELEMENT (stmt_info))
ebfd146a
IR
4838 return false;
4839
4840 /* Reductions that are not used even in an enclosing outer-loop
4841 are expected to be "live" (used out of the loop). */
8644a673 4842 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
ebfd146a
IR
4843 && !STMT_VINFO_LIVE_P (stmt_info))
4844 return false;
4845
4846 /* Make sure it was already recognized as a reduction computation. */
7c5222ff
IR
4847 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4848 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
ebfd146a
IR
4849 return false;
4850
b8698a0f 4851 /* 2. Has this been recognized as a reduction pattern?
ebfd146a
IR
4852
4853 Check if STMT represents a pattern that has been recognized
4854 in earlier analysis stages. For stmts that represent a pattern,
4855 the STMT_VINFO_RELATED_STMT field records the last stmt in
4856 the original sequence that constitutes the pattern. */
4857
4858 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4859 if (orig_stmt)
4860 {
4861 orig_stmt_info = vinfo_for_stmt (orig_stmt);
ebfd146a
IR
4862 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4863 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4864 }
b8698a0f 4865
ff802fa1 4866 /* 3. Check the operands of the operation. The first operands are defined
ebfd146a
IR
4867 inside the loop body. The last operand is the reduction variable,
4868 which is defined by the loop-header-phi. */
4869
4870 gcc_assert (is_gimple_assign (stmt));
4871
9dc3f7de 4872 /* Flatten RHS. */
ebfd146a
IR
4873 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4874 {
4875 case GIMPLE_SINGLE_RHS:
4876 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4877 if (op_type == ternary_op)
4878 {
4879 tree rhs = gimple_assign_rhs1 (stmt);
4880 ops[0] = TREE_OPERAND (rhs, 0);
4881 ops[1] = TREE_OPERAND (rhs, 1);
4882 ops[2] = TREE_OPERAND (rhs, 2);
4883 code = TREE_CODE (rhs);
4884 }
4885 else
4886 return false;
4887 break;
4888
4889 case GIMPLE_BINARY_RHS:
4890 code = gimple_assign_rhs_code (stmt);
4891 op_type = TREE_CODE_LENGTH (code);
4892 gcc_assert (op_type == binary_op);
4893 ops[0] = gimple_assign_rhs1 (stmt);
4894 ops[1] = gimple_assign_rhs2 (stmt);
4895 break;
4896
f471fe72
RG
4897 case GIMPLE_TERNARY_RHS:
4898 code = gimple_assign_rhs_code (stmt);
4899 op_type = TREE_CODE_LENGTH (code);
4900 gcc_assert (op_type == ternary_op);
4901 ops[0] = gimple_assign_rhs1 (stmt);
4902 ops[1] = gimple_assign_rhs2 (stmt);
4903 ops[2] = gimple_assign_rhs3 (stmt);
4904 break;
4905
ebfd146a
IR
4906 case GIMPLE_UNARY_RHS:
4907 return false;
4908
4909 default:
4910 gcc_unreachable ();
4911 }
4912
f7e531cf
IR
4913 if (code == COND_EXPR && slp_node)
4914 return false;
4915
ebfd146a
IR
4916 scalar_dest = gimple_assign_lhs (stmt);
4917 scalar_type = TREE_TYPE (scalar_dest);
b8698a0f 4918 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
ebfd146a
IR
4919 && !SCALAR_FLOAT_TYPE_P (scalar_type))
4920 return false;
4921
7b7b1813
RG
4922 /* Do not try to vectorize bit-precision reductions. */
4923 if ((TYPE_PRECISION (scalar_type)
4924 != GET_MODE_PRECISION (TYPE_MODE (scalar_type))))
4925 return false;
4926
ebfd146a 4927 /* All uses but the last are expected to be defined in the loop.
ff802fa1 4928 The last use is the reduction variable. In case of a nested cycle this
7c5222ff
IR
4929 assumption is not true: we use reduc_index to record the index of the
4930 reduction variable. */
47cc28f5 4931 for (i = 0; i < op_type - 1; i++)
ebfd146a 4932 {
4bbe8262
IR
4933 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
4934 if (i == 0 && code == COND_EXPR)
4935 continue;
4936
24ee1384 4937 is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
b690cc0f
RG
4938 &def_stmt, &def, &dt, &tem);
4939 if (!vectype_in)
4940 vectype_in = tem;
ebfd146a 4941 gcc_assert (is_simple_use);
b010117a 4942
8644a673
IR
4943 if (dt != vect_internal_def
4944 && dt != vect_external_def
ebfd146a 4945 && dt != vect_constant_def
7c5222ff 4946 && dt != vect_induction_def
4bbe8262 4947 && !(dt == vect_nested_cycle && nested_cycle))
ebfd146a 4948 return false;
7c5222ff
IR
4949
4950 if (dt == vect_nested_cycle)
4951 {
4952 found_nested_cycle_def = true;
4953 reduc_def_stmt = def_stmt;
4954 reduc_index = i;
4955 }
ebfd146a
IR
4956 }
4957
24ee1384
IR
4958 is_simple_use = vect_is_simple_use_1 (ops[i], stmt, loop_vinfo, NULL,
4959 &def_stmt, &def, &dt, &tem);
43427f3c
RG
4960 if (!vectype_in)
4961 vectype_in = tem;
ebfd146a 4962 gcc_assert (is_simple_use);
47cc28f5
JJ
4963 if (!(dt == vect_reduction_def
4964 || dt == vect_nested_cycle
4965 || ((dt == vect_internal_def || dt == vect_external_def
4966 || dt == vect_constant_def || dt == vect_induction_def)
4967 && nested_cycle && found_nested_cycle_def)))
4968 {
4969 /* For pattern-recognized stmts, orig_stmt might be a reduction,
4970 but some helper statements for the pattern might not, or
4971 might be COND_EXPRs with reduction uses in the condition. */
4972 gcc_assert (orig_stmt);
4973 return false;
4974 }
7c5222ff
IR
4975 if (!found_nested_cycle_def)
4976 reduc_def_stmt = def_stmt;
4977
4978 gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
b8698a0f
L
4979 if (orig_stmt)
4980 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4981 reduc_def_stmt,
4982 !nested_cycle,
06066f92 4983 &dummy));
ebfd146a 4984 else
b010117a
IR
4985 {
4986 gimple tmp = vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4987 !nested_cycle, &dummy);
4988 /* We changed STMT to be the first stmt in the reduction chain, hence we
4989 check that in this case the first element in the chain is STMT. */
4990 gcc_assert (stmt == tmp
4991 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt);
4992 }
b8698a0f 4993
7c5222ff 4994 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
ebfd146a
IR
4995 return false;
4996
437f4a00 4997 if (slp_node || PURE_SLP_STMT (stmt_info))
b5aeb3bb
IR
4998 ncopies = 1;
4999 else
5000 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5001 / TYPE_VECTOR_SUBPARTS (vectype_in));
b690cc0f 5002
b690cc0f
RG
5003 gcc_assert (ncopies >= 1);
5004
5005 vec_mode = TYPE_MODE (vectype_in);
ebfd146a 5006
4bbe8262 5007 if (code == COND_EXPR)
ebfd146a 5008 {
f7e531cf 5009 if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0, NULL))
4bbe8262 5010 {
73fbfcad 5011 if (dump_enabled_p ())
78c60e3d 5012 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5013 "unsupported condition in reduction\n");
4bbe8262
IR
5014
5015 return false;
5016 }
ebfd146a 5017 }
4bbe8262 5018 else
ebfd146a 5019 {
4bbe8262 5020 /* 4. Supportable by target? */
ebfd146a 5021
ff784829
JJ
5022 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR
5023 || code == LROTATE_EXPR || code == RROTATE_EXPR)
5024 {
5025 /* Shifts and rotates are only supported by vectorizable_shifts,
5026 not vectorizable_reduction. */
5027 if (dump_enabled_p ())
5028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5029 "unsupported shift or rotation.\n");
ff784829
JJ
5030 return false;
5031 }
5032
4bbe8262 5033 /* 4.1. check support for the operation in the loop */
b690cc0f 5034 optab = optab_for_tree_code (code, vectype_in, optab_default);
4bbe8262
IR
5035 if (!optab)
5036 {
73fbfcad 5037 if (dump_enabled_p ())
78c60e3d 5038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5039 "no optab.\n");
4bbe8262
IR
5040
5041 return false;
5042 }
5043
947131ba 5044 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4bbe8262 5045 {
73fbfcad 5046 if (dump_enabled_p ())
e645e942 5047 dump_printf (MSG_NOTE, "op not supported by target.\n");
4bbe8262
IR
5048
5049 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
5050 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5051 < vect_min_worthwhile_factor (code))
5052 return false;
5053
73fbfcad 5054 if (dump_enabled_p ())
e645e942 5055 dump_printf (MSG_NOTE, "proceeding using word mode.\n");
4bbe8262
IR
5056 }
5057
5058 /* Worthwhile without SIMD support? */
b690cc0f 5059 if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4bbe8262
IR
5060 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5061 < vect_min_worthwhile_factor (code))
5062 {
73fbfcad 5063 if (dump_enabled_p ())
78c60e3d 5064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5065 "not worthwhile without SIMD support.\n");
4bbe8262
IR
5066
5067 return false;
5068 }
ebfd146a
IR
5069 }
5070
5071 /* 4.2. Check support for the epilog operation.
5072
5073 If STMT represents a reduction pattern, then the type of the
5074 reduction variable may be different than the type of the rest
5075 of the arguments. For example, consider the case of accumulation
5076 of shorts into an int accumulator; The original code:
5077 S1: int_a = (int) short_a;
5078 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
5079
5080 was replaced with:
5081 STMT: int_acc = widen_sum <short_a, int_acc>
5082
5083 This means that:
b8698a0f
L
5084 1. The tree-code that is used to create the vector operation in the
5085 epilog code (that reduces the partial results) is not the
5086 tree-code of STMT, but is rather the tree-code of the original
ff802fa1 5087 stmt from the pattern that STMT is replacing. I.e., in the example
b8698a0f 5088 above we want to use 'widen_sum' in the loop, but 'plus' in the
ebfd146a
IR
5089 epilog.
5090 2. The type (mode) we use to check available target support
b8698a0f
L
5091 for the vector operation to be created in the *epilog*, is
5092 determined by the type of the reduction variable (in the example
947131ba 5093 above we'd check this: optab_handler (plus_optab, vect_int_mode)).
ebfd146a
IR
5094 However the type (mode) we use to check available target support
5095 for the vector operation to be created *inside the loop*, is
5096 determined by the type of the other arguments to STMT (in the
947131ba
RS
5097 example we'd check this: optab_handler (widen_sum_optab,
5098 vect_short_mode)).
b8698a0f
L
5099
5100 This is contrary to "regular" reductions, in which the types of all
5101 the arguments are the same as the type of the reduction variable.
5102 For "regular" reductions we can therefore use the same vector type
ebfd146a
IR
5103 (and also the same tree-code) when generating the epilog code and
5104 when generating the code inside the loop. */
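/* Putting 1. and 2. together for the widen_sum example (an
   illustrative sketch; reduc_plus here stands for whatever
   reduction_code_for_scalar_code returns for 'plus'):

     loop body: int_acc_vec = widen_sum <short_a_vec, int_acc_vec>
                (support checked in vect_short_mode)
     epilog:    result = reduc_plus <int_acc_vec>
                (support checked in vect_int_mode)  */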
5105
5106 if (orig_stmt)
5107 {
5108 /* This is a reduction pattern: get the vectype from the type of the
5109 reduction variable, and get the tree-code from orig_stmt. */
5110 orig_code = gimple_assign_rhs_code (orig_stmt);
b690cc0f
RG
5111 gcc_assert (vectype_out);
5112 vec_mode = TYPE_MODE (vectype_out);
ebfd146a
IR
5113 }
5114 else
5115 {
5116 /* Regular reduction: the same vectype and tree-code as used for
5117 the vector code inside the loop can be used for the epilog code. */
5118 orig_code = code;
5119 }
5120
2f3e235b
IR
5121 if (nested_cycle)
5122 {
5123 def_bb = gimple_bb (reduc_def_stmt);
5124 def_stmt_loop = def_bb->loop_father;
5125 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
5126 loop_preheader_edge (def_stmt_loop));
5127 if (TREE_CODE (def_arg) == SSA_NAME
5128 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
5129 && gimple_code (def_arg_stmt) == GIMPLE_PHI
5130 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
5131 && vinfo_for_stmt (def_arg_stmt)
5132 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
5133 == vect_double_reduction_def)
5134 double_reduc = true;
5135 }
06066f92 5136
4bbe8262
IR
5137 epilog_reduc_code = ERROR_MARK;
5138 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
5139 {
b690cc0f 5140 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
4bbe8262
IR
5141 optab_default);
5142 if (!reduc_optab)
5143 {
73fbfcad 5144 if (dump_enabled_p ())
78c60e3d 5145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5146 "no optab for reduction.\n");
4bbe8262
IR
5147
5148 epilog_reduc_code = ERROR_MARK;
5149 }
d43a252e 5150 else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
4bbe8262 5151 {
d43a252e
AL
5152 optab = scalar_reduc_to_vector (reduc_optab, vectype_out);
5153 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
5154 {
5155 if (dump_enabled_p ())
5156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5157 "reduc op not supported by target.\n");
b8698a0f 5158
d43a252e
AL
5159 epilog_reduc_code = ERROR_MARK;
5160 }
4bbe8262
IR
5161 }
5162 }
5163 else
5164 {
5165 if (!nested_cycle || double_reduc)
5166 {
73fbfcad 5167 if (dump_enabled_p ())
78c60e3d 5168 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5169 "no reduc code for scalar code.\n");
4bbe8262
IR
5170
5171 return false;
5172 }
5173 }
5174
06066f92
IR
5175 if (double_reduc && ncopies > 1)
5176 {
73fbfcad 5177 if (dump_enabled_p ())
78c60e3d 5178 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5179 "multiple types in double reduction\n");
06066f92
IR
5180
5181 return false;
5182 }
b8698a0f 5183
5181cd6a
IR
5184 /* In case of widening multiplication by a constant, we update the type
5185 of the constant to be the type of the other operand. We check that the
5186 constant fits the type in the pattern recognition pass. */
5187 if (code == DOT_PROD_EXPR
5188 && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1])))
5189 {
5190 if (TREE_CODE (ops[0]) == INTEGER_CST)
5191 ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]);
5192 else if (TREE_CODE (ops[1]) == INTEGER_CST)
5193 ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]);
5194 else
5195 {
73fbfcad 5196 if (dump_enabled_p ())
78c60e3d 5197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5198 "invalid types in dot-prod\n");
5181cd6a
IR
5199
5200 return false;
5201 }
5202 }
5203
ebfd146a
IR
5204 if (!vec_stmt) /* transformation not required. */
5205 {
ebfd146a
IR
5206 if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
5207 return false;
fdabf269 5208 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
ebfd146a
IR
5209 return true;
5210 }
5211
5212 /** Transform. **/
5213
73fbfcad 5214 if (dump_enabled_p ())
e645e942 5215 dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n");
ebfd146a 5216
4bbe8262
IR
5217 /* FORNOW: Multiple types are not supported for condition. */
5218 if (code == COND_EXPR)
5219 gcc_assert (ncopies == 1);
5220
ebfd146a 5221 /* Create the destination vector. */
b690cc0f 5222 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
ebfd146a
IR
5223
5224 /* In case the vectorization factor (VF) is bigger than the number
5225 of elements that we can fit in a vectype (nunits), we have to generate
5226 more than one vector stmt - i.e., we need to "unroll" the
5227 vector stmt by a factor VF/nunits. For more details see documentation
5228 in vectorizable_operation. */
5229
5230 /* If the reduction is used in an outer loop we need to generate
5231 VF intermediate results, like so (e.g. for ncopies=2):
5232 r0 = phi (init, r0)
5233 r1 = phi (init, r1)
5234 r0 = x0 + r0;
5235 r1 = x1 + r1;
5236 (i.e. we generate VF results in 2 registers).
5237 In this case we have a separate def-use cycle for each copy, and therefore
5238 for each copy we get the vector def for the reduction variable from the
5239 respective phi node created for this copy.
5240
5241 Otherwise (the reduction is unused in the loop nest), we can combine
5242 together intermediate results, like so (e.g. for ncopies=2):
5243 r = phi (init, r)
5244 r = x0 + r;
5245 r = x1 + r;
5246 (i.e. we generate VF/2 results in a single register).
5247 In this case for each copy we get the vector def for the reduction variable
5248 from the vectorized reduction operation generated in the previous iteration.
5249 */
5250
8644a673 5251 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
ebfd146a
IR
5252 {
5253 single_defuse_cycle = true;
5254 epilog_copies = 1;
5255 }
5256 else
5257 epilog_copies = ncopies;
5258
5259 prev_stmt_info = NULL;
5260 prev_phi_info = NULL;
b5aeb3bb
IR
5261 if (slp_node)
5262 {
5263 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5264 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
5265 == TYPE_VECTOR_SUBPARTS (vectype_in));
5266 }
5267 else
5268 {
5269 vec_num = 1;
9771b263 5270 vec_oprnds0.create (1);
b5aeb3bb 5271 if (op_type == ternary_op)
9771b263 5272 vec_oprnds1.create (1);
b5aeb3bb
IR
5273 }
5274
9771b263
DN
5275 phis.create (vec_num);
5276 vect_defs.create (vec_num);
b5aeb3bb 5277 if (!slp_node)
9771b263 5278 vect_defs.quick_push (NULL_TREE);
b5aeb3bb 5279
ebfd146a
IR
5280 for (j = 0; j < ncopies; j++)
5281 {
5282 if (j == 0 || !single_defuse_cycle)
5283 {
b5aeb3bb
IR
5284 for (i = 0; i < vec_num; i++)
5285 {
5286 /* Create the reduction-phi that defines the reduction
5287 operand. */
5288 new_phi = create_phi_node (vec_dest, loop->header);
5289 set_vinfo_for_stmt (new_phi,
5290 new_stmt_vec_info (new_phi, loop_vinfo,
5291 NULL));
5292 if (j == 0 || slp_node)
9771b263 5293 phis.quick_push (new_phi);
b5aeb3bb
IR
5294 }
5295 }
ebfd146a 5296
4bbe8262
IR
5297 if (code == COND_EXPR)
5298 {
b5aeb3bb
IR
5299 gcc_assert (!slp_node);
5300 vectorizable_condition (stmt, gsi, vec_stmt,
9771b263 5301 PHI_RESULT (phis[0]),
f7e531cf 5302 reduc_index, NULL);
4bbe8262
IR
5303 /* Multiple types are not supported for condition. */
5304 break;
5305 }
5306
ebfd146a
IR
5307 /* Handle uses. */
5308 if (j == 0)
5309 {
9dc3f7de
IR
5310 op0 = ops[!reduc_index];
5311 if (op_type == ternary_op)
5312 {
5313 if (reduc_index == 0)
5314 op1 = ops[2];
5315 else
5316 op1 = ops[1];
5317 }
5318
b5aeb3bb 5319 if (slp_node)
d092494c
IR
5320 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
5321 slp_node, -1);
b5aeb3bb 5322 else
ebfd146a 5323 {
b5aeb3bb
IR
5324 loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
5325 stmt, NULL);
9771b263 5326 vec_oprnds0.quick_push (loop_vec_def0);
b5aeb3bb
IR
5327 if (op_type == ternary_op)
5328 {
9dc3f7de
IR
5329 loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
5330 NULL);
9771b263 5331 vec_oprnds1.quick_push (loop_vec_def1);
b5aeb3bb 5332 }
ebfd146a 5333 }
ebfd146a
IR
5334 }
5335 else
5336 {
b5aeb3bb
IR
5337 if (!slp_node)
5338 {
1684ca6b
IR
5339 enum vect_def_type dt;
5340 gimple dummy_stmt;
5341 tree dummy;
5342
24ee1384 5343 vect_is_simple_use (ops[!reduc_index], stmt, loop_vinfo, NULL,
1684ca6b
IR
5344 &dummy_stmt, &dummy, &dt);
5345 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt,
5346 loop_vec_def0);
9771b263 5347 vec_oprnds0[0] = loop_vec_def0;
b5aeb3bb
IR
5348 if (op_type == ternary_op)
5349 {
24ee1384 5350 vect_is_simple_use (op1, stmt, loop_vinfo, NULL, &dummy_stmt,
1684ca6b 5351 &dummy, &dt);
b5aeb3bb
IR
5352 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
5353 loop_vec_def1);
9771b263 5354 vec_oprnds1[0] = loop_vec_def1;
b5aeb3bb
IR
5355 }
5356 }
ebfd146a 5357
b5aeb3bb
IR
5358 if (single_defuse_cycle)
5359 reduc_def = gimple_assign_lhs (new_stmt);
ebfd146a 5360
b5aeb3bb 5361 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
ebfd146a
IR
5362 }
5363
9771b263 5364 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
7c5222ff 5365 {
b5aeb3bb 5366 if (slp_node)
9771b263 5367 reduc_def = PHI_RESULT (phis[i]);
7c5222ff 5368 else
b5aeb3bb
IR
5369 {
5370 if (!single_defuse_cycle || j == 0)
5371 reduc_def = PHI_RESULT (new_phi);
5372 }
5373
5374 def1 = ((op_type == ternary_op)
9771b263 5375 ? vec_oprnds1[i] : NULL);
b5aeb3bb
IR
5376 if (op_type == binary_op)
5377 {
5378 if (reduc_index == 0)
5379 expr = build2 (code, vectype_out, reduc_def, def0);
5380 else
5381 expr = build2 (code, vectype_out, def0, reduc_def);
5382 }
b8698a0f 5383 else
7c5222ff 5384 {
b5aeb3bb
IR
5385 if (reduc_index == 0)
5386 expr = build3 (code, vectype_out, reduc_def, def0, def1);
7c5222ff 5387 else
b5aeb3bb
IR
5388 {
5389 if (reduc_index == 1)
5390 expr = build3 (code, vectype_out, def0, reduc_def, def1);
5391 else
5392 expr = build3 (code, vectype_out, def0, def1, reduc_def);
5393 }
5394 }
5395
5396 new_stmt = gimple_build_assign (vec_dest, expr);
5397 new_temp = make_ssa_name (vec_dest, new_stmt);
5398 gimple_assign_set_lhs (new_stmt, new_temp);
5399 vect_finish_stmt_generation (stmt, new_stmt, gsi);
b010117a 5400
b5aeb3bb
IR
5401 if (slp_node)
5402 {
9771b263
DN
5403 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
5404 vect_defs.quick_push (new_temp);
7c5222ff 5405 }
b5aeb3bb 5406 else
9771b263 5407 vect_defs[0] = new_temp;
7c5222ff
IR
5408 }
5409
b5aeb3bb
IR
5410 if (slp_node)
5411 continue;
b8698a0f 5412
ebfd146a
IR
5413 if (j == 0)
5414 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5415 else
5416 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4bbe8262 5417
ebfd146a
IR
5418 prev_stmt_info = vinfo_for_stmt (new_stmt);
5419 prev_phi_info = vinfo_for_stmt (new_phi);
5420 }
5421
5422 /* Finalize the reduction-phi (set its arguments) and create the
5423 epilog reduction code. */
b5aeb3bb
IR
5424 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
5425 {
5426 new_temp = gimple_assign_lhs (*vec_stmt);
9771b263 5427 vect_defs[0] = new_temp;
b5aeb3bb
IR
5428 }
5429
5430 vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
5431 epilog_reduc_code, phis, reduc_index,
5432 double_reduc, slp_node);
5433
ebfd146a
IR
5434 return true;
5435}
5436
5437/* Function vect_min_worthwhile_factor.
5438
5439 For a loop where we could vectorize the operation indicated by CODE,
5440 return the minimum vectorization factor that makes it worthwhile
5441 to use generic vectors. */
5442int
5443vect_min_worthwhile_factor (enum tree_code code)
5444{
5445 switch (code)
5446 {
5447 case PLUS_EXPR:
5448 case MINUS_EXPR:
5449 case NEGATE_EXPR:
5450 return 4;
5451
5452 case BIT_AND_EXPR:
5453 case BIT_IOR_EXPR:
5454 case BIT_XOR_EXPR:
5455 case BIT_NOT_EXPR:
5456 return 2;
5457
5458 default:
5459 return INT_MAX;
5460 }
5461}
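/* For example, vect_min_worthwhile_factor (PLUS_EXPR) is 4: when we
   fall back to word-mode "generic vectors" (see the optab_handler
   fallback in vectorizable_reduction above), vectorizing an addition
   is only assumed profitable once at least four scalar ops are
   combined per vector op, while cheap bitwise ops already qualify at
   a factor of 2.  */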
5462
5463
5464/* Function vectorizable_induction
5465
5466 Check if PHI performs an induction computation that can be vectorized.
5467 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
5468 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
5469 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
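/* For example (an illustrative sketch): for the induction variable in

     for (i = 0; i < N; i++)
       a[i] = i;

   and a vectorization factor of 4, the scalar phi for i is replaced by
   a vector induction starting at {0, 1, 2, 3} and advanced by the step
   vector {4, 4, 4, 4} in each vector iteration.  */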
5470
5471bool
5472vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
5473 gimple *vec_stmt)
5474{
5475 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
5476 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5477 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5478 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5479 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5480 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5481 tree vec_def;
5482
5483 gcc_assert (ncopies >= 1);
21039e0e
RG
5484 /* FORNOW. These restrictions should be relaxed. */
5485 if (nested_in_vect_loop_p (loop, phi))
ebfd146a 5486 {
21039e0e
RG
5487 imm_use_iterator imm_iter;
5488 use_operand_p use_p;
5489 gimple exit_phi;
5490 edge latch_e;
5491 tree loop_arg;
5492
5493 if (ncopies > 1)
5494 {
73fbfcad 5495 if (dump_enabled_p ())
78c60e3d 5496 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5497 "multiple types in nested loop.\n");
21039e0e
RG
5498 return false;
5499 }
5500
5501 exit_phi = NULL;
5502 latch_e = loop_latch_edge (loop->inner);
5503 loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
5504 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
5505 {
b010d601
JJ
5506 gimple use_stmt = USE_STMT (use_p);
5507 if (is_gimple_debug (use_stmt))
5508 continue;
5509
5510 if (!flow_bb_inside_loop_p (loop->inner, gimple_bb (use_stmt)))
21039e0e 5511 {
b010d601 5512 exit_phi = use_stmt;
21039e0e
RG
5513 break;
5514 }
5515 }
5516 if (exit_phi)
5517 {
5518 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
5519 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
5520 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
5521 {
73fbfcad 5522 if (dump_enabled_p ())
e645e942 5523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 5524 "inner-loop induction only used outside "
e645e942 5525 "of the outer vectorized loop.\n");
21039e0e
RG
5526 return false;
5527 }
5528 }
ebfd146a
IR
5529 }
5530
5531 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5532 return false;
5533
5534 /* FORNOW: SLP not supported. */
5535 if (STMT_SLP_TYPE (stmt_info))
5536 return false;
5537
5538 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
5539
5540 if (gimple_code (phi) != GIMPLE_PHI)
5541 return false;
5542
5543 if (!vec_stmt) /* transformation not required. */
5544 {
5545 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
73fbfcad 5546 if (dump_enabled_p ())
78c60e3d 5547 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5548 "=== vectorizable_induction ===\n");
ebfd146a
IR
5549 vect_model_induction_cost (stmt_info, ncopies);
5550 return true;
5551 }
5552
5553 /** Transform. **/
5554
73fbfcad 5555 if (dump_enabled_p ())
e645e942 5556 dump_printf_loc (MSG_NOTE, vect_location, "transform induction phi.\n");
ebfd146a
IR
5557
5558 vec_def = get_initial_def_for_induction (phi);
5559 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
5560 return true;
5561}
5562
5563/* Function vectorizable_live_operation.
5564
ff802fa1 5565 STMT computes a value that is used outside the loop. Check if
ebfd146a
IR
5566 it can be supported. */
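/* E.g. (an illustrative sketch, with c and d loop-invariant):

     for (i = 0; i < N; i++)
       { ...; t = c + d; }
     use (t);    <-- t is "live": defined in the loop, used after it

   FORNOW only such invariant computations are supported, so the
   scalar stmt can stay in place and its last value is used directly.  */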
5567
5568bool
5569vectorizable_live_operation (gimple stmt,
5570 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
74bf76ed 5571 gimple *vec_stmt)
ebfd146a
IR
5572{
5573 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5574 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5575 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5576 int i;
5577 int op_type;
5578 tree op;
5579 tree def;
5580 gimple def_stmt;
b8698a0f 5581 enum vect_def_type dt;
ebfd146a
IR
5582 enum tree_code code;
5583 enum gimple_rhs_class rhs_class;
5584
5585 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5586
5587 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5588 return false;
5589
5590 if (!is_gimple_assign (stmt))
74bf76ed
JJ
5591 {
5592 if (gimple_call_internal_p (stmt)
5593 && gimple_call_internal_fn (stmt) == IFN_GOMP_SIMD_LANE
5594 && gimple_call_lhs (stmt)
5595 && loop->simduid
5596 && TREE_CODE (gimple_call_arg (stmt, 0)) == SSA_NAME
5597 && loop->simduid
5598 == SSA_NAME_VAR (gimple_call_arg (stmt, 0)))
5599 {
5600 edge e = single_exit (loop);
5601 basic_block merge_bb = e->dest;
5602 imm_use_iterator imm_iter;
5603 use_operand_p use_p;
5604 tree lhs = gimple_call_lhs (stmt);
5605
5606 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
5607 {
5608 gimple use_stmt = USE_STMT (use_p);
5609 if (gimple_code (use_stmt) == GIMPLE_PHI
b010d601 5610 && gimple_bb (use_stmt) == merge_bb)
74bf76ed
JJ
5611 {
5612 if (vec_stmt)
5613 {
5614 tree vfm1
5615 = build_int_cst (unsigned_type_node,
5616 loop_vinfo->vectorization_factor - 1);
5617 SET_PHI_ARG_DEF (use_stmt, e->dest_idx, vfm1);
5618 }
5619 return true;
5620 }
5621 }
5622 }
5623
5624 return false;
5625 }
ebfd146a
IR
5626
5627 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
5628 return false;
5629
5630 /* FORNOW. CHECKME. */
5631 if (nested_in_vect_loop_p (loop, stmt))
5632 return false;
5633
5634 code = gimple_assign_rhs_code (stmt);
5635 op_type = TREE_CODE_LENGTH (code);
5636 rhs_class = get_gimple_rhs_class (code);
5637 gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
5638 gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
5639
ff802fa1 5640 /* FORNOW: support only if all uses are invariant. This means
ebfd146a
IR
5641 that the scalar operations can remain in place, unvectorized.
5642 The original last scalar value that they compute will be used. */
5643
5644 for (i = 0; i < op_type; i++)
5645 {
5646 if (rhs_class == GIMPLE_SINGLE_RHS)
5647 op = TREE_OPERAND (gimple_op (stmt, 1), i);
5648 else
5649 op = gimple_op (stmt, i + 1);
a70d6342 5650 if (op
24ee1384
IR
5651 && !vect_is_simple_use (op, stmt, loop_vinfo, NULL, &def_stmt, &def,
5652 &dt))
ebfd146a 5653 {
73fbfcad 5654 if (dump_enabled_p ())
78c60e3d 5655 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 5656 "use not simple.\n");
ebfd146a
IR
5657 return false;
5658 }
5659
8644a673 5660 if (dt != vect_external_def && dt != vect_constant_def)
ebfd146a
IR
5661 return false;
5662 }
5663
5664 /* No transformation is required for the cases we currently support. */
5665 return true;
5666}
5667
a83452e9
AO
5668/* Kill any debug uses outside LOOP of SSA names defined in STMT. */
5669
5670static void
5671vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
5672{
5673 ssa_op_iter op_iter;
5674 imm_use_iterator imm_iter;
5675 def_operand_p def_p;
5676 gimple ustmt;
5677
5678 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
5679 {
5680 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
5681 {
5682 basic_block bb;
5683
5684 if (!is_gimple_debug (ustmt))
5685 continue;
5686
5687 bb = gimple_bb (ustmt);
5688
5689 if (!flow_bb_inside_loop_p (loop, bb))
5690 {
5691 if (gimple_debug_bind_p (ustmt))
5692 {
73fbfcad 5693 if (dump_enabled_p ())
78c60e3d 5694 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 5695 "killing debug use\n");
a83452e9
AO
5696
5697 gimple_debug_bind_reset_value (ustmt);
5698 update_stmt (ustmt);
5699 }
5700 else
5701 gcc_unreachable ();
5702 }
5703 }
5704 }
5705}
5706
f3c92486
RB
5707
5708/* This function builds ni_name = number of iterations. Statements
15e693cc 5709 are emitted on the loop preheader edge. */
f3c92486
RB
5710
5711static tree
15e693cc 5712vect_build_loop_niters (loop_vec_info loop_vinfo)
f3c92486 5713{
f3c92486 5714 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
15e693cc
RB
5715 if (TREE_CODE (ni) == INTEGER_CST)
5716 return ni;
5717 else
5718 {
5719 tree ni_name, var;
5720 gimple_seq stmts = NULL;
5721 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
f3c92486 5722
15e693cc
RB
5723 var = create_tmp_var (TREE_TYPE (ni), "niters");
5724 ni_name = force_gimple_operand (ni, &stmts, false, var);
5725 if (stmts)
5726 gsi_insert_seq_on_edge_immediate (pe, stmts);
f3c92486 5727
15e693cc
RB
5728 return ni_name;
5729 }
f3c92486
RB
5730}
5731
5732
5733/* This function generates the following statements:
5734
15e693cc
RB
5735 ni_name = number of iterations loop executes
5736 ratio = ni_name / vf
5737 ratio_mult_vf_name = ratio * vf
f3c92486 5738
15e693cc 5739 and places them on the loop preheader edge. */
f3c92486
RB
5740
5741static void
5742vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
5743 tree ni_name,
5744 tree *ratio_mult_vf_name_ptr,
15e693cc 5745 tree *ratio_name_ptr)
f3c92486 5746{
f3c92486
RB
5747 tree ni_minus_gap_name;
5748 tree var;
5749 tree ratio_name;
5750 tree ratio_mult_vf_name;
f3c92486 5751 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
15e693cc 5752 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
f3c92486
RB
5753 tree log_vf;
5754
95b3eff3 5755 log_vf = build_int_cst (TREE_TYPE (ni_name), exact_log2 (vf));
f3c92486
RB
5756
5757 /* If an epilogue loop is required because of data accesses with gaps, we
5758 subtract one iteration from the total number of iterations here for
5759 correct calculation of RATIO. */
5760 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
5761 {
5762 ni_minus_gap_name = fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
5763 ni_name,
5764 build_one_cst (TREE_TYPE (ni_name)));
5765 if (!is_gimple_val (ni_minus_gap_name))
5766 {
95b3eff3 5767 var = create_tmp_var (TREE_TYPE (ni_name), "ni_gap");
15e693cc 5768 gimple stmts = NULL;
f3c92486
RB
5769 ni_minus_gap_name = force_gimple_operand (ni_minus_gap_name, &stmts,
5770 true, var);
15e693cc 5771 gsi_insert_seq_on_edge_immediate (pe, stmts);
f3c92486
RB
5772 }
5773 }
5774 else
5775 ni_minus_gap_name = ni_name;
5776
5777 /* Create: ratio = ni >> log2(vf) */
95b3eff3
RB
5778 /* ??? As we have ni == number of latch executions + 1, ni could
5779 have overflowed to zero. So avoid computing ratio based on ni
5780 but compute it using the fact that we know ratio will be at least
5781 one, thus via (ni - vf) >> log2(vf) + 1. */
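/* A worked example (illustrative only): for ni = 10 and vf = 4,
   ((10 - 4) >> 2) + 1 == 1 + 1 == 2, which matches 10 >> 2 while
   never computing the shift on a possibly-wrapped ni.  */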
5782 ratio_name
5783 = fold_build2 (PLUS_EXPR, TREE_TYPE (ni_name),
5784 fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name),
5785 fold_build2 (MINUS_EXPR, TREE_TYPE (ni_name),
5786 ni_minus_gap_name,
5787 build_int_cst
5788 (TREE_TYPE (ni_name), vf)),
5789 log_vf),
5790 build_int_cst (TREE_TYPE (ni_name), 1));
f3c92486
RB
5791 if (!is_gimple_val (ratio_name))
5792 {
95b3eff3 5793 var = create_tmp_var (TREE_TYPE (ni_name), "bnd");
15e693cc 5794 gimple stmts = NULL;
f3c92486 5795 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var);
15e693cc 5796 gsi_insert_seq_on_edge_immediate (pe, stmts);
f3c92486
RB
5797 }
5798 *ratio_name_ptr = ratio_name;
5799
5800 /* Create: ratio_mult_vf = ratio << log2 (vf). */
5801
5802 if (ratio_mult_vf_name_ptr)
5803 {
5804 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
5805 ratio_name, log_vf);
5806 if (!is_gimple_val (ratio_mult_vf_name))
5807 {
95b3eff3 5808 var = create_tmp_var (TREE_TYPE (ni_name), "ratio_mult_vf");
15e693cc 5809 gimple stmts = NULL;
f3c92486
RB
5810 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts,
5811 true, var);
15e693cc 5812 gsi_insert_seq_on_edge_immediate (pe, stmts);
f3c92486
RB
5813 }
5814 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
5815 }
5816
5817 return;
5818}
5819
5820
ebfd146a
IR
5821/* Function vect_transform_loop.
5822
5823 The analysis phase has determined that the loop is vectorizable.
5824 Vectorize the loop - create vectorized stmts to replace the scalar
5825 stmts in the loop, and update the loop exit condition. */
5826
5827void
5828vect_transform_loop (loop_vec_info loop_vinfo)
5829{
5830 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5831 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5832 int nbbs = loop->num_nodes;
ebfd146a
IR
5833 int i;
5834 tree ratio = NULL;
5835 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
0d0293ac 5836 bool grouped_store;
ebfd146a 5837 bool slp_scheduled = false;
363477c0
JJ
5838 gimple stmt, pattern_stmt;
5839 gimple_seq pattern_def_seq = NULL;
355a7673 5840 gimple_stmt_iterator pattern_def_si = gsi_none ();
363477c0 5841 bool transform_pattern_stmt = false;
156c01cf 5842 bool check_profitability = false;
368117e8 5843 int th;
22458c5a
JH
5844 /* Record the number of iterations before we started tampering with the profile. */
5845 gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
ebfd146a 5846
73fbfcad 5847 if (dump_enabled_p ())
e645e942 5848 dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n");
ebfd146a 5849
22458c5a
JH
5850 /* If the profile is imprecise, we have a chance to fix it up. */
5851 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5852 expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
5853
368117e8
RG
5854 /* Use the more conservative vectorization threshold. If the number
5855 of iterations is constant, assume the cost check has been performed
5856 by our caller. If the threshold makes all loops profitable that
5857 run at least the vectorization factor number of times, checking
5858 is pointless, too. */
090cd8dc 5859 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
368117e8
RG
5860 if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1
5861 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
5862 {
73fbfcad 5863 if (dump_enabled_p ())
78c60e3d 5864 dump_printf_loc (MSG_NOTE, vect_location,
e645e942
TJ
5865 "Profitability threshold is %d loop iterations.\n",
5866 th);
368117e8
RG
5867 check_profitability = true;
5868 }
5869
afb119be
RB
5870 /* Version the loop first, if required, so the profitability check
5871 comes first. */
86290011 5872
afb119be
RB
5873 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
5874 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
368117e8 5875 {
afb119be 5876 vect_loop_versioning (loop_vinfo, th, check_profitability);
368117e8
RG
5877 check_profitability = false;
5878 }
86290011 5879
15e693cc
RB
5880 tree ni_name = vect_build_loop_niters (loop_vinfo);
5881 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = ni_name;
5882
afb119be 5883 /* Peel the loop if there are data refs with unknown alignment.
15e693cc 5884 Only one store data ref with unknown alignment is allowed. */
afb119be 5885
15e693cc 5886 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo))
368117e8 5887 {
f3c92486
RB
5888 vect_do_peeling_for_alignment (loop_vinfo, ni_name,
5889 th, check_profitability);
368117e8 5890 check_profitability = false;
15e693cc
RB
5891 /* The above adjusts LOOP_VINFO_NITERS, so force ni_name to
5892 be re-computed. */
5893 ni_name = NULL_TREE;
368117e8 5894 }
ebfd146a 5895
ebfd146a
IR
5896 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5897 compile-time constant), or it is a constant that is not divisible by the
5898 vectorization factor, then an epilog loop needs to be created.
5899 We therefore duplicate the loop: the original loop will be vectorized,
ff802fa1 5900 and will compute the first (n/VF) iterations. The second copy of the loop
ebfd146a
IR
5901 will remain scalar and will compute the remaining (n%VF) iterations.
5902 (VF is the vectorization factor). */
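/* A worked example (illustrative only): for n = 100 and VF = 8 the
   vectorized loop executes 100/8 = 12 iterations covering the first
   96 elements, and the scalar epilog loop handles the remaining
   100%8 = 4 iterations.  */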
5903
15e693cc 5904 if (LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)
eae76e53 5905 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
f3c92486 5906 {
15e693cc
RB
5907 tree ratio_mult_vf;
5908 if (!ni_name)
5909 ni_name = vect_build_loop_niters (loop_vinfo);
f3c92486 5910 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, &ratio_mult_vf,
15e693cc 5911 &ratio);
f3c92486
RB
5912 vect_do_peeling_for_loop_bound (loop_vinfo, ni_name, ratio_mult_vf,
5913 th, check_profitability);
5914 }
eae76e53 5915 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
ebfd146a
IR
5916 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5917 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
eae76e53
JJ
5918 else
5919 {
15e693cc
RB
5920 if (!ni_name)
5921 ni_name = vect_build_loop_niters (loop_vinfo);
5922 vect_generate_tmps_on_preheader (loop_vinfo, ni_name, NULL, &ratio);
eae76e53 5923 }
ebfd146a
IR
5924
5925 /* 1) Make sure the loop header has exactly two entries
5926 2) Make sure we have a preheader basic block. */
5927
5928 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5929
5930 split_edge (loop_preheader_edge (loop));
5931
5932 /* FORNOW: the vectorizer supports only loops whose body consists
b8698a0f
L
5933 of one basic block (header + empty latch). When the vectorizer
5934 supports more involved loop forms, the order in which the BBs are
ebfd146a
IR
5935 traversed will need to be reconsidered. */
5936
5937 for (i = 0; i < nbbs; i++)
5938 {
5939 basic_block bb = bbs[i];
5940 stmt_vec_info stmt_info;
ebfd146a 5941
538dd0b7
DM
5942 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
5943 gsi_next (&si))
ebfd146a 5944 {
538dd0b7 5945 gphi *phi = si.phi ();
73fbfcad 5946 if (dump_enabled_p ())
ebfd146a 5947 {
78c60e3d
SS
5948 dump_printf_loc (MSG_NOTE, vect_location,
5949 "------>vectorizing phi: ");
5950 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
e645e942 5951 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
5952 }
5953 stmt_info = vinfo_for_stmt (phi);
5954 if (!stmt_info)
5955 continue;
5956
aba09491
AO
5957 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
5958 vect_loop_kill_debug_uses (loop, phi);
5959
ebfd146a
IR
5960 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5961 && !STMT_VINFO_LIVE_P (stmt_info))
aba09491 5962 continue;
ebfd146a 5963
c972624e
RB
5964 if (STMT_VINFO_VECTYPE (stmt_info)
5965 && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5966 != (unsigned HOST_WIDE_INT) vectorization_factor)
73fbfcad 5967 && dump_enabled_p ())
e645e942 5968 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
ebfd146a
IR
5969
5970 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5971 {
73fbfcad 5972 if (dump_enabled_p ())
e645e942 5973 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
ebfd146a
IR
5974 vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
5975 }
5976 }
5977
83197f37 5978 pattern_stmt = NULL;
538dd0b7
DM
5979 for (gimple_stmt_iterator si = gsi_start_bb (bb);
5980 !gsi_end_p (si) || transform_pattern_stmt;)
ebfd146a 5981 {
ebfd146a
IR
5982 bool is_store;
5983
83197f37 5984 if (transform_pattern_stmt)
363477c0 5985 stmt = pattern_stmt;
83197f37 5986 else
fbd7e877
RB
5987 {
5988 stmt = gsi_stmt (si);
5989 /* During vectorization remove existing clobber stmts. */
5990 if (gimple_clobber_p (stmt))
5991 {
5992 unlink_stmt_vdef (stmt);
5993 gsi_remove (&si, true);
5994 release_defs (stmt);
5995 continue;
5996 }
5997 }
83197f37 5998
73fbfcad 5999 if (dump_enabled_p ())
ebfd146a 6000 {
78c60e3d
SS
6001 dump_printf_loc (MSG_NOTE, vect_location,
6002 "------>vectorizing statement: ");
6003 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 6004 dump_printf (MSG_NOTE, "\n");
b8698a0f 6005 }
ebfd146a
IR
6006
6007 stmt_info = vinfo_for_stmt (stmt);
6008
6009 /* vector stmts created in the outer-loop during vectorization of
6010 stmts in an inner-loop may not have a stmt_info, and do not
6011 need to be vectorized. */
6012 if (!stmt_info)
6013 {
6014 gsi_next (&si);
6015 continue;
6016 }
6017
aba09491
AO
6018 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
6019 vect_loop_kill_debug_uses (loop, stmt);
6020
ebfd146a
IR
6021 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6022 && !STMT_VINFO_LIVE_P (stmt_info))
9d5e7640
IR
6023 {
6024 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6025 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6026 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6027 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6028 {
6029 stmt = pattern_stmt;
6030 stmt_info = vinfo_for_stmt (stmt);
6031 }
6032 else
6033 {
6034 gsi_next (&si);
6035 continue;
6036 }
ebfd146a 6037 }
83197f37
IR
6038 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
6039 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
6040 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
6041 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
6042 transform_pattern_stmt = true;
ebfd146a 6043
363477c0
JJ
6044 /* If the pattern statement has def stmts, vectorize them too. */
6045 if (is_pattern_stmt_p (stmt_info))
6046 {
6047 if (pattern_def_seq == NULL)
6048 {
6049 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
6050 pattern_def_si = gsi_start (pattern_def_seq);
6051 }
6052 else if (!gsi_end_p (pattern_def_si))
6053 gsi_next (&pattern_def_si);
6054 if (pattern_def_seq != NULL)
6055 {
6056 gimple pattern_def_stmt = NULL;
6057 stmt_vec_info pattern_def_stmt_info = NULL;
1107f3ae 6058
363477c0
JJ
6059 while (!gsi_end_p (pattern_def_si))
6060 {
6061 pattern_def_stmt = gsi_stmt (pattern_def_si);
6062 pattern_def_stmt_info
6063 = vinfo_for_stmt (pattern_def_stmt);
6064 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
6065 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
6066 break;
6067 gsi_next (&pattern_def_si);
6068 }
6069
6070 if (!gsi_end_p (pattern_def_si))
6071 {
73fbfcad 6072 if (dump_enabled_p ())
363477c0 6073 {
78c60e3d
SS
6074 dump_printf_loc (MSG_NOTE, vect_location,
6075 "==> vectorizing pattern def "
6076 "stmt: ");
6077 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
6078 pattern_def_stmt, 0);
e645e942 6079 dump_printf (MSG_NOTE, "\n");
363477c0
JJ
6080 }
6081
6082 stmt = pattern_def_stmt;
6083 stmt_info = pattern_def_stmt_info;
6084 }
6085 else
6086 {
355a7673 6087 pattern_def_si = gsi_none ();
363477c0
JJ
6088 transform_pattern_stmt = false;
6089 }
6090 }
6091 else
6092 transform_pattern_stmt = false;
1107f3ae
IR
6093 }
6094
0136f8f0
AH
6095 if (STMT_VINFO_VECTYPE (stmt_info))
6096 {
6097 unsigned int nunits
6098 = (unsigned int)
6099 TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
6100 if (!STMT_SLP_TYPE (stmt_info)
6101 && nunits != (unsigned int) vectorization_factor
6102 && dump_enabled_p ())
6103 /* For SLP, VF is set according to the unrolling factor, not
6104 the vector size, hence for SLP this print is not valid. */
6105 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
6106 }
ebfd146a
IR
6107
6108 /* SLP. Schedule all the SLP instances when the first SLP stmt is
6109 reached. */
6110 if (STMT_SLP_TYPE (stmt_info))
6111 {
6112 if (!slp_scheduled)
6113 {
6114 slp_scheduled = true;
6115
73fbfcad 6116 if (dump_enabled_p ())
78c60e3d 6117 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 6118 "=== scheduling SLP instances ===\n");
ebfd146a 6119
a70d6342 6120 vect_schedule_slp (loop_vinfo, NULL);
ebfd146a
IR
6121 }
6122
6123 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
c4551b28 6124 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
ebfd146a 6125 {
363477c0
JJ
6126 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
6127 {
6128 pattern_def_seq = NULL;
6129 gsi_next (&si);
6130 }
6131 continue;
ebfd146a
IR
6132 }
6133 }
b8698a0f 6134
ebfd146a 6135 /* -------- vectorize statement ------------ */
73fbfcad 6136 if (dump_enabled_p ())
e645e942 6137 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
ebfd146a 6138
0d0293ac
MM
6139 grouped_store = false;
6140 is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
ebfd146a
IR
6141 if (is_store)
6142 {
0d0293ac 6143 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a
IR
6144 {
6145 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6146 interleaving chain was completed - free all the stores in
6147 the chain. */
78048b1c 6148 gsi_next (&si);
e14c1050 6149 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
ebfd146a
IR
6150 }
6151 else
6152 {
6153 /* Free the attached stmt_vec_info and remove the stmt. */
3d3f2249
RG
6154 gimple store = gsi_stmt (si);
6155 free_stmt_vec_info (store);
6156 unlink_stmt_vdef (store);
ebfd146a 6157 gsi_remove (&si, true);
3d3f2249 6158 release_defs (store);
ebfd146a 6159 }
83197f37 6160
cf5b2be2
CH
6161 /* Stores can only appear at the end of pattern statements. */
6162 gcc_assert (!transform_pattern_stmt);
6163 pattern_def_seq = NULL;
6164 }
6165 else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
363477c0
JJ
6166 {
6167 pattern_def_seq = NULL;
6168 gsi_next (&si);
6169 }
ebfd146a
IR
6170 } /* stmts in BB */
6171 } /* BBs in loop */
6172
6173 slpeel_make_loop_iterate_ntimes (loop, ratio);
6174
22458c5a 6175 /* Reduce loop iterations by the vectorization factor. */
8ddb5a29 6176 scale_loop_profile (loop, GCOV_COMPUTE_SCALE (1, vectorization_factor),
22458c5a
JH
6177 expected_iterations / vectorization_factor);
6178 loop->nb_iterations_upper_bound
807e902e 6179 = wi::udiv_floor (loop->nb_iterations_upper_bound, vectorization_factor);
22458c5a 6180 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
807e902e
KZ
6181 && loop->nb_iterations_upper_bound != 0)
6182 loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - 1;
22458c5a
JH
6183 if (loop->any_estimate)
6184 {
6185 loop->nb_iterations_estimate
807e902e 6186 = wi::udiv_floor (loop->nb_iterations_estimate, vectorization_factor);
22458c5a 6187 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
807e902e
KZ
6188 && loop->nb_iterations_estimate != 0)
6189 loop->nb_iterations_estimate = loop->nb_iterations_estimate - 1;
22458c5a
JH
6190 }
6191
73fbfcad 6192 if (dump_enabled_p ())
ccb3ad87 6193 {
5d318fd4 6194 dump_printf_loc (MSG_NOTE, vect_location,
ccb3ad87
RB
6195 "LOOP VECTORIZED\n");
6196 if (loop->inner)
6197 dump_printf_loc (MSG_NOTE, vect_location,
6198 "OUTER LOOP VECTORIZED\n");
e645e942 6199 dump_printf (MSG_NOTE, "\n");
ccb3ad87 6200 }
ebfd146a 6201}