]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
* ChangeLog: Fix whitespace.
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
d1e082c2 2 Copyright (C) 2007-2013 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
78c60e3d 25#include "dumpfile.h"
ebfd146a
IR
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
cf835838 31#include "gimple-pretty-print.h"
ebfd146a 32#include "tree-flow.h"
7ee2468b 33#include "tree-pass.h"
ebfd146a 34#include "cfgloop.h"
ebfd146a 35#include "expr.h"
7ee2468b 36#include "recog.h" /* FIXME: for insn_data */
ebfd146a
IR
37#include "optabs.h"
38#include "tree-vectorizer.h"
2635892a 39#include "langhooks.h"
ebfd146a 40
a70d6342
IR
41/* Extract the location of the basic block in the source code.
42 Return the basic block location if succeed and NULL if not. */
43
44LOC
45find_bb_location (basic_block bb)
46{
47 gimple stmt = NULL;
48 gimple_stmt_iterator si;
49
50 if (!bb)
51 return UNKNOWN_LOC;
52
53 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
54 {
55 stmt = gsi_stmt (si);
56 if (gimple_location (stmt) != UNKNOWN_LOC)
57 return gimple_location (stmt);
58 }
59
60 return UNKNOWN_LOC;
61}
62
63
ebfd146a
IR
64/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
65
66static void
67vect_free_slp_tree (slp_tree node)
68{
d092494c
IR
69 int i;
70 slp_void_p child;
71
ebfd146a
IR
72 if (!node)
73 return;
74
9771b263 75 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d3cfd39e 76 vect_free_slp_tree ((slp_tree) child);
b8698a0f 77
9771b263
DN
78 SLP_TREE_CHILDREN (node).release ();
79 SLP_TREE_SCALAR_STMTS (node).release ();
80 SLP_TREE_VEC_STMTS (node).release ();
ebfd146a
IR
81
82 free (node);
83}
84
85
86/* Free the memory allocated for the SLP instance. */
87
88void
89vect_free_slp_instance (slp_instance instance)
90{
91 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
9771b263
DN
92 SLP_INSTANCE_LOAD_PERMUTATION (instance).release ();
93 SLP_INSTANCE_LOADS (instance).release ();
94 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
c7e62a26 95 free (instance);
ebfd146a
IR
96}
97
98
d092494c
IR
99/* Create an SLP node for SCALAR_STMTS. */
100
101static slp_tree
9771b263 102vect_create_new_slp_node (vec<gimple> scalar_stmts)
d092494c 103{
d3cfd39e 104 slp_tree node;
9771b263 105 gimple stmt = scalar_stmts[0];
d092494c
IR
106 unsigned int nops;
107
108 if (is_gimple_call (stmt))
109 nops = gimple_call_num_args (stmt);
110 else if (is_gimple_assign (stmt))
f7e531cf
IR
111 {
112 nops = gimple_num_ops (stmt) - 1;
113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114 nops++;
115 }
d092494c
IR
116 else
117 return NULL;
118
d3cfd39e 119 node = XNEW (struct _slp_tree);
d092494c 120 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263
DN
121 SLP_TREE_VEC_STMTS (node).create (0);
122 SLP_TREE_CHILDREN (node).create (nops);
d092494c
IR
123
124 return node;
125}
126
127
128/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
129 operand. */
9771b263 130static vec<slp_oprnd_info>
d092494c
IR
131vect_create_oprnd_info (int nops, int group_size)
132{
133 int i;
134 slp_oprnd_info oprnd_info;
9771b263 135 vec<slp_oprnd_info> oprnds_info;
d092494c 136
9771b263 137 oprnds_info.create (nops);
d092494c
IR
138 for (i = 0; i < nops; i++)
139 {
140 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 141 oprnd_info->def_stmts.create (group_size);
d092494c
IR
142 oprnd_info->first_dt = vect_uninitialized_def;
143 oprnd_info->first_def_type = NULL_TREE;
144 oprnd_info->first_const_oprnd = NULL_TREE;
145 oprnd_info->first_pattern = false;
9771b263 146 oprnds_info.quick_push (oprnd_info);
d092494c
IR
147 }
148
149 return oprnds_info;
150}
151
152
d3cfd39e
JJ
153/* Free operands info. */
154
d092494c 155static void
9771b263 156vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
157{
158 int i;
159 slp_oprnd_info oprnd_info;
160
9771b263 161 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 162 {
9771b263 163 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
164 XDELETE (oprnd_info);
165 }
d092494c 166
9771b263 167 oprnds_info.release ();
d092494c
IR
168}
169
170
171/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
172 they are of a valid type and that they match the defs of the first stmt of
173 the SLP group (stored in OPRNDS_INFO). */
ebfd146a
IR
174
175static bool
a70d6342 176vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
b8698a0f 177 slp_tree slp_node, gimple stmt,
d092494c 178 int ncopies_for_cost, bool first,
9771b263 179 vec<slp_oprnd_info> *oprnds_info,
92345349
BS
180 stmt_vector_for_cost *prologue_cost_vec,
181 stmt_vector_for_cost *body_cost_vec)
ebfd146a
IR
182{
183 tree oprnd;
184 unsigned int i, number_of_oprnds;
d092494c 185 tree def, def_op0 = NULL_TREE;
ebfd146a 186 gimple def_stmt;
d092494c
IR
187 enum vect_def_type dt = vect_uninitialized_def;
188 enum vect_def_type dt_op0 = vect_uninitialized_def;
189 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
190 tree lhs = gimple_get_lhs (stmt);
a70d6342 191 struct loop *loop = NULL;
6aa904c4
IR
192 enum tree_code rhs_code;
193 bool different_types = false;
d092494c
IR
194 bool pattern = false;
195 slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
f7e531cf
IR
196 int op_idx = 1;
197 tree compare_rhs = NULL_TREE;
b8698a0f 198
a70d6342
IR
199 if (loop_vinfo)
200 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a 201
d092494c 202 if (is_gimple_call (stmt))
190c2236
JJ
203 {
204 number_of_oprnds = gimple_call_num_args (stmt);
205 op_idx = 3;
206 }
f7e531cf
IR
207 else if (is_gimple_assign (stmt))
208 {
209 number_of_oprnds = gimple_num_ops (stmt) - 1;
210 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
211 number_of_oprnds++;
212 }
d092494c 213 else
f7e531cf 214 return false;
ebfd146a
IR
215
216 for (i = 0; i < number_of_oprnds; i++)
217 {
f7e531cf
IR
218 if (compare_rhs)
219 {
220 oprnd = compare_rhs;
221 compare_rhs = NULL_TREE;
222 }
223 else
224 oprnd = gimple_op (stmt, op_idx++);
225
9771b263 226 oprnd_info = (*oprnds_info)[i];
ebfd146a 227
f7e531cf
IR
228 if (COMPARISON_CLASS_P (oprnd))
229 {
230 compare_rhs = TREE_OPERAND (oprnd, 1);
231 oprnd = TREE_OPERAND (oprnd, 0);
232 }
233
24ee1384
IR
234 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
235 &def, &dt)
d092494c 236 || (!def_stmt && dt != vect_constant_def))
ebfd146a 237 {
73fbfcad 238 if (dump_enabled_p ())
ebfd146a 239 {
78c60e3d
SS
240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
241 "Build SLP failed: can't find def for ");
242 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
ebfd146a
IR
243 }
244
245 return false;
246 }
247
a70d6342 248 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
ff802fa1 249 from the pattern. Check that all the stmts of the node are in the
ebfd146a 250 pattern. */
f5709183
IR
251 if (def_stmt && gimple_bb (def_stmt)
252 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
253 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
254 && gimple_code (def_stmt) != GIMPLE_PHI))
ebfd146a 255 && vinfo_for_stmt (def_stmt)
83197f37 256 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
f5709183
IR
257 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
258 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
ebfd146a 259 {
d092494c
IR
260 pattern = true;
261 if (!first && !oprnd_info->first_pattern)
262 {
73fbfcad 263 if (dump_enabled_p ())
d092494c 264 {
78c60e3d
SS
265 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
266 "Build SLP failed: some of the stmts"
267 " are in a pattern, and others are not ");
268 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
d092494c 269 }
ebfd146a 270
d092494c 271 return false;
ebfd146a
IR
272 }
273
274 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
d092494c 275 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
ebfd146a 276
f7e531cf 277 if (dt == vect_unknown_def_type)
ebfd146a 278 {
73fbfcad 279 if (dump_enabled_p ())
78c60e3d
SS
280 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
281 "Unsupported pattern.");
ebfd146a
IR
282 return false;
283 }
284
285 switch (gimple_code (def_stmt))
286 {
287 case GIMPLE_PHI:
d092494c 288 def = gimple_phi_result (def_stmt);
ebfd146a
IR
289 break;
290
291 case GIMPLE_ASSIGN:
d092494c 292 def = gimple_assign_lhs (def_stmt);
ebfd146a
IR
293 break;
294
295 default:
73fbfcad 296 if (dump_enabled_p ())
78c60e3d
SS
297 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
298 "unsupported defining stmt: ");
ebfd146a
IR
299 return false;
300 }
301 }
302
d092494c 303 if (first)
ebfd146a 304 {
d092494c
IR
305 oprnd_info->first_dt = dt;
306 oprnd_info->first_pattern = pattern;
307 if (def)
308 {
309 oprnd_info->first_def_type = TREE_TYPE (def);
310 oprnd_info->first_const_oprnd = NULL_TREE;
311 }
ebfd146a 312 else
d092494c
IR
313 {
314 oprnd_info->first_def_type = NULL_TREE;
315 oprnd_info->first_const_oprnd = oprnd;
316 }
ebfd146a 317
d092494c
IR
318 if (i == 0)
319 {
320 def_op0 = def;
321 dt_op0 = dt;
322 /* Analyze costs (for the first stmt of the group only). */
323 if (REFERENCE_CLASS_P (lhs))
324 /* Store. */
325 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
92345349
BS
326 dt, slp_node, prologue_cost_vec,
327 body_cost_vec);
d092494c 328 else
7daac345
JJ
329 {
330 enum vect_def_type dts[2];
331 dts[0] = dt;
332 dts[1] = vect_uninitialized_def;
333 /* Not memory operation (we don't call this function for
334 loads). */
335 vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
92345349 336 prologue_cost_vec, body_cost_vec);
7daac345 337 }
d092494c 338 }
ebfd146a 339 }
ebfd146a
IR
340 else
341 {
d092494c
IR
342 /* Not first stmt of the group, check that the def-stmt/s match
343 the def-stmt/s of the first stmt. Allow different definition
344 types for reduction chains: the first stmt must be a
345 vect_reduction_def (a phi node), and the rest
346 vect_internal_def. */
347 if (((oprnd_info->first_dt != dt
348 && !(oprnd_info->first_dt == vect_reduction_def
349 && dt == vect_internal_def))
350 || (oprnd_info->first_def_type != NULL_TREE
351 && def
352 && !types_compatible_p (oprnd_info->first_def_type,
353 TREE_TYPE (def))))
354 || (!def
355 && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
356 TREE_TYPE (oprnd)))
357 || different_types)
ebfd146a 358 {
d092494c 359 if (number_of_oprnds != 2)
ebfd146a 360 {
73fbfcad 361 if (dump_enabled_p ())
78c60e3d
SS
362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
363 "Build SLP failed: different types ");
d092494c
IR
364
365 return false;
366 }
367
368 /* Try to swap operands in case of binary operation. */
369 if (i == 0)
370 different_types = true;
371 else
b8698a0f 372 {
9771b263 373 oprnd0_info = (*oprnds_info)[0];
d092494c
IR
374 if (is_gimple_assign (stmt)
375 && (rhs_code = gimple_assign_rhs_code (stmt))
376 && TREE_CODE_CLASS (rhs_code) == tcc_binary
377 && commutative_tree_code (rhs_code)
378 && oprnd0_info->first_dt == dt
379 && oprnd_info->first_dt == dt_op0
380 && def_op0 && def
381 && !(oprnd0_info->first_def_type
382 && !types_compatible_p (oprnd0_info->first_def_type,
383 TREE_TYPE (def)))
384 && !(oprnd_info->first_def_type
385 && !types_compatible_p (oprnd_info->first_def_type,
386 TREE_TYPE (def_op0))))
387 {
73fbfcad 388 if (dump_enabled_p ())
d092494c 389 {
78c60e3d
SS
390 dump_printf_loc (MSG_NOTE, vect_location,
391 "Swapping operands of ");
392 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
6aa904c4 393 }
b8698a0f 394
d092494c
IR
395 swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
396 gimple_assign_rhs2_ptr (stmt));
397 }
398 else
399 {
73fbfcad 400 if (dump_enabled_p ())
78c60e3d
SS
401 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
402 "Build SLP failed: different types ");
d092494c
IR
403
404 return false;
6aa904c4 405 }
ebfd146a
IR
406 }
407 }
408 }
409
410 /* Check the types of the definitions. */
d092494c 411 switch (dt)
ebfd146a
IR
412 {
413 case vect_constant_def:
8644a673 414 case vect_external_def:
d092494c 415 case vect_reduction_def:
ebfd146a 416 break;
b8698a0f 417
8644a673 418 case vect_internal_def:
d092494c
IR
419 if (different_types)
420 {
9771b263
DN
421 oprnd0_info = (*oprnds_info)[0];
422 oprnd1_info = (*oprnds_info)[0];
d092494c 423 if (i == 0)
9771b263 424 oprnd1_info->def_stmts.quick_push (def_stmt);
d092494c 425 else
9771b263 426 oprnd0_info->def_stmts.quick_push (def_stmt);
d092494c 427 }
ebfd146a 428 else
9771b263 429 oprnd_info->def_stmts.quick_push (def_stmt);
d092494c 430
ebfd146a
IR
431 break;
432
433 default:
434 /* FORNOW: Not supported. */
73fbfcad 435 if (dump_enabled_p ())
ebfd146a 436 {
78c60e3d
SS
437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
438 "Build SLP failed: illegal type of def ");
439 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
ebfd146a
IR
440 }
441
442 return false;
443 }
444 }
445
446 return true;
447}
448
449
450/* Recursively build an SLP tree starting from NODE.
b8698a0f 451 Fail (and return FALSE) if def-stmts are not isomorphic, require data
ff802fa1 452 permutation or are of unsupported types of operation. Otherwise, return
ebfd146a
IR
453 TRUE. */
454
455static bool
b8698a0f 456vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
c3e7ee41 457 slp_tree *node, unsigned int group_size, int *outside_cost,
a70d6342 458 int ncopies_for_cost, unsigned int *max_nunits,
9771b263
DN
459 vec<int> *load_permutation,
460 vec<slp_tree> *loads,
c3e7ee41 461 unsigned int vectorization_factor, bool *loads_permuted,
92345349
BS
462 stmt_vector_for_cost *prologue_cost_vec,
463 stmt_vector_for_cost *body_cost_vec)
ebfd146a 464{
ebfd146a 465 unsigned int i;
9771b263
DN
466 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (*node);
467 gimple stmt = stmts[0];
2200fc49 468 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
f7e531cf 469 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a
IR
470 tree lhs;
471 bool stop_recursion = false, need_same_oprnds = false;
472 tree vectype, scalar_type, first_op1 = NULL_TREE;
a70d6342 473 unsigned int ncopies;
ebfd146a
IR
474 optab optab;
475 int icode;
476 enum machine_mode optab_op2_mode;
477 enum machine_mode vec_mode;
ebfd146a 478 struct data_reference *first_dr;
ebfd146a
IR
479 HOST_WIDE_INT dummy;
480 bool permutation = false;
481 unsigned int load_place;
c3e7ee41 482 gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
9771b263 483 vec<slp_oprnd_info> oprnds_info;
d092494c
IR
484 unsigned int nops;
485 slp_oprnd_info oprnd_info;
f7e531cf 486 tree cond;
d092494c
IR
487
488 if (is_gimple_call (stmt))
489 nops = gimple_call_num_args (stmt);
f7e531cf
IR
490 else if (is_gimple_assign (stmt))
491 {
492 nops = gimple_num_ops (stmt) - 1;
493 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
494 nops++;
495 }
d092494c 496 else
f7e531cf 497 return false;
d092494c
IR
498
499 oprnds_info = vect_create_oprnd_info (nops, group_size);
ebfd146a
IR
500
501 /* For every stmt in NODE find its def stmt/s. */
9771b263 502 FOR_EACH_VEC_ELT (stmts, i, stmt)
ebfd146a 503 {
73fbfcad 504 if (dump_enabled_p ())
ebfd146a 505 {
78c60e3d
SS
506 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
507 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
508 }
509
4b5caab7
IR
510 /* Fail to vectorize statements marked as unvectorizable. */
511 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
512 {
73fbfcad 513 if (dump_enabled_p ())
4b5caab7 514 {
78c60e3d
SS
515 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
516 "Build SLP failed: unvectorizable statement ");
517 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4b5caab7
IR
518 }
519
9771b263 520 vect_free_oprnd_info (oprnds_info);
4b5caab7
IR
521 return false;
522 }
523
ebfd146a
IR
524 lhs = gimple_get_lhs (stmt);
525 if (lhs == NULL_TREE)
526 {
73fbfcad 527 if (dump_enabled_p ())
ebfd146a 528 {
78c60e3d
SS
529 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
530 "Build SLP failed: not GIMPLE_ASSIGN nor "
531 "GIMPLE_CALL ");
532 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 533 }
b8698a0f 534
9771b263 535 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
536 return false;
537 }
538
f7e531cf
IR
539 if (is_gimple_assign (stmt)
540 && gimple_assign_rhs_code (stmt) == COND_EXPR
541 && (cond = gimple_assign_rhs1 (stmt))
542 && !COMPARISON_CLASS_P (cond))
543 {
73fbfcad 544 if (dump_enabled_p ())
f7e531cf 545 {
78c60e3d
SS
546 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
547 "Build SLP failed: condition is not "
548 "comparison ");
549 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
f7e531cf
IR
550 }
551
9771b263 552 vect_free_oprnd_info (oprnds_info);
f7e531cf
IR
553 return false;
554 }
555
b8698a0f 556 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
ebfd146a
IR
557 vectype = get_vectype_for_scalar_type (scalar_type);
558 if (!vectype)
559 {
73fbfcad 560 if (dump_enabled_p ())
ebfd146a 561 {
78c60e3d
SS
562 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
563 "Build SLP failed: unsupported data-type ");
564 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
565 scalar_type);
ebfd146a 566 }
d092494c 567
9771b263 568 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
569 return false;
570 }
b8698a0f 571
4ef69dfc
IR
572 /* In case of multiple types we need to detect the smallest type. */
573 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
a70d6342 574 {
4ef69dfc
IR
575 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
576 if (bb_vinfo)
577 vectorization_factor = *max_nunits;
a70d6342 578 }
b8698a0f 579
4ef69dfc 580 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
b8698a0f 581
ebfd146a 582 if (is_gimple_call (stmt))
190c2236
JJ
583 {
584 rhs_code = CALL_EXPR;
585 if (gimple_call_internal_p (stmt)
586 || gimple_call_tail_p (stmt)
587 || gimple_call_noreturn_p (stmt)
588 || !gimple_call_nothrow_p (stmt)
589 || gimple_call_chain (stmt))
590 {
73fbfcad 591 if (dump_enabled_p ())
190c2236 592 {
78c60e3d
SS
593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
594 "Build SLP failed: unsupported call type ");
595 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
190c2236
JJ
596 }
597
9771b263 598 vect_free_oprnd_info (oprnds_info);
190c2236
JJ
599 return false;
600 }
601 }
ebfd146a
IR
602 else
603 rhs_code = gimple_assign_rhs_code (stmt);
604
605 /* Check the operation. */
606 if (i == 0)
607 {
608 first_stmt_code = rhs_code;
609
b8698a0f 610 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
611 vector shift with scalar shift operand. */
612 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
613 || rhs_code == LROTATE_EXPR
614 || rhs_code == RROTATE_EXPR)
615 {
616 vec_mode = TYPE_MODE (vectype);
617
618 /* First see if we have a vector/vector shift. */
619 optab = optab_for_tree_code (rhs_code, vectype,
620 optab_vector);
621
622 if (!optab
947131ba 623 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
624 {
625 /* No vector/vector shift, try for a vector/scalar shift. */
626 optab = optab_for_tree_code (rhs_code, vectype,
627 optab_scalar);
628
629 if (!optab)
630 {
73fbfcad 631 if (dump_enabled_p ())
78c60e3d
SS
632 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
633 "Build SLP failed: no optab.");
9771b263 634 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
635 return false;
636 }
947131ba 637 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
638 if (icode == CODE_FOR_nothing)
639 {
73fbfcad 640 if (dump_enabled_p ())
78c60e3d
SS
641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
642 "Build SLP failed: "
643 "op not supported by target.");
9771b263 644 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
645 return false;
646 }
647 optab_op2_mode = insn_data[icode].operand[2].mode;
648 if (!VECTOR_MODE_P (optab_op2_mode))
649 {
650 need_same_oprnds = true;
651 first_op1 = gimple_assign_rhs2 (stmt);
652 }
653 }
654 }
36ba4aae
IR
655 else if (rhs_code == WIDEN_LSHIFT_EXPR)
656 {
657 need_same_oprnds = true;
658 first_op1 = gimple_assign_rhs2 (stmt);
659 }
ebfd146a
IR
660 }
661 else
662 {
663 if (first_stmt_code != rhs_code
664 && (first_stmt_code != IMAGPART_EXPR
665 || rhs_code != REALPART_EXPR)
666 && (first_stmt_code != REALPART_EXPR
69f11a13 667 || rhs_code != IMAGPART_EXPR)
0d0293ac 668 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
69f11a13
IR
669 && (first_stmt_code == ARRAY_REF
670 || first_stmt_code == INDIRECT_REF
671 || first_stmt_code == COMPONENT_REF
672 || first_stmt_code == MEM_REF)))
ebfd146a 673 {
73fbfcad 674 if (dump_enabled_p ())
ebfd146a 675 {
78c60e3d
SS
676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
677 "Build SLP failed: different operation "
678 "in stmt ");
679 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 680 }
b8698a0f 681
9771b263 682 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
683 return false;
684 }
b8698a0f
L
685
686 if (need_same_oprnds
ebfd146a
IR
687 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
688 {
73fbfcad 689 if (dump_enabled_p ())
ebfd146a 690 {
78c60e3d
SS
691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
692 "Build SLP failed: different shift "
693 "arguments in ");
694 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 695 }
b8698a0f 696
9771b263 697 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
698 return false;
699 }
190c2236
JJ
700
701 if (rhs_code == CALL_EXPR)
702 {
9771b263 703 gimple first_stmt = stmts[0];
190c2236
JJ
704 if (gimple_call_num_args (stmt) != nops
705 || !operand_equal_p (gimple_call_fn (first_stmt),
706 gimple_call_fn (stmt), 0)
707 || gimple_call_fntype (first_stmt)
708 != gimple_call_fntype (stmt))
709 {
73fbfcad 710 if (dump_enabled_p ())
190c2236 711 {
78c60e3d
SS
712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
713 "Build SLP failed: different calls in ");
714 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
715 stmt, 0);
190c2236
JJ
716 }
717
9771b263 718 vect_free_oprnd_info (oprnds_info);
190c2236
JJ
719 return false;
720 }
721 }
ebfd146a
IR
722 }
723
0d0293ac
MM
724 /* Grouped store or load. */
725 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
ebfd146a
IR
726 {
727 if (REFERENCE_CLASS_P (lhs))
728 {
729 /* Store. */
b8698a0f 730 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
d092494c 731 stmt, ncopies_for_cost,
c3e7ee41 732 (i == 0), &oprnds_info,
92345349
BS
733 prologue_cost_vec,
734 body_cost_vec))
d092494c 735 {
9771b263 736 vect_free_oprnd_info (oprnds_info);
d092494c
IR
737 return false;
738 }
ebfd146a 739 }
b5aeb3bb
IR
740 else
741 {
742 /* Load. */
743 /* FORNOW: Check that there is no gap between the loads. */
e14c1050
IR
744 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
745 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
746 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
747 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
b5aeb3bb 748 {
73fbfcad 749 if (dump_enabled_p ())
b5aeb3bb 750 {
78c60e3d
SS
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
752 "Build SLP failed: grouped "
753 "loads have gaps ");
754 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
755 stmt, 0);
b5aeb3bb 756 }
b8698a0f 757
9771b263 758 vect_free_oprnd_info (oprnds_info);
b5aeb3bb
IR
759 return false;
760 }
2f0fa28e 761
b5aeb3bb
IR
762 /* Check that the size of interleaved loads group is not
763 greater than the SLP group size. */
6aa904c4
IR
764 if (loop_vinfo
765 && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
b5aeb3bb 766 {
73fbfcad 767 if (dump_enabled_p ())
b5aeb3bb 768 {
78c60e3d
SS
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
770 "Build SLP failed: the number "
771 "of interleaved loads is greater than "
772 "the SLP group size ");
773 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
774 stmt, 0);
b5aeb3bb 775 }
b8698a0f 776
9771b263 777 vect_free_oprnd_info (oprnds_info);
b5aeb3bb
IR
778 return false;
779 }
780
c3e7ee41 781 old_first_load = first_load;
e14c1050 782 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
b5aeb3bb
IR
783 if (prev_first_load)
784 {
785 /* Check that there are no loads from different interleaving
ff802fa1 786 chains in the same node. The only exception is complex
b5aeb3bb
IR
787 numbers. */
788 if (prev_first_load != first_load
78c60e3d 789 && rhs_code != REALPART_EXPR
b5aeb3bb 790 && rhs_code != IMAGPART_EXPR)
78c60e3d 791 {
73fbfcad 792 if (dump_enabled_p ())
b5aeb3bb 793 {
78c60e3d
SS
794 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
795 vect_location,
796 "Build SLP failed: different "
797 "interleaving chains in one node ");
798 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
799 stmt, 0);
b5aeb3bb 800 }
78c60e3d 801
9771b263 802 vect_free_oprnd_info (oprnds_info);
b5aeb3bb
IR
803 return false;
804 }
805 }
806 else
807 prev_first_load = first_load;
b8698a0f 808
c3e7ee41
BS
809 /* In some cases a group of loads is just the same load
810 repeated N times. Only analyze its cost once. */
811 if (first_load == stmt && old_first_load != first_load)
ebfd146a
IR
812 {
813 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
720f5239 814 if (vect_supportable_dr_alignment (first_dr, false)
ebfd146a
IR
815 == dr_unaligned_unsupported)
816 {
73fbfcad 817 if (dump_enabled_p ())
ebfd146a 818 {
78c60e3d
SS
819 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
820 vect_location,
821 "Build SLP failed: unsupported "
822 "unaligned load ");
823 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
824 stmt, 0);
ebfd146a 825 }
b8698a0f 826
9771b263 827 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
828 return false;
829 }
b8698a0f 830
ebfd146a
IR
831 /* Analyze costs (for the first stmt in the group). */
832 vect_model_load_cost (vinfo_for_stmt (stmt),
c3e7ee41 833 ncopies_for_cost, false, *node,
92345349 834 prologue_cost_vec, body_cost_vec);
ebfd146a 835 }
b8698a0f 836
ff802fa1 837 /* Store the place of this load in the interleaving chain. In
ebfd146a
IR
838 case that permutation is needed we later decide if a specific
839 permutation is supported. */
840 load_place = vect_get_place_in_interleaving_chain (stmt,
841 first_load);
842 if (load_place != i)
843 permutation = true;
b8698a0f 844
9771b263 845 load_permutation->safe_push (load_place);
b8698a0f 846
ebfd146a
IR
847 /* We stop the tree when we reach a group of loads. */
848 stop_recursion = true;
849 continue;
850 }
0d0293ac 851 } /* Grouped access. */
ebfd146a
IR
852 else
853 {
854 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
855 {
0d0293ac 856 /* Not grouped load. */
73fbfcad 857 if (dump_enabled_p ())
ebfd146a 858 {
78c60e3d
SS
859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
860 "Build SLP failed: not grouped load ");
861 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a
IR
862 }
863
0d0293ac 864 /* FORNOW: Not grouped loads are not supported. */
9771b263 865 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
866 return false;
867 }
868
869 /* Not memory operation. */
870 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 871 && TREE_CODE_CLASS (rhs_code) != tcc_unary
190c2236
JJ
872 && rhs_code != COND_EXPR
873 && rhs_code != CALL_EXPR)
ebfd146a 874 {
73fbfcad 875 if (dump_enabled_p ())
ebfd146a 876 {
78c60e3d
SS
877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
878 "Build SLP failed: operation");
879 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
880 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a
IR
881 }
882
9771b263 883 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
884 return false;
885 }
886
f7e531cf
IR
887 if (rhs_code == COND_EXPR)
888 {
889 tree cond_expr = gimple_assign_rhs1 (stmt);
890
891 if (i == 0)
892 first_cond_code = TREE_CODE (cond_expr);
893 else if (first_cond_code != TREE_CODE (cond_expr))
894 {
73fbfcad 895 if (dump_enabled_p ())
f7e531cf 896 {
78c60e3d
SS
897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
898 "Build SLP failed: different"
899 " operation");
900 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
901 stmt, 0);
f7e531cf
IR
902 }
903
9771b263 904 vect_free_oprnd_info (oprnds_info);
f7e531cf
IR
905 return false;
906 }
907 }
908
b8698a0f 909 /* Find the def-stmts. */
a70d6342 910 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
d092494c 911 ncopies_for_cost, (i == 0),
92345349
BS
912 &oprnds_info, prologue_cost_vec,
913 body_cost_vec))
d092494c 914 {
9771b263 915 vect_free_oprnd_info (oprnds_info);
d092494c
IR
916 return false;
917 }
ebfd146a
IR
918 }
919 }
920
0d0293ac 921 /* Grouped loads were reached - stop the recursion. */
ebfd146a
IR
922 if (stop_recursion)
923 {
9771b263 924 loads->safe_push (*node);
ebfd146a
IR
925 if (permutation)
926 {
9771b263 927 gimple first_stmt = stmts[0];
6aa904c4 928 *loads_permuted = true;
92345349
BS
929 (void) record_stmt_cost (body_cost_vec, group_size, vec_perm,
930 vinfo_for_stmt (first_stmt), 0, vect_body);
ebfd146a 931 }
2200fc49 932 else
6aa904c4
IR
933 {
934 /* We don't check here complex numbers chains, so we set
935 LOADS_PERMUTED for further check in
936 vect_supported_load_permutation_p. */
2200fc49 937 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
6aa904c4 938 *loads_permuted = true;
2200fc49 939 }
ebfd146a 940
9771b263 941 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
942 return true;
943 }
944
b8698a0f 945 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 946 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 947 {
d092494c 948 slp_tree child;
b8698a0f 949
d092494c
IR
950 if (oprnd_info->first_dt != vect_internal_def)
951 continue;
ebfd146a 952
d092494c
IR
953 child = vect_create_new_slp_node (oprnd_info->def_stmts);
954 if (!child
955 || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
c3e7ee41
BS
956 outside_cost, ncopies_for_cost,
957 max_nunits, load_permutation, loads,
958 vectorization_factor, loads_permuted,
92345349 959 prologue_cost_vec, body_cost_vec))
d092494c 960 {
d3cfd39e 961 if (child)
6e1aa848 962 oprnd_info->def_stmts = vNULL;
d3cfd39e 963 vect_free_slp_tree (child);
9771b263 964 vect_free_oprnd_info (oprnds_info);
d092494c
IR
965 return false;
966 }
b8698a0f 967
9771b263
DN
968 oprnd_info->def_stmts.create (0);
969 SLP_TREE_CHILDREN (*node).quick_push (child);
ebfd146a
IR
970 }
971
9771b263 972 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
973 return true;
974}
975
78c60e3d 976/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
977
978static void
78c60e3d 979vect_print_slp_tree (int dump_kind, slp_tree node)
ebfd146a
IR
980{
981 int i;
982 gimple stmt;
d092494c 983 slp_void_p child;
ebfd146a
IR
984
985 if (!node)
986 return;
987
78c60e3d 988 dump_printf (dump_kind, "node ");
9771b263 989 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a 990 {
78c60e3d
SS
991 dump_printf (dump_kind, "\n\tstmt %d ", i);
992 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
ebfd146a 993 }
78c60e3d 994 dump_printf (dump_kind, "\n");
ebfd146a 995
9771b263 996 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
78c60e3d 997 vect_print_slp_tree (dump_kind, (slp_tree) child);
ebfd146a
IR
998}
999
1000
b8698a0f
L
1001/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1002 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1003 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1004 stmts in NODE are to be marked. */
1005
1006static void
1007vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1008{
1009 int i;
1010 gimple stmt;
d092494c 1011 slp_void_p child;
ebfd146a
IR
1012
1013 if (!node)
1014 return;
1015
9771b263 1016 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
1017 if (j < 0 || i == j)
1018 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1019
9771b263 1020 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d092494c 1021 vect_mark_slp_stmts ((slp_tree) child, mark, j);
ebfd146a
IR
1022}
1023
1024
a70d6342
IR
1025/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1026
1027static void
1028vect_mark_slp_stmts_relevant (slp_tree node)
1029{
1030 int i;
1031 gimple stmt;
1032 stmt_vec_info stmt_info;
d092494c 1033 slp_void_p child;
a70d6342
IR
1034
1035 if (!node)
1036 return;
1037
9771b263 1038 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
1039 {
1040 stmt_info = vinfo_for_stmt (stmt);
b8698a0f 1041 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1042 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1043 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1044 }
1045
9771b263 1046 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d092494c 1047 vect_mark_slp_stmts_relevant ((slp_tree) child);
a70d6342
IR
1048}
1049
1050
b8698a0f 1051/* Check if the permutation required by the SLP INSTANCE is supported.
ebfd146a
IR
1052 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
1053
1054static bool
1055vect_supported_slp_permutation_p (slp_instance instance)
1056{
9771b263
DN
1057 slp_tree node = SLP_INSTANCE_LOADS (instance)[0];
1058 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
e14c1050 1059 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
6e1aa848 1060 vec<slp_tree> sorted_loads = vNULL;
ebfd146a
IR
1061 int index;
1062 slp_tree *tmp_loads = NULL;
b8698a0f 1063 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
ebfd146a 1064 slp_tree load;
b8698a0f
L
1065
1066 /* FORNOW: The only supported loads permutation is loads from the same
ebfd146a 1067 location in all the loads in the node, when the data-refs in
b8698a0f 1068 nodes of LOADS constitute an interleaving chain.
ebfd146a
IR
1069 Sort the nodes according to the order of accesses in the chain. */
1070 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
b8698a0f 1071 for (i = 0, j = 0;
9771b263
DN
1072 SLP_INSTANCE_LOAD_PERMUTATION (instance).iterate (i, &index)
1073 && SLP_INSTANCE_LOADS (instance).iterate (j, &load);
ebfd146a
IR
1074 i += group_size, j++)
1075 {
9771b263 1076 gimple scalar_stmt = SLP_TREE_SCALAR_STMTS (load)[0];
ebfd146a 1077 /* Check that the loads are all in the same interleaving chain. */
e14c1050 1078 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
ebfd146a 1079 {
73fbfcad 1080 if (dump_enabled_p ())
ebfd146a 1081 {
78c60e3d
SS
1082 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1083 "Build SLP failed: unsupported data "
1084 "permutation ");
1085 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1086 scalar_stmt, 0);
ebfd146a 1087 }
b8698a0f 1088
ebfd146a 1089 free (tmp_loads);
b8698a0f 1090 return false;
ebfd146a
IR
1091 }
1092
1093 tmp_loads[index] = load;
1094 }
b8698a0f 1095
9771b263 1096 sorted_loads.create (group_size);
ebfd146a 1097 for (i = 0; i < group_size; i++)
9771b263 1098 sorted_loads.safe_push (tmp_loads[i]);
ebfd146a 1099
9771b263 1100 SLP_INSTANCE_LOADS (instance).release ();
ebfd146a
IR
1101 SLP_INSTANCE_LOADS (instance) = sorted_loads;
1102 free (tmp_loads);
1103
6e1aa848 1104 if (!vect_transform_slp_perm_load (stmt, vNULL, NULL,
ebfd146a
IR
1105 SLP_INSTANCE_UNROLLING_FACTOR (instance),
1106 instance, true))
1107 return false;
1108
1109 return true;
1110}
1111
1112
b5aeb3bb
IR
1113/* Rearrange the statements of NODE according to PERMUTATION. */
1114
1115static void
1116vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
9771b263 1117 vec<int> permutation)
b5aeb3bb
IR
1118{
1119 gimple stmt;
9771b263 1120 vec<gimple> tmp_stmts;
b5aeb3bb 1121 unsigned int index, i;
d092494c 1122 slp_void_p child;
b5aeb3bb
IR
1123
1124 if (!node)
1125 return;
1126
9771b263 1127 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d092494c 1128 vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);
b5aeb3bb 1129
9771b263
DN
1130 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1131 tmp_stmts.create (group_size);
b5aeb3bb
IR
1132
1133 for (i = 0; i < group_size; i++)
9771b263 1134 tmp_stmts.safe_push (NULL);
b5aeb3bb 1135
9771b263 1136 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
b5aeb3bb 1137 {
9771b263
DN
1138 index = permutation[i];
1139 tmp_stmts[index] = stmt;
b5aeb3bb
IR
1140 }
1141
9771b263 1142 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1143 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1144}
1145
1146
ebfd146a
IR
1147/* Check if the required load permutation is supported.
1148 LOAD_PERMUTATION contains a list of indices of the loads.
0d0293ac 1149 In SLP this permutation is relative to the order of grouped stores that are
ebfd146a
IR
1150 the base of the SLP instance. */
1151
1152static bool
1153vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
9771b263 1154 vec<int> load_permutation)
ebfd146a 1155{
b5aeb3bb
IR
1156 int i = 0, j, prev = -1, next, k, number_of_groups;
1157 bool supported, bad_permutation = false;
7417f6c0 1158 sbitmap load_index;
2200fc49 1159 slp_tree node, other_complex_node;
6aa904c4 1160 gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
2200fc49 1161 unsigned complex_numbers = 0;
6aa904c4
IR
1162 struct data_reference *dr;
1163 bb_vec_info bb_vinfo;
ebfd146a 1164
a70d6342 1165 /* FORNOW: permutations are only supported in SLP. */
ebfd146a
IR
1166 if (!slp_instn)
1167 return false;
1168
73fbfcad 1169 if (dump_enabled_p ())
ebfd146a 1170 {
78c60e3d 1171 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
9771b263 1172 FOR_EACH_VEC_ELT (load_permutation, i, next)
78c60e3d 1173 dump_printf (MSG_NOTE, "%d ", next);
ebfd146a
IR
1174 }
1175
b5aeb3bb
IR
1176 /* In case of reduction every load permutation is allowed, since the order
1177 of the reduction statements is not important (as opposed to the case of
0d0293ac 1178 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1179 load nodes are of the same size and have the same permutation (and then
1180 rearrange all the nodes of the SLP instance according to this
1181 permutation). */
1182
1183 /* Check that all the load nodes are of the same size. */
9771b263 1184 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
2200fc49 1185 {
9771b263 1186 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
2200fc49
IR
1187 return false;
1188
9771b263 1189 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2200fc49
IR
1190 if (is_gimple_assign (stmt)
1191 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
1192 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
1193 complex_numbers++;
1194 }
1195
1196 /* Complex operands can be swapped as following:
1197 real_c = real_b + real_a;
1198 imag_c = imag_a + imag_b;
1199 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
ff802fa1 1200 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
2200fc49
IR
1201 chains are mixed, they match the above pattern. */
1202 if (complex_numbers)
1203 {
9771b263 1204 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
2200fc49 1205 {
9771b263 1206 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, stmt)
2200fc49
IR
1207 {
1208 if (j == 0)
1209 first = stmt;
1210 else
1211 {
e14c1050 1212 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
2200fc49
IR
1213 {
1214 if (complex_numbers != 2)
1215 return false;
1216
1217 if (i == 0)
1218 k = 1;
1219 else
1220 k = 0;
1221
9771b263
DN
1222 other_complex_node = SLP_INSTANCE_LOADS (slp_instn)[k];
1223 other_node_first =
1224 SLP_TREE_SCALAR_STMTS (other_complex_node)[0];
2200fc49 1225
e14c1050 1226 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2200fc49
IR
1227 != other_node_first)
1228 return false;
1229 }
1230 }
1231 }
1232 }
1233 }
1234
1235 /* We checked that this case ok, so there is no need to proceed with
1236 permutation tests. */
7772bae0 1237 if (complex_numbers == 2
9771b263 1238 && SLP_INSTANCE_LOADS (slp_instn).length () == 2)
2200fc49 1239 {
9771b263
DN
1240 SLP_INSTANCE_LOADS (slp_instn).release ();
1241 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
2200fc49
IR
1242 return true;
1243 }
1244
b5aeb3bb 1245 node = SLP_INSTANCE_TREE (slp_instn);
9771b263 1246 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb
IR
1247 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1248 instance, not all the loads belong to the same node or interleaving
ff802fa1 1249 group. Hence, we need to divide them into groups according to
b5aeb3bb 1250 GROUP_SIZE. */
9771b263 1251 number_of_groups = load_permutation.length () / group_size;
b5aeb3bb 1252
b010117a
IR
1253 /* Reduction (there are no data-refs in the root).
1254 In reduction chain the order of the loads is important. */
1255 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1256 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb
IR
1257 {
1258 int first_group_load_index;
1259
1260 /* Compare all the permutation sequences to the first one. */
1261 for (i = 1; i < number_of_groups; i++)
1262 {
1263 k = 0;
1264 for (j = i * group_size; j < i * group_size + group_size; j++)
1265 {
9771b263
DN
1266 next = load_permutation[j];
1267 first_group_load_index = load_permutation[k];
b5aeb3bb
IR
1268
1269 if (next != first_group_load_index)
1270 {
1271 bad_permutation = true;
1272 break;
1273 }
1274
1275 k++;
1276 }
1277
1278 if (bad_permutation)
1279 break;
1280 }
1281
1282 if (!bad_permutation)
1283 {
c9c1e775
IR
1284 /* Check that the loads in the first sequence are different and there
1285 are no gaps between them. */
1286 load_index = sbitmap_alloc (group_size);
f61e445a 1287 bitmap_clear (load_index);
c9c1e775
IR
1288 for (k = 0; k < group_size; k++)
1289 {
9771b263 1290 first_group_load_index = load_permutation[k];
d7c028c0 1291 if (bitmap_bit_p (load_index, first_group_load_index))
c9c1e775
IR
1292 {
1293 bad_permutation = true;
1294 break;
1295 }
1296
d7c028c0 1297 bitmap_set_bit (load_index, first_group_load_index);
c9c1e775
IR
1298 }
1299
1300 if (!bad_permutation)
1301 for (k = 0; k < group_size; k++)
d7c028c0 1302 if (!bitmap_bit_p (load_index, k))
c9c1e775
IR
1303 {
1304 bad_permutation = true;
1305 break;
1306 }
1307
1308 sbitmap_free (load_index);
1309 }
1310
1311 if (!bad_permutation)
1312 {
1313 /* This permutation is valid for reduction. Since the order of the
b5aeb3bb
IR
1314 statements in the nodes is not important unless they are memory
1315 accesses, we can rearrange the statements in all the nodes
1316 according to the order of the loads. */
1317 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1318 load_permutation);
9771b263 1319 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
b5aeb3bb
IR
1320 return true;
1321 }
1322 }
1323
6aa904c4
IR
1324 /* In basic block vectorization we allow any subchain of an interleaving
1325 chain.
1326 FORNOW: not supported in loop SLP because of realignment compications. */
1327 bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1328 bad_permutation = false;
073a8998 1329 /* Check that for every node in the instance the loads form a subchain. */
6aa904c4
IR
1330 if (bb_vinfo)
1331 {
9771b263 1332 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4
IR
1333 {
1334 next_load = NULL;
1335 first_load = NULL;
9771b263 1336 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
6aa904c4
IR
1337 {
1338 if (!first_load)
1339 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1340 else if (first_load
1341 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1342 {
1343 bad_permutation = true;
1344 break;
1345 }
1346
1347 if (j != 0 && next_load != load)
1348 {
1349 bad_permutation = true;
1350 break;
1351 }
1352
1353 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1354 }
1355
1356 if (bad_permutation)
1357 break;
1358 }
1359
1360 /* Check that the alignment of the first load in every subchain, i.e.,
1361 the first statement in every load node, is supported. */
1362 if (!bad_permutation)
1363 {
9771b263 1364 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1365 {
9771b263 1366 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
6aa904c4
IR
1367 if (first_load
1368 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1369 {
1370 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1371 if (vect_supportable_dr_alignment (dr, false)
1372 == dr_unaligned_unsupported)
1373 {
73fbfcad 1374 if (dump_enabled_p ())
6aa904c4 1375 {
78c60e3d
SS
1376 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1377 vect_location,
1378 "unsupported unaligned load ");
1379 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1380 first_load, 0);
6aa904c4
IR
1381 }
1382 bad_permutation = true;
1383 break;
1384 }
1385 }
1386 }
1387
1388 if (!bad_permutation)
1389 {
9771b263 1390 SLP_INSTANCE_LOAD_PERMUTATION (slp_instn).release ();
6aa904c4
IR
1391 return true;
1392 }
1393 }
1394 }
1395
b8698a0f
L
1396 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1397 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
b5aeb3bb 1398 well (unless it's reduction). */
9771b263 1399 if (load_permutation.length ()
ebfd146a
IR
1400 != (unsigned int) (group_size * group_size))
1401 return false;
1402
1403 supported = true;
7417f6c0 1404 load_index = sbitmap_alloc (group_size);
f61e445a 1405 bitmap_clear (load_index);
ebfd146a
IR
1406 for (j = 0; j < group_size; j++)
1407 {
1408 for (i = j * group_size, k = 0;
9771b263 1409 load_permutation.iterate (i, &next) && k < group_size;
ebfd146a
IR
1410 i++, k++)
1411 {
1412 if (i != j * group_size && next != prev)
1413 {
1414 supported = false;
1415 break;
1416 }
1417
1418 prev = next;
b8698a0f 1419 }
7417f6c0 1420
d7c028c0 1421 if (bitmap_bit_p (load_index, prev))
7417f6c0
IR
1422 {
1423 supported = false;
1424 break;
1425 }
1426
d7c028c0 1427 bitmap_set_bit (load_index, prev);
ebfd146a 1428 }
59eefaa6
IR
1429
1430 for (j = 0; j < group_size; j++)
d7c028c0 1431 if (!bitmap_bit_p (load_index, j))
b8d381a3
JJ
1432 {
1433 sbitmap_free (load_index);
1434 return false;
1435 }
59eefaa6 1436
7417f6c0 1437 sbitmap_free (load_index);
ebfd146a
IR
1438
1439 if (supported && i == group_size * group_size
1440 && vect_supported_slp_permutation_p (slp_instn))
1441 return true;
1442
b8698a0f 1443 return false;
ebfd146a
IR
1444}
1445
1446
b8698a0f 1447/* Find the first load in the loop that belongs to INSTANCE.
ebfd146a 1448 When loads are in several SLP nodes, there can be a case in which the first
b8698a0f 1449 load does not appear in the first SLP node to be transformed, causing
ff802fa1 1450 incorrect order of statements. Since we generate all the loads together,
ebfd146a
IR
1451 they must be inserted before the first load of the SLP instance and not
1452 before the first load of the first node of the instance. */
ff802fa1 1453
b8698a0f
L
1454static gimple
1455vect_find_first_load_in_slp_instance (slp_instance instance)
ebfd146a
IR
1456{
1457 int i, j;
1458 slp_tree load_node;
1459 gimple first_load = NULL, load;
1460
9771b263
DN
1461 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1462 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
ebfd146a 1463 first_load = get_earlier_stmt (load, first_load);
b8698a0f 1464
ebfd146a
IR
1465 return first_load;
1466}
1467
1468
e4a707c4 1469/* Find the last store in SLP INSTANCE. */
ff802fa1 1470
e4a707c4
IR
1471static gimple
1472vect_find_last_store_in_slp_instance (slp_instance instance)
1473{
1474 int i;
1475 slp_tree node;
1476 gimple last_store = NULL, store;
1477
1478 node = SLP_INSTANCE_TREE (instance);
9771b263 1479 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
e4a707c4
IR
1480 last_store = get_later_stmt (store, last_store);
1481
1482 return last_store;
1483}
1484
1485
0d0293ac 1486/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1487 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1488 Return FALSE if it's impossible to SLP any stmt in the loop. */
1489
1490static bool
a70d6342
IR
1491vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1492 gimple stmt)
ebfd146a
IR
1493{
1494 slp_instance new_instance;
d092494c 1495 slp_tree node;
e14c1050 1496 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
ebfd146a 1497 unsigned int unrolling_factor = 1, nunits;
b5aeb3bb 1498 tree vectype, scalar_type = NULL_TREE;
ebfd146a 1499 gimple next;
0f900dfa 1500 unsigned int vectorization_factor = 0;
c3e7ee41 1501 int outside_cost = 0, ncopies_for_cost, i;
ebfd146a 1502 unsigned int max_nunits = 0;
9771b263
DN
1503 vec<int> load_permutation;
1504 vec<slp_tree> loads;
b5aeb3bb 1505 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
6aa904c4 1506 bool loads_permuted = false;
9771b263 1507 vec<gimple> scalar_stmts;
92345349
BS
1508 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1509 stmt_info_for_cost *si;
b5aeb3bb 1510
b010117a 1511 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1512 {
b010117a
IR
1513 if (dr)
1514 {
1515 scalar_type = TREE_TYPE (DR_REF (dr));
1516 vectype = get_vectype_for_scalar_type (scalar_type);
1517 }
1518 else
1519 {
1520 gcc_assert (loop_vinfo);
1521 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1522 }
1523
e14c1050 1524 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
1525 }
1526 else
1527 {
1528 gcc_assert (loop_vinfo);
1529 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
9771b263 1530 group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
b5aeb3bb 1531 }
b8698a0f 1532
ebfd146a
IR
1533 if (!vectype)
1534 {
73fbfcad 1535 if (dump_enabled_p ())
ebfd146a 1536 {
78c60e3d
SS
1537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1538 "Build SLP failed: unsupported data-type ");
1539 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
ebfd146a 1540 }
b5aeb3bb 1541
ebfd146a
IR
1542 return false;
1543 }
1544
1545 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342
IR
1546 if (loop_vinfo)
1547 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1548 else
a70d6342
IR
1549 vectorization_factor = nunits;
1550
a70d6342
IR
1551 /* Calculate the unrolling factor. */
1552 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1553 if (unrolling_factor != 1 && !loop_vinfo)
1554 {
73fbfcad 1555 if (dump_enabled_p ())
78c60e3d
SS
1556 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1557 "Build SLP failed: unrolling required in basic"
1558 " block SLP");
b8698a0f 1559
a70d6342
IR
1560 return false;
1561 }
1562
0d0293ac 1563 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1564 scalar_stmts.create (group_size);
ebfd146a 1565 next = stmt;
b010117a 1566 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
ebfd146a 1567 {
b5aeb3bb
IR
1568 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1569 while (next)
1570 {
f7e531cf
IR
1571 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1572 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
9771b263
DN
1573 scalar_stmts.safe_push (
1574 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f7e531cf 1575 else
9771b263 1576 scalar_stmts.safe_push (next);
e14c1050 1577 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb
IR
1578 }
1579 }
1580 else
1581 {
1582 /* Collect reduction statements. */
9771b263
DN
1583 vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1584 for (i = 0; reductions.iterate (i, &next); i++)
1585 scalar_stmts.safe_push (next);
ebfd146a
IR
1586 }
1587
d092494c 1588 node = vect_create_new_slp_node (scalar_stmts);
ebfd146a 1589
ebfd146a
IR
1590 /* Calculate the number of vector stmts to create based on the unrolling
1591 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1592 GROUP_SIZE / NUNITS otherwise. */
1593 ncopies_for_cost = unrolling_factor * group_size / nunits;
b8698a0f 1594
9771b263
DN
1595 load_permutation.create (group_size * group_size);
1596 loads.create (group_size);
1597 prologue_cost_vec.create (10);
1598 body_cost_vec.create (10);
ebfd146a
IR
1599
1600 /* Build the tree for the SLP instance. */
b8698a0f 1601 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
c3e7ee41 1602 &outside_cost, ncopies_for_cost,
b8698a0f 1603 &max_nunits, &load_permutation, &loads,
c3e7ee41 1604 vectorization_factor, &loads_permuted,
92345349 1605 &prologue_cost_vec, &body_cost_vec))
ebfd146a 1606 {
92345349
BS
1607 void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1608 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1609
4ef69dfc 1610 /* Calculate the unrolling factor based on the smallest type. */
ebfd146a
IR
1611 if (max_nunits > nunits)
1612 unrolling_factor = least_common_multiple (max_nunits, group_size)
1613 / group_size;
b8698a0f 1614
4ef69dfc
IR
1615 if (unrolling_factor != 1 && !loop_vinfo)
1616 {
73fbfcad 1617 if (dump_enabled_p ())
78c60e3d
SS
1618 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1619 "Build SLP failed: unrolling required in basic"
1620 " block SLP");
c7e62a26 1621 vect_free_slp_tree (node);
9771b263
DN
1622 body_cost_vec.release ();
1623 prologue_cost_vec.release ();
1624 load_permutation.release ();
1625 loads.release ();
4ef69dfc
IR
1626 return false;
1627 }
1628
1629 /* Create a new SLP instance. */
1630 new_instance = XNEW (struct _slp_instance);
1631 SLP_INSTANCE_TREE (new_instance) = node;
1632 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 1633 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
92345349 1634 SLP_INSTANCE_BODY_COST_VEC (new_instance) = body_cost_vec;
ebfd146a
IR
1635 SLP_INSTANCE_LOADS (new_instance) = loads;
1636 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1637 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
6aa904c4
IR
1638
1639 if (loads_permuted)
ebfd146a
IR
1640 {
1641 if (!vect_supported_load_permutation_p (new_instance, group_size,
b8698a0f 1642 load_permutation))
ebfd146a 1643 {
73fbfcad 1644 if (dump_enabled_p ())
ebfd146a 1645 {
78c60e3d
SS
1646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1647 "Build SLP failed: unsupported load "
1648 "permutation ");
1649 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a
IR
1650 }
1651
1652 vect_free_slp_instance (new_instance);
9771b263 1653 prologue_cost_vec.release ();
ebfd146a
IR
1654 return false;
1655 }
1656
1657 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1658 = vect_find_first_load_in_slp_instance (new_instance);
1659 }
1660 else
9771b263 1661 SLP_INSTANCE_LOAD_PERMUTATION (new_instance).release ();
ebfd146a 1662
92345349
BS
1663 /* Record the prologue costs, which were delayed until we were
1664 sure that SLP was successful. Unlike the body costs, we know
1665 the final values now regardless of the loop vectorization factor. */
9771b263 1666 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
92345349
BS
1667 {
1668 struct _stmt_vec_info *stmt_info
1669 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1670 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1671 si->misalign, vect_prologue);
1672 }
1673
9771b263 1674 prologue_cost_vec.release ();
92345349 1675
a70d6342 1676 if (loop_vinfo)
9771b263 1677 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
a70d6342 1678 else
9771b263 1679 BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
b8698a0f 1680
73fbfcad 1681 if (dump_enabled_p ())
78c60e3d 1682 vect_print_slp_tree (MSG_NOTE, node);
ebfd146a
IR
1683
1684 return true;
1685 }
c3e7ee41 1686 else
92345349 1687 {
9771b263
DN
1688 body_cost_vec.release ();
1689 prologue_cost_vec.release ();
92345349 1690 }
ebfd146a
IR
1691
1692 /* Failed to SLP. */
1693 /* Free the allocated memory. */
1694 vect_free_slp_tree (node);
9771b263
DN
1695 load_permutation.release ();
1696 loads.release ();
b8698a0f 1697
a70d6342 1698 return false;
ebfd146a
IR
1699}
1700
1701
ff802fa1 1702/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
1703 trees of packed scalar stmts if SLP is possible. */
1704
1705bool
a70d6342 1706vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a
IR
1707{
1708 unsigned int i;
9771b263 1709 vec<gimple> grouped_stores;
6e1aa848
DN
1710 vec<gimple> reductions = vNULL;
1711 vec<gimple> reduc_chains = vNULL;
b010117a 1712 gimple first_element;
a70d6342 1713 bool ok = false;
ebfd146a 1714
73fbfcad 1715 if (dump_enabled_p ())
78c60e3d 1716 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===");
ebfd146a 1717
a70d6342 1718 if (loop_vinfo)
b5aeb3bb 1719 {
0d0293ac 1720 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
b010117a 1721 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
b5aeb3bb
IR
1722 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1723 }
a70d6342 1724 else
0d0293ac 1725 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
b8698a0f 1726
0d0293ac 1727 /* Find SLP sequences starting from groups of grouped stores. */
9771b263 1728 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
b010117a 1729 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
a70d6342 1730 ok = true;
ebfd146a 1731
b8698a0f 1732 if (bb_vinfo && !ok)
a70d6342 1733 {
73fbfcad 1734 if (dump_enabled_p ())
78c60e3d
SS
1735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1736 "Failed to SLP the basic block.");
a70d6342
IR
1737
1738 return false;
1739 }
ebfd146a 1740
b010117a 1741 if (loop_vinfo
9771b263 1742 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
b010117a
IR
1743 {
1744 /* Find SLP sequences starting from reduction chains. */
9771b263 1745 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
b010117a
IR
1746 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1747 ok = true;
1748 else
1749 return false;
1750
1751 /* Don't try to vectorize SLP reductions if reduction chain was
1752 detected. */
1753 return ok;
1754 }
1755
b5aeb3bb 1756 /* Find SLP sequences starting from groups of reductions. */
9771b263
DN
1757 if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1758 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
b5aeb3bb
IR
1759 ok = true;
1760
ebfd146a
IR
1761 return true;
1762}
1763
1764
1765/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
1766 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1767 least one instance. */
ebfd146a 1768
437f4a00 1769bool
ebfd146a
IR
1770vect_make_slp_decision (loop_vec_info loop_vinfo)
1771{
1772 unsigned int i, unrolling_factor = 1;
9771b263 1773 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
1774 slp_instance instance;
1775 int decided_to_slp = 0;
1776
73fbfcad 1777 if (dump_enabled_p ())
78c60e3d 1778 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===");
ebfd146a 1779
9771b263 1780 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
1781 {
1782 /* FORNOW: SLP if you can. */
1783 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1784 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1785
ff802fa1 1786 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 1787 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 1788 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
1789 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1790 decided_to_slp++;
1791 }
1792
1793 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1794
73fbfcad 1795 if (decided_to_slp && dump_enabled_p ())
78c60e3d
SS
1796 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1797 "Decided to SLP %d instances. Unrolling factor %d",
1798 decided_to_slp, unrolling_factor);
437f4a00
IR
1799
1800 return (decided_to_slp > 0);
ebfd146a
IR
1801}
1802
1803
1804/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 1805 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
1806
1807static void
1808vect_detect_hybrid_slp_stmts (slp_tree node)
1809{
1810 int i;
9771b263
DN
1811 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1812 gimple stmt = stmts[0];
ebfd146a
IR
1813 imm_use_iterator imm_iter;
1814 gimple use_stmt;
f2c74cc4 1815 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
d092494c 1816 slp_void_p child;
f2c74cc4
IR
1817 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1818 struct loop *loop = NULL;
1819 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1820 basic_block bb = NULL;
ebfd146a
IR
1821
1822 if (!node)
1823 return;
1824
f2c74cc4
IR
1825 if (loop_vinfo)
1826 loop = LOOP_VINFO_LOOP (loop_vinfo);
1827 else
1828 bb = BB_VINFO_BB (bb_vinfo);
1829
9771b263 1830 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
1831 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1832 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1833 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
f2c74cc4
IR
1834 if (gimple_bb (use_stmt)
1835 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1836 || bb == gimple_bb (use_stmt))
1837 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
99f51320
IR
1838 && !STMT_SLP_TYPE (stmt_vinfo)
1839 && (STMT_VINFO_RELEVANT (stmt_vinfo)
b5aeb3bb 1840 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
f2c74cc4
IR
1841 && !(gimple_code (use_stmt) == GIMPLE_PHI
1842 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1843 == vect_reduction_def))
ebfd146a
IR
1844 vect_mark_slp_stmts (node, hybrid, i);
1845
9771b263 1846 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d092494c 1847 vect_detect_hybrid_slp_stmts ((slp_tree) child);
ebfd146a
IR
1848}
1849
1850
1851/* Find stmts that must be both vectorized and SLPed. */
1852
1853void
1854vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1855{
1856 unsigned int i;
9771b263 1857 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
1858 slp_instance instance;
1859
73fbfcad 1860 if (dump_enabled_p ())
78c60e3d 1861 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===");
ebfd146a 1862
9771b263 1863 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
1864 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1865}
1866
a70d6342
IR
1867
1868/* Create and initialize a new bb_vec_info struct for BB, as well as
1869 stmt_vec_info structs for all the stmts in it. */
b8698a0f 1870
a70d6342
IR
1871static bb_vec_info
1872new_bb_vec_info (basic_block bb)
1873{
1874 bb_vec_info res = NULL;
1875 gimple_stmt_iterator gsi;
1876
1877 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1878 BB_VINFO_BB (res) = bb;
1879
1880 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1881 {
1882 gimple stmt = gsi_stmt (gsi);
1883 gimple_set_uid (stmt, 0);
1884 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1885 }
1886
9771b263
DN
1887 BB_VINFO_GROUPED_STORES (res).create (10);
1888 BB_VINFO_SLP_INSTANCES (res).create (2);
c3e7ee41 1889 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
a70d6342
IR
1890
1891 bb->aux = res;
1892 return res;
1893}
1894
1895
1896/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1897 stmts in the basic block. */
1898
1899static void
1900destroy_bb_vec_info (bb_vec_info bb_vinfo)
1901{
9771b263 1902 vec<slp_instance> slp_instances;
c7e62a26 1903 slp_instance instance;
a70d6342
IR
1904 basic_block bb;
1905 gimple_stmt_iterator si;
c7e62a26 1906 unsigned i;
a70d6342
IR
1907
1908 if (!bb_vinfo)
1909 return;
1910
1911 bb = BB_VINFO_BB (bb_vinfo);
1912
1913 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1914 {
1915 gimple stmt = gsi_stmt (si);
1916 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1917
1918 if (stmt_info)
1919 /* Free stmt_vec_info. */
1920 free_stmt_vec_info (stmt);
1921 }
1922
01be8516
SP
1923 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1924 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
9771b263 1925 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
c7e62a26 1926 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
9771b263 1927 FOR_EACH_VEC_ELT (slp_instances, i, instance)
c7e62a26 1928 vect_free_slp_instance (instance);
9771b263 1929 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
c3e7ee41 1930 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
a70d6342
IR
1931 free (bb_vinfo);
1932 bb->aux = NULL;
1933}
1934
1935
1936/* Analyze statements contained in SLP tree node after recursively analyzing
1937 the subtree. Return TRUE if the operations are supported. */
1938
1939static bool
1940vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1941{
1942 bool dummy;
1943 int i;
1944 gimple stmt;
d092494c 1945 slp_void_p child;
a70d6342
IR
1946
1947 if (!node)
1948 return true;
1949
9771b263 1950 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d092494c
IR
1951 if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1952 return false;
a70d6342 1953
9771b263 1954 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
1955 {
1956 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1957 gcc_assert (stmt_info);
1958 gcc_assert (PURE_SLP_STMT (stmt_info));
1959
1960 if (!vect_analyze_stmt (stmt, &dummy, node))
1961 return false;
1962 }
1963
1964 return true;
1965}
1966
1967
ff802fa1 1968/* Analyze statements in SLP instances of the basic block. Return TRUE if the
a70d6342
IR
1969 operations are supported. */
1970
1971static bool
1972vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1973{
9771b263 1974 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
a70d6342
IR
1975 slp_instance instance;
1976 int i;
1977
9771b263 1978 for (i = 0; slp_instances.iterate (i, &instance); )
a70d6342 1979 {
b8698a0f 1980 if (!vect_slp_analyze_node_operations (bb_vinfo,
a70d6342
IR
1981 SLP_INSTANCE_TREE (instance)))
1982 {
1983 vect_free_slp_instance (instance);
9771b263 1984 slp_instances.ordered_remove (i);
a70d6342
IR
1985 }
1986 else
1987 i++;
b8698a0f
L
1988 }
1989
9771b263 1990 if (!slp_instances.length ())
a70d6342
IR
1991 return false;
1992
1993 return true;
1994}
1995
69f11a13
IR
1996/* Check if vectorization of the basic block is profitable. */
1997
1998static bool
1999vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2000{
9771b263 2001 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2002 slp_instance instance;
c3e7ee41
BS
2003 int i, j;
2004 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2005 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2006 unsigned int stmt_cost;
2007 gimple stmt;
2008 gimple_stmt_iterator si;
2009 basic_block bb = BB_VINFO_BB (bb_vinfo);
92345349 2010 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
69f11a13 2011 stmt_vec_info stmt_info = NULL;
92345349 2012 stmt_vector_for_cost body_cost_vec;
c3e7ee41 2013 stmt_info_for_cost *ci;
69f11a13
IR
2014
2015 /* Calculate vector costs. */
9771b263 2016 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2017 {
92345349 2018 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
c3e7ee41 2019
9771b263 2020 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
92345349
BS
2021 {
2022 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2023 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2024 stmt_info, ci->misalign, vect_body);
2025 }
69f11a13
IR
2026 }
2027
2028 /* Calculate scalar cost. */
2029 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2030 {
2031 stmt = gsi_stmt (si);
2032 stmt_info = vinfo_for_stmt (stmt);
2033
2034 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
2035 || !PURE_SLP_STMT (stmt_info))
2036 continue;
2037
2038 if (STMT_VINFO_DATA_REF (stmt_info))
2039 {
2040 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
92345349 2041 stmt_cost = vect_get_stmt_cost (scalar_load);
69f11a13 2042 else
92345349 2043 stmt_cost = vect_get_stmt_cost (scalar_store);
69f11a13
IR
2044 }
2045 else
92345349 2046 stmt_cost = vect_get_stmt_cost (scalar_stmt);
69f11a13
IR
2047
2048 scalar_cost += stmt_cost;
2049 }
2050
c3e7ee41 2051 /* Complete the target-specific cost calculation. */
92345349
BS
2052 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2053 &vec_inside_cost, &vec_epilogue_cost);
2054
2055 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2056
73fbfcad 2057 if (dump_enabled_p ())
69f11a13 2058 {
78c60e3d
SS
2059 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2060 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2061 vec_inside_cost);
2062 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2063 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
2064 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d", scalar_cost);
69f11a13
IR
2065 }
2066
2067 /* Vectorization is profitable if its cost is less than the cost of scalar
2068 version. */
2069 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2070 return false;
2071
2072 return true;
2073}
2074
2075/* Check if the basic block can be vectorized. */
a70d6342 2076
8e19f5a1
IR
2077static bb_vec_info
2078vect_slp_analyze_bb_1 (basic_block bb)
a70d6342
IR
2079{
2080 bb_vec_info bb_vinfo;
9771b263 2081 vec<slp_instance> slp_instances;
a70d6342 2082 slp_instance instance;
8e19f5a1 2083 int i;
777e1f09
RG
2084 int min_vf = 2;
2085 int max_vf = MAX_VECTORIZATION_FACTOR;
e4a707c4 2086
a70d6342
IR
2087 bb_vinfo = new_bb_vec_info (bb);
2088 if (!bb_vinfo)
2089 return NULL;
2090
777e1f09 2091 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
a70d6342 2092 {
73fbfcad 2093 if (dump_enabled_p ())
78c60e3d
SS
2094 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2095 "not vectorized: unhandled data-ref in basic "
2096 "block.\n");
b8698a0f 2097
a70d6342
IR
2098 destroy_bb_vec_info (bb_vinfo);
2099 return NULL;
2100 }
2101
fcac74a1 2102 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2103 {
73fbfcad 2104 if (dump_enabled_p ())
78c60e3d
SS
2105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2106 "not vectorized: not enough data-refs in "
2107 "basic block.\n");
a70d6342
IR
2108
2109 destroy_bb_vec_info (bb_vinfo);
2110 return NULL;
2111 }
2112
f5709183
IR
2113 vect_pattern_recog (NULL, bb_vinfo);
2114
2115 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
5bfdb7d8 2116 || min_vf > max_vf)
777e1f09 2117 {
73fbfcad 2118 if (dump_enabled_p ())
78c60e3d
SS
2119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2120 "not vectorized: unhandled data dependence "
2121 "in basic block.\n");
777e1f09
RG
2122
2123 destroy_bb_vec_info (bb_vinfo);
2124 return NULL;
2125 }
2126
a70d6342
IR
2127 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2128 {
73fbfcad 2129 if (dump_enabled_p ())
78c60e3d
SS
2130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2131 "not vectorized: bad data alignment in basic "
2132 "block.\n");
b8698a0f 2133
a70d6342
IR
2134 destroy_bb_vec_info (bb_vinfo);
2135 return NULL;
2136 }
b8698a0f 2137
a70d6342
IR
2138 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2139 {
73fbfcad 2140 if (dump_enabled_p ())
78c60e3d
SS
2141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2142 "not vectorized: unhandled data access in "
2143 "basic block.\n");
b8698a0f 2144
a70d6342
IR
2145 destroy_bb_vec_info (bb_vinfo);
2146 return NULL;
2147 }
2148
a70d6342
IR
2149 /* Check the SLP opportunities in the basic block, analyze and build SLP
2150 trees. */
2151 if (!vect_analyze_slp (NULL, bb_vinfo))
2152 {
73fbfcad 2153 if (dump_enabled_p ())
78c60e3d
SS
2154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2155 "not vectorized: failed to find SLP opportunities "
2156 "in basic block.\n");
a70d6342
IR
2157
2158 destroy_bb_vec_info (bb_vinfo);
2159 return NULL;
2160 }
b8698a0f 2161
a70d6342
IR
2162 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2163
2164 /* Mark all the statements that we want to vectorize as pure SLP and
2165 relevant. */
9771b263 2166 FOR_EACH_VEC_ELT (slp_instances, i, instance)
a70d6342
IR
2167 {
2168 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2169 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
b8698a0f 2170 }
a70d6342 2171
c3e7ee41 2172 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
38eec4c6 2173 {
73fbfcad 2174 if (dump_enabled_p ())
78c60e3d
SS
2175 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2176 "not vectorized: unsupported alignment in basic "
2177 "block.\n");
38eec4c6
UW
2178 destroy_bb_vec_info (bb_vinfo);
2179 return NULL;
2180 }
2181
a70d6342
IR
2182 if (!vect_slp_analyze_operations (bb_vinfo))
2183 {
73fbfcad 2184 if (dump_enabled_p ())
78c60e3d
SS
2185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2186 "not vectorized: bad operation in basic block.\n");
a70d6342
IR
2187
2188 destroy_bb_vec_info (bb_vinfo);
2189 return NULL;
2190 }
2191
69f11a13
IR
2192 /* Cost model: check if the vectorization is worthwhile. */
2193 if (flag_vect_cost_model
2194 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2195 {
73fbfcad 2196 if (dump_enabled_p ())
78c60e3d
SS
2197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2198 "not vectorized: vectorization is not "
2199 "profitable.\n");
69f11a13
IR
2200
2201 destroy_bb_vec_info (bb_vinfo);
2202 return NULL;
2203 }
2204
73fbfcad 2205 if (dump_enabled_p ())
78c60e3d
SS
2206 dump_printf_loc (MSG_NOTE, vect_location,
2207 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2208
2209 return bb_vinfo;
2210}
2211
2212
8e19f5a1
IR
2213bb_vec_info
2214vect_slp_analyze_bb (basic_block bb)
2215{
2216 bb_vec_info bb_vinfo;
2217 int insns = 0;
2218 gimple_stmt_iterator gsi;
2219 unsigned int vector_sizes;
2220
73fbfcad 2221 if (dump_enabled_p ())
78c60e3d 2222 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
8e19f5a1
IR
2223
2224 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2225 {
2226 gimple stmt = gsi_stmt (gsi);
2227 if (!is_gimple_debug (stmt)
2228 && !gimple_nop_p (stmt)
2229 && gimple_code (stmt) != GIMPLE_LABEL)
2230 insns++;
2231 }
2232
2233 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2234 {
73fbfcad 2235 if (dump_enabled_p ())
78c60e3d
SS
2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2237 "not vectorized: too many instructions in "
2238 "basic block.\n");
8e19f5a1
IR
2239
2240 return NULL;
2241 }
2242
2243 /* Autodetect first vector size we try. */
2244 current_vector_size = 0;
2245 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2246
2247 while (1)
2248 {
2249 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2250 if (bb_vinfo)
2251 return bb_vinfo;
2252
2253 destroy_bb_vec_info (bb_vinfo);
2254
2255 vector_sizes &= ~current_vector_size;
2256 if (vector_sizes == 0
2257 || current_vector_size == 0)
2258 return NULL;
2259
2260 /* Try the next biggest vector size. */
2261 current_vector_size = 1 << floor_log2 (vector_sizes);
73fbfcad 2262 if (dump_enabled_p ())
78c60e3d
SS
2263 dump_printf_loc (MSG_NOTE, vect_location,
2264 "***** Re-trying analysis with "
2265 "vector size %d\n", current_vector_size);
8e19f5a1
IR
2266 }
2267}
2268
2269
b8698a0f 2270/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
ff802fa1
IR
2271 the number of created vector stmts depends on the unrolling factor).
2272 However, the actual number of vector stmts for every SLP node depends on
2273 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2274 should be updated. In this function we assume that the inside costs
2275 calculated in vect_model_xxx_cost are linear in ncopies. */
ebfd146a
IR
2276
2277void
2278vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2279{
c3e7ee41 2280 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9771b263 2281 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a 2282 slp_instance instance;
92345349 2283 stmt_vector_for_cost body_cost_vec;
c3e7ee41 2284 stmt_info_for_cost *si;
92345349 2285 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
ebfd146a 2286
73fbfcad 2287 if (dump_enabled_p ())
78c60e3d
SS
2288 dump_printf_loc (MSG_NOTE, vect_location,
2289 "=== vect_update_slp_costs_according_to_vf ===");
ebfd146a 2290
9771b263 2291 FOR_EACH_VEC_ELT (slp_instances, i, instance)
c3e7ee41
BS
2292 {
2293 /* We assume that costs are linear in ncopies. */
2294 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2295
2296 /* Record the instance's instructions in the target cost model.
2297 This was delayed until here because the count of instructions
2298 isn't known beforehand. */
92345349 2299 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
c3e7ee41 2300
9771b263 2301 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
92345349
BS
2302 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2303 vinfo_for_stmt (si->stmt), si->misalign,
2304 vect_body);
c3e7ee41 2305 }
ebfd146a
IR
2306}
2307
a70d6342 2308
b8698a0f
L
2309/* For constant and loop invariant defs of SLP_NODE this function returns
2310 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
2311 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2312 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
2313 REDUC_INDEX is the index of the reduction operand in the statements, unless
2314 it is -1. */
ebfd146a
IR
2315
2316static void
9dc3f7de 2317vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 2318 vec<tree> *vec_oprnds,
b5aeb3bb
IR
2319 unsigned int op_num, unsigned int number_of_vectors,
2320 int reduc_index)
ebfd146a 2321{
9771b263
DN
2322 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2323 gimple stmt = stmts[0];
ebfd146a 2324 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
d2a12ae7 2325 unsigned nunits;
ebfd146a 2326 tree vec_cst;
d2a12ae7
RG
2327 tree *elts;
2328 unsigned j, number_of_places_left_in_vector;
ebfd146a 2329 tree vector_type;
9dc3f7de 2330 tree vop;
9771b263 2331 int group_size = stmts.length ();
ebfd146a 2332 unsigned int vec_num, i;
d2a12ae7 2333 unsigned number_of_copies = 1;
9771b263
DN
2334 vec<tree> voprnds;
2335 voprnds.create (number_of_vectors);
ebfd146a 2336 bool constant_p, is_store;
b5aeb3bb 2337 tree neutral_op = NULL;
bac430c9 2338 enum tree_code code = gimple_expr_code (stmt);
0e93a64e
IR
2339 gimple def_stmt;
2340 struct loop *loop;
13396b6e 2341 gimple_seq ctor_seq = NULL;
b5aeb3bb 2342
29ed4920
IR
2343 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2344 && reduc_index != -1)
b5aeb3bb 2345 {
b5aeb3bb 2346 op_num = reduc_index - 1;
9dc3f7de 2347 op = gimple_op (stmt, reduc_index);
b5aeb3bb 2348 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
ff802fa1 2349 we need either neutral operands or the original operands. See
b5aeb3bb
IR
2350 get_initial_def_for_reduction() for details. */
2351 switch (code)
2352 {
2353 case WIDEN_SUM_EXPR:
2354 case DOT_PROD_EXPR:
2355 case PLUS_EXPR:
2356 case MINUS_EXPR:
2357 case BIT_IOR_EXPR:
2358 case BIT_XOR_EXPR:
2359 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2360 neutral_op = build_real (TREE_TYPE (op), dconst0);
2361 else
2362 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2363
2364 break;
2365
2366 case MULT_EXPR:
b5aeb3bb
IR
2367 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2368 neutral_op = build_real (TREE_TYPE (op), dconst1);
2369 else
2370 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2371
2372 break;
2373
c1e822d5
IR
2374 case BIT_AND_EXPR:
2375 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2376 break;
2377
0e93a64e
IR
2378 case MAX_EXPR:
2379 case MIN_EXPR:
2380 def_stmt = SSA_NAME_DEF_STMT (op);
2381 loop = (gimple_bb (stmt))->loop_father;
2382 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2383 loop_preheader_edge (loop));
2384 break;
2385
b5aeb3bb 2386 default:
0e93a64e 2387 neutral_op = NULL;
b5aeb3bb
IR
2388 }
2389 }
ebfd146a
IR
2390
2391 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2392 {
2393 is_store = true;
2394 op = gimple_assign_rhs1 (stmt);
2395 }
2396 else
9dc3f7de
IR
2397 is_store = false;
2398
2399 gcc_assert (op);
ebfd146a
IR
2400
2401 if (CONSTANT_CLASS_P (op))
d59dc888 2402 constant_p = true;
ebfd146a 2403 else
d59dc888
IR
2404 constant_p = false;
2405
9dc3f7de 2406 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
cd481d83 2407 gcc_assert (vector_type);
ebfd146a
IR
2408 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2409
2410 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 2411 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
2412
2413 For example, we have two scalar operands, s1 and s2 (e.g., group of
2414 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
2415 of this type can be packed in a vector). The output vector will contain
2416 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
2417 will be 2).
2418
b8698a0f 2419 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
2420 containing the operands.
2421
2422 For example, NUNITS is four as before, and the group size is 8
f7e531cf 2423 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 2424 {s5, s6, s7, s8}. */
b8698a0f 2425
ebfd146a
IR
2426 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2427
2428 number_of_places_left_in_vector = nunits;
d2a12ae7 2429 elts = XALLOCAVEC (tree, nunits);
ebfd146a
IR
2430 for (j = 0; j < number_of_copies; j++)
2431 {
9771b263 2432 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
ebfd146a
IR
2433 {
2434 if (is_store)
2435 op = gimple_assign_rhs1 (stmt);
bac430c9 2436 else
f7e531cf 2437 {
bac430c9 2438 switch (code)
f7e531cf 2439 {
bac430c9
IR
2440 case COND_EXPR:
2441 if (op_num == 0 || op_num == 1)
2442 {
2443 tree cond = gimple_assign_rhs1 (stmt);
2444 op = TREE_OPERAND (cond, op_num);
2445 }
2446 else
2447 {
2448 if (op_num == 2)
2449 op = gimple_assign_rhs2 (stmt);
2450 else
2451 op = gimple_assign_rhs3 (stmt);
2452 }
2453 break;
2454
2455 case CALL_EXPR:
2456 op = gimple_call_arg (stmt, op_num);
2457 break;
2458
b84b294a
JJ
2459 case LSHIFT_EXPR:
2460 case RSHIFT_EXPR:
2461 case LROTATE_EXPR:
2462 case RROTATE_EXPR:
2463 op = gimple_op (stmt, op_num + 1);
2464 /* Unlike the other binary operators, shifts/rotates have
2465 the shift count being int, instead of the same type as
2466 the lhs, so make sure the scalar is the right type if
2467 we are dealing with vectors of
2468 long long/long/short/char. */
2469 if (op_num == 1 && constant_p)
2470 op = fold_convert (TREE_TYPE (vector_type), op);
2471 break;
2472
bac430c9
IR
2473 default:
2474 op = gimple_op (stmt, op_num + 1);
b84b294a 2475 break;
f7e531cf
IR
2476 }
2477 }
b8698a0f 2478
b5aeb3bb
IR
2479 if (reduc_index != -1)
2480 {
0e93a64e
IR
2481 loop = (gimple_bb (stmt))->loop_father;
2482 def_stmt = SSA_NAME_DEF_STMT (op);
b5aeb3bb
IR
2483
2484 gcc_assert (loop);
b010117a
IR
2485
2486 /* Get the def before the loop. In reduction chain we have only
2487 one initial value. */
2488 if ((j != (number_of_copies - 1)
2489 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2490 && i != 0))
2491 && neutral_op)
b5aeb3bb 2492 op = neutral_op;
b010117a
IR
2493 else
2494 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2495 loop_preheader_edge (loop));
b5aeb3bb
IR
2496 }
2497
ebfd146a 2498 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 2499 number_of_places_left_in_vector--;
13396b6e 2500 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 2501 {
13396b6e
JJ
2502 if (constant_p)
2503 {
2504 op = fold_unary (VIEW_CONVERT_EXPR,
2505 TREE_TYPE (vector_type), op);
2506 gcc_assert (op && CONSTANT_CLASS_P (op));
2507 }
2508 else
2509 {
2510 tree new_temp
2511 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2512 gimple init_stmt;
2513 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2514 op);
2515 init_stmt
2516 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2517 new_temp, op, NULL_TREE);
2518 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2519 op = new_temp;
2520 }
50eeef09 2521 }
d2a12ae7 2522 elts[number_of_places_left_in_vector] = op;
ebfd146a
IR
2523
2524 if (number_of_places_left_in_vector == 0)
2525 {
2526 number_of_places_left_in_vector = nunits;
2527
2528 if (constant_p)
d2a12ae7 2529 vec_cst = build_vector (vector_type, elts);
ebfd146a 2530 else
d2a12ae7 2531 {
9771b263 2532 vec<constructor_elt, va_gc> *v;
d2a12ae7 2533 unsigned k;
9771b263 2534 vec_alloc (v, nunits);
d2a12ae7
RG
2535 for (k = 0; k < nunits; ++k)
2536 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2537 vec_cst = build_constructor (vector_type, v);
2538 }
9771b263
DN
2539 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2540 vector_type, NULL));
13396b6e
JJ
2541 if (ctor_seq != NULL)
2542 {
9771b263 2543 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
13396b6e
JJ
2544 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2545 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2546 GSI_SAME_STMT);
2547 ctor_seq = NULL;
2548 }
ebfd146a
IR
2549 }
2550 }
2551 }
2552
b8698a0f 2553 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 2554 them. */
9771b263 2555 vec_num = voprnds.length ();
d2a12ae7 2556 for (j = vec_num; j != 0; j--)
ebfd146a 2557 {
9771b263
DN
2558 vop = voprnds[j - 1];
2559 vec_oprnds->quick_push (vop);
ebfd146a
IR
2560 }
2561
9771b263 2562 voprnds.release ();
ebfd146a
IR
2563
2564 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
2565 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2566 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 2567 to replicate the vectors. */
9771b263 2568 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 2569 {
b5aeb3bb
IR
2570 tree neutral_vec = NULL;
2571
2572 if (neutral_op)
2573 {
2574 if (!neutral_vec)
b9acc9f1 2575 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 2576
9771b263 2577 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
2578 }
2579 else
2580 {
9771b263
DN
2581 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2582 vec_oprnds->quick_push (vop);
b5aeb3bb 2583 }
ebfd146a
IR
2584 }
2585}
2586
2587
2588/* Get vectorized definitions from SLP_NODE that contains corresponding
2589 vectorized def-stmts. */
2590
2591static void
9771b263 2592vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
2593{
2594 tree vec_oprnd;
2595 gimple vec_def_stmt;
2596 unsigned int i;
2597
9771b263 2598 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 2599
9771b263 2600 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
ebfd146a
IR
2601 {
2602 gcc_assert (vec_def_stmt);
2603 vec_oprnd = gimple_get_lhs (vec_def_stmt);
9771b263 2604 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
2605 }
2606}
2607
2608
b8698a0f
L
2609/* Get vectorized definitions for SLP_NODE.
2610 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
2611 call vect_get_constant_vectors() to create vector stmts.
2612 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
2613 must be stored in the corresponding child of SLP_NODE, and we call
2614 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 2615
ebfd146a 2616void
9771b263 2617vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
37b5ec8f 2618 vec<vec<tree> > *vec_oprnds, int reduc_index)
ebfd146a 2619{
e44978dc 2620 gimple first_stmt;
d092494c
IR
2621 int number_of_vects = 0, i;
2622 unsigned int child_index = 0;
b8698a0f 2623 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
d092494c 2624 slp_tree child = NULL;
37b5ec8f 2625 vec<tree> vec_defs;
e44978dc 2626 tree oprnd;
d092494c 2627 bool vectorized_defs;
ebfd146a 2628
9771b263
DN
2629 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2630 FOR_EACH_VEC_ELT (ops, i, oprnd)
ebfd146a 2631 {
d092494c
IR
2632 /* For each operand we check if it has vectorized definitions in a child
2633 node or we need to create them (for invariants and constants). We
2634 check if the LHS of the first stmt of the next child matches OPRND.
2635 If it does, we found the correct child. Otherwise, we call
2636 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2637 to check this child node for the next operand. */
2638 vectorized_defs = false;
9771b263 2639 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
ebfd146a 2640 {
9771b263 2641 child = (slp_tree) SLP_TREE_CHILDREN (slp_node)[child_index];
d092494c 2642
e44978dc
RB
2643 /* We have to check both pattern and original def, if available. */
2644 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2645 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
ebfd146a 2646
e44978dc
RB
2647 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2648 || (related
2649 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2650 {
2651 /* The number of vector defs is determined by the number of
2652 vector statements in the node from which we get those
d092494c 2653 statements. */
e44978dc
RB
2654 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2655 vectorized_defs = true;
d092494c 2656 child_index++;
e44978dc 2657 }
d092494c 2658 }
ebfd146a 2659
d092494c
IR
2660 if (!vectorized_defs)
2661 {
2662 if (i == 0)
2663 {
2664 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2665 /* Number of vector stmts was calculated according to LHS in
2666 vect_schedule_slp_instance (), fix it by replacing LHS with
2667 RHS, if necessary. See vect_get_smallest_scalar_type () for
2668 details. */
2669 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2670 &rhs_size_unit);
2671 if (rhs_size_unit != lhs_size_unit)
2672 {
2673 number_of_vects *= rhs_size_unit;
2674 number_of_vects /= lhs_size_unit;
2675 }
2676 }
2677 }
b5aeb3bb 2678
d092494c 2679 /* Allocate memory for vectorized defs. */
37b5ec8f
JJ
2680 vec_defs = vNULL;
2681 vec_defs.create (number_of_vects);
ebfd146a 2682
d092494c
IR
2683 /* For reduction defs we call vect_get_constant_vectors (), since we are
2684 looking for initial loop invariant values. */
2685 if (vectorized_defs && reduc_index == -1)
2686 /* The defs are already vectorized. */
37b5ec8f 2687 vect_get_slp_vect_defs (child, &vec_defs);
d092494c
IR
2688 else
2689 /* Build vectors from scalar defs. */
37b5ec8f 2690 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
d092494c 2691 number_of_vects, reduc_index);
ebfd146a 2692
37b5ec8f 2693 vec_oprnds->quick_push (vec_defs);
ebfd146a 2694
d092494c
IR
2695 /* For reductions, we only need initial values. */
2696 if (reduc_index != -1)
2697 return;
2698 }
ebfd146a
IR
2699}
2700
a70d6342 2701
b8698a0f 2702/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
ebfd146a
IR
2703 building a vector of type MASK_TYPE from it) and two input vectors placed in
2704 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2705 shifting by STRIDE elements of DR_CHAIN for every copy.
2706 (STRIDE is the number of vectorized stmts for NODE divided by the number of
b8698a0f 2707 copies).
ebfd146a
IR
2708 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2709 the created stmts must be inserted. */
2710
2711static inline void
b8698a0f 2712vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
faf63e39 2713 tree mask, int first_vec_indx, int second_vec_indx,
b8698a0f 2714 gimple_stmt_iterator *gsi, slp_tree node,
9771b263 2715 tree vectype, vec<tree> dr_chain,
ebfd146a
IR
2716 int ncopies, int vect_stmts_counter)
2717{
faf63e39 2718 tree perm_dest;
ebfd146a
IR
2719 gimple perm_stmt = NULL;
2720 stmt_vec_info next_stmt_info;
0f900dfa 2721 int i, stride;
ebfd146a 2722 tree first_vec, second_vec, data_ref;
ebfd146a 2723
ebfd146a 2724 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
ebfd146a 2725
b8698a0f 2726 /* Initialize the vect stmts of NODE to properly insert the generated
ebfd146a 2727 stmts later. */
9771b263 2728 for (i = SLP_TREE_VEC_STMTS (node).length ();
ebfd146a 2729 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
9771b263 2730 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a
IR
2731
2732 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2733 for (i = 0; i < ncopies; i++)
2734 {
9771b263
DN
2735 first_vec = dr_chain[first_vec_indx];
2736 second_vec = dr_chain[second_vec_indx];
ebfd146a 2737
ebfd146a 2738 /* Generate the permute statement. */
73804b12
RG
2739 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2740 first_vec, second_vec, mask);
ebfd146a 2741 data_ref = make_ssa_name (perm_dest, perm_stmt);
2635892a 2742 gimple_set_lhs (perm_stmt, data_ref);
ebfd146a 2743 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
ebfd146a 2744
b8698a0f 2745 /* Store the vector statement in NODE. */
9771b263 2746 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
ebfd146a
IR
2747
2748 first_vec_indx += stride;
2749 second_vec_indx += stride;
2750 }
2751
2752 /* Mark the scalar stmt as vectorized. */
2753 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2754 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2755}
2756
2757
b8698a0f 2758/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
ebfd146a 2759 return in CURRENT_MASK_ELEMENT its equivalent in target specific
ff802fa1 2760 representation. Check that the mask is valid and return FALSE if not.
ebfd146a
IR
2761 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2762 the next vector, i.e., the current first vector is not needed. */
b8698a0f 2763
ebfd146a 2764static bool
b8698a0f 2765vect_get_mask_element (gimple stmt, int first_mask_element, int m,
ebfd146a 2766 int mask_nunits, bool only_one_vec, int index,
22e4dee7 2767 unsigned char *mask, int *current_mask_element,
694a4f61
IR
2768 bool *need_next_vector, int *number_of_mask_fixes,
2769 bool *mask_fixed, bool *needs_first_vector)
ebfd146a
IR
2770{
2771 int i;
ebfd146a
IR
2772
2773 /* Convert to target specific representation. */
2774 *current_mask_element = first_mask_element + m;
2775 /* Adjust the value in case it's a mask for second and third vectors. */
694a4f61 2776 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
ebfd146a
IR
2777
2778 if (*current_mask_element < mask_nunits)
694a4f61 2779 *needs_first_vector = true;
ebfd146a
IR
2780
2781 /* We have only one input vector to permute but the mask accesses values in
2782 the next vector as well. */
2783 if (only_one_vec && *current_mask_element >= mask_nunits)
2784 {
73fbfcad 2785 if (dump_enabled_p ())
ebfd146a 2786 {
78c60e3d
SS
2787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2788 "permutation requires at least two vectors ");
2789 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a
IR
2790 }
2791
2792 return false;
2793 }
2794
2795 /* The mask requires the next vector. */
2796 if (*current_mask_element >= mask_nunits * 2)
2797 {
694a4f61 2798 if (*needs_first_vector || *mask_fixed)
ebfd146a
IR
2799 {
2800 /* We either need the first vector too or have already moved to the
b8698a0f 2801 next vector. In both cases, this permutation needs three
ebfd146a 2802 vectors. */
73fbfcad 2803 if (dump_enabled_p ())
ebfd146a 2804 {
78c60e3d
SS
2805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2806 "permutation requires at "
2807 "least three vectors ");
2808 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a
IR
2809 }
2810
2811 return false;
2812 }
2813
2814 /* We move to the next vector, dropping the first one and working with
2815 the second and the third - we need to adjust the values of the mask
2816 accordingly. */
694a4f61 2817 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
ebfd146a
IR
2818
2819 for (i = 0; i < index; i++)
694a4f61 2820 mask[i] -= mask_nunits * *number_of_mask_fixes;
ebfd146a 2821
694a4f61
IR
2822 (*number_of_mask_fixes)++;
2823 *mask_fixed = true;
ebfd146a
IR
2824 }
2825
694a4f61 2826 *need_next_vector = *mask_fixed;
ebfd146a
IR
2827
2828 /* This was the last element of this mask. Start a new one. */
2829 if (index == mask_nunits - 1)
2830 {
694a4f61
IR
2831 *number_of_mask_fixes = 1;
2832 *mask_fixed = false;
2833 *needs_first_vector = false;
ebfd146a
IR
2834 }
2835
2836 return true;
2837}
2838
2839
2840/* Generate vector permute statements from a list of loads in DR_CHAIN.
2841 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2842 permute statements for SLP_NODE_INSTANCE. */
2843bool
9771b263 2844vect_transform_slp_perm_load (gimple stmt, vec<tree> dr_chain,
ebfd146a
IR
2845 gimple_stmt_iterator *gsi, int vf,
2846 slp_instance slp_node_instance, bool analyze_only)
2847{
2848 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2849 tree mask_element_type = NULL_TREE, mask_type;
2635892a 2850 int i, j, k, nunits, vec_index = 0, scalar_index;
ebfd146a 2851 slp_tree node;
2635892a 2852 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
2853 gimple next_scalar_stmt;
2854 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2855 int first_mask_element;
22e4dee7
RH
2856 int index, unroll_factor, current_mask_element, ncopies;
2857 unsigned char *mask;
ebfd146a
IR
2858 bool only_one_vec = false, need_next_vector = false;
2859 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
694a4f61
IR
2860 int number_of_mask_fixes = 1;
2861 bool mask_fixed = false;
2862 bool needs_first_vector = false;
22e4dee7 2863 enum machine_mode mode;
ebfd146a 2864
22e4dee7
RH
2865 mode = TYPE_MODE (vectype);
2866
2867 if (!can_vec_perm_p (mode, false, NULL))
ebfd146a 2868 {
73fbfcad 2869 if (dump_enabled_p ())
ebfd146a 2870 {
78c60e3d
SS
2871 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2872 "no vect permute for ");
2873 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 2874 }
2635892a 2875 return false;
ebfd146a
IR
2876 }
2877
2635892a
RH
2878 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2879 same size as the vector element being permuted. */
96f9265a
RG
2880 mask_element_type = lang_hooks.types.type_for_mode
2881 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
ebfd146a 2882 mask_type = get_vectype_for_scalar_type (mask_element_type);
ebfd146a 2883 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 2884 mask = XALLOCAVEC (unsigned char, nunits);
ebfd146a
IR
2885 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2886
2887 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2888 unrolling factor. */
b8698a0f 2889 orig_vec_stmts_num = group_size *
ebfd146a
IR
2890 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2891 if (orig_vec_stmts_num == 1)
2892 only_one_vec = true;
2893
b8698a0f 2894 /* Number of copies is determined by the final vectorization factor
ebfd146a 2895 relatively to SLP_NODE_INSTANCE unrolling factor. */
b8698a0f 2896 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
ebfd146a 2897
b8698a0f
L
2898 /* Generate permutation masks for every NODE. Number of masks for each NODE
2899 is equal to GROUP_SIZE.
2900 E.g., we have a group of three nodes with three loads from the same
2901 location in each node, and the vector size is 4. I.e., we have a
2902 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
2903 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2904 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2905 ...
2906
2635892a 2907 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 2908 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
2909 operation, and the mask element values can't be outside that range.
2910 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 2911 For the first two permutations we need the first and the second input
ebfd146a 2912 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 2913 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
2914 {c2,a3,b3,c3}. */
2915
9771b263 2916 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_node_instance), i, node)
ebfd146a
IR
2917 {
2918 scalar_index = 0;
2919 index = 0;
2920 vect_stmts_counter = 0;
2921 vec_index = 0;
2922 first_vec_index = vec_index++;
2923 if (only_one_vec)
2924 second_vec_index = first_vec_index;
2925 else
2926 second_vec_index = vec_index++;
2927
2928 for (j = 0; j < unroll_factor; j++)
2929 {
2930 for (k = 0; k < group_size; k++)
2931 {
2635892a
RH
2932 first_mask_element = i + j * group_size;
2933 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2934 nunits, only_one_vec, index,
2935 mask, &current_mask_element,
2936 &need_next_vector,
2937 &number_of_mask_fixes, &mask_fixed,
2938 &needs_first_vector))
2939 return false;
2940 mask[index++] = current_mask_element;
ebfd146a 2941
2635892a 2942 if (index == nunits)
ebfd146a 2943 {
d2a12ae7
RG
2944 tree mask_vec, *mask_elts;
2945 int l;
faf63e39 2946
22e4dee7
RH
2947 if (!can_vec_perm_p (mode, false, mask))
2948 {
73fbfcad 2949 if (dump_enabled_p ())
22e4dee7 2950 {
78c60e3d
SS
2951 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2952 vect_location,
2953 "unsupported vect permute { ");
22e4dee7 2954 for (i = 0; i < nunits; ++i)
78c60e3d
SS
2955 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2956 mask[i]);
2957 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
22e4dee7
RH
2958 }
2959 return false;
2960 }
2961
d2a12ae7
RG
2962 mask_elts = XALLOCAVEC (tree, nunits);
2963 for (l = 0; l < nunits; ++l)
2964 mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
2965 mask_vec = build_vector (mask_type, mask_elts);
faf63e39
RH
2966 index = 0;
2967
ebfd146a
IR
2968 if (!analyze_only)
2969 {
2970 if (need_next_vector)
2971 {
2972 first_vec_index = second_vec_index;
2973 second_vec_index = vec_index;
2974 }
2975
9771b263
DN
2976 next_scalar_stmt
2977 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
ebfd146a
IR
2978
2979 vect_create_mask_and_perm (stmt, next_scalar_stmt,
faf63e39 2980 mask_vec, first_vec_index, second_vec_index,
2635892a 2981 gsi, node, vectype, dr_chain,
faf63e39 2982 ncopies, vect_stmts_counter++);
ebfd146a 2983 }
b8698a0f
L
2984 }
2985 }
2986 }
2987 }
ebfd146a 2988
ebfd146a
IR
2989 return true;
2990}
2991
2992
2993
2994/* Vectorize SLP instance tree in postorder. */
2995
2996static bool
2997vect_schedule_slp_instance (slp_tree node, slp_instance instance,
a70d6342 2998 unsigned int vectorization_factor)
ebfd146a
IR
2999{
3000 gimple stmt;
0d0293ac 3001 bool grouped_store, is_store;
ebfd146a
IR
3002 gimple_stmt_iterator si;
3003 stmt_vec_info stmt_info;
3004 unsigned int vec_stmts_size, nunits, group_size;
3005 tree vectype;
3006 int i;
3007 slp_tree loads_node;
d092494c 3008 slp_void_p child;
ebfd146a
IR
3009
3010 if (!node)
3011 return false;
3012
9771b263 3013 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d092494c
IR
3014 vect_schedule_slp_instance ((slp_tree) child, instance,
3015 vectorization_factor);
b8698a0f 3016
9771b263 3017 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3018 stmt_info = vinfo_for_stmt (stmt);
3019
3020 /* VECTYPE is the type of the destination. */
b690cc0f 3021 vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
3022 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3023 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3024
3025 /* For each SLP instance calculate number of vector stmts to be created
ff802fa1 3026 for the scalar stmts in each node of the SLP tree. Number of vector
ebfd146a
IR
3027 elements in one vector iteration is the number of scalar elements in
3028 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3029 size. */
3030 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3031
3032 /* In case of load permutation we have to allocate vectorized statements for
3033 all the nodes that participate in that permutation. */
9771b263 3034 if (SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
ebfd146a 3035 {
9771b263 3036 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, loads_node)
ebfd146a 3037 {
9771b263 3038 if (!SLP_TREE_VEC_STMTS (loads_node).exists ())
ebfd146a 3039 {
9771b263 3040 SLP_TREE_VEC_STMTS (loads_node).create (vec_stmts_size);
ebfd146a
IR
3041 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
3042 }
3043 }
3044 }
3045
9771b263 3046 if (!SLP_TREE_VEC_STMTS (node).exists ())
ebfd146a 3047 {
9771b263 3048 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
ebfd146a
IR
3049 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3050 }
3051
73fbfcad 3052 if (dump_enabled_p ())
ebfd146a 3053 {
78c60e3d
SS
3054 dump_printf_loc (MSG_NOTE,vect_location,
3055 "------>vectorizing SLP node starting from: ");
3056 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
b8698a0f 3057 }
ebfd146a
IR
3058
3059 /* Loads should be inserted before the first load. */
3060 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
0d0293ac 3061 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
6aa904c4 3062 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
9771b263 3063 && SLP_INSTANCE_LOAD_PERMUTATION (instance).exists ())
ebfd146a 3064 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
9d5e7640 3065 else if (is_pattern_stmt_p (stmt_info))
6aa904c4 3066 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
ebfd146a
IR
3067 else
3068 si = gsi_for_stmt (stmt);
b8698a0f 3069
e4a707c4 3070 /* Stores should be inserted just before the last store. */
0d0293ac 3071 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
e4a707c4
IR
3072 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3073 {
3074 gimple last_store = vect_find_last_store_in_slp_instance (instance);
a024e70e
IR
3075 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3076 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
e4a707c4
IR
3077 si = gsi_for_stmt (last_store);
3078 }
3079
b010117a
IR
3080 /* Mark the first element of the reduction chain as reduction to properly
3081 transform the node. In the analysis phase only the last element of the
3082 chain is marked as reduction. */
0d0293ac 3083 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
b010117a
IR
3084 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3085 {
3086 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3087 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3088 }
3089
0d0293ac 3090 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
b5aeb3bb 3091 return is_store;
ebfd146a
IR
3092}
3093
dd34c087
JJ
3094/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3095 For loop vectorization this is done in vectorizable_call, but for SLP
3096 it needs to be deferred until end of vect_schedule_slp, because multiple
3097 SLP instances may refer to the same scalar stmt. */
3098
3099static void
3100vect_remove_slp_scalar_calls (slp_tree node)
3101{
3102 gimple stmt, new_stmt;
3103 gimple_stmt_iterator gsi;
3104 int i;
3105 slp_void_p child;
3106 tree lhs;
3107 stmt_vec_info stmt_info;
3108
3109 if (!node)
3110 return;
3111
9771b263 3112 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
dd34c087
JJ
3113 vect_remove_slp_scalar_calls ((slp_tree) child);
3114
9771b263 3115 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
dd34c087
JJ
3116 {
3117 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3118 continue;
3119 stmt_info = vinfo_for_stmt (stmt);
3120 if (stmt_info == NULL
3121 || is_pattern_stmt_p (stmt_info)
3122 || !PURE_SLP_STMT (stmt_info))
3123 continue;
3124 lhs = gimple_call_lhs (stmt);
3125 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3126 set_vinfo_for_stmt (new_stmt, stmt_info);
3127 set_vinfo_for_stmt (stmt, NULL);
3128 STMT_VINFO_STMT (stmt_info) = new_stmt;
3129 gsi = gsi_for_stmt (stmt);
3130 gsi_replace (&gsi, new_stmt, false);
3131 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3132 }
3133}
ebfd146a 3134
ff802fa1
IR
3135/* Generate vector code for all SLP instances in the loop/basic block. */
3136
ebfd146a 3137bool
a70d6342 3138vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a 3139{
9771b263 3140 vec<slp_instance> slp_instances;
ebfd146a 3141 slp_instance instance;
12a43ab8
RB
3142 slp_tree loads_node;
3143 unsigned int i, j, vf;
ebfd146a
IR
3144 bool is_store = false;
3145
a70d6342
IR
3146 if (loop_vinfo)
3147 {
3148 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3149 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
b8698a0f 3150 }
a70d6342
IR
3151 else
3152 {
3153 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3154 vf = 1;
b8698a0f 3155 }
a70d6342 3156
9771b263 3157 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
3158 {
3159 /* Schedule the tree of INSTANCE. */
3160 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
a70d6342 3161 instance, vf);
12a43ab8
RB
3162
3163 /* Clear STMT_VINFO_VEC_STMT of all loads. With shared loads
3164 between SLP instances we fail to properly initialize the
3165 vectorized SLP stmts and confuse different load permutations. */
3166 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, loads_node)
3167 STMT_VINFO_VEC_STMT
3168 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (loads_node)[0])) = NULL;
3169
73fbfcad 3170 if (dump_enabled_p ())
78c60e3d
SS
3171 dump_printf_loc (MSG_NOTE, vect_location,
3172 "vectorizing stmts using SLP.");
ebfd146a
IR
3173 }
3174
9771b263 3175 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
3176 {
3177 slp_tree root = SLP_INSTANCE_TREE (instance);
3178 gimple store;
3179 unsigned int j;
3180 gimple_stmt_iterator gsi;
3181
c40eced0
RB
3182 /* Remove scalar call stmts. Do not do this for basic-block
3183 vectorization as not all uses may be vectorized.
3184 ??? Why should this be necessary? DCE should be able to
3185 remove the stmts itself.
3186 ??? For BB vectorization we can as well remove scalar
3187 stmts starting from the SLP tree root if they have no
3188 uses. */
3189 if (loop_vinfo)
3190 vect_remove_slp_scalar_calls (root);
dd34c087 3191
9771b263 3192 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
b5aeb3bb
IR
3193 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3194 {
3195 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3196 break;
3197
a024e70e
IR
3198 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3199 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
b5aeb3bb
IR
3200 /* Free the attached stmt_vec_info and remove the stmt. */
3201 gsi = gsi_for_stmt (store);
3d3f2249 3202 unlink_stmt_vdef (store);
b5aeb3bb 3203 gsi_remove (&gsi, true);
3d3f2249 3204 release_defs (store);
b5aeb3bb
IR
3205 free_stmt_vec_info (store);
3206 }
3207 }
3208
ebfd146a
IR
3209 return is_store;
3210}
a70d6342
IR
3211
3212
3213/* Vectorize the basic block. */
3214
3215void
3216vect_slp_transform_bb (basic_block bb)
3217{
3218 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3219 gimple_stmt_iterator si;
3220
3221 gcc_assert (bb_vinfo);
3222
73fbfcad 3223 if (dump_enabled_p ())
78c60e3d 3224 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
a70d6342
IR
3225
3226 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3227 {
3228 gimple stmt = gsi_stmt (si);
3229 stmt_vec_info stmt_info;
3230
73fbfcad 3231 if (dump_enabled_p ())
a70d6342 3232 {
78c60e3d
SS
3233 dump_printf_loc (MSG_NOTE, vect_location,
3234 "------>SLPing statement: ");
3235 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
a70d6342
IR
3236 }
3237
3238 stmt_info = vinfo_for_stmt (stmt);
3239 gcc_assert (stmt_info);
3240
3241 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3242 if (STMT_SLP_TYPE (stmt_info))
3243 {
3244 vect_schedule_slp (NULL, bb_vinfo);
3245 break;
3246 }
3247 }
3248
73fbfcad 3249 if (dump_enabled_p ())
78c60e3d 3250 dump_printf (MSG_OPTIMIZED_LOCATIONS, "BASIC BLOCK VECTORIZED\n");
a70d6342 3251
12aaf609
IR
3252 destroy_bb_vec_info (bb_vinfo);
3253}