]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
Merger of git branch "gimple-classes-v2-option-3"
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
fb85abff 1/* SLP - Basic Block Vectorization
3aea1f79 2 Copyright (C) 2007-2014 Free Software Foundation, Inc.
48e1416a 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
7bd765d4 25#include "dumpfile.h"
fb85abff 26#include "tm.h"
fb85abff 27#include "tree.h"
9ed99284 28#include "stor-layout.h"
fb85abff 29#include "target.h"
94ea8568 30#include "predict.h"
31#include "vec.h"
32#include "hashtab.h"
33#include "hash-set.h"
34#include "machmode.h"
35#include "hard-reg-set.h"
36#include "input.h"
37#include "function.h"
fb85abff 38#include "basic-block.h"
ce084dfc 39#include "gimple-pretty-print.h"
bc61cadb 40#include "tree-ssa-alias.h"
41#include "internal-fn.h"
42#include "gimple-expr.h"
43#include "is-a.h"
073c1fd5 44#include "gimple.h"
dcf1a1ec 45#include "gimple-iterator.h"
073c1fd5 46#include "gimple-ssa.h"
47#include "tree-phinodes.h"
48#include "ssa-iterators.h"
9ed99284 49#include "stringpool.h"
073c1fd5 50#include "tree-ssanames.h"
b9ed1410 51#include "tree-pass.h"
fb85abff 52#include "cfgloop.h"
fb85abff 53#include "expr.h"
b9ed1410 54#include "recog.h" /* FIXME: for insn_data */
34517c64 55#include "insn-codes.h"
fb85abff 56#include "optabs.h"
57#include "tree-vectorizer.h"
964ec9cd 58#include "langhooks.h"
fb85abff 59
37545e54 60/* Extract the location of the basic block in the source code.
61 Return the basic block location if succeed and NULL if not. */
62
36f39b2e 63source_location
37545e54 64find_bb_location (basic_block bb)
65{
66 gimple stmt = NULL;
67 gimple_stmt_iterator si;
68
69 if (!bb)
36f39b2e 70 return UNKNOWN_LOCATION;
37545e54 71
72 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
73 {
74 stmt = gsi_stmt (si);
36f39b2e 75 if (gimple_location (stmt) != UNKNOWN_LOCATION)
37545e54 76 return gimple_location (stmt);
77 }
78
36f39b2e 79 return UNKNOWN_LOCATION;
37545e54 80}
81
82
fb85abff 83/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
84
85static void
86vect_free_slp_tree (slp_tree node)
87{
b0f64919 88 int i;
40bcc7c2 89 slp_tree child;
b0f64919 90
fb85abff 91 if (!node)
92 return;
93
f1f41a6c 94 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 95 vect_free_slp_tree (child);
48e1416a 96
f1f41a6c 97 SLP_TREE_CHILDREN (node).release ();
98 SLP_TREE_SCALAR_STMTS (node).release ();
99 SLP_TREE_VEC_STMTS (node).release ();
678e3d6e 100 SLP_TREE_LOAD_PERMUTATION (node).release ();
fb85abff 101
102 free (node);
103}
104
105
106/* Free the memory allocated for the SLP instance. */
107
108void
109vect_free_slp_instance (slp_instance instance)
110{
111 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
f1f41a6c 112 SLP_INSTANCE_LOADS (instance).release ();
113 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
9f793cdf 114 free (instance);
fb85abff 115}
116
117
b0f64919 118/* Create an SLP node for SCALAR_STMTS. */
119
120static slp_tree
f1f41a6c 121vect_create_new_slp_node (vec<gimple> scalar_stmts)
b0f64919 122{
3c7a3fca 123 slp_tree node;
f1f41a6c 124 gimple stmt = scalar_stmts[0];
b0f64919 125 unsigned int nops;
126
127 if (is_gimple_call (stmt))
128 nops = gimple_call_num_args (stmt);
129 else if (is_gimple_assign (stmt))
f2104a54 130 {
131 nops = gimple_num_ops (stmt) - 1;
132 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
133 nops++;
134 }
b0f64919 135 else
136 return NULL;
137
3c7a3fca 138 node = XNEW (struct _slp_tree);
b0f64919 139 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
f1f41a6c 140 SLP_TREE_VEC_STMTS (node).create (0);
141 SLP_TREE_CHILDREN (node).create (nops);
678e3d6e 142 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
b0f64919 143
144 return node;
145}
146
147
148/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
149 operand. */
f1f41a6c 150static vec<slp_oprnd_info>
b0f64919 151vect_create_oprnd_info (int nops, int group_size)
152{
153 int i;
154 slp_oprnd_info oprnd_info;
f1f41a6c 155 vec<slp_oprnd_info> oprnds_info;
b0f64919 156
f1f41a6c 157 oprnds_info.create (nops);
b0f64919 158 for (i = 0; i < nops; i++)
159 {
160 oprnd_info = XNEW (struct _slp_oprnd_info);
f1f41a6c 161 oprnd_info->def_stmts.create (group_size);
b0f64919 162 oprnd_info->first_dt = vect_uninitialized_def;
3533e58d 163 oprnd_info->first_op_type = NULL_TREE;
b0f64919 164 oprnd_info->first_pattern = false;
f1f41a6c 165 oprnds_info.quick_push (oprnd_info);
b0f64919 166 }
167
168 return oprnds_info;
169}
170
171
3c7a3fca 172/* Free operands info. */
173
b0f64919 174static void
f1f41a6c 175vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
b0f64919 176{
177 int i;
178 slp_oprnd_info oprnd_info;
179
f1f41a6c 180 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
3c7a3fca 181 {
f1f41a6c 182 oprnd_info->def_stmts.release ();
3c7a3fca 183 XDELETE (oprnd_info);
184 }
b0f64919 185
f1f41a6c 186 oprnds_info.release ();
b0f64919 187}
188
189
40bcc7c2 190/* Find the place of the data-ref in STMT in the interleaving chain that starts
191 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
192
193static int
194vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
195{
196 gimple next_stmt = first_stmt;
197 int result = 0;
198
199 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
200 return -1;
201
202 do
203 {
204 if (next_stmt == stmt)
205 return result;
206 result++;
207 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
208 }
209 while (next_stmt);
210
211 return -1;
212}
213
214
b0f64919 215/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
216 they are of a valid type and that they match the defs of the first stmt of
291e6474 217 the SLP group (stored in OPRNDS_INFO). If there was a fatal error
218 return -1, if the error could be corrected by swapping operands of the
219 operation return 1, if everything is ok return 0. */
fb85abff 220
291e6474 221static int
37545e54 222vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
ee6e9ba5 223 gimple stmt, bool first,
224 vec<slp_oprnd_info> *oprnds_info)
fb85abff 225{
226 tree oprnd;
227 unsigned int i, number_of_oprnds;
0eba4748 228 tree def;
fb85abff 229 gimple def_stmt;
b0f64919 230 enum vect_def_type dt = vect_uninitialized_def;
37545e54 231 struct loop *loop = NULL;
b0f64919 232 bool pattern = false;
0eba4748 233 slp_oprnd_info oprnd_info;
291e6474 234 int first_op_idx = 1;
235 bool commutative = false;
236 bool first_op_cond = false;
48e1416a 237
37545e54 238 if (loop_vinfo)
239 loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 240
b0f64919 241 if (is_gimple_call (stmt))
b57a47d0 242 {
243 number_of_oprnds = gimple_call_num_args (stmt);
291e6474 244 first_op_idx = 3;
b57a47d0 245 }
f2104a54 246 else if (is_gimple_assign (stmt))
247 {
291e6474 248 enum tree_code code = gimple_assign_rhs_code (stmt);
f2104a54 249 number_of_oprnds = gimple_num_ops (stmt) - 1;
250 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
291e6474 251 {
252 first_op_cond = true;
253 commutative = true;
254 number_of_oprnds++;
255 }
256 else
257 commutative = commutative_tree_code (code);
f2104a54 258 }
b0f64919 259 else
291e6474 260 return -1;
fb85abff 261
291e6474 262 bool swapped = false;
fb85abff 263 for (i = 0; i < number_of_oprnds; i++)
264 {
291e6474 265again:
266 if (first_op_cond)
f2104a54 267 {
291e6474 268 if (i == 0 || i == 1)
269 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
270 swapped ? !i : i);
271 else
272 oprnd = gimple_op (stmt, first_op_idx + i - 1);
f2104a54 273 }
274 else
291e6474 275 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
f2104a54 276
f1f41a6c 277 oprnd_info = (*oprnds_info)[i];
fb85abff 278
bed8b93b 279 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
280 &def, &dt)
b0f64919 281 || (!def_stmt && dt != vect_constant_def))
fb85abff 282 {
6d8fb6cf 283 if (dump_enabled_p ())
fb85abff 284 {
7bd765d4 285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
286 "Build SLP failed: can't find def for ");
287 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
78bb46f5 288 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 289 }
290
291e6474 291 return -1;
fb85abff 292 }
293
37545e54 294 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
282bf14c 295 from the pattern. Check that all the stmts of the node are in the
fb85abff 296 pattern. */
4c0c783a 297 if (def_stmt && gimple_bb (def_stmt)
298 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
299 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
300 && gimple_code (def_stmt) != GIMPLE_PHI))
fb85abff 301 && vinfo_for_stmt (def_stmt)
8bf58742 302 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
4c0c783a 303 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
304 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
fb85abff 305 {
b0f64919 306 pattern = true;
307 if (!first && !oprnd_info->first_pattern)
308 {
291e6474 309 if (i == 0
310 && !swapped
311 && commutative)
312 {
313 swapped = true;
314 goto again;
315 }
316
6d8fb6cf 317 if (dump_enabled_p ())
b0f64919 318 {
7bd765d4 319 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
320 "Build SLP failed: some of the stmts"
321 " are in a pattern, and others are not ");
322 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
78bb46f5 323 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b0f64919 324 }
fb85abff 325
291e6474 326 return 1;
fb85abff 327 }
328
329 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
b0f64919 330 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
fb85abff 331
f2104a54 332 if (dt == vect_unknown_def_type)
fb85abff 333 {
6d8fb6cf 334 if (dump_enabled_p ())
7bd765d4 335 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 336 "Unsupported pattern.\n");
291e6474 337 return -1;
fb85abff 338 }
339
340 switch (gimple_code (def_stmt))
341 {
342 case GIMPLE_PHI:
b0f64919 343 def = gimple_phi_result (def_stmt);
fb85abff 344 break;
345
346 case GIMPLE_ASSIGN:
b0f64919 347 def = gimple_assign_lhs (def_stmt);
fb85abff 348 break;
349
350 default:
6d8fb6cf 351 if (dump_enabled_p ())
7bd765d4 352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 353 "unsupported defining stmt:\n");
291e6474 354 return -1;
fb85abff 355 }
356 }
357
b0f64919 358 if (first)
fb85abff 359 {
b0f64919 360 oprnd_info->first_dt = dt;
361 oprnd_info->first_pattern = pattern;
3533e58d 362 oprnd_info->first_op_type = TREE_TYPE (oprnd);
fb85abff 363 }
fb85abff 364 else
365 {
b0f64919 366 /* Not first stmt of the group, check that the def-stmt/s match
367 the def-stmt/s of the first stmt. Allow different definition
368 types for reduction chains: the first stmt must be a
369 vect_reduction_def (a phi node), and the rest
370 vect_internal_def. */
371 if (((oprnd_info->first_dt != dt
372 && !(oprnd_info->first_dt == vect_reduction_def
3533e58d 373 && dt == vect_internal_def)
374 && !((oprnd_info->first_dt == vect_external_def
375 || oprnd_info->first_dt == vect_constant_def)
376 && (dt == vect_external_def
377 || dt == vect_constant_def)))
378 || !types_compatible_p (oprnd_info->first_op_type,
379 TREE_TYPE (oprnd))))
fb85abff 380 {
291e6474 381 /* Try swapping operands if we got a mismatch. */
382 if (i == 0
383 && !swapped
384 && commutative)
385 {
386 swapped = true;
387 goto again;
388 }
389
0eba4748 390 if (dump_enabled_p ())
391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 392 "Build SLP failed: different types\n");
b0f64919 393
291e6474 394 return 1;
fb85abff 395 }
396 }
397
398 /* Check the types of the definitions. */
b0f64919 399 switch (dt)
fb85abff 400 {
401 case vect_constant_def:
f083cd24 402 case vect_external_def:
b0f64919 403 case vect_reduction_def:
fb85abff 404 break;
48e1416a 405
f083cd24 406 case vect_internal_def:
0eba4748 407 oprnd_info->def_stmts.quick_push (def_stmt);
fb85abff 408 break;
409
410 default:
411 /* FORNOW: Not supported. */
6d8fb6cf 412 if (dump_enabled_p ())
fb85abff 413 {
7bd765d4 414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
415 "Build SLP failed: illegal type of def ");
416 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
78bb46f5 417 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 418 }
419
291e6474 420 return -1;
fb85abff 421 }
422 }
423
291e6474 424 /* Swap operands. */
425 if (swapped)
426 {
427 if (first_op_cond)
428 {
429 tree cond = gimple_assign_rhs1 (stmt);
430 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
431 &TREE_OPERAND (cond, 1));
432 TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
433 }
434 else
435 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
436 gimple_assign_rhs2_ptr (stmt));
437 }
438
439 return 0;
fb85abff 440}
441
442
16513aa4 443/* Verify if the scalar stmts STMTS are isomorphic, require data
444 permutation or are of unsupported types of operation. Return
445 true if they are, otherwise return false and indicate in *MATCHES
446 which stmts are not isomorphic to the first one. If MATCHES[0]
447 is false then this indicates the comparison could not be
448 carried out or the stmts will never be vectorized by SLP. */
fb85abff 449
450static bool
16513aa4 451vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
452 vec<gimple> stmts, unsigned int group_size,
453 unsigned nops, unsigned int *max_nunits,
454 unsigned int vectorization_factor, bool *matches)
fb85abff 455{
fb85abff 456 unsigned int i;
f1f41a6c 457 gimple stmt = stmts[0];
a2e70a99 458 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
f2104a54 459 enum tree_code first_cond_code = ERROR_MARK;
fb85abff 460 tree lhs;
16513aa4 461 bool need_same_oprnds = false;
fb85abff 462 tree vectype, scalar_type, first_op1 = NULL_TREE;
fb85abff 463 optab optab;
464 int icode;
3754d046 465 machine_mode optab_op2_mode;
466 machine_mode vec_mode;
fb85abff 467 struct data_reference *first_dr;
fb85abff 468 HOST_WIDE_INT dummy;
4db2b577 469 gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
f2104a54 470 tree cond;
b0f64919 471
fb85abff 472 /* For every stmt in NODE find its def stmt/s. */
f1f41a6c 473 FOR_EACH_VEC_ELT (stmts, i, stmt)
fb85abff 474 {
16513aa4 475 matches[i] = false;
476
6d8fb6cf 477 if (dump_enabled_p ())
fb85abff 478 {
7bd765d4 479 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
480 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 481 dump_printf (MSG_NOTE, "\n");
fb85abff 482 }
483
6ea6a380 484 /* Fail to vectorize statements marked as unvectorizable. */
485 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
486 {
6d8fb6cf 487 if (dump_enabled_p ())
6ea6a380 488 {
7bd765d4 489 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
490 "Build SLP failed: unvectorizable statement ");
491 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 492 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
6ea6a380 493 }
16513aa4 494 /* Fatal mismatch. */
495 matches[0] = false;
6ea6a380 496 return false;
497 }
498
fb85abff 499 lhs = gimple_get_lhs (stmt);
500 if (lhs == NULL_TREE)
501 {
6d8fb6cf 502 if (dump_enabled_p ())
fb85abff 503 {
7bd765d4 504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
505 "Build SLP failed: not GIMPLE_ASSIGN nor "
506 "GIMPLE_CALL ");
507 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 508 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 509 }
16513aa4 510 /* Fatal mismatch. */
511 matches[0] = false;
fb85abff 512 return false;
513 }
514
f2104a54 515 if (is_gimple_assign (stmt)
516 && gimple_assign_rhs_code (stmt) == COND_EXPR
517 && (cond = gimple_assign_rhs1 (stmt))
518 && !COMPARISON_CLASS_P (cond))
519 {
6d8fb6cf 520 if (dump_enabled_p ())
f2104a54 521 {
7bd765d4 522 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
523 "Build SLP failed: condition is not "
524 "comparison ");
525 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 526 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f2104a54 527 }
16513aa4 528 /* Fatal mismatch. */
529 matches[0] = false;
f2104a54 530 return false;
531 }
532
48e1416a 533 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
fb85abff 534 vectype = get_vectype_for_scalar_type (scalar_type);
535 if (!vectype)
536 {
6d8fb6cf 537 if (dump_enabled_p ())
fb85abff 538 {
7bd765d4 539 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
540 "Build SLP failed: unsupported data-type ");
541 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
542 scalar_type);
78bb46f5 543 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 544 }
16513aa4 545 /* Fatal mismatch. */
546 matches[0] = false;
fb85abff 547 return false;
548 }
48e1416a 549
03dab834 550 /* In case of multiple types we need to detect the smallest type. */
551 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
37545e54 552 {
03dab834 553 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
554 if (bb_vinfo)
555 vectorization_factor = *max_nunits;
37545e54 556 }
48e1416a 557
1a91d914 558 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
b57a47d0 559 {
560 rhs_code = CALL_EXPR;
1a91d914 561 if (gimple_call_internal_p (call_stmt)
562 || gimple_call_tail_p (call_stmt)
563 || gimple_call_noreturn_p (call_stmt)
564 || !gimple_call_nothrow_p (call_stmt)
565 || gimple_call_chain (call_stmt))
b57a47d0 566 {
6d8fb6cf 567 if (dump_enabled_p ())
b57a47d0 568 {
7bd765d4 569 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
570 "Build SLP failed: unsupported call type ");
1a91d914 571 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
572 call_stmt, 0);
78bb46f5 573 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b57a47d0 574 }
16513aa4 575 /* Fatal mismatch. */
576 matches[0] = false;
b57a47d0 577 return false;
578 }
579 }
fb85abff 580 else
581 rhs_code = gimple_assign_rhs_code (stmt);
582
583 /* Check the operation. */
584 if (i == 0)
585 {
586 first_stmt_code = rhs_code;
587
48e1416a 588 /* Shift arguments should be equal in all the packed stmts for a
fb85abff 589 vector shift with scalar shift operand. */
590 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
591 || rhs_code == LROTATE_EXPR
592 || rhs_code == RROTATE_EXPR)
593 {
594 vec_mode = TYPE_MODE (vectype);
595
596 /* First see if we have a vector/vector shift. */
597 optab = optab_for_tree_code (rhs_code, vectype,
598 optab_vector);
599
600 if (!optab
d6bf3b14 601 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
fb85abff 602 {
603 /* No vector/vector shift, try for a vector/scalar shift. */
604 optab = optab_for_tree_code (rhs_code, vectype,
605 optab_scalar);
606
607 if (!optab)
608 {
6d8fb6cf 609 if (dump_enabled_p ())
7bd765d4 610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 611 "Build SLP failed: no optab.\n");
16513aa4 612 /* Fatal mismatch. */
613 matches[0] = false;
fb85abff 614 return false;
615 }
d6bf3b14 616 icode = (int) optab_handler (optab, vec_mode);
fb85abff 617 if (icode == CODE_FOR_nothing)
618 {
6d8fb6cf 619 if (dump_enabled_p ())
7bd765d4 620 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
621 "Build SLP failed: "
78bb46f5 622 "op not supported by target.\n");
16513aa4 623 /* Fatal mismatch. */
624 matches[0] = false;
fb85abff 625 return false;
626 }
627 optab_op2_mode = insn_data[icode].operand[2].mode;
628 if (!VECTOR_MODE_P (optab_op2_mode))
629 {
630 need_same_oprnds = true;
631 first_op1 = gimple_assign_rhs2 (stmt);
632 }
633 }
634 }
6083c152 635 else if (rhs_code == WIDEN_LSHIFT_EXPR)
636 {
637 need_same_oprnds = true;
638 first_op1 = gimple_assign_rhs2 (stmt);
639 }
fb85abff 640 }
641 else
642 {
643 if (first_stmt_code != rhs_code
644 && (first_stmt_code != IMAGPART_EXPR
645 || rhs_code != REALPART_EXPR)
646 && (first_stmt_code != REALPART_EXPR
ad074595 647 || rhs_code != IMAGPART_EXPR)
ee612634 648 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
ad074595 649 && (first_stmt_code == ARRAY_REF
19bacd59 650 || first_stmt_code == BIT_FIELD_REF
ad074595 651 || first_stmt_code == INDIRECT_REF
652 || first_stmt_code == COMPONENT_REF
653 || first_stmt_code == MEM_REF)))
fb85abff 654 {
6d8fb6cf 655 if (dump_enabled_p ())
fb85abff 656 {
7bd765d4 657 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
658 "Build SLP failed: different operation "
659 "in stmt ");
660 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 661 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 662 }
16513aa4 663 /* Mismatch. */
664 continue;
fb85abff 665 }
48e1416a 666
667 if (need_same_oprnds
fb85abff 668 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
669 {
6d8fb6cf 670 if (dump_enabled_p ())
fb85abff 671 {
7bd765d4 672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
673 "Build SLP failed: different shift "
674 "arguments in ");
675 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 676 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 677 }
16513aa4 678 /* Mismatch. */
679 continue;
fb85abff 680 }
b57a47d0 681
682 if (rhs_code == CALL_EXPR)
683 {
f1f41a6c 684 gimple first_stmt = stmts[0];
b57a47d0 685 if (gimple_call_num_args (stmt) != nops
686 || !operand_equal_p (gimple_call_fn (first_stmt),
687 gimple_call_fn (stmt), 0)
688 || gimple_call_fntype (first_stmt)
689 != gimple_call_fntype (stmt))
690 {
6d8fb6cf 691 if (dump_enabled_p ())
b57a47d0 692 {
7bd765d4 693 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
694 "Build SLP failed: different calls in ");
695 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
696 stmt, 0);
78bb46f5 697 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b57a47d0 698 }
16513aa4 699 /* Mismatch. */
700 continue;
b57a47d0 701 }
702 }
fb85abff 703 }
704
ee612634 705 /* Grouped store or load. */
706 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
fb85abff 707 {
708 if (REFERENCE_CLASS_P (lhs))
709 {
710 /* Store. */
16513aa4 711 ;
fb85abff 712 }
eefa05c8 713 else
714 {
715 /* Load. */
f77a1eec 716 unsigned unrolling_factor
717 = least_common_multiple
718 (*max_nunits, group_size) / group_size;
8f3e4987 719 /* FORNOW: Check that there is no gap between the loads
720 and no gap between the groups when we need to load
721 multiple groups at once.
722 ??? We should enhance this to only disallow gaps
723 inside vectors. */
f77a1eec 724 if ((unrolling_factor > 1
8f3e4987 725 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
726 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
727 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
728 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
eefa05c8 729 {
6d8fb6cf 730 if (dump_enabled_p ())
eefa05c8 731 {
7bd765d4 732 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
733 "Build SLP failed: grouped "
734 "loads have gaps ");
735 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
736 stmt, 0);
78bb46f5 737 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
eefa05c8 738 }
16513aa4 739 /* Fatal mismatch. */
740 matches[0] = false;
eefa05c8 741 return false;
742 }
792672f0 743
eefa05c8 744 /* Check that the size of interleaved loads group is not
745 greater than the SLP group size. */
f77a1eec 746 unsigned ncopies
747 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
24c271ef 748 if (loop_vinfo
8f3e4987 749 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
750 && ((GROUP_SIZE (vinfo_for_stmt (stmt))
751 - GROUP_GAP (vinfo_for_stmt (stmt)))
752 > ncopies * group_size))
eefa05c8 753 {
6d8fb6cf 754 if (dump_enabled_p ())
eefa05c8 755 {
7bd765d4 756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
757 "Build SLP failed: the number "
758 "of interleaved loads is greater than "
759 "the SLP group size ");
760 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
761 stmt, 0);
78bb46f5 762 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
eefa05c8 763 }
16513aa4 764 /* Fatal mismatch. */
765 matches[0] = false;
eefa05c8 766 return false;
767 }
768
4db2b577 769 old_first_load = first_load;
21009880 770 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
eefa05c8 771 if (prev_first_load)
772 {
773 /* Check that there are no loads from different interleaving
16513aa4 774 chains in the same node. */
775 if (prev_first_load != first_load)
7bd765d4 776 {
6d8fb6cf 777 if (dump_enabled_p ())
eefa05c8 778 {
7bd765d4 779 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
780 vect_location,
781 "Build SLP failed: different "
782 "interleaving chains in one node ");
783 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
784 stmt, 0);
78bb46f5 785 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
eefa05c8 786 }
16513aa4 787 /* Mismatch. */
788 continue;
eefa05c8 789 }
790 }
791 else
792 prev_first_load = first_load;
48e1416a 793
4db2b577 794 /* In some cases a group of loads is just the same load
795 repeated N times. Only analyze its cost once. */
796 if (first_load == stmt && old_first_load != first_load)
fb85abff 797 {
798 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
0822b158 799 if (vect_supportable_dr_alignment (first_dr, false)
fb85abff 800 == dr_unaligned_unsupported)
801 {
6d8fb6cf 802 if (dump_enabled_p ())
fb85abff 803 {
7bd765d4 804 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
805 vect_location,
806 "Build SLP failed: unsupported "
807 "unaligned load ");
808 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
809 stmt, 0);
78bb46f5 810 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 811 }
16513aa4 812 /* Fatal mismatch. */
813 matches[0] = false;
fb85abff 814 return false;
815 }
fb85abff 816 }
fb85abff 817 }
ee612634 818 } /* Grouped access. */
fb85abff 819 else
820 {
821 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
822 {
ee612634 823 /* Not grouped load. */
6d8fb6cf 824 if (dump_enabled_p ())
fb85abff 825 {
7bd765d4 826 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
827 "Build SLP failed: not grouped load ");
828 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 829 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 830 }
831
ee612634 832 /* FORNOW: Not grouped loads are not supported. */
16513aa4 833 /* Fatal mismatch. */
834 matches[0] = false;
fb85abff 835 return false;
836 }
837
838 /* Not memory operation. */
839 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f2104a54 840 && TREE_CODE_CLASS (rhs_code) != tcc_unary
b57a47d0 841 && rhs_code != COND_EXPR
842 && rhs_code != CALL_EXPR)
fb85abff 843 {
6d8fb6cf 844 if (dump_enabled_p ())
fb85abff 845 {
7bd765d4 846 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
847 "Build SLP failed: operation");
848 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
849 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 850 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 851 }
16513aa4 852 /* Fatal mismatch. */
853 matches[0] = false;
fb85abff 854 return false;
855 }
856
f2104a54 857 if (rhs_code == COND_EXPR)
858 {
859 tree cond_expr = gimple_assign_rhs1 (stmt);
860
861 if (i == 0)
862 first_cond_code = TREE_CODE (cond_expr);
863 else if (first_cond_code != TREE_CODE (cond_expr))
864 {
6d8fb6cf 865 if (dump_enabled_p ())
f2104a54 866 {
7bd765d4 867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
868 "Build SLP failed: different"
869 " operation");
870 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
871 stmt, 0);
78bb46f5 872 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f2104a54 873 }
16513aa4 874 /* Mismatch. */
875 continue;
f2104a54 876 }
877 }
fb85abff 878 }
16513aa4 879
880 matches[i] = true;
881 }
882
883 for (i = 0; i < group_size; ++i)
884 if (!matches[i])
885 return false;
886
887 return true;
888}
889
890/* Recursively build an SLP tree starting from NODE.
891 Fail (and return a value not equal to zero) if def-stmts are not
892 isomorphic, require data permutation or are of unsupported types of
893 operation. Otherwise, return 0.
894 The value returned is the depth in the SLP tree where a mismatch
895 was found. */
896
897static bool
898vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
899 slp_tree *node, unsigned int group_size,
900 unsigned int *max_nunits,
901 vec<slp_tree> *loads,
902 unsigned int vectorization_factor,
ca91d3f8 903 bool *matches, unsigned *npermutes, unsigned *tree_size,
904 unsigned max_tree_size)
16513aa4 905{
ca91d3f8 906 unsigned nops, i, this_npermutes = 0, this_tree_size = 0;
16513aa4 907 gimple stmt;
908
909 if (!matches)
910 matches = XALLOCAVEC (bool, group_size);
911 if (!npermutes)
912 npermutes = &this_npermutes;
913
914 matches[0] = false;
915
916 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
917 if (is_gimple_call (stmt))
918 nops = gimple_call_num_args (stmt);
919 else if (is_gimple_assign (stmt))
920 {
921 nops = gimple_num_ops (stmt) - 1;
922 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
923 nops++;
fb85abff 924 }
16513aa4 925 else
926 return false;
927
928 if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
929 SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
930 max_nunits, vectorization_factor, matches))
931 return false;
fb85abff 932
16513aa4 933 /* If the SLP node is a load, terminate the recursion. */
934 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
935 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
fb85abff 936 {
f1f41a6c 937 loads->safe_push (*node);
fb85abff 938 return true;
939 }
940
16513aa4 941 /* Get at the operands, verifying they are compatible. */
942 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
943 slp_oprnd_info oprnd_info;
944 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
945 {
291e6474 946 switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
947 stmt, (i == 0), &oprnds_info))
16513aa4 948 {
291e6474 949 case 0:
950 break;
951 case -1:
952 matches[0] = false;
16513aa4 953 vect_free_oprnd_info (oprnds_info);
954 return false;
291e6474 955 case 1:
956 matches[i] = false;
957 break;
16513aa4 958 }
959 }
291e6474 960 for (i = 0; i < group_size; ++i)
961 if (!matches[i])
962 {
963 vect_free_oprnd_info (oprnds_info);
964 return false;
965 }
16513aa4 966
967 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
968
48e1416a 969 /* Create SLP_TREE nodes for the definition node/s. */
f1f41a6c 970 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
fb85abff 971 {
b0f64919 972 slp_tree child;
16513aa4 973 unsigned old_nloads = loads->length ();
974 unsigned old_max_nunits = *max_nunits;
48e1416a 975
b0f64919 976 if (oprnd_info->first_dt != vect_internal_def)
977 continue;
fb85abff 978
ca91d3f8 979 if (++this_tree_size > max_tree_size)
980 {
981 vect_free_oprnd_info (oprnds_info);
982 return false;
983 }
984
b0f64919 985 child = vect_create_new_slp_node (oprnd_info->def_stmts);
16513aa4 986 if (!child)
987 {
f1f41a6c 988 vect_free_oprnd_info (oprnds_info);
16513aa4 989 return false;
b0f64919 990 }
48e1416a 991
16513aa4 992 bool *matches = XALLOCAVEC (bool, group_size);
993 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
994 group_size, max_nunits, loads,
ca91d3f8 995 vectorization_factor, matches,
996 npermutes, &this_tree_size, max_tree_size))
16513aa4 997 {
998 oprnd_info->def_stmts = vNULL;
999 SLP_TREE_CHILDREN (*node).quick_push (child);
1000 continue;
1001 }
1002
1003 /* If the SLP build for operand zero failed and operand zero
1004 and one can be commutated try that for the scalar stmts
1005 that failed the match. */
1006 if (i == 0
1007 /* A first scalar stmt mismatch signals a fatal mismatch. */
1008 && matches[0]
1009 /* ??? For COND_EXPRs we can swap the comparison operands
1010 as well as the arms under some constraints. */
1011 && nops == 2
1012 && oprnds_info[1]->first_dt == vect_internal_def
1013 && is_gimple_assign (stmt)
1014 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1015 /* Do so only if the number of not successful permutes was nor more
1016 than a cut-ff as re-trying the recursive match on
1017 possibly each level of the tree would expose exponential
1018 behavior. */
1019 && *npermutes < 4)
1020 {
1021 /* Roll back. */
1022 *max_nunits = old_max_nunits;
1023 loads->truncate (old_nloads);
1024 /* Swap mismatched definition stmts. */
291e6474 1025 dump_printf_loc (MSG_NOTE, vect_location,
1026 "Re-trying with swapped operands of stmts ");
16513aa4 1027 for (unsigned j = 0; j < group_size; ++j)
1028 if (!matches[j])
1029 {
1030 gimple tem = oprnds_info[0]->def_stmts[j];
1031 oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
1032 oprnds_info[1]->def_stmts[j] = tem;
291e6474 1033 dump_printf (MSG_NOTE, "%d ", j);
16513aa4 1034 }
291e6474 1035 dump_printf (MSG_NOTE, "\n");
16513aa4 1036 /* And try again ... */
1037 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1038 group_size, max_nunits, loads,
1039 vectorization_factor,
ca91d3f8 1040 matches, npermutes, &this_tree_size,
1041 max_tree_size))
16513aa4 1042 {
1043 oprnd_info->def_stmts = vNULL;
1044 SLP_TREE_CHILDREN (*node).quick_push (child);
1045 continue;
1046 }
1047
1048 ++*npermutes;
1049 }
1050
1051 oprnd_info->def_stmts = vNULL;
1052 vect_free_slp_tree (child);
1053 vect_free_oprnd_info (oprnds_info);
1054 return false;
fb85abff 1055 }
1056
ca91d3f8 1057 if (tree_size)
1058 *tree_size += this_tree_size;
1059
f1f41a6c 1060 vect_free_oprnd_info (oprnds_info);
fb85abff 1061 return true;
1062}
1063
7bd765d4 1064/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
fb85abff 1065
1066static void
7bd765d4 1067vect_print_slp_tree (int dump_kind, slp_tree node)
fb85abff 1068{
1069 int i;
1070 gimple stmt;
40bcc7c2 1071 slp_tree child;
fb85abff 1072
1073 if (!node)
1074 return;
1075
7bd765d4 1076 dump_printf (dump_kind, "node ");
f1f41a6c 1077 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
fb85abff 1078 {
7bd765d4 1079 dump_printf (dump_kind, "\n\tstmt %d ", i);
1080 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
fb85abff 1081 }
7bd765d4 1082 dump_printf (dump_kind, "\n");
fb85abff 1083
f1f41a6c 1084 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1085 vect_print_slp_tree (dump_kind, child);
fb85abff 1086}
1087
1088
48e1416a 1089/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1090 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
282bf14c 1091 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
fb85abff 1092 stmts in NODE are to be marked. */
1093
1094static void
1095vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1096{
1097 int i;
1098 gimple stmt;
40bcc7c2 1099 slp_tree child;
fb85abff 1100
1101 if (!node)
1102 return;
1103
f1f41a6c 1104 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
fb85abff 1105 if (j < 0 || i == j)
1106 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1107
f1f41a6c 1108 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1109 vect_mark_slp_stmts (child, mark, j);
fb85abff 1110}
1111
1112
37545e54 1113/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1114
1115static void
1116vect_mark_slp_stmts_relevant (slp_tree node)
1117{
1118 int i;
1119 gimple stmt;
1120 stmt_vec_info stmt_info;
40bcc7c2 1121 slp_tree child;
37545e54 1122
1123 if (!node)
1124 return;
1125
f1f41a6c 1126 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
37545e54 1127 {
1128 stmt_info = vinfo_for_stmt (stmt);
48e1416a 1129 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
37545e54 1130 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1131 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1132 }
1133
f1f41a6c 1134 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1135 vect_mark_slp_stmts_relevant (child);
37545e54 1136}
1137
1138
eefa05c8 1139/* Rearrange the statements of NODE according to PERMUTATION. */
1140
1141static void
1142vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
678e3d6e 1143 vec<unsigned> permutation)
eefa05c8 1144{
1145 gimple stmt;
f1f41a6c 1146 vec<gimple> tmp_stmts;
40bcc7c2 1147 unsigned int i;
1148 slp_tree child;
eefa05c8 1149
f1f41a6c 1150 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1151 vect_slp_rearrange_stmts (child, group_size, permutation);
eefa05c8 1152
f1f41a6c 1153 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1154 tmp_stmts.create (group_size);
40bcc7c2 1155 tmp_stmts.quick_grow_cleared (group_size);
eefa05c8 1156
f1f41a6c 1157 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
40bcc7c2 1158 tmp_stmts[permutation[i]] = stmt;
eefa05c8 1159
f1f41a6c 1160 SLP_TREE_SCALAR_STMTS (node).release ();
eefa05c8 1161 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1162}
1163
1164
678e3d6e 1165/* Check if the required load permutations in the SLP instance
1166 SLP_INSTN are supported. */
fb85abff 1167
1168static bool
678e3d6e 1169vect_supported_load_permutation_p (slp_instance slp_instn)
fb85abff 1170{
678e3d6e 1171 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1172 unsigned int i, j, k, next;
e8332698 1173 sbitmap load_index;
16513aa4 1174 slp_tree node;
1175 gimple stmt, load, next_load, first_load;
24c271ef 1176 struct data_reference *dr;
fb85abff 1177
6d8fb6cf 1178 if (dump_enabled_p ())
fb85abff 1179 {
7bd765d4 1180 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
678e3d6e 1181 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1182 if (node->load_permutation.exists ())
1183 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1184 dump_printf (MSG_NOTE, "%d ", next);
1185 else
9cf883aa 1186 for (k = 0; k < group_size; ++k)
1187 dump_printf (MSG_NOTE, "%d ", k);
78bb46f5 1188 dump_printf (MSG_NOTE, "\n");
fb85abff 1189 }
1190
eefa05c8 1191 /* In case of reduction every load permutation is allowed, since the order
1192 of the reduction statements is not important (as opposed to the case of
ee612634 1193 grouped stores). The only condition we need to check is that all the
eefa05c8 1194 load nodes are of the same size and have the same permutation (and then
1195 rearrange all the nodes of the SLP instance according to this
1196 permutation). */
1197
1198 /* Check that all the load nodes are of the same size. */
678e3d6e 1199 /* ??? Can't we assert this? */
f1f41a6c 1200 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
16513aa4 1201 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1202 return false;
a2e70a99 1203
eefa05c8 1204 node = SLP_INSTANCE_TREE (slp_instn);
f1f41a6c 1205 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
eefa05c8 1206
39a5d6b1 1207 /* Reduction (there are no data-refs in the root).
1208 In reduction chain the order of the loads is important. */
1209 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1210 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
eefa05c8 1211 {
678e3d6e 1212 slp_tree load;
1213 unsigned int lidx;
eefa05c8 1214
678e3d6e 1215 /* Compare all the permutation sequences to the first one. We know
1216 that at least one load is permuted. */
1217 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1218 if (!node->load_permutation.exists ())
1219 return false;
1220 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1221 {
1222 if (!load->load_permutation.exists ())
1223 return false;
1224 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1225 if (lidx != node->load_permutation[j])
1226 return false;
1227 }
0ede2ac4 1228
678e3d6e 1229 /* Check that the loads in the first sequence are different and there
1230 are no gaps between them. */
1231 load_index = sbitmap_alloc (group_size);
1232 bitmap_clear (load_index);
1233 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1234 {
1235 if (bitmap_bit_p (load_index, lidx))
1236 {
1237 sbitmap_free (load_index);
1238 return false;
1239 }
1240 bitmap_set_bit (load_index, lidx);
1241 }
1242 for (i = 0; i < group_size; i++)
1243 if (!bitmap_bit_p (load_index, i))
1244 {
1245 sbitmap_free (load_index);
1246 return false;
1247 }
1248 sbitmap_free (load_index);
1249
1250 /* This permutation is valid for reduction. Since the order of the
1251 statements in the nodes is not important unless they are memory
1252 accesses, we can rearrange the statements in all the nodes
1253 according to the order of the loads. */
1254 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1255 node->load_permutation);
1256
1257 /* We are done, no actual permutations need to be generated. */
1258 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1259 SLP_TREE_LOAD_PERMUTATION (node).release ();
1260 return true;
eefa05c8 1261 }
1262
24c271ef 1263 /* In basic block vectorization we allow any subchain of an interleaving
1264 chain.
1265 FORNOW: not supported in loop SLP because of realignment compications. */
678e3d6e 1266 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
24c271ef 1267 {
678e3d6e 1268 /* Check that for every node in the instance the loads
1269 form a subchain. */
f1f41a6c 1270 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
24c271ef 1271 {
1272 next_load = NULL;
f1f41a6c 1273 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
24c271ef 1274 {
24c271ef 1275 if (j != 0 && next_load != load)
678e3d6e 1276 return false;
24c271ef 1277 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1278 }
24c271ef 1279 }
1280
1281 /* Check that the alignment of the first load in every subchain, i.e.,
678e3d6e 1282 the first statement in every load node, is supported.
1283 ??? This belongs in alignment checking. */
1284 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1285 {
1286 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1287 if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1288 {
1289 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1290 if (vect_supportable_dr_alignment (dr, false)
1291 == dr_unaligned_unsupported)
1292 {
1293 if (dump_enabled_p ())
1294 {
1295 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1296 vect_location,
1297 "unsupported unaligned load ");
1298 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1299 first_load, 0);
78bb46f5 1300 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
678e3d6e 1301 }
1302 return false;
1303 }
1304 }
1305 }
24c271ef 1306
678e3d6e 1307 /* We are done, no actual permutations need to be generated. */
1308 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1309 SLP_TREE_LOAD_PERMUTATION (node).release ();
1310 return true;
24c271ef 1311 }
1312
48e1416a 1313 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1314 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
eefa05c8 1315 well (unless it's reduction). */
678e3d6e 1316 if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
fb85abff 1317 return false;
678e3d6e 1318 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1319 if (!node->load_permutation.exists ())
1320 return false;
fb85abff 1321
e8332698 1322 load_index = sbitmap_alloc (group_size);
53c5d9d4 1323 bitmap_clear (load_index);
678e3d6e 1324 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1325 {
1326 unsigned int lidx = node->load_permutation[0];
1327 if (bitmap_bit_p (load_index, lidx))
1328 {
1329 sbitmap_free (load_index);
1330 return false;
1331 }
1332 bitmap_set_bit (load_index, lidx);
1333 FOR_EACH_VEC_ELT (node->load_permutation, j, k)
1334 if (k != lidx)
1335 {
1336 sbitmap_free (load_index);
1337 return false;
1338 }
fb85abff 1339 }
678e3d6e 1340 for (i = 0; i < group_size; i++)
1341 if (!bitmap_bit_p (load_index, i))
cae603e9 1342 {
1343 sbitmap_free (load_index);
1344 return false;
1345 }
e8332698 1346 sbitmap_free (load_index);
fb85abff 1347
678e3d6e 1348 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1349 if (node->load_permutation.exists ()
1350 && !vect_transform_slp_perm_load
1351 (node, vNULL, NULL,
1352 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1353 return false;
1354 return true;
fb85abff 1355}
1356
1357
48e1416a 1358/* Find the first load in the loop that belongs to INSTANCE.
fb85abff 1359 When loads are in several SLP nodes, there can be a case in which the first
48e1416a 1360 load does not appear in the first SLP node to be transformed, causing
282bf14c 1361 incorrect order of statements. Since we generate all the loads together,
fb85abff 1362 they must be inserted before the first load of the SLP instance and not
1363 before the first load of the first node of the instance. */
282bf14c 1364
48e1416a 1365static gimple
1366vect_find_first_load_in_slp_instance (slp_instance instance)
fb85abff 1367{
1368 int i, j;
1369 slp_tree load_node;
1370 gimple first_load = NULL, load;
1371
f1f41a6c 1372 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1373 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
fb85abff 1374 first_load = get_earlier_stmt (load, first_load);
48e1416a 1375
fb85abff 1376 return first_load;
1377}
1378
1379
d4b21757 1380/* Find the last store in SLP INSTANCE. */
282bf14c 1381
d4b21757 1382static gimple
1383vect_find_last_store_in_slp_instance (slp_instance instance)
1384{
1385 int i;
1386 slp_tree node;
1387 gimple last_store = NULL, store;
1388
1389 node = SLP_INSTANCE_TREE (instance);
f1f41a6c 1390 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
d4b21757 1391 last_store = get_later_stmt (store, last_store);
1392
1393 return last_store;
1394}
1395
ee6e9ba5 1396/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1397
1398static void
1399vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1400 slp_instance instance, slp_tree node,
1401 stmt_vector_for_cost *prologue_cost_vec,
1402 unsigned ncopies_for_cost)
1403{
1404 stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
1405
1406 unsigned i;
1407 slp_tree child;
1408 gimple stmt, s;
1409 stmt_vec_info stmt_info;
1410 tree lhs;
1411 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1412
1413 /* Recurse down the SLP tree. */
1414 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1415 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1416 instance, child, prologue_cost_vec,
1417 ncopies_for_cost);
1418
1419 /* Look at the first scalar stmt to determine the cost. */
1420 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1421 stmt_info = vinfo_for_stmt (stmt);
1422 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1423 {
1424 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1425 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1426 vect_uninitialized_def,
1427 node, prologue_cost_vec, body_cost_vec);
1428 else
1429 {
1430 int i;
1431 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1432 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1433 node, prologue_cost_vec, body_cost_vec);
1434 /* If the load is permuted record the cost for the permutation.
1435 ??? Loads from multiple chains are let through here only
1436 for a single special case involving complex numbers where
1437 in the end no permutation is necessary. */
1438 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1439 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1440 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1441 && vect_get_place_in_interleaving_chain
1442 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1443 {
1444 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1445 stmt_info, 0, vect_body);
1446 break;
1447 }
1448 }
1449 }
1450 else
1451 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1452 stmt_info, 0, vect_body);
1453
1454 /* Scan operands and account for prologue cost of constants/externals.
1455 ??? This over-estimates cost for multiple uses and should be
1456 re-engineered. */
1457 lhs = gimple_get_lhs (stmt);
1458 for (i = 0; i < gimple_num_ops (stmt); ++i)
1459 {
1460 tree def, op = gimple_op (stmt, i);
1461 gimple def_stmt;
1462 enum vect_def_type dt;
1463 if (!op || op == lhs)
1464 continue;
1465 if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
1466 &def_stmt, &def, &dt)
1467 && (dt == vect_constant_def || dt == vect_external_def))
1468 record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
1469 stmt_info, 0, vect_prologue);
1470 }
1471}
1472
1473/* Compute the cost for the SLP instance INSTANCE. */
1474
1475static void
1476vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1477 slp_instance instance, unsigned nunits)
1478{
1479 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1480 unsigned ncopies_for_cost;
1481 stmt_info_for_cost *si;
1482 unsigned i;
1483
1484 /* Calculate the number of vector stmts to create based on the unrolling
1485 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1486 GROUP_SIZE / NUNITS otherwise. */
1487 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1488 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1489
1490 prologue_cost_vec.create (10);
1491 body_cost_vec.create (10);
1492 SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1493 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1494 instance, SLP_INSTANCE_TREE (instance),
1495 &prologue_cost_vec, ncopies_for_cost);
1496
1497 /* Record the prologue costs, which were delayed until we were
1498 sure that SLP was successful. Unlike the body costs, we know
1499 the final values now regardless of the loop vectorization factor. */
1500 void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1501 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1502 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1503 {
1504 struct _stmt_vec_info *stmt_info
1505 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1506 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1507 si->misalign, vect_prologue);
1508 }
1509
1510 prologue_cost_vec.release ();
1511}
d4b21757 1512
ee612634 1513/* Analyze an SLP instance starting from a group of grouped stores. Call
48e1416a 1514 vect_build_slp_tree to build a tree of packed stmts if possible.
fb85abff 1515 Return FALSE if it's impossible to SLP any stmt in the loop. */
1516
1517static bool
37545e54 1518vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
ca91d3f8 1519 gimple stmt, unsigned max_tree_size)
fb85abff 1520{
1521 slp_instance new_instance;
b0f64919 1522 slp_tree node;
21009880 1523 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
fb85abff 1524 unsigned int unrolling_factor = 1, nunits;
eefa05c8 1525 tree vectype, scalar_type = NULL_TREE;
fb85abff 1526 gimple next;
f018d957 1527 unsigned int vectorization_factor = 0;
ee6e9ba5 1528 int i;
fb85abff 1529 unsigned int max_nunits = 0;
f1f41a6c 1530 vec<slp_tree> loads;
eefa05c8 1531 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
f1f41a6c 1532 vec<gimple> scalar_stmts;
eefa05c8 1533
39a5d6b1 1534 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
eefa05c8 1535 {
39a5d6b1 1536 if (dr)
1537 {
1538 scalar_type = TREE_TYPE (DR_REF (dr));
1539 vectype = get_vectype_for_scalar_type (scalar_type);
1540 }
1541 else
1542 {
1543 gcc_assert (loop_vinfo);
1544 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1545 }
1546
21009880 1547 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
eefa05c8 1548 }
1549 else
1550 {
1551 gcc_assert (loop_vinfo);
1552 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
f1f41a6c 1553 group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
eefa05c8 1554 }
48e1416a 1555
fb85abff 1556 if (!vectype)
1557 {
6d8fb6cf 1558 if (dump_enabled_p ())
fb85abff 1559 {
7bd765d4 1560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1561 "Build SLP failed: unsupported data-type ");
1562 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
78bb46f5 1563 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 1564 }
eefa05c8 1565
fb85abff 1566 return false;
1567 }
1568
1569 nunits = TYPE_VECTOR_SUBPARTS (vectype);
37545e54 1570 if (loop_vinfo)
1571 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1572 else
37545e54 1573 vectorization_factor = nunits;
1574
37545e54 1575 /* Calculate the unrolling factor. */
1576 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1577 if (unrolling_factor != 1 && !loop_vinfo)
1578 {
6d8fb6cf 1579 if (dump_enabled_p ())
78bb46f5 1580 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 1581 "Build SLP failed: unrolling required in basic"
78bb46f5 1582 " block SLP\n");
48e1416a 1583
37545e54 1584 return false;
1585 }
1586
ee612634 1587 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
f1f41a6c 1588 scalar_stmts.create (group_size);
fb85abff 1589 next = stmt;
39a5d6b1 1590 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
fb85abff 1591 {
eefa05c8 1592 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1593 while (next)
1594 {
f2104a54 1595 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1596 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
f1f41a6c 1597 scalar_stmts.safe_push (
1598 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f2104a54 1599 else
f1f41a6c 1600 scalar_stmts.safe_push (next);
21009880 1601 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
eefa05c8 1602 }
1603 }
1604 else
1605 {
1606 /* Collect reduction statements. */
f1f41a6c 1607 vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1608 for (i = 0; reductions.iterate (i, &next); i++)
1609 scalar_stmts.safe_push (next);
fb85abff 1610 }
1611
b0f64919 1612 node = vect_create_new_slp_node (scalar_stmts);
fb85abff 1613
f1f41a6c 1614 loads.create (group_size);
fb85abff 1615
1616 /* Build the tree for the SLP instance. */
48e1416a 1617 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
0eba4748 1618 &max_nunits, &loads,
ca91d3f8 1619 vectorization_factor, NULL, NULL, NULL,
1620 max_tree_size))
fb85abff 1621 {
03dab834 1622 /* Calculate the unrolling factor based on the smallest type. */
fb85abff 1623 if (max_nunits > nunits)
1624 unrolling_factor = least_common_multiple (max_nunits, group_size)
1625 / group_size;
48e1416a 1626
03dab834 1627 if (unrolling_factor != 1 && !loop_vinfo)
1628 {
6d8fb6cf 1629 if (dump_enabled_p ())
78bb46f5 1630 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 1631 "Build SLP failed: unrolling required in basic"
78bb46f5 1632 " block SLP\n");
9f793cdf 1633 vect_free_slp_tree (node);
f1f41a6c 1634 loads.release ();
03dab834 1635 return false;
1636 }
1637
1638 /* Create a new SLP instance. */
1639 new_instance = XNEW (struct _slp_instance);
1640 SLP_INSTANCE_TREE (new_instance) = node;
1641 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
fb85abff 1642 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ee6e9ba5 1643 SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
fb85abff 1644 SLP_INSTANCE_LOADS (new_instance) = loads;
1645 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
0eba4748 1646
1647 /* Compute the load permutation. */
1648 slp_tree load_node;
1649 bool loads_permuted = false;
0eba4748 1650 FOR_EACH_VEC_ELT (loads, i, load_node)
1651 {
678e3d6e 1652 vec<unsigned> load_permutation;
0eba4748 1653 int j;
16513aa4 1654 gimple load, first_stmt;
678e3d6e 1655 bool this_load_permuted = false;
1656 load_permutation.create (group_size);
16513aa4 1657 first_stmt = GROUP_FIRST_ELEMENT
1658 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
0eba4748 1659 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1660 {
16513aa4 1661 int load_place
1662 = vect_get_place_in_interleaving_chain (load, first_stmt);
1663 gcc_assert (load_place != -1);
1664 if (load_place != j)
678e3d6e 1665 this_load_permuted = true;
0eba4748 1666 load_permutation.safe_push (load_place);
1667 }
678e3d6e 1668 if (!this_load_permuted)
1669 {
1670 load_permutation.release ();
1671 continue;
1672 }
1673 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1674 loads_permuted = true;
0eba4748 1675 }
24c271ef 1676
1677 if (loads_permuted)
fb85abff 1678 {
678e3d6e 1679 if (!vect_supported_load_permutation_p (new_instance))
fb85abff 1680 {
6d8fb6cf 1681 if (dump_enabled_p ())
fb85abff 1682 {
78bb46f5 1683 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 1684 "Build SLP failed: unsupported load "
1685 "permutation ");
1686 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 1687 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 1688 }
fb85abff 1689 vect_free_slp_instance (new_instance);
1690 return false;
1691 }
1692
1693 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
678e3d6e 1694 = vect_find_first_load_in_slp_instance (new_instance);
fb85abff 1695 }
fb85abff 1696
ee6e9ba5 1697 /* Compute the costs of this SLP instance. */
1698 vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1699 new_instance, TYPE_VECTOR_SUBPARTS (vectype));
f97dec81 1700
37545e54 1701 if (loop_vinfo)
f1f41a6c 1702 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
37545e54 1703 else
f1f41a6c 1704 BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
48e1416a 1705
6d8fb6cf 1706 if (dump_enabled_p ())
7bd765d4 1707 vect_print_slp_tree (MSG_NOTE, node);
fb85abff 1708
1709 return true;
1710 }
1711
1712 /* Failed to SLP. */
1713 /* Free the allocated memory. */
1714 vect_free_slp_tree (node);
f1f41a6c 1715 loads.release ();
48e1416a 1716
37545e54 1717 return false;
fb85abff 1718}
1719
1720
282bf14c 1721/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
fb85abff 1722 trees of packed scalar stmts if SLP is possible. */
1723
1724bool
ca91d3f8 1725vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1726 unsigned max_tree_size)
fb85abff 1727{
1728 unsigned int i;
f1f41a6c 1729 vec<gimple> grouped_stores;
1e094109 1730 vec<gimple> reductions = vNULL;
1731 vec<gimple> reduc_chains = vNULL;
39a5d6b1 1732 gimple first_element;
37545e54 1733 bool ok = false;
fb85abff 1734
6d8fb6cf 1735 if (dump_enabled_p ())
78bb46f5 1736 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
fb85abff 1737
37545e54 1738 if (loop_vinfo)
eefa05c8 1739 {
ee612634 1740 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
39a5d6b1 1741 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
eefa05c8 1742 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1743 }
37545e54 1744 else
ee612634 1745 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
48e1416a 1746
ee612634 1747 /* Find SLP sequences starting from groups of grouped stores. */
f1f41a6c 1748 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
ca91d3f8 1749 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1750 max_tree_size))
37545e54 1751 ok = true;
fb85abff 1752
48e1416a 1753 if (bb_vinfo && !ok)
37545e54 1754 {
6d8fb6cf 1755 if (dump_enabled_p ())
7bd765d4 1756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 1757 "Failed to SLP the basic block.\n");
37545e54 1758
1759 return false;
1760 }
fb85abff 1761
39a5d6b1 1762 if (loop_vinfo
f1f41a6c 1763 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
39a5d6b1 1764 {
1765 /* Find SLP sequences starting from reduction chains. */
f1f41a6c 1766 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
ca91d3f8 1767 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1768 max_tree_size))
39a5d6b1 1769 ok = true;
1770 else
1771 return false;
1772
1773 /* Don't try to vectorize SLP reductions if reduction chain was
1774 detected. */
1775 return ok;
1776 }
1777
eefa05c8 1778 /* Find SLP sequences starting from groups of reductions. */
f1f41a6c 1779 if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
ca91d3f8 1780 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
1781 max_tree_size))
eefa05c8 1782 ok = true;
1783
fb85abff 1784 return true;
1785}
1786
1787
1788/* For each possible SLP instance decide whether to SLP it and calculate overall
bc937a44 1789 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1790 least one instance. */
fb85abff 1791
bc937a44 1792bool
fb85abff 1793vect_make_slp_decision (loop_vec_info loop_vinfo)
1794{
1795 unsigned int i, unrolling_factor = 1;
f1f41a6c 1796 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
fb85abff 1797 slp_instance instance;
1798 int decided_to_slp = 0;
1799
6d8fb6cf 1800 if (dump_enabled_p ())
78bb46f5 1801 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1802 "\n");
fb85abff 1803
f1f41a6c 1804 FOR_EACH_VEC_ELT (slp_instances, i, instance)
fb85abff 1805 {
1806 /* FORNOW: SLP if you can. */
1807 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1808 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1809
282bf14c 1810 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
48e1416a 1811 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
282bf14c 1812 loop-based vectorization. Such stmts will be marked as HYBRID. */
fb85abff 1813 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1814 decided_to_slp++;
1815 }
1816
1817 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1818
6d8fb6cf 1819 if (decided_to_slp && dump_enabled_p ())
b055bc88 1820 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1821 "Decided to SLP %d instances. Unrolling factor %d\n",
7bd765d4 1822 decided_to_slp, unrolling_factor);
bc937a44 1823
1824 return (decided_to_slp > 0);
fb85abff 1825}
1826
1827
1828/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
282bf14c 1829 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
fb85abff 1830
1831static void
1832vect_detect_hybrid_slp_stmts (slp_tree node)
1833{
1834 int i;
f1f41a6c 1835 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1836 gimple stmt = stmts[0];
fb85abff 1837 imm_use_iterator imm_iter;
1838 gimple use_stmt;
a1123047 1839 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
40bcc7c2 1840 slp_tree child;
a1123047 1841 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1842 struct loop *loop = NULL;
1843 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1844 basic_block bb = NULL;
fb85abff 1845
1846 if (!node)
1847 return;
1848
a1123047 1849 if (loop_vinfo)
1850 loop = LOOP_VINFO_LOOP (loop_vinfo);
1851 else
1852 bb = BB_VINFO_BB (bb_vinfo);
1853
f1f41a6c 1854 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
fb85abff 1855 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1856 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1857 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
a1123047 1858 if (gimple_bb (use_stmt)
1859 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1860 || bb == gimple_bb (use_stmt))
1861 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
07be02da 1862 && !STMT_SLP_TYPE (stmt_vinfo)
1863 && (STMT_VINFO_RELEVANT (stmt_vinfo)
2544aaab 1864 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo))
1865 || (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
1866 && STMT_VINFO_RELATED_STMT (stmt_vinfo)
1867 && !STMT_SLP_TYPE (vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo)))))
a1123047 1868 && !(gimple_code (use_stmt) == GIMPLE_PHI
1869 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1870 == vect_reduction_def))
fb85abff 1871 vect_mark_slp_stmts (node, hybrid, i);
1872
f1f41a6c 1873 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1874 vect_detect_hybrid_slp_stmts (child);
fb85abff 1875}
1876
1877
1878/* Find stmts that must be both vectorized and SLPed. */
1879
1880void
1881vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1882{
1883 unsigned int i;
f1f41a6c 1884 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
fb85abff 1885 slp_instance instance;
1886
6d8fb6cf 1887 if (dump_enabled_p ())
78bb46f5 1888 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
1889 "\n");
fb85abff 1890
f1f41a6c 1891 FOR_EACH_VEC_ELT (slp_instances, i, instance)
fb85abff 1892 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1893}
1894
37545e54 1895
1896/* Create and initialize a new bb_vec_info struct for BB, as well as
1897 stmt_vec_info structs for all the stmts in it. */
48e1416a 1898
37545e54 1899static bb_vec_info
1900new_bb_vec_info (basic_block bb)
1901{
1902 bb_vec_info res = NULL;
1903 gimple_stmt_iterator gsi;
1904
1905 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1906 BB_VINFO_BB (res) = bb;
1907
1908 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1909 {
1910 gimple stmt = gsi_stmt (gsi);
1911 gimple_set_uid (stmt, 0);
1912 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1913 }
1914
f1f41a6c 1915 BB_VINFO_GROUPED_STORES (res).create (10);
1916 BB_VINFO_SLP_INSTANCES (res).create (2);
4db2b577 1917 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
37545e54 1918
1919 bb->aux = res;
1920 return res;
1921}
1922
1923
1924/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1925 stmts in the basic block. */
1926
1927static void
1928destroy_bb_vec_info (bb_vec_info bb_vinfo)
1929{
f1f41a6c 1930 vec<slp_instance> slp_instances;
9f793cdf 1931 slp_instance instance;
37545e54 1932 basic_block bb;
1933 gimple_stmt_iterator si;
9f793cdf 1934 unsigned i;
37545e54 1935
1936 if (!bb_vinfo)
1937 return;
1938
1939 bb = BB_VINFO_BB (bb_vinfo);
1940
1941 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1942 {
1943 gimple stmt = gsi_stmt (si);
1944 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1945
1946 if (stmt_info)
1947 /* Free stmt_vec_info. */
1948 free_stmt_vec_info (stmt);
1949 }
1950
23e1875f 1951 vect_destroy_datarefs (NULL, bb_vinfo);
a8af2e86 1952 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
f1f41a6c 1953 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
9f793cdf 1954 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
f1f41a6c 1955 FOR_EACH_VEC_ELT (slp_instances, i, instance)
9f793cdf 1956 vect_free_slp_instance (instance);
f1f41a6c 1957 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
4db2b577 1958 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
37545e54 1959 free (bb_vinfo);
1960 bb->aux = NULL;
1961}
1962
1963
1964/* Analyze statements contained in SLP tree node after recursively analyzing
1965 the subtree. Return TRUE if the operations are supported. */
1966
1967static bool
1968vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1969{
1970 bool dummy;
1971 int i;
1972 gimple stmt;
40bcc7c2 1973 slp_tree child;
37545e54 1974
1975 if (!node)
1976 return true;
1977
f1f41a6c 1978 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1979 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
b0f64919 1980 return false;
37545e54 1981
f1f41a6c 1982 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
37545e54 1983 {
1984 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1985 gcc_assert (stmt_info);
1986 gcc_assert (PURE_SLP_STMT (stmt_info));
1987
1988 if (!vect_analyze_stmt (stmt, &dummy, node))
1989 return false;
1990 }
1991
1992 return true;
1993}
1994
1995
282bf14c 1996/* Analyze statements in SLP instances of the basic block. Return TRUE if the
37545e54 1997 operations are supported. */
1998
1999static bool
2000vect_slp_analyze_operations (bb_vec_info bb_vinfo)
2001{
f1f41a6c 2002 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
37545e54 2003 slp_instance instance;
2004 int i;
2005
f1f41a6c 2006 for (i = 0; slp_instances.iterate (i, &instance); )
37545e54 2007 {
48e1416a 2008 if (!vect_slp_analyze_node_operations (bb_vinfo,
37545e54 2009 SLP_INSTANCE_TREE (instance)))
2010 {
2011 vect_free_slp_instance (instance);
f1f41a6c 2012 slp_instances.ordered_remove (i);
37545e54 2013 }
2014 else
2015 i++;
48e1416a 2016 }
2017
f1f41a6c 2018 if (!slp_instances.length ())
37545e54 2019 return false;
2020
2021 return true;
2022}
2023
32afb748 2024
2025/* Compute the scalar cost of the SLP node NODE and its children
2026 and return it. Do not account defs that are marked in LIFE and
2027 update LIFE according to uses of NODE. */
2028
2029static unsigned
65526f7f 2030vect_bb_slp_scalar_cost (basic_block bb,
d70aebca 2031 slp_tree node, vec<bool, va_heap> *life)
32afb748 2032{
2033 unsigned scalar_cost = 0;
2034 unsigned i;
2035 gimple stmt;
2036 slp_tree child;
2037
2038 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2039 {
2040 unsigned stmt_cost;
2041 ssa_op_iter op_iter;
2042 def_operand_p def_p;
2043 stmt_vec_info stmt_info;
2044
d70aebca 2045 if ((*life)[i])
32afb748 2046 continue;
2047
2048 /* If there is a non-vectorized use of the defs then the scalar
2049 stmt is kept live in which case we do not account it or any
2050 required defs in the SLP children in the scalar cost. This
2051 way we make the vectorization more costly when compared to
2052 the scalar cost. */
2053 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2054 {
2055 imm_use_iterator use_iter;
2056 gimple use_stmt;
2057 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
ab2dc680 2058 if (!is_gimple_debug (use_stmt)
2059 && (gimple_code (use_stmt) == GIMPLE_PHI
2060 || gimple_bb (use_stmt) != bb
2061 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
32afb748 2062 {
d70aebca 2063 (*life)[i] = true;
32afb748 2064 BREAK_FROM_IMM_USE_STMT (use_iter);
2065 }
2066 }
d70aebca 2067 if ((*life)[i])
32afb748 2068 continue;
2069
2070 stmt_info = vinfo_for_stmt (stmt);
2071 if (STMT_VINFO_DATA_REF (stmt_info))
2072 {
2073 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2074 stmt_cost = vect_get_stmt_cost (scalar_load);
2075 else
2076 stmt_cost = vect_get_stmt_cost (scalar_store);
2077 }
2078 else
2079 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2080
2081 scalar_cost += stmt_cost;
2082 }
2083
2084 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
65526f7f 2085 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
32afb748 2086
2087 return scalar_cost;
2088}
2089
ad074595 2090/* Check if vectorization of the basic block is profitable. */
2091
2092static bool
2093vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2094{
f1f41a6c 2095 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
ad074595 2096 slp_instance instance;
4db2b577 2097 int i, j;
2098 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
f97dec81 2099 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
f97dec81 2100 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
ad074595 2101 stmt_vec_info stmt_info = NULL;
f97dec81 2102 stmt_vector_for_cost body_cost_vec;
4db2b577 2103 stmt_info_for_cost *ci;
ad074595 2104
2105 /* Calculate vector costs. */
f1f41a6c 2106 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ad074595 2107 {
f97dec81 2108 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
4db2b577 2109
f1f41a6c 2110 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
f97dec81 2111 {
2112 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2113 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2114 stmt_info, ci->misalign, vect_body);
2115 }
ad074595 2116 }
2117
2118 /* Calculate scalar cost. */
32afb748 2119 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ad074595 2120 {
4997014d 2121 auto_vec<bool, 20> life;
d70aebca 2122 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
65526f7f 2123 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2124 SLP_INSTANCE_TREE (instance),
d70aebca 2125 &life);
ad074595 2126 }
2127
4db2b577 2128 /* Complete the target-specific cost calculation. */
f97dec81 2129 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2130 &vec_inside_cost, &vec_epilogue_cost);
2131
2132 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
4db2b577 2133
6d8fb6cf 2134 if (dump_enabled_p ())
ad074595 2135 {
7bd765d4 2136 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2137 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2138 vec_inside_cost);
2139 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2140 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
78bb46f5 2141 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
ad074595 2142 }
2143
2144 /* Vectorization is profitable if its cost is less than the cost of scalar
2145 version. */
2146 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2147 return false;
2148
2149 return true;
2150}
2151
2152/* Check if the basic block can be vectorized. */
37545e54 2153
5bbe6156 2154static bb_vec_info
2155vect_slp_analyze_bb_1 (basic_block bb)
37545e54 2156{
2157 bb_vec_info bb_vinfo;
f1f41a6c 2158 vec<slp_instance> slp_instances;
37545e54 2159 slp_instance instance;
5bbe6156 2160 int i;
91a74fc6 2161 int min_vf = 2;
ca91d3f8 2162 unsigned n_stmts = 0;
d4b21757 2163
37545e54 2164 bb_vinfo = new_bb_vec_info (bb);
2165 if (!bb_vinfo)
2166 return NULL;
2167
ca91d3f8 2168 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
37545e54 2169 {
6d8fb6cf 2170 if (dump_enabled_p ())
7bd765d4 2171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2172 "not vectorized: unhandled data-ref in basic "
2173 "block.\n");
48e1416a 2174
37545e54 2175 destroy_bb_vec_info (bb_vinfo);
2176 return NULL;
2177 }
2178
07e3bcbf 2179 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
37545e54 2180 {
6d8fb6cf 2181 if (dump_enabled_p ())
7bd765d4 2182 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2183 "not vectorized: not enough data-refs in "
2184 "basic block.\n");
37545e54 2185
2186 destroy_bb_vec_info (bb_vinfo);
2187 return NULL;
2188 }
2189
68f15e9d 2190 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2191 {
2192 if (dump_enabled_p ())
2193 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2194 "not vectorized: unhandled data access in "
2195 "basic block.\n");
2196
2197 destroy_bb_vec_info (bb_vinfo);
2198 return NULL;
2199 }
2200
4c0c783a 2201 vect_pattern_recog (NULL, bb_vinfo);
2202
37545e54 2203 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2204 {
6d8fb6cf 2205 if (dump_enabled_p ())
7bd765d4 2206 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2207 "not vectorized: bad data alignment in basic "
2208 "block.\n");
48e1416a 2209
37545e54 2210 destroy_bb_vec_info (bb_vinfo);
2211 return NULL;
2212 }
48e1416a 2213
37545e54 2214 /* Check the SLP opportunities in the basic block, analyze and build SLP
2215 trees. */
ca91d3f8 2216 if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
37545e54 2217 {
6d8fb6cf 2218 if (dump_enabled_p ())
7bd765d4 2219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2220 "not vectorized: failed to find SLP opportunities "
2221 "in basic block.\n");
37545e54 2222
2223 destroy_bb_vec_info (bb_vinfo);
2224 return NULL;
2225 }
48e1416a 2226
37545e54 2227 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2228
2229 /* Mark all the statements that we want to vectorize as pure SLP and
2230 relevant. */
f1f41a6c 2231 FOR_EACH_VEC_ELT (slp_instances, i, instance)
37545e54 2232 {
2233 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2234 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
48e1416a 2235 }
37545e54 2236
1824e1e2 2237 /* Mark all the statements that we do not want to vectorize. */
2238 for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2239 !gsi_end_p (gsi); gsi_next (&gsi))
2240 {
2241 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2242 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2243 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2244 }
2245
2246 /* Analyze dependences. At this point all stmts not participating in
2247 vectorization have to be marked. Dependence analysis assumes
2248 that we either vectorize all SLP instances or none at all. */
2249 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2250 {
2251 if (dump_enabled_p ())
2252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2253 "not vectorized: unhandled data dependence "
2254 "in basic block.\n");
2255
2256 destroy_bb_vec_info (bb_vinfo);
2257 return NULL;
2258 }
2259
4db2b577 2260 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1ad41595 2261 {
6d8fb6cf 2262 if (dump_enabled_p ())
7bd765d4 2263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2264 "not vectorized: unsupported alignment in basic "
2265 "block.\n");
1ad41595 2266 destroy_bb_vec_info (bb_vinfo);
2267 return NULL;
2268 }
2269
37545e54 2270 if (!vect_slp_analyze_operations (bb_vinfo))
2271 {
6d8fb6cf 2272 if (dump_enabled_p ())
78bb46f5 2273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 2274 "not vectorized: bad operation in basic block.\n");
37545e54 2275
2276 destroy_bb_vec_info (bb_vinfo);
2277 return NULL;
2278 }
2279
ad074595 2280 /* Cost model: check if the vectorization is worthwhile. */
3e398f5b 2281 if (!unlimited_cost_model (NULL)
ad074595 2282 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2283 {
6d8fb6cf 2284 if (dump_enabled_p ())
7bd765d4 2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286 "not vectorized: vectorization is not "
2287 "profitable.\n");
ad074595 2288
2289 destroy_bb_vec_info (bb_vinfo);
2290 return NULL;
2291 }
2292
6d8fb6cf 2293 if (dump_enabled_p ())
7bd765d4 2294 dump_printf_loc (MSG_NOTE, vect_location,
2295 "Basic block will be vectorized using SLP\n");
37545e54 2296
2297 return bb_vinfo;
2298}
2299
2300
5bbe6156 2301bb_vec_info
2302vect_slp_analyze_bb (basic_block bb)
2303{
2304 bb_vec_info bb_vinfo;
2305 int insns = 0;
2306 gimple_stmt_iterator gsi;
2307 unsigned int vector_sizes;
2308
6d8fb6cf 2309 if (dump_enabled_p ())
7bd765d4 2310 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
5bbe6156 2311
2312 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2313 {
2314 gimple stmt = gsi_stmt (gsi);
2315 if (!is_gimple_debug (stmt)
2316 && !gimple_nop_p (stmt)
2317 && gimple_code (stmt) != GIMPLE_LABEL)
2318 insns++;
2319 }
2320
2321 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2322 {
6d8fb6cf 2323 if (dump_enabled_p ())
7bd765d4 2324 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2325 "not vectorized: too many instructions in "
2326 "basic block.\n");
5bbe6156 2327
2328 return NULL;
2329 }
2330
2331 /* Autodetect first vector size we try. */
2332 current_vector_size = 0;
2333 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2334
2335 while (1)
2336 {
2337 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2338 if (bb_vinfo)
2339 return bb_vinfo;
2340
2341 destroy_bb_vec_info (bb_vinfo);
2342
2343 vector_sizes &= ~current_vector_size;
2344 if (vector_sizes == 0
2345 || current_vector_size == 0)
2346 return NULL;
2347
2348 /* Try the next biggest vector size. */
2349 current_vector_size = 1 << floor_log2 (vector_sizes);
6d8fb6cf 2350 if (dump_enabled_p ())
7bd765d4 2351 dump_printf_loc (MSG_NOTE, vect_location,
2352 "***** Re-trying analysis with "
2353 "vector size %d\n", current_vector_size);
5bbe6156 2354 }
2355}
2356
2357
48e1416a 2358/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
282bf14c 2359 the number of created vector stmts depends on the unrolling factor).
2360 However, the actual number of vector stmts for every SLP node depends on
2361 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2362 should be updated. In this function we assume that the inside costs
2363 calculated in vect_model_xxx_cost are linear in ncopies. */
fb85abff 2364
2365void
2366vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2367{
4db2b577 2368 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
f1f41a6c 2369 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
fb85abff 2370 slp_instance instance;
f97dec81 2371 stmt_vector_for_cost body_cost_vec;
4db2b577 2372 stmt_info_for_cost *si;
f97dec81 2373 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
fb85abff 2374
6d8fb6cf 2375 if (dump_enabled_p ())
7bd765d4 2376 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2377 "=== vect_update_slp_costs_according_to_vf ===\n");
fb85abff 2378
f1f41a6c 2379 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4db2b577 2380 {
2381 /* We assume that costs are linear in ncopies. */
2382 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2383
2384 /* Record the instance's instructions in the target cost model.
2385 This was delayed until here because the count of instructions
2386 isn't known beforehand. */
f97dec81 2387 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
4db2b577 2388
f1f41a6c 2389 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
f97dec81 2390 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2391 vinfo_for_stmt (si->stmt), si->misalign,
2392 vect_body);
4db2b577 2393 }
fb85abff 2394}
2395
37545e54 2396
48e1416a 2397/* For constant and loop invariant defs of SLP_NODE this function returns
2398 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
987502fa 2399 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2400 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
eefa05c8 2401 REDUC_INDEX is the index of the reduction operand in the statements, unless
2402 it is -1. */
fb85abff 2403
2404static void
09e31a48 2405vect_get_constant_vectors (tree op, slp_tree slp_node,
f1f41a6c 2406 vec<tree> *vec_oprnds,
eefa05c8 2407 unsigned int op_num, unsigned int number_of_vectors,
2408 int reduc_index)
fb85abff 2409{
f1f41a6c 2410 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2411 gimple stmt = stmts[0];
fb85abff 2412 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
fadf62f4 2413 unsigned nunits;
fb85abff 2414 tree vec_cst;
fadf62f4 2415 tree *elts;
2416 unsigned j, number_of_places_left_in_vector;
fb85abff 2417 tree vector_type;
09e31a48 2418 tree vop;
f1f41a6c 2419 int group_size = stmts.length ();
fb85abff 2420 unsigned int vec_num, i;
fadf62f4 2421 unsigned number_of_copies = 1;
f1f41a6c 2422 vec<tree> voprnds;
2423 voprnds.create (number_of_vectors);
fb85abff 2424 bool constant_p, is_store;
eefa05c8 2425 tree neutral_op = NULL;
98dbf32d 2426 enum tree_code code = gimple_expr_code (stmt);
d706d35c 2427 gimple def_stmt;
2428 struct loop *loop;
0ff8139c 2429 gimple_seq ctor_seq = NULL;
eefa05c8 2430
38fbbe0d 2431 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2432 && reduc_index != -1)
eefa05c8 2433 {
eefa05c8 2434 op_num = reduc_index - 1;
09e31a48 2435 op = gimple_op (stmt, reduc_index);
eefa05c8 2436 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
282bf14c 2437 we need either neutral operands or the original operands. See
eefa05c8 2438 get_initial_def_for_reduction() for details. */
2439 switch (code)
2440 {
2441 case WIDEN_SUM_EXPR:
2442 case DOT_PROD_EXPR:
2443 case PLUS_EXPR:
2444 case MINUS_EXPR:
2445 case BIT_IOR_EXPR:
2446 case BIT_XOR_EXPR:
2447 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2448 neutral_op = build_real (TREE_TYPE (op), dconst0);
2449 else
2450 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2451
2452 break;
2453
2454 case MULT_EXPR:
eefa05c8 2455 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2456 neutral_op = build_real (TREE_TYPE (op), dconst1);
2457 else
2458 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2459
2460 break;
2461
b036fcd8 2462 case BIT_AND_EXPR:
2463 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2464 break;
2465
298cab21 2466 /* For MIN/MAX we don't have an easy neutral operand but
2467 the initial values can be used fine here. Only for
2468 a reduction chain we have to force a neutral element. */
2469 case MAX_EXPR:
2470 case MIN_EXPR:
2471 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2472 neutral_op = NULL;
2473 else
2474 {
2475 def_stmt = SSA_NAME_DEF_STMT (op);
2476 loop = (gimple_bb (stmt))->loop_father;
2477 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2478 loop_preheader_edge (loop));
2479 }
2480 break;
d706d35c 2481
eefa05c8 2482 default:
d706d35c 2483 neutral_op = NULL;
eefa05c8 2484 }
2485 }
fb85abff 2486
2487 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2488 {
2489 is_store = true;
2490 op = gimple_assign_rhs1 (stmt);
2491 }
2492 else
09e31a48 2493 is_store = false;
2494
2495 gcc_assert (op);
fb85abff 2496
2497 if (CONSTANT_CLASS_P (op))
987502fa 2498 constant_p = true;
fb85abff 2499 else
987502fa 2500 constant_p = false;
2501
09e31a48 2502 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
ece0be9d 2503 gcc_assert (vector_type);
fb85abff 2504 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2505
2506 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
48e1416a 2507 created vectors. It is greater than 1 if unrolling is performed.
fb85abff 2508
2509 For example, we have two scalar operands, s1 and s2 (e.g., group of
2510 strided accesses of size two), while NUNITS is four (i.e., four scalars
f2104a54 2511 of this type can be packed in a vector). The output vector will contain
2512 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
fb85abff 2513 will be 2).
2514
48e1416a 2515 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
fb85abff 2516 containing the operands.
2517
2518 For example, NUNITS is four as before, and the group size is 8
f2104a54 2519 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
fb85abff 2520 {s5, s6, s7, s8}. */
48e1416a 2521
fb85abff 2522 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2523
2524 number_of_places_left_in_vector = nunits;
fadf62f4 2525 elts = XALLOCAVEC (tree, nunits);
fb85abff 2526 for (j = 0; j < number_of_copies; j++)
2527 {
f1f41a6c 2528 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
fb85abff 2529 {
2530 if (is_store)
2531 op = gimple_assign_rhs1 (stmt);
98dbf32d 2532 else
f2104a54 2533 {
98dbf32d 2534 switch (code)
f2104a54 2535 {
98dbf32d 2536 case COND_EXPR:
2537 if (op_num == 0 || op_num == 1)
2538 {
2539 tree cond = gimple_assign_rhs1 (stmt);
2540 op = TREE_OPERAND (cond, op_num);
2541 }
2542 else
2543 {
2544 if (op_num == 2)
2545 op = gimple_assign_rhs2 (stmt);
2546 else
2547 op = gimple_assign_rhs3 (stmt);
2548 }
2549 break;
2550
2551 case CALL_EXPR:
2552 op = gimple_call_arg (stmt, op_num);
2553 break;
2554
aa17a496 2555 case LSHIFT_EXPR:
2556 case RSHIFT_EXPR:
2557 case LROTATE_EXPR:
2558 case RROTATE_EXPR:
2559 op = gimple_op (stmt, op_num + 1);
2560 /* Unlike the other binary operators, shifts/rotates have
2561 the shift count being int, instead of the same type as
2562 the lhs, so make sure the scalar is the right type if
2563 we are dealing with vectors of
2564 long long/long/short/char. */
3533e58d 2565 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
aa17a496 2566 op = fold_convert (TREE_TYPE (vector_type), op);
2567 break;
2568
98dbf32d 2569 default:
2570 op = gimple_op (stmt, op_num + 1);
aa17a496 2571 break;
f2104a54 2572 }
2573 }
48e1416a 2574
eefa05c8 2575 if (reduc_index != -1)
2576 {
d706d35c 2577 loop = (gimple_bb (stmt))->loop_father;
2578 def_stmt = SSA_NAME_DEF_STMT (op);
eefa05c8 2579
2580 gcc_assert (loop);
39a5d6b1 2581
2582 /* Get the def before the loop. In reduction chain we have only
2583 one initial value. */
2584 if ((j != (number_of_copies - 1)
2585 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2586 && i != 0))
2587 && neutral_op)
eefa05c8 2588 op = neutral_op;
39a5d6b1 2589 else
2590 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2591 loop_preheader_edge (loop));
eefa05c8 2592 }
2593
fb85abff 2594 /* Create 'vect_ = {op0,op1,...,opn}'. */
fb85abff 2595 number_of_places_left_in_vector--;
0ff8139c 2596 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
6fa78636 2597 {
3533e58d 2598 if (CONSTANT_CLASS_P (op))
0ff8139c 2599 {
2600 op = fold_unary (VIEW_CONVERT_EXPR,
2601 TREE_TYPE (vector_type), op);
2602 gcc_assert (op && CONSTANT_CLASS_P (op));
2603 }
2604 else
2605 {
2606 tree new_temp
2607 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2608 gimple init_stmt;
2609 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2610 op);
2611 init_stmt
2612 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
806413d2 2613 new_temp, op);
0ff8139c 2614 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2615 op = new_temp;
2616 }
6fa78636 2617 }
fadf62f4 2618 elts[number_of_places_left_in_vector] = op;
3533e58d 2619 if (!CONSTANT_CLASS_P (op))
2620 constant_p = false;
fb85abff 2621
2622 if (number_of_places_left_in_vector == 0)
2623 {
2624 number_of_places_left_in_vector = nunits;
2625
2626 if (constant_p)
fadf62f4 2627 vec_cst = build_vector (vector_type, elts);
fb85abff 2628 else
fadf62f4 2629 {
f1f41a6c 2630 vec<constructor_elt, va_gc> *v;
fadf62f4 2631 unsigned k;
f1f41a6c 2632 vec_alloc (v, nunits);
fadf62f4 2633 for (k = 0; k < nunits; ++k)
2634 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2635 vec_cst = build_constructor (vector_type, v);
2636 }
f1f41a6c 2637 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2638 vector_type, NULL));
0ff8139c 2639 if (ctor_seq != NULL)
2640 {
f1f41a6c 2641 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
0ff8139c 2642 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2643 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2644 GSI_SAME_STMT);
2645 ctor_seq = NULL;
2646 }
fb85abff 2647 }
2648 }
2649 }
2650
48e1416a 2651 /* Since the vectors are created in the reverse order, we should invert
fb85abff 2652 them. */
f1f41a6c 2653 vec_num = voprnds.length ();
fadf62f4 2654 for (j = vec_num; j != 0; j--)
fb85abff 2655 {
f1f41a6c 2656 vop = voprnds[j - 1];
2657 vec_oprnds->quick_push (vop);
fb85abff 2658 }
2659
f1f41a6c 2660 voprnds.release ();
fb85abff 2661
2662 /* In case that VF is greater than the unrolling factor needed for the SLP
48e1416a 2663 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2664 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
fb85abff 2665 to replicate the vectors. */
f1f41a6c 2666 while (number_of_vectors > vec_oprnds->length ())
fb85abff 2667 {
eefa05c8 2668 tree neutral_vec = NULL;
2669
2670 if (neutral_op)
2671 {
2672 if (!neutral_vec)
b797154e 2673 neutral_vec = build_vector_from_val (vector_type, neutral_op);
eefa05c8 2674
f1f41a6c 2675 vec_oprnds->quick_push (neutral_vec);
eefa05c8 2676 }
2677 else
2678 {
f1f41a6c 2679 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2680 vec_oprnds->quick_push (vop);
eefa05c8 2681 }
fb85abff 2682 }
2683}
2684
2685
2686/* Get vectorized definitions from SLP_NODE that contains corresponding
2687 vectorized def-stmts. */
2688
2689static void
f1f41a6c 2690vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
fb85abff 2691{
2692 tree vec_oprnd;
2693 gimple vec_def_stmt;
2694 unsigned int i;
2695
f1f41a6c 2696 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
fb85abff 2697
f1f41a6c 2698 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
fb85abff 2699 {
2700 gcc_assert (vec_def_stmt);
2701 vec_oprnd = gimple_get_lhs (vec_def_stmt);
f1f41a6c 2702 vec_oprnds->quick_push (vec_oprnd);
fb85abff 2703 }
2704}
2705
2706
48e1416a 2707/* Get vectorized definitions for SLP_NODE.
2708 If the scalar definitions are loop invariants or constants, collect them and
fb85abff 2709 call vect_get_constant_vectors() to create vector stmts.
2710 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
b0f64919 2711 must be stored in the corresponding child of SLP_NODE, and we call
2712 vect_get_slp_vect_defs () to retrieve them. */
48e1416a 2713
fb85abff 2714void
f1f41a6c 2715vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
7f7695a7 2716 vec<vec<tree> > *vec_oprnds, int reduc_index)
fb85abff 2717{
a8bcfda2 2718 gimple first_stmt;
b0f64919 2719 int number_of_vects = 0, i;
2720 unsigned int child_index = 0;
48e1416a 2721 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
b0f64919 2722 slp_tree child = NULL;
7f7695a7 2723 vec<tree> vec_defs;
a8bcfda2 2724 tree oprnd;
b0f64919 2725 bool vectorized_defs;
fb85abff 2726
f1f41a6c 2727 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2728 FOR_EACH_VEC_ELT (ops, i, oprnd)
fb85abff 2729 {
b0f64919 2730 /* For each operand we check if it has vectorized definitions in a child
2731 node or we need to create them (for invariants and constants). We
2732 check if the LHS of the first stmt of the next child matches OPRND.
2733 If it does, we found the correct child. Otherwise, we call
2734 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2735 to check this child node for the next operand. */
2736 vectorized_defs = false;
f1f41a6c 2737 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
fb85abff 2738 {
678e3d6e 2739 child = SLP_TREE_CHILDREN (slp_node)[child_index];
b0f64919 2740
a8bcfda2 2741 /* We have to check both pattern and original def, if available. */
2742 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2743 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
fb85abff 2744
a8bcfda2 2745 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2746 || (related
2747 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2748 {
2749 /* The number of vector defs is determined by the number of
2750 vector statements in the node from which we get those
b0f64919 2751 statements. */
a8bcfda2 2752 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2753 vectorized_defs = true;
b0f64919 2754 child_index++;
a8bcfda2 2755 }
b0f64919 2756 }
fb85abff 2757
b0f64919 2758 if (!vectorized_defs)
2759 {
2760 if (i == 0)
2761 {
2762 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2763 /* Number of vector stmts was calculated according to LHS in
2764 vect_schedule_slp_instance (), fix it by replacing LHS with
2765 RHS, if necessary. See vect_get_smallest_scalar_type () for
2766 details. */
2767 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2768 &rhs_size_unit);
2769 if (rhs_size_unit != lhs_size_unit)
2770 {
2771 number_of_vects *= rhs_size_unit;
2772 number_of_vects /= lhs_size_unit;
2773 }
2774 }
2775 }
eefa05c8 2776
b0f64919 2777 /* Allocate memory for vectorized defs. */
7f7695a7 2778 vec_defs = vNULL;
2779 vec_defs.create (number_of_vects);
fb85abff 2780
b0f64919 2781 /* For reduction defs we call vect_get_constant_vectors (), since we are
2782 looking for initial loop invariant values. */
2783 if (vectorized_defs && reduc_index == -1)
2784 /* The defs are already vectorized. */
7f7695a7 2785 vect_get_slp_vect_defs (child, &vec_defs);
b0f64919 2786 else
2787 /* Build vectors from scalar defs. */
7f7695a7 2788 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
b0f64919 2789 number_of_vects, reduc_index);
fb85abff 2790
7f7695a7 2791 vec_oprnds->quick_push (vec_defs);
fb85abff 2792
b0f64919 2793 /* For reductions, we only need initial values. */
2794 if (reduc_index != -1)
2795 return;
2796 }
fb85abff 2797}
2798
37545e54 2799
48e1416a 2800/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
fb85abff 2801 building a vector of type MASK_TYPE from it) and two input vectors placed in
2802 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2803 shifting by STRIDE elements of DR_CHAIN for every copy.
2804 (STRIDE is the number of vectorized stmts for NODE divided by the number of
48e1416a 2805 copies).
fb85abff 2806 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2807 the created stmts must be inserted. */
2808
2809static inline void
48e1416a 2810vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
afaef242 2811 tree mask, int first_vec_indx, int second_vec_indx,
48e1416a 2812 gimple_stmt_iterator *gsi, slp_tree node,
f1f41a6c 2813 tree vectype, vec<tree> dr_chain,
fb85abff 2814 int ncopies, int vect_stmts_counter)
2815{
afaef242 2816 tree perm_dest;
fb85abff 2817 gimple perm_stmt = NULL;
2818 stmt_vec_info next_stmt_info;
f018d957 2819 int i, stride;
fb85abff 2820 tree first_vec, second_vec, data_ref;
fb85abff 2821
fb85abff 2822 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
fb85abff 2823
48e1416a 2824 /* Initialize the vect stmts of NODE to properly insert the generated
fb85abff 2825 stmts later. */
f1f41a6c 2826 for (i = SLP_TREE_VEC_STMTS (node).length ();
fb85abff 2827 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
f1f41a6c 2828 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
fb85abff 2829
2830 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2831 for (i = 0; i < ncopies; i++)
2832 {
f1f41a6c 2833 first_vec = dr_chain[first_vec_indx];
2834 second_vec = dr_chain[second_vec_indx];
fb85abff 2835
fb85abff 2836 /* Generate the permute statement. */
446e85eb 2837 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2838 first_vec, second_vec, mask);
fb85abff 2839 data_ref = make_ssa_name (perm_dest, perm_stmt);
964ec9cd 2840 gimple_set_lhs (perm_stmt, data_ref);
fb85abff 2841 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
fb85abff 2842
48e1416a 2843 /* Store the vector statement in NODE. */
f1f41a6c 2844 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
fb85abff 2845
2846 first_vec_indx += stride;
2847 second_vec_indx += stride;
2848 }
2849
2850 /* Mark the scalar stmt as vectorized. */
2851 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2852 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2853}
2854
2855
48e1416a 2856/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
fb85abff 2857 return in CURRENT_MASK_ELEMENT its equivalent in target specific
282bf14c 2858 representation. Check that the mask is valid and return FALSE if not.
fb85abff 2859 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2860 the next vector, i.e., the current first vector is not needed. */
48e1416a 2861
fb85abff 2862static bool
48e1416a 2863vect_get_mask_element (gimple stmt, int first_mask_element, int m,
fb85abff 2864 int mask_nunits, bool only_one_vec, int index,
e21c468f 2865 unsigned char *mask, int *current_mask_element,
90674129 2866 bool *need_next_vector, int *number_of_mask_fixes,
2867 bool *mask_fixed, bool *needs_first_vector)
fb85abff 2868{
2869 int i;
fb85abff 2870
2871 /* Convert to target specific representation. */
2872 *current_mask_element = first_mask_element + m;
2873 /* Adjust the value in case it's a mask for second and third vectors. */
90674129 2874 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
fb85abff 2875
2876 if (*current_mask_element < mask_nunits)
90674129 2877 *needs_first_vector = true;
fb85abff 2878
2879 /* We have only one input vector to permute but the mask accesses values in
2880 the next vector as well. */
2881 if (only_one_vec && *current_mask_element >= mask_nunits)
2882 {
6d8fb6cf 2883 if (dump_enabled_p ())
fb85abff 2884 {
78bb46f5 2885 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 2886 "permutation requires at least two vectors ");
2887 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 2888 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 2889 }
2890
2891 return false;
2892 }
2893
2894 /* The mask requires the next vector. */
e5cf270d 2895 while (*current_mask_element >= mask_nunits * 2)
fb85abff 2896 {
90674129 2897 if (*needs_first_vector || *mask_fixed)
fb85abff 2898 {
2899 /* We either need the first vector too or have already moved to the
48e1416a 2900 next vector. In both cases, this permutation needs three
fb85abff 2901 vectors. */
6d8fb6cf 2902 if (dump_enabled_p ())
fb85abff 2903 {
7bd765d4 2904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2905 "permutation requires at "
2906 "least three vectors ");
2907 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 2908 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 2909 }
2910
2911 return false;
2912 }
2913
2914 /* We move to the next vector, dropping the first one and working with
2915 the second and the third - we need to adjust the values of the mask
2916 accordingly. */
90674129 2917 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
fb85abff 2918
2919 for (i = 0; i < index; i++)
90674129 2920 mask[i] -= mask_nunits * *number_of_mask_fixes;
fb85abff 2921
90674129 2922 (*number_of_mask_fixes)++;
2923 *mask_fixed = true;
fb85abff 2924 }
2925
90674129 2926 *need_next_vector = *mask_fixed;
fb85abff 2927
2928 /* This was the last element of this mask. Start a new one. */
2929 if (index == mask_nunits - 1)
2930 {
90674129 2931 *number_of_mask_fixes = 1;
2932 *mask_fixed = false;
2933 *needs_first_vector = false;
fb85abff 2934 }
2935
2936 return true;
2937}
2938
2939
2940/* Generate vector permute statements from a list of loads in DR_CHAIN.
2941 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
678e3d6e 2942 permute statements for the SLP node NODE of the SLP instance
2943 SLP_NODE_INSTANCE. */
2944
fb85abff 2945bool
678e3d6e 2946vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
fb85abff 2947 gimple_stmt_iterator *gsi, int vf,
2948 slp_instance slp_node_instance, bool analyze_only)
2949{
678e3d6e 2950 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
fb85abff 2951 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2952 tree mask_element_type = NULL_TREE, mask_type;
964ec9cd 2953 int i, j, k, nunits, vec_index = 0, scalar_index;
964ec9cd 2954 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
fb85abff 2955 gimple next_scalar_stmt;
2956 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2957 int first_mask_element;
e21c468f 2958 int index, unroll_factor, current_mask_element, ncopies;
2959 unsigned char *mask;
fb85abff 2960 bool only_one_vec = false, need_next_vector = false;
2961 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
90674129 2962 int number_of_mask_fixes = 1;
2963 bool mask_fixed = false;
2964 bool needs_first_vector = false;
3754d046 2965 machine_mode mode;
fb85abff 2966
e21c468f 2967 mode = TYPE_MODE (vectype);
2968
2969 if (!can_vec_perm_p (mode, false, NULL))
fb85abff 2970 {
6d8fb6cf 2971 if (dump_enabled_p ())
fb85abff 2972 {
7bd765d4 2973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2974 "no vect permute for ");
2975 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 2976 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 2977 }
964ec9cd 2978 return false;
fb85abff 2979 }
2980
964ec9cd 2981 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2982 same size as the vector element being permuted. */
3cea8318 2983 mask_element_type = lang_hooks.types.type_for_mode
2984 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
fb85abff 2985 mask_type = get_vectype_for_scalar_type (mask_element_type);
fb85abff 2986 nunits = TYPE_VECTOR_SUBPARTS (vectype);
e21c468f 2987 mask = XALLOCAVEC (unsigned char, nunits);
fb85abff 2988 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2989
2990 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2991 unrolling factor. */
48e1416a 2992 orig_vec_stmts_num = group_size *
fb85abff 2993 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2994 if (orig_vec_stmts_num == 1)
2995 only_one_vec = true;
2996
48e1416a 2997 /* Number of copies is determined by the final vectorization factor
fb85abff 2998 relatively to SLP_NODE_INSTANCE unrolling factor. */
48e1416a 2999 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
fb85abff 3000
678e3d6e 3001 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3002 return false;
3003
48e1416a 3004 /* Generate permutation masks for every NODE. Number of masks for each NODE
3005 is equal to GROUP_SIZE.
3006 E.g., we have a group of three nodes with three loads from the same
3007 location in each node, and the vector size is 4. I.e., we have a
3008 a0b0c0a1b1c1... sequence and we need to create the following vectors:
fb85abff 3009 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3010 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3011 ...
3012
964ec9cd 3013 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
48e1416a 3014 The last mask is illegal since we assume two operands for permute
282bf14c 3015 operation, and the mask element values can't be outside that range.
3016 Hence, the last mask must be converted into {2,5,5,5}.
48e1416a 3017 For the first two permutations we need the first and the second input
fb85abff 3018 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
48e1416a 3019 we need the second and the third vectors: {b1,c1,a2,b2} and
fb85abff 3020 {c2,a3,b3,c3}. */
3021
fb85abff 3022 {
3023 scalar_index = 0;
3024 index = 0;
3025 vect_stmts_counter = 0;
3026 vec_index = 0;
3027 first_vec_index = vec_index++;
3028 if (only_one_vec)
3029 second_vec_index = first_vec_index;
3030 else
3031 second_vec_index = vec_index++;
3032
3033 for (j = 0; j < unroll_factor; j++)
3034 {
3035 for (k = 0; k < group_size; k++)
3036 {
678e3d6e 3037 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
964ec9cd 3038 first_mask_element = i + j * group_size;
3039 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3040 nunits, only_one_vec, index,
3041 mask, &current_mask_element,
3042 &need_next_vector,
3043 &number_of_mask_fixes, &mask_fixed,
3044 &needs_first_vector))
3045 return false;
e5cf270d 3046 gcc_assert (current_mask_element < 2 * nunits);
964ec9cd 3047 mask[index++] = current_mask_element;
fb85abff 3048
964ec9cd 3049 if (index == nunits)
fb85abff 3050 {
678e3d6e 3051 index = 0;
e21c468f 3052 if (!can_vec_perm_p (mode, false, mask))
3053 {
6d8fb6cf 3054 if (dump_enabled_p ())
e21c468f 3055 {
7bd765d4 3056 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3057 vect_location,
3058 "unsupported vect permute { ");
e21c468f 3059 for (i = 0; i < nunits; ++i)
7bd765d4 3060 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3061 mask[i]);
3062 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
e21c468f 3063 }
3064 return false;
3065 }
3066
fb85abff 3067 if (!analyze_only)
3068 {
678e3d6e 3069 int l;
3070 tree mask_vec, *mask_elts;
3071 mask_elts = XALLOCAVEC (tree, nunits);
3072 for (l = 0; l < nunits; ++l)
3073 mask_elts[l] = build_int_cst (mask_element_type,
3074 mask[l]);
3075 mask_vec = build_vector (mask_type, mask_elts);
3076
3077 if (need_next_vector)
fb85abff 3078 {
3079 first_vec_index = second_vec_index;
3080 second_vec_index = vec_index;
3081 }
3082
f1f41a6c 3083 next_scalar_stmt
3084 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
fb85abff 3085
3086 vect_create_mask_and_perm (stmt, next_scalar_stmt,
afaef242 3087 mask_vec, first_vec_index, second_vec_index,
964ec9cd 3088 gsi, node, vectype, dr_chain,
afaef242 3089 ncopies, vect_stmts_counter++);
fb85abff 3090 }
48e1416a 3091 }
3092 }
3093 }
3094 }
fb85abff 3095
fb85abff 3096 return true;
3097}
3098
3099
3100
3101/* Vectorize SLP instance tree in postorder. */
3102
3103static bool
3104vect_schedule_slp_instance (slp_tree node, slp_instance instance,
37545e54 3105 unsigned int vectorization_factor)
fb85abff 3106{
3107 gimple stmt;
ee612634 3108 bool grouped_store, is_store;
fb85abff 3109 gimple_stmt_iterator si;
3110 stmt_vec_info stmt_info;
3111 unsigned int vec_stmts_size, nunits, group_size;
3112 tree vectype;
3113 int i;
40bcc7c2 3114 slp_tree child;
fb85abff 3115
3116 if (!node)
3117 return false;
3118
f1f41a6c 3119 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 3120 vect_schedule_slp_instance (child, instance, vectorization_factor);
48e1416a 3121
f1f41a6c 3122 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
fb85abff 3123 stmt_info = vinfo_for_stmt (stmt);
3124
3125 /* VECTYPE is the type of the destination. */
b334cbba 3126 vectype = STMT_VINFO_VECTYPE (stmt_info);
fb85abff 3127 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3128 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3129
3130 /* For each SLP instance calculate number of vector stmts to be created
282bf14c 3131 for the scalar stmts in each node of the SLP tree. Number of vector
fb85abff 3132 elements in one vector iteration is the number of scalar elements in
3133 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3134 size. */
3135 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3136
f1f41a6c 3137 if (!SLP_TREE_VEC_STMTS (node).exists ())
fb85abff 3138 {
f1f41a6c 3139 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
fb85abff 3140 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3141 }
3142
6d8fb6cf 3143 if (dump_enabled_p ())
fb85abff 3144 {
7bd765d4 3145 dump_printf_loc (MSG_NOTE,vect_location,
3146 "------>vectorizing SLP node starting from: ");
3147 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 3148 dump_printf (MSG_NOTE, "\n");
48e1416a 3149 }
fb85abff 3150
3151 /* Loads should be inserted before the first load. */
3152 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
ee612634 3153 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
24c271ef 3154 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
678e3d6e 3155 && SLP_TREE_LOAD_PERMUTATION (node).exists ())
fb85abff 3156 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
cfdcf183 3157 else if (is_pattern_stmt_p (stmt_info))
24c271ef 3158 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
fb85abff 3159 else
3160 si = gsi_for_stmt (stmt);
48e1416a 3161
d4b21757 3162 /* Stores should be inserted just before the last store. */
ee612634 3163 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
d4b21757 3164 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3165 {
3166 gimple last_store = vect_find_last_store_in_slp_instance (instance);
0bc79e04 3167 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3168 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
d4b21757 3169 si = gsi_for_stmt (last_store);
3170 }
3171
39a5d6b1 3172 /* Mark the first element of the reduction chain as reduction to properly
3173 transform the node. In the analysis phase only the last element of the
3174 chain is marked as reduction. */
ee612634 3175 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
39a5d6b1 3176 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3177 {
3178 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3179 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3180 }
3181
ee612634 3182 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
eefa05c8 3183 return is_store;
fb85abff 3184}
3185
529fcc2a 3186/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3187 For loop vectorization this is done in vectorizable_call, but for SLP
3188 it needs to be deferred until end of vect_schedule_slp, because multiple
3189 SLP instances may refer to the same scalar stmt. */
3190
3191static void
3192vect_remove_slp_scalar_calls (slp_tree node)
3193{
3194 gimple stmt, new_stmt;
3195 gimple_stmt_iterator gsi;
3196 int i;
40bcc7c2 3197 slp_tree child;
529fcc2a 3198 tree lhs;
3199 stmt_vec_info stmt_info;
3200
3201 if (!node)
3202 return;
3203
f1f41a6c 3204 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 3205 vect_remove_slp_scalar_calls (child);
529fcc2a 3206
f1f41a6c 3207 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
529fcc2a 3208 {
3209 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3210 continue;
3211 stmt_info = vinfo_for_stmt (stmt);
3212 if (stmt_info == NULL
3213 || is_pattern_stmt_p (stmt_info)
3214 || !PURE_SLP_STMT (stmt_info))
3215 continue;
3216 lhs = gimple_call_lhs (stmt);
3217 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3218 set_vinfo_for_stmt (new_stmt, stmt_info);
3219 set_vinfo_for_stmt (stmt, NULL);
3220 STMT_VINFO_STMT (stmt_info) = new_stmt;
3221 gsi = gsi_for_stmt (stmt);
3222 gsi_replace (&gsi, new_stmt, false);
3223 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3224 }
3225}
fb85abff 3226
282bf14c 3227/* Generate vector code for all SLP instances in the loop/basic block. */
3228
fb85abff 3229bool
37545e54 3230vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 3231{
f1f41a6c 3232 vec<slp_instance> slp_instances;
fb85abff 3233 slp_instance instance;
678e3d6e 3234 unsigned int i, vf;
fb85abff 3235 bool is_store = false;
3236
37545e54 3237 if (loop_vinfo)
3238 {
3239 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3240 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
48e1416a 3241 }
37545e54 3242 else
3243 {
3244 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3245 vf = 1;
48e1416a 3246 }
37545e54 3247
f1f41a6c 3248 FOR_EACH_VEC_ELT (slp_instances, i, instance)
fb85abff 3249 {
3250 /* Schedule the tree of INSTANCE. */
3251 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
37545e54 3252 instance, vf);
6d8fb6cf 3253 if (dump_enabled_p ())
7bd765d4 3254 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 3255 "vectorizing stmts using SLP.\n");
fb85abff 3256 }
3257
f1f41a6c 3258 FOR_EACH_VEC_ELT (slp_instances, i, instance)
eefa05c8 3259 {
3260 slp_tree root = SLP_INSTANCE_TREE (instance);
3261 gimple store;
3262 unsigned int j;
3263 gimple_stmt_iterator gsi;
3264
814c4f66 3265 /* Remove scalar call stmts. Do not do this for basic-block
3266 vectorization as not all uses may be vectorized.
3267 ??? Why should this be necessary? DCE should be able to
3268 remove the stmts itself.
3269 ??? For BB vectorization we can as well remove scalar
3270 stmts starting from the SLP tree root if they have no
3271 uses. */
3272 if (loop_vinfo)
3273 vect_remove_slp_scalar_calls (root);
529fcc2a 3274
f1f41a6c 3275 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
eefa05c8 3276 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3277 {
3278 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3279 break;
3280
0bc79e04 3281 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3282 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
eefa05c8 3283 /* Free the attached stmt_vec_info and remove the stmt. */
3284 gsi = gsi_for_stmt (store);
bc8a8451 3285 unlink_stmt_vdef (store);
eefa05c8 3286 gsi_remove (&gsi, true);
bc8a8451 3287 release_defs (store);
eefa05c8 3288 free_stmt_vec_info (store);
3289 }
3290 }
3291
fb85abff 3292 return is_store;
3293}
37545e54 3294
3295
3296/* Vectorize the basic block. */
3297
3298void
3299vect_slp_transform_bb (basic_block bb)
3300{
3301 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3302 gimple_stmt_iterator si;
3303
3304 gcc_assert (bb_vinfo);
3305
6d8fb6cf 3306 if (dump_enabled_p ())
7bd765d4 3307 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
37545e54 3308
3309 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3310 {
3311 gimple stmt = gsi_stmt (si);
3312 stmt_vec_info stmt_info;
3313
6d8fb6cf 3314 if (dump_enabled_p ())
37545e54 3315 {
7bd765d4 3316 dump_printf_loc (MSG_NOTE, vect_location,
3317 "------>SLPing statement: ");
3318 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 3319 dump_printf (MSG_NOTE, "\n");
37545e54 3320 }
3321
3322 stmt_info = vinfo_for_stmt (stmt);
3323 gcc_assert (stmt_info);
3324
3325 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3326 if (STMT_SLP_TYPE (stmt_info))
3327 {
3328 vect_schedule_slp (NULL, bb_vinfo);
3329 break;
3330 }
3331 }
3332
6d8fb6cf 3333 if (dump_enabled_p ())
a21425b5 3334 dump_printf_loc (MSG_NOTE, vect_location,
b055bc88 3335 "BASIC BLOCK VECTORIZED\n");
37545e54 3336
765a637f 3337 destroy_bb_vec_info (bb_vinfo);
3338}