]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
* gimple-expr.h (create_tmp_var_raw, create_tmp_var,
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
fb85abff 1/* SLP - Basic Block Vectorization
3aea1f79 2 Copyright (C) 2007-2014 Free Software Foundation, Inc.
48e1416a 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
7bd765d4 25#include "dumpfile.h"
fb85abff 26#include "tm.h"
fb85abff 27#include "tree.h"
9ed99284 28#include "stor-layout.h"
fb85abff 29#include "target.h"
94ea8568 30#include "predict.h"
31#include "vec.h"
32#include "hashtab.h"
33#include "hash-set.h"
34#include "machmode.h"
35#include "hard-reg-set.h"
36#include "input.h"
37#include "function.h"
fb85abff 38#include "basic-block.h"
ce084dfc 39#include "gimple-pretty-print.h"
bc61cadb 40#include "tree-ssa-alias.h"
41#include "internal-fn.h"
42#include "gimple-expr.h"
43#include "is-a.h"
073c1fd5 44#include "gimple.h"
dcf1a1ec 45#include "gimple-iterator.h"
073c1fd5 46#include "gimple-ssa.h"
47#include "tree-phinodes.h"
48#include "ssa-iterators.h"
9ed99284 49#include "stringpool.h"
073c1fd5 50#include "tree-ssanames.h"
b9ed1410 51#include "tree-pass.h"
fb85abff 52#include "cfgloop.h"
fb85abff 53#include "expr.h"
b9ed1410 54#include "recog.h" /* FIXME: for insn_data */
34517c64 55#include "insn-codes.h"
fb85abff 56#include "optabs.h"
57#include "tree-vectorizer.h"
964ec9cd 58#include "langhooks.h"
041208c6 59#include "gimple-walk.h"
fb85abff 60
37545e54 61/* Extract the location of the basic block in the source code.
62 Return the basic block location if succeed and NULL if not. */
63
36f39b2e 64source_location
37545e54 65find_bb_location (basic_block bb)
66{
67 gimple stmt = NULL;
68 gimple_stmt_iterator si;
69
70 if (!bb)
36f39b2e 71 return UNKNOWN_LOCATION;
37545e54 72
73 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
74 {
75 stmt = gsi_stmt (si);
36f39b2e 76 if (gimple_location (stmt) != UNKNOWN_LOCATION)
37545e54 77 return gimple_location (stmt);
78 }
79
36f39b2e 80 return UNKNOWN_LOCATION;
37545e54 81}
82
83
fb85abff 84/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
85
86static void
87vect_free_slp_tree (slp_tree node)
88{
b0f64919 89 int i;
40bcc7c2 90 slp_tree child;
b0f64919 91
fb85abff 92 if (!node)
93 return;
94
f1f41a6c 95 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 96 vect_free_slp_tree (child);
48e1416a 97
f1f41a6c 98 SLP_TREE_CHILDREN (node).release ();
99 SLP_TREE_SCALAR_STMTS (node).release ();
100 SLP_TREE_VEC_STMTS (node).release ();
678e3d6e 101 SLP_TREE_LOAD_PERMUTATION (node).release ();
fb85abff 102
103 free (node);
104}
105
106
107/* Free the memory allocated for the SLP instance. */
108
109void
110vect_free_slp_instance (slp_instance instance)
111{
112 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
f1f41a6c 113 SLP_INSTANCE_LOADS (instance).release ();
114 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
9f793cdf 115 free (instance);
fb85abff 116}
117
118
b0f64919 119/* Create an SLP node for SCALAR_STMTS. */
120
121static slp_tree
f1f41a6c 122vect_create_new_slp_node (vec<gimple> scalar_stmts)
b0f64919 123{
3c7a3fca 124 slp_tree node;
f1f41a6c 125 gimple stmt = scalar_stmts[0];
b0f64919 126 unsigned int nops;
127
128 if (is_gimple_call (stmt))
129 nops = gimple_call_num_args (stmt);
130 else if (is_gimple_assign (stmt))
f2104a54 131 {
132 nops = gimple_num_ops (stmt) - 1;
133 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
134 nops++;
135 }
b0f64919 136 else
137 return NULL;
138
3c7a3fca 139 node = XNEW (struct _slp_tree);
b0f64919 140 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
f1f41a6c 141 SLP_TREE_VEC_STMTS (node).create (0);
142 SLP_TREE_CHILDREN (node).create (nops);
678e3d6e 143 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
b0f64919 144
145 return node;
146}
147
148
149/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
150 operand. */
f1f41a6c 151static vec<slp_oprnd_info>
b0f64919 152vect_create_oprnd_info (int nops, int group_size)
153{
154 int i;
155 slp_oprnd_info oprnd_info;
f1f41a6c 156 vec<slp_oprnd_info> oprnds_info;
b0f64919 157
f1f41a6c 158 oprnds_info.create (nops);
b0f64919 159 for (i = 0; i < nops; i++)
160 {
161 oprnd_info = XNEW (struct _slp_oprnd_info);
f1f41a6c 162 oprnd_info->def_stmts.create (group_size);
b0f64919 163 oprnd_info->first_dt = vect_uninitialized_def;
3533e58d 164 oprnd_info->first_op_type = NULL_TREE;
b0f64919 165 oprnd_info->first_pattern = false;
f1f41a6c 166 oprnds_info.quick_push (oprnd_info);
b0f64919 167 }
168
169 return oprnds_info;
170}
171
172
3c7a3fca 173/* Free operands info. */
174
b0f64919 175static void
f1f41a6c 176vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
b0f64919 177{
178 int i;
179 slp_oprnd_info oprnd_info;
180
f1f41a6c 181 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
3c7a3fca 182 {
f1f41a6c 183 oprnd_info->def_stmts.release ();
3c7a3fca 184 XDELETE (oprnd_info);
185 }
b0f64919 186
f1f41a6c 187 oprnds_info.release ();
b0f64919 188}
189
190
40bcc7c2 191/* Find the place of the data-ref in STMT in the interleaving chain that starts
192 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
193
194static int
195vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
196{
197 gimple next_stmt = first_stmt;
198 int result = 0;
199
200 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
201 return -1;
202
203 do
204 {
205 if (next_stmt == stmt)
206 return result;
207 result++;
208 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
209 }
210 while (next_stmt);
211
212 return -1;
213}
214
215
b0f64919 216/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
217 they are of a valid type and that they match the defs of the first stmt of
291e6474 218 the SLP group (stored in OPRNDS_INFO). If there was a fatal error
219 return -1, if the error could be corrected by swapping operands of the
220 operation return 1, if everything is ok return 0. */
fb85abff 221
291e6474 222static int
37545e54 223vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
ee6e9ba5 224 gimple stmt, bool first,
225 vec<slp_oprnd_info> *oprnds_info)
fb85abff 226{
227 tree oprnd;
228 unsigned int i, number_of_oprnds;
0eba4748 229 tree def;
fb85abff 230 gimple def_stmt;
b0f64919 231 enum vect_def_type dt = vect_uninitialized_def;
37545e54 232 struct loop *loop = NULL;
b0f64919 233 bool pattern = false;
0eba4748 234 slp_oprnd_info oprnd_info;
291e6474 235 int first_op_idx = 1;
236 bool commutative = false;
237 bool first_op_cond = false;
48e1416a 238
37545e54 239 if (loop_vinfo)
240 loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 241
b0f64919 242 if (is_gimple_call (stmt))
b57a47d0 243 {
244 number_of_oprnds = gimple_call_num_args (stmt);
291e6474 245 first_op_idx = 3;
b57a47d0 246 }
f2104a54 247 else if (is_gimple_assign (stmt))
248 {
291e6474 249 enum tree_code code = gimple_assign_rhs_code (stmt);
f2104a54 250 number_of_oprnds = gimple_num_ops (stmt) - 1;
251 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
291e6474 252 {
253 first_op_cond = true;
254 commutative = true;
255 number_of_oprnds++;
256 }
257 else
258 commutative = commutative_tree_code (code);
f2104a54 259 }
b0f64919 260 else
291e6474 261 return -1;
fb85abff 262
291e6474 263 bool swapped = false;
fb85abff 264 for (i = 0; i < number_of_oprnds; i++)
265 {
291e6474 266again:
267 if (first_op_cond)
f2104a54 268 {
291e6474 269 if (i == 0 || i == 1)
270 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
271 swapped ? !i : i);
272 else
273 oprnd = gimple_op (stmt, first_op_idx + i - 1);
f2104a54 274 }
275 else
291e6474 276 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
f2104a54 277
f1f41a6c 278 oprnd_info = (*oprnds_info)[i];
fb85abff 279
bed8b93b 280 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
281 &def, &dt)
b0f64919 282 || (!def_stmt && dt != vect_constant_def))
fb85abff 283 {
6d8fb6cf 284 if (dump_enabled_p ())
fb85abff 285 {
7bd765d4 286 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
287 "Build SLP failed: can't find def for ");
288 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
78bb46f5 289 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 290 }
291
291e6474 292 return -1;
fb85abff 293 }
294
37545e54 295 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
282bf14c 296 from the pattern. Check that all the stmts of the node are in the
fb85abff 297 pattern. */
4c0c783a 298 if (def_stmt && gimple_bb (def_stmt)
299 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
300 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
301 && gimple_code (def_stmt) != GIMPLE_PHI))
fb85abff 302 && vinfo_for_stmt (def_stmt)
8bf58742 303 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
4c0c783a 304 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
305 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
fb85abff 306 {
b0f64919 307 pattern = true;
308 if (!first && !oprnd_info->first_pattern)
309 {
291e6474 310 if (i == 0
311 && !swapped
312 && commutative)
313 {
314 swapped = true;
315 goto again;
316 }
317
6d8fb6cf 318 if (dump_enabled_p ())
b0f64919 319 {
7bd765d4 320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
321 "Build SLP failed: some of the stmts"
322 " are in a pattern, and others are not ");
323 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
78bb46f5 324 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b0f64919 325 }
fb85abff 326
291e6474 327 return 1;
fb85abff 328 }
329
330 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
b0f64919 331 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
fb85abff 332
f2104a54 333 if (dt == vect_unknown_def_type)
fb85abff 334 {
6d8fb6cf 335 if (dump_enabled_p ())
7bd765d4 336 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 337 "Unsupported pattern.\n");
291e6474 338 return -1;
fb85abff 339 }
340
341 switch (gimple_code (def_stmt))
342 {
343 case GIMPLE_PHI:
b0f64919 344 def = gimple_phi_result (def_stmt);
fb85abff 345 break;
346
347 case GIMPLE_ASSIGN:
b0f64919 348 def = gimple_assign_lhs (def_stmt);
fb85abff 349 break;
350
351 default:
6d8fb6cf 352 if (dump_enabled_p ())
7bd765d4 353 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 354 "unsupported defining stmt:\n");
291e6474 355 return -1;
fb85abff 356 }
357 }
358
b0f64919 359 if (first)
fb85abff 360 {
b0f64919 361 oprnd_info->first_dt = dt;
362 oprnd_info->first_pattern = pattern;
3533e58d 363 oprnd_info->first_op_type = TREE_TYPE (oprnd);
fb85abff 364 }
fb85abff 365 else
366 {
b0f64919 367 /* Not first stmt of the group, check that the def-stmt/s match
368 the def-stmt/s of the first stmt. Allow different definition
369 types for reduction chains: the first stmt must be a
370 vect_reduction_def (a phi node), and the rest
371 vect_internal_def. */
372 if (((oprnd_info->first_dt != dt
373 && !(oprnd_info->first_dt == vect_reduction_def
3533e58d 374 && dt == vect_internal_def)
375 && !((oprnd_info->first_dt == vect_external_def
376 || oprnd_info->first_dt == vect_constant_def)
377 && (dt == vect_external_def
378 || dt == vect_constant_def)))
379 || !types_compatible_p (oprnd_info->first_op_type,
380 TREE_TYPE (oprnd))))
fb85abff 381 {
291e6474 382 /* Try swapping operands if we got a mismatch. */
383 if (i == 0
384 && !swapped
385 && commutative)
386 {
387 swapped = true;
388 goto again;
389 }
390
0eba4748 391 if (dump_enabled_p ())
392 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 393 "Build SLP failed: different types\n");
b0f64919 394
291e6474 395 return 1;
fb85abff 396 }
397 }
398
399 /* Check the types of the definitions. */
b0f64919 400 switch (dt)
fb85abff 401 {
402 case vect_constant_def:
f083cd24 403 case vect_external_def:
b0f64919 404 case vect_reduction_def:
fb85abff 405 break;
48e1416a 406
f083cd24 407 case vect_internal_def:
0eba4748 408 oprnd_info->def_stmts.quick_push (def_stmt);
fb85abff 409 break;
410
411 default:
412 /* FORNOW: Not supported. */
6d8fb6cf 413 if (dump_enabled_p ())
fb85abff 414 {
7bd765d4 415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
416 "Build SLP failed: illegal type of def ");
417 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
78bb46f5 418 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 419 }
420
291e6474 421 return -1;
fb85abff 422 }
423 }
424
291e6474 425 /* Swap operands. */
426 if (swapped)
427 {
428 if (first_op_cond)
429 {
430 tree cond = gimple_assign_rhs1 (stmt);
431 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
432 &TREE_OPERAND (cond, 1));
433 TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
434 }
435 else
436 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
437 gimple_assign_rhs2_ptr (stmt));
438 }
439
440 return 0;
fb85abff 441}
442
443
16513aa4 444/* Verify if the scalar stmts STMTS are isomorphic, require data
445 permutation or are of unsupported types of operation. Return
446 true if they are, otherwise return false and indicate in *MATCHES
447 which stmts are not isomorphic to the first one. If MATCHES[0]
448 is false then this indicates the comparison could not be
449 carried out or the stmts will never be vectorized by SLP. */
fb85abff 450
451static bool
16513aa4 452vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
453 vec<gimple> stmts, unsigned int group_size,
454 unsigned nops, unsigned int *max_nunits,
455 unsigned int vectorization_factor, bool *matches)
fb85abff 456{
fb85abff 457 unsigned int i;
f1f41a6c 458 gimple stmt = stmts[0];
a2e70a99 459 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
f2104a54 460 enum tree_code first_cond_code = ERROR_MARK;
fb85abff 461 tree lhs;
16513aa4 462 bool need_same_oprnds = false;
fb85abff 463 tree vectype, scalar_type, first_op1 = NULL_TREE;
fb85abff 464 optab optab;
465 int icode;
3754d046 466 machine_mode optab_op2_mode;
467 machine_mode vec_mode;
fb85abff 468 struct data_reference *first_dr;
fb85abff 469 HOST_WIDE_INT dummy;
4db2b577 470 gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
f2104a54 471 tree cond;
b0f64919 472
fb85abff 473 /* For every stmt in NODE find its def stmt/s. */
f1f41a6c 474 FOR_EACH_VEC_ELT (stmts, i, stmt)
fb85abff 475 {
16513aa4 476 matches[i] = false;
477
6d8fb6cf 478 if (dump_enabled_p ())
fb85abff 479 {
7bd765d4 480 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
481 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 482 dump_printf (MSG_NOTE, "\n");
fb85abff 483 }
484
6ea6a380 485 /* Fail to vectorize statements marked as unvectorizable. */
486 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
487 {
6d8fb6cf 488 if (dump_enabled_p ())
6ea6a380 489 {
7bd765d4 490 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
491 "Build SLP failed: unvectorizable statement ");
492 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 493 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
6ea6a380 494 }
16513aa4 495 /* Fatal mismatch. */
496 matches[0] = false;
6ea6a380 497 return false;
498 }
499
fb85abff 500 lhs = gimple_get_lhs (stmt);
501 if (lhs == NULL_TREE)
502 {
6d8fb6cf 503 if (dump_enabled_p ())
fb85abff 504 {
7bd765d4 505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
506 "Build SLP failed: not GIMPLE_ASSIGN nor "
507 "GIMPLE_CALL ");
508 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 509 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 510 }
16513aa4 511 /* Fatal mismatch. */
512 matches[0] = false;
fb85abff 513 return false;
514 }
515
f2104a54 516 if (is_gimple_assign (stmt)
517 && gimple_assign_rhs_code (stmt) == COND_EXPR
518 && (cond = gimple_assign_rhs1 (stmt))
519 && !COMPARISON_CLASS_P (cond))
520 {
6d8fb6cf 521 if (dump_enabled_p ())
f2104a54 522 {
7bd765d4 523 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
524 "Build SLP failed: condition is not "
525 "comparison ");
526 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 527 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f2104a54 528 }
16513aa4 529 /* Fatal mismatch. */
530 matches[0] = false;
f2104a54 531 return false;
532 }
533
48e1416a 534 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
fb85abff 535 vectype = get_vectype_for_scalar_type (scalar_type);
536 if (!vectype)
537 {
6d8fb6cf 538 if (dump_enabled_p ())
fb85abff 539 {
7bd765d4 540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
541 "Build SLP failed: unsupported data-type ");
542 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
543 scalar_type);
78bb46f5 544 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 545 }
16513aa4 546 /* Fatal mismatch. */
547 matches[0] = false;
fb85abff 548 return false;
549 }
48e1416a 550
03dab834 551 /* In case of multiple types we need to detect the smallest type. */
552 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
37545e54 553 {
03dab834 554 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
555 if (bb_vinfo)
556 vectorization_factor = *max_nunits;
37545e54 557 }
48e1416a 558
1a91d914 559 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
b57a47d0 560 {
561 rhs_code = CALL_EXPR;
1a91d914 562 if (gimple_call_internal_p (call_stmt)
563 || gimple_call_tail_p (call_stmt)
564 || gimple_call_noreturn_p (call_stmt)
565 || !gimple_call_nothrow_p (call_stmt)
566 || gimple_call_chain (call_stmt))
b57a47d0 567 {
6d8fb6cf 568 if (dump_enabled_p ())
b57a47d0 569 {
7bd765d4 570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
571 "Build SLP failed: unsupported call type ");
1a91d914 572 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
573 call_stmt, 0);
78bb46f5 574 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b57a47d0 575 }
16513aa4 576 /* Fatal mismatch. */
577 matches[0] = false;
b57a47d0 578 return false;
579 }
580 }
fb85abff 581 else
582 rhs_code = gimple_assign_rhs_code (stmt);
583
584 /* Check the operation. */
585 if (i == 0)
586 {
587 first_stmt_code = rhs_code;
588
48e1416a 589 /* Shift arguments should be equal in all the packed stmts for a
fb85abff 590 vector shift with scalar shift operand. */
591 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
592 || rhs_code == LROTATE_EXPR
593 || rhs_code == RROTATE_EXPR)
594 {
595 vec_mode = TYPE_MODE (vectype);
596
597 /* First see if we have a vector/vector shift. */
598 optab = optab_for_tree_code (rhs_code, vectype,
599 optab_vector);
600
601 if (!optab
d6bf3b14 602 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
fb85abff 603 {
604 /* No vector/vector shift, try for a vector/scalar shift. */
605 optab = optab_for_tree_code (rhs_code, vectype,
606 optab_scalar);
607
608 if (!optab)
609 {
6d8fb6cf 610 if (dump_enabled_p ())
7bd765d4 611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 612 "Build SLP failed: no optab.\n");
16513aa4 613 /* Fatal mismatch. */
614 matches[0] = false;
fb85abff 615 return false;
616 }
d6bf3b14 617 icode = (int) optab_handler (optab, vec_mode);
fb85abff 618 if (icode == CODE_FOR_nothing)
619 {
6d8fb6cf 620 if (dump_enabled_p ())
7bd765d4 621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
622 "Build SLP failed: "
78bb46f5 623 "op not supported by target.\n");
16513aa4 624 /* Fatal mismatch. */
625 matches[0] = false;
fb85abff 626 return false;
627 }
628 optab_op2_mode = insn_data[icode].operand[2].mode;
629 if (!VECTOR_MODE_P (optab_op2_mode))
630 {
631 need_same_oprnds = true;
632 first_op1 = gimple_assign_rhs2 (stmt);
633 }
634 }
635 }
6083c152 636 else if (rhs_code == WIDEN_LSHIFT_EXPR)
637 {
638 need_same_oprnds = true;
639 first_op1 = gimple_assign_rhs2 (stmt);
640 }
fb85abff 641 }
642 else
643 {
644 if (first_stmt_code != rhs_code
645 && (first_stmt_code != IMAGPART_EXPR
646 || rhs_code != REALPART_EXPR)
647 && (first_stmt_code != REALPART_EXPR
ad074595 648 || rhs_code != IMAGPART_EXPR)
ee612634 649 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
ad074595 650 && (first_stmt_code == ARRAY_REF
19bacd59 651 || first_stmt_code == BIT_FIELD_REF
ad074595 652 || first_stmt_code == INDIRECT_REF
653 || first_stmt_code == COMPONENT_REF
654 || first_stmt_code == MEM_REF)))
fb85abff 655 {
6d8fb6cf 656 if (dump_enabled_p ())
fb85abff 657 {
7bd765d4 658 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
659 "Build SLP failed: different operation "
660 "in stmt ");
661 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 662 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 663 }
16513aa4 664 /* Mismatch. */
665 continue;
fb85abff 666 }
48e1416a 667
668 if (need_same_oprnds
fb85abff 669 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
670 {
6d8fb6cf 671 if (dump_enabled_p ())
fb85abff 672 {
7bd765d4 673 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
674 "Build SLP failed: different shift "
675 "arguments in ");
676 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 677 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 678 }
16513aa4 679 /* Mismatch. */
680 continue;
fb85abff 681 }
b57a47d0 682
683 if (rhs_code == CALL_EXPR)
684 {
f1f41a6c 685 gimple first_stmt = stmts[0];
b57a47d0 686 if (gimple_call_num_args (stmt) != nops
687 || !operand_equal_p (gimple_call_fn (first_stmt),
688 gimple_call_fn (stmt), 0)
689 || gimple_call_fntype (first_stmt)
690 != gimple_call_fntype (stmt))
691 {
6d8fb6cf 692 if (dump_enabled_p ())
b57a47d0 693 {
7bd765d4 694 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
695 "Build SLP failed: different calls in ");
696 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
697 stmt, 0);
78bb46f5 698 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b57a47d0 699 }
16513aa4 700 /* Mismatch. */
701 continue;
b57a47d0 702 }
703 }
fb85abff 704 }
705
ee612634 706 /* Grouped store or load. */
707 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
fb85abff 708 {
709 if (REFERENCE_CLASS_P (lhs))
710 {
711 /* Store. */
16513aa4 712 ;
fb85abff 713 }
eefa05c8 714 else
715 {
716 /* Load. */
f77a1eec 717 unsigned unrolling_factor
718 = least_common_multiple
719 (*max_nunits, group_size) / group_size;
8f3e4987 720 /* FORNOW: Check that there is no gap between the loads
721 and no gap between the groups when we need to load
722 multiple groups at once.
723 ??? We should enhance this to only disallow gaps
724 inside vectors. */
f77a1eec 725 if ((unrolling_factor > 1
8f3e4987 726 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
727 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
728 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
729 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
eefa05c8 730 {
6d8fb6cf 731 if (dump_enabled_p ())
eefa05c8 732 {
7bd765d4 733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
734 "Build SLP failed: grouped "
735 "loads have gaps ");
736 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
737 stmt, 0);
78bb46f5 738 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
eefa05c8 739 }
16513aa4 740 /* Fatal mismatch. */
741 matches[0] = false;
eefa05c8 742 return false;
743 }
792672f0 744
eefa05c8 745 /* Check that the size of interleaved loads group is not
746 greater than the SLP group size. */
f77a1eec 747 unsigned ncopies
748 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
24c271ef 749 if (loop_vinfo
8f3e4987 750 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
751 && ((GROUP_SIZE (vinfo_for_stmt (stmt))
752 - GROUP_GAP (vinfo_for_stmt (stmt)))
753 > ncopies * group_size))
eefa05c8 754 {
6d8fb6cf 755 if (dump_enabled_p ())
eefa05c8 756 {
7bd765d4 757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
758 "Build SLP failed: the number "
759 "of interleaved loads is greater than "
760 "the SLP group size ");
761 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
762 stmt, 0);
78bb46f5 763 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
eefa05c8 764 }
16513aa4 765 /* Fatal mismatch. */
766 matches[0] = false;
eefa05c8 767 return false;
768 }
769
4db2b577 770 old_first_load = first_load;
21009880 771 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
eefa05c8 772 if (prev_first_load)
773 {
774 /* Check that there are no loads from different interleaving
16513aa4 775 chains in the same node. */
776 if (prev_first_load != first_load)
7bd765d4 777 {
6d8fb6cf 778 if (dump_enabled_p ())
eefa05c8 779 {
7bd765d4 780 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
781 vect_location,
782 "Build SLP failed: different "
783 "interleaving chains in one node ");
784 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
785 stmt, 0);
78bb46f5 786 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
eefa05c8 787 }
16513aa4 788 /* Mismatch. */
789 continue;
eefa05c8 790 }
791 }
792 else
793 prev_first_load = first_load;
48e1416a 794
4db2b577 795 /* In some cases a group of loads is just the same load
796 repeated N times. Only analyze its cost once. */
797 if (first_load == stmt && old_first_load != first_load)
fb85abff 798 {
799 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
0822b158 800 if (vect_supportable_dr_alignment (first_dr, false)
fb85abff 801 == dr_unaligned_unsupported)
802 {
6d8fb6cf 803 if (dump_enabled_p ())
fb85abff 804 {
7bd765d4 805 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
806 vect_location,
807 "Build SLP failed: unsupported "
808 "unaligned load ");
809 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
810 stmt, 0);
78bb46f5 811 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 812 }
16513aa4 813 /* Fatal mismatch. */
814 matches[0] = false;
fb85abff 815 return false;
816 }
fb85abff 817 }
fb85abff 818 }
ee612634 819 } /* Grouped access. */
fb85abff 820 else
821 {
822 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
823 {
ee612634 824 /* Not grouped load. */
6d8fb6cf 825 if (dump_enabled_p ())
fb85abff 826 {
7bd765d4 827 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
828 "Build SLP failed: not grouped load ");
829 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 830 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 831 }
832
ee612634 833 /* FORNOW: Not grouped loads are not supported. */
16513aa4 834 /* Fatal mismatch. */
835 matches[0] = false;
fb85abff 836 return false;
837 }
838
839 /* Not memory operation. */
840 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f2104a54 841 && TREE_CODE_CLASS (rhs_code) != tcc_unary
b57a47d0 842 && rhs_code != COND_EXPR
843 && rhs_code != CALL_EXPR)
fb85abff 844 {
6d8fb6cf 845 if (dump_enabled_p ())
fb85abff 846 {
7bd765d4 847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
848 "Build SLP failed: operation");
849 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
850 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 851 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 852 }
16513aa4 853 /* Fatal mismatch. */
854 matches[0] = false;
fb85abff 855 return false;
856 }
857
f2104a54 858 if (rhs_code == COND_EXPR)
859 {
860 tree cond_expr = gimple_assign_rhs1 (stmt);
861
862 if (i == 0)
863 first_cond_code = TREE_CODE (cond_expr);
864 else if (first_cond_code != TREE_CODE (cond_expr))
865 {
6d8fb6cf 866 if (dump_enabled_p ())
f2104a54 867 {
7bd765d4 868 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
869 "Build SLP failed: different"
870 " operation");
871 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
872 stmt, 0);
78bb46f5 873 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f2104a54 874 }
16513aa4 875 /* Mismatch. */
876 continue;
f2104a54 877 }
878 }
fb85abff 879 }
16513aa4 880
881 matches[i] = true;
882 }
883
884 for (i = 0; i < group_size; ++i)
885 if (!matches[i])
886 return false;
887
888 return true;
889}
890
891/* Recursively build an SLP tree starting from NODE.
892 Fail (and return a value not equal to zero) if def-stmts are not
893 isomorphic, require data permutation or are of unsupported types of
894 operation. Otherwise, return 0.
895 The value returned is the depth in the SLP tree where a mismatch
896 was found. */
897
898static bool
899vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
900 slp_tree *node, unsigned int group_size,
901 unsigned int *max_nunits,
902 vec<slp_tree> *loads,
903 unsigned int vectorization_factor,
ca91d3f8 904 bool *matches, unsigned *npermutes, unsigned *tree_size,
905 unsigned max_tree_size)
16513aa4 906{
ca91d3f8 907 unsigned nops, i, this_npermutes = 0, this_tree_size = 0;
16513aa4 908 gimple stmt;
909
910 if (!matches)
911 matches = XALLOCAVEC (bool, group_size);
912 if (!npermutes)
913 npermutes = &this_npermutes;
914
915 matches[0] = false;
916
917 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
918 if (is_gimple_call (stmt))
919 nops = gimple_call_num_args (stmt);
920 else if (is_gimple_assign (stmt))
921 {
922 nops = gimple_num_ops (stmt) - 1;
923 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
924 nops++;
fb85abff 925 }
16513aa4 926 else
927 return false;
928
929 if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
930 SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
931 max_nunits, vectorization_factor, matches))
932 return false;
fb85abff 933
16513aa4 934 /* If the SLP node is a load, terminate the recursion. */
935 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
936 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
fb85abff 937 {
f1f41a6c 938 loads->safe_push (*node);
fb85abff 939 return true;
940 }
941
16513aa4 942 /* Get at the operands, verifying they are compatible. */
943 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
944 slp_oprnd_info oprnd_info;
945 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
946 {
291e6474 947 switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
948 stmt, (i == 0), &oprnds_info))
16513aa4 949 {
291e6474 950 case 0:
951 break;
952 case -1:
953 matches[0] = false;
16513aa4 954 vect_free_oprnd_info (oprnds_info);
955 return false;
291e6474 956 case 1:
957 matches[i] = false;
958 break;
16513aa4 959 }
960 }
291e6474 961 for (i = 0; i < group_size; ++i)
962 if (!matches[i])
963 {
964 vect_free_oprnd_info (oprnds_info);
965 return false;
966 }
16513aa4 967
968 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
969
48e1416a 970 /* Create SLP_TREE nodes for the definition node/s. */
f1f41a6c 971 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
fb85abff 972 {
b0f64919 973 slp_tree child;
16513aa4 974 unsigned old_nloads = loads->length ();
975 unsigned old_max_nunits = *max_nunits;
48e1416a 976
b0f64919 977 if (oprnd_info->first_dt != vect_internal_def)
978 continue;
fb85abff 979
ca91d3f8 980 if (++this_tree_size > max_tree_size)
981 {
982 vect_free_oprnd_info (oprnds_info);
983 return false;
984 }
985
b0f64919 986 child = vect_create_new_slp_node (oprnd_info->def_stmts);
16513aa4 987 if (!child)
988 {
f1f41a6c 989 vect_free_oprnd_info (oprnds_info);
16513aa4 990 return false;
b0f64919 991 }
48e1416a 992
16513aa4 993 bool *matches = XALLOCAVEC (bool, group_size);
994 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
995 group_size, max_nunits, loads,
ca91d3f8 996 vectorization_factor, matches,
997 npermutes, &this_tree_size, max_tree_size))
16513aa4 998 {
999 oprnd_info->def_stmts = vNULL;
1000 SLP_TREE_CHILDREN (*node).quick_push (child);
1001 continue;
1002 }
1003
1004 /* If the SLP build for operand zero failed and operand zero
1005 and one can be commutated try that for the scalar stmts
1006 that failed the match. */
1007 if (i == 0
1008 /* A first scalar stmt mismatch signals a fatal mismatch. */
1009 && matches[0]
1010 /* ??? For COND_EXPRs we can swap the comparison operands
1011 as well as the arms under some constraints. */
1012 && nops == 2
1013 && oprnds_info[1]->first_dt == vect_internal_def
1014 && is_gimple_assign (stmt)
1015 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1016 /* Do so only if the number of not successful permutes was nor more
1017 than a cut-ff as re-trying the recursive match on
1018 possibly each level of the tree would expose exponential
1019 behavior. */
1020 && *npermutes < 4)
1021 {
1022 /* Roll back. */
1023 *max_nunits = old_max_nunits;
1024 loads->truncate (old_nloads);
1025 /* Swap mismatched definition stmts. */
291e6474 1026 dump_printf_loc (MSG_NOTE, vect_location,
1027 "Re-trying with swapped operands of stmts ");
16513aa4 1028 for (unsigned j = 0; j < group_size; ++j)
1029 if (!matches[j])
1030 {
1031 gimple tem = oprnds_info[0]->def_stmts[j];
1032 oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
1033 oprnds_info[1]->def_stmts[j] = tem;
291e6474 1034 dump_printf (MSG_NOTE, "%d ", j);
16513aa4 1035 }
291e6474 1036 dump_printf (MSG_NOTE, "\n");
16513aa4 1037 /* And try again ... */
1038 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1039 group_size, max_nunits, loads,
1040 vectorization_factor,
ca91d3f8 1041 matches, npermutes, &this_tree_size,
1042 max_tree_size))
16513aa4 1043 {
1044 oprnd_info->def_stmts = vNULL;
1045 SLP_TREE_CHILDREN (*node).quick_push (child);
1046 continue;
1047 }
1048
1049 ++*npermutes;
1050 }
1051
1052 oprnd_info->def_stmts = vNULL;
1053 vect_free_slp_tree (child);
1054 vect_free_oprnd_info (oprnds_info);
1055 return false;
fb85abff 1056 }
1057
ca91d3f8 1058 if (tree_size)
1059 *tree_size += this_tree_size;
1060
f1f41a6c 1061 vect_free_oprnd_info (oprnds_info);
fb85abff 1062 return true;
1063}
1064
7bd765d4 1065/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
fb85abff 1066
1067static void
7bd765d4 1068vect_print_slp_tree (int dump_kind, slp_tree node)
fb85abff 1069{
1070 int i;
1071 gimple stmt;
40bcc7c2 1072 slp_tree child;
fb85abff 1073
1074 if (!node)
1075 return;
1076
7bd765d4 1077 dump_printf (dump_kind, "node ");
f1f41a6c 1078 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
fb85abff 1079 {
7bd765d4 1080 dump_printf (dump_kind, "\n\tstmt %d ", i);
1081 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
fb85abff 1082 }
7bd765d4 1083 dump_printf (dump_kind, "\n");
fb85abff 1084
f1f41a6c 1085 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1086 vect_print_slp_tree (dump_kind, child);
fb85abff 1087}
1088
1089
48e1416a 1090/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1091 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
282bf14c 1092 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
fb85abff 1093 stmts in NODE are to be marked. */
1094
1095static void
1096vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1097{
1098 int i;
1099 gimple stmt;
40bcc7c2 1100 slp_tree child;
fb85abff 1101
1102 if (!node)
1103 return;
1104
f1f41a6c 1105 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
fb85abff 1106 if (j < 0 || i == j)
1107 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1108
f1f41a6c 1109 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1110 vect_mark_slp_stmts (child, mark, j);
fb85abff 1111}
1112
1113
37545e54 1114/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1115
1116static void
1117vect_mark_slp_stmts_relevant (slp_tree node)
1118{
1119 int i;
1120 gimple stmt;
1121 stmt_vec_info stmt_info;
40bcc7c2 1122 slp_tree child;
37545e54 1123
1124 if (!node)
1125 return;
1126
f1f41a6c 1127 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
37545e54 1128 {
1129 stmt_info = vinfo_for_stmt (stmt);
48e1416a 1130 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
37545e54 1131 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1132 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1133 }
1134
f1f41a6c 1135 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1136 vect_mark_slp_stmts_relevant (child);
37545e54 1137}
1138
1139
eefa05c8 1140/* Rearrange the statements of NODE according to PERMUTATION. */
1141
1142static void
1143vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
678e3d6e 1144 vec<unsigned> permutation)
eefa05c8 1145{
1146 gimple stmt;
f1f41a6c 1147 vec<gimple> tmp_stmts;
40bcc7c2 1148 unsigned int i;
1149 slp_tree child;
eefa05c8 1150
f1f41a6c 1151 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1152 vect_slp_rearrange_stmts (child, group_size, permutation);
eefa05c8 1153
f1f41a6c 1154 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1155 tmp_stmts.create (group_size);
40bcc7c2 1156 tmp_stmts.quick_grow_cleared (group_size);
eefa05c8 1157
f1f41a6c 1158 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
40bcc7c2 1159 tmp_stmts[permutation[i]] = stmt;
eefa05c8 1160
f1f41a6c 1161 SLP_TREE_SCALAR_STMTS (node).release ();
eefa05c8 1162 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1163}
1164
1165
678e3d6e 1166/* Check if the required load permutations in the SLP instance
1167 SLP_INSTN are supported. */
fb85abff 1168
1169static bool
678e3d6e 1170vect_supported_load_permutation_p (slp_instance slp_instn)
fb85abff 1171{
678e3d6e 1172 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1173 unsigned int i, j, k, next;
e8332698 1174 sbitmap load_index;
16513aa4 1175 slp_tree node;
1176 gimple stmt, load, next_load, first_load;
24c271ef 1177 struct data_reference *dr;
fb85abff 1178
6d8fb6cf 1179 if (dump_enabled_p ())
fb85abff 1180 {
7bd765d4 1181 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
678e3d6e 1182 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1183 if (node->load_permutation.exists ())
1184 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1185 dump_printf (MSG_NOTE, "%d ", next);
1186 else
9cf883aa 1187 for (k = 0; k < group_size; ++k)
1188 dump_printf (MSG_NOTE, "%d ", k);
78bb46f5 1189 dump_printf (MSG_NOTE, "\n");
fb85abff 1190 }
1191
eefa05c8 1192 /* In case of reduction every load permutation is allowed, since the order
1193 of the reduction statements is not important (as opposed to the case of
ee612634 1194 grouped stores). The only condition we need to check is that all the
eefa05c8 1195 load nodes are of the same size and have the same permutation (and then
1196 rearrange all the nodes of the SLP instance according to this
1197 permutation). */
1198
1199 /* Check that all the load nodes are of the same size. */
678e3d6e 1200 /* ??? Can't we assert this? */
f1f41a6c 1201 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
16513aa4 1202 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1203 return false;
a2e70a99 1204
eefa05c8 1205 node = SLP_INSTANCE_TREE (slp_instn);
f1f41a6c 1206 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
eefa05c8 1207
39a5d6b1 1208 /* Reduction (there are no data-refs in the root).
1209 In reduction chain the order of the loads is important. */
1210 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1211 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
eefa05c8 1212 {
678e3d6e 1213 slp_tree load;
1214 unsigned int lidx;
eefa05c8 1215
678e3d6e 1216 /* Compare all the permutation sequences to the first one. We know
1217 that at least one load is permuted. */
1218 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1219 if (!node->load_permutation.exists ())
1220 return false;
1221 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1222 {
1223 if (!load->load_permutation.exists ())
1224 return false;
1225 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1226 if (lidx != node->load_permutation[j])
1227 return false;
1228 }
0ede2ac4 1229
678e3d6e 1230 /* Check that the loads in the first sequence are different and there
1231 are no gaps between them. */
1232 load_index = sbitmap_alloc (group_size);
1233 bitmap_clear (load_index);
1234 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1235 {
1236 if (bitmap_bit_p (load_index, lidx))
1237 {
1238 sbitmap_free (load_index);
1239 return false;
1240 }
1241 bitmap_set_bit (load_index, lidx);
1242 }
1243 for (i = 0; i < group_size; i++)
1244 if (!bitmap_bit_p (load_index, i))
1245 {
1246 sbitmap_free (load_index);
1247 return false;
1248 }
1249 sbitmap_free (load_index);
1250
1251 /* This permutation is valid for reduction. Since the order of the
1252 statements in the nodes is not important unless they are memory
1253 accesses, we can rearrange the statements in all the nodes
1254 according to the order of the loads. */
1255 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1256 node->load_permutation);
1257
1258 /* We are done, no actual permutations need to be generated. */
1259 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1260 SLP_TREE_LOAD_PERMUTATION (node).release ();
1261 return true;
eefa05c8 1262 }
1263
24c271ef 1264 /* In basic block vectorization we allow any subchain of an interleaving
1265 chain.
1266 FORNOW: not supported in loop SLP because of realignment compications. */
678e3d6e 1267 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
24c271ef 1268 {
678e3d6e 1269 /* Check that for every node in the instance the loads
1270 form a subchain. */
f1f41a6c 1271 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
24c271ef 1272 {
1273 next_load = NULL;
f1f41a6c 1274 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
24c271ef 1275 {
24c271ef 1276 if (j != 0 && next_load != load)
678e3d6e 1277 return false;
24c271ef 1278 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1279 }
24c271ef 1280 }
1281
1282 /* Check that the alignment of the first load in every subchain, i.e.,
678e3d6e 1283 the first statement in every load node, is supported.
1284 ??? This belongs in alignment checking. */
1285 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1286 {
1287 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1288 if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1289 {
1290 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1291 if (vect_supportable_dr_alignment (dr, false)
1292 == dr_unaligned_unsupported)
1293 {
1294 if (dump_enabled_p ())
1295 {
1296 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1297 vect_location,
1298 "unsupported unaligned load ");
1299 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1300 first_load, 0);
78bb46f5 1301 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
678e3d6e 1302 }
1303 return false;
1304 }
1305 }
1306 }
24c271ef 1307
678e3d6e 1308 /* We are done, no actual permutations need to be generated. */
1309 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1310 SLP_TREE_LOAD_PERMUTATION (node).release ();
1311 return true;
24c271ef 1312 }
1313
48e1416a 1314 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1315 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
eefa05c8 1316 well (unless it's reduction). */
678e3d6e 1317 if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
fb85abff 1318 return false;
678e3d6e 1319 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1320 if (!node->load_permutation.exists ())
1321 return false;
fb85abff 1322
e8332698 1323 load_index = sbitmap_alloc (group_size);
53c5d9d4 1324 bitmap_clear (load_index);
678e3d6e 1325 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1326 {
1327 unsigned int lidx = node->load_permutation[0];
1328 if (bitmap_bit_p (load_index, lidx))
1329 {
1330 sbitmap_free (load_index);
1331 return false;
1332 }
1333 bitmap_set_bit (load_index, lidx);
1334 FOR_EACH_VEC_ELT (node->load_permutation, j, k)
1335 if (k != lidx)
1336 {
1337 sbitmap_free (load_index);
1338 return false;
1339 }
fb85abff 1340 }
678e3d6e 1341 for (i = 0; i < group_size; i++)
1342 if (!bitmap_bit_p (load_index, i))
cae603e9 1343 {
1344 sbitmap_free (load_index);
1345 return false;
1346 }
e8332698 1347 sbitmap_free (load_index);
fb85abff 1348
678e3d6e 1349 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1350 if (node->load_permutation.exists ()
1351 && !vect_transform_slp_perm_load
1352 (node, vNULL, NULL,
1353 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1354 return false;
1355 return true;
fb85abff 1356}
1357
1358
48e1416a 1359/* Find the first load in the loop that belongs to INSTANCE.
fb85abff 1360 When loads are in several SLP nodes, there can be a case in which the first
48e1416a 1361 load does not appear in the first SLP node to be transformed, causing
282bf14c 1362 incorrect order of statements. Since we generate all the loads together,
fb85abff 1363 they must be inserted before the first load of the SLP instance and not
1364 before the first load of the first node of the instance. */
282bf14c 1365
48e1416a 1366static gimple
1367vect_find_first_load_in_slp_instance (slp_instance instance)
fb85abff 1368{
1369 int i, j;
1370 slp_tree load_node;
1371 gimple first_load = NULL, load;
1372
f1f41a6c 1373 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1374 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
fb85abff 1375 first_load = get_earlier_stmt (load, first_load);
48e1416a 1376
fb85abff 1377 return first_load;
1378}
1379
1380
d4b21757 1381/* Find the last store in SLP INSTANCE. */
282bf14c 1382
d4b21757 1383static gimple
1384vect_find_last_store_in_slp_instance (slp_instance instance)
1385{
1386 int i;
1387 slp_tree node;
1388 gimple last_store = NULL, store;
1389
1390 node = SLP_INSTANCE_TREE (instance);
f1f41a6c 1391 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
d4b21757 1392 last_store = get_later_stmt (store, last_store);
1393
1394 return last_store;
1395}
1396
ee6e9ba5 1397/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1398
1399static void
1400vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1401 slp_instance instance, slp_tree node,
1402 stmt_vector_for_cost *prologue_cost_vec,
1403 unsigned ncopies_for_cost)
1404{
1405 stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
1406
1407 unsigned i;
1408 slp_tree child;
1409 gimple stmt, s;
1410 stmt_vec_info stmt_info;
1411 tree lhs;
1412 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1413
1414 /* Recurse down the SLP tree. */
1415 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1416 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1417 instance, child, prologue_cost_vec,
1418 ncopies_for_cost);
1419
1420 /* Look at the first scalar stmt to determine the cost. */
1421 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1422 stmt_info = vinfo_for_stmt (stmt);
1423 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1424 {
1425 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1426 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1427 vect_uninitialized_def,
1428 node, prologue_cost_vec, body_cost_vec);
1429 else
1430 {
1431 int i;
1432 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1433 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1434 node, prologue_cost_vec, body_cost_vec);
1435 /* If the load is permuted record the cost for the permutation.
1436 ??? Loads from multiple chains are let through here only
1437 for a single special case involving complex numbers where
1438 in the end no permutation is necessary. */
1439 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1440 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1441 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1442 && vect_get_place_in_interleaving_chain
1443 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1444 {
1445 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1446 stmt_info, 0, vect_body);
1447 break;
1448 }
1449 }
1450 }
1451 else
1452 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1453 stmt_info, 0, vect_body);
1454
1455 /* Scan operands and account for prologue cost of constants/externals.
1456 ??? This over-estimates cost for multiple uses and should be
1457 re-engineered. */
1458 lhs = gimple_get_lhs (stmt);
1459 for (i = 0; i < gimple_num_ops (stmt); ++i)
1460 {
1461 tree def, op = gimple_op (stmt, i);
1462 gimple def_stmt;
1463 enum vect_def_type dt;
1464 if (!op || op == lhs)
1465 continue;
1466 if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
1467 &def_stmt, &def, &dt)
1468 && (dt == vect_constant_def || dt == vect_external_def))
1469 record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
1470 stmt_info, 0, vect_prologue);
1471 }
1472}
1473
1474/* Compute the cost for the SLP instance INSTANCE. */
1475
1476static void
1477vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1478 slp_instance instance, unsigned nunits)
1479{
1480 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1481 unsigned ncopies_for_cost;
1482 stmt_info_for_cost *si;
1483 unsigned i;
1484
1485 /* Calculate the number of vector stmts to create based on the unrolling
1486 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1487 GROUP_SIZE / NUNITS otherwise. */
1488 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1489 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1490
1491 prologue_cost_vec.create (10);
1492 body_cost_vec.create (10);
1493 SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1494 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1495 instance, SLP_INSTANCE_TREE (instance),
1496 &prologue_cost_vec, ncopies_for_cost);
1497
1498 /* Record the prologue costs, which were delayed until we were
1499 sure that SLP was successful. Unlike the body costs, we know
1500 the final values now regardless of the loop vectorization factor. */
1501 void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1502 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1503 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1504 {
1505 struct _stmt_vec_info *stmt_info
1506 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1507 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1508 si->misalign, vect_prologue);
1509 }
1510
1511 prologue_cost_vec.release ();
1512}
d4b21757 1513
ee612634 1514/* Analyze an SLP instance starting from a group of grouped stores. Call
48e1416a 1515 vect_build_slp_tree to build a tree of packed stmts if possible.
fb85abff 1516 Return FALSE if it's impossible to SLP any stmt in the loop. */
1517
1518static bool
37545e54 1519vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
ca91d3f8 1520 gimple stmt, unsigned max_tree_size)
fb85abff 1521{
1522 slp_instance new_instance;
b0f64919 1523 slp_tree node;
21009880 1524 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
fb85abff 1525 unsigned int unrolling_factor = 1, nunits;
eefa05c8 1526 tree vectype, scalar_type = NULL_TREE;
fb85abff 1527 gimple next;
f018d957 1528 unsigned int vectorization_factor = 0;
ee6e9ba5 1529 int i;
fb85abff 1530 unsigned int max_nunits = 0;
f1f41a6c 1531 vec<slp_tree> loads;
eefa05c8 1532 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
f1f41a6c 1533 vec<gimple> scalar_stmts;
eefa05c8 1534
39a5d6b1 1535 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
eefa05c8 1536 {
39a5d6b1 1537 if (dr)
1538 {
1539 scalar_type = TREE_TYPE (DR_REF (dr));
1540 vectype = get_vectype_for_scalar_type (scalar_type);
1541 }
1542 else
1543 {
1544 gcc_assert (loop_vinfo);
1545 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1546 }
1547
21009880 1548 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
eefa05c8 1549 }
1550 else
1551 {
1552 gcc_assert (loop_vinfo);
1553 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
f1f41a6c 1554 group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
eefa05c8 1555 }
48e1416a 1556
fb85abff 1557 if (!vectype)
1558 {
6d8fb6cf 1559 if (dump_enabled_p ())
fb85abff 1560 {
7bd765d4 1561 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1562 "Build SLP failed: unsupported data-type ");
1563 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
78bb46f5 1564 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 1565 }
eefa05c8 1566
fb85abff 1567 return false;
1568 }
1569
1570 nunits = TYPE_VECTOR_SUBPARTS (vectype);
37545e54 1571 if (loop_vinfo)
1572 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1573 else
37545e54 1574 vectorization_factor = nunits;
1575
37545e54 1576 /* Calculate the unrolling factor. */
1577 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1578 if (unrolling_factor != 1 && !loop_vinfo)
1579 {
6d8fb6cf 1580 if (dump_enabled_p ())
78bb46f5 1581 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 1582 "Build SLP failed: unrolling required in basic"
78bb46f5 1583 " block SLP\n");
48e1416a 1584
37545e54 1585 return false;
1586 }
1587
ee612634 1588 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
f1f41a6c 1589 scalar_stmts.create (group_size);
fb85abff 1590 next = stmt;
39a5d6b1 1591 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
fb85abff 1592 {
eefa05c8 1593 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1594 while (next)
1595 {
f2104a54 1596 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1597 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
f1f41a6c 1598 scalar_stmts.safe_push (
1599 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f2104a54 1600 else
f1f41a6c 1601 scalar_stmts.safe_push (next);
21009880 1602 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
eefa05c8 1603 }
1604 }
1605 else
1606 {
1607 /* Collect reduction statements. */
f1f41a6c 1608 vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1609 for (i = 0; reductions.iterate (i, &next); i++)
1610 scalar_stmts.safe_push (next);
fb85abff 1611 }
1612
b0f64919 1613 node = vect_create_new_slp_node (scalar_stmts);
fb85abff 1614
f1f41a6c 1615 loads.create (group_size);
fb85abff 1616
1617 /* Build the tree for the SLP instance. */
48e1416a 1618 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
0eba4748 1619 &max_nunits, &loads,
ca91d3f8 1620 vectorization_factor, NULL, NULL, NULL,
1621 max_tree_size))
fb85abff 1622 {
03dab834 1623 /* Calculate the unrolling factor based on the smallest type. */
fb85abff 1624 if (max_nunits > nunits)
1625 unrolling_factor = least_common_multiple (max_nunits, group_size)
1626 / group_size;
48e1416a 1627
03dab834 1628 if (unrolling_factor != 1 && !loop_vinfo)
1629 {
6d8fb6cf 1630 if (dump_enabled_p ())
78bb46f5 1631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 1632 "Build SLP failed: unrolling required in basic"
78bb46f5 1633 " block SLP\n");
9f793cdf 1634 vect_free_slp_tree (node);
f1f41a6c 1635 loads.release ();
03dab834 1636 return false;
1637 }
1638
1639 /* Create a new SLP instance. */
1640 new_instance = XNEW (struct _slp_instance);
1641 SLP_INSTANCE_TREE (new_instance) = node;
1642 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
fb85abff 1643 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ee6e9ba5 1644 SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
fb85abff 1645 SLP_INSTANCE_LOADS (new_instance) = loads;
1646 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
0eba4748 1647
1648 /* Compute the load permutation. */
1649 slp_tree load_node;
1650 bool loads_permuted = false;
0eba4748 1651 FOR_EACH_VEC_ELT (loads, i, load_node)
1652 {
678e3d6e 1653 vec<unsigned> load_permutation;
0eba4748 1654 int j;
16513aa4 1655 gimple load, first_stmt;
678e3d6e 1656 bool this_load_permuted = false;
1657 load_permutation.create (group_size);
16513aa4 1658 first_stmt = GROUP_FIRST_ELEMENT
1659 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
0eba4748 1660 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1661 {
16513aa4 1662 int load_place
1663 = vect_get_place_in_interleaving_chain (load, first_stmt);
1664 gcc_assert (load_place != -1);
1665 if (load_place != j)
678e3d6e 1666 this_load_permuted = true;
0eba4748 1667 load_permutation.safe_push (load_place);
1668 }
678e3d6e 1669 if (!this_load_permuted)
1670 {
1671 load_permutation.release ();
1672 continue;
1673 }
1674 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1675 loads_permuted = true;
0eba4748 1676 }
24c271ef 1677
1678 if (loads_permuted)
fb85abff 1679 {
678e3d6e 1680 if (!vect_supported_load_permutation_p (new_instance))
fb85abff 1681 {
6d8fb6cf 1682 if (dump_enabled_p ())
fb85abff 1683 {
78bb46f5 1684 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 1685 "Build SLP failed: unsupported load "
1686 "permutation ");
1687 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 1688 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 1689 }
fb85abff 1690 vect_free_slp_instance (new_instance);
1691 return false;
1692 }
1693
1694 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
678e3d6e 1695 = vect_find_first_load_in_slp_instance (new_instance);
fb85abff 1696 }
fb85abff 1697
ee6e9ba5 1698 /* Compute the costs of this SLP instance. */
1699 vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1700 new_instance, TYPE_VECTOR_SUBPARTS (vectype));
f97dec81 1701
37545e54 1702 if (loop_vinfo)
f1f41a6c 1703 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
37545e54 1704 else
f1f41a6c 1705 BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
48e1416a 1706
6d8fb6cf 1707 if (dump_enabled_p ())
7bd765d4 1708 vect_print_slp_tree (MSG_NOTE, node);
fb85abff 1709
1710 return true;
1711 }
1712
1713 /* Failed to SLP. */
1714 /* Free the allocated memory. */
1715 vect_free_slp_tree (node);
f1f41a6c 1716 loads.release ();
48e1416a 1717
37545e54 1718 return false;
fb85abff 1719}
1720
1721
282bf14c 1722/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
fb85abff 1723 trees of packed scalar stmts if SLP is possible. */
1724
1725bool
ca91d3f8 1726vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1727 unsigned max_tree_size)
fb85abff 1728{
1729 unsigned int i;
f1f41a6c 1730 vec<gimple> grouped_stores;
1e094109 1731 vec<gimple> reductions = vNULL;
1732 vec<gimple> reduc_chains = vNULL;
39a5d6b1 1733 gimple first_element;
37545e54 1734 bool ok = false;
fb85abff 1735
6d8fb6cf 1736 if (dump_enabled_p ())
78bb46f5 1737 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
fb85abff 1738
37545e54 1739 if (loop_vinfo)
eefa05c8 1740 {
ee612634 1741 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
39a5d6b1 1742 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
eefa05c8 1743 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1744 }
37545e54 1745 else
ee612634 1746 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
48e1416a 1747
ee612634 1748 /* Find SLP sequences starting from groups of grouped stores. */
f1f41a6c 1749 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
ca91d3f8 1750 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1751 max_tree_size))
37545e54 1752 ok = true;
fb85abff 1753
48e1416a 1754 if (bb_vinfo && !ok)
37545e54 1755 {
6d8fb6cf 1756 if (dump_enabled_p ())
7bd765d4 1757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 1758 "Failed to SLP the basic block.\n");
37545e54 1759
1760 return false;
1761 }
fb85abff 1762
39a5d6b1 1763 if (loop_vinfo
f1f41a6c 1764 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
39a5d6b1 1765 {
1766 /* Find SLP sequences starting from reduction chains. */
f1f41a6c 1767 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
ca91d3f8 1768 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1769 max_tree_size))
39a5d6b1 1770 ok = true;
1771 else
1772 return false;
1773
1774 /* Don't try to vectorize SLP reductions if reduction chain was
1775 detected. */
1776 return ok;
1777 }
1778
eefa05c8 1779 /* Find SLP sequences starting from groups of reductions. */
f1f41a6c 1780 if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
ca91d3f8 1781 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
1782 max_tree_size))
eefa05c8 1783 ok = true;
1784
fb85abff 1785 return true;
1786}
1787
1788
1789/* For each possible SLP instance decide whether to SLP it and calculate overall
bc937a44 1790 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1791 least one instance. */
fb85abff 1792
bc937a44 1793bool
fb85abff 1794vect_make_slp_decision (loop_vec_info loop_vinfo)
1795{
1796 unsigned int i, unrolling_factor = 1;
f1f41a6c 1797 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
fb85abff 1798 slp_instance instance;
1799 int decided_to_slp = 0;
1800
6d8fb6cf 1801 if (dump_enabled_p ())
78bb46f5 1802 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1803 "\n");
fb85abff 1804
f1f41a6c 1805 FOR_EACH_VEC_ELT (slp_instances, i, instance)
fb85abff 1806 {
1807 /* FORNOW: SLP if you can. */
1808 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1809 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1810
282bf14c 1811 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
48e1416a 1812 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
282bf14c 1813 loop-based vectorization. Such stmts will be marked as HYBRID. */
fb85abff 1814 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1815 decided_to_slp++;
1816 }
1817
1818 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1819
6d8fb6cf 1820 if (decided_to_slp && dump_enabled_p ())
b055bc88 1821 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1822 "Decided to SLP %d instances. Unrolling factor %d\n",
7bd765d4 1823 decided_to_slp, unrolling_factor);
bc937a44 1824
1825 return (decided_to_slp > 0);
fb85abff 1826}
1827
1828
1829/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
282bf14c 1830 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
fb85abff 1831
1832static void
041208c6 1833vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
fb85abff 1834{
041208c6 1835 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[i];
fb85abff 1836 imm_use_iterator imm_iter;
1837 gimple use_stmt;
041208c6 1838 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
40bcc7c2 1839 slp_tree child;
a1123047 1840 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
041208c6 1841 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1842 int j;
1843
1844 /* Propagate hybrid down the SLP tree. */
1845 if (stype == hybrid)
1846 ;
1847 else if (HYBRID_SLP_STMT (stmt_vinfo))
1848 stype = hybrid;
1849 else
1850 {
1851 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
1852 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
1853 if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1854 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
1855 if (gimple_bb (use_stmt)
1856 && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
1857 && (use_vinfo = vinfo_for_stmt (use_stmt))
1858 && !STMT_SLP_TYPE (use_vinfo)
1859 && (STMT_VINFO_RELEVANT (use_vinfo)
1860 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))
1861 || (STMT_VINFO_IN_PATTERN_P (use_vinfo)
1862 && STMT_VINFO_RELATED_STMT (use_vinfo)
1863 && !STMT_SLP_TYPE (vinfo_for_stmt
1864 (STMT_VINFO_RELATED_STMT (use_vinfo)))))
1865 && !(gimple_code (use_stmt) == GIMPLE_PHI
1866 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
1867 stype = hybrid;
1868 }
fb85abff 1869
041208c6 1870 if (stype == hybrid)
1871 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
fb85abff 1872
041208c6 1873 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
1874 vect_detect_hybrid_slp_stmts (child, i, stype);
1875}
a1123047 1876
041208c6 1877/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
fb85abff 1878
041208c6 1879static tree
1880vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
1881{
1882 walk_stmt_info *wi = (walk_stmt_info *)data;
1883 struct loop *loopp = (struct loop *)wi->info;
1884
1885 if (wi->is_lhs)
1886 return NULL_TREE;
1887
1888 if (TREE_CODE (*tp) == SSA_NAME
1889 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
1890 {
1891 gimple def_stmt = SSA_NAME_DEF_STMT (*tp);
1892 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
1893 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
1894 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
1895 }
1896
1897 return NULL_TREE;
fb85abff 1898}
1899
041208c6 1900static tree
1901vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
1902 walk_stmt_info *)
1903{
1904 /* If the stmt is in a SLP instance then this isn't a reason
1905 to mark use definitions in other SLP instances as hybrid. */
1906 if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
1907 *handled = true;
1908 return NULL_TREE;
1909}
fb85abff 1910
1911/* Find stmts that must be both vectorized and SLPed. */
1912
1913void
1914vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1915{
1916 unsigned int i;
f1f41a6c 1917 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
fb85abff 1918 slp_instance instance;
1919
6d8fb6cf 1920 if (dump_enabled_p ())
78bb46f5 1921 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
1922 "\n");
fb85abff 1923
041208c6 1924 /* First walk all pattern stmt in the loop and mark defs of uses as
1925 hybrid because immediate uses in them are not recorded. */
1926 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
1927 {
1928 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
1929 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1930 gsi_next (&gsi))
1931 {
1932 gimple stmt = gsi_stmt (gsi);
1933 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1934 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1935 {
1936 walk_stmt_info wi;
1937 memset (&wi, 0, sizeof (wi));
1938 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
1939 gimple_stmt_iterator gsi2
1940 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
1941 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
1942 vect_detect_hybrid_slp_1, &wi);
1943 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
1944 vect_detect_hybrid_slp_2,
1945 vect_detect_hybrid_slp_1, &wi);
1946 }
1947 }
1948 }
1949
1950 /* Then walk the SLP instance trees marking stmts with uses in
1951 non-SLP stmts as hybrid, also propagating hybrid down the
1952 SLP tree, collecting the above info on-the-fly. */
f1f41a6c 1953 FOR_EACH_VEC_ELT (slp_instances, i, instance)
041208c6 1954 {
1955 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
1956 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
1957 i, pure_slp);
1958 }
fb85abff 1959}
1960
37545e54 1961
1962/* Create and initialize a new bb_vec_info struct for BB, as well as
1963 stmt_vec_info structs for all the stmts in it. */
48e1416a 1964
37545e54 1965static bb_vec_info
1966new_bb_vec_info (basic_block bb)
1967{
1968 bb_vec_info res = NULL;
1969 gimple_stmt_iterator gsi;
1970
1971 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1972 BB_VINFO_BB (res) = bb;
1973
1974 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1975 {
1976 gimple stmt = gsi_stmt (gsi);
1977 gimple_set_uid (stmt, 0);
1978 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1979 }
1980
f1f41a6c 1981 BB_VINFO_GROUPED_STORES (res).create (10);
1982 BB_VINFO_SLP_INSTANCES (res).create (2);
4db2b577 1983 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
37545e54 1984
1985 bb->aux = res;
1986 return res;
1987}
1988
1989
1990/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1991 stmts in the basic block. */
1992
1993static void
1994destroy_bb_vec_info (bb_vec_info bb_vinfo)
1995{
f1f41a6c 1996 vec<slp_instance> slp_instances;
9f793cdf 1997 slp_instance instance;
37545e54 1998 basic_block bb;
1999 gimple_stmt_iterator si;
9f793cdf 2000 unsigned i;
37545e54 2001
2002 if (!bb_vinfo)
2003 return;
2004
2005 bb = BB_VINFO_BB (bb_vinfo);
2006
2007 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2008 {
2009 gimple stmt = gsi_stmt (si);
2010 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2011
2012 if (stmt_info)
2013 /* Free stmt_vec_info. */
2014 free_stmt_vec_info (stmt);
2015 }
2016
23e1875f 2017 vect_destroy_datarefs (NULL, bb_vinfo);
a8af2e86 2018 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
f1f41a6c 2019 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
9f793cdf 2020 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
f1f41a6c 2021 FOR_EACH_VEC_ELT (slp_instances, i, instance)
9f793cdf 2022 vect_free_slp_instance (instance);
f1f41a6c 2023 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
4db2b577 2024 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
37545e54 2025 free (bb_vinfo);
2026 bb->aux = NULL;
2027}
2028
2029
2030/* Analyze statements contained in SLP tree node after recursively analyzing
2031 the subtree. Return TRUE if the operations are supported. */
2032
2033static bool
2034vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
2035{
2036 bool dummy;
2037 int i;
2038 gimple stmt;
40bcc7c2 2039 slp_tree child;
37545e54 2040
2041 if (!node)
2042 return true;
2043
f1f41a6c 2044 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 2045 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
b0f64919 2046 return false;
37545e54 2047
f1f41a6c 2048 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
37545e54 2049 {
2050 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2051 gcc_assert (stmt_info);
2052 gcc_assert (PURE_SLP_STMT (stmt_info));
2053
2054 if (!vect_analyze_stmt (stmt, &dummy, node))
2055 return false;
2056 }
2057
2058 return true;
2059}
2060
2061
282bf14c 2062/* Analyze statements in SLP instances of the basic block. Return TRUE if the
37545e54 2063 operations are supported. */
2064
2065static bool
2066vect_slp_analyze_operations (bb_vec_info bb_vinfo)
2067{
f1f41a6c 2068 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
37545e54 2069 slp_instance instance;
2070 int i;
2071
f1f41a6c 2072 for (i = 0; slp_instances.iterate (i, &instance); )
37545e54 2073 {
48e1416a 2074 if (!vect_slp_analyze_node_operations (bb_vinfo,
37545e54 2075 SLP_INSTANCE_TREE (instance)))
2076 {
2077 vect_free_slp_instance (instance);
f1f41a6c 2078 slp_instances.ordered_remove (i);
37545e54 2079 }
2080 else
2081 i++;
48e1416a 2082 }
2083
f1f41a6c 2084 if (!slp_instances.length ())
37545e54 2085 return false;
2086
2087 return true;
2088}
2089
32afb748 2090
2091/* Compute the scalar cost of the SLP node NODE and its children
2092 and return it. Do not account defs that are marked in LIFE and
2093 update LIFE according to uses of NODE. */
2094
2095static unsigned
65526f7f 2096vect_bb_slp_scalar_cost (basic_block bb,
d70aebca 2097 slp_tree node, vec<bool, va_heap> *life)
32afb748 2098{
2099 unsigned scalar_cost = 0;
2100 unsigned i;
2101 gimple stmt;
2102 slp_tree child;
2103
2104 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2105 {
2106 unsigned stmt_cost;
2107 ssa_op_iter op_iter;
2108 def_operand_p def_p;
2109 stmt_vec_info stmt_info;
2110
d70aebca 2111 if ((*life)[i])
32afb748 2112 continue;
2113
2114 /* If there is a non-vectorized use of the defs then the scalar
2115 stmt is kept live in which case we do not account it or any
2116 required defs in the SLP children in the scalar cost. This
2117 way we make the vectorization more costly when compared to
2118 the scalar cost. */
2119 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2120 {
2121 imm_use_iterator use_iter;
2122 gimple use_stmt;
2123 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
ab2dc680 2124 if (!is_gimple_debug (use_stmt)
2125 && (gimple_code (use_stmt) == GIMPLE_PHI
2126 || gimple_bb (use_stmt) != bb
2127 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
32afb748 2128 {
d70aebca 2129 (*life)[i] = true;
32afb748 2130 BREAK_FROM_IMM_USE_STMT (use_iter);
2131 }
2132 }
d70aebca 2133 if ((*life)[i])
32afb748 2134 continue;
2135
2136 stmt_info = vinfo_for_stmt (stmt);
2137 if (STMT_VINFO_DATA_REF (stmt_info))
2138 {
2139 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2140 stmt_cost = vect_get_stmt_cost (scalar_load);
2141 else
2142 stmt_cost = vect_get_stmt_cost (scalar_store);
2143 }
2144 else
2145 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2146
2147 scalar_cost += stmt_cost;
2148 }
2149
2150 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
65526f7f 2151 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
32afb748 2152
2153 return scalar_cost;
2154}
2155
ad074595 2156/* Check if vectorization of the basic block is profitable. */
2157
2158static bool
2159vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2160{
f1f41a6c 2161 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
ad074595 2162 slp_instance instance;
4db2b577 2163 int i, j;
2164 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
f97dec81 2165 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
f97dec81 2166 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
ad074595 2167 stmt_vec_info stmt_info = NULL;
f97dec81 2168 stmt_vector_for_cost body_cost_vec;
4db2b577 2169 stmt_info_for_cost *ci;
ad074595 2170
2171 /* Calculate vector costs. */
f1f41a6c 2172 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ad074595 2173 {
f97dec81 2174 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
4db2b577 2175
f1f41a6c 2176 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
f97dec81 2177 {
2178 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2179 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2180 stmt_info, ci->misalign, vect_body);
2181 }
ad074595 2182 }
2183
2184 /* Calculate scalar cost. */
32afb748 2185 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ad074595 2186 {
4997014d 2187 auto_vec<bool, 20> life;
d70aebca 2188 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
65526f7f 2189 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2190 SLP_INSTANCE_TREE (instance),
d70aebca 2191 &life);
ad074595 2192 }
2193
4db2b577 2194 /* Complete the target-specific cost calculation. */
f97dec81 2195 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2196 &vec_inside_cost, &vec_epilogue_cost);
2197
2198 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
4db2b577 2199
6d8fb6cf 2200 if (dump_enabled_p ())
ad074595 2201 {
7bd765d4 2202 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2203 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2204 vec_inside_cost);
2205 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2206 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
78bb46f5 2207 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
ad074595 2208 }
2209
2210 /* Vectorization is profitable if its cost is less than the cost of scalar
2211 version. */
2212 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2213 return false;
2214
2215 return true;
2216}
2217
2218/* Check if the basic block can be vectorized. */
37545e54 2219
5bbe6156 2220static bb_vec_info
2221vect_slp_analyze_bb_1 (basic_block bb)
37545e54 2222{
2223 bb_vec_info bb_vinfo;
f1f41a6c 2224 vec<slp_instance> slp_instances;
37545e54 2225 slp_instance instance;
5bbe6156 2226 int i;
91a74fc6 2227 int min_vf = 2;
ca91d3f8 2228 unsigned n_stmts = 0;
d4b21757 2229
37545e54 2230 bb_vinfo = new_bb_vec_info (bb);
2231 if (!bb_vinfo)
2232 return NULL;
2233
ca91d3f8 2234 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
37545e54 2235 {
6d8fb6cf 2236 if (dump_enabled_p ())
7bd765d4 2237 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2238 "not vectorized: unhandled data-ref in basic "
2239 "block.\n");
48e1416a 2240
37545e54 2241 destroy_bb_vec_info (bb_vinfo);
2242 return NULL;
2243 }
2244
07e3bcbf 2245 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
37545e54 2246 {
6d8fb6cf 2247 if (dump_enabled_p ())
7bd765d4 2248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2249 "not vectorized: not enough data-refs in "
2250 "basic block.\n");
37545e54 2251
2252 destroy_bb_vec_info (bb_vinfo);
2253 return NULL;
2254 }
2255
68f15e9d 2256 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2257 {
2258 if (dump_enabled_p ())
2259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2260 "not vectorized: unhandled data access in "
2261 "basic block.\n");
2262
2263 destroy_bb_vec_info (bb_vinfo);
2264 return NULL;
2265 }
2266
4c0c783a 2267 vect_pattern_recog (NULL, bb_vinfo);
2268
37545e54 2269 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2270 {
6d8fb6cf 2271 if (dump_enabled_p ())
7bd765d4 2272 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2273 "not vectorized: bad data alignment in basic "
2274 "block.\n");
48e1416a 2275
37545e54 2276 destroy_bb_vec_info (bb_vinfo);
2277 return NULL;
2278 }
48e1416a 2279
37545e54 2280 /* Check the SLP opportunities in the basic block, analyze and build SLP
2281 trees. */
ca91d3f8 2282 if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
37545e54 2283 {
6d8fb6cf 2284 if (dump_enabled_p ())
7bd765d4 2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286 "not vectorized: failed to find SLP opportunities "
2287 "in basic block.\n");
37545e54 2288
2289 destroy_bb_vec_info (bb_vinfo);
2290 return NULL;
2291 }
48e1416a 2292
37545e54 2293 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2294
2295 /* Mark all the statements that we want to vectorize as pure SLP and
2296 relevant. */
f1f41a6c 2297 FOR_EACH_VEC_ELT (slp_instances, i, instance)
37545e54 2298 {
2299 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2300 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
48e1416a 2301 }
37545e54 2302
1824e1e2 2303 /* Mark all the statements that we do not want to vectorize. */
2304 for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2305 !gsi_end_p (gsi); gsi_next (&gsi))
2306 {
2307 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2308 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2309 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2310 }
2311
2312 /* Analyze dependences. At this point all stmts not participating in
2313 vectorization have to be marked. Dependence analysis assumes
2314 that we either vectorize all SLP instances or none at all. */
2315 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2316 {
2317 if (dump_enabled_p ())
2318 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2319 "not vectorized: unhandled data dependence "
2320 "in basic block.\n");
2321
2322 destroy_bb_vec_info (bb_vinfo);
2323 return NULL;
2324 }
2325
4db2b577 2326 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1ad41595 2327 {
6d8fb6cf 2328 if (dump_enabled_p ())
7bd765d4 2329 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2330 "not vectorized: unsupported alignment in basic "
2331 "block.\n");
1ad41595 2332 destroy_bb_vec_info (bb_vinfo);
2333 return NULL;
2334 }
2335
37545e54 2336 if (!vect_slp_analyze_operations (bb_vinfo))
2337 {
6d8fb6cf 2338 if (dump_enabled_p ())
78bb46f5 2339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 2340 "not vectorized: bad operation in basic block.\n");
37545e54 2341
2342 destroy_bb_vec_info (bb_vinfo);
2343 return NULL;
2344 }
2345
ad074595 2346 /* Cost model: check if the vectorization is worthwhile. */
3e398f5b 2347 if (!unlimited_cost_model (NULL)
ad074595 2348 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2349 {
6d8fb6cf 2350 if (dump_enabled_p ())
7bd765d4 2351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2352 "not vectorized: vectorization is not "
2353 "profitable.\n");
ad074595 2354
2355 destroy_bb_vec_info (bb_vinfo);
2356 return NULL;
2357 }
2358
6d8fb6cf 2359 if (dump_enabled_p ())
7bd765d4 2360 dump_printf_loc (MSG_NOTE, vect_location,
2361 "Basic block will be vectorized using SLP\n");
37545e54 2362
2363 return bb_vinfo;
2364}
2365
2366
5bbe6156 2367bb_vec_info
2368vect_slp_analyze_bb (basic_block bb)
2369{
2370 bb_vec_info bb_vinfo;
2371 int insns = 0;
2372 gimple_stmt_iterator gsi;
2373 unsigned int vector_sizes;
2374
6d8fb6cf 2375 if (dump_enabled_p ())
7bd765d4 2376 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
5bbe6156 2377
2378 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2379 {
2380 gimple stmt = gsi_stmt (gsi);
2381 if (!is_gimple_debug (stmt)
2382 && !gimple_nop_p (stmt)
2383 && gimple_code (stmt) != GIMPLE_LABEL)
2384 insns++;
2385 }
2386
2387 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2388 {
6d8fb6cf 2389 if (dump_enabled_p ())
7bd765d4 2390 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2391 "not vectorized: too many instructions in "
2392 "basic block.\n");
5bbe6156 2393
2394 return NULL;
2395 }
2396
2397 /* Autodetect first vector size we try. */
2398 current_vector_size = 0;
2399 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2400
2401 while (1)
2402 {
2403 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2404 if (bb_vinfo)
2405 return bb_vinfo;
2406
2407 destroy_bb_vec_info (bb_vinfo);
2408
2409 vector_sizes &= ~current_vector_size;
2410 if (vector_sizes == 0
2411 || current_vector_size == 0)
2412 return NULL;
2413
2414 /* Try the next biggest vector size. */
2415 current_vector_size = 1 << floor_log2 (vector_sizes);
6d8fb6cf 2416 if (dump_enabled_p ())
7bd765d4 2417 dump_printf_loc (MSG_NOTE, vect_location,
2418 "***** Re-trying analysis with "
2419 "vector size %d\n", current_vector_size);
5bbe6156 2420 }
2421}
2422
2423
48e1416a 2424/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
282bf14c 2425 the number of created vector stmts depends on the unrolling factor).
2426 However, the actual number of vector stmts for every SLP node depends on
2427 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2428 should be updated. In this function we assume that the inside costs
2429 calculated in vect_model_xxx_cost are linear in ncopies. */
fb85abff 2430
2431void
2432vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2433{
4db2b577 2434 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
f1f41a6c 2435 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
fb85abff 2436 slp_instance instance;
f97dec81 2437 stmt_vector_for_cost body_cost_vec;
4db2b577 2438 stmt_info_for_cost *si;
f97dec81 2439 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
fb85abff 2440
6d8fb6cf 2441 if (dump_enabled_p ())
7bd765d4 2442 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2443 "=== vect_update_slp_costs_according_to_vf ===\n");
fb85abff 2444
f1f41a6c 2445 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4db2b577 2446 {
2447 /* We assume that costs are linear in ncopies. */
2448 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2449
2450 /* Record the instance's instructions in the target cost model.
2451 This was delayed until here because the count of instructions
2452 isn't known beforehand. */
f97dec81 2453 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
4db2b577 2454
f1f41a6c 2455 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
f97dec81 2456 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2457 vinfo_for_stmt (si->stmt), si->misalign,
2458 vect_body);
4db2b577 2459 }
fb85abff 2460}
2461
37545e54 2462
48e1416a 2463/* For constant and loop invariant defs of SLP_NODE this function returns
2464 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
987502fa 2465 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2466 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
eefa05c8 2467 REDUC_INDEX is the index of the reduction operand in the statements, unless
2468 it is -1. */
fb85abff 2469
2470static void
09e31a48 2471vect_get_constant_vectors (tree op, slp_tree slp_node,
f1f41a6c 2472 vec<tree> *vec_oprnds,
eefa05c8 2473 unsigned int op_num, unsigned int number_of_vectors,
2474 int reduc_index)
fb85abff 2475{
f1f41a6c 2476 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2477 gimple stmt = stmts[0];
fb85abff 2478 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
fadf62f4 2479 unsigned nunits;
fb85abff 2480 tree vec_cst;
fadf62f4 2481 tree *elts;
2482 unsigned j, number_of_places_left_in_vector;
fb85abff 2483 tree vector_type;
09e31a48 2484 tree vop;
f1f41a6c 2485 int group_size = stmts.length ();
fb85abff 2486 unsigned int vec_num, i;
fadf62f4 2487 unsigned number_of_copies = 1;
f1f41a6c 2488 vec<tree> voprnds;
2489 voprnds.create (number_of_vectors);
fb85abff 2490 bool constant_p, is_store;
eefa05c8 2491 tree neutral_op = NULL;
98dbf32d 2492 enum tree_code code = gimple_expr_code (stmt);
d706d35c 2493 gimple def_stmt;
2494 struct loop *loop;
0ff8139c 2495 gimple_seq ctor_seq = NULL;
eefa05c8 2496
38fbbe0d 2497 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2498 && reduc_index != -1)
eefa05c8 2499 {
eefa05c8 2500 op_num = reduc_index - 1;
09e31a48 2501 op = gimple_op (stmt, reduc_index);
eefa05c8 2502 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
282bf14c 2503 we need either neutral operands or the original operands. See
eefa05c8 2504 get_initial_def_for_reduction() for details. */
2505 switch (code)
2506 {
2507 case WIDEN_SUM_EXPR:
2508 case DOT_PROD_EXPR:
2509 case PLUS_EXPR:
2510 case MINUS_EXPR:
2511 case BIT_IOR_EXPR:
2512 case BIT_XOR_EXPR:
2513 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2514 neutral_op = build_real (TREE_TYPE (op), dconst0);
2515 else
2516 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2517
2518 break;
2519
2520 case MULT_EXPR:
eefa05c8 2521 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2522 neutral_op = build_real (TREE_TYPE (op), dconst1);
2523 else
2524 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2525
2526 break;
2527
b036fcd8 2528 case BIT_AND_EXPR:
2529 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2530 break;
2531
298cab21 2532 /* For MIN/MAX we don't have an easy neutral operand but
2533 the initial values can be used fine here. Only for
2534 a reduction chain we have to force a neutral element. */
2535 case MAX_EXPR:
2536 case MIN_EXPR:
2537 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2538 neutral_op = NULL;
2539 else
2540 {
2541 def_stmt = SSA_NAME_DEF_STMT (op);
2542 loop = (gimple_bb (stmt))->loop_father;
2543 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2544 loop_preheader_edge (loop));
2545 }
2546 break;
d706d35c 2547
eefa05c8 2548 default:
d706d35c 2549 neutral_op = NULL;
eefa05c8 2550 }
2551 }
fb85abff 2552
2553 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2554 {
2555 is_store = true;
2556 op = gimple_assign_rhs1 (stmt);
2557 }
2558 else
09e31a48 2559 is_store = false;
2560
2561 gcc_assert (op);
fb85abff 2562
2563 if (CONSTANT_CLASS_P (op))
987502fa 2564 constant_p = true;
fb85abff 2565 else
987502fa 2566 constant_p = false;
2567
09e31a48 2568 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
ece0be9d 2569 gcc_assert (vector_type);
fb85abff 2570 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2571
2572 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
48e1416a 2573 created vectors. It is greater than 1 if unrolling is performed.
fb85abff 2574
2575 For example, we have two scalar operands, s1 and s2 (e.g., group of
2576 strided accesses of size two), while NUNITS is four (i.e., four scalars
f2104a54 2577 of this type can be packed in a vector). The output vector will contain
2578 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
fb85abff 2579 will be 2).
2580
48e1416a 2581 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
fb85abff 2582 containing the operands.
2583
2584 For example, NUNITS is four as before, and the group size is 8
f2104a54 2585 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
fb85abff 2586 {s5, s6, s7, s8}. */
48e1416a 2587
fb85abff 2588 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2589
2590 number_of_places_left_in_vector = nunits;
fadf62f4 2591 elts = XALLOCAVEC (tree, nunits);
fb85abff 2592 for (j = 0; j < number_of_copies; j++)
2593 {
f1f41a6c 2594 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
fb85abff 2595 {
2596 if (is_store)
2597 op = gimple_assign_rhs1 (stmt);
98dbf32d 2598 else
f2104a54 2599 {
98dbf32d 2600 switch (code)
f2104a54 2601 {
98dbf32d 2602 case COND_EXPR:
2603 if (op_num == 0 || op_num == 1)
2604 {
2605 tree cond = gimple_assign_rhs1 (stmt);
2606 op = TREE_OPERAND (cond, op_num);
2607 }
2608 else
2609 {
2610 if (op_num == 2)
2611 op = gimple_assign_rhs2 (stmt);
2612 else
2613 op = gimple_assign_rhs3 (stmt);
2614 }
2615 break;
2616
2617 case CALL_EXPR:
2618 op = gimple_call_arg (stmt, op_num);
2619 break;
2620
aa17a496 2621 case LSHIFT_EXPR:
2622 case RSHIFT_EXPR:
2623 case LROTATE_EXPR:
2624 case RROTATE_EXPR:
2625 op = gimple_op (stmt, op_num + 1);
2626 /* Unlike the other binary operators, shifts/rotates have
2627 the shift count being int, instead of the same type as
2628 the lhs, so make sure the scalar is the right type if
2629 we are dealing with vectors of
2630 long long/long/short/char. */
3533e58d 2631 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
aa17a496 2632 op = fold_convert (TREE_TYPE (vector_type), op);
2633 break;
2634
98dbf32d 2635 default:
2636 op = gimple_op (stmt, op_num + 1);
aa17a496 2637 break;
f2104a54 2638 }
2639 }
48e1416a 2640
eefa05c8 2641 if (reduc_index != -1)
2642 {
d706d35c 2643 loop = (gimple_bb (stmt))->loop_father;
2644 def_stmt = SSA_NAME_DEF_STMT (op);
eefa05c8 2645
2646 gcc_assert (loop);
39a5d6b1 2647
2648 /* Get the def before the loop. In reduction chain we have only
2649 one initial value. */
2650 if ((j != (number_of_copies - 1)
2651 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2652 && i != 0))
2653 && neutral_op)
eefa05c8 2654 op = neutral_op;
39a5d6b1 2655 else
2656 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2657 loop_preheader_edge (loop));
eefa05c8 2658 }
2659
fb85abff 2660 /* Create 'vect_ = {op0,op1,...,opn}'. */
fb85abff 2661 number_of_places_left_in_vector--;
0ff8139c 2662 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
6fa78636 2663 {
3533e58d 2664 if (CONSTANT_CLASS_P (op))
0ff8139c 2665 {
2666 op = fold_unary (VIEW_CONVERT_EXPR,
2667 TREE_TYPE (vector_type), op);
2668 gcc_assert (op && CONSTANT_CLASS_P (op));
2669 }
2670 else
2671 {
f9e245b2 2672 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
0ff8139c 2673 gimple init_stmt;
2674 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2675 op);
2676 init_stmt
2677 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
806413d2 2678 new_temp, op);
0ff8139c 2679 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2680 op = new_temp;
2681 }
6fa78636 2682 }
fadf62f4 2683 elts[number_of_places_left_in_vector] = op;
3533e58d 2684 if (!CONSTANT_CLASS_P (op))
2685 constant_p = false;
fb85abff 2686
2687 if (number_of_places_left_in_vector == 0)
2688 {
2689 number_of_places_left_in_vector = nunits;
2690
2691 if (constant_p)
fadf62f4 2692 vec_cst = build_vector (vector_type, elts);
fb85abff 2693 else
fadf62f4 2694 {
f1f41a6c 2695 vec<constructor_elt, va_gc> *v;
fadf62f4 2696 unsigned k;
f1f41a6c 2697 vec_alloc (v, nunits);
fadf62f4 2698 for (k = 0; k < nunits; ++k)
2699 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2700 vec_cst = build_constructor (vector_type, v);
2701 }
f1f41a6c 2702 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2703 vector_type, NULL));
0ff8139c 2704 if (ctor_seq != NULL)
2705 {
f1f41a6c 2706 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
0ff8139c 2707 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2708 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2709 GSI_SAME_STMT);
2710 ctor_seq = NULL;
2711 }
fb85abff 2712 }
2713 }
2714 }
2715
48e1416a 2716 /* Since the vectors are created in the reverse order, we should invert
fb85abff 2717 them. */
f1f41a6c 2718 vec_num = voprnds.length ();
fadf62f4 2719 for (j = vec_num; j != 0; j--)
fb85abff 2720 {
f1f41a6c 2721 vop = voprnds[j - 1];
2722 vec_oprnds->quick_push (vop);
fb85abff 2723 }
2724
f1f41a6c 2725 voprnds.release ();
fb85abff 2726
2727 /* In case that VF is greater than the unrolling factor needed for the SLP
48e1416a 2728 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2729 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
fb85abff 2730 to replicate the vectors. */
f1f41a6c 2731 while (number_of_vectors > vec_oprnds->length ())
fb85abff 2732 {
eefa05c8 2733 tree neutral_vec = NULL;
2734
2735 if (neutral_op)
2736 {
2737 if (!neutral_vec)
b797154e 2738 neutral_vec = build_vector_from_val (vector_type, neutral_op);
eefa05c8 2739
f1f41a6c 2740 vec_oprnds->quick_push (neutral_vec);
eefa05c8 2741 }
2742 else
2743 {
f1f41a6c 2744 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2745 vec_oprnds->quick_push (vop);
eefa05c8 2746 }
fb85abff 2747 }
2748}
2749
2750
2751/* Get vectorized definitions from SLP_NODE that contains corresponding
2752 vectorized def-stmts. */
2753
2754static void
f1f41a6c 2755vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
fb85abff 2756{
2757 tree vec_oprnd;
2758 gimple vec_def_stmt;
2759 unsigned int i;
2760
f1f41a6c 2761 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
fb85abff 2762
f1f41a6c 2763 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
fb85abff 2764 {
2765 gcc_assert (vec_def_stmt);
2766 vec_oprnd = gimple_get_lhs (vec_def_stmt);
f1f41a6c 2767 vec_oprnds->quick_push (vec_oprnd);
fb85abff 2768 }
2769}
2770
2771
48e1416a 2772/* Get vectorized definitions for SLP_NODE.
2773 If the scalar definitions are loop invariants or constants, collect them and
fb85abff 2774 call vect_get_constant_vectors() to create vector stmts.
2775 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
b0f64919 2776 must be stored in the corresponding child of SLP_NODE, and we call
2777 vect_get_slp_vect_defs () to retrieve them. */
48e1416a 2778
fb85abff 2779void
f1f41a6c 2780vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
7f7695a7 2781 vec<vec<tree> > *vec_oprnds, int reduc_index)
fb85abff 2782{
a8bcfda2 2783 gimple first_stmt;
b0f64919 2784 int number_of_vects = 0, i;
2785 unsigned int child_index = 0;
48e1416a 2786 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
b0f64919 2787 slp_tree child = NULL;
7f7695a7 2788 vec<tree> vec_defs;
a8bcfda2 2789 tree oprnd;
b0f64919 2790 bool vectorized_defs;
fb85abff 2791
f1f41a6c 2792 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2793 FOR_EACH_VEC_ELT (ops, i, oprnd)
fb85abff 2794 {
b0f64919 2795 /* For each operand we check if it has vectorized definitions in a child
2796 node or we need to create them (for invariants and constants). We
2797 check if the LHS of the first stmt of the next child matches OPRND.
2798 If it does, we found the correct child. Otherwise, we call
2799 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2800 to check this child node for the next operand. */
2801 vectorized_defs = false;
f1f41a6c 2802 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
fb85abff 2803 {
678e3d6e 2804 child = SLP_TREE_CHILDREN (slp_node)[child_index];
b0f64919 2805
a8bcfda2 2806 /* We have to check both pattern and original def, if available. */
2807 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2808 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
fb85abff 2809
a8bcfda2 2810 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2811 || (related
2812 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2813 {
2814 /* The number of vector defs is determined by the number of
2815 vector statements in the node from which we get those
b0f64919 2816 statements. */
a8bcfda2 2817 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2818 vectorized_defs = true;
b0f64919 2819 child_index++;
a8bcfda2 2820 }
b0f64919 2821 }
fb85abff 2822
b0f64919 2823 if (!vectorized_defs)
2824 {
2825 if (i == 0)
2826 {
2827 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2828 /* Number of vector stmts was calculated according to LHS in
2829 vect_schedule_slp_instance (), fix it by replacing LHS with
2830 RHS, if necessary. See vect_get_smallest_scalar_type () for
2831 details. */
2832 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2833 &rhs_size_unit);
2834 if (rhs_size_unit != lhs_size_unit)
2835 {
2836 number_of_vects *= rhs_size_unit;
2837 number_of_vects /= lhs_size_unit;
2838 }
2839 }
2840 }
eefa05c8 2841
b0f64919 2842 /* Allocate memory for vectorized defs. */
7f7695a7 2843 vec_defs = vNULL;
2844 vec_defs.create (number_of_vects);
fb85abff 2845
b0f64919 2846 /* For reduction defs we call vect_get_constant_vectors (), since we are
2847 looking for initial loop invariant values. */
2848 if (vectorized_defs && reduc_index == -1)
2849 /* The defs are already vectorized. */
7f7695a7 2850 vect_get_slp_vect_defs (child, &vec_defs);
b0f64919 2851 else
2852 /* Build vectors from scalar defs. */
7f7695a7 2853 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
b0f64919 2854 number_of_vects, reduc_index);
fb85abff 2855
7f7695a7 2856 vec_oprnds->quick_push (vec_defs);
fb85abff 2857
b0f64919 2858 /* For reductions, we only need initial values. */
2859 if (reduc_index != -1)
2860 return;
2861 }
fb85abff 2862}
2863
37545e54 2864
48e1416a 2865/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
fb85abff 2866 building a vector of type MASK_TYPE from it) and two input vectors placed in
2867 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2868 shifting by STRIDE elements of DR_CHAIN for every copy.
2869 (STRIDE is the number of vectorized stmts for NODE divided by the number of
48e1416a 2870 copies).
fb85abff 2871 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2872 the created stmts must be inserted. */
2873
2874static inline void
48e1416a 2875vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
afaef242 2876 tree mask, int first_vec_indx, int second_vec_indx,
48e1416a 2877 gimple_stmt_iterator *gsi, slp_tree node,
f1f41a6c 2878 tree vectype, vec<tree> dr_chain,
fb85abff 2879 int ncopies, int vect_stmts_counter)
2880{
afaef242 2881 tree perm_dest;
fb85abff 2882 gimple perm_stmt = NULL;
2883 stmt_vec_info next_stmt_info;
f018d957 2884 int i, stride;
fb85abff 2885 tree first_vec, second_vec, data_ref;
fb85abff 2886
fb85abff 2887 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
fb85abff 2888
48e1416a 2889 /* Initialize the vect stmts of NODE to properly insert the generated
fb85abff 2890 stmts later. */
f1f41a6c 2891 for (i = SLP_TREE_VEC_STMTS (node).length ();
fb85abff 2892 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
f1f41a6c 2893 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
fb85abff 2894
2895 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2896 for (i = 0; i < ncopies; i++)
2897 {
f1f41a6c 2898 first_vec = dr_chain[first_vec_indx];
2899 second_vec = dr_chain[second_vec_indx];
fb85abff 2900
fb85abff 2901 /* Generate the permute statement. */
446e85eb 2902 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2903 first_vec, second_vec, mask);
fb85abff 2904 data_ref = make_ssa_name (perm_dest, perm_stmt);
964ec9cd 2905 gimple_set_lhs (perm_stmt, data_ref);
fb85abff 2906 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
fb85abff 2907
48e1416a 2908 /* Store the vector statement in NODE. */
f1f41a6c 2909 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
fb85abff 2910
2911 first_vec_indx += stride;
2912 second_vec_indx += stride;
2913 }
2914
2915 /* Mark the scalar stmt as vectorized. */
2916 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2917 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2918}
2919
2920
48e1416a 2921/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
fb85abff 2922 return in CURRENT_MASK_ELEMENT its equivalent in target specific
282bf14c 2923 representation. Check that the mask is valid and return FALSE if not.
fb85abff 2924 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2925 the next vector, i.e., the current first vector is not needed. */
48e1416a 2926
fb85abff 2927static bool
48e1416a 2928vect_get_mask_element (gimple stmt, int first_mask_element, int m,
fb85abff 2929 int mask_nunits, bool only_one_vec, int index,
e21c468f 2930 unsigned char *mask, int *current_mask_element,
90674129 2931 bool *need_next_vector, int *number_of_mask_fixes,
2932 bool *mask_fixed, bool *needs_first_vector)
fb85abff 2933{
2934 int i;
fb85abff 2935
2936 /* Convert to target specific representation. */
2937 *current_mask_element = first_mask_element + m;
2938 /* Adjust the value in case it's a mask for second and third vectors. */
90674129 2939 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
fb85abff 2940
2941 if (*current_mask_element < mask_nunits)
90674129 2942 *needs_first_vector = true;
fb85abff 2943
2944 /* We have only one input vector to permute but the mask accesses values in
2945 the next vector as well. */
2946 if (only_one_vec && *current_mask_element >= mask_nunits)
2947 {
6d8fb6cf 2948 if (dump_enabled_p ())
fb85abff 2949 {
78bb46f5 2950 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 2951 "permutation requires at least two vectors ");
2952 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 2953 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 2954 }
2955
2956 return false;
2957 }
2958
2959 /* The mask requires the next vector. */
e5cf270d 2960 while (*current_mask_element >= mask_nunits * 2)
fb85abff 2961 {
90674129 2962 if (*needs_first_vector || *mask_fixed)
fb85abff 2963 {
2964 /* We either need the first vector too or have already moved to the
48e1416a 2965 next vector. In both cases, this permutation needs three
fb85abff 2966 vectors. */
6d8fb6cf 2967 if (dump_enabled_p ())
fb85abff 2968 {
7bd765d4 2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2970 "permutation requires at "
2971 "least three vectors ");
2972 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 2973 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 2974 }
2975
2976 return false;
2977 }
2978
2979 /* We move to the next vector, dropping the first one and working with
2980 the second and the third - we need to adjust the values of the mask
2981 accordingly. */
90674129 2982 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
fb85abff 2983
2984 for (i = 0; i < index; i++)
90674129 2985 mask[i] -= mask_nunits * *number_of_mask_fixes;
fb85abff 2986
90674129 2987 (*number_of_mask_fixes)++;
2988 *mask_fixed = true;
fb85abff 2989 }
2990
90674129 2991 *need_next_vector = *mask_fixed;
fb85abff 2992
2993 /* This was the last element of this mask. Start a new one. */
2994 if (index == mask_nunits - 1)
2995 {
90674129 2996 *number_of_mask_fixes = 1;
2997 *mask_fixed = false;
2998 *needs_first_vector = false;
fb85abff 2999 }
3000
3001 return true;
3002}
3003
3004
3005/* Generate vector permute statements from a list of loads in DR_CHAIN.
3006 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
678e3d6e 3007 permute statements for the SLP node NODE of the SLP instance
3008 SLP_NODE_INSTANCE. */
3009
fb85abff 3010bool
678e3d6e 3011vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
fb85abff 3012 gimple_stmt_iterator *gsi, int vf,
3013 slp_instance slp_node_instance, bool analyze_only)
3014{
678e3d6e 3015 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
fb85abff 3016 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3017 tree mask_element_type = NULL_TREE, mask_type;
964ec9cd 3018 int i, j, k, nunits, vec_index = 0, scalar_index;
964ec9cd 3019 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
fb85abff 3020 gimple next_scalar_stmt;
3021 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3022 int first_mask_element;
e21c468f 3023 int index, unroll_factor, current_mask_element, ncopies;
3024 unsigned char *mask;
fb85abff 3025 bool only_one_vec = false, need_next_vector = false;
3026 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
90674129 3027 int number_of_mask_fixes = 1;
3028 bool mask_fixed = false;
3029 bool needs_first_vector = false;
3754d046 3030 machine_mode mode;
fb85abff 3031
e21c468f 3032 mode = TYPE_MODE (vectype);
3033
3034 if (!can_vec_perm_p (mode, false, NULL))
fb85abff 3035 {
6d8fb6cf 3036 if (dump_enabled_p ())
fb85abff 3037 {
7bd765d4 3038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3039 "no vect permute for ");
3040 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3041 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 3042 }
964ec9cd 3043 return false;
fb85abff 3044 }
3045
964ec9cd 3046 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3047 same size as the vector element being permuted. */
3cea8318 3048 mask_element_type = lang_hooks.types.type_for_mode
3049 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
fb85abff 3050 mask_type = get_vectype_for_scalar_type (mask_element_type);
fb85abff 3051 nunits = TYPE_VECTOR_SUBPARTS (vectype);
e21c468f 3052 mask = XALLOCAVEC (unsigned char, nunits);
fb85abff 3053 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3054
3055 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
3056 unrolling factor. */
48e1416a 3057 orig_vec_stmts_num = group_size *
fb85abff 3058 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
3059 if (orig_vec_stmts_num == 1)
3060 only_one_vec = true;
3061
48e1416a 3062 /* Number of copies is determined by the final vectorization factor
fb85abff 3063 relatively to SLP_NODE_INSTANCE unrolling factor. */
48e1416a 3064 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
fb85abff 3065
678e3d6e 3066 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3067 return false;
3068
48e1416a 3069 /* Generate permutation masks for every NODE. Number of masks for each NODE
3070 is equal to GROUP_SIZE.
3071 E.g., we have a group of three nodes with three loads from the same
3072 location in each node, and the vector size is 4. I.e., we have a
3073 a0b0c0a1b1c1... sequence and we need to create the following vectors:
fb85abff 3074 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3075 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3076 ...
3077
964ec9cd 3078 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
48e1416a 3079 The last mask is illegal since we assume two operands for permute
282bf14c 3080 operation, and the mask element values can't be outside that range.
3081 Hence, the last mask must be converted into {2,5,5,5}.
48e1416a 3082 For the first two permutations we need the first and the second input
fb85abff 3083 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
48e1416a 3084 we need the second and the third vectors: {b1,c1,a2,b2} and
fb85abff 3085 {c2,a3,b3,c3}. */
3086
fb85abff 3087 {
3088 scalar_index = 0;
3089 index = 0;
3090 vect_stmts_counter = 0;
3091 vec_index = 0;
3092 first_vec_index = vec_index++;
3093 if (only_one_vec)
3094 second_vec_index = first_vec_index;
3095 else
3096 second_vec_index = vec_index++;
3097
3098 for (j = 0; j < unroll_factor; j++)
3099 {
3100 for (k = 0; k < group_size; k++)
3101 {
678e3d6e 3102 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
964ec9cd 3103 first_mask_element = i + j * group_size;
3104 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3105 nunits, only_one_vec, index,
3106 mask, &current_mask_element,
3107 &need_next_vector,
3108 &number_of_mask_fixes, &mask_fixed,
3109 &needs_first_vector))
3110 return false;
e5cf270d 3111 gcc_assert (current_mask_element < 2 * nunits);
964ec9cd 3112 mask[index++] = current_mask_element;
fb85abff 3113
964ec9cd 3114 if (index == nunits)
fb85abff 3115 {
678e3d6e 3116 index = 0;
e21c468f 3117 if (!can_vec_perm_p (mode, false, mask))
3118 {
6d8fb6cf 3119 if (dump_enabled_p ())
e21c468f 3120 {
7bd765d4 3121 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3122 vect_location,
3123 "unsupported vect permute { ");
e21c468f 3124 for (i = 0; i < nunits; ++i)
7bd765d4 3125 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3126 mask[i]);
3127 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
e21c468f 3128 }
3129 return false;
3130 }
3131
fb85abff 3132 if (!analyze_only)
3133 {
678e3d6e 3134 int l;
3135 tree mask_vec, *mask_elts;
3136 mask_elts = XALLOCAVEC (tree, nunits);
3137 for (l = 0; l < nunits; ++l)
3138 mask_elts[l] = build_int_cst (mask_element_type,
3139 mask[l]);
3140 mask_vec = build_vector (mask_type, mask_elts);
3141
3142 if (need_next_vector)
fb85abff 3143 {
3144 first_vec_index = second_vec_index;
3145 second_vec_index = vec_index;
3146 }
3147
f1f41a6c 3148 next_scalar_stmt
3149 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
fb85abff 3150
3151 vect_create_mask_and_perm (stmt, next_scalar_stmt,
afaef242 3152 mask_vec, first_vec_index, second_vec_index,
964ec9cd 3153 gsi, node, vectype, dr_chain,
afaef242 3154 ncopies, vect_stmts_counter++);
fb85abff 3155 }
48e1416a 3156 }
3157 }
3158 }
3159 }
fb85abff 3160
fb85abff 3161 return true;
3162}
3163
3164
3165
3166/* Vectorize SLP instance tree in postorder. */
3167
3168static bool
3169vect_schedule_slp_instance (slp_tree node, slp_instance instance,
37545e54 3170 unsigned int vectorization_factor)
fb85abff 3171{
3172 gimple stmt;
ee612634 3173 bool grouped_store, is_store;
fb85abff 3174 gimple_stmt_iterator si;
3175 stmt_vec_info stmt_info;
3176 unsigned int vec_stmts_size, nunits, group_size;
3177 tree vectype;
3178 int i;
40bcc7c2 3179 slp_tree child;
fb85abff 3180
3181 if (!node)
3182 return false;
3183
f1f41a6c 3184 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 3185 vect_schedule_slp_instance (child, instance, vectorization_factor);
48e1416a 3186
f1f41a6c 3187 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
fb85abff 3188 stmt_info = vinfo_for_stmt (stmt);
3189
3190 /* VECTYPE is the type of the destination. */
b334cbba 3191 vectype = STMT_VINFO_VECTYPE (stmt_info);
fb85abff 3192 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3193 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3194
3195 /* For each SLP instance calculate number of vector stmts to be created
282bf14c 3196 for the scalar stmts in each node of the SLP tree. Number of vector
fb85abff 3197 elements in one vector iteration is the number of scalar elements in
3198 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3199 size. */
3200 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3201
f1f41a6c 3202 if (!SLP_TREE_VEC_STMTS (node).exists ())
fb85abff 3203 {
f1f41a6c 3204 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
fb85abff 3205 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3206 }
3207
6d8fb6cf 3208 if (dump_enabled_p ())
fb85abff 3209 {
7bd765d4 3210 dump_printf_loc (MSG_NOTE,vect_location,
3211 "------>vectorizing SLP node starting from: ");
3212 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 3213 dump_printf (MSG_NOTE, "\n");
48e1416a 3214 }
fb85abff 3215
3216 /* Loads should be inserted before the first load. */
3217 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
ee612634 3218 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
24c271ef 3219 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
678e3d6e 3220 && SLP_TREE_LOAD_PERMUTATION (node).exists ())
fb85abff 3221 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
cfdcf183 3222 else if (is_pattern_stmt_p (stmt_info))
24c271ef 3223 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
fb85abff 3224 else
3225 si = gsi_for_stmt (stmt);
48e1416a 3226
d4b21757 3227 /* Stores should be inserted just before the last store. */
ee612634 3228 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
d4b21757 3229 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3230 {
3231 gimple last_store = vect_find_last_store_in_slp_instance (instance);
0bc79e04 3232 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3233 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
d4b21757 3234 si = gsi_for_stmt (last_store);
3235 }
3236
39a5d6b1 3237 /* Mark the first element of the reduction chain as reduction to properly
3238 transform the node. In the analysis phase only the last element of the
3239 chain is marked as reduction. */
ee612634 3240 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
39a5d6b1 3241 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3242 {
3243 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3244 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3245 }
3246
ee612634 3247 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
eefa05c8 3248 return is_store;
fb85abff 3249}
3250
529fcc2a 3251/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3252 For loop vectorization this is done in vectorizable_call, but for SLP
3253 it needs to be deferred until end of vect_schedule_slp, because multiple
3254 SLP instances may refer to the same scalar stmt. */
3255
3256static void
3257vect_remove_slp_scalar_calls (slp_tree node)
3258{
3259 gimple stmt, new_stmt;
3260 gimple_stmt_iterator gsi;
3261 int i;
40bcc7c2 3262 slp_tree child;
529fcc2a 3263 tree lhs;
3264 stmt_vec_info stmt_info;
3265
3266 if (!node)
3267 return;
3268
f1f41a6c 3269 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 3270 vect_remove_slp_scalar_calls (child);
529fcc2a 3271
f1f41a6c 3272 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
529fcc2a 3273 {
3274 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3275 continue;
3276 stmt_info = vinfo_for_stmt (stmt);
3277 if (stmt_info == NULL
3278 || is_pattern_stmt_p (stmt_info)
3279 || !PURE_SLP_STMT (stmt_info))
3280 continue;
3281 lhs = gimple_call_lhs (stmt);
3282 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3283 set_vinfo_for_stmt (new_stmt, stmt_info);
3284 set_vinfo_for_stmt (stmt, NULL);
3285 STMT_VINFO_STMT (stmt_info) = new_stmt;
3286 gsi = gsi_for_stmt (stmt);
3287 gsi_replace (&gsi, new_stmt, false);
3288 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3289 }
3290}
fb85abff 3291
282bf14c 3292/* Generate vector code for all SLP instances in the loop/basic block. */
3293
fb85abff 3294bool
37545e54 3295vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 3296{
f1f41a6c 3297 vec<slp_instance> slp_instances;
fb85abff 3298 slp_instance instance;
678e3d6e 3299 unsigned int i, vf;
fb85abff 3300 bool is_store = false;
3301
37545e54 3302 if (loop_vinfo)
3303 {
3304 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3305 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
48e1416a 3306 }
37545e54 3307 else
3308 {
3309 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3310 vf = 1;
48e1416a 3311 }
37545e54 3312
f1f41a6c 3313 FOR_EACH_VEC_ELT (slp_instances, i, instance)
fb85abff 3314 {
3315 /* Schedule the tree of INSTANCE. */
3316 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
37545e54 3317 instance, vf);
6d8fb6cf 3318 if (dump_enabled_p ())
7bd765d4 3319 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 3320 "vectorizing stmts using SLP.\n");
fb85abff 3321 }
3322
f1f41a6c 3323 FOR_EACH_VEC_ELT (slp_instances, i, instance)
eefa05c8 3324 {
3325 slp_tree root = SLP_INSTANCE_TREE (instance);
3326 gimple store;
3327 unsigned int j;
3328 gimple_stmt_iterator gsi;
3329
814c4f66 3330 /* Remove scalar call stmts. Do not do this for basic-block
3331 vectorization as not all uses may be vectorized.
3332 ??? Why should this be necessary? DCE should be able to
3333 remove the stmts itself.
3334 ??? For BB vectorization we can as well remove scalar
3335 stmts starting from the SLP tree root if they have no
3336 uses. */
3337 if (loop_vinfo)
3338 vect_remove_slp_scalar_calls (root);
529fcc2a 3339
f1f41a6c 3340 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
eefa05c8 3341 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3342 {
3343 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3344 break;
3345
0bc79e04 3346 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3347 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
eefa05c8 3348 /* Free the attached stmt_vec_info and remove the stmt. */
3349 gsi = gsi_for_stmt (store);
bc8a8451 3350 unlink_stmt_vdef (store);
eefa05c8 3351 gsi_remove (&gsi, true);
bc8a8451 3352 release_defs (store);
eefa05c8 3353 free_stmt_vec_info (store);
3354 }
3355 }
3356
fb85abff 3357 return is_store;
3358}
37545e54 3359
3360
3361/* Vectorize the basic block. */
3362
3363void
3364vect_slp_transform_bb (basic_block bb)
3365{
3366 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3367 gimple_stmt_iterator si;
3368
3369 gcc_assert (bb_vinfo);
3370
6d8fb6cf 3371 if (dump_enabled_p ())
7bd765d4 3372 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
37545e54 3373
3374 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3375 {
3376 gimple stmt = gsi_stmt (si);
3377 stmt_vec_info stmt_info;
3378
6d8fb6cf 3379 if (dump_enabled_p ())
37545e54 3380 {
7bd765d4 3381 dump_printf_loc (MSG_NOTE, vect_location,
3382 "------>SLPing statement: ");
3383 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 3384 dump_printf (MSG_NOTE, "\n");
37545e54 3385 }
3386
3387 stmt_info = vinfo_for_stmt (stmt);
3388 gcc_assert (stmt_info);
3389
3390 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3391 if (STMT_SLP_TYPE (stmt_info))
3392 {
3393 vect_schedule_slp (NULL, bb_vinfo);
3394 break;
3395 }
3396 }
3397
6d8fb6cf 3398 if (dump_enabled_p ())
a21425b5 3399 dump_printf_loc (MSG_NOTE, vect_location,
b055bc88 3400 "BASIC BLOCK VECTORIZED\n");
37545e54 3401
765a637f 3402 destroy_bb_vec_info (bb_vinfo);
3403}