/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-pass.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
fb85abff 50
37545e54 51/* Extract the location of the basic block in the source code.
52 Return the basic block location if succeed and NULL if not. */
53
36f39b2e 54source_location
37545e54 55find_bb_location (basic_block bb)
56{
57 gimple stmt = NULL;
58 gimple_stmt_iterator si;
59
60 if (!bb)
36f39b2e 61 return UNKNOWN_LOCATION;
37545e54 62
63 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
64 {
65 stmt = gsi_stmt (si);
36f39b2e 66 if (gimple_location (stmt) != UNKNOWN_LOCATION)
37545e54 67 return gimple_location (stmt);
68 }
69
36f39b2e 70 return UNKNOWN_LOCATION;
37545e54 71}
72
73
fb85abff 74/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
75
76static void
77vect_free_slp_tree (slp_tree node)
78{
b0f64919 79 int i;
40bcc7c2 80 slp_tree child;
b0f64919 81
fb85abff 82 if (!node)
83 return;
84
f1f41a6c 85 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 86 vect_free_slp_tree (child);
48e1416a 87
f1f41a6c 88 SLP_TREE_CHILDREN (node).release ();
89 SLP_TREE_SCALAR_STMTS (node).release ();
90 SLP_TREE_VEC_STMTS (node).release ();
678e3d6e 91 SLP_TREE_LOAD_PERMUTATION (node).release ();
fb85abff 92
93 free (node);
94}
95
96
97/* Free the memory allocated for the SLP instance. */
98
99void
100vect_free_slp_instance (slp_instance instance)
101{
102 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
f1f41a6c 103 SLP_INSTANCE_LOADS (instance).release ();
104 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
9f793cdf 105 free (instance);
fb85abff 106}
107
108
b0f64919 109/* Create an SLP node for SCALAR_STMTS. */
110
111static slp_tree
f1f41a6c 112vect_create_new_slp_node (vec<gimple> scalar_stmts)
b0f64919 113{
3c7a3fca 114 slp_tree node;
f1f41a6c 115 gimple stmt = scalar_stmts[0];
b0f64919 116 unsigned int nops;
117
118 if (is_gimple_call (stmt))
119 nops = gimple_call_num_args (stmt);
120 else if (is_gimple_assign (stmt))
f2104a54 121 {
122 nops = gimple_num_ops (stmt) - 1;
123 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
124 nops++;
125 }
b0f64919 126 else
127 return NULL;
128
3c7a3fca 129 node = XNEW (struct _slp_tree);
b0f64919 130 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
f1f41a6c 131 SLP_TREE_VEC_STMTS (node).create (0);
132 SLP_TREE_CHILDREN (node).create (nops);
678e3d6e 133 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
b0f64919 134
135 return node;
136}
137
138
139/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
140 operand. */
f1f41a6c 141static vec<slp_oprnd_info>
b0f64919 142vect_create_oprnd_info (int nops, int group_size)
143{
144 int i;
145 slp_oprnd_info oprnd_info;
f1f41a6c 146 vec<slp_oprnd_info> oprnds_info;
b0f64919 147
f1f41a6c 148 oprnds_info.create (nops);
b0f64919 149 for (i = 0; i < nops; i++)
150 {
151 oprnd_info = XNEW (struct _slp_oprnd_info);
f1f41a6c 152 oprnd_info->def_stmts.create (group_size);
b0f64919 153 oprnd_info->first_dt = vect_uninitialized_def;
3533e58d 154 oprnd_info->first_op_type = NULL_TREE;
b0f64919 155 oprnd_info->first_pattern = false;
f1f41a6c 156 oprnds_info.quick_push (oprnd_info);
b0f64919 157 }
158
159 return oprnds_info;
160}
161
162
3c7a3fca 163/* Free operands info. */
164
b0f64919 165static void
f1f41a6c 166vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
b0f64919 167{
168 int i;
169 slp_oprnd_info oprnd_info;
170
f1f41a6c 171 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
3c7a3fca 172 {
f1f41a6c 173 oprnd_info->def_stmts.release ();
3c7a3fca 174 XDELETE (oprnd_info);
175 }
b0f64919 176
f1f41a6c 177 oprnds_info.release ();
b0f64919 178}
179
180
40bcc7c2 181/* Find the place of the data-ref in STMT in the interleaving chain that starts
182 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
183
184static int
185vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
186{
187 gimple next_stmt = first_stmt;
188 int result = 0;
189
190 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
191 return -1;
192
193 do
194 {
195 if (next_stmt == stmt)
196 return result;
197 result++;
198 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
199 }
200 while (next_stmt);
201
202 return -1;
203}
204
205
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  FIRST is true for the first stmt
   of the group; its defs establish the reference the rest must match.
   Return true on success, false if the defs are invalid or mismatched.  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             gimple stmt, bool first,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  struct loop *loop = NULL;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  /* Index of the next operand to fetch with gimple_op; 1 skips the lhs
     of an assignment.  */
  int op_idx = 1;
  /* When a COND_EXPR condition is a comparison, its two operands are
     processed as two separate defs; this holds the pending second one.  */
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      /* Call arguments start at gimple operand index 3.  */
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* A COND_EXPR's comparison contributes an extra operand.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
	{
	  oprnd = compare_rhs;
	  compare_rhs = NULL_TREE;
	}
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = (*oprnds_info)[i];

      /* Split an embedded comparison into its two operands; the second
	 is handled on the next iteration via COMPARE_RHS.  */
      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
	}

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
			       &def, &dt)
	  || (!def_stmt && dt != vect_constant_def))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't find def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return false;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	      || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
		  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
	  /* All stmts of the group must agree on being in a pattern.  */
          if (!first && !oprnd_info->first_pattern)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return false;
            }

	  /* Follow the pattern: analyze the related stmt instead.  */
          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
              case GIMPLE_PHI:
                def = gimple_phi_result (def_stmt);
                break;

              case GIMPLE_ASSIGN:
                def = gimple_assign_lhs (def_stmt);
                break;

              default:
                if (dump_enabled_p ())
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "unsupported defining stmt:\n");
                return false;
            }
        }

      if (first)
	{
	  /* First stmt of the group: record the reference def info.  */
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return false;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
        case vect_reduction_def:
	  /* Nothing to record; vector operands are built from scalars.  */
	  break;

	case vect_internal_def:
	  /* Collect the def stmt: it becomes part of a child SLP node.  */
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return false;
	}
    }

  return true;
}
391
392
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Throughout, a "fatal mismatch" (MATCHES[0] = false, return false)
   means the whole group can never form an SLP node; a plain "mismatch"
   (matches[i] stays false, continue) only marks stmt I as differing
   from the first stmt.  */

static bool
vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
		       vec<gimple> stmts, unsigned int group_size,
		       unsigned nops, unsigned int *max_nunits,
		       unsigned int vectorization_factor, bool *matches)
{
  unsigned int i;
  gimple stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
  tree cond;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
          dump_printf (MSG_NOTE, "\n");
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
	  /* Fatal mismatch.  */
	  matches[0] = false;
          return false;
        }

      /* Only assignments and calls (stmts with an lhs) are handled.  */
      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      /* A COND_EXPR must have a comparison as its condition.  */
      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == COND_EXPR
	  && (cond = gimple_assign_rhs1 (stmt))
	  && !COMPARISON_CLASS_P (cond))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: condition is not "
			       "comparison ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
	  /* Fatal mismatch.  */
	  matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      if (is_gimple_call (stmt))
	{
	  rhs_code = CALL_EXPR;
	  /* Only plain, nothrow, direct calls are supported.  */
	  if (gimple_call_internal_p (stmt)
	      || gimple_call_tail_p (stmt)
	      || gimple_call_noreturn_p (stmt)
	      || !gimple_call_nothrow_p (stmt)
	      || gimple_call_chain (stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      /* Scalar shift amount: all stmts must share it.  */
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
	}
      else
	{
	  /* Subsequent stmts must use the same operation as the first,
	     modulo REALPART/IMAGPART pairing and mixed ref forms within
	     a grouped access.  */
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      /* All calls must target the same function with the same
		 type and argument count as the first stmt.  */
	      gimple first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      unsigned unrolling_factor
		= least_common_multiple
		    (*max_nunits, group_size) / group_size;
              /* FORNOW: Check that there is no gap between the loads
		 and no gap between the groups when we need to load
		 multiple groups at once.
		 ???  We should enhance this to only disallow gaps
		 inside vectors.  */
              if ((unrolling_factor > 1
		   && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
		   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
		  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
		      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: grouped "
				       "loads have gaps ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
		  /* Fatal mismatch.  */
		  matches[0] = false;
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
	      unsigned ncopies
		= vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
              if (loop_vinfo
		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
		  && ((GROUP_SIZE (vinfo_for_stmt (stmt))
		       - GROUP_GAP (vinfo_for_stmt (stmt)))
		      > ncopies * group_size))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: the number "
				       "of interleaved loads is greater than "
				       "the SLP group size ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                    }
		  /* Fatal mismatch.  */
		  matches[0] = false;
                  return false;
                }

	      old_first_load = first_load;
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
		      /* Mismatch.  */
		      continue;
                    }
                }
              else
                prev_first_load = first_load;

	      /* In some cases a group of loads is just the same load
		 repeated N times.  Only analyze its cost once.  */
              if (first_load == stmt && old_first_load != first_load)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: unsupported "
					   "unaligned load ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
                          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                        }
		      /* Fatal mismatch.  */
		      matches[0] = false;
                      return false;
                    }
                }
            }
        } /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && rhs_code != COND_EXPR
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      /* All COND_EXPRs must use the same comparison code.  */
	      tree cond_expr = gimple_assign_rhs1 (stmt);

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (first_cond_code != TREE_CODE (cond_expr))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  return true;
}
838
/* Recursively build an SLP tree starting from NODE.
   Return false if the def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation; return true
   on success.  MATCHES (optional, allocated locally if NULL) records
   which scalar stmts mismatched; NPERMUTES (optional) counts failed
   operand-swap retries across the whole recursion to bound them.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     unsigned int vectorization_factor,
		     bool *matches, unsigned *npermutes)
{
  unsigned nops, i, this_npermutes = 0;
  gimple stmt;

  if (!matches)
    matches = XALLOCAVEC (bool, group_size);
  if (!npermutes)
    npermutes = &this_npermutes;

  matches[0] = false;

  /* Determine the operand count from the first scalar stmt.  */
  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      /* A COND_EXPR's embedded comparison adds one operand.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else
    return false;

  /* Verify the stmts of this node are isomorphic.  */
  if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
			      SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
			      max_nunits, vectorization_factor, matches))
    return false;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      loads->safe_push (*node);
      return true;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
    {
      if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
					stmt, (i == 0), &oprnds_info))
	{
	  vect_free_oprnd_info (oprnds_info);
	  return false;
	}
    }

  stmt = SLP_TREE_SCALAR_STMTS (*node)[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      /* Remember state so a failed child build can be rolled back.  */
      unsigned old_nloads = loads->length ();
      unsigned old_max_nunits = *max_nunits;

      /* Only internal defs get a child node; constants/externals are
	 materialized when vectorizing.  */
      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child)
	{
	  vect_free_oprnd_info (oprnds_info);
	  return false;
	}

      /* Fresh MATCHES for the child so a failure there does not clobber
	 this level's results.  */
      bool *matches = XALLOCAVEC (bool, group_size);
      if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
			       group_size, max_nunits, loads,
			       vectorization_factor, matches, npermutes))
	{
	  /* The child took ownership of the def stmts vector.  */
	  oprnd_info->def_stmts = vNULL;
	  SLP_TREE_CHILDREN (*node).quick_push (child);
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ???  For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
	  /* Do so only if the number of not successful permutes was not
	     more than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* Roll back.  */
	  *max_nunits = old_max_nunits;
	  loads->truncate (old_nloads);
	  /* Swap mismatched definition stmts.  */
	  for (unsigned j = 0; j < group_size; ++j)
	    if (!matches[j])
	      {
		gimple tem = oprnds_info[0]->def_stmts[j];
		oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
		oprnds_info[1]->def_stmts[j] = tem;
	      }
	  /* And try again ... */
	  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
				   group_size, max_nunits, loads,
				   vectorization_factor,
				   matches, npermutes))
	    {
	      oprnd_info->def_stmts = vNULL;
	      SLP_TREE_CHILDREN (*node).quick_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

      oprnd_info->def_stmts = vNULL;
      vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return false;
    }

  vect_free_oprnd_info (oprnds_info);
  return true;
}
983
7bd765d4 984/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
fb85abff 985
986static void
7bd765d4 987vect_print_slp_tree (int dump_kind, slp_tree node)
fb85abff 988{
989 int i;
990 gimple stmt;
40bcc7c2 991 slp_tree child;
fb85abff 992
993 if (!node)
994 return;
995
7bd765d4 996 dump_printf (dump_kind, "node ");
f1f41a6c 997 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
fb85abff 998 {
7bd765d4 999 dump_printf (dump_kind, "\n\tstmt %d ", i);
1000 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
fb85abff 1001 }
7bd765d4 1002 dump_printf (dump_kind, "\n");
fb85abff 1003
f1f41a6c 1004 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1005 vect_print_slp_tree (dump_kind, child);
fb85abff 1006}
1007
1008
48e1416a 1009/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1010 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
282bf14c 1011 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
fb85abff 1012 stmts in NODE are to be marked. */
1013
1014static void
1015vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1016{
1017 int i;
1018 gimple stmt;
40bcc7c2 1019 slp_tree child;
fb85abff 1020
1021 if (!node)
1022 return;
1023
f1f41a6c 1024 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
fb85abff 1025 if (j < 0 || i == j)
1026 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1027
f1f41a6c 1028 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1029 vect_mark_slp_stmts (child, mark, j);
fb85abff 1030}
1031
1032
37545e54 1033/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1034
1035static void
1036vect_mark_slp_stmts_relevant (slp_tree node)
1037{
1038 int i;
1039 gimple stmt;
1040 stmt_vec_info stmt_info;
40bcc7c2 1041 slp_tree child;
37545e54 1042
1043 if (!node)
1044 return;
1045
f1f41a6c 1046 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
37545e54 1047 {
1048 stmt_info = vinfo_for_stmt (stmt);
48e1416a 1049 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
37545e54 1050 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1051 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1052 }
1053
f1f41a6c 1054 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1055 vect_mark_slp_stmts_relevant (child);
37545e54 1056}
1057
1058
/* Rearrange the statements of NODE according to PERMUTATION: the stmt at
   index I is moved to index PERMUTATION[I].  The whole subtree is permuted
   recursively so all nodes stay lane-consistent.  GROUP_SIZE must equal the
   number of scalar stmts in every node of the tree (asserted below).  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple stmt;
  vec<gimple> tmp_stmts;
  unsigned int i;
  slp_tree child;

  /* Permute the children first; the order of recursion does not matter
     since each node is rearranged independently.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  /* Pre-size so we can scatter-assign below.  */
  tmp_stmts.quick_grow_cleared (group_size);

  /* Scatter each stmt into its permuted slot.  */
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  /* Release the old vector before installing the rearranged one; NODE
     takes ownership of TMP_STMTS.  */
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}
1083
1084
678e3d6e 1085/* Check if the required load permutations in the SLP instance
1086 SLP_INSTN are supported. */
fb85abff 1087
1088static bool
678e3d6e 1089vect_supported_load_permutation_p (slp_instance slp_instn)
fb85abff 1090{
678e3d6e 1091 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1092 unsigned int i, j, k, next;
e8332698 1093 sbitmap load_index;
16513aa4 1094 slp_tree node;
1095 gimple stmt, load, next_load, first_load;
24c271ef 1096 struct data_reference *dr;
fb85abff 1097
6d8fb6cf 1098 if (dump_enabled_p ())
fb85abff 1099 {
7bd765d4 1100 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
678e3d6e 1101 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1102 if (node->load_permutation.exists ())
1103 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1104 dump_printf (MSG_NOTE, "%d ", next);
1105 else
1106 for (i = 0; i < group_size; ++i)
1107 dump_printf (MSG_NOTE, "%d ", i);
78bb46f5 1108 dump_printf (MSG_NOTE, "\n");
fb85abff 1109 }
1110
eefa05c8 1111 /* In case of reduction every load permutation is allowed, since the order
1112 of the reduction statements is not important (as opposed to the case of
ee612634 1113 grouped stores). The only condition we need to check is that all the
eefa05c8 1114 load nodes are of the same size and have the same permutation (and then
1115 rearrange all the nodes of the SLP instance according to this
1116 permutation). */
1117
1118 /* Check that all the load nodes are of the same size. */
678e3d6e 1119 /* ??? Can't we assert this? */
f1f41a6c 1120 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
16513aa4 1121 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1122 return false;
a2e70a99 1123
eefa05c8 1124 node = SLP_INSTANCE_TREE (slp_instn);
f1f41a6c 1125 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
eefa05c8 1126
39a5d6b1 1127 /* Reduction (there are no data-refs in the root).
1128 In reduction chain the order of the loads is important. */
1129 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1130 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
eefa05c8 1131 {
678e3d6e 1132 slp_tree load;
1133 unsigned int lidx;
eefa05c8 1134
678e3d6e 1135 /* Compare all the permutation sequences to the first one. We know
1136 that at least one load is permuted. */
1137 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1138 if (!node->load_permutation.exists ())
1139 return false;
1140 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1141 {
1142 if (!load->load_permutation.exists ())
1143 return false;
1144 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1145 if (lidx != node->load_permutation[j])
1146 return false;
1147 }
0ede2ac4 1148
678e3d6e 1149 /* Check that the loads in the first sequence are different and there
1150 are no gaps between them. */
1151 load_index = sbitmap_alloc (group_size);
1152 bitmap_clear (load_index);
1153 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1154 {
1155 if (bitmap_bit_p (load_index, lidx))
1156 {
1157 sbitmap_free (load_index);
1158 return false;
1159 }
1160 bitmap_set_bit (load_index, lidx);
1161 }
1162 for (i = 0; i < group_size; i++)
1163 if (!bitmap_bit_p (load_index, i))
1164 {
1165 sbitmap_free (load_index);
1166 return false;
1167 }
1168 sbitmap_free (load_index);
1169
1170 /* This permutation is valid for reduction. Since the order of the
1171 statements in the nodes is not important unless they are memory
1172 accesses, we can rearrange the statements in all the nodes
1173 according to the order of the loads. */
1174 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1175 node->load_permutation);
1176
1177 /* We are done, no actual permutations need to be generated. */
1178 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1179 SLP_TREE_LOAD_PERMUTATION (node).release ();
1180 return true;
eefa05c8 1181 }
1182
24c271ef 1183 /* In basic block vectorization we allow any subchain of an interleaving
1184 chain.
1185 FORNOW: not supported in loop SLP because of realignment compications. */
678e3d6e 1186 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
24c271ef 1187 {
678e3d6e 1188 /* Check that for every node in the instance the loads
1189 form a subchain. */
f1f41a6c 1190 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
24c271ef 1191 {
1192 next_load = NULL;
f1f41a6c 1193 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
24c271ef 1194 {
24c271ef 1195 if (j != 0 && next_load != load)
678e3d6e 1196 return false;
24c271ef 1197 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1198 }
24c271ef 1199 }
1200
1201 /* Check that the alignment of the first load in every subchain, i.e.,
678e3d6e 1202 the first statement in every load node, is supported.
1203 ??? This belongs in alignment checking. */
1204 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1205 {
1206 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1207 if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1208 {
1209 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1210 if (vect_supportable_dr_alignment (dr, false)
1211 == dr_unaligned_unsupported)
1212 {
1213 if (dump_enabled_p ())
1214 {
1215 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1216 vect_location,
1217 "unsupported unaligned load ");
1218 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1219 first_load, 0);
78bb46f5 1220 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
678e3d6e 1221 }
1222 return false;
1223 }
1224 }
1225 }
24c271ef 1226
678e3d6e 1227 /* We are done, no actual permutations need to be generated. */
1228 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1229 SLP_TREE_LOAD_PERMUTATION (node).release ();
1230 return true;
24c271ef 1231 }
1232
48e1416a 1233 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1234 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
eefa05c8 1235 well (unless it's reduction). */
678e3d6e 1236 if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
fb85abff 1237 return false;
678e3d6e 1238 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1239 if (!node->load_permutation.exists ())
1240 return false;
fb85abff 1241
e8332698 1242 load_index = sbitmap_alloc (group_size);
53c5d9d4 1243 bitmap_clear (load_index);
678e3d6e 1244 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1245 {
1246 unsigned int lidx = node->load_permutation[0];
1247 if (bitmap_bit_p (load_index, lidx))
1248 {
1249 sbitmap_free (load_index);
1250 return false;
1251 }
1252 bitmap_set_bit (load_index, lidx);
1253 FOR_EACH_VEC_ELT (node->load_permutation, j, k)
1254 if (k != lidx)
1255 {
1256 sbitmap_free (load_index);
1257 return false;
1258 }
fb85abff 1259 }
678e3d6e 1260 for (i = 0; i < group_size; i++)
1261 if (!bitmap_bit_p (load_index, i))
cae603e9 1262 {
1263 sbitmap_free (load_index);
1264 return false;
1265 }
e8332698 1266 sbitmap_free (load_index);
fb85abff 1267
678e3d6e 1268 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1269 if (node->load_permutation.exists ()
1270 && !vect_transform_slp_perm_load
1271 (node, vNULL, NULL,
1272 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1273 return false;
1274 return true;
fb85abff 1275}
1276
1277
48e1416a 1278/* Find the first load in the loop that belongs to INSTANCE.
fb85abff 1279 When loads are in several SLP nodes, there can be a case in which the first
48e1416a 1280 load does not appear in the first SLP node to be transformed, causing
282bf14c 1281 incorrect order of statements. Since we generate all the loads together,
fb85abff 1282 they must be inserted before the first load of the SLP instance and not
1283 before the first load of the first node of the instance. */
282bf14c 1284
48e1416a 1285static gimple
1286vect_find_first_load_in_slp_instance (slp_instance instance)
fb85abff 1287{
1288 int i, j;
1289 slp_tree load_node;
1290 gimple first_load = NULL, load;
1291
f1f41a6c 1292 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1293 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
fb85abff 1294 first_load = get_earlier_stmt (load, first_load);
48e1416a 1295
fb85abff 1296 return first_load;
1297}
1298
1299
d4b21757 1300/* Find the last store in SLP INSTANCE. */
282bf14c 1301
d4b21757 1302static gimple
1303vect_find_last_store_in_slp_instance (slp_instance instance)
1304{
1305 int i;
1306 slp_tree node;
1307 gimple last_store = NULL, store;
1308
1309 node = SLP_INSTANCE_TREE (instance);
f1f41a6c 1310 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
d4b21757 1311 last_store = get_later_stmt (store, last_store);
1312
1313 return last_store;
1314}
1315
/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.
   Body costs are accumulated into the instance's BODY_COST_VEC; costs of
   constant/external operand setup are delayed into PROLOGUE_COST_VEC.
   NCOPIES_FOR_COST is the number of vector stmts generated per scalar
   stmt of the group.  */

static void
vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
			 slp_instance instance, slp_tree node,
			 stmt_vector_for_cost *prologue_cost_vec,
			 unsigned ncopies_for_cost)
{
  stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);

  unsigned i;
  slp_tree child;
  gimple stmt, s;
  stmt_vec_info stmt_info;
  tree lhs;
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
			     instance, child, prologue_cost_vec,
			     ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
	vect_model_store_cost (stmt_info, ncopies_for_cost, false,
			       vect_uninitialized_def,
			       node, prologue_cost_vec, body_cost_vec);
      else
	{
	  /* Note: this I deliberately shadows the outer unsigned I; it is
	     only used for the permutation scan below.  */
	  int i;
	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
	  vect_model_load_cost (stmt_info, ncopies_for_cost, false,
				node, prologue_cost_vec, body_cost_vec);
	  /* If the load is permuted record the cost for the permutation.
	     ??? Loads from multiple chains are let through here only
	     for a single special case involving complex numbers where
	     in the end no permutation is necessary.  */
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
	    if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
		 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
		&& vect_get_place_in_interleaving_chain
		     (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
	      {
		/* One permute cost covers the whole node; stop at the
		   first out-of-place load.  */
		record_stmt_cost (body_cost_vec, group_size, vec_perm,
				  stmt_info, 0, vect_body);
		break;
	      }
	}
    }
  else
    record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
		      stmt_info, 0, vect_body);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree def, op = gimple_op (stmt, i);
      gimple def_stmt;
      enum vect_def_type dt;
      /* Skip absent operands and the definition itself.  */
      if (!op || op == lhs)
	continue;
      if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
			      &def_stmt, &def, &dt)
	  && (dt == vect_constant_def || dt == vect_external_def))
	record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
			  stmt_info, 0, vect_prologue);
    }
}
1392
/* Compute the cost for the SLP instance INSTANCE.  NUNITS is the number
   of elements in the vector type used for the instance's group.  */

static void
vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
		       slp_instance instance, unsigned nunits)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  /* The body cost vector is owned by the instance from here on and is
     released together with it.  */
  SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
  vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
			   instance, SLP_INSTANCE_TREE (instance),
			   &prologue_cost_vec, ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  Unlike the body costs, we know
     the final values now regardless of the loop vectorization factor.  */
  void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
		: BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_prologue);
    }

  prologue_cost_vec.release ();
}
d4b21757 1432
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.
   STMT is either the first element of a store group or, for the reduction
   case, a representative reduction stmt (no GROUP_FIRST_ELEMENT).  On
   success the new instance is pushed onto the loop's or basic block's
   instance vector.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple> scalar_stmts;

  /* Determine the vector type and the group size for this instance.  */
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          /* A group without a data-ref only happens for reduction chains,
             which require a loop context.  */
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      /* Plain SLP reduction: the group is the loop's reduction vector.  */
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      /* Basic block SLP has no loop to unroll.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required in basic"
                         " block SLP\n");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          /* Use the pattern stmt in place of the original where one was
             recognized.  */
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  /* NODE takes ownership of SCALAR_STMTS.  */
  node = vect_create_new_slp_node (scalar_stmts);

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &max_nunits, &loads,
                           vectorization_factor, NULL, NULL))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          /* Re-check: a smaller type seen while building the tree can
             raise the unrolling factor above 1.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unrolling required in basic"
                             " block SLP\n");
          vect_free_slp_tree (node);
          loads.release ();
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;

      /* Compute the load permutation.  */
      slp_tree load_node;
      bool loads_permuted = false;
      FOR_EACH_VEC_ELT (loads, i, load_node)
        {
          vec<unsigned> load_permutation;
          int j;
          gimple load, first_stmt;
          bool this_load_permuted = false;
          load_permutation.create (group_size);
          first_stmt = GROUP_FIRST_ELEMENT
              (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
            {
              int load_place
                = vect_get_place_in_interleaving_chain (load, first_stmt);
              gcc_assert (load_place != -1);
              /* A load not at its natural chain position means this node
                 needs a permutation.  */
              if (load_place != j)
                this_load_permuted = true;
              load_permutation.safe_push (load_place);
            }
          if (!this_load_permuted)
            {
              /* Identity permutation: record nothing.  */
              load_permutation.release ();
              continue;
            }
          SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
          loads_permuted = true;
        }

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported load "
                                   "permutation ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              /* Frees NODE and LOADS as well.  */
              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }

      /* Compute the costs of this SLP instance.  */
      vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
                             new_instance, TYPE_VECTOR_SUBPARTS (vectype));

      if (loop_vinfo)
        LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
      else
        BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);

      if (dump_enabled_p ())
        vect_print_slp_tree (MSG_NOTE, node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  loads.release ();

  return false;
}
1638
1639
/* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
   trees of packed scalar stmts if SLP is possible.
   Exactly one of LOOP_VINFO / BB_VINFO is non-NULL and selects loop vs.
   basic-block mode.  Returns false only when BB SLP fails entirely or a
   reduction-chain instance fails; otherwise true (loop SLP is optional,
   so a loop with no instances still returns true).  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  vec<gimple> grouped_stores;
  vec<gimple> reductions = vNULL;
  vec<gimple> reduc_chains = vNULL;
  gimple first_element;
  bool ok = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  if (loop_vinfo)
    {
      grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      /* In BB mode at least one store group must SLP.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Failed to SLP the basic block.\n");

      return false;
    }

  if (loop_vinfo
      && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
    ok = true;

  /* Loop SLP is best-effort; return true regardless of OK here.  */
  return true;
}
1701
1702
1703/* For each possible SLP instance decide whether to SLP it and calculate overall
bc937a44 1704 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1705 least one instance. */
fb85abff 1706
bc937a44 1707bool
fb85abff 1708vect_make_slp_decision (loop_vec_info loop_vinfo)
1709{
1710 unsigned int i, unrolling_factor = 1;
f1f41a6c 1711 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
fb85abff 1712 slp_instance instance;
1713 int decided_to_slp = 0;
1714
6d8fb6cf 1715 if (dump_enabled_p ())
78bb46f5 1716 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1717 "\n");
fb85abff 1718
f1f41a6c 1719 FOR_EACH_VEC_ELT (slp_instances, i, instance)
fb85abff 1720 {
1721 /* FORNOW: SLP if you can. */
1722 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1723 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1724
282bf14c 1725 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
48e1416a 1726 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
282bf14c 1727 loop-based vectorization. Such stmts will be marked as HYBRID. */
fb85abff 1728 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1729 decided_to_slp++;
1730 }
1731
1732 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1733
6d8fb6cf 1734 if (decided_to_slp && dump_enabled_p ())
b055bc88 1735 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1736 "Decided to SLP %d instances. Unrolling factor %d\n",
7bd765d4 1737 decided_to_slp, unrolling_factor);
bc937a44 1738
1739 return (decided_to_slp > 0);
fb85abff 1740}
1741
1742
1743/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
282bf14c 1744 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
fb85abff 1745
1746static void
1747vect_detect_hybrid_slp_stmts (slp_tree node)
1748{
1749 int i;
f1f41a6c 1750 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1751 gimple stmt = stmts[0];
fb85abff 1752 imm_use_iterator imm_iter;
1753 gimple use_stmt;
a1123047 1754 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
40bcc7c2 1755 slp_tree child;
a1123047 1756 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1757 struct loop *loop = NULL;
1758 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1759 basic_block bb = NULL;
fb85abff 1760
1761 if (!node)
1762 return;
1763
a1123047 1764 if (loop_vinfo)
1765 loop = LOOP_VINFO_LOOP (loop_vinfo);
1766 else
1767 bb = BB_VINFO_BB (bb_vinfo);
1768
f1f41a6c 1769 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
fb85abff 1770 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1771 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1772 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
a1123047 1773 if (gimple_bb (use_stmt)
1774 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1775 || bb == gimple_bb (use_stmt))
1776 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
07be02da 1777 && !STMT_SLP_TYPE (stmt_vinfo)
1778 && (STMT_VINFO_RELEVANT (stmt_vinfo)
eefa05c8 1779 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
a1123047 1780 && !(gimple_code (use_stmt) == GIMPLE_PHI
1781 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1782 == vect_reduction_def))
fb85abff 1783 vect_mark_slp_stmts (node, hybrid, i);
1784
f1f41a6c 1785 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1786 vect_detect_hybrid_slp_stmts (child);
fb85abff 1787}
1788
1789
1790/* Find stmts that must be both vectorized and SLPed. */
1791
1792void
1793vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1794{
1795 unsigned int i;
f1f41a6c 1796 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
fb85abff 1797 slp_instance instance;
1798
6d8fb6cf 1799 if (dump_enabled_p ())
78bb46f5 1800 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
1801 "\n");
fb85abff 1802
f1f41a6c 1803 FOR_EACH_VEC_ELT (slp_instances, i, instance)
fb85abff 1804 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1805}
1806
37545e54 1807
1808/* Create and initialize a new bb_vec_info struct for BB, as well as
1809 stmt_vec_info structs for all the stmts in it. */
48e1416a 1810
37545e54 1811static bb_vec_info
1812new_bb_vec_info (basic_block bb)
1813{
1814 bb_vec_info res = NULL;
1815 gimple_stmt_iterator gsi;
1816
1817 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1818 BB_VINFO_BB (res) = bb;
1819
1820 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1821 {
1822 gimple stmt = gsi_stmt (gsi);
1823 gimple_set_uid (stmt, 0);
1824 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1825 }
1826
f1f41a6c 1827 BB_VINFO_GROUPED_STORES (res).create (10);
1828 BB_VINFO_SLP_INSTANCES (res).create (2);
4db2b577 1829 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
37545e54 1830
1831 bb->aux = res;
1832 return res;
1833}
1834
1835
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  Also clears BB's aux pointer.  Safe to call
   with a NULL BB_VINFO.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  basic_block bb;
  gimple_stmt_iterator si;
  unsigned i;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  /* Release per-stmt info first; it references bb_vinfo state.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  /* Then the data-reference and dependence analysis results...  */
  vect_destroy_datarefs (NULL, bb_vinfo);
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
  /* ... the SLP instances and the cost model data ...  */
  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    vect_free_slp_instance (instance);
  BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
  destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
  /* ... and finally the container itself.  */
  free (bb_vinfo);
  bb->aux = NULL;
}
1874
1875
1876/* Analyze statements contained in SLP tree node after recursively analyzing
1877 the subtree. Return TRUE if the operations are supported. */
1878
1879static bool
1880vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1881{
1882 bool dummy;
1883 int i;
1884 gimple stmt;
40bcc7c2 1885 slp_tree child;
37545e54 1886
1887 if (!node)
1888 return true;
1889
f1f41a6c 1890 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1891 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
b0f64919 1892 return false;
37545e54 1893
f1f41a6c 1894 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
37545e54 1895 {
1896 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1897 gcc_assert (stmt_info);
1898 gcc_assert (PURE_SLP_STMT (stmt_info));
1899
1900 if (!vect_analyze_stmt (stmt, &dummy, node))
1901 return false;
1902 }
1903
1904 return true;
1905}
1906
1907
282bf14c 1908/* Analyze statements in SLP instances of the basic block. Return TRUE if the
37545e54 1909 operations are supported. */
1910
1911static bool
1912vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1913{
f1f41a6c 1914 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
37545e54 1915 slp_instance instance;
1916 int i;
1917
f1f41a6c 1918 for (i = 0; slp_instances.iterate (i, &instance); )
37545e54 1919 {
48e1416a 1920 if (!vect_slp_analyze_node_operations (bb_vinfo,
37545e54 1921 SLP_INSTANCE_TREE (instance)))
1922 {
1923 vect_free_slp_instance (instance);
f1f41a6c 1924 slp_instances.ordered_remove (i);
37545e54 1925 }
1926 else
1927 i++;
48e1416a 1928 }
1929
f1f41a6c 1930 if (!slp_instances.length ())
37545e54 1931 return false;
1932
1933 return true;
1934}
1935
32afb748 1936
1937/* Compute the scalar cost of the SLP node NODE and its children
1938 and return it. Do not account defs that are marked in LIFE and
1939 update LIFE according to uses of NODE. */
1940
static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
			 slp_tree node, vec<bool, va_heap> *life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      /* Skip stmts already known to stay live in the scalar code.  */
      if ((*life)[i])
	continue;

      /* If there is a non-vectorized use of the defs then the scalar
         stmt is kept live in which case we do not account it or any
         required defs in the SLP children in the scalar cost.  This
         way we make the vectorization more costly when compared to
         the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
	{
	  imm_use_iterator use_iter;
	  gimple use_stmt;
	  /* A PHI use, a use outside BB, or a non-vectorizable use keeps
	     the scalar def live.  */
	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
	    if (gimple_code (use_stmt) == GIMPLE_PHI
		|| gimple_bb (use_stmt) != bb
		|| !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))
	      {
		(*life)[i] = true;
		BREAK_FROM_IMM_USE_STMT (use_iter);
	      }
	}
      if ((*life)[i])
	continue;

      /* Classify the stmt as load, store or plain stmt for costing.  */
      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
	{
	  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	    stmt_cost = vect_get_stmt_cost (scalar_load);
	  else
	    stmt_cost = vect_get_stmt_cost (scalar_store);
	}
      else
	stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  /* Recurse into the SLP children, accumulating their scalar cost.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);

  return scalar_cost;
}
2000
ad074595 2001/* Check if vectorization of the basic block is profitable. */
2002
static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i, j;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
  void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  stmt_vector_for_cost body_cost_vec;
  stmt_info_for_cost *ci;

  /* Calculate vector costs.  Feed each recorded stmt cost of every
     instance into the target cost model.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);

      FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
	{
	  stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
	  (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
				stmt_info, ci->misalign, vect_body);
	}
    }

  /* Calculate scalar cost.  LIFE tracks, per group member, defs that
     must stay live in scalar form (see vect_bb_slp_scalar_cost).  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      stack_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
					      SLP_INSTANCE_TREE (instance),
					      &life);
    }

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
	       &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
		   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of scalar
     version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}
2062
2063/* Check if the basic block can be vectorized. */
37545e54 2064
5bbe6156 2065static bb_vec_info
2066vect_slp_analyze_bb_1 (basic_block bb)
37545e54 2067{
2068 bb_vec_info bb_vinfo;
f1f41a6c 2069 vec<slp_instance> slp_instances;
37545e54 2070 slp_instance instance;
5bbe6156 2071 int i;
91a74fc6 2072 int min_vf = 2;
d4b21757 2073
37545e54 2074 bb_vinfo = new_bb_vec_info (bb);
2075 if (!bb_vinfo)
2076 return NULL;
2077
91a74fc6 2078 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
37545e54 2079 {
6d8fb6cf 2080 if (dump_enabled_p ())
7bd765d4 2081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2082 "not vectorized: unhandled data-ref in basic "
2083 "block.\n");
48e1416a 2084
37545e54 2085 destroy_bb_vec_info (bb_vinfo);
2086 return NULL;
2087 }
2088
07e3bcbf 2089 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
37545e54 2090 {
6d8fb6cf 2091 if (dump_enabled_p ())
7bd765d4 2092 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2093 "not vectorized: not enough data-refs in "
2094 "basic block.\n");
37545e54 2095
2096 destroy_bb_vec_info (bb_vinfo);
2097 return NULL;
2098 }
2099
68f15e9d 2100 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2101 {
2102 if (dump_enabled_p ())
2103 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2104 "not vectorized: unhandled data access in "
2105 "basic block.\n");
2106
2107 destroy_bb_vec_info (bb_vinfo);
2108 return NULL;
2109 }
2110
4c0c783a 2111 vect_pattern_recog (NULL, bb_vinfo);
2112
68f15e9d 2113 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
91a74fc6 2114 {
6d8fb6cf 2115 if (dump_enabled_p ())
7bd765d4 2116 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2117 "not vectorized: unhandled data dependence "
2118 "in basic block.\n");
91a74fc6 2119
2120 destroy_bb_vec_info (bb_vinfo);
2121 return NULL;
2122 }
2123
37545e54 2124 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2125 {
6d8fb6cf 2126 if (dump_enabled_p ())
7bd765d4 2127 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2128 "not vectorized: bad data alignment in basic "
2129 "block.\n");
48e1416a 2130
37545e54 2131 destroy_bb_vec_info (bb_vinfo);
2132 return NULL;
2133 }
48e1416a 2134
37545e54 2135 /* Check the SLP opportunities in the basic block, analyze and build SLP
2136 trees. */
2137 if (!vect_analyze_slp (NULL, bb_vinfo))
2138 {
6d8fb6cf 2139 if (dump_enabled_p ())
7bd765d4 2140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2141 "not vectorized: failed to find SLP opportunities "
2142 "in basic block.\n");
37545e54 2143
2144 destroy_bb_vec_info (bb_vinfo);
2145 return NULL;
2146 }
48e1416a 2147
37545e54 2148 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2149
2150 /* Mark all the statements that we want to vectorize as pure SLP and
2151 relevant. */
f1f41a6c 2152 FOR_EACH_VEC_ELT (slp_instances, i, instance)
37545e54 2153 {
2154 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2155 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
48e1416a 2156 }
37545e54 2157
4db2b577 2158 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1ad41595 2159 {
6d8fb6cf 2160 if (dump_enabled_p ())
7bd765d4 2161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2162 "not vectorized: unsupported alignment in basic "
2163 "block.\n");
1ad41595 2164 destroy_bb_vec_info (bb_vinfo);
2165 return NULL;
2166 }
2167
37545e54 2168 if (!vect_slp_analyze_operations (bb_vinfo))
2169 {
6d8fb6cf 2170 if (dump_enabled_p ())
78bb46f5 2171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 2172 "not vectorized: bad operation in basic block.\n");
37545e54 2173
2174 destroy_bb_vec_info (bb_vinfo);
2175 return NULL;
2176 }
2177
ad074595 2178 /* Cost model: check if the vectorization is worthwhile. */
1dbf9bd1 2179 if (!unlimited_cost_model ()
ad074595 2180 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2181 {
6d8fb6cf 2182 if (dump_enabled_p ())
7bd765d4 2183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2184 "not vectorized: vectorization is not "
2185 "profitable.\n");
ad074595 2186
2187 destroy_bb_vec_info (bb_vinfo);
2188 return NULL;
2189 }
2190
6d8fb6cf 2191 if (dump_enabled_p ())
7bd765d4 2192 dump_printf_loc (MSG_NOTE, vect_location,
2193 "Basic block will be vectorized using SLP\n");
37545e54 2194
2195 return bb_vinfo;
2196}
2197
2198
5bbe6156 2199bb_vec_info
2200vect_slp_analyze_bb (basic_block bb)
2201{
2202 bb_vec_info bb_vinfo;
2203 int insns = 0;
2204 gimple_stmt_iterator gsi;
2205 unsigned int vector_sizes;
2206
6d8fb6cf 2207 if (dump_enabled_p ())
7bd765d4 2208 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
5bbe6156 2209
2210 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2211 {
2212 gimple stmt = gsi_stmt (gsi);
2213 if (!is_gimple_debug (stmt)
2214 && !gimple_nop_p (stmt)
2215 && gimple_code (stmt) != GIMPLE_LABEL)
2216 insns++;
2217 }
2218
2219 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2220 {
6d8fb6cf 2221 if (dump_enabled_p ())
7bd765d4 2222 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2223 "not vectorized: too many instructions in "
2224 "basic block.\n");
5bbe6156 2225
2226 return NULL;
2227 }
2228
2229 /* Autodetect first vector size we try. */
2230 current_vector_size = 0;
2231 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2232
2233 while (1)
2234 {
2235 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2236 if (bb_vinfo)
2237 return bb_vinfo;
2238
2239 destroy_bb_vec_info (bb_vinfo);
2240
2241 vector_sizes &= ~current_vector_size;
2242 if (vector_sizes == 0
2243 || current_vector_size == 0)
2244 return NULL;
2245
2246 /* Try the next biggest vector size. */
2247 current_vector_size = 1 << floor_log2 (vector_sizes);
6d8fb6cf 2248 if (dump_enabled_p ())
7bd765d4 2249 dump_printf_loc (MSG_NOTE, vect_location,
2250 "***** Re-trying analysis with "
2251 "vector size %d\n", current_vector_size);
5bbe6156 2252 }
2253}
2254
2255
48e1416a 2256/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
282bf14c 2257 the number of created vector stmts depends on the unrolling factor).
2258 However, the actual number of vector stmts for every SLP node depends on
2259 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2260 should be updated. In this function we assume that the inside costs
2261 calculated in vect_model_xxx_cost are linear in ncopies. */
fb85abff 2262
2263void
2264vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2265{
4db2b577 2266 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
f1f41a6c 2267 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
fb85abff 2268 slp_instance instance;
f97dec81 2269 stmt_vector_for_cost body_cost_vec;
4db2b577 2270 stmt_info_for_cost *si;
f97dec81 2271 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
fb85abff 2272
6d8fb6cf 2273 if (dump_enabled_p ())
7bd765d4 2274 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2275 "=== vect_update_slp_costs_according_to_vf ===\n");
fb85abff 2276
f1f41a6c 2277 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4db2b577 2278 {
2279 /* We assume that costs are linear in ncopies. */
2280 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2281
2282 /* Record the instance's instructions in the target cost model.
2283 This was delayed until here because the count of instructions
2284 isn't known beforehand. */
f97dec81 2285 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
4db2b577 2286
f1f41a6c 2287 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
f97dec81 2288 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2289 vinfo_for_stmt (si->stmt), si->misalign,
2290 vect_body);
4db2b577 2291 }
fb85abff 2292}
2293
37545e54 2294
48e1416a 2295/* For constant and loop invariant defs of SLP_NODE this function returns
2296 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
987502fa 2297 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2298 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
eefa05c8 2299 REDUC_INDEX is the index of the reduction operand in the statements, unless
2300 it is -1. */
fb85abff 2301
static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
			   unsigned int op_num, unsigned int number_of_vectors,
			   int reduc_index)
{
  vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  tree *elts;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple def_stmt;
  struct loop *loop;
  gimple_seq ctor_seq = NULL;

  /* For a reduction, pick the reduction operand and the neutral element
     of the reduction operation (0 for +, 1 for *, etc.).  */
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && reduc_index != -1)
    {
      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction() for details.  */
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
             if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
               neutral_op = build_real (TREE_TYPE (op), dconst0);
             else
               neutral_op = build_int_cst (TREE_TYPE (op), 0);

             break;

          case MULT_EXPR:
             if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
               neutral_op = build_real (TREE_TYPE (op), dconst1);
             else
               neutral_op = build_int_cst (TREE_TYPE (op), 1);

             break;

          case BIT_AND_EXPR:
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          case MAX_EXPR:
          case MIN_EXPR:
            /* MIN/MAX have no neutral element; use the initial value
               from before the loop instead.  */
            def_stmt = SSA_NAME_DEF_STMT (op);
            loop = (gimple_bb (stmt))->loop_father;
            neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                                loop_preheader_edge (loop));
            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* CONSTANT_P tracks whether every element gathered so far is a constant,
     deciding between a VECTOR_CST and a CONSTRUCTOR below.  */
  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  /* Vectors are filled back-to-front so that the last scalar of the last
     copy lands in the last vector lane.  */
  number_of_places_left_in_vector = nunits;
  elts = XALLOCAVEC (tree, nunits);
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
	    {
	      /* Extract the operand OP_NUM of STMT; the position of the
		 operand in the gimple stmt depends on the stmt's code.  */
	      switch (code)
		{
		  case COND_EXPR:
		    if (op_num == 0 || op_num == 1)
		      {
			tree cond = gimple_assign_rhs1 (stmt);
			op = TREE_OPERAND (cond, op_num);
		      }
		    else
		      {
			if (op_num == 2)
			  op = gimple_assign_rhs2 (stmt);
			else
			  op = gimple_assign_rhs3 (stmt);
		      }
		    break;

		  case CALL_EXPR:
		    op = gimple_call_arg (stmt, op_num);
		    break;

		  case LSHIFT_EXPR:
		  case RSHIFT_EXPR:
		  case LROTATE_EXPR:
		  case RROTATE_EXPR:
		    op = gimple_op (stmt, op_num + 1);
		    /* Unlike the other binary operators, shifts/rotates have
		       the shift count being int, instead of the same type as
		       the lhs, so make sure the scalar is the right type if
		       we are dealing with vectors of
		       long long/long/short/char.  */
		    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
		      op = fold_convert (TREE_TYPE (vector_type), op);
		    break;

		  default:
		    op = gimple_op (stmt, op_num + 1);
		    break;
		}
	    }

          if (reduc_index != -1)
            {
              loop = (gimple_bb (stmt))->loop_father;
              def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);

              /* Get the def before the loop.  In reduction chain we have only
                 one initial value.  */
              if ((j != (number_of_copies - 1)
                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                       && i != 0))
                  && neutral_op)
                op = neutral_op;
              else
                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                            loop_preheader_edge (loop));
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
	  /* If the scalar type differs from the vector element type, wrap
	     the value in a VIEW_CONVERT_EXPR (folded for constants, emitted
	     as a statement via CTOR_SEQ otherwise).  */
	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
	    {
	      if (CONSTANT_CLASS_P (op))
		{
		  op = fold_unary (VIEW_CONVERT_EXPR,
				   TREE_TYPE (vector_type), op);
		  gcc_assert (op && CONSTANT_CLASS_P (op));
		}
	      else
		{
		  tree new_temp
		    = make_ssa_name (TREE_TYPE (vector_type), NULL);
		  gimple init_stmt;
		  op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
			       op);
		  init_stmt
		    = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
						    new_temp, op, NULL_TREE);
		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
		  op = new_temp;
		}
	    }
	  elts[number_of_places_left_in_vector] = op;
	  if (!CONSTANT_CLASS_P (op))
	    constant_p = false;

          if (number_of_places_left_in_vector == 0)
            {
              /* The vector is full; materialize it and start a new one.  */
              number_of_places_left_in_vector = nunits;

	      if (constant_p)
		vec_cst = build_vector (vector_type, elts);
	      else
		{
		  vec<constructor_elt, va_gc> *v;
		  unsigned k;
		  vec_alloc (v, nunits);
		  for (k = 0; k < nunits; ++k)
		    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
		  vec_cst = build_constructor (vector_type, v);
		}
              voprnds.quick_push (vect_init_vector (stmt, vec_cst,
						    vector_type, NULL));
	      /* Any pending VIEW_CONVERT stmts must dominate the vector
		 initialization; insert them right before it.  */
	      if (ctor_seq != NULL)
		{
		  gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
		  gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
		  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
							GSI_SAME_STMT);
		  ctor_seq = NULL;
		}
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
	    neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          /* No neutral element: replicate the vectors already created.  */
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
2574
2575
2576/* Get vectorized definitions from SLP_NODE that contains corresponding
2577 vectorized def-stmts. */
2578
2579static void
f1f41a6c 2580vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
fb85abff 2581{
2582 tree vec_oprnd;
2583 gimple vec_def_stmt;
2584 unsigned int i;
2585
f1f41a6c 2586 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
fb85abff 2587
f1f41a6c 2588 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
fb85abff 2589 {
2590 gcc_assert (vec_def_stmt);
2591 vec_oprnd = gimple_get_lhs (vec_def_stmt);
f1f41a6c 2592 vec_oprnds->quick_push (vec_oprnd);
fb85abff 2593 }
2594}
2595
2596
48e1416a 2597/* Get vectorized definitions for SLP_NODE.
2598 If the scalar definitions are loop invariants or constants, collect them and
fb85abff 2599 call vect_get_constant_vectors() to create vector stmts.
2600 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
b0f64919 2601 must be stored in the corresponding child of SLP_NODE, and we call
2602 vect_get_slp_vect_defs () to retrieve them. */
48e1416a 2603
fb85abff 2604void
f1f41a6c 2605vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
7f7695a7 2606 vec<vec<tree> > *vec_oprnds, int reduc_index)
fb85abff 2607{
a8bcfda2 2608 gimple first_stmt;
b0f64919 2609 int number_of_vects = 0, i;
2610 unsigned int child_index = 0;
48e1416a 2611 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
b0f64919 2612 slp_tree child = NULL;
7f7695a7 2613 vec<tree> vec_defs;
a8bcfda2 2614 tree oprnd;
b0f64919 2615 bool vectorized_defs;
fb85abff 2616
f1f41a6c 2617 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2618 FOR_EACH_VEC_ELT (ops, i, oprnd)
fb85abff 2619 {
b0f64919 2620 /* For each operand we check if it has vectorized definitions in a child
2621 node or we need to create them (for invariants and constants). We
2622 check if the LHS of the first stmt of the next child matches OPRND.
2623 If it does, we found the correct child. Otherwise, we call
2624 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2625 to check this child node for the next operand. */
2626 vectorized_defs = false;
f1f41a6c 2627 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
fb85abff 2628 {
678e3d6e 2629 child = SLP_TREE_CHILDREN (slp_node)[child_index];
b0f64919 2630
a8bcfda2 2631 /* We have to check both pattern and original def, if available. */
2632 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2633 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
fb85abff 2634
a8bcfda2 2635 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2636 || (related
2637 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2638 {
2639 /* The number of vector defs is determined by the number of
2640 vector statements in the node from which we get those
b0f64919 2641 statements. */
a8bcfda2 2642 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2643 vectorized_defs = true;
b0f64919 2644 child_index++;
a8bcfda2 2645 }
b0f64919 2646 }
fb85abff 2647
b0f64919 2648 if (!vectorized_defs)
2649 {
2650 if (i == 0)
2651 {
2652 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2653 /* Number of vector stmts was calculated according to LHS in
2654 vect_schedule_slp_instance (), fix it by replacing LHS with
2655 RHS, if necessary. See vect_get_smallest_scalar_type () for
2656 details. */
2657 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2658 &rhs_size_unit);
2659 if (rhs_size_unit != lhs_size_unit)
2660 {
2661 number_of_vects *= rhs_size_unit;
2662 number_of_vects /= lhs_size_unit;
2663 }
2664 }
2665 }
eefa05c8 2666
b0f64919 2667 /* Allocate memory for vectorized defs. */
7f7695a7 2668 vec_defs = vNULL;
2669 vec_defs.create (number_of_vects);
fb85abff 2670
b0f64919 2671 /* For reduction defs we call vect_get_constant_vectors (), since we are
2672 looking for initial loop invariant values. */
2673 if (vectorized_defs && reduc_index == -1)
2674 /* The defs are already vectorized. */
7f7695a7 2675 vect_get_slp_vect_defs (child, &vec_defs);
b0f64919 2676 else
2677 /* Build vectors from scalar defs. */
7f7695a7 2678 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
b0f64919 2679 number_of_vects, reduc_index);
fb85abff 2680
7f7695a7 2681 vec_oprnds->quick_push (vec_defs);
fb85abff 2682
b0f64919 2683 /* For reductions, we only need initial values. */
2684 if (reduc_index != -1)
2685 return;
2686 }
fb85abff 2687}
2688
37545e54 2689
48e1416a 2690/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
fb85abff 2691 building a vector of type MASK_TYPE from it) and two input vectors placed in
2692 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2693 shifting by STRIDE elements of DR_CHAIN for every copy.
2694 (STRIDE is the number of vectorized stmts for NODE divided by the number of
48e1416a 2695 copies).
fb85abff 2696 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2697 the created stmts must be inserted. */
2698
static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, vec<tree> dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  /* STRIDE is how far apart, within the node's vector stmts, successive
     copies of this permutation land.  */
  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = SLP_TREE_VEC_STMTS (node).length ();
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      /* The two input vectors for this copy; the indices advance by
	 STRIDE per copy.  */
      first_vec = dr_chain[first_vec_indx];
      second_vec = dr_chain[second_vec_indx];

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
						first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}
2744
2745
48e1416a 2746/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
fb85abff 2747 return in CURRENT_MASK_ELEMENT its equivalent in target specific
282bf14c 2748 representation. Check that the mask is valid and return FALSE if not.
fb85abff 2749 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2750 the next vector, i.e., the current first vector is not needed. */
48e1416a 2751
fb85abff 2752static bool
48e1416a 2753vect_get_mask_element (gimple stmt, int first_mask_element, int m,
fb85abff 2754 int mask_nunits, bool only_one_vec, int index,
e21c468f 2755 unsigned char *mask, int *current_mask_element,
90674129 2756 bool *need_next_vector, int *number_of_mask_fixes,
2757 bool *mask_fixed, bool *needs_first_vector)
fb85abff 2758{
2759 int i;
fb85abff 2760
2761 /* Convert to target specific representation. */
2762 *current_mask_element = first_mask_element + m;
2763 /* Adjust the value in case it's a mask for second and third vectors. */
90674129 2764 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
fb85abff 2765
2766 if (*current_mask_element < mask_nunits)
90674129 2767 *needs_first_vector = true;
fb85abff 2768
2769 /* We have only one input vector to permute but the mask accesses values in
2770 the next vector as well. */
2771 if (only_one_vec && *current_mask_element >= mask_nunits)
2772 {
6d8fb6cf 2773 if (dump_enabled_p ())
fb85abff 2774 {
78bb46f5 2775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 2776 "permutation requires at least two vectors ");
2777 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 2778 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 2779 }
2780
2781 return false;
2782 }
2783
2784 /* The mask requires the next vector. */
2785 if (*current_mask_element >= mask_nunits * 2)
2786 {
90674129 2787 if (*needs_first_vector || *mask_fixed)
fb85abff 2788 {
2789 /* We either need the first vector too or have already moved to the
48e1416a 2790 next vector. In both cases, this permutation needs three
fb85abff 2791 vectors. */
6d8fb6cf 2792 if (dump_enabled_p ())
fb85abff 2793 {
7bd765d4 2794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2795 "permutation requires at "
2796 "least three vectors ");
2797 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 2798 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 2799 }
2800
2801 return false;
2802 }
2803
2804 /* We move to the next vector, dropping the first one and working with
2805 the second and the third - we need to adjust the values of the mask
2806 accordingly. */
90674129 2807 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
fb85abff 2808
2809 for (i = 0; i < index; i++)
90674129 2810 mask[i] -= mask_nunits * *number_of_mask_fixes;
fb85abff 2811
90674129 2812 (*number_of_mask_fixes)++;
2813 *mask_fixed = true;
fb85abff 2814 }
2815
90674129 2816 *need_next_vector = *mask_fixed;
fb85abff 2817
2818 /* This was the last element of this mask. Start a new one. */
2819 if (index == mask_nunits - 1)
2820 {
90674129 2821 *number_of_mask_fixes = 1;
2822 *mask_fixed = false;
2823 *needs_first_vector = false;
fb85abff 2824 }
2825
2826 return true;
2827}
2828
2829
2830/* Generate vector permute statements from a list of loads in DR_CHAIN.
2831 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
678e3d6e 2832 permute statements for the SLP node NODE of the SLP instance
2833 SLP_NODE_INSTANCE. */
2834
fb85abff 2835bool
678e3d6e 2836vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
fb85abff 2837 gimple_stmt_iterator *gsi, int vf,
2838 slp_instance slp_node_instance, bool analyze_only)
2839{
678e3d6e 2840 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
fb85abff 2841 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2842 tree mask_element_type = NULL_TREE, mask_type;
964ec9cd 2843 int i, j, k, nunits, vec_index = 0, scalar_index;
964ec9cd 2844 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
fb85abff 2845 gimple next_scalar_stmt;
2846 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2847 int first_mask_element;
e21c468f 2848 int index, unroll_factor, current_mask_element, ncopies;
2849 unsigned char *mask;
fb85abff 2850 bool only_one_vec = false, need_next_vector = false;
2851 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
90674129 2852 int number_of_mask_fixes = 1;
2853 bool mask_fixed = false;
2854 bool needs_first_vector = false;
e21c468f 2855 enum machine_mode mode;
fb85abff 2856
e21c468f 2857 mode = TYPE_MODE (vectype);
2858
2859 if (!can_vec_perm_p (mode, false, NULL))
fb85abff 2860 {
6d8fb6cf 2861 if (dump_enabled_p ())
fb85abff 2862 {
7bd765d4 2863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2864 "no vect permute for ");
2865 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 2866 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 2867 }
964ec9cd 2868 return false;
fb85abff 2869 }
2870
964ec9cd 2871 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2872 same size as the vector element being permuted. */
3cea8318 2873 mask_element_type = lang_hooks.types.type_for_mode
2874 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
fb85abff 2875 mask_type = get_vectype_for_scalar_type (mask_element_type);
fb85abff 2876 nunits = TYPE_VECTOR_SUBPARTS (vectype);
e21c468f 2877 mask = XALLOCAVEC (unsigned char, nunits);
fb85abff 2878 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2879
2880 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2881 unrolling factor. */
48e1416a 2882 orig_vec_stmts_num = group_size *
fb85abff 2883 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2884 if (orig_vec_stmts_num == 1)
2885 only_one_vec = true;
2886
48e1416a 2887 /* Number of copies is determined by the final vectorization factor
fb85abff 2888 relatively to SLP_NODE_INSTANCE unrolling factor. */
48e1416a 2889 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
fb85abff 2890
678e3d6e 2891 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2892 return false;
2893
48e1416a 2894 /* Generate permutation masks for every NODE. Number of masks for each NODE
2895 is equal to GROUP_SIZE.
2896 E.g., we have a group of three nodes with three loads from the same
2897 location in each node, and the vector size is 4. I.e., we have a
2898 a0b0c0a1b1c1... sequence and we need to create the following vectors:
fb85abff 2899 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2900 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2901 ...
2902
964ec9cd 2903 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
48e1416a 2904 The last mask is illegal since we assume two operands for permute
282bf14c 2905 operation, and the mask element values can't be outside that range.
2906 Hence, the last mask must be converted into {2,5,5,5}.
48e1416a 2907 For the first two permutations we need the first and the second input
fb85abff 2908 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
48e1416a 2909 we need the second and the third vectors: {b1,c1,a2,b2} and
fb85abff 2910 {c2,a3,b3,c3}. */
2911
fb85abff 2912 {
2913 scalar_index = 0;
2914 index = 0;
2915 vect_stmts_counter = 0;
2916 vec_index = 0;
2917 first_vec_index = vec_index++;
2918 if (only_one_vec)
2919 second_vec_index = first_vec_index;
2920 else
2921 second_vec_index = vec_index++;
2922
2923 for (j = 0; j < unroll_factor; j++)
2924 {
2925 for (k = 0; k < group_size; k++)
2926 {
678e3d6e 2927 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
964ec9cd 2928 first_mask_element = i + j * group_size;
2929 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2930 nunits, only_one_vec, index,
2931 mask, &current_mask_element,
2932 &need_next_vector,
2933 &number_of_mask_fixes, &mask_fixed,
2934 &needs_first_vector))
2935 return false;
2936 mask[index++] = current_mask_element;
fb85abff 2937
964ec9cd 2938 if (index == nunits)
fb85abff 2939 {
678e3d6e 2940 index = 0;
e21c468f 2941 if (!can_vec_perm_p (mode, false, mask))
2942 {
6d8fb6cf 2943 if (dump_enabled_p ())
e21c468f 2944 {
7bd765d4 2945 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2946 vect_location,
2947 "unsupported vect permute { ");
e21c468f 2948 for (i = 0; i < nunits; ++i)
7bd765d4 2949 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2950 mask[i]);
2951 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
e21c468f 2952 }
2953 return false;
2954 }
2955
fb85abff 2956 if (!analyze_only)
2957 {
678e3d6e 2958 int l;
2959 tree mask_vec, *mask_elts;
2960 mask_elts = XALLOCAVEC (tree, nunits);
2961 for (l = 0; l < nunits; ++l)
2962 mask_elts[l] = build_int_cst (mask_element_type,
2963 mask[l]);
2964 mask_vec = build_vector (mask_type, mask_elts);
2965
2966 if (need_next_vector)
fb85abff 2967 {
2968 first_vec_index = second_vec_index;
2969 second_vec_index = vec_index;
2970 }
2971
f1f41a6c 2972 next_scalar_stmt
2973 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
fb85abff 2974
2975 vect_create_mask_and_perm (stmt, next_scalar_stmt,
afaef242 2976 mask_vec, first_vec_index, second_vec_index,
964ec9cd 2977 gsi, node, vectype, dr_chain,
afaef242 2978 ncopies, vect_stmts_counter++);
fb85abff 2979 }
48e1416a 2980 }
2981 }
2982 }
2983 }
fb85abff 2984
fb85abff 2985 return true;
2986}
2987
2988
2989
2990/* Vectorize SLP instance tree in postorder. */
2991
2992static bool
2993vect_schedule_slp_instance (slp_tree node, slp_instance instance,
37545e54 2994 unsigned int vectorization_factor)
fb85abff 2995{
2996 gimple stmt;
ee612634 2997 bool grouped_store, is_store;
fb85abff 2998 gimple_stmt_iterator si;
2999 stmt_vec_info stmt_info;
3000 unsigned int vec_stmts_size, nunits, group_size;
3001 tree vectype;
3002 int i;
40bcc7c2 3003 slp_tree child;
fb85abff 3004
3005 if (!node)
3006 return false;
3007
f1f41a6c 3008 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 3009 vect_schedule_slp_instance (child, instance, vectorization_factor);
48e1416a 3010
f1f41a6c 3011 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
fb85abff 3012 stmt_info = vinfo_for_stmt (stmt);
3013
3014 /* VECTYPE is the type of the destination. */
b334cbba 3015 vectype = STMT_VINFO_VECTYPE (stmt_info);
fb85abff 3016 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3017 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3018
3019 /* For each SLP instance calculate number of vector stmts to be created
282bf14c 3020 for the scalar stmts in each node of the SLP tree. Number of vector
fb85abff 3021 elements in one vector iteration is the number of scalar elements in
3022 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3023 size. */
3024 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3025
f1f41a6c 3026 if (!SLP_TREE_VEC_STMTS (node).exists ())
fb85abff 3027 {
f1f41a6c 3028 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
fb85abff 3029 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3030 }
3031
6d8fb6cf 3032 if (dump_enabled_p ())
fb85abff 3033 {
7bd765d4 3034 dump_printf_loc (MSG_NOTE,vect_location,
3035 "------>vectorizing SLP node starting from: ");
3036 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 3037 dump_printf (MSG_NOTE, "\n");
48e1416a 3038 }
fb85abff 3039
3040 /* Loads should be inserted before the first load. */
3041 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
ee612634 3042 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
24c271ef 3043 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
678e3d6e 3044 && SLP_TREE_LOAD_PERMUTATION (node).exists ())
fb85abff 3045 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
cfdcf183 3046 else if (is_pattern_stmt_p (stmt_info))
24c271ef 3047 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
fb85abff 3048 else
3049 si = gsi_for_stmt (stmt);
48e1416a 3050
d4b21757 3051 /* Stores should be inserted just before the last store. */
ee612634 3052 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
d4b21757 3053 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3054 {
3055 gimple last_store = vect_find_last_store_in_slp_instance (instance);
0bc79e04 3056 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3057 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
d4b21757 3058 si = gsi_for_stmt (last_store);
3059 }
3060
39a5d6b1 3061 /* Mark the first element of the reduction chain as reduction to properly
3062 transform the node. In the analysis phase only the last element of the
3063 chain is marked as reduction. */
ee612634 3064 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
39a5d6b1 3065 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3066 {
3067 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3068 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3069 }
3070
ee612634 3071 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
eefa05c8 3072 return is_store;
fb85abff 3073}
3074
529fcc2a 3075/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3076 For loop vectorization this is done in vectorizable_call, but for SLP
3077 it needs to be deferred until end of vect_schedule_slp, because multiple
3078 SLP instances may refer to the same scalar stmt. */
3079
3080static void
3081vect_remove_slp_scalar_calls (slp_tree node)
3082{
3083 gimple stmt, new_stmt;
3084 gimple_stmt_iterator gsi;
3085 int i;
40bcc7c2 3086 slp_tree child;
529fcc2a 3087 tree lhs;
3088 stmt_vec_info stmt_info;
3089
3090 if (!node)
3091 return;
3092
f1f41a6c 3093 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 3094 vect_remove_slp_scalar_calls (child);
529fcc2a 3095
f1f41a6c 3096 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
529fcc2a 3097 {
3098 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3099 continue;
3100 stmt_info = vinfo_for_stmt (stmt);
3101 if (stmt_info == NULL
3102 || is_pattern_stmt_p (stmt_info)
3103 || !PURE_SLP_STMT (stmt_info))
3104 continue;
3105 lhs = gimple_call_lhs (stmt);
3106 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3107 set_vinfo_for_stmt (new_stmt, stmt_info);
3108 set_vinfo_for_stmt (stmt, NULL);
3109 STMT_VINFO_STMT (stmt_info) = new_stmt;
3110 gsi = gsi_for_stmt (stmt);
3111 gsi_replace (&gsi, new_stmt, false);
3112 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3113 }
3114}
fb85abff 3115
282bf14c 3116/* Generate vector code for all SLP instances in the loop/basic block. */
3117
fb85abff 3118bool
37545e54 3119vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 3120{
f1f41a6c 3121 vec<slp_instance> slp_instances;
fb85abff 3122 slp_instance instance;
678e3d6e 3123 unsigned int i, vf;
fb85abff 3124 bool is_store = false;
3125
37545e54 3126 if (loop_vinfo)
3127 {
3128 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3129 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
48e1416a 3130 }
37545e54 3131 else
3132 {
3133 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3134 vf = 1;
48e1416a 3135 }
37545e54 3136
f1f41a6c 3137 FOR_EACH_VEC_ELT (slp_instances, i, instance)
fb85abff 3138 {
3139 /* Schedule the tree of INSTANCE. */
3140 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
37545e54 3141 instance, vf);
6d8fb6cf 3142 if (dump_enabled_p ())
7bd765d4 3143 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 3144 "vectorizing stmts using SLP.\n");
fb85abff 3145 }
3146
f1f41a6c 3147 FOR_EACH_VEC_ELT (slp_instances, i, instance)
eefa05c8 3148 {
3149 slp_tree root = SLP_INSTANCE_TREE (instance);
3150 gimple store;
3151 unsigned int j;
3152 gimple_stmt_iterator gsi;
3153
814c4f66 3154 /* Remove scalar call stmts. Do not do this for basic-block
3155 vectorization as not all uses may be vectorized.
3156 ??? Why should this be necessary? DCE should be able to
3157 remove the stmts itself.
3158 ??? For BB vectorization we can as well remove scalar
3159 stmts starting from the SLP tree root if they have no
3160 uses. */
3161 if (loop_vinfo)
3162 vect_remove_slp_scalar_calls (root);
529fcc2a 3163
f1f41a6c 3164 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
eefa05c8 3165 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3166 {
3167 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3168 break;
3169
0bc79e04 3170 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3171 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
eefa05c8 3172 /* Free the attached stmt_vec_info and remove the stmt. */
3173 gsi = gsi_for_stmt (store);
bc8a8451 3174 unlink_stmt_vdef (store);
eefa05c8 3175 gsi_remove (&gsi, true);
bc8a8451 3176 release_defs (store);
eefa05c8 3177 free_stmt_vec_info (store);
3178 }
3179 }
3180
fb85abff 3181 return is_store;
3182}
37545e54 3183
3184
3185/* Vectorize the basic block. */
3186
3187void
3188vect_slp_transform_bb (basic_block bb)
3189{
3190 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3191 gimple_stmt_iterator si;
3192
3193 gcc_assert (bb_vinfo);
3194
6d8fb6cf 3195 if (dump_enabled_p ())
7bd765d4 3196 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
37545e54 3197
3198 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3199 {
3200 gimple stmt = gsi_stmt (si);
3201 stmt_vec_info stmt_info;
3202
6d8fb6cf 3203 if (dump_enabled_p ())
37545e54 3204 {
7bd765d4 3205 dump_printf_loc (MSG_NOTE, vect_location,
3206 "------>SLPing statement: ");
3207 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 3208 dump_printf (MSG_NOTE, "\n");
37545e54 3209 }
3210
3211 stmt_info = vinfo_for_stmt (stmt);
3212 gcc_assert (stmt_info);
3213
3214 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3215 if (STMT_SLP_TYPE (stmt_info))
3216 {
3217 vect_schedule_slp (NULL, bb_vinfo);
3218 break;
3219 }
3220 }
3221
6d8fb6cf 3222 if (dump_enabled_p ())
a21425b5 3223 dump_printf_loc (MSG_NOTE, vect_location,
b055bc88 3224 "BASIC BLOCK VECTORIZED\n");
37545e54 3225
765a637f 3226 destroy_bb_vec_info (bb_vinfo);
3227}