]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
2015-06-17 Andrew MacLeod <amacleod@redhat.com>
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
fb85abff 1/* SLP - Basic Block Vectorization
d353bf18 2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
48e1416a 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
7bd765d4 25#include "dumpfile.h"
fb85abff 26#include "tm.h"
b20a8bb4 27#include "alias.h"
28#include "symtab.h"
fb85abff 29#include "tree.h"
b20a8bb4 30#include "fold-const.h"
9ed99284 31#include "stor-layout.h"
fb85abff 32#include "target.h"
94ea8568 33#include "predict.h"
94ea8568 34#include "hard-reg-set.h"
94ea8568 35#include "function.h"
fb85abff 36#include "basic-block.h"
ce084dfc 37#include "gimple-pretty-print.h"
bc61cadb 38#include "tree-ssa-alias.h"
39#include "internal-fn.h"
40#include "gimple-expr.h"
073c1fd5 41#include "gimple.h"
dcf1a1ec 42#include "gimple-iterator.h"
073c1fd5 43#include "gimple-ssa.h"
44#include "tree-phinodes.h"
45#include "ssa-iterators.h"
9ed99284 46#include "stringpool.h"
073c1fd5 47#include "tree-ssanames.h"
b9ed1410 48#include "tree-pass.h"
fb85abff 49#include "cfgloop.h"
d53441c8 50#include "rtl.h"
51#include "flags.h"
d53441c8 52#include "insn-config.h"
53#include "expmed.h"
54#include "dojump.h"
55#include "explow.h"
56#include "calls.h"
57#include "emit-rtl.h"
58#include "varasm.h"
59#include "stmt.h"
fb85abff 60#include "expr.h"
b9ed1410 61#include "recog.h" /* FIXME: for insn_data */
34517c64 62#include "insn-codes.h"
fb85abff 63#include "optabs.h"
64#include "tree-vectorizer.h"
964ec9cd 65#include "langhooks.h"
041208c6 66#include "gimple-walk.h"
fb85abff 67
37545e54 68/* Extract the location of the basic block in the source code.
69 Return the basic block location if succeed and NULL if not. */
70
36f39b2e 71source_location
37545e54 72find_bb_location (basic_block bb)
73{
74 gimple stmt = NULL;
75 gimple_stmt_iterator si;
76
77 if (!bb)
36f39b2e 78 return UNKNOWN_LOCATION;
37545e54 79
80 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
81 {
82 stmt = gsi_stmt (si);
36f39b2e 83 if (gimple_location (stmt) != UNKNOWN_LOCATION)
37545e54 84 return gimple_location (stmt);
85 }
86
36f39b2e 87 return UNKNOWN_LOCATION;
37545e54 88}
89
90
fb85abff 91/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
92
93static void
94vect_free_slp_tree (slp_tree node)
95{
b0f64919 96 int i;
40bcc7c2 97 slp_tree child;
b0f64919 98
fb85abff 99 if (!node)
100 return;
101
f1f41a6c 102 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 103 vect_free_slp_tree (child);
48e1416a 104
f1f41a6c 105 SLP_TREE_CHILDREN (node).release ();
106 SLP_TREE_SCALAR_STMTS (node).release ();
107 SLP_TREE_VEC_STMTS (node).release ();
678e3d6e 108 SLP_TREE_LOAD_PERMUTATION (node).release ();
fb85abff 109
110 free (node);
111}
112
113
114/* Free the memory allocated for the SLP instance. */
115
116void
117vect_free_slp_instance (slp_instance instance)
118{
119 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
f1f41a6c 120 SLP_INSTANCE_LOADS (instance).release ();
9f793cdf 121 free (instance);
fb85abff 122}
123
124
b0f64919 125/* Create an SLP node for SCALAR_STMTS. */
126
127static slp_tree
f1f41a6c 128vect_create_new_slp_node (vec<gimple> scalar_stmts)
b0f64919 129{
3c7a3fca 130 slp_tree node;
f1f41a6c 131 gimple stmt = scalar_stmts[0];
b0f64919 132 unsigned int nops;
133
134 if (is_gimple_call (stmt))
135 nops = gimple_call_num_args (stmt);
136 else if (is_gimple_assign (stmt))
f2104a54 137 {
138 nops = gimple_num_ops (stmt) - 1;
139 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
140 nops++;
141 }
b0f64919 142 else
143 return NULL;
144
3c7a3fca 145 node = XNEW (struct _slp_tree);
b0f64919 146 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
f1f41a6c 147 SLP_TREE_VEC_STMTS (node).create (0);
148 SLP_TREE_CHILDREN (node).create (nops);
678e3d6e 149 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
66e30248 150 SLP_TREE_TWO_OPERATORS (node) = false;
b0f64919 151
152 return node;
153}
154
155
156/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
157 operand. */
f1f41a6c 158static vec<slp_oprnd_info>
b0f64919 159vect_create_oprnd_info (int nops, int group_size)
160{
161 int i;
162 slp_oprnd_info oprnd_info;
f1f41a6c 163 vec<slp_oprnd_info> oprnds_info;
b0f64919 164
f1f41a6c 165 oprnds_info.create (nops);
b0f64919 166 for (i = 0; i < nops; i++)
167 {
168 oprnd_info = XNEW (struct _slp_oprnd_info);
f1f41a6c 169 oprnd_info->def_stmts.create (group_size);
b0f64919 170 oprnd_info->first_dt = vect_uninitialized_def;
3533e58d 171 oprnd_info->first_op_type = NULL_TREE;
b0f64919 172 oprnd_info->first_pattern = false;
423475a3 173 oprnd_info->second_pattern = false;
f1f41a6c 174 oprnds_info.quick_push (oprnd_info);
b0f64919 175 }
176
177 return oprnds_info;
178}
179
180
3c7a3fca 181/* Free operands info. */
182
b0f64919 183static void
f1f41a6c 184vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
b0f64919 185{
186 int i;
187 slp_oprnd_info oprnd_info;
188
f1f41a6c 189 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
3c7a3fca 190 {
f1f41a6c 191 oprnd_info->def_stmts.release ();
3c7a3fca 192 XDELETE (oprnd_info);
193 }
b0f64919 194
f1f41a6c 195 oprnds_info.release ();
b0f64919 196}
197
198
40bcc7c2 199/* Find the place of the data-ref in STMT in the interleaving chain that starts
200 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
201
202static int
203vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
204{
205 gimple next_stmt = first_stmt;
206 int result = 0;
207
208 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
209 return -1;
210
211 do
212 {
213 if (next_stmt == stmt)
214 return result;
40bcc7c2 215 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
257941fb 216 if (next_stmt)
217 result += GROUP_GAP (vinfo_for_stmt (next_stmt));
40bcc7c2 218 }
219 while (next_stmt);
220
221 return -1;
222}
223
224
b0f64919 225/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
226 they are of a valid type and that they match the defs of the first stmt of
291e6474 227 the SLP group (stored in OPRNDS_INFO). If there was a fatal error
228 return -1, if the error could be corrected by swapping operands of the
229 operation return 1, if everything is ok return 0. */
fb85abff 230
291e6474 231static int
37545e54 232vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
423475a3 233 gimple stmt, unsigned stmt_num,
ee6e9ba5 234 vec<slp_oprnd_info> *oprnds_info)
fb85abff 235{
236 tree oprnd;
237 unsigned int i, number_of_oprnds;
0eba4748 238 tree def;
fb85abff 239 gimple def_stmt;
b0f64919 240 enum vect_def_type dt = vect_uninitialized_def;
37545e54 241 struct loop *loop = NULL;
b0f64919 242 bool pattern = false;
0eba4748 243 slp_oprnd_info oprnd_info;
291e6474 244 int first_op_idx = 1;
245 bool commutative = false;
246 bool first_op_cond = false;
423475a3 247 bool first = stmt_num == 0;
248 bool second = stmt_num == 1;
48e1416a 249
37545e54 250 if (loop_vinfo)
251 loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 252
b0f64919 253 if (is_gimple_call (stmt))
b57a47d0 254 {
255 number_of_oprnds = gimple_call_num_args (stmt);
291e6474 256 first_op_idx = 3;
b57a47d0 257 }
f2104a54 258 else if (is_gimple_assign (stmt))
259 {
291e6474 260 enum tree_code code = gimple_assign_rhs_code (stmt);
f2104a54 261 number_of_oprnds = gimple_num_ops (stmt) - 1;
262 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
291e6474 263 {
264 first_op_cond = true;
265 commutative = true;
266 number_of_oprnds++;
267 }
268 else
269 commutative = commutative_tree_code (code);
f2104a54 270 }
b0f64919 271 else
291e6474 272 return -1;
fb85abff 273
291e6474 274 bool swapped = false;
fb85abff 275 for (i = 0; i < number_of_oprnds; i++)
276 {
291e6474 277again:
278 if (first_op_cond)
f2104a54 279 {
291e6474 280 if (i == 0 || i == 1)
281 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
282 swapped ? !i : i);
283 else
284 oprnd = gimple_op (stmt, first_op_idx + i - 1);
f2104a54 285 }
286 else
291e6474 287 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
f2104a54 288
f1f41a6c 289 oprnd_info = (*oprnds_info)[i];
fb85abff 290
bed8b93b 291 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
7a6adcfd 292 &def, &dt))
fb85abff 293 {
6d8fb6cf 294 if (dump_enabled_p ())
fb85abff 295 {
7bd765d4 296 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7a6adcfd 297 "Build SLP failed: can't analyze def for ");
7bd765d4 298 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
78bb46f5 299 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 300 }
301
291e6474 302 return -1;
fb85abff 303 }
304
37545e54 305 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
282bf14c 306 from the pattern. Check that all the stmts of the node are in the
fb85abff 307 pattern. */
4c0c783a 308 if (def_stmt && gimple_bb (def_stmt)
309 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
310 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
311 && gimple_code (def_stmt) != GIMPLE_PHI))
fb85abff 312 && vinfo_for_stmt (def_stmt)
8bf58742 313 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
4c0c783a 314 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
315 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
fb85abff 316 {
b0f64919 317 pattern = true;
423475a3 318 if (!first && !oprnd_info->first_pattern
319 /* Allow different pattern state for the defs of the
320 first stmt in reduction chains. */
321 && (oprnd_info->first_dt != vect_reduction_def
322 || (!second && !oprnd_info->second_pattern)))
b0f64919 323 {
291e6474 324 if (i == 0
325 && !swapped
326 && commutative)
327 {
328 swapped = true;
329 goto again;
330 }
331
6d8fb6cf 332 if (dump_enabled_p ())
b0f64919 333 {
7bd765d4 334 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
335 "Build SLP failed: some of the stmts"
336 " are in a pattern, and others are not ");
337 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
78bb46f5 338 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b0f64919 339 }
fb85abff 340
291e6474 341 return 1;
fb85abff 342 }
343
344 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
b0f64919 345 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
fb85abff 346
f2104a54 347 if (dt == vect_unknown_def_type)
fb85abff 348 {
6d8fb6cf 349 if (dump_enabled_p ())
7bd765d4 350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 351 "Unsupported pattern.\n");
291e6474 352 return -1;
fb85abff 353 }
354
355 switch (gimple_code (def_stmt))
356 {
357 case GIMPLE_PHI:
b0f64919 358 def = gimple_phi_result (def_stmt);
fb85abff 359 break;
360
361 case GIMPLE_ASSIGN:
b0f64919 362 def = gimple_assign_lhs (def_stmt);
fb85abff 363 break;
364
365 default:
6d8fb6cf 366 if (dump_enabled_p ())
7bd765d4 367 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 368 "unsupported defining stmt:\n");
291e6474 369 return -1;
fb85abff 370 }
371 }
372
423475a3 373 if (second)
374 oprnd_info->second_pattern = pattern;
375
b0f64919 376 if (first)
fb85abff 377 {
b0f64919 378 oprnd_info->first_dt = dt;
379 oprnd_info->first_pattern = pattern;
3533e58d 380 oprnd_info->first_op_type = TREE_TYPE (oprnd);
fb85abff 381 }
fb85abff 382 else
383 {
b0f64919 384 /* Not first stmt of the group, check that the def-stmt/s match
385 the def-stmt/s of the first stmt. Allow different definition
386 types for reduction chains: the first stmt must be a
387 vect_reduction_def (a phi node), and the rest
388 vect_internal_def. */
389 if (((oprnd_info->first_dt != dt
390 && !(oprnd_info->first_dt == vect_reduction_def
3533e58d 391 && dt == vect_internal_def)
392 && !((oprnd_info->first_dt == vect_external_def
393 || oprnd_info->first_dt == vect_constant_def)
394 && (dt == vect_external_def
395 || dt == vect_constant_def)))
396 || !types_compatible_p (oprnd_info->first_op_type,
397 TREE_TYPE (oprnd))))
fb85abff 398 {
291e6474 399 /* Try swapping operands if we got a mismatch. */
400 if (i == 0
401 && !swapped
402 && commutative)
403 {
404 swapped = true;
405 goto again;
406 }
407
0eba4748 408 if (dump_enabled_p ())
409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 410 "Build SLP failed: different types\n");
b0f64919 411
291e6474 412 return 1;
fb85abff 413 }
414 }
415
416 /* Check the types of the definitions. */
b0f64919 417 switch (dt)
fb85abff 418 {
419 case vect_constant_def:
f083cd24 420 case vect_external_def:
b0f64919 421 case vect_reduction_def:
fb85abff 422 break;
48e1416a 423
f083cd24 424 case vect_internal_def:
0eba4748 425 oprnd_info->def_stmts.quick_push (def_stmt);
fb85abff 426 break;
427
428 default:
429 /* FORNOW: Not supported. */
6d8fb6cf 430 if (dump_enabled_p ())
fb85abff 431 {
7bd765d4 432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
433 "Build SLP failed: illegal type of def ");
434 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
78bb46f5 435 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 436 }
437
291e6474 438 return -1;
fb85abff 439 }
440 }
441
291e6474 442 /* Swap operands. */
443 if (swapped)
444 {
445 if (first_op_cond)
446 {
447 tree cond = gimple_assign_rhs1 (stmt);
448 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
449 &TREE_OPERAND (cond, 1));
450 TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
451 }
452 else
453 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
454 gimple_assign_rhs2_ptr (stmt));
455 }
456
457 return 0;
fb85abff 458}
459
460
16513aa4 461/* Verify if the scalar stmts STMTS are isomorphic, require data
462 permutation or are of unsupported types of operation. Return
463 true if they are, otherwise return false and indicate in *MATCHES
464 which stmts are not isomorphic to the first one. If MATCHES[0]
465 is false then this indicates the comparison could not be
466 carried out or the stmts will never be vectorized by SLP. */
fb85abff 467
468static bool
16513aa4 469vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
470 vec<gimple> stmts, unsigned int group_size,
471 unsigned nops, unsigned int *max_nunits,
66e30248 472 unsigned int vectorization_factor, bool *matches,
473 bool *two_operators)
fb85abff 474{
fb85abff 475 unsigned int i;
66e30248 476 gimple first_stmt = stmts[0], stmt = stmts[0];
477 enum tree_code first_stmt_code = ERROR_MARK;
478 enum tree_code alt_stmt_code = ERROR_MARK;
479 enum tree_code rhs_code = ERROR_MARK;
f2104a54 480 enum tree_code first_cond_code = ERROR_MARK;
fb85abff 481 tree lhs;
16513aa4 482 bool need_same_oprnds = false;
fb85abff 483 tree vectype, scalar_type, first_op1 = NULL_TREE;
fb85abff 484 optab optab;
485 int icode;
3754d046 486 machine_mode optab_op2_mode;
487 machine_mode vec_mode;
fb85abff 488 struct data_reference *first_dr;
fb85abff 489 HOST_WIDE_INT dummy;
4db2b577 490 gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
f2104a54 491 tree cond;
b0f64919 492
fb85abff 493 /* For every stmt in NODE find its def stmt/s. */
f1f41a6c 494 FOR_EACH_VEC_ELT (stmts, i, stmt)
fb85abff 495 {
16513aa4 496 matches[i] = false;
497
6d8fb6cf 498 if (dump_enabled_p ())
fb85abff 499 {
7bd765d4 500 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
501 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 502 dump_printf (MSG_NOTE, "\n");
fb85abff 503 }
504
6ea6a380 505 /* Fail to vectorize statements marked as unvectorizable. */
506 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
507 {
6d8fb6cf 508 if (dump_enabled_p ())
6ea6a380 509 {
7bd765d4 510 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
511 "Build SLP failed: unvectorizable statement ");
512 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 513 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
6ea6a380 514 }
16513aa4 515 /* Fatal mismatch. */
516 matches[0] = false;
6ea6a380 517 return false;
518 }
519
fb85abff 520 lhs = gimple_get_lhs (stmt);
521 if (lhs == NULL_TREE)
522 {
6d8fb6cf 523 if (dump_enabled_p ())
fb85abff 524 {
7bd765d4 525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
526 "Build SLP failed: not GIMPLE_ASSIGN nor "
527 "GIMPLE_CALL ");
528 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 529 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 530 }
16513aa4 531 /* Fatal mismatch. */
532 matches[0] = false;
fb85abff 533 return false;
534 }
535
f2104a54 536 if (is_gimple_assign (stmt)
537 && gimple_assign_rhs_code (stmt) == COND_EXPR
538 && (cond = gimple_assign_rhs1 (stmt))
539 && !COMPARISON_CLASS_P (cond))
540 {
6d8fb6cf 541 if (dump_enabled_p ())
f2104a54 542 {
7bd765d4 543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
544 "Build SLP failed: condition is not "
545 "comparison ");
546 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 547 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f2104a54 548 }
16513aa4 549 /* Fatal mismatch. */
550 matches[0] = false;
f2104a54 551 return false;
552 }
553
48e1416a 554 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
fb85abff 555 vectype = get_vectype_for_scalar_type (scalar_type);
556 if (!vectype)
557 {
6d8fb6cf 558 if (dump_enabled_p ())
fb85abff 559 {
7bd765d4 560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
561 "Build SLP failed: unsupported data-type ");
562 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
563 scalar_type);
78bb46f5 564 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 565 }
16513aa4 566 /* Fatal mismatch. */
567 matches[0] = false;
fb85abff 568 return false;
569 }
48e1416a 570
0d80b45d 571 /* If populating the vector type requires unrolling then fail
572 before adjusting *max_nunits for basic-block vectorization. */
573 if (bb_vinfo
574 && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
575 {
576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
577 "Build SLP failed: unrolling required "
578 "in basic block SLP\n");
579 /* Fatal mismatch. */
580 matches[0] = false;
581 return false;
582 }
583
03dab834 584 /* In case of multiple types we need to detect the smallest type. */
585 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
37545e54 586 {
03dab834 587 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
588 if (bb_vinfo)
589 vectorization_factor = *max_nunits;
37545e54 590 }
48e1416a 591
1a91d914 592 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
b57a47d0 593 {
594 rhs_code = CALL_EXPR;
1a91d914 595 if (gimple_call_internal_p (call_stmt)
596 || gimple_call_tail_p (call_stmt)
597 || gimple_call_noreturn_p (call_stmt)
598 || !gimple_call_nothrow_p (call_stmt)
599 || gimple_call_chain (call_stmt))
b57a47d0 600 {
6d8fb6cf 601 if (dump_enabled_p ())
b57a47d0 602 {
7bd765d4 603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
604 "Build SLP failed: unsupported call type ");
1a91d914 605 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
606 call_stmt, 0);
78bb46f5 607 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b57a47d0 608 }
16513aa4 609 /* Fatal mismatch. */
610 matches[0] = false;
b57a47d0 611 return false;
612 }
613 }
fb85abff 614 else
615 rhs_code = gimple_assign_rhs_code (stmt);
616
617 /* Check the operation. */
618 if (i == 0)
619 {
620 first_stmt_code = rhs_code;
621
48e1416a 622 /* Shift arguments should be equal in all the packed stmts for a
fb85abff 623 vector shift with scalar shift operand. */
624 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
625 || rhs_code == LROTATE_EXPR
626 || rhs_code == RROTATE_EXPR)
627 {
628 vec_mode = TYPE_MODE (vectype);
629
630 /* First see if we have a vector/vector shift. */
631 optab = optab_for_tree_code (rhs_code, vectype,
632 optab_vector);
633
634 if (!optab
d6bf3b14 635 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
fb85abff 636 {
637 /* No vector/vector shift, try for a vector/scalar shift. */
638 optab = optab_for_tree_code (rhs_code, vectype,
639 optab_scalar);
640
641 if (!optab)
642 {
6d8fb6cf 643 if (dump_enabled_p ())
7bd765d4 644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 645 "Build SLP failed: no optab.\n");
16513aa4 646 /* Fatal mismatch. */
647 matches[0] = false;
fb85abff 648 return false;
649 }
d6bf3b14 650 icode = (int) optab_handler (optab, vec_mode);
fb85abff 651 if (icode == CODE_FOR_nothing)
652 {
6d8fb6cf 653 if (dump_enabled_p ())
7bd765d4 654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
655 "Build SLP failed: "
78bb46f5 656 "op not supported by target.\n");
16513aa4 657 /* Fatal mismatch. */
658 matches[0] = false;
fb85abff 659 return false;
660 }
661 optab_op2_mode = insn_data[icode].operand[2].mode;
662 if (!VECTOR_MODE_P (optab_op2_mode))
663 {
664 need_same_oprnds = true;
665 first_op1 = gimple_assign_rhs2 (stmt);
666 }
667 }
668 }
6083c152 669 else if (rhs_code == WIDEN_LSHIFT_EXPR)
670 {
671 need_same_oprnds = true;
672 first_op1 = gimple_assign_rhs2 (stmt);
673 }
fb85abff 674 }
675 else
676 {
66e30248 677 if (first_stmt_code != rhs_code
678 && alt_stmt_code == ERROR_MARK)
679 alt_stmt_code = rhs_code;
fb85abff 680 if (first_stmt_code != rhs_code
681 && (first_stmt_code != IMAGPART_EXPR
682 || rhs_code != REALPART_EXPR)
683 && (first_stmt_code != REALPART_EXPR
ad074595 684 || rhs_code != IMAGPART_EXPR)
66e30248 685 /* Handle mismatches in plus/minus by computing both
686 and merging the results. */
687 && !((first_stmt_code == PLUS_EXPR
688 || first_stmt_code == MINUS_EXPR)
689 && (alt_stmt_code == PLUS_EXPR
690 || alt_stmt_code == MINUS_EXPR)
691 && rhs_code == alt_stmt_code)
ee612634 692 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
ad074595 693 && (first_stmt_code == ARRAY_REF
19bacd59 694 || first_stmt_code == BIT_FIELD_REF
ad074595 695 || first_stmt_code == INDIRECT_REF
696 || first_stmt_code == COMPONENT_REF
697 || first_stmt_code == MEM_REF)))
fb85abff 698 {
6d8fb6cf 699 if (dump_enabled_p ())
fb85abff 700 {
7bd765d4 701 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
702 "Build SLP failed: different operation "
703 "in stmt ");
704 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
66e30248 705 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
706 "original stmt ");
707 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
708 first_stmt, 0);
fb85abff 709 }
16513aa4 710 /* Mismatch. */
711 continue;
fb85abff 712 }
48e1416a 713
714 if (need_same_oprnds
fb85abff 715 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
716 {
6d8fb6cf 717 if (dump_enabled_p ())
fb85abff 718 {
7bd765d4 719 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
720 "Build SLP failed: different shift "
721 "arguments in ");
722 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 723 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 724 }
16513aa4 725 /* Mismatch. */
726 continue;
fb85abff 727 }
b57a47d0 728
729 if (rhs_code == CALL_EXPR)
730 {
f1f41a6c 731 gimple first_stmt = stmts[0];
b57a47d0 732 if (gimple_call_num_args (stmt) != nops
733 || !operand_equal_p (gimple_call_fn (first_stmt),
734 gimple_call_fn (stmt), 0)
735 || gimple_call_fntype (first_stmt)
736 != gimple_call_fntype (stmt))
737 {
6d8fb6cf 738 if (dump_enabled_p ())
b57a47d0 739 {
7bd765d4 740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
741 "Build SLP failed: different calls in ");
742 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
743 stmt, 0);
78bb46f5 744 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b57a47d0 745 }
16513aa4 746 /* Mismatch. */
747 continue;
b57a47d0 748 }
749 }
fb85abff 750 }
751
ee612634 752 /* Grouped store or load. */
753 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
fb85abff 754 {
755 if (REFERENCE_CLASS_P (lhs))
756 {
757 /* Store. */
16513aa4 758 ;
fb85abff 759 }
eefa05c8 760 else
761 {
762 /* Load. */
eefa05c8 763 /* Check that the size of interleaved loads group is not
764 greater than the SLP group size. */
f77a1eec 765 unsigned ncopies
766 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
24c271ef 767 if (loop_vinfo
8f3e4987 768 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
769 && ((GROUP_SIZE (vinfo_for_stmt (stmt))
770 - GROUP_GAP (vinfo_for_stmt (stmt)))
771 > ncopies * group_size))
eefa05c8 772 {
6d8fb6cf 773 if (dump_enabled_p ())
eefa05c8 774 {
7bd765d4 775 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
776 "Build SLP failed: the number "
777 "of interleaved loads is greater than "
778 "the SLP group size ");
779 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
780 stmt, 0);
78bb46f5 781 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
eefa05c8 782 }
16513aa4 783 /* Fatal mismatch. */
784 matches[0] = false;
eefa05c8 785 return false;
786 }
787
4db2b577 788 old_first_load = first_load;
21009880 789 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
eefa05c8 790 if (prev_first_load)
791 {
792 /* Check that there are no loads from different interleaving
16513aa4 793 chains in the same node. */
794 if (prev_first_load != first_load)
7bd765d4 795 {
6d8fb6cf 796 if (dump_enabled_p ())
eefa05c8 797 {
7bd765d4 798 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
799 vect_location,
800 "Build SLP failed: different "
801 "interleaving chains in one node ");
802 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
803 stmt, 0);
78bb46f5 804 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
eefa05c8 805 }
16513aa4 806 /* Mismatch. */
807 continue;
eefa05c8 808 }
809 }
810 else
811 prev_first_load = first_load;
48e1416a 812
4db2b577 813 /* In some cases a group of loads is just the same load
814 repeated N times. Only analyze its cost once. */
815 if (first_load == stmt && old_first_load != first_load)
fb85abff 816 {
817 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
0822b158 818 if (vect_supportable_dr_alignment (first_dr, false)
fb85abff 819 == dr_unaligned_unsupported)
820 {
6d8fb6cf 821 if (dump_enabled_p ())
fb85abff 822 {
7bd765d4 823 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
824 vect_location,
825 "Build SLP failed: unsupported "
826 "unaligned load ");
827 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
828 stmt, 0);
78bb46f5 829 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 830 }
16513aa4 831 /* Fatal mismatch. */
832 matches[0] = false;
fb85abff 833 return false;
834 }
fb85abff 835 }
fb85abff 836 }
ee612634 837 } /* Grouped access. */
fb85abff 838 else
839 {
840 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
841 {
ee612634 842 /* Not grouped load. */
6d8fb6cf 843 if (dump_enabled_p ())
fb85abff 844 {
7bd765d4 845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
846 "Build SLP failed: not grouped load ");
847 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 848 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 849 }
850
ee612634 851 /* FORNOW: Not grouped loads are not supported. */
16513aa4 852 /* Fatal mismatch. */
853 matches[0] = false;
fb85abff 854 return false;
855 }
856
857 /* Not memory operation. */
858 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f2104a54 859 && TREE_CODE_CLASS (rhs_code) != tcc_unary
423475a3 860 && TREE_CODE_CLASS (rhs_code) != tcc_expression
b57a47d0 861 && rhs_code != CALL_EXPR)
fb85abff 862 {
6d8fb6cf 863 if (dump_enabled_p ())
fb85abff 864 {
7bd765d4 865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
866 "Build SLP failed: operation");
867 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
868 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 869 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 870 }
16513aa4 871 /* Fatal mismatch. */
872 matches[0] = false;
fb85abff 873 return false;
874 }
875
f2104a54 876 if (rhs_code == COND_EXPR)
877 {
878 tree cond_expr = gimple_assign_rhs1 (stmt);
879
880 if (i == 0)
881 first_cond_code = TREE_CODE (cond_expr);
882 else if (first_cond_code != TREE_CODE (cond_expr))
883 {
6d8fb6cf 884 if (dump_enabled_p ())
f2104a54 885 {
7bd765d4 886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
887 "Build SLP failed: different"
888 " operation");
889 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
890 stmt, 0);
78bb46f5 891 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f2104a54 892 }
16513aa4 893 /* Mismatch. */
894 continue;
f2104a54 895 }
896 }
fb85abff 897 }
16513aa4 898
899 matches[i] = true;
900 }
901
902 for (i = 0; i < group_size; ++i)
903 if (!matches[i])
904 return false;
905
66e30248 906 /* If we allowed a two-operation SLP node verify the target can cope
907 with the permute we are going to use. */
908 if (alt_stmt_code != ERROR_MARK
909 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
910 {
911 unsigned char *sel
912 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
913 for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
914 {
915 sel[i] = i;
916 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
917 sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
918 }
919 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
920 {
921 for (i = 0; i < group_size; ++i)
922 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
923 {
924 matches[i] = false;
925 if (dump_enabled_p ())
926 {
927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
928 "Build SLP failed: different operation "
929 "in stmt ");
930 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
931 stmts[i], 0);
932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
933 "original stmt ");
934 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
935 first_stmt, 0);
936 }
937 }
938 return false;
939 }
940 *two_operators = true;
941 }
942
16513aa4 943 return true;
944}
945
946/* Recursively build an SLP tree starting from NODE.
947 Fail (and return a value not equal to zero) if def-stmts are not
948 isomorphic, require data permutation or are of unsupported types of
949 operation. Otherwise, return 0.
950 The value returned is the depth in the SLP tree where a mismatch
951 was found. */
952
953static bool
954vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
955 slp_tree *node, unsigned int group_size,
956 unsigned int *max_nunits,
957 vec<slp_tree> *loads,
958 unsigned int vectorization_factor,
ca91d3f8 959 bool *matches, unsigned *npermutes, unsigned *tree_size,
960 unsigned max_tree_size)
16513aa4 961{
bad02805 962 unsigned nops, i, this_tree_size = 0;
16513aa4 963 gimple stmt;
964
16513aa4 965 matches[0] = false;
966
967 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
968 if (is_gimple_call (stmt))
969 nops = gimple_call_num_args (stmt);
970 else if (is_gimple_assign (stmt))
971 {
972 nops = gimple_num_ops (stmt) - 1;
973 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
974 nops++;
fb85abff 975 }
16513aa4 976 else
977 return false;
978
66e30248 979 bool two_operators = false;
16513aa4 980 if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
981 SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
66e30248 982 max_nunits, vectorization_factor, matches,
983 &two_operators))
16513aa4 984 return false;
66e30248 985 SLP_TREE_TWO_OPERATORS (*node) = two_operators;
fb85abff 986
16513aa4 987 /* If the SLP node is a load, terminate the recursion. */
988 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
989 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
fb85abff 990 {
f1f41a6c 991 loads->safe_push (*node);
fb85abff 992 return true;
993 }
994
16513aa4 995 /* Get at the operands, verifying they are compatible. */
996 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
997 slp_oprnd_info oprnd_info;
998 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
999 {
291e6474 1000 switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
423475a3 1001 stmt, i, &oprnds_info))
16513aa4 1002 {
291e6474 1003 case 0:
1004 break;
1005 case -1:
1006 matches[0] = false;
16513aa4 1007 vect_free_oprnd_info (oprnds_info);
1008 return false;
291e6474 1009 case 1:
1010 matches[i] = false;
1011 break;
16513aa4 1012 }
1013 }
291e6474 1014 for (i = 0; i < group_size; ++i)
1015 if (!matches[i])
1016 {
1017 vect_free_oprnd_info (oprnds_info);
1018 return false;
1019 }
16513aa4 1020
1021 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
1022
48e1416a 1023 /* Create SLP_TREE nodes for the definition node/s. */
f1f41a6c 1024 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
fb85abff 1025 {
b0f64919 1026 slp_tree child;
16513aa4 1027 unsigned old_nloads = loads->length ();
1028 unsigned old_max_nunits = *max_nunits;
48e1416a 1029
b0f64919 1030 if (oprnd_info->first_dt != vect_internal_def)
1031 continue;
fb85abff 1032
ca91d3f8 1033 if (++this_tree_size > max_tree_size)
1034 {
1035 vect_free_oprnd_info (oprnds_info);
1036 return false;
1037 }
1038
b0f64919 1039 child = vect_create_new_slp_node (oprnd_info->def_stmts);
16513aa4 1040 if (!child)
1041 {
f1f41a6c 1042 vect_free_oprnd_info (oprnds_info);
16513aa4 1043 return false;
b0f64919 1044 }
48e1416a 1045
16513aa4 1046 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1047 group_size, max_nunits, loads,
ca91d3f8 1048 vectorization_factor, matches,
1049 npermutes, &this_tree_size, max_tree_size))
16513aa4 1050 {
7a6adcfd 1051 /* If we have all children of child built up from scalars then just
1052 throw that away and build it up this node from scalars. */
1053 if (!SLP_TREE_CHILDREN (child).is_empty ())
1054 {
1055 unsigned int j;
1056 slp_tree grandchild;
1057
1058 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1059 if (grandchild != NULL)
1060 break;
1061 if (!grandchild)
1062 {
1063 /* Roll back. */
1064 *max_nunits = old_max_nunits;
1065 loads->truncate (old_nloads);
1066 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1067 vect_free_slp_tree (grandchild);
1068 SLP_TREE_CHILDREN (child).truncate (0);
1069
1070 dump_printf_loc (MSG_NOTE, vect_location,
1071 "Building parent vector operands from "
1072 "scalars instead\n");
1073 oprnd_info->def_stmts = vNULL;
1074 vect_free_slp_tree (child);
1075 SLP_TREE_CHILDREN (*node).quick_push (NULL);
1076 continue;
1077 }
1078 }
1079
16513aa4 1080 oprnd_info->def_stmts = vNULL;
1081 SLP_TREE_CHILDREN (*node).quick_push (child);
1082 continue;
1083 }
1084
6501e749 1085 /* If the SLP build failed fatally and we analyze a basic-block
1086 simply treat nodes we fail to build as externally defined
1087 (and thus build vectors from the scalar defs).
1088 The cost model will reject outright expensive cases.
1089 ??? This doesn't treat cases where permutation ultimatively
1090 fails (or we don't try permutation below). Ideally we'd
1091 even compute a permutation that will end up with the maximum
1092 SLP tree size... */
1093 if (bb_vinfo
1094 && !matches[0]
1095 /* ??? Rejecting patterns this way doesn't work. We'd have to
1096 do extra work to cancel the pattern so the uses see the
1097 scalar version. */
1098 && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1099 {
41fdc863 1100 unsigned int j;
1101 slp_tree grandchild;
1102
1103 /* Roll back. */
1104 *max_nunits = old_max_nunits;
1105 loads->truncate (old_nloads);
1106 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1107 vect_free_slp_tree (grandchild);
1108 SLP_TREE_CHILDREN (child).truncate (0);
1109
6501e749 1110 dump_printf_loc (MSG_NOTE, vect_location,
1111 "Building vector operands from scalars\n");
1112 oprnd_info->def_stmts = vNULL;
1113 vect_free_slp_tree (child);
1114 SLP_TREE_CHILDREN (*node).quick_push (NULL);
1115 continue;
1116 }
1117
16513aa4 1118 /* If the SLP build for operand zero failed and operand zero
1119 and one can be commutated try that for the scalar stmts
1120 that failed the match. */
1121 if (i == 0
1122 /* A first scalar stmt mismatch signals a fatal mismatch. */
1123 && matches[0]
1124 /* ??? For COND_EXPRs we can swap the comparison operands
1125 as well as the arms under some constraints. */
1126 && nops == 2
1127 && oprnds_info[1]->first_dt == vect_internal_def
1128 && is_gimple_assign (stmt)
1129 && commutative_tree_code (gimple_assign_rhs_code (stmt))
7f2a705a 1130 && !SLP_TREE_TWO_OPERATORS (*node)
16513aa4 1131 /* Do so only if the number of not successful permutes was nor more
1132 than a cut-ff as re-trying the recursive match on
1133 possibly each level of the tree would expose exponential
1134 behavior. */
1135 && *npermutes < 4)
1136 {
8979d917 1137 unsigned int j;
1138 slp_tree grandchild;
1139
16513aa4 1140 /* Roll back. */
1141 *max_nunits = old_max_nunits;
1142 loads->truncate (old_nloads);
8979d917 1143 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1144 vect_free_slp_tree (grandchild);
1145 SLP_TREE_CHILDREN (child).truncate (0);
1146
16513aa4 1147 /* Swap mismatched definition stmts. */
291e6474 1148 dump_printf_loc (MSG_NOTE, vect_location,
1149 "Re-trying with swapped operands of stmts ");
8979d917 1150 for (j = 0; j < group_size; ++j)
16513aa4 1151 if (!matches[j])
1152 {
1153 gimple tem = oprnds_info[0]->def_stmts[j];
1154 oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
1155 oprnds_info[1]->def_stmts[j] = tem;
291e6474 1156 dump_printf (MSG_NOTE, "%d ", j);
16513aa4 1157 }
291e6474 1158 dump_printf (MSG_NOTE, "\n");
f116baa1 1159 /* And try again with scratch 'matches' ... */
1160 bool *tem = XALLOCAVEC (bool, group_size);
16513aa4 1161 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1162 group_size, max_nunits, loads,
1163 vectorization_factor,
f116baa1 1164 tem, npermutes, &this_tree_size,
ca91d3f8 1165 max_tree_size))
16513aa4 1166 {
f116baa1 1167 /* ... so if successful we can apply the operand swapping
1168 to the GIMPLE IL. This is necessary because for example
1169 vect_get_slp_defs uses operand indexes and thus expects
1170 canonical operand order. */
1171 for (j = 0; j < group_size; ++j)
1172 if (!matches[j])
1173 {
1174 gimple stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
1175 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1176 gimple_assign_rhs2_ptr (stmt));
1177 }
16513aa4 1178 oprnd_info->def_stmts = vNULL;
1179 SLP_TREE_CHILDREN (*node).quick_push (child);
1180 continue;
1181 }
1182
1183 ++*npermutes;
1184 }
1185
1186 oprnd_info->def_stmts = vNULL;
1187 vect_free_slp_tree (child);
1188 vect_free_oprnd_info (oprnds_info);
1189 return false;
fb85abff 1190 }
1191
ca91d3f8 1192 if (tree_size)
1193 *tree_size += this_tree_size;
1194
f1f41a6c 1195 vect_free_oprnd_info (oprnds_info);
fb85abff 1196 return true;
1197}
1198
7bd765d4 1199/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
fb85abff 1200
1201static void
7bd765d4 1202vect_print_slp_tree (int dump_kind, slp_tree node)
fb85abff 1203{
1204 int i;
1205 gimple stmt;
40bcc7c2 1206 slp_tree child;
fb85abff 1207
1208 if (!node)
1209 return;
1210
7bd765d4 1211 dump_printf (dump_kind, "node ");
f1f41a6c 1212 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
fb85abff 1213 {
7bd765d4 1214 dump_printf (dump_kind, "\n\tstmt %d ", i);
1215 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
fb85abff 1216 }
7bd765d4 1217 dump_printf (dump_kind, "\n");
fb85abff 1218
f1f41a6c 1219 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1220 vect_print_slp_tree (dump_kind, child);
fb85abff 1221}
1222
1223
48e1416a 1224/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1225 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
282bf14c 1226 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
fb85abff 1227 stmts in NODE are to be marked. */
1228
1229static void
1230vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1231{
1232 int i;
1233 gimple stmt;
40bcc7c2 1234 slp_tree child;
fb85abff 1235
1236 if (!node)
1237 return;
1238
f1f41a6c 1239 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
fb85abff 1240 if (j < 0 || i == j)
1241 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1242
f1f41a6c 1243 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1244 vect_mark_slp_stmts (child, mark, j);
fb85abff 1245}
1246
1247
37545e54 1248/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1249
1250static void
1251vect_mark_slp_stmts_relevant (slp_tree node)
1252{
1253 int i;
1254 gimple stmt;
1255 stmt_vec_info stmt_info;
40bcc7c2 1256 slp_tree child;
37545e54 1257
1258 if (!node)
1259 return;
1260
f1f41a6c 1261 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
37545e54 1262 {
1263 stmt_info = vinfo_for_stmt (stmt);
48e1416a 1264 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
37545e54 1265 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1266 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1267 }
1268
f1f41a6c 1269 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1270 vect_mark_slp_stmts_relevant (child);
37545e54 1271}
1272
1273
eefa05c8 1274/* Rearrange the statements of NODE according to PERMUTATION. */
1275
1276static void
1277vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
678e3d6e 1278 vec<unsigned> permutation)
eefa05c8 1279{
1280 gimple stmt;
f1f41a6c 1281 vec<gimple> tmp_stmts;
40bcc7c2 1282 unsigned int i;
1283 slp_tree child;
eefa05c8 1284
f1f41a6c 1285 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 1286 vect_slp_rearrange_stmts (child, group_size, permutation);
eefa05c8 1287
f1f41a6c 1288 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1289 tmp_stmts.create (group_size);
40bcc7c2 1290 tmp_stmts.quick_grow_cleared (group_size);
eefa05c8 1291
f1f41a6c 1292 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
40bcc7c2 1293 tmp_stmts[permutation[i]] = stmt;
eefa05c8 1294
f1f41a6c 1295 SLP_TREE_SCALAR_STMTS (node).release ();
eefa05c8 1296 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1297}
1298
1299
2457db4b 1300/* Attempt to reorder stmts in a reduction chain so that we don't
1301 require any load permutation. Return true if that was possible,
1302 otherwise return false. */
1303
1304static bool
1305vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1306{
1307 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1308 unsigned int i, j;
1309 sbitmap load_index;
1310 unsigned int lidx;
1311 slp_tree node, load;
1312
1313 /* Compare all the permutation sequences to the first one. We know
1314 that at least one load is permuted. */
1315 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1316 if (!node->load_permutation.exists ())
1317 return false;
1318 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1319 {
1320 if (!load->load_permutation.exists ())
1321 return false;
1322 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1323 if (lidx != node->load_permutation[j])
1324 return false;
1325 }
1326
1327 /* Check that the loads in the first sequence are different and there
1328 are no gaps between them. */
1329 load_index = sbitmap_alloc (group_size);
1330 bitmap_clear (load_index);
1331 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1332 {
1333 if (bitmap_bit_p (load_index, lidx))
1334 {
1335 sbitmap_free (load_index);
1336 return false;
1337 }
1338 bitmap_set_bit (load_index, lidx);
1339 }
1340 for (i = 0; i < group_size; i++)
1341 if (!bitmap_bit_p (load_index, i))
1342 {
1343 sbitmap_free (load_index);
1344 return false;
1345 }
1346 sbitmap_free (load_index);
1347
1348 /* This permutation is valid for reduction. Since the order of the
1349 statements in the nodes is not important unless they are memory
1350 accesses, we can rearrange the statements in all the nodes
1351 according to the order of the loads. */
1352 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1353 node->load_permutation);
1354
1355 /* We are done, no actual permutations need to be generated. */
1356 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1357 SLP_TREE_LOAD_PERMUTATION (node).release ();
1358 return true;
1359}
1360
678e3d6e 1361/* Check if the required load permutations in the SLP instance
1362 SLP_INSTN are supported. */
fb85abff 1363
1364static bool
678e3d6e 1365vect_supported_load_permutation_p (slp_instance slp_instn)
fb85abff 1366{
678e3d6e 1367 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1368 unsigned int i, j, k, next;
16513aa4 1369 slp_tree node;
1370 gimple stmt, load, next_load, first_load;
24c271ef 1371 struct data_reference *dr;
fb85abff 1372
6d8fb6cf 1373 if (dump_enabled_p ())
fb85abff 1374 {
7bd765d4 1375 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
678e3d6e 1376 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1377 if (node->load_permutation.exists ())
1378 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1379 dump_printf (MSG_NOTE, "%d ", next);
1380 else
9cf883aa 1381 for (k = 0; k < group_size; ++k)
1382 dump_printf (MSG_NOTE, "%d ", k);
78bb46f5 1383 dump_printf (MSG_NOTE, "\n");
fb85abff 1384 }
1385
eefa05c8 1386 /* In case of reduction every load permutation is allowed, since the order
1387 of the reduction statements is not important (as opposed to the case of
ee612634 1388 grouped stores). The only condition we need to check is that all the
eefa05c8 1389 load nodes are of the same size and have the same permutation (and then
1390 rearrange all the nodes of the SLP instance according to this
1391 permutation). */
1392
1393 /* Check that all the load nodes are of the same size. */
678e3d6e 1394 /* ??? Can't we assert this? */
f1f41a6c 1395 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
16513aa4 1396 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1397 return false;
a2e70a99 1398
eefa05c8 1399 node = SLP_INSTANCE_TREE (slp_instn);
f1f41a6c 1400 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
eefa05c8 1401
39a5d6b1 1402 /* Reduction (there are no data-refs in the root).
2457db4b 1403 In reduction chain the order of the loads is not important. */
39a5d6b1 1404 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1405 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
eefa05c8 1406 {
2457db4b 1407 if (vect_attempt_slp_rearrange_stmts (slp_instn))
1408 return true;
eefa05c8 1409
2457db4b 1410 /* Fallthru to general load permutation handling. */
eefa05c8 1411 }
1412
24c271ef 1413 /* In basic block vectorization we allow any subchain of an interleaving
1414 chain.
1415 FORNOW: not supported in loop SLP because of realignment compications. */
678e3d6e 1416 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
24c271ef 1417 {
aa83abc3 1418 /* Check whether the loads in an instance form a subchain and thus
1419 no permutation is necessary. */
f1f41a6c 1420 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
24c271ef 1421 {
41fdc863 1422 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1423 continue;
aa83abc3 1424 bool subchain_p = true;
24c271ef 1425 next_load = NULL;
f1f41a6c 1426 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
24c271ef 1427 {
bd5123a9 1428 if (j != 0
1429 && (next_load != load
1430 || GROUP_GAP (vinfo_for_stmt (load)) != 1))
aa83abc3 1431 {
1432 subchain_p = false;
1433 break;
1434 }
24c271ef 1435 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1436 }
aa83abc3 1437 if (subchain_p)
1438 SLP_TREE_LOAD_PERMUTATION (node).release ();
1439 else
1440 {
1441 /* Verify the permutation can be generated. */
1442 vec<tree> tem;
1443 if (!vect_transform_slp_perm_load (node, tem, NULL,
1444 1, slp_instn, true))
1445 {
1446 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1447 vect_location,
1448 "unsupported load permutation\n");
1449 return false;
1450 }
1451 }
24c271ef 1452 }
1453
1454 /* Check that the alignment of the first load in every subchain, i.e.,
678e3d6e 1455 the first statement in every load node, is supported.
1456 ??? This belongs in alignment checking. */
1457 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1458 {
1459 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1460 if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1461 {
1462 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1463 if (vect_supportable_dr_alignment (dr, false)
1464 == dr_unaligned_unsupported)
1465 {
1466 if (dump_enabled_p ())
1467 {
1468 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1469 vect_location,
1470 "unsupported unaligned load ");
1471 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1472 first_load, 0);
78bb46f5 1473 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
678e3d6e 1474 }
1475 return false;
1476 }
1477 }
1478 }
24c271ef 1479
678e3d6e 1480 return true;
24c271ef 1481 }
1482
4e30dcb3 1483 /* For loop vectorization verify we can generate the permutation. */
678e3d6e 1484 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1485 if (node->load_permutation.exists ()
1486 && !vect_transform_slp_perm_load
1487 (node, vNULL, NULL,
1488 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1489 return false;
4e30dcb3 1490
678e3d6e 1491 return true;
fb85abff 1492}
1493
1494
d4b21757 1495/* Find the last store in SLP INSTANCE. */
282bf14c 1496
d4b21757 1497static gimple
b4026f25 1498vect_find_last_scalar_stmt_in_slp (slp_tree node)
d4b21757 1499{
b4026f25 1500 gimple last = NULL, stmt;
d4b21757 1501
b4026f25 1502 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1503 {
1504 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1505 if (is_pattern_stmt_p (stmt_vinfo))
1506 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1507 else
1508 last = get_later_stmt (stmt, last);
1509 }
d4b21757 1510
b4026f25 1511 return last;
d4b21757 1512}
1513
ee6e9ba5 1514/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1515
1516static void
31bf2c9a 1517vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
ee6e9ba5 1518 stmt_vector_for_cost *prologue_cost_vec,
31bf2c9a 1519 stmt_vector_for_cost *body_cost_vec,
ee6e9ba5 1520 unsigned ncopies_for_cost)
1521{
ee6e9ba5 1522 unsigned i;
1523 slp_tree child;
1524 gimple stmt, s;
1525 stmt_vec_info stmt_info;
1526 tree lhs;
1527 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1528
1529 /* Recurse down the SLP tree. */
1530 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6501e749 1531 if (child)
31bf2c9a 1532 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1533 body_cost_vec, ncopies_for_cost);
ee6e9ba5 1534
1535 /* Look at the first scalar stmt to determine the cost. */
1536 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1537 stmt_info = vinfo_for_stmt (stmt);
1538 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1539 {
1540 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1541 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1542 vect_uninitialized_def,
1543 node, prologue_cost_vec, body_cost_vec);
1544 else
1545 {
1546 int i;
1547 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1548 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1549 node, prologue_cost_vec, body_cost_vec);
1550 /* If the load is permuted record the cost for the permutation.
1551 ??? Loads from multiple chains are let through here only
1552 for a single special case involving complex numbers where
1553 in the end no permutation is necessary. */
1554 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1555 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1556 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1557 && vect_get_place_in_interleaving_chain
1558 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1559 {
1560 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1561 stmt_info, 0, vect_body);
1562 break;
1563 }
1564 }
1565 }
1566 else
66e30248 1567 {
1568 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1569 stmt_info, 0, vect_body);
1570 if (SLP_TREE_TWO_OPERATORS (node))
1571 {
1572 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1573 stmt_info, 0, vect_body);
1574 record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1575 stmt_info, 0, vect_body);
1576 }
1577 }
ee6e9ba5 1578
1579 /* Scan operands and account for prologue cost of constants/externals.
1580 ??? This over-estimates cost for multiple uses and should be
1581 re-engineered. */
1582 lhs = gimple_get_lhs (stmt);
1583 for (i = 0; i < gimple_num_ops (stmt); ++i)
1584 {
1585 tree def, op = gimple_op (stmt, i);
1586 gimple def_stmt;
1587 enum vect_def_type dt;
1588 if (!op || op == lhs)
1589 continue;
31bf2c9a 1590 if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
1591 STMT_VINFO_BB_VINFO (stmt_info),
b4026f25 1592 &def_stmt, &def, &dt))
1593 {
1594 /* Without looking at the actual initializer a vector of
1595 constants can be implemented as load from the constant pool.
1596 ??? We need to pass down stmt_info for a vector type
1597 even if it points to the wrong stmt. */
1598 if (dt == vect_constant_def)
1599 record_stmt_cost (prologue_cost_vec, 1, vector_load,
1600 stmt_info, 0, vect_prologue);
1601 else if (dt == vect_external_def)
1602 record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1603 stmt_info, 0, vect_prologue);
1604 }
ee6e9ba5 1605 }
1606}
1607
1608/* Compute the cost for the SLP instance INSTANCE. */
1609
1610static void
31bf2c9a 1611vect_analyze_slp_cost (slp_instance instance, void *data)
ee6e9ba5 1612{
1613 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1614 unsigned ncopies_for_cost;
1615 stmt_info_for_cost *si;
1616 unsigned i;
1617
1618 /* Calculate the number of vector stmts to create based on the unrolling
1619 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1620 GROUP_SIZE / NUNITS otherwise. */
1621 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
31bf2c9a 1622 slp_tree node = SLP_INSTANCE_TREE (instance);
1623 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1624 /* Adjust the group_size by the vectorization factor which is always one
1625 for basic-block vectorization. */
1626 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1627 group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1628 unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1629 /* For reductions look at a reduction operand in case the reduction
1630 operation is widening like DOT_PROD or SAD. */
1631 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1632 {
1633 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1634 switch (gimple_assign_rhs_code (stmt))
1635 {
1636 case DOT_PROD_EXPR:
1637 case SAD_EXPR:
1638 nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1639 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1640 break;
1641 default:;
1642 }
1643 }
ee6e9ba5 1644 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1645
1646 prologue_cost_vec.create (10);
1647 body_cost_vec.create (10);
31bf2c9a 1648 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1649 &prologue_cost_vec, &body_cost_vec,
1650 ncopies_for_cost);
ee6e9ba5 1651
1652 /* Record the prologue costs, which were delayed until we were
31bf2c9a 1653 sure that SLP was successful. */
ee6e9ba5 1654 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1655 {
1656 struct _stmt_vec_info *stmt_info
1657 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1658 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1659 si->misalign, vect_prologue);
1660 }
1661
31bf2c9a 1662 /* Record the instance's instructions in the target cost model. */
1663 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1664 {
1665 struct _stmt_vec_info *stmt_info
1666 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1667 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1668 si->misalign, vect_body);
1669 }
1670
ee6e9ba5 1671 prologue_cost_vec.release ();
31bf2c9a 1672 body_cost_vec.release ();
ee6e9ba5 1673}
d4b21757 1674
ee612634 1675/* Analyze an SLP instance starting from a group of grouped stores. Call
48e1416a 1676 vect_build_slp_tree to build a tree of packed stmts if possible.
fb85abff 1677 Return FALSE if it's impossible to SLP any stmt in the loop. */
1678
1679static bool
37545e54 1680vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
ca91d3f8 1681 gimple stmt, unsigned max_tree_size)
fb85abff 1682{
1683 slp_instance new_instance;
b0f64919 1684 slp_tree node;
21009880 1685 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
fb85abff 1686 unsigned int unrolling_factor = 1, nunits;
eefa05c8 1687 tree vectype, scalar_type = NULL_TREE;
fb85abff 1688 gimple next;
f018d957 1689 unsigned int vectorization_factor = 0;
ee6e9ba5 1690 int i;
fb85abff 1691 unsigned int max_nunits = 0;
f1f41a6c 1692 vec<slp_tree> loads;
eefa05c8 1693 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
f1f41a6c 1694 vec<gimple> scalar_stmts;
eefa05c8 1695
39a5d6b1 1696 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
eefa05c8 1697 {
39a5d6b1 1698 if (dr)
1699 {
1700 scalar_type = TREE_TYPE (DR_REF (dr));
1701 vectype = get_vectype_for_scalar_type (scalar_type);
1702 }
1703 else
1704 {
1705 gcc_assert (loop_vinfo);
1706 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1707 }
1708
21009880 1709 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
eefa05c8 1710 }
1711 else
1712 {
1713 gcc_assert (loop_vinfo);
1714 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
f1f41a6c 1715 group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
eefa05c8 1716 }
48e1416a 1717
fb85abff 1718 if (!vectype)
1719 {
6d8fb6cf 1720 if (dump_enabled_p ())
fb85abff 1721 {
7bd765d4 1722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1723 "Build SLP failed: unsupported data-type ");
1724 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
78bb46f5 1725 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 1726 }
eefa05c8 1727
fb85abff 1728 return false;
1729 }
1730
1731 nunits = TYPE_VECTOR_SUBPARTS (vectype);
37545e54 1732 if (loop_vinfo)
1733 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1734 else
37545e54 1735 vectorization_factor = nunits;
1736
37545e54 1737 /* Calculate the unrolling factor. */
1738 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1739 if (unrolling_factor != 1 && !loop_vinfo)
1740 {
6d8fb6cf 1741 if (dump_enabled_p ())
78bb46f5 1742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 1743 "Build SLP failed: unrolling required in basic"
78bb46f5 1744 " block SLP\n");
48e1416a 1745
37545e54 1746 return false;
1747 }
1748
ee612634 1749 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
f1f41a6c 1750 scalar_stmts.create (group_size);
fb85abff 1751 next = stmt;
39a5d6b1 1752 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
fb85abff 1753 {
eefa05c8 1754 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1755 while (next)
1756 {
f2104a54 1757 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1758 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
f1f41a6c 1759 scalar_stmts.safe_push (
1760 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f2104a54 1761 else
f1f41a6c 1762 scalar_stmts.safe_push (next);
21009880 1763 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
eefa05c8 1764 }
34563054 1765 /* Mark the first element of the reduction chain as reduction to properly
1766 transform the node. In the reduction analysis phase only the last
1767 element of the chain is marked as reduction. */
1768 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1769 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
eefa05c8 1770 }
1771 else
1772 {
1773 /* Collect reduction statements. */
f1f41a6c 1774 vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1775 for (i = 0; reductions.iterate (i, &next); i++)
1776 scalar_stmts.safe_push (next);
fb85abff 1777 }
1778
b0f64919 1779 node = vect_create_new_slp_node (scalar_stmts);
fb85abff 1780
f1f41a6c 1781 loads.create (group_size);
fb85abff 1782
1783 /* Build the tree for the SLP instance. */
bad02805 1784 bool *matches = XALLOCAVEC (bool, group_size);
1785 unsigned npermutes = 0;
48e1416a 1786 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
0eba4748 1787 &max_nunits, &loads,
bad02805 1788 vectorization_factor, matches, &npermutes, NULL,
ca91d3f8 1789 max_tree_size))
fb85abff 1790 {
03dab834 1791 /* Calculate the unrolling factor based on the smallest type. */
fb85abff 1792 if (max_nunits > nunits)
1793 unrolling_factor = least_common_multiple (max_nunits, group_size)
1794 / group_size;
48e1416a 1795
03dab834 1796 if (unrolling_factor != 1 && !loop_vinfo)
1797 {
6d8fb6cf 1798 if (dump_enabled_p ())
78bb46f5 1799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 1800 "Build SLP failed: unrolling required in basic"
78bb46f5 1801 " block SLP\n");
9f793cdf 1802 vect_free_slp_tree (node);
f1f41a6c 1803 loads.release ();
03dab834 1804 return false;
1805 }
1806
1807 /* Create a new SLP instance. */
1808 new_instance = XNEW (struct _slp_instance);
1809 SLP_INSTANCE_TREE (new_instance) = node;
1810 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
fb85abff 1811 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
fb85abff 1812 SLP_INSTANCE_LOADS (new_instance) = loads;
0eba4748 1813
1814 /* Compute the load permutation. */
1815 slp_tree load_node;
1816 bool loads_permuted = false;
0eba4748 1817 FOR_EACH_VEC_ELT (loads, i, load_node)
1818 {
678e3d6e 1819 vec<unsigned> load_permutation;
0eba4748 1820 int j;
16513aa4 1821 gimple load, first_stmt;
678e3d6e 1822 bool this_load_permuted = false;
1823 load_permutation.create (group_size);
16513aa4 1824 first_stmt = GROUP_FIRST_ELEMENT
1825 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
0eba4748 1826 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1827 {
16513aa4 1828 int load_place
1829 = vect_get_place_in_interleaving_chain (load, first_stmt);
1830 gcc_assert (load_place != -1);
1831 if (load_place != j)
678e3d6e 1832 this_load_permuted = true;
0eba4748 1833 load_permutation.safe_push (load_place);
1834 }
6f3ba68a 1835 if (!this_load_permuted
1836 /* The load requires permutation when unrolling exposes
1837 a gap either because the group is larger than the SLP
1838 group-size or because there is a gap between the groups. */
1839 && (unrolling_factor == 1
1840 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1841 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
678e3d6e 1842 {
1843 load_permutation.release ();
1844 continue;
1845 }
1846 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1847 loads_permuted = true;
0eba4748 1848 }
24c271ef 1849
1850 if (loads_permuted)
fb85abff 1851 {
678e3d6e 1852 if (!vect_supported_load_permutation_p (new_instance))
fb85abff 1853 {
6d8fb6cf 1854 if (dump_enabled_p ())
fb85abff 1855 {
78bb46f5 1856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 1857 "Build SLP failed: unsupported load "
1858 "permutation ");
1859 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 1860 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 1861 }
fb85abff 1862 vect_free_slp_instance (new_instance);
1863 return false;
1864 }
fb85abff 1865 }
fb85abff 1866
f97dec81 1867
37545e54 1868 if (loop_vinfo)
31bf2c9a 1869 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
37545e54 1870 else
f1f41a6c 1871 BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
48e1416a 1872
6d8fb6cf 1873 if (dump_enabled_p ())
7bd765d4 1874 vect_print_slp_tree (MSG_NOTE, node);
fb85abff 1875
1876 return true;
1877 }
1878
1879 /* Failed to SLP. */
1880 /* Free the allocated memory. */
1881 vect_free_slp_tree (node);
f1f41a6c 1882 loads.release ();
48e1416a 1883
37545e54 1884 return false;
fb85abff 1885}
1886
1887
282bf14c 1888/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
fb85abff 1889 trees of packed scalar stmts if SLP is possible. */
1890
1891bool
ca91d3f8 1892vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1893 unsigned max_tree_size)
fb85abff 1894{
1895 unsigned int i;
f1f41a6c 1896 vec<gimple> grouped_stores;
1e094109 1897 vec<gimple> reductions = vNULL;
1898 vec<gimple> reduc_chains = vNULL;
39a5d6b1 1899 gimple first_element;
37545e54 1900 bool ok = false;
fb85abff 1901
6d8fb6cf 1902 if (dump_enabled_p ())
78bb46f5 1903 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
fb85abff 1904
37545e54 1905 if (loop_vinfo)
eefa05c8 1906 {
ee612634 1907 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
39a5d6b1 1908 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
eefa05c8 1909 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1910 }
37545e54 1911 else
ee612634 1912 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
48e1416a 1913
ee612634 1914 /* Find SLP sequences starting from groups of grouped stores. */
f1f41a6c 1915 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
ca91d3f8 1916 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1917 max_tree_size))
37545e54 1918 ok = true;
fb85abff 1919
423475a3 1920 if (reduc_chains.length () > 0)
39a5d6b1 1921 {
1922 /* Find SLP sequences starting from reduction chains. */
f1f41a6c 1923 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
ca91d3f8 1924 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1925 max_tree_size))
39a5d6b1 1926 ok = true;
1927 else
1928 return false;
1929
1930 /* Don't try to vectorize SLP reductions if reduction chain was
1931 detected. */
1932 return ok;
1933 }
1934
eefa05c8 1935 /* Find SLP sequences starting from groups of reductions. */
423475a3 1936 if (reductions.length () > 1
ca91d3f8 1937 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
1938 max_tree_size))
eefa05c8 1939 ok = true;
1940
fb85abff 1941 return true;
1942}
1943
1944
1945/* For each possible SLP instance decide whether to SLP it and calculate overall
bc937a44 1946 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1947 least one instance. */
fb85abff 1948
bc937a44 1949bool
fb85abff 1950vect_make_slp_decision (loop_vec_info loop_vinfo)
1951{
1952 unsigned int i, unrolling_factor = 1;
f1f41a6c 1953 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
fb85abff 1954 slp_instance instance;
1955 int decided_to_slp = 0;
1956
6d8fb6cf 1957 if (dump_enabled_p ())
78bb46f5 1958 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1959 "\n");
fb85abff 1960
f1f41a6c 1961 FOR_EACH_VEC_ELT (slp_instances, i, instance)
fb85abff 1962 {
1963 /* FORNOW: SLP if you can. */
1964 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1965 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1966
282bf14c 1967 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
48e1416a 1968 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
282bf14c 1969 loop-based vectorization. Such stmts will be marked as HYBRID. */
fb85abff 1970 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1971 decided_to_slp++;
1972 }
1973
1974 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1975
6d8fb6cf 1976 if (decided_to_slp && dump_enabled_p ())
b055bc88 1977 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1978 "Decided to SLP %d instances. Unrolling factor %d\n",
7bd765d4 1979 decided_to_slp, unrolling_factor);
bc937a44 1980
1981 return (decided_to_slp > 0);
fb85abff 1982}
1983
1984
1985/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
282bf14c 1986 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
fb85abff 1987
1988static void
041208c6 1989vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
fb85abff 1990{
041208c6 1991 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[i];
fb85abff 1992 imm_use_iterator imm_iter;
1993 gimple use_stmt;
041208c6 1994 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
40bcc7c2 1995 slp_tree child;
a1123047 1996 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
041208c6 1997 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1998 int j;
1999
2000 /* Propagate hybrid down the SLP tree. */
2001 if (stype == hybrid)
2002 ;
2003 else if (HYBRID_SLP_STMT (stmt_vinfo))
2004 stype = hybrid;
2005 else
2006 {
2007 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2008 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
0906dd56 2009 /* We always get the pattern stmt here, but for immediate
2010 uses we have to use the LHS of the original stmt. */
2011 gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2012 if (STMT_VINFO_RELATED_STMT (stmt_vinfo))
2013 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
041208c6 2014 if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2015 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
0906dd56 2016 {
2017 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2018 continue;
2019 use_vinfo = vinfo_for_stmt (use_stmt);
2020 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2021 && STMT_VINFO_RELATED_STMT (use_vinfo))
2022 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2023 if (!STMT_SLP_TYPE (use_vinfo)
2024 && (STMT_VINFO_RELEVANT (use_vinfo)
2025 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2026 && !(gimple_code (use_stmt) == GIMPLE_PHI
2027 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2028 stype = hybrid;
2029 }
041208c6 2030 }
fb85abff 2031
041208c6 2032 if (stype == hybrid)
81ad436b 2033 {
2034 if (dump_enabled_p ())
2035 {
2036 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2037 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2038 }
2039 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2040 }
fb85abff 2041
041208c6 2042 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
6501e749 2043 if (child)
2044 vect_detect_hybrid_slp_stmts (child, i, stype);
041208c6 2045}
a1123047 2046
041208c6 2047/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
fb85abff 2048
041208c6 2049static tree
2050vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2051{
2052 walk_stmt_info *wi = (walk_stmt_info *)data;
2053 struct loop *loopp = (struct loop *)wi->info;
2054
2055 if (wi->is_lhs)
2056 return NULL_TREE;
2057
2058 if (TREE_CODE (*tp) == SSA_NAME
2059 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2060 {
2061 gimple def_stmt = SSA_NAME_DEF_STMT (*tp);
2062 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2063 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
81ad436b 2064 {
2065 if (dump_enabled_p ())
2066 {
2067 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2068 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2069 }
2070 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2071 }
041208c6 2072 }
2073
2074 return NULL_TREE;
fb85abff 2075}
2076
041208c6 2077static tree
2078vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2079 walk_stmt_info *)
2080{
2081 /* If the stmt is in a SLP instance then this isn't a reason
2082 to mark use definitions in other SLP instances as hybrid. */
2083 if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2084 *handled = true;
2085 return NULL_TREE;
2086}
fb85abff 2087
2088/* Find stmts that must be both vectorized and SLPed. */
2089
2090void
2091vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2092{
2093 unsigned int i;
f1f41a6c 2094 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
fb85abff 2095 slp_instance instance;
2096
6d8fb6cf 2097 if (dump_enabled_p ())
78bb46f5 2098 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2099 "\n");
fb85abff 2100
041208c6 2101 /* First walk all pattern stmt in the loop and mark defs of uses as
2102 hybrid because immediate uses in them are not recorded. */
2103 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2104 {
2105 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2106 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2107 gsi_next (&gsi))
2108 {
2109 gimple stmt = gsi_stmt (gsi);
2110 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2111 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2112 {
2113 walk_stmt_info wi;
2114 memset (&wi, 0, sizeof (wi));
2115 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2116 gimple_stmt_iterator gsi2
2117 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2118 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2119 vect_detect_hybrid_slp_1, &wi);
2120 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2121 vect_detect_hybrid_slp_2,
2122 vect_detect_hybrid_slp_1, &wi);
2123 }
2124 }
2125 }
2126
2127 /* Then walk the SLP instance trees marking stmts with uses in
2128 non-SLP stmts as hybrid, also propagating hybrid down the
2129 SLP tree, collecting the above info on-the-fly. */
f1f41a6c 2130 FOR_EACH_VEC_ELT (slp_instances, i, instance)
041208c6 2131 {
2132 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2133 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2134 i, pure_slp);
2135 }
fb85abff 2136}
2137
37545e54 2138
2139/* Create and initialize a new bb_vec_info struct for BB, as well as
2140 stmt_vec_info structs for all the stmts in it. */
48e1416a 2141
37545e54 2142static bb_vec_info
2143new_bb_vec_info (basic_block bb)
2144{
2145 bb_vec_info res = NULL;
2146 gimple_stmt_iterator gsi;
2147
2148 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2149 BB_VINFO_BB (res) = bb;
2150
2151 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2152 {
2153 gimple stmt = gsi_stmt (gsi);
2154 gimple_set_uid (stmt, 0);
2155 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
2156 }
2157
f1f41a6c 2158 BB_VINFO_GROUPED_STORES (res).create (10);
2159 BB_VINFO_SLP_INSTANCES (res).create (2);
4db2b577 2160 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
37545e54 2161
2162 bb->aux = res;
2163 return res;
2164}
2165
2166
2167/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2168 stmts in the basic block. */
2169
2170static void
2171destroy_bb_vec_info (bb_vec_info bb_vinfo)
2172{
f1f41a6c 2173 vec<slp_instance> slp_instances;
9f793cdf 2174 slp_instance instance;
37545e54 2175 basic_block bb;
2176 gimple_stmt_iterator si;
9f793cdf 2177 unsigned i;
37545e54 2178
2179 if (!bb_vinfo)
2180 return;
2181
2182 bb = BB_VINFO_BB (bb_vinfo);
2183
2184 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2185 {
2186 gimple stmt = gsi_stmt (si);
2187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2188
2189 if (stmt_info)
2190 /* Free stmt_vec_info. */
2191 free_stmt_vec_info (stmt);
2192 }
2193
23e1875f 2194 vect_destroy_datarefs (NULL, bb_vinfo);
a8af2e86 2195 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
f1f41a6c 2196 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
9f793cdf 2197 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
f1f41a6c 2198 FOR_EACH_VEC_ELT (slp_instances, i, instance)
9f793cdf 2199 vect_free_slp_instance (instance);
f1f41a6c 2200 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
4db2b577 2201 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
37545e54 2202 free (bb_vinfo);
2203 bb->aux = NULL;
2204}
2205
2206
2207/* Analyze statements contained in SLP tree node after recursively analyzing
2208 the subtree. Return TRUE if the operations are supported. */
2209
2210static bool
5cb834f3 2211vect_slp_analyze_node_operations (slp_tree node)
37545e54 2212{
2213 bool dummy;
2214 int i;
2215 gimple stmt;
40bcc7c2 2216 slp_tree child;
37545e54 2217
2218 if (!node)
2219 return true;
2220
f1f41a6c 2221 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
5cb834f3 2222 if (!vect_slp_analyze_node_operations (child))
b0f64919 2223 return false;
37545e54 2224
f1f41a6c 2225 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
37545e54 2226 {
2227 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2228 gcc_assert (stmt_info);
5cb834f3 2229 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
37545e54 2230
2231 if (!vect_analyze_stmt (stmt, &dummy, node))
5cb834f3 2232 return false;
37545e54 2233 }
2234
2235 return true;
2236}
2237
2238
282bf14c 2239/* Analyze statements in SLP instances of the basic block. Return TRUE if the
37545e54 2240 operations are supported. */
2241
5cb834f3 2242bool
31bf2c9a 2243vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
37545e54 2244{
37545e54 2245 slp_instance instance;
2246 int i;
2247
5cb834f3 2248 if (dump_enabled_p ())
2249 dump_printf_loc (MSG_NOTE, vect_location,
2250 "=== vect_slp_analyze_operations ===\n");
2251
f1f41a6c 2252 for (i = 0; slp_instances.iterate (i, &instance); )
37545e54 2253 {
5cb834f3 2254 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
37545e54 2255 {
5cb834f3 2256 dump_printf_loc (MSG_NOTE, vect_location,
2257 "removing SLP instance operations starting from: ");
2258 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2259 SLP_TREE_SCALAR_STMTS
2260 (SLP_INSTANCE_TREE (instance))[0], 0);
2261 vect_free_slp_instance (instance);
f1f41a6c 2262 slp_instances.ordered_remove (i);
37545e54 2263 }
2264 else
31bf2c9a 2265 {
2266 /* Compute the costs of the SLP instance. */
2267 vect_analyze_slp_cost (instance, data);
2268 i++;
2269 }
48e1416a 2270 }
2271
f1f41a6c 2272 if (!slp_instances.length ())
37545e54 2273 return false;
2274
2275 return true;
2276}
2277
32afb748 2278
2279/* Compute the scalar cost of the SLP node NODE and its children
2280 and return it. Do not account defs that are marked in LIFE and
2281 update LIFE according to uses of NODE. */
2282
2283static unsigned
65526f7f 2284vect_bb_slp_scalar_cost (basic_block bb,
d70aebca 2285 slp_tree node, vec<bool, va_heap> *life)
32afb748 2286{
2287 unsigned scalar_cost = 0;
2288 unsigned i;
2289 gimple stmt;
2290 slp_tree child;
2291
2292 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2293 {
2294 unsigned stmt_cost;
2295 ssa_op_iter op_iter;
2296 def_operand_p def_p;
2297 stmt_vec_info stmt_info;
2298
d70aebca 2299 if ((*life)[i])
32afb748 2300 continue;
2301
2302 /* If there is a non-vectorized use of the defs then the scalar
2303 stmt is kept live in which case we do not account it or any
2304 required defs in the SLP children in the scalar cost. This
2305 way we make the vectorization more costly when compared to
2306 the scalar cost. */
2307 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2308 {
2309 imm_use_iterator use_iter;
2310 gimple use_stmt;
2311 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
ab2dc680 2312 if (!is_gimple_debug (use_stmt)
2313 && (gimple_code (use_stmt) == GIMPLE_PHI
2314 || gimple_bb (use_stmt) != bb
2315 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
32afb748 2316 {
d70aebca 2317 (*life)[i] = true;
32afb748 2318 BREAK_FROM_IMM_USE_STMT (use_iter);
2319 }
2320 }
d70aebca 2321 if ((*life)[i])
32afb748 2322 continue;
2323
2324 stmt_info = vinfo_for_stmt (stmt);
2325 if (STMT_VINFO_DATA_REF (stmt_info))
2326 {
2327 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2328 stmt_cost = vect_get_stmt_cost (scalar_load);
2329 else
2330 stmt_cost = vect_get_stmt_cost (scalar_store);
2331 }
2332 else
2333 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2334
2335 scalar_cost += stmt_cost;
2336 }
2337
2338 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6501e749 2339 if (child)
2340 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
32afb748 2341
2342 return scalar_cost;
2343}
2344
ad074595 2345/* Check if vectorization of the basic block is profitable. */
2346
2347static bool
2348vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2349{
f1f41a6c 2350 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
ad074595 2351 slp_instance instance;
31bf2c9a 2352 int i;
4db2b577 2353 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
f97dec81 2354 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
ad074595 2355
2356 /* Calculate scalar cost. */
32afb748 2357 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ad074595 2358 {
4997014d 2359 auto_vec<bool, 20> life;
d70aebca 2360 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
65526f7f 2361 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2362 SLP_INSTANCE_TREE (instance),
d70aebca 2363 &life);
ad074595 2364 }
2365
4db2b577 2366 /* Complete the target-specific cost calculation. */
f97dec81 2367 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2368 &vec_inside_cost, &vec_epilogue_cost);
2369
2370 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
4db2b577 2371
6d8fb6cf 2372 if (dump_enabled_p ())
ad074595 2373 {
7bd765d4 2374 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2375 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2376 vec_inside_cost);
2377 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2378 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
78bb46f5 2379 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
ad074595 2380 }
2381
2382 /* Vectorization is profitable if its cost is less than the cost of scalar
2383 version. */
2384 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2385 return false;
2386
2387 return true;
2388}
2389
2390/* Check if the basic block can be vectorized. */
37545e54 2391
5bbe6156 2392static bb_vec_info
2393vect_slp_analyze_bb_1 (basic_block bb)
37545e54 2394{
2395 bb_vec_info bb_vinfo;
f1f41a6c 2396 vec<slp_instance> slp_instances;
37545e54 2397 slp_instance instance;
5bbe6156 2398 int i;
91a74fc6 2399 int min_vf = 2;
ca91d3f8 2400 unsigned n_stmts = 0;
d4b21757 2401
37545e54 2402 bb_vinfo = new_bb_vec_info (bb);
2403 if (!bb_vinfo)
2404 return NULL;
2405
ca91d3f8 2406 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
37545e54 2407 {
6d8fb6cf 2408 if (dump_enabled_p ())
7bd765d4 2409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2410 "not vectorized: unhandled data-ref in basic "
2411 "block.\n");
48e1416a 2412
37545e54 2413 destroy_bb_vec_info (bb_vinfo);
2414 return NULL;
2415 }
2416
07e3bcbf 2417 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
37545e54 2418 {
6d8fb6cf 2419 if (dump_enabled_p ())
7bd765d4 2420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2421 "not vectorized: not enough data-refs in "
2422 "basic block.\n");
37545e54 2423
2424 destroy_bb_vec_info (bb_vinfo);
2425 return NULL;
2426 }
2427
68f15e9d 2428 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2429 {
2430 if (dump_enabled_p ())
2431 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2432 "not vectorized: unhandled data access in "
2433 "basic block.\n");
2434
2435 destroy_bb_vec_info (bb_vinfo);
2436 return NULL;
2437 }
2438
4c0c783a 2439 vect_pattern_recog (NULL, bb_vinfo);
2440
37545e54 2441 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2442 {
6d8fb6cf 2443 if (dump_enabled_p ())
7bd765d4 2444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2445 "not vectorized: bad data alignment in basic "
2446 "block.\n");
48e1416a 2447
37545e54 2448 destroy_bb_vec_info (bb_vinfo);
2449 return NULL;
2450 }
48e1416a 2451
37545e54 2452 /* Check the SLP opportunities in the basic block, analyze and build SLP
2453 trees. */
ca91d3f8 2454 if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
37545e54 2455 {
6d8fb6cf 2456 if (dump_enabled_p ())
423475a3 2457 {
2458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2459 "Failed to SLP the basic block.\n");
2460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2461 "not vectorized: failed to find SLP opportunities "
2462 "in basic block.\n");
2463 }
37545e54 2464
2465 destroy_bb_vec_info (bb_vinfo);
2466 return NULL;
2467 }
48e1416a 2468
37545e54 2469 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2470
2471 /* Mark all the statements that we want to vectorize as pure SLP and
2472 relevant. */
f1f41a6c 2473 FOR_EACH_VEC_ELT (slp_instances, i, instance)
37545e54 2474 {
2475 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2476 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
48e1416a 2477 }
37545e54 2478
1824e1e2 2479 /* Mark all the statements that we do not want to vectorize. */
2480 for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2481 !gsi_end_p (gsi); gsi_next (&gsi))
2482 {
2483 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2484 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2485 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2486 }
2487
2488 /* Analyze dependences. At this point all stmts not participating in
2489 vectorization have to be marked. Dependence analysis assumes
2490 that we either vectorize all SLP instances or none at all. */
2491 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2492 {
2493 if (dump_enabled_p ())
2494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2495 "not vectorized: unhandled data dependence "
2496 "in basic block.\n");
2497
2498 destroy_bb_vec_info (bb_vinfo);
2499 return NULL;
2500 }
2501
4db2b577 2502 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1ad41595 2503 {
6d8fb6cf 2504 if (dump_enabled_p ())
7bd765d4 2505 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2506 "not vectorized: unsupported alignment in basic "
2507 "block.\n");
1ad41595 2508 destroy_bb_vec_info (bb_vinfo);
2509 return NULL;
2510 }
2511
31bf2c9a 2512 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2513 BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
37545e54 2514 {
6d8fb6cf 2515 if (dump_enabled_p ())
78bb46f5 2516 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 2517 "not vectorized: bad operation in basic block.\n");
37545e54 2518
2519 destroy_bb_vec_info (bb_vinfo);
2520 return NULL;
2521 }
2522
ad074595 2523 /* Cost model: check if the vectorization is worthwhile. */
3e398f5b 2524 if (!unlimited_cost_model (NULL)
ad074595 2525 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2526 {
6d8fb6cf 2527 if (dump_enabled_p ())
7bd765d4 2528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2529 "not vectorized: vectorization is not "
2530 "profitable.\n");
ad074595 2531
2532 destroy_bb_vec_info (bb_vinfo);
2533 return NULL;
2534 }
2535
6d8fb6cf 2536 if (dump_enabled_p ())
7bd765d4 2537 dump_printf_loc (MSG_NOTE, vect_location,
2538 "Basic block will be vectorized using SLP\n");
37545e54 2539
2540 return bb_vinfo;
2541}
2542
2543
5bbe6156 2544bb_vec_info
2545vect_slp_analyze_bb (basic_block bb)
2546{
2547 bb_vec_info bb_vinfo;
2548 int insns = 0;
2549 gimple_stmt_iterator gsi;
2550 unsigned int vector_sizes;
2551
6d8fb6cf 2552 if (dump_enabled_p ())
7bd765d4 2553 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
5bbe6156 2554
2555 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2556 {
2557 gimple stmt = gsi_stmt (gsi);
2558 if (!is_gimple_debug (stmt)
2559 && !gimple_nop_p (stmt)
2560 && gimple_code (stmt) != GIMPLE_LABEL)
2561 insns++;
2562 }
2563
2564 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2565 {
6d8fb6cf 2566 if (dump_enabled_p ())
7bd765d4 2567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2568 "not vectorized: too many instructions in "
2569 "basic block.\n");
5bbe6156 2570
2571 return NULL;
2572 }
2573
2574 /* Autodetect first vector size we try. */
2575 current_vector_size = 0;
2576 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2577
2578 while (1)
2579 {
2580 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2581 if (bb_vinfo)
2582 return bb_vinfo;
2583
2584 destroy_bb_vec_info (bb_vinfo);
2585
2586 vector_sizes &= ~current_vector_size;
2587 if (vector_sizes == 0
2588 || current_vector_size == 0)
2589 return NULL;
2590
2591 /* Try the next biggest vector size. */
2592 current_vector_size = 1 << floor_log2 (vector_sizes);
6d8fb6cf 2593 if (dump_enabled_p ())
7bd765d4 2594 dump_printf_loc (MSG_NOTE, vect_location,
2595 "***** Re-trying analysis with "
2596 "vector size %d\n", current_vector_size);
5bbe6156 2597 }
2598}
2599
2600
48e1416a 2601/* For constant and loop invariant defs of SLP_NODE this function returns
2602 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
987502fa 2603 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2604 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
eefa05c8 2605 REDUC_INDEX is the index of the reduction operand in the statements, unless
2606 it is -1. */
fb85abff 2607
2608static void
09e31a48 2609vect_get_constant_vectors (tree op, slp_tree slp_node,
f1f41a6c 2610 vec<tree> *vec_oprnds,
eefa05c8 2611 unsigned int op_num, unsigned int number_of_vectors,
2612 int reduc_index)
fb85abff 2613{
f1f41a6c 2614 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2615 gimple stmt = stmts[0];
fb85abff 2616 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
fadf62f4 2617 unsigned nunits;
fb85abff 2618 tree vec_cst;
fadf62f4 2619 tree *elts;
2620 unsigned j, number_of_places_left_in_vector;
fb85abff 2621 tree vector_type;
09e31a48 2622 tree vop;
f1f41a6c 2623 int group_size = stmts.length ();
fb85abff 2624 unsigned int vec_num, i;
fadf62f4 2625 unsigned number_of_copies = 1;
f1f41a6c 2626 vec<tree> voprnds;
2627 voprnds.create (number_of_vectors);
fb85abff 2628 bool constant_p, is_store;
eefa05c8 2629 tree neutral_op = NULL;
98dbf32d 2630 enum tree_code code = gimple_expr_code (stmt);
d706d35c 2631 gimple def_stmt;
2632 struct loop *loop;
0ff8139c 2633 gimple_seq ctor_seq = NULL;
eefa05c8 2634
95fd3578 2635 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2636 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2637
38fbbe0d 2638 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2639 && reduc_index != -1)
eefa05c8 2640 {
95fd3578 2641 op_num = reduc_index;
2642 op = gimple_op (stmt, op_num + 1);
eefa05c8 2643 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
282bf14c 2644 we need either neutral operands or the original operands. See
eefa05c8 2645 get_initial_def_for_reduction() for details. */
2646 switch (code)
2647 {
2648 case WIDEN_SUM_EXPR:
2649 case DOT_PROD_EXPR:
95fd3578 2650 case SAD_EXPR:
eefa05c8 2651 case PLUS_EXPR:
2652 case MINUS_EXPR:
2653 case BIT_IOR_EXPR:
2654 case BIT_XOR_EXPR:
2655 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2656 neutral_op = build_real (TREE_TYPE (op), dconst0);
2657 else
2658 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2659
2660 break;
2661
2662 case MULT_EXPR:
eefa05c8 2663 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2664 neutral_op = build_real (TREE_TYPE (op), dconst1);
2665 else
2666 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2667
2668 break;
2669
b036fcd8 2670 case BIT_AND_EXPR:
2671 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2672 break;
2673
298cab21 2674 /* For MIN/MAX we don't have an easy neutral operand but
2675 the initial values can be used fine here. Only for
2676 a reduction chain we have to force a neutral element. */
2677 case MAX_EXPR:
2678 case MIN_EXPR:
2679 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2680 neutral_op = NULL;
2681 else
2682 {
2683 def_stmt = SSA_NAME_DEF_STMT (op);
2684 loop = (gimple_bb (stmt))->loop_father;
2685 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2686 loop_preheader_edge (loop));
2687 }
2688 break;
d706d35c 2689
eefa05c8 2690 default:
95fd3578 2691 gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
d706d35c 2692 neutral_op = NULL;
eefa05c8 2693 }
2694 }
fb85abff 2695
2696 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2697 {
2698 is_store = true;
2699 op = gimple_assign_rhs1 (stmt);
2700 }
2701 else
09e31a48 2702 is_store = false;
2703
2704 gcc_assert (op);
fb85abff 2705
2706 if (CONSTANT_CLASS_P (op))
987502fa 2707 constant_p = true;
fb85abff 2708 else
987502fa 2709 constant_p = false;
2710
fb85abff 2711 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
48e1416a 2712 created vectors. It is greater than 1 if unrolling is performed.
fb85abff 2713
2714 For example, we have two scalar operands, s1 and s2 (e.g., group of
2715 strided accesses of size two), while NUNITS is four (i.e., four scalars
f2104a54 2716 of this type can be packed in a vector). The output vector will contain
2717 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
fb85abff 2718 will be 2).
2719
48e1416a 2720 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
fb85abff 2721 containing the operands.
2722
2723 For example, NUNITS is four as before, and the group size is 8
f2104a54 2724 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
fb85abff 2725 {s5, s6, s7, s8}. */
48e1416a 2726
34563054 2727 number_of_copies = nunits * number_of_vectors / group_size;
fb85abff 2728
2729 number_of_places_left_in_vector = nunits;
fadf62f4 2730 elts = XALLOCAVEC (tree, nunits);
6501e749 2731 bool place_after_defs = false;
fb85abff 2732 for (j = 0; j < number_of_copies; j++)
2733 {
f1f41a6c 2734 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
fb85abff 2735 {
2736 if (is_store)
2737 op = gimple_assign_rhs1 (stmt);
98dbf32d 2738 else
f2104a54 2739 {
98dbf32d 2740 switch (code)
f2104a54 2741 {
98dbf32d 2742 case COND_EXPR:
2743 if (op_num == 0 || op_num == 1)
2744 {
2745 tree cond = gimple_assign_rhs1 (stmt);
2746 op = TREE_OPERAND (cond, op_num);
2747 }
2748 else
2749 {
2750 if (op_num == 2)
2751 op = gimple_assign_rhs2 (stmt);
2752 else
2753 op = gimple_assign_rhs3 (stmt);
2754 }
2755 break;
2756
2757 case CALL_EXPR:
2758 op = gimple_call_arg (stmt, op_num);
2759 break;
2760
aa17a496 2761 case LSHIFT_EXPR:
2762 case RSHIFT_EXPR:
2763 case LROTATE_EXPR:
2764 case RROTATE_EXPR:
2765 op = gimple_op (stmt, op_num + 1);
2766 /* Unlike the other binary operators, shifts/rotates have
2767 the shift count being int, instead of the same type as
2768 the lhs, so make sure the scalar is the right type if
2769 we are dealing with vectors of
2770 long long/long/short/char. */
3533e58d 2771 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
aa17a496 2772 op = fold_convert (TREE_TYPE (vector_type), op);
2773 break;
2774
98dbf32d 2775 default:
2776 op = gimple_op (stmt, op_num + 1);
aa17a496 2777 break;
f2104a54 2778 }
2779 }
48e1416a 2780
eefa05c8 2781 if (reduc_index != -1)
2782 {
d706d35c 2783 loop = (gimple_bb (stmt))->loop_father;
2784 def_stmt = SSA_NAME_DEF_STMT (op);
eefa05c8 2785
2786 gcc_assert (loop);
39a5d6b1 2787
2788 /* Get the def before the loop. In reduction chain we have only
2789 one initial value. */
2790 if ((j != (number_of_copies - 1)
2791 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2792 && i != 0))
2793 && neutral_op)
eefa05c8 2794 op = neutral_op;
39a5d6b1 2795 else
2796 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2797 loop_preheader_edge (loop));
eefa05c8 2798 }
2799
fb85abff 2800 /* Create 'vect_ = {op0,op1,...,opn}'. */
fb85abff 2801 number_of_places_left_in_vector--;
6501e749 2802 tree orig_op = op;
0ff8139c 2803 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
6fa78636 2804 {
3533e58d 2805 if (CONSTANT_CLASS_P (op))
0ff8139c 2806 {
2807 op = fold_unary (VIEW_CONVERT_EXPR,
2808 TREE_TYPE (vector_type), op);
2809 gcc_assert (op && CONSTANT_CLASS_P (op));
2810 }
2811 else
2812 {
f9e245b2 2813 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
0ff8139c 2814 gimple init_stmt;
e9cf809e 2815 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
0ff8139c 2816 init_stmt
e9cf809e 2817 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
0ff8139c 2818 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2819 op = new_temp;
2820 }
6fa78636 2821 }
fadf62f4 2822 elts[number_of_places_left_in_vector] = op;
3533e58d 2823 if (!CONSTANT_CLASS_P (op))
2824 constant_p = false;
6501e749 2825 if (TREE_CODE (orig_op) == SSA_NAME
2826 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2827 && STMT_VINFO_BB_VINFO (stmt_vinfo)
2828 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2829 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2830 place_after_defs = true;
fb85abff 2831
2832 if (number_of_places_left_in_vector == 0)
2833 {
2834 number_of_places_left_in_vector = nunits;
2835
2836 if (constant_p)
fadf62f4 2837 vec_cst = build_vector (vector_type, elts);
fb85abff 2838 else
fadf62f4 2839 {
f1f41a6c 2840 vec<constructor_elt, va_gc> *v;
fadf62f4 2841 unsigned k;
f1f41a6c 2842 vec_alloc (v, nunits);
fadf62f4 2843 for (k = 0; k < nunits; ++k)
2844 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2845 vec_cst = build_constructor (vector_type, v);
2846 }
6501e749 2847 tree init;
2848 gimple_stmt_iterator gsi;
2849 if (place_after_defs)
2850 {
2851 gsi = gsi_for_stmt
2852 (vect_find_last_scalar_stmt_in_slp (slp_node));
2853 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2854 }
2855 else
2856 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
0ff8139c 2857 if (ctor_seq != NULL)
2858 {
6501e749 2859 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
0ff8139c 2860 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2861 GSI_SAME_STMT);
2862 ctor_seq = NULL;
2863 }
6501e749 2864 voprnds.quick_push (init);
2865 place_after_defs = false;
fb85abff 2866 }
2867 }
2868 }
2869
48e1416a 2870 /* Since the vectors are created in the reverse order, we should invert
fb85abff 2871 them. */
f1f41a6c 2872 vec_num = voprnds.length ();
fadf62f4 2873 for (j = vec_num; j != 0; j--)
fb85abff 2874 {
f1f41a6c 2875 vop = voprnds[j - 1];
2876 vec_oprnds->quick_push (vop);
fb85abff 2877 }
2878
f1f41a6c 2879 voprnds.release ();
fb85abff 2880
2881 /* In case that VF is greater than the unrolling factor needed for the SLP
48e1416a 2882 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2883 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
fb85abff 2884 to replicate the vectors. */
f1f41a6c 2885 while (number_of_vectors > vec_oprnds->length ())
fb85abff 2886 {
eefa05c8 2887 tree neutral_vec = NULL;
2888
2889 if (neutral_op)
2890 {
2891 if (!neutral_vec)
b797154e 2892 neutral_vec = build_vector_from_val (vector_type, neutral_op);
eefa05c8 2893
f1f41a6c 2894 vec_oprnds->quick_push (neutral_vec);
eefa05c8 2895 }
2896 else
2897 {
f1f41a6c 2898 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2899 vec_oprnds->quick_push (vop);
eefa05c8 2900 }
fb85abff 2901 }
2902}
2903
2904
2905/* Get vectorized definitions from SLP_NODE that contains corresponding
2906 vectorized def-stmts. */
2907
2908static void
f1f41a6c 2909vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
fb85abff 2910{
2911 tree vec_oprnd;
2912 gimple vec_def_stmt;
2913 unsigned int i;
2914
f1f41a6c 2915 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
fb85abff 2916
f1f41a6c 2917 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
fb85abff 2918 {
2919 gcc_assert (vec_def_stmt);
2920 vec_oprnd = gimple_get_lhs (vec_def_stmt);
f1f41a6c 2921 vec_oprnds->quick_push (vec_oprnd);
fb85abff 2922 }
2923}
2924
2925
48e1416a 2926/* Get vectorized definitions for SLP_NODE.
2927 If the scalar definitions are loop invariants or constants, collect them and
fb85abff 2928 call vect_get_constant_vectors() to create vector stmts.
2929 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
b0f64919 2930 must be stored in the corresponding child of SLP_NODE, and we call
2931 vect_get_slp_vect_defs () to retrieve them. */
48e1416a 2932
fb85abff 2933void
f1f41a6c 2934vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
7f7695a7 2935 vec<vec<tree> > *vec_oprnds, int reduc_index)
fb85abff 2936{
a8bcfda2 2937 gimple first_stmt;
b0f64919 2938 int number_of_vects = 0, i;
2939 unsigned int child_index = 0;
48e1416a 2940 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
b0f64919 2941 slp_tree child = NULL;
7f7695a7 2942 vec<tree> vec_defs;
a8bcfda2 2943 tree oprnd;
b0f64919 2944 bool vectorized_defs;
fb85abff 2945
f1f41a6c 2946 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2947 FOR_EACH_VEC_ELT (ops, i, oprnd)
fb85abff 2948 {
b0f64919 2949 /* For each operand we check if it has vectorized definitions in a child
2950 node or we need to create them (for invariants and constants). We
2951 check if the LHS of the first stmt of the next child matches OPRND.
2952 If it does, we found the correct child. Otherwise, we call
2953 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2954 to check this child node for the next operand. */
2955 vectorized_defs = false;
f1f41a6c 2956 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
fb85abff 2957 {
678e3d6e 2958 child = SLP_TREE_CHILDREN (slp_node)[child_index];
b0f64919 2959
a8bcfda2 2960 /* We have to check both pattern and original def, if available. */
6501e749 2961 if (child)
a8bcfda2 2962 {
6501e749 2963 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2964 gimple related
2965 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2966
2967 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2968 || (related
2969 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2970 {
2971 /* The number of vector defs is determined by the number of
2972 vector statements in the node from which we get those
2973 statements. */
2974 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2975 vectorized_defs = true;
2976 child_index++;
2977 }
a8bcfda2 2978 }
6501e749 2979 else
2980 child_index++;
b0f64919 2981 }
fb85abff 2982
b0f64919 2983 if (!vectorized_defs)
2984 {
2985 if (i == 0)
2986 {
2987 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2988 /* Number of vector stmts was calculated according to LHS in
2989 vect_schedule_slp_instance (), fix it by replacing LHS with
2990 RHS, if necessary. See vect_get_smallest_scalar_type () for
2991 details. */
2992 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2993 &rhs_size_unit);
2994 if (rhs_size_unit != lhs_size_unit)
2995 {
2996 number_of_vects *= rhs_size_unit;
2997 number_of_vects /= lhs_size_unit;
2998 }
2999 }
3000 }
eefa05c8 3001
b0f64919 3002 /* Allocate memory for vectorized defs. */
7f7695a7 3003 vec_defs = vNULL;
3004 vec_defs.create (number_of_vects);
fb85abff 3005
b0f64919 3006 /* For reduction defs we call vect_get_constant_vectors (), since we are
3007 looking for initial loop invariant values. */
3008 if (vectorized_defs && reduc_index == -1)
3009 /* The defs are already vectorized. */
7f7695a7 3010 vect_get_slp_vect_defs (child, &vec_defs);
b0f64919 3011 else
3012 /* Build vectors from scalar defs. */
7f7695a7 3013 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
b0f64919 3014 number_of_vects, reduc_index);
fb85abff 3015
7f7695a7 3016 vec_oprnds->quick_push (vec_defs);
fb85abff 3017
b0f64919 3018 /* For reductions, we only need initial values. */
3019 if (reduc_index != -1)
3020 return;
3021 }
fb85abff 3022}
3023
37545e54 3024
48e1416a 3025/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
fb85abff 3026 building a vector of type MASK_TYPE from it) and two input vectors placed in
3027 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
3028 shifting by STRIDE elements of DR_CHAIN for every copy.
3029 (STRIDE is the number of vectorized stmts for NODE divided by the number of
48e1416a 3030 copies).
fb85abff 3031 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
3032 the created stmts must be inserted. */
3033
3034static inline void
48e1416a 3035vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
afaef242 3036 tree mask, int first_vec_indx, int second_vec_indx,
48e1416a 3037 gimple_stmt_iterator *gsi, slp_tree node,
f1f41a6c 3038 tree vectype, vec<tree> dr_chain,
fb85abff 3039 int ncopies, int vect_stmts_counter)
3040{
afaef242 3041 tree perm_dest;
fb85abff 3042 gimple perm_stmt = NULL;
3043 stmt_vec_info next_stmt_info;
f018d957 3044 int i, stride;
fb85abff 3045 tree first_vec, second_vec, data_ref;
fb85abff 3046
fb85abff 3047 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
fb85abff 3048
48e1416a 3049 /* Initialize the vect stmts of NODE to properly insert the generated
fb85abff 3050 stmts later. */
f1f41a6c 3051 for (i = SLP_TREE_VEC_STMTS (node).length ();
fb85abff 3052 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
f1f41a6c 3053 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
fb85abff 3054
3055 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3056 for (i = 0; i < ncopies; i++)
3057 {
f1f41a6c 3058 first_vec = dr_chain[first_vec_indx];
3059 second_vec = dr_chain[second_vec_indx];
fb85abff 3060
fb85abff 3061 /* Generate the permute statement. */
e9cf809e 3062 perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3063 first_vec, second_vec, mask);
fb85abff 3064 data_ref = make_ssa_name (perm_dest, perm_stmt);
964ec9cd 3065 gimple_set_lhs (perm_stmt, data_ref);
fb85abff 3066 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
fb85abff 3067
48e1416a 3068 /* Store the vector statement in NODE. */
f1f41a6c 3069 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
fb85abff 3070
3071 first_vec_indx += stride;
3072 second_vec_indx += stride;
3073 }
3074
3075 /* Mark the scalar stmt as vectorized. */
3076 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
3077 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
3078}
3079
3080
48e1416a 3081/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
fb85abff 3082 return in CURRENT_MASK_ELEMENT its equivalent in target specific
282bf14c 3083 representation. Check that the mask is valid and return FALSE if not.
fb85abff 3084 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
3085 the next vector, i.e., the current first vector is not needed. */
48e1416a 3086
fb85abff 3087static bool
48e1416a 3088vect_get_mask_element (gimple stmt, int first_mask_element, int m,
fb85abff 3089 int mask_nunits, bool only_one_vec, int index,
e21c468f 3090 unsigned char *mask, int *current_mask_element,
90674129 3091 bool *need_next_vector, int *number_of_mask_fixes,
3092 bool *mask_fixed, bool *needs_first_vector)
fb85abff 3093{
3094 int i;
fb85abff 3095
3096 /* Convert to target specific representation. */
3097 *current_mask_element = first_mask_element + m;
3098 /* Adjust the value in case it's a mask for second and third vectors. */
90674129 3099 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
fb85abff 3100
257941fb 3101 if (*current_mask_element < 0)
3102 {
3103 if (dump_enabled_p ())
3104 {
3105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3106 "permutation requires past vector ");
3107 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3108 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3109 }
3110 return false;
3111 }
3112
fb85abff 3113 if (*current_mask_element < mask_nunits)
90674129 3114 *needs_first_vector = true;
fb85abff 3115
3116 /* We have only one input vector to permute but the mask accesses values in
3117 the next vector as well. */
3118 if (only_one_vec && *current_mask_element >= mask_nunits)
3119 {
6d8fb6cf 3120 if (dump_enabled_p ())
fb85abff 3121 {
78bb46f5 3122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 3123 "permutation requires at least two vectors ");
3124 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3125 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 3126 }
3127
3128 return false;
3129 }
3130
3131 /* The mask requires the next vector. */
e5cf270d 3132 while (*current_mask_element >= mask_nunits * 2)
fb85abff 3133 {
90674129 3134 if (*needs_first_vector || *mask_fixed)
fb85abff 3135 {
3136 /* We either need the first vector too or have already moved to the
48e1416a 3137 next vector. In both cases, this permutation needs three
fb85abff 3138 vectors. */
6d8fb6cf 3139 if (dump_enabled_p ())
fb85abff 3140 {
7bd765d4 3141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3142 "permutation requires at "
3143 "least three vectors ");
3144 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3145 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 3146 }
3147
3148 return false;
3149 }
3150
3151 /* We move to the next vector, dropping the first one and working with
3152 the second and the third - we need to adjust the values of the mask
3153 accordingly. */
90674129 3154 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
fb85abff 3155
3156 for (i = 0; i < index; i++)
90674129 3157 mask[i] -= mask_nunits * *number_of_mask_fixes;
fb85abff 3158
90674129 3159 (*number_of_mask_fixes)++;
3160 *mask_fixed = true;
fb85abff 3161 }
3162
90674129 3163 *need_next_vector = *mask_fixed;
fb85abff 3164
3165 /* This was the last element of this mask. Start a new one. */
3166 if (index == mask_nunits - 1)
3167 {
90674129 3168 *number_of_mask_fixes = 1;
3169 *mask_fixed = false;
3170 *needs_first_vector = false;
fb85abff 3171 }
3172
3173 return true;
3174}
3175
3176
3177/* Generate vector permute statements from a list of loads in DR_CHAIN.
3178 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
678e3d6e 3179 permute statements for the SLP node NODE of the SLP instance
3180 SLP_NODE_INSTANCE. */
3181
fb85abff 3182bool
678e3d6e 3183vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
fb85abff 3184 gimple_stmt_iterator *gsi, int vf,
3185 slp_instance slp_node_instance, bool analyze_only)
3186{
678e3d6e 3187 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
fb85abff 3188 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3189 tree mask_element_type = NULL_TREE, mask_type;
964ec9cd 3190 int i, j, k, nunits, vec_index = 0, scalar_index;
964ec9cd 3191 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
fb85abff 3192 gimple next_scalar_stmt;
3193 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3194 int first_mask_element;
e21c468f 3195 int index, unroll_factor, current_mask_element, ncopies;
3196 unsigned char *mask;
fb85abff 3197 bool only_one_vec = false, need_next_vector = false;
3198 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
90674129 3199 int number_of_mask_fixes = 1;
3200 bool mask_fixed = false;
3201 bool needs_first_vector = false;
3754d046 3202 machine_mode mode;
fb85abff 3203
e21c468f 3204 mode = TYPE_MODE (vectype);
3205
3206 if (!can_vec_perm_p (mode, false, NULL))
fb85abff 3207 {
6d8fb6cf 3208 if (dump_enabled_p ())
fb85abff 3209 {
7bd765d4 3210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3211 "no vect permute for ");
3212 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3213 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 3214 }
964ec9cd 3215 return false;
fb85abff 3216 }
3217
964ec9cd 3218 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3219 same size as the vector element being permuted. */
3cea8318 3220 mask_element_type = lang_hooks.types.type_for_mode
3221 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
fb85abff 3222 mask_type = get_vectype_for_scalar_type (mask_element_type);
fb85abff 3223 nunits = TYPE_VECTOR_SUBPARTS (vectype);
e21c468f 3224 mask = XALLOCAVEC (unsigned char, nunits);
fb85abff 3225 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3226
3227 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
3228 unrolling factor. */
48e1416a 3229 orig_vec_stmts_num = group_size *
fb85abff 3230 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
3231 if (orig_vec_stmts_num == 1)
3232 only_one_vec = true;
3233
48e1416a 3234 /* Number of copies is determined by the final vectorization factor
fb85abff 3235 relatively to SLP_NODE_INSTANCE unrolling factor. */
48e1416a 3236 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
fb85abff 3237
678e3d6e 3238 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3239 return false;
3240
4e30dcb3 3241 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3242
48e1416a 3243 /* Generate permutation masks for every NODE. Number of masks for each NODE
3244 is equal to GROUP_SIZE.
3245 E.g., we have a group of three nodes with three loads from the same
3246 location in each node, and the vector size is 4. I.e., we have a
3247 a0b0c0a1b1c1... sequence and we need to create the following vectors:
fb85abff 3248 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3249 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3250 ...
3251
964ec9cd 3252 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
48e1416a 3253 The last mask is illegal since we assume two operands for permute
282bf14c 3254 operation, and the mask element values can't be outside that range.
3255 Hence, the last mask must be converted into {2,5,5,5}.
48e1416a 3256 For the first two permutations we need the first and the second input
fb85abff 3257 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
48e1416a 3258 we need the second and the third vectors: {b1,c1,a2,b2} and
fb85abff 3259 {c2,a3,b3,c3}. */
3260
5c6f6a61 3261 {
fb85abff 3262 scalar_index = 0;
3263 index = 0;
3264 vect_stmts_counter = 0;
3265 vec_index = 0;
3266 first_vec_index = vec_index++;
3267 if (only_one_vec)
3268 second_vec_index = first_vec_index;
3269 else
3270 second_vec_index = vec_index++;
3271
3272 for (j = 0; j < unroll_factor; j++)
3273 {
3274 for (k = 0; k < group_size; k++)
3275 {
678e3d6e 3276 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
4e30dcb3 3277 first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info);
964ec9cd 3278 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3279 nunits, only_one_vec, index,
3280 mask, &current_mask_element,
3281 &need_next_vector,
3282 &number_of_mask_fixes, &mask_fixed,
3283 &needs_first_vector))
3284 return false;
257941fb 3285 gcc_assert (current_mask_element >= 0
3286 && current_mask_element < 2 * nunits);
964ec9cd 3287 mask[index++] = current_mask_element;
fb85abff 3288
964ec9cd 3289 if (index == nunits)
fb85abff 3290 {
678e3d6e 3291 index = 0;
e21c468f 3292 if (!can_vec_perm_p (mode, false, mask))
3293 {
6d8fb6cf 3294 if (dump_enabled_p ())
e21c468f 3295 {
7bd765d4 3296 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3297 vect_location,
3298 "unsupported vect permute { ");
e21c468f 3299 for (i = 0; i < nunits; ++i)
7bd765d4 3300 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3301 mask[i]);
3302 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
e21c468f 3303 }
3304 return false;
3305 }
3306
fb85abff 3307 if (!analyze_only)
3308 {
678e3d6e 3309 int l;
3310 tree mask_vec, *mask_elts;
3311 mask_elts = XALLOCAVEC (tree, nunits);
3312 for (l = 0; l < nunits; ++l)
3313 mask_elts[l] = build_int_cst (mask_element_type,
3314 mask[l]);
3315 mask_vec = build_vector (mask_type, mask_elts);
3316
3317 if (need_next_vector)
fb85abff 3318 {
3319 first_vec_index = second_vec_index;
3320 second_vec_index = vec_index;
3321 }
3322
f1f41a6c 3323 next_scalar_stmt
3324 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
fb85abff 3325
3326 vect_create_mask_and_perm (stmt, next_scalar_stmt,
afaef242 3327 mask_vec, first_vec_index, second_vec_index,
964ec9cd 3328 gsi, node, vectype, dr_chain,
afaef242 3329 ncopies, vect_stmts_counter++);
fb85abff 3330 }
48e1416a 3331 }
3332 }
3333 }
3334 }
fb85abff 3335
fb85abff 3336 return true;
3337}
3338
3339
3340
3341/* Vectorize SLP instance tree in postorder. */
3342
3343static bool
3344vect_schedule_slp_instance (slp_tree node, slp_instance instance,
37545e54 3345 unsigned int vectorization_factor)
fb85abff 3346{
3347 gimple stmt;
ee612634 3348 bool grouped_store, is_store;
fb85abff 3349 gimple_stmt_iterator si;
3350 stmt_vec_info stmt_info;
3351 unsigned int vec_stmts_size, nunits, group_size;
3352 tree vectype;
3353 int i;
40bcc7c2 3354 slp_tree child;
fb85abff 3355
3356 if (!node)
3357 return false;
3358
f1f41a6c 3359 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 3360 vect_schedule_slp_instance (child, instance, vectorization_factor);
48e1416a 3361
f1f41a6c 3362 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
fb85abff 3363 stmt_info = vinfo_for_stmt (stmt);
3364
3365 /* VECTYPE is the type of the destination. */
b334cbba 3366 vectype = STMT_VINFO_VECTYPE (stmt_info);
fb85abff 3367 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3368 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3369
3370 /* For each SLP instance calculate number of vector stmts to be created
282bf14c 3371 for the scalar stmts in each node of the SLP tree. Number of vector
fb85abff 3372 elements in one vector iteration is the number of scalar elements in
3373 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
34563054 3374 size.
3375 Unless this is a SLP reduction in which case the number of vector
3376 stmts is equal to the number of vector stmts of the children. */
3377 if (GROUP_FIRST_ELEMENT (stmt_info)
3378 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3379 vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3380 else
3381 vec_stmts_size = (vectorization_factor * group_size) / nunits;
fb85abff 3382
f1f41a6c 3383 if (!SLP_TREE_VEC_STMTS (node).exists ())
fb85abff 3384 {
f1f41a6c 3385 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
fb85abff 3386 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3387 }
3388
6d8fb6cf 3389 if (dump_enabled_p ())
fb85abff 3390 {
7bd765d4 3391 dump_printf_loc (MSG_NOTE,vect_location,
3392 "------>vectorizing SLP node starting from: ");
3393 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 3394 dump_printf (MSG_NOTE, "\n");
48e1416a 3395 }
fb85abff 3396
b4026f25 3397 /* Vectorized stmts go before the last scalar stmt which is where
3398 all uses are ready. */
3399 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
d4b21757 3400
39a5d6b1 3401 /* Mark the first element of the reduction chain as reduction to properly
3402 transform the node. In the analysis phase only the last element of the
3403 chain is marked as reduction. */
ee612634 3404 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
39a5d6b1 3405 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3406 {
3407 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3408 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3409 }
3410
66e30248 3411 /* Handle two-operation SLP nodes by vectorizing the group with
3412 both operations and then performing a merge. */
3413 if (SLP_TREE_TWO_OPERATORS (node))
3414 {
3415 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3416 enum tree_code ocode;
3417 gimple ostmt;
3418 unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3419 bool allsame = true;
3420 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3421 if (gimple_assign_rhs_code (ostmt) != code0)
3422 {
3423 mask[i] = 1;
3424 allsame = false;
3425 ocode = gimple_assign_rhs_code (ostmt);
3426 }
3427 else
3428 mask[i] = 0;
3429 if (!allsame)
3430 {
3431 vec<gimple> v0;
3432 vec<gimple> v1;
3433 unsigned j;
3434 tree tmask = NULL_TREE;
3435 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3436 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3437 SLP_TREE_VEC_STMTS (node).truncate (0);
3438 gimple_assign_set_rhs_code (stmt, ocode);
3439 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3440 gimple_assign_set_rhs_code (stmt, code0);
3441 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3442 SLP_TREE_VEC_STMTS (node).truncate (0);
3443 tree meltype = build_nonstandard_integer_type
3444 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3445 tree mvectype = get_same_sized_vectype (meltype, vectype);
3446 unsigned k = 0, l;
3447 for (j = 0; j < v0.length (); ++j)
3448 {
3449 tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3450 for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3451 {
7f2a705a 3452 if (k >= group_size)
66e30248 3453 k = 0;
3454 melts[l] = build_int_cst
3455 (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3456 }
3457 tmask = build_vector (mvectype, melts);
3458
3459 /* ??? Not all targets support a VEC_PERM_EXPR with a
3460 constant mask that would translate to a vec_merge RTX
3461 (with their vec_perm_const_ok). We can either not
3462 vectorize in that case or let veclower do its job.
3463 Unfortunately that isn't too great and at least for
3464 plus/minus we'd eventually like to match targets
3465 vector addsub instructions. */
3466 gimple vstmt;
3467 vstmt = gimple_build_assign (make_ssa_name (vectype),
3468 VEC_PERM_EXPR,
3469 gimple_assign_lhs (v0[j]),
3470 gimple_assign_lhs (v1[j]), tmask);
3471 vect_finish_stmt_generation (stmt, vstmt, &si);
3472 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3473 }
3474 v0.release ();
3475 v1.release ();
3476 return false;
3477 }
3478 }
ee612634 3479 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
eefa05c8 3480 return is_store;
fb85abff 3481}
3482
529fcc2a 3483/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3484 For loop vectorization this is done in vectorizable_call, but for SLP
3485 it needs to be deferred until end of vect_schedule_slp, because multiple
3486 SLP instances may refer to the same scalar stmt. */
3487
3488static void
3489vect_remove_slp_scalar_calls (slp_tree node)
3490{
3491 gimple stmt, new_stmt;
3492 gimple_stmt_iterator gsi;
3493 int i;
40bcc7c2 3494 slp_tree child;
529fcc2a 3495 tree lhs;
3496 stmt_vec_info stmt_info;
3497
3498 if (!node)
3499 return;
3500
f1f41a6c 3501 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
40bcc7c2 3502 vect_remove_slp_scalar_calls (child);
529fcc2a 3503
f1f41a6c 3504 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
529fcc2a 3505 {
3506 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3507 continue;
3508 stmt_info = vinfo_for_stmt (stmt);
3509 if (stmt_info == NULL
3510 || is_pattern_stmt_p (stmt_info)
3511 || !PURE_SLP_STMT (stmt_info))
3512 continue;
3513 lhs = gimple_call_lhs (stmt);
3514 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3515 set_vinfo_for_stmt (new_stmt, stmt_info);
3516 set_vinfo_for_stmt (stmt, NULL);
3517 STMT_VINFO_STMT (stmt_info) = new_stmt;
3518 gsi = gsi_for_stmt (stmt);
3519 gsi_replace (&gsi, new_stmt, false);
3520 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3521 }
3522}
fb85abff 3523
282bf14c 3524/* Generate vector code for all SLP instances in the loop/basic block. */
3525
fb85abff 3526bool
37545e54 3527vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 3528{
f1f41a6c 3529 vec<slp_instance> slp_instances;
fb85abff 3530 slp_instance instance;
678e3d6e 3531 unsigned int i, vf;
fb85abff 3532 bool is_store = false;
3533
37545e54 3534 if (loop_vinfo)
3535 {
3536 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3537 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
48e1416a 3538 }
37545e54 3539 else
3540 {
3541 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3542 vf = 1;
48e1416a 3543 }
37545e54 3544
f1f41a6c 3545 FOR_EACH_VEC_ELT (slp_instances, i, instance)
fb85abff 3546 {
3547 /* Schedule the tree of INSTANCE. */
3548 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
37545e54 3549 instance, vf);
6d8fb6cf 3550 if (dump_enabled_p ())
7bd765d4 3551 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 3552 "vectorizing stmts using SLP.\n");
fb85abff 3553 }
3554
f1f41a6c 3555 FOR_EACH_VEC_ELT (slp_instances, i, instance)
eefa05c8 3556 {
3557 slp_tree root = SLP_INSTANCE_TREE (instance);
3558 gimple store;
3559 unsigned int j;
3560 gimple_stmt_iterator gsi;
3561
814c4f66 3562 /* Remove scalar call stmts. Do not do this for basic-block
3563 vectorization as not all uses may be vectorized.
3564 ??? Why should this be necessary? DCE should be able to
3565 remove the stmts itself.
3566 ??? For BB vectorization we can as well remove scalar
3567 stmts starting from the SLP tree root if they have no
3568 uses. */
3569 if (loop_vinfo)
3570 vect_remove_slp_scalar_calls (root);
529fcc2a 3571
f1f41a6c 3572 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
eefa05c8 3573 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3574 {
3575 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3576 break;
3577
0bc79e04 3578 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3579 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
eefa05c8 3580 /* Free the attached stmt_vec_info and remove the stmt. */
3581 gsi = gsi_for_stmt (store);
bc8a8451 3582 unlink_stmt_vdef (store);
eefa05c8 3583 gsi_remove (&gsi, true);
bc8a8451 3584 release_defs (store);
eefa05c8 3585 free_stmt_vec_info (store);
3586 }
3587 }
3588
fb85abff 3589 return is_store;
3590}
37545e54 3591
3592
3593/* Vectorize the basic block. */
3594
3595void
3596vect_slp_transform_bb (basic_block bb)
3597{
3598 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3599 gimple_stmt_iterator si;
3600
3601 gcc_assert (bb_vinfo);
3602
6d8fb6cf 3603 if (dump_enabled_p ())
7bd765d4 3604 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
37545e54 3605
3606 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3607 {
3608 gimple stmt = gsi_stmt (si);
3609 stmt_vec_info stmt_info;
3610
6d8fb6cf 3611 if (dump_enabled_p ())
37545e54 3612 {
7bd765d4 3613 dump_printf_loc (MSG_NOTE, vect_location,
3614 "------>SLPing statement: ");
3615 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
78bb46f5 3616 dump_printf (MSG_NOTE, "\n");
37545e54 3617 }
3618
3619 stmt_info = vinfo_for_stmt (stmt);
3620 gcc_assert (stmt_info);
3621
3622 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3623 if (STMT_SLP_TYPE (stmt_info))
3624 {
3625 vect_schedule_slp (NULL, bb_vinfo);
3626 break;
3627 }
3628 }
3629
6d8fb6cf 3630 if (dump_enabled_p ())
a21425b5 3631 dump_printf_loc (MSG_NOTE, vect_location,
b055bc88 3632 "BASIC BLOCK VECTORIZED\n");
37545e54 3633
765a637f 3634 destroy_bb_vec_info (bb_vinfo);
3635}