]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
[combine] Don't create LSHIFTRT of zero bits in change_zero_ext
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
5624e564 2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
957060b5 35#include "params.h"
40e23961 36#include "fold-const.h"
d8a2d370 37#include "stor-layout.h"
5be5c238 38#include "gimple-iterator.h"
ebfd146a 39#include "cfgloop.h"
ebfd146a 40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
642fce57 42#include "gimple-walk.h"
428db0ba 43#include "dbgcnt.h"
a70d6342
IR
44
45
ebfd146a
IR
46/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
47
48static void
49vect_free_slp_tree (slp_tree node)
50{
d092494c 51 int i;
d755c7ef 52 slp_tree child;
d092494c 53
ebfd146a
IR
54 if (!node)
55 return;
56
9771b263 57 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 58 vect_free_slp_tree (child);
b8698a0f 59
9771b263
DN
60 SLP_TREE_CHILDREN (node).release ();
61 SLP_TREE_SCALAR_STMTS (node).release ();
62 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 63 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
64
65 free (node);
66}
67
68
69/* Free the memory allocated for the SLP instance. */
70
71void
72vect_free_slp_instance (slp_instance instance)
73{
74 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
9771b263 75 SLP_INSTANCE_LOADS (instance).release ();
c7e62a26 76 free (instance);
ebfd146a
IR
77}
78
79
d092494c
IR
80/* Create an SLP node for SCALAR_STMTS. */
81
82static slp_tree
355fe088 83vect_create_new_slp_node (vec<gimple *> scalar_stmts)
d092494c 84{
d3cfd39e 85 slp_tree node;
355fe088 86 gimple *stmt = scalar_stmts[0];
d092494c
IR
87 unsigned int nops;
88
89 if (is_gimple_call (stmt))
90 nops = gimple_call_num_args (stmt);
91 else if (is_gimple_assign (stmt))
f7e531cf
IR
92 {
93 nops = gimple_num_ops (stmt) - 1;
94 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
95 nops++;
96 }
d092494c
IR
97 else
98 return NULL;
99
d3cfd39e 100 node = XNEW (struct _slp_tree);
d092494c 101 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263
DN
102 SLP_TREE_VEC_STMTS (node).create (0);
103 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 104 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 105 SLP_TREE_TWO_OPERATORS (node) = false;
d092494c
IR
106
107 return node;
108}
109
110
ddf56386
RB
111/* This structure is used in creation of an SLP tree. Each instance
112 corresponds to the same operand in a group of scalar stmts in an SLP
113 node. */
114typedef struct _slp_oprnd_info
115{
116 /* Def-stmts for the operands. */
117 vec<gimple *> def_stmts;
118 /* Information about the first statement, its vector def-type, type, the
119 operand itself in case it's constant, and an indication if it's a pattern
120 stmt. */
121 enum vect_def_type first_dt;
122 tree first_op_type;
123 bool first_pattern;
124 bool second_pattern;
125} *slp_oprnd_info;
126
127
d092494c
IR
128/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
129 operand. */
9771b263 130static vec<slp_oprnd_info>
d092494c
IR
131vect_create_oprnd_info (int nops, int group_size)
132{
133 int i;
134 slp_oprnd_info oprnd_info;
9771b263 135 vec<slp_oprnd_info> oprnds_info;
d092494c 136
9771b263 137 oprnds_info.create (nops);
d092494c
IR
138 for (i = 0; i < nops; i++)
139 {
140 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 141 oprnd_info->def_stmts.create (group_size);
d092494c 142 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 143 oprnd_info->first_op_type = NULL_TREE;
d092494c 144 oprnd_info->first_pattern = false;
effb52da 145 oprnd_info->second_pattern = false;
9771b263 146 oprnds_info.quick_push (oprnd_info);
d092494c
IR
147 }
148
149 return oprnds_info;
150}
151
152
d3cfd39e
JJ
153/* Free operands info. */
154
d092494c 155static void
9771b263 156vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
157{
158 int i;
159 slp_oprnd_info oprnd_info;
160
9771b263 161 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 162 {
9771b263 163 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
164 XDELETE (oprnd_info);
165 }
d092494c 166
9771b263 167 oprnds_info.release ();
d092494c
IR
168}
169
170
d755c7ef
RB
171/* Find the place of the data-ref in STMT in the interleaving chain that starts
172 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
173
174static int
355fe088 175vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
d755c7ef 176{
355fe088 177 gimple *next_stmt = first_stmt;
d755c7ef
RB
178 int result = 0;
179
180 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
181 return -1;
182
183 do
184 {
185 if (next_stmt == stmt)
186 return result;
d755c7ef 187 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
c8047699
RB
188 if (next_stmt)
189 result += GROUP_GAP (vinfo_for_stmt (next_stmt));
d755c7ef
RB
190 }
191 while (next_stmt);
192
193 return -1;
194}
195
196
d092494c
IR
197/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
198 they are of a valid type and that they match the defs of the first stmt of
b0b4483e
RB
199 the SLP group (stored in OPRNDS_INFO). If there was a fatal error
200 return -1, if the error could be corrected by swapping operands of the
201 operation return 1, if everything is ok return 0. */
ebfd146a 202
b0b4483e 203static int
310213d4 204vect_get_and_check_slp_defs (vec_info *vinfo,
355fe088 205 gimple *stmt, unsigned stmt_num,
23847df4 206 vec<slp_oprnd_info> *oprnds_info)
ebfd146a
IR
207{
208 tree oprnd;
209 unsigned int i, number_of_oprnds;
355fe088 210 gimple *def_stmt;
d092494c 211 enum vect_def_type dt = vect_uninitialized_def;
d092494c 212 bool pattern = false;
abf9bfbc 213 slp_oprnd_info oprnd_info;
b0b4483e
RB
214 int first_op_idx = 1;
215 bool commutative = false;
216 bool first_op_cond = false;
effb52da
RB
217 bool first = stmt_num == 0;
218 bool second = stmt_num == 1;
b8698a0f 219
d092494c 220 if (is_gimple_call (stmt))
190c2236
JJ
221 {
222 number_of_oprnds = gimple_call_num_args (stmt);
b0b4483e 223 first_op_idx = 3;
190c2236 224 }
f7e531cf
IR
225 else if (is_gimple_assign (stmt))
226 {
b0b4483e 227 enum tree_code code = gimple_assign_rhs_code (stmt);
f7e531cf 228 number_of_oprnds = gimple_num_ops (stmt) - 1;
a414c77f
IE
229 if (gimple_assign_rhs_code (stmt) == COND_EXPR
230 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
b0b4483e
RB
231 {
232 first_op_cond = true;
233 commutative = true;
234 number_of_oprnds++;
235 }
236 else
237 commutative = commutative_tree_code (code);
f7e531cf 238 }
d092494c 239 else
b0b4483e 240 return -1;
ebfd146a 241
b0b4483e 242 bool swapped = false;
ebfd146a
IR
243 for (i = 0; i < number_of_oprnds; i++)
244 {
b0b4483e
RB
245again:
246 if (first_op_cond)
f7e531cf 247 {
b0b4483e
RB
248 if (i == 0 || i == 1)
249 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
250 swapped ? !i : i);
251 else
252 oprnd = gimple_op (stmt, first_op_idx + i - 1);
f7e531cf
IR
253 }
254 else
b0b4483e 255 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
f7e531cf 256
9771b263 257 oprnd_info = (*oprnds_info)[i];
ebfd146a 258
81c40241 259 if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
ebfd146a 260 {
73fbfcad 261 if (dump_enabled_p ())
ebfd146a 262 {
78c60e3d 263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3fc356dc 264 "Build SLP failed: can't analyze def for ");
78c60e3d 265 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 266 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
267 }
268
b0b4483e 269 return -1;
ebfd146a
IR
270 }
271
a70d6342 272 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
ff802fa1 273 from the pattern. Check that all the stmts of the node are in the
ebfd146a 274 pattern. */
f5709183 275 if (def_stmt && gimple_bb (def_stmt)
61d371eb 276 && vect_stmt_in_region_p (vinfo, def_stmt)
ebfd146a 277 && vinfo_for_stmt (def_stmt)
83197f37 278 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
f5709183
IR
279 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
280 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
ebfd146a 281 {
d092494c 282 pattern = true;
effb52da
RB
283 if (!first && !oprnd_info->first_pattern
284 /* Allow different pattern state for the defs of the
285 first stmt in reduction chains. */
286 && (oprnd_info->first_dt != vect_reduction_def
287 || (!second && !oprnd_info->second_pattern)))
d092494c 288 {
b0b4483e
RB
289 if (i == 0
290 && !swapped
291 && commutative)
292 {
293 swapped = true;
294 goto again;
295 }
296
73fbfcad 297 if (dump_enabled_p ())
d092494c 298 {
78c60e3d
SS
299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
300 "Build SLP failed: some of the stmts"
301 " are in a pattern, and others are not ");
302 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 303 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
d092494c 304 }
ebfd146a 305
b0b4483e 306 return 1;
ebfd146a
IR
307 }
308
309 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
d092494c 310 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
ebfd146a 311
f7e531cf 312 if (dt == vect_unknown_def_type)
ebfd146a 313 {
73fbfcad 314 if (dump_enabled_p ())
78c60e3d 315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 316 "Unsupported pattern.\n");
b0b4483e 317 return -1;
ebfd146a
IR
318 }
319
320 switch (gimple_code (def_stmt))
321 {
81c40241
RB
322 case GIMPLE_PHI:
323 case GIMPLE_ASSIGN:
324 break;
325
326 default:
327 if (dump_enabled_p ())
328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
329 "unsupported defining stmt:\n");
330 return -1;
ebfd146a
IR
331 }
332 }
333
effb52da
RB
334 if (second)
335 oprnd_info->second_pattern = pattern;
336
d092494c 337 if (first)
ebfd146a 338 {
d092494c
IR
339 oprnd_info->first_dt = dt;
340 oprnd_info->first_pattern = pattern;
793d9a16 341 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 342 }
ebfd146a
IR
343 else
344 {
d092494c
IR
345 /* Not first stmt of the group, check that the def-stmt/s match
346 the def-stmt/s of the first stmt. Allow different definition
347 types for reduction chains: the first stmt must be a
348 vect_reduction_def (a phi node), and the rest
349 vect_internal_def. */
350 if (((oprnd_info->first_dt != dt
351 && !(oprnd_info->first_dt == vect_reduction_def
793d9a16
RB
352 && dt == vect_internal_def)
353 && !((oprnd_info->first_dt == vect_external_def
354 || oprnd_info->first_dt == vect_constant_def)
355 && (dt == vect_external_def
356 || dt == vect_constant_def)))
357 || !types_compatible_p (oprnd_info->first_op_type,
358 TREE_TYPE (oprnd))))
ebfd146a 359 {
b0b4483e
RB
360 /* Try swapping operands if we got a mismatch. */
361 if (i == 0
362 && !swapped
363 && commutative)
364 {
365 swapped = true;
366 goto again;
367 }
368
abf9bfbc
RB
369 if (dump_enabled_p ())
370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 371 "Build SLP failed: different types\n");
d092494c 372
b0b4483e 373 return 1;
ebfd146a
IR
374 }
375 }
376
377 /* Check the types of the definitions. */
d092494c 378 switch (dt)
ebfd146a
IR
379 {
380 case vect_constant_def:
8644a673 381 case vect_external_def:
d092494c 382 case vect_reduction_def:
ebfd146a 383 break;
b8698a0f 384
8644a673 385 case vect_internal_def:
abf9bfbc 386 oprnd_info->def_stmts.quick_push (def_stmt);
ebfd146a
IR
387 break;
388
389 default:
390 /* FORNOW: Not supported. */
73fbfcad 391 if (dump_enabled_p ())
ebfd146a 392 {
78c60e3d
SS
393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
394 "Build SLP failed: illegal type of def ");
81c40241 395 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 396 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
397 }
398
b0b4483e 399 return -1;
ebfd146a
IR
400 }
401 }
402
b0b4483e
RB
403 /* Swap operands. */
404 if (swapped)
405 {
406 if (first_op_cond)
407 {
408 tree cond = gimple_assign_rhs1 (stmt);
409 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
410 &TREE_OPERAND (cond, 1));
411 TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
412 }
413 else
414 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
415 gimple_assign_rhs2_ptr (stmt));
416 }
417
418 return 0;
ebfd146a
IR
419}
420
421
6983e6b5
RB
422/* Verify if the scalar stmts STMTS are isomorphic, require data
423 permutation or are of unsupported types of operation. Return
424 true if they are, otherwise return false and indicate in *MATCHES
425 which stmts are not isomorphic to the first one. If MATCHES[0]
426 is false then this indicates the comparison could not be
427 carried out or the stmts will never be vectorized by SLP. */
ebfd146a
IR
428
429static bool
310213d4 430vect_build_slp_tree_1 (vec_info *vinfo,
355fe088 431 vec<gimple *> stmts, unsigned int group_size,
6983e6b5 432 unsigned nops, unsigned int *max_nunits,
97a1a642 433 bool *matches, bool *two_operators)
ebfd146a 434{
ebfd146a 435 unsigned int i;
355fe088 436 gimple *first_stmt = stmts[0], *stmt = stmts[0];
6876e5bc
RB
437 enum tree_code first_stmt_code = ERROR_MARK;
438 enum tree_code alt_stmt_code = ERROR_MARK;
439 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 440 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 441 tree lhs;
6983e6b5 442 bool need_same_oprnds = false;
e00cdb8a 443 tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
ebfd146a
IR
444 optab optab;
445 int icode;
ef4bddc2
RS
446 machine_mode optab_op2_mode;
447 machine_mode vec_mode;
ebfd146a 448 HOST_WIDE_INT dummy;
355fe088 449 gimple *first_load = NULL, *prev_first_load = NULL;
d092494c 450
ebfd146a 451 /* For every stmt in NODE find its def stmt/s. */
9771b263 452 FOR_EACH_VEC_ELT (stmts, i, stmt)
ebfd146a 453 {
6983e6b5
RB
454 matches[i] = false;
455
73fbfcad 456 if (dump_enabled_p ())
ebfd146a 457 {
78c60e3d
SS
458 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
459 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
460 }
461
4b5caab7
IR
462 /* Fail to vectorize statements marked as unvectorizable. */
463 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
464 {
73fbfcad 465 if (dump_enabled_p ())
4b5caab7 466 {
78c60e3d
SS
467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
468 "Build SLP failed: unvectorizable statement ");
469 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 470 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4b5caab7 471 }
6983e6b5
RB
472 /* Fatal mismatch. */
473 matches[0] = false;
4b5caab7
IR
474 return false;
475 }
476
ebfd146a
IR
477 lhs = gimple_get_lhs (stmt);
478 if (lhs == NULL_TREE)
479 {
73fbfcad 480 if (dump_enabled_p ())
ebfd146a 481 {
78c60e3d
SS
482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
483 "Build SLP failed: not GIMPLE_ASSIGN nor "
484 "GIMPLE_CALL ");
485 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 486 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 487 }
6983e6b5
RB
488 /* Fatal mismatch. */
489 matches[0] = false;
ebfd146a
IR
490 return false;
491 }
492
b8698a0f 493 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
ebfd146a
IR
494 vectype = get_vectype_for_scalar_type (scalar_type);
495 if (!vectype)
496 {
73fbfcad 497 if (dump_enabled_p ())
ebfd146a 498 {
78c60e3d
SS
499 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
500 "Build SLP failed: unsupported data-type ");
501 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
502 scalar_type);
e645e942 503 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 504 }
6983e6b5
RB
505 /* Fatal mismatch. */
506 matches[0] = false;
ebfd146a
IR
507 return false;
508 }
b8698a0f 509
dfc55d30
RB
510 /* If populating the vector type requires unrolling then fail
511 before adjusting *max_nunits for basic-block vectorization. */
310213d4 512 if (is_a <bb_vec_info> (vinfo)
dfc55d30
RB
513 && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
514 {
515 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
516 "Build SLP failed: unrolling required "
517 "in basic block SLP\n");
518 /* Fatal mismatch. */
519 matches[0] = false;
520 return false;
521 }
522
4ef69dfc
IR
523 /* In case of multiple types we need to detect the smallest type. */
524 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
97a1a642 525 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
b8698a0f 526
538dd0b7 527 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
528 {
529 rhs_code = CALL_EXPR;
538dd0b7
DM
530 if (gimple_call_internal_p (call_stmt)
531 || gimple_call_tail_p (call_stmt)
532 || gimple_call_noreturn_p (call_stmt)
533 || !gimple_call_nothrow_p (call_stmt)
534 || gimple_call_chain (call_stmt))
190c2236 535 {
73fbfcad 536 if (dump_enabled_p ())
190c2236 537 {
78c60e3d
SS
538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
539 "Build SLP failed: unsupported call type ");
538dd0b7
DM
540 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
541 call_stmt, 0);
e645e942 542 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
190c2236 543 }
6983e6b5
RB
544 /* Fatal mismatch. */
545 matches[0] = false;
190c2236
JJ
546 return false;
547 }
548 }
ebfd146a
IR
549 else
550 rhs_code = gimple_assign_rhs_code (stmt);
551
552 /* Check the operation. */
553 if (i == 0)
554 {
555 first_stmt_code = rhs_code;
556
b8698a0f 557 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
558 vector shift with scalar shift operand. */
559 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
560 || rhs_code == LROTATE_EXPR
561 || rhs_code == RROTATE_EXPR)
562 {
563 vec_mode = TYPE_MODE (vectype);
564
565 /* First see if we have a vector/vector shift. */
566 optab = optab_for_tree_code (rhs_code, vectype,
567 optab_vector);
568
569 if (!optab
947131ba 570 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
571 {
572 /* No vector/vector shift, try for a vector/scalar shift. */
573 optab = optab_for_tree_code (rhs_code, vectype,
574 optab_scalar);
575
576 if (!optab)
577 {
73fbfcad 578 if (dump_enabled_p ())
78c60e3d 579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 580 "Build SLP failed: no optab.\n");
6983e6b5
RB
581 /* Fatal mismatch. */
582 matches[0] = false;
ebfd146a
IR
583 return false;
584 }
947131ba 585 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
586 if (icode == CODE_FOR_nothing)
587 {
73fbfcad 588 if (dump_enabled_p ())
78c60e3d
SS
589 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
590 "Build SLP failed: "
e645e942 591 "op not supported by target.\n");
6983e6b5
RB
592 /* Fatal mismatch. */
593 matches[0] = false;
ebfd146a
IR
594 return false;
595 }
596 optab_op2_mode = insn_data[icode].operand[2].mode;
597 if (!VECTOR_MODE_P (optab_op2_mode))
598 {
599 need_same_oprnds = true;
600 first_op1 = gimple_assign_rhs2 (stmt);
601 }
602 }
603 }
36ba4aae
IR
604 else if (rhs_code == WIDEN_LSHIFT_EXPR)
605 {
606 need_same_oprnds = true;
607 first_op1 = gimple_assign_rhs2 (stmt);
608 }
ebfd146a
IR
609 }
610 else
611 {
6876e5bc
RB
612 if (first_stmt_code != rhs_code
613 && alt_stmt_code == ERROR_MARK)
614 alt_stmt_code = rhs_code;
ebfd146a
IR
615 if (first_stmt_code != rhs_code
616 && (first_stmt_code != IMAGPART_EXPR
617 || rhs_code != REALPART_EXPR)
618 && (first_stmt_code != REALPART_EXPR
69f11a13 619 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
620 /* Handle mismatches in plus/minus by computing both
621 and merging the results. */
622 && !((first_stmt_code == PLUS_EXPR
623 || first_stmt_code == MINUS_EXPR)
624 && (alt_stmt_code == PLUS_EXPR
625 || alt_stmt_code == MINUS_EXPR)
626 && rhs_code == alt_stmt_code)
0d0293ac 627 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
69f11a13 628 && (first_stmt_code == ARRAY_REF
38000232 629 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
630 || first_stmt_code == INDIRECT_REF
631 || first_stmt_code == COMPONENT_REF
632 || first_stmt_code == MEM_REF)))
ebfd146a 633 {
73fbfcad 634 if (dump_enabled_p ())
ebfd146a 635 {
78c60e3d
SS
636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
637 "Build SLP failed: different operation "
638 "in stmt ");
639 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
6876e5bc
RB
640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
641 "original stmt ");
642 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
643 first_stmt, 0);
ebfd146a 644 }
6983e6b5
RB
645 /* Mismatch. */
646 continue;
ebfd146a 647 }
b8698a0f
L
648
649 if (need_same_oprnds
ebfd146a
IR
650 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
651 {
73fbfcad 652 if (dump_enabled_p ())
ebfd146a 653 {
78c60e3d
SS
654 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
655 "Build SLP failed: different shift "
656 "arguments in ");
657 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 658 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 659 }
6983e6b5
RB
660 /* Mismatch. */
661 continue;
ebfd146a 662 }
190c2236
JJ
663
664 if (rhs_code == CALL_EXPR)
665 {
355fe088 666 gimple *first_stmt = stmts[0];
190c2236
JJ
667 if (gimple_call_num_args (stmt) != nops
668 || !operand_equal_p (gimple_call_fn (first_stmt),
669 gimple_call_fn (stmt), 0)
670 || gimple_call_fntype (first_stmt)
671 != gimple_call_fntype (stmt))
672 {
73fbfcad 673 if (dump_enabled_p ())
190c2236 674 {
78c60e3d
SS
675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
676 "Build SLP failed: different calls in ");
677 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
678 stmt, 0);
e645e942 679 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
190c2236 680 }
6983e6b5
RB
681 /* Mismatch. */
682 continue;
190c2236
JJ
683 }
684 }
ebfd146a
IR
685 }
686
0d0293ac
MM
687 /* Grouped store or load. */
688 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
ebfd146a
IR
689 {
690 if (REFERENCE_CLASS_P (lhs))
691 {
692 /* Store. */
6983e6b5 693 ;
ebfd146a 694 }
b5aeb3bb
IR
695 else
696 {
697 /* Load. */
e14c1050 698 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
b5aeb3bb
IR
699 if (prev_first_load)
700 {
701 /* Check that there are no loads from different interleaving
6983e6b5
RB
702 chains in the same node. */
703 if (prev_first_load != first_load)
78c60e3d 704 {
73fbfcad 705 if (dump_enabled_p ())
b5aeb3bb 706 {
78c60e3d
SS
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
708 vect_location,
709 "Build SLP failed: different "
710 "interleaving chains in one node ");
711 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
712 stmt, 0);
e645e942 713 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b5aeb3bb 714 }
6983e6b5
RB
715 /* Mismatch. */
716 continue;
b5aeb3bb
IR
717 }
718 }
719 else
720 prev_first_load = first_load;
ebfd146a 721 }
0d0293ac 722 } /* Grouped access. */
ebfd146a
IR
723 else
724 {
725 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
726 {
0d0293ac 727 /* Not grouped load. */
73fbfcad 728 if (dump_enabled_p ())
ebfd146a 729 {
78c60e3d
SS
730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
731 "Build SLP failed: not grouped load ");
732 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 733 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
734 }
735
0d0293ac 736 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
737 /* Fatal mismatch. */
738 matches[0] = false;
ebfd146a
IR
739 return false;
740 }
741
742 /* Not memory operation. */
743 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 744 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 745 && TREE_CODE_CLASS (rhs_code) != tcc_expression
42fd8198 746 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
190c2236 747 && rhs_code != CALL_EXPR)
ebfd146a 748 {
73fbfcad 749 if (dump_enabled_p ())
ebfd146a 750 {
78c60e3d
SS
751 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
752 "Build SLP failed: operation");
753 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
754 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 755 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 756 }
6983e6b5
RB
757 /* Fatal mismatch. */
758 matches[0] = false;
ebfd146a
IR
759 return false;
760 }
761
f7e531cf
IR
762 if (rhs_code == COND_EXPR)
763 {
764 tree cond_expr = gimple_assign_rhs1 (stmt);
765
766 if (i == 0)
767 first_cond_code = TREE_CODE (cond_expr);
768 else if (first_cond_code != TREE_CODE (cond_expr))
769 {
73fbfcad 770 if (dump_enabled_p ())
f7e531cf 771 {
78c60e3d
SS
772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
773 "Build SLP failed: different"
774 " operation");
775 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
776 stmt, 0);
e645e942 777 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f7e531cf 778 }
6983e6b5
RB
779 /* Mismatch. */
780 continue;
f7e531cf
IR
781 }
782 }
ebfd146a 783 }
6983e6b5
RB
784
785 matches[i] = true;
786 }
787
788 for (i = 0; i < group_size; ++i)
789 if (!matches[i])
790 return false;
791
6876e5bc
RB
792 /* If we allowed a two-operation SLP node verify the target can cope
793 with the permute we are going to use. */
794 if (alt_stmt_code != ERROR_MARK
795 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
796 {
797 unsigned char *sel
798 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
799 for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
800 {
801 sel[i] = i;
802 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
803 sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
804 }
805 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
806 {
807 for (i = 0; i < group_size; ++i)
808 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
809 {
810 matches[i] = false;
811 if (dump_enabled_p ())
812 {
813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
814 "Build SLP failed: different operation "
815 "in stmt ");
816 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
817 stmts[i], 0);
818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
819 "original stmt ");
820 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
821 first_stmt, 0);
822 }
823 }
824 return false;
825 }
826 *two_operators = true;
827 }
828
6983e6b5
RB
829 return true;
830}
831
832/* Recursively build an SLP tree starting from NODE.
833 Fail (and return a value not equal to zero) if def-stmts are not
834 isomorphic, require data permutation or are of unsupported types of
835 operation. Otherwise, return 0.
836 The value returned is the depth in the SLP tree where a mismatch
837 was found. */
838
839static bool
310213d4 840vect_build_slp_tree (vec_info *vinfo,
6983e6b5
RB
841 slp_tree *node, unsigned int group_size,
842 unsigned int *max_nunits,
843 vec<slp_tree> *loads,
1428105c
RB
844 bool *matches, unsigned *npermutes, unsigned *tree_size,
845 unsigned max_tree_size)
6983e6b5 846{
89d390e5 847 unsigned nops, i, this_tree_size = 0;
355fe088 848 gimple *stmt;
6983e6b5 849
6983e6b5
RB
850 matches[0] = false;
851
852 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
853 if (is_gimple_call (stmt))
854 nops = gimple_call_num_args (stmt);
855 else if (is_gimple_assign (stmt))
856 {
857 nops = gimple_num_ops (stmt) - 1;
858 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
859 nops++;
ebfd146a 860 }
6983e6b5
RB
861 else
862 return false;
863
6876e5bc 864 bool two_operators = false;
310213d4 865 if (!vect_build_slp_tree_1 (vinfo,
6983e6b5 866 SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
97a1a642 867 max_nunits, matches, &two_operators))
6983e6b5 868 return false;
6876e5bc 869 SLP_TREE_TWO_OPERATORS (*node) = two_operators;
ebfd146a 870
6983e6b5
RB
871 /* If the SLP node is a load, terminate the recursion. */
872 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
873 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
ebfd146a 874 {
9771b263 875 loads->safe_push (*node);
ebfd146a
IR
876 return true;
877 }
878
6983e6b5
RB
879 /* Get at the operands, verifying they are compatible. */
880 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
881 slp_oprnd_info oprnd_info;
882 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
883 {
310213d4 884 switch (vect_get_and_check_slp_defs (vinfo, stmt, i, &oprnds_info))
6983e6b5 885 {
b0b4483e
RB
886 case 0:
887 break;
888 case -1:
889 matches[0] = false;
6983e6b5
RB
890 vect_free_oprnd_info (oprnds_info);
891 return false;
b0b4483e
RB
892 case 1:
893 matches[i] = false;
894 break;
6983e6b5
RB
895 }
896 }
b0b4483e
RB
897 for (i = 0; i < group_size; ++i)
898 if (!matches[i])
899 {
900 vect_free_oprnd_info (oprnds_info);
901 return false;
902 }
6983e6b5
RB
903
904 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
905
b8698a0f 906 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 907 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 908 {
d092494c 909 slp_tree child;
6983e6b5
RB
910 unsigned old_nloads = loads->length ();
911 unsigned old_max_nunits = *max_nunits;
b8698a0f 912
d092494c
IR
913 if (oprnd_info->first_dt != vect_internal_def)
914 continue;
ebfd146a 915
1428105c
RB
916 if (++this_tree_size > max_tree_size)
917 {
918 vect_free_oprnd_info (oprnds_info);
919 return false;
920 }
921
d092494c 922 child = vect_create_new_slp_node (oprnd_info->def_stmts);
6983e6b5
RB
923 if (!child)
924 {
9771b263 925 vect_free_oprnd_info (oprnds_info);
6983e6b5 926 return false;
d092494c 927 }
b8698a0f 928
310213d4 929 if (vect_build_slp_tree (vinfo, &child,
97a1a642 930 group_size, max_nunits, loads, matches,
1428105c 931 npermutes, &this_tree_size, max_tree_size))
6983e6b5 932 {
3fc356dc
RB
933 /* If we have all children of child built up from scalars then just
934 throw that away and build it up this node from scalars. */
935 if (!SLP_TREE_CHILDREN (child).is_empty ())
936 {
937 unsigned int j;
938 slp_tree grandchild;
939
940 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
941 if (grandchild != NULL)
942 break;
943 if (!grandchild)
944 {
945 /* Roll back. */
946 *max_nunits = old_max_nunits;
947 loads->truncate (old_nloads);
948 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
949 vect_free_slp_tree (grandchild);
950 SLP_TREE_CHILDREN (child).truncate (0);
951
952 dump_printf_loc (MSG_NOTE, vect_location,
953 "Building parent vector operands from "
954 "scalars instead\n");
955 oprnd_info->def_stmts = vNULL;
956 vect_free_slp_tree (child);
957 SLP_TREE_CHILDREN (*node).quick_push (NULL);
958 continue;
959 }
960 }
961
6983e6b5
RB
962 oprnd_info->def_stmts = vNULL;
963 SLP_TREE_CHILDREN (*node).quick_push (child);
964 continue;
965 }
966
90dd6e3d
RB
967 /* If the SLP build failed fatally and we analyze a basic-block
968 simply treat nodes we fail to build as externally defined
969 (and thus build vectors from the scalar defs).
970 The cost model will reject outright expensive cases.
971 ??? This doesn't treat cases where permutation ultimatively
972 fails (or we don't try permutation below). Ideally we'd
973 even compute a permutation that will end up with the maximum
974 SLP tree size... */
310213d4 975 if (is_a <bb_vec_info> (vinfo)
90dd6e3d
RB
976 && !matches[0]
977 /* ??? Rejecting patterns this way doesn't work. We'd have to
978 do extra work to cancel the pattern so the uses see the
979 scalar version. */
980 && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
981 {
9626d143
RB
982 unsigned int j;
983 slp_tree grandchild;
984
985 /* Roll back. */
986 *max_nunits = old_max_nunits;
987 loads->truncate (old_nloads);
988 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
989 vect_free_slp_tree (grandchild);
990 SLP_TREE_CHILDREN (child).truncate (0);
991
90dd6e3d
RB
992 dump_printf_loc (MSG_NOTE, vect_location,
993 "Building vector operands from scalars\n");
994 oprnd_info->def_stmts = vNULL;
995 vect_free_slp_tree (child);
996 SLP_TREE_CHILDREN (*node).quick_push (NULL);
997 continue;
998 }
999
6983e6b5
RB
1000 /* If the SLP build for operand zero failed and operand zero
1001 and one can be commutated try that for the scalar stmts
1002 that failed the match. */
1003 if (i == 0
1004 /* A first scalar stmt mismatch signals a fatal mismatch. */
1005 && matches[0]
1006 /* ??? For COND_EXPRs we can swap the comparison operands
1007 as well as the arms under some constraints. */
1008 && nops == 2
1009 && oprnds_info[1]->first_dt == vect_internal_def
1010 && is_gimple_assign (stmt)
1011 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1ece8d4c 1012 && !SLP_TREE_TWO_OPERATORS (*node)
6983e6b5
RB
1013 /* Do so only if the number of not successful permutes was nor more
1014 than a cut-ff as re-trying the recursive match on
1015 possibly each level of the tree would expose exponential
1016 behavior. */
1017 && *npermutes < 4)
1018 {
e72baed7
JJ
1019 unsigned int j;
1020 slp_tree grandchild;
1021
6983e6b5
RB
1022 /* Roll back. */
1023 *max_nunits = old_max_nunits;
1024 loads->truncate (old_nloads);
e72baed7
JJ
1025 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1026 vect_free_slp_tree (grandchild);
1027 SLP_TREE_CHILDREN (child).truncate (0);
1028
6983e6b5 1029 /* Swap mismatched definition stmts. */
b0b4483e
RB
1030 dump_printf_loc (MSG_NOTE, vect_location,
1031 "Re-trying with swapped operands of stmts ");
e72baed7 1032 for (j = 0; j < group_size; ++j)
6983e6b5
RB
1033 if (!matches[j])
1034 {
6b4db501
MM
1035 std::swap (oprnds_info[0]->def_stmts[j],
1036 oprnds_info[1]->def_stmts[j]);
b0b4483e 1037 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1038 }
b0b4483e 1039 dump_printf (MSG_NOTE, "\n");
74574669
RB
1040 /* And try again with scratch 'matches' ... */
1041 bool *tem = XALLOCAVEC (bool, group_size);
310213d4 1042 if (vect_build_slp_tree (vinfo, &child,
6983e6b5 1043 group_size, max_nunits, loads,
74574669 1044 tem, npermutes, &this_tree_size,
1428105c 1045 max_tree_size))
6983e6b5 1046 {
85c69b0b
RB
1047 /* If we have all children of child built up from scalars then
1048 just throw that away and build it up this node from scalars. */
1049 if (!SLP_TREE_CHILDREN (child).is_empty ())
1050 {
1051 unsigned int j;
1052 slp_tree grandchild;
1053
1054 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1055 if (grandchild != NULL)
1056 break;
1057 if (!grandchild)
1058 {
1059 /* Roll back. */
1060 *max_nunits = old_max_nunits;
1061 loads->truncate (old_nloads);
1062 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1063 vect_free_slp_tree (grandchild);
1064 SLP_TREE_CHILDREN (child).truncate (0);
1065
1066 dump_printf_loc (MSG_NOTE, vect_location,
1067 "Building parent vector operands from "
1068 "scalars instead\n");
1069 oprnd_info->def_stmts = vNULL;
1070 vect_free_slp_tree (child);
1071 SLP_TREE_CHILDREN (*node).quick_push (NULL);
1072 continue;
1073 }
1074 }
1075
74574669
RB
1076 /* ... so if successful we can apply the operand swapping
1077 to the GIMPLE IL. This is necessary because for example
1078 vect_get_slp_defs uses operand indexes and thus expects
1079 canonical operand order. */
1080 for (j = 0; j < group_size; ++j)
1081 if (!matches[j])
1082 {
355fe088 1083 gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
74574669
RB
1084 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1085 gimple_assign_rhs2_ptr (stmt));
1086 }
6983e6b5
RB
1087 oprnd_info->def_stmts = vNULL;
1088 SLP_TREE_CHILDREN (*node).quick_push (child);
1089 continue;
1090 }
1091
1092 ++*npermutes;
1093 }
1094
1095 oprnd_info->def_stmts = vNULL;
1096 vect_free_slp_tree (child);
1097 vect_free_oprnd_info (oprnds_info);
1098 return false;
ebfd146a
IR
1099 }
1100
1428105c
RB
1101 if (tree_size)
1102 *tree_size += this_tree_size;
1103
9771b263 1104 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
1105 return true;
1106}
1107
78c60e3d 1108/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1109
1110static void
c2a12ca0 1111vect_print_slp_tree (int dump_kind, location_t loc, slp_tree node)
ebfd146a
IR
1112{
1113 int i;
355fe088 1114 gimple *stmt;
d755c7ef 1115 slp_tree child;
ebfd146a
IR
1116
1117 if (!node)
1118 return;
1119
c2a12ca0 1120 dump_printf_loc (dump_kind, loc, "node\n");
9771b263 1121 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a 1122 {
c2a12ca0 1123 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
78c60e3d 1124 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
ebfd146a 1125 }
9771b263 1126 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
c2a12ca0 1127 vect_print_slp_tree (dump_kind, loc, child);
ebfd146a
IR
1128}
1129
1130
b8698a0f
L
1131/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1132 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1133 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1134 stmts in NODE are to be marked. */
1135
1136static void
1137vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1138{
1139 int i;
355fe088 1140 gimple *stmt;
d755c7ef 1141 slp_tree child;
ebfd146a
IR
1142
1143 if (!node)
1144 return;
1145
9771b263 1146 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
1147 if (j < 0 || i == j)
1148 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1149
9771b263 1150 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1151 vect_mark_slp_stmts (child, mark, j);
ebfd146a
IR
1152}
1153
1154
a70d6342
IR
1155/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1156
1157static void
1158vect_mark_slp_stmts_relevant (slp_tree node)
1159{
1160 int i;
355fe088 1161 gimple *stmt;
a70d6342 1162 stmt_vec_info stmt_info;
d755c7ef 1163 slp_tree child;
a70d6342
IR
1164
1165 if (!node)
1166 return;
1167
9771b263 1168 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
1169 {
1170 stmt_info = vinfo_for_stmt (stmt);
b8698a0f 1171 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1172 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1173 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1174 }
1175
9771b263 1176 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1177 vect_mark_slp_stmts_relevant (child);
a70d6342
IR
1178}
1179
1180
b5aeb3bb
IR
1181/* Rearrange the statements of NODE according to PERMUTATION. */
1182
1183static void
1184vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
01d8bf07 1185 vec<unsigned> permutation)
b5aeb3bb 1186{
355fe088
TS
1187 gimple *stmt;
1188 vec<gimple *> tmp_stmts;
d755c7ef
RB
1189 unsigned int i;
1190 slp_tree child;
b5aeb3bb 1191
9771b263 1192 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1193 vect_slp_rearrange_stmts (child, group_size, permutation);
b5aeb3bb 1194
9771b263
DN
1195 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1196 tmp_stmts.create (group_size);
d755c7ef 1197 tmp_stmts.quick_grow_cleared (group_size);
b5aeb3bb 1198
9771b263 1199 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
d755c7ef 1200 tmp_stmts[permutation[i]] = stmt;
b5aeb3bb 1201
9771b263 1202 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1203 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1204}
1205
1206
b266b968
RB
1207/* Attempt to reorder stmts in a reduction chain so that we don't
1208 require any load permutation. Return true if that was possible,
1209 otherwise return false. */
1210
1211static bool
1212vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1213{
1214 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1215 unsigned int i, j;
1216 sbitmap load_index;
1217 unsigned int lidx;
1218 slp_tree node, load;
1219
1220 /* Compare all the permutation sequences to the first one. We know
1221 that at least one load is permuted. */
1222 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1223 if (!node->load_permutation.exists ())
1224 return false;
1225 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1226 {
1227 if (!load->load_permutation.exists ())
1228 return false;
1229 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1230 if (lidx != node->load_permutation[j])
1231 return false;
1232 }
1233
1234 /* Check that the loads in the first sequence are different and there
1235 are no gaps between them. */
1236 load_index = sbitmap_alloc (group_size);
1237 bitmap_clear (load_index);
1238 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1239 {
41eefe13
RB
1240 if (lidx >= group_size)
1241 return false;
b266b968
RB
1242 if (bitmap_bit_p (load_index, lidx))
1243 {
1244 sbitmap_free (load_index);
1245 return false;
1246 }
1247 bitmap_set_bit (load_index, lidx);
1248 }
1249 for (i = 0; i < group_size; i++)
1250 if (!bitmap_bit_p (load_index, i))
1251 {
1252 sbitmap_free (load_index);
1253 return false;
1254 }
1255 sbitmap_free (load_index);
1256
1257 /* This permutation is valid for reduction. Since the order of the
1258 statements in the nodes is not important unless they are memory
1259 accesses, we can rearrange the statements in all the nodes
1260 according to the order of the loads. */
1261 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1262 node->load_permutation);
1263
1264 /* We are done, no actual permutations need to be generated. */
1265 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1266 SLP_TREE_LOAD_PERMUTATION (node).release ();
1267 return true;
1268}
1269
01d8bf07
RB
1270/* Check if the required load permutations in the SLP instance
1271 SLP_INSTN are supported. */
ebfd146a
IR
1272
1273static bool
01d8bf07 1274vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1275{
01d8bf07
RB
1276 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1277 unsigned int i, j, k, next;
6983e6b5 1278 slp_tree node;
a5b50aa1 1279 gimple *stmt, *load, *next_load;
ebfd146a 1280
73fbfcad 1281 if (dump_enabled_p ())
ebfd146a 1282 {
78c60e3d 1283 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1284 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1285 if (node->load_permutation.exists ())
1286 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1287 dump_printf (MSG_NOTE, "%d ", next);
1288 else
bddc974e
TJ
1289 for (k = 0; k < group_size; ++k)
1290 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1291 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1292 }
1293
b5aeb3bb
IR
1294 /* In case of reduction every load permutation is allowed, since the order
1295 of the reduction statements is not important (as opposed to the case of
0d0293ac 1296 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1297 load nodes are of the same size and have the same permutation (and then
1298 rearrange all the nodes of the SLP instance according to this
1299 permutation). */
1300
1301 /* Check that all the load nodes are of the same size. */
01d8bf07 1302 /* ??? Can't we assert this? */
9771b263 1303 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1304 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1305 return false;
2200fc49 1306
b5aeb3bb 1307 node = SLP_INSTANCE_TREE (slp_instn);
9771b263 1308 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1309
b010117a 1310 /* Reduction (there are no data-refs in the root).
b266b968 1311 In reduction chain the order of the loads is not important. */
b010117a
IR
1312 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1313 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1314 {
b266b968
RB
1315 if (vect_attempt_slp_rearrange_stmts (slp_instn))
1316 return true;
b5aeb3bb 1317
b266b968 1318 /* Fallthru to general load permutation handling. */
b5aeb3bb
IR
1319 }
1320
6aa904c4
IR
1321 /* In basic block vectorization we allow any subchain of an interleaving
1322 chain.
1323 FORNOW: not supported in loop SLP because of realignment compications. */
01d8bf07 1324 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
6aa904c4 1325 {
240a94da
RB
1326 /* Check whether the loads in an instance form a subchain and thus
1327 no permutation is necessary. */
9771b263 1328 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1329 {
9626d143
RB
1330 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1331 continue;
240a94da 1332 bool subchain_p = true;
6aa904c4 1333 next_load = NULL;
9771b263 1334 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
6aa904c4 1335 {
5b5826c4
RB
1336 if (j != 0
1337 && (next_load != load
1338 || GROUP_GAP (vinfo_for_stmt (load)) != 1))
240a94da
RB
1339 {
1340 subchain_p = false;
1341 break;
1342 }
6aa904c4
IR
1343 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1344 }
240a94da
RB
1345 if (subchain_p)
1346 SLP_TREE_LOAD_PERMUTATION (node).release ();
1347 else
1348 {
1349 /* Verify the permutation can be generated. */
1350 vec<tree> tem;
1351 if (!vect_transform_slp_perm_load (node, tem, NULL,
1352 1, slp_instn, true))
1353 {
1354 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1355 vect_location,
1356 "unsupported load permutation\n");
1357 return false;
1358 }
1359 }
6aa904c4 1360 }
01d8bf07 1361 return true;
6aa904c4
IR
1362 }
1363
9b999e8c 1364 /* For loop vectorization verify we can generate the permutation. */
01d8bf07
RB
1365 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1366 if (node->load_permutation.exists ()
1367 && !vect_transform_slp_perm_load
1368 (node, vNULL, NULL,
1369 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1370 return false;
9b999e8c 1371
01d8bf07 1372 return true;
ebfd146a
IR
1373}
1374
1375
e4a707c4 1376/* Find the last store in SLP INSTANCE. */
ff802fa1 1377
64900538 1378gimple *
2e8ab70c 1379vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1380{
355fe088 1381 gimple *last = NULL, *stmt;
e4a707c4 1382
2e8ab70c
RB
1383 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1384 {
1385 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1386 if (is_pattern_stmt_p (stmt_vinfo))
1387 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1388 else
1389 last = get_later_stmt (stmt, last);
1390 }
e4a707c4 1391
2e8ab70c 1392 return last;
e4a707c4
IR
1393}
1394
23847df4
RB
1395/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1396
1397static void
1a4b99c1 1398vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
23847df4 1399 stmt_vector_for_cost *prologue_cost_vec,
1a4b99c1 1400 stmt_vector_for_cost *body_cost_vec,
23847df4
RB
1401 unsigned ncopies_for_cost)
1402{
23847df4
RB
1403 unsigned i;
1404 slp_tree child;
355fe088 1405 gimple *stmt, *s;
23847df4
RB
1406 stmt_vec_info stmt_info;
1407 tree lhs;
1408 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1409
1410 /* Recurse down the SLP tree. */
1411 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
90dd6e3d 1412 if (child)
1a4b99c1
RB
1413 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1414 body_cost_vec, ncopies_for_cost);
23847df4
RB
1415
1416 /* Look at the first scalar stmt to determine the cost. */
1417 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1418 stmt_info = vinfo_for_stmt (stmt);
1419 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1420 {
1421 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1422 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1423 vect_uninitialized_def,
1424 node, prologue_cost_vec, body_cost_vec);
1425 else
1426 {
1427 int i;
1428 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
52eab378
RB
1429 /* If the load is permuted then the alignment is determined by
1430 the first group element not by the first scalar stmt DR. */
1431 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1432 {
1433 stmt = GROUP_FIRST_ELEMENT (stmt_info);
1434 stmt_info = vinfo_for_stmt (stmt);
1435 }
23847df4
RB
1436 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1437 node, prologue_cost_vec, body_cost_vec);
1438 /* If the load is permuted record the cost for the permutation.
1439 ??? Loads from multiple chains are let through here only
1440 for a single special case involving complex numbers where
1441 in the end no permutation is necessary. */
1442 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1443 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1444 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1445 && vect_get_place_in_interleaving_chain
1446 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1447 {
1448 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1449 stmt_info, 0, vect_body);
1450 break;
1451 }
1452 }
1453 }
1454 else
6876e5bc
RB
1455 {
1456 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1457 stmt_info, 0, vect_body);
1458 if (SLP_TREE_TWO_OPERATORS (node))
1459 {
1460 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1461 stmt_info, 0, vect_body);
1462 record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1463 stmt_info, 0, vect_body);
1464 }
1465 }
23847df4
RB
1466
1467 /* Scan operands and account for prologue cost of constants/externals.
1468 ??? This over-estimates cost for multiple uses and should be
1469 re-engineered. */
1470 lhs = gimple_get_lhs (stmt);
1471 for (i = 0; i < gimple_num_ops (stmt); ++i)
1472 {
81c40241 1473 tree op = gimple_op (stmt, i);
355fe088 1474 gimple *def_stmt;
23847df4
RB
1475 enum vect_def_type dt;
1476 if (!op || op == lhs)
1477 continue;
81c40241 1478 if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
2e8ab70c
RB
1479 {
1480 /* Without looking at the actual initializer a vector of
1481 constants can be implemented as load from the constant pool.
1482 ??? We need to pass down stmt_info for a vector type
1483 even if it points to the wrong stmt. */
1484 if (dt == vect_constant_def)
1485 record_stmt_cost (prologue_cost_vec, 1, vector_load,
1486 stmt_info, 0, vect_prologue);
1487 else if (dt == vect_external_def)
1488 record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1489 stmt_info, 0, vect_prologue);
1490 }
23847df4
RB
1491 }
1492}
1493
1494/* Compute the cost for the SLP instance INSTANCE. */
1495
1496static void
1a4b99c1 1497vect_analyze_slp_cost (slp_instance instance, void *data)
23847df4
RB
1498{
1499 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1500 unsigned ncopies_for_cost;
1501 stmt_info_for_cost *si;
1502 unsigned i;
1503
b939ea86
RB
1504 if (dump_enabled_p ())
1505 dump_printf_loc (MSG_NOTE, vect_location,
1506 "=== vect_analyze_slp_cost ===\n");
1507
23847df4
RB
1508 /* Calculate the number of vector stmts to create based on the unrolling
1509 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1510 GROUP_SIZE / NUNITS otherwise. */
1511 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1a4b99c1
RB
1512 slp_tree node = SLP_INSTANCE_TREE (instance);
1513 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1514 /* Adjust the group_size by the vectorization factor which is always one
1515 for basic-block vectorization. */
1516 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1517 group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1518 unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1519 /* For reductions look at a reduction operand in case the reduction
1520 operation is widening like DOT_PROD or SAD. */
1521 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1522 {
355fe088 1523 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1a4b99c1
RB
1524 switch (gimple_assign_rhs_code (stmt))
1525 {
1526 case DOT_PROD_EXPR:
1527 case SAD_EXPR:
1528 nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1529 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1530 break;
1531 default:;
1532 }
1533 }
23847df4
RB
1534 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1535
1536 prologue_cost_vec.create (10);
1537 body_cost_vec.create (10);
1a4b99c1
RB
1538 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1539 &prologue_cost_vec, &body_cost_vec,
1540 ncopies_for_cost);
23847df4
RB
1541
1542 /* Record the prologue costs, which were delayed until we were
1a4b99c1 1543 sure that SLP was successful. */
23847df4
RB
1544 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1545 {
1546 struct _stmt_vec_info *stmt_info
1547 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1548 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1549 si->misalign, vect_prologue);
1550 }
1551
1a4b99c1
RB
1552 /* Record the instance's instructions in the target cost model. */
1553 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1554 {
1555 struct _stmt_vec_info *stmt_info
1556 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1557 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1558 si->misalign, vect_body);
1559 }
1560
23847df4 1561 prologue_cost_vec.release ();
1a4b99c1 1562 body_cost_vec.release ();
23847df4 1563}
e4a707c4 1564
1ba91a49
AL
1565/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1566 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1567 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1568 containing the remainder.
1569 Return the first stmt in the second group. */
1570
1571static gimple *
1572vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1573{
1574 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1575 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1576 gcc_assert (group1_size > 0);
1577 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1578 gcc_assert (group2_size > 0);
1579 GROUP_SIZE (first_vinfo) = group1_size;
1580
1581 gimple *stmt = first_stmt;
1582 for (unsigned i = group1_size; i > 1; i--)
1583 {
1584 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1585 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1586 }
1587 /* STMT is now the last element of the first group. */
1588 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1589 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1590
1591 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1592 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1593 {
1594 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1595 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1596 }
1597
1598 /* For the second group, the GROUP_GAP is that before the original group,
1599 plus skipping over the first vector. */
1600 GROUP_GAP (vinfo_for_stmt (group2)) =
1601 GROUP_GAP (first_vinfo) + group1_size;
1602
1603 /* GROUP_GAP of the first group now has to skip over the second group too. */
1604 GROUP_GAP (first_vinfo) += group2_size;
1605
1606 if (dump_enabled_p ())
1607 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1608 group1_size, group2_size);
1609
1610 return group2;
1611}
1612
0d0293ac 1613/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1614 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1615 Return FALSE if it's impossible to SLP any stmt in the loop. */
1616
1617static bool
310213d4 1618vect_analyze_slp_instance (vec_info *vinfo,
355fe088 1619 gimple *stmt, unsigned max_tree_size)
ebfd146a
IR
1620{
1621 slp_instance new_instance;
d092494c 1622 slp_tree node;
e14c1050 1623 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
ebfd146a 1624 unsigned int unrolling_factor = 1, nunits;
b5aeb3bb 1625 tree vectype, scalar_type = NULL_TREE;
355fe088 1626 gimple *next;
1ba91a49 1627 unsigned int i;
ebfd146a 1628 unsigned int max_nunits = 0;
9771b263 1629 vec<slp_tree> loads;
b5aeb3bb 1630 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
355fe088 1631 vec<gimple *> scalar_stmts;
b5aeb3bb 1632
b010117a 1633 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1634 {
b010117a
IR
1635 if (dr)
1636 {
1637 scalar_type = TREE_TYPE (DR_REF (dr));
1638 vectype = get_vectype_for_scalar_type (scalar_type);
1639 }
1640 else
1641 {
310213d4 1642 gcc_assert (is_a <loop_vec_info> (vinfo));
b010117a
IR
1643 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1644 }
1645
e14c1050 1646 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
1647 }
1648 else
1649 {
310213d4 1650 gcc_assert (is_a <loop_vec_info> (vinfo));
b5aeb3bb 1651 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
310213d4 1652 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 1653 }
b8698a0f 1654
ebfd146a
IR
1655 if (!vectype)
1656 {
73fbfcad 1657 if (dump_enabled_p ())
ebfd146a 1658 {
78c60e3d
SS
1659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1660 "Build SLP failed: unsupported data-type ");
1661 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 1662 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1663 }
b5aeb3bb 1664
ebfd146a
IR
1665 return false;
1666 }
ebfd146a 1667 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 1668
a70d6342
IR
1669 /* Calculate the unrolling factor. */
1670 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
310213d4 1671 if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
a70d6342 1672 {
73fbfcad 1673 if (dump_enabled_p ())
e645e942 1674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 1675 "Build SLP failed: unrolling required in basic"
e645e942 1676 " block SLP\n");
b8698a0f 1677
a70d6342
IR
1678 return false;
1679 }
1680
0d0293ac 1681 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1682 scalar_stmts.create (group_size);
ebfd146a 1683 next = stmt;
b010117a 1684 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
ebfd146a 1685 {
b5aeb3bb
IR
1686 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1687 while (next)
1688 {
f7e531cf
IR
1689 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1690 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
9771b263
DN
1691 scalar_stmts.safe_push (
1692 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f7e531cf 1693 else
9771b263 1694 scalar_stmts.safe_push (next);
e14c1050 1695 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb 1696 }
14a61437
RB
1697 /* Mark the first element of the reduction chain as reduction to properly
1698 transform the node. In the reduction analysis phase only the last
1699 element of the chain is marked as reduction. */
1700 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1701 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
b5aeb3bb
IR
1702 }
1703 else
1704 {
1705 /* Collect reduction statements. */
310213d4 1706 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
9771b263
DN
1707 for (i = 0; reductions.iterate (i, &next); i++)
1708 scalar_stmts.safe_push (next);
ebfd146a
IR
1709 }
1710
d092494c 1711 node = vect_create_new_slp_node (scalar_stmts);
ebfd146a 1712
9771b263 1713 loads.create (group_size);
ebfd146a
IR
1714
1715 /* Build the tree for the SLP instance. */
89d390e5
RB
1716 bool *matches = XALLOCAVEC (bool, group_size);
1717 unsigned npermutes = 0;
310213d4 1718 if (vect_build_slp_tree (vinfo, &node, group_size,
abf9bfbc 1719 &max_nunits, &loads,
97a1a642 1720 matches, &npermutes, NULL, max_tree_size))
ebfd146a 1721 {
4ef69dfc 1722 /* Calculate the unrolling factor based on the smallest type. */
ebfd146a
IR
1723 if (max_nunits > nunits)
1724 unrolling_factor = least_common_multiple (max_nunits, group_size)
1725 / group_size;
b8698a0f 1726
310213d4 1727 if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
4ef69dfc 1728 {
73fbfcad 1729 if (dump_enabled_p ())
e645e942 1730 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 1731 "Build SLP failed: unrolling required in basic"
e645e942 1732 " block SLP\n");
c7e62a26 1733 vect_free_slp_tree (node);
9771b263 1734 loads.release ();
4ef69dfc
IR
1735 return false;
1736 }
1737
1738 /* Create a new SLP instance. */
1739 new_instance = XNEW (struct _slp_instance);
1740 SLP_INSTANCE_TREE (new_instance) = node;
1741 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 1742 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 1743 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
1744
1745 /* Compute the load permutation. */
1746 slp_tree load_node;
1747 bool loads_permuted = false;
abf9bfbc
RB
1748 FOR_EACH_VEC_ELT (loads, i, load_node)
1749 {
01d8bf07 1750 vec<unsigned> load_permutation;
abf9bfbc 1751 int j;
355fe088 1752 gimple *load, *first_stmt;
01d8bf07
RB
1753 bool this_load_permuted = false;
1754 load_permutation.create (group_size);
6983e6b5
RB
1755 first_stmt = GROUP_FIRST_ELEMENT
1756 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
abf9bfbc
RB
1757 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1758 {
6983e6b5
RB
1759 int load_place
1760 = vect_get_place_in_interleaving_chain (load, first_stmt);
1761 gcc_assert (load_place != -1);
1762 if (load_place != j)
01d8bf07 1763 this_load_permuted = true;
abf9bfbc
RB
1764 load_permutation.safe_push (load_place);
1765 }
fe2bef71
RB
1766 if (!this_load_permuted
1767 /* The load requires permutation when unrolling exposes
1768 a gap either because the group is larger than the SLP
1769 group-size or because there is a gap between the groups. */
1770 && (unrolling_factor == 1
1771 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1772 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
01d8bf07
RB
1773 {
1774 load_permutation.release ();
1775 continue;
1776 }
1777 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1778 loads_permuted = true;
abf9bfbc 1779 }
6aa904c4
IR
1780
1781 if (loads_permuted)
ebfd146a 1782 {
01d8bf07 1783 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 1784 {
73fbfcad 1785 if (dump_enabled_p ())
ebfd146a 1786 {
e645e942 1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
1788 "Build SLP failed: unsupported load "
1789 "permutation ");
1790 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 1791 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1792 }
ebfd146a
IR
1793 vect_free_slp_instance (new_instance);
1794 return false;
1795 }
ebfd146a 1796 }
ebfd146a 1797
310213d4 1798 vinfo->slp_instances.safe_push (new_instance);
b8698a0f 1799
73fbfcad 1800 if (dump_enabled_p ())
c2a12ca0
RB
1801 {
1802 dump_printf_loc (MSG_NOTE, vect_location,
1803 "Final SLP tree for instance:\n");
1804 vect_print_slp_tree (MSG_NOTE, vect_location, node);
1805 }
ebfd146a
IR
1806
1807 return true;
1808 }
1809
1810 /* Failed to SLP. */
1811 /* Free the allocated memory. */
1812 vect_free_slp_tree (node);
9771b263 1813 loads.release ();
b8698a0f 1814
1ba91a49 1815 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 1816 vector size. */
1ba91a49
AL
1817 if (is_a <bb_vec_info> (vinfo)
1818 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1819 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1820 {
1821 /* We consider breaking the group only on VF boundaries from the existing
1822 start. */
1823 for (i = 0; i < group_size; i++)
1824 if (!matches[i]) break;
1825
97a1a642 1826 if (i >= nunits && i < group_size)
1ba91a49
AL
1827 {
1828 /* Split into two groups at the first vector boundary before i. */
97a1a642
RB
1829 gcc_assert ((nunits & (nunits - 1)) == 0);
1830 unsigned group1_size = i & ~(nunits - 1);
1ba91a49
AL
1831
1832 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
1833 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
1834 /* If the first non-match was in the middle of a vector,
1835 skip the rest of that vector. */
1836 if (group1_size < i)
1837 {
97a1a642 1838 i = group1_size + nunits;
1ba91a49 1839 if (i < group_size)
97a1a642 1840 rest = vect_split_slp_store_group (rest, nunits);
1ba91a49
AL
1841 }
1842 if (i < group_size)
1843 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
1844 return res;
1845 }
1846 /* Even though the first vector did not all match, we might be able to SLP
1847 (some) of the remainder. FORNOW ignore this possibility. */
1848 }
1849
a70d6342 1850 return false;
ebfd146a
IR
1851}
1852
1853
ff802fa1 1854/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
1855 trees of packed scalar stmts if SLP is possible. */
1856
1857bool
310213d4 1858vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
1859{
1860 unsigned int i;
355fe088 1861 gimple *first_element;
a70d6342 1862 bool ok = false;
ebfd146a 1863
73fbfcad 1864 if (dump_enabled_p ())
e645e942 1865 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
ebfd146a 1866
0d0293ac 1867 /* Find SLP sequences starting from groups of grouped stores. */
310213d4
RB
1868 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
1869 if (vect_analyze_slp_instance (vinfo, first_element, max_tree_size))
a70d6342 1870 ok = true;
ebfd146a 1871
310213d4 1872 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 1873 {
310213d4
RB
1874 if (loop_vinfo->reduction_chains.length () > 0)
1875 {
1876 /* Find SLP sequences starting from reduction chains. */
1877 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
1878 if (vect_analyze_slp_instance (vinfo, first_element,
1879 max_tree_size))
1880 ok = true;
1881 else
1882 return false;
b010117a 1883
310213d4
RB
1884 /* Don't try to vectorize SLP reductions if reduction chain was
1885 detected. */
1886 return ok;
1887 }
b010117a 1888
310213d4
RB
1889 /* Find SLP sequences starting from groups of reductions. */
1890 if (loop_vinfo->reductions.length () > 1
1891 && vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
1892 max_tree_size))
1893 ok = true;
1894 }
b5aeb3bb 1895
ebfd146a
IR
1896 return true;
1897}
1898
1899
1900/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
1901 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1902 least one instance. */
ebfd146a 1903
437f4a00 1904bool
ebfd146a
IR
1905vect_make_slp_decision (loop_vec_info loop_vinfo)
1906{
1907 unsigned int i, unrolling_factor = 1;
9771b263 1908 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
1909 slp_instance instance;
1910 int decided_to_slp = 0;
1911
73fbfcad 1912 if (dump_enabled_p ())
e645e942
TJ
1913 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1914 "\n");
ebfd146a 1915
9771b263 1916 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
1917 {
1918 /* FORNOW: SLP if you can. */
1919 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1920 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1921
ff802fa1 1922 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 1923 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 1924 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
1925 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1926 decided_to_slp++;
1927 }
1928
1929 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1930
73fbfcad 1931 if (decided_to_slp && dump_enabled_p ())
ccb3ad87 1932 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1933 "Decided to SLP %d instances. Unrolling factor %d\n",
78c60e3d 1934 decided_to_slp, unrolling_factor);
437f4a00
IR
1935
1936 return (decided_to_slp > 0);
ebfd146a
IR
1937}
1938
1939
1940/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 1941 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
1942
1943static void
642fce57 1944vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 1945{
355fe088 1946 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 1947 imm_use_iterator imm_iter;
355fe088 1948 gimple *use_stmt;
642fce57 1949 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
d755c7ef 1950 slp_tree child;
f2c74cc4 1951 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
1952 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1953 int j;
1954
1955 /* Propagate hybrid down the SLP tree. */
1956 if (stype == hybrid)
1957 ;
1958 else if (HYBRID_SLP_STMT (stmt_vinfo))
1959 stype = hybrid;
1960 else
1961 {
1962 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
1963 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
29764870
RB
1964 /* We always get the pattern stmt here, but for immediate
1965 uses we have to use the LHS of the original stmt. */
1966 gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1967 if (STMT_VINFO_RELATED_STMT (stmt_vinfo))
1968 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
642fce57
RB
1969 if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1970 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
29764870
RB
1971 {
1972 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1973 continue;
1974 use_vinfo = vinfo_for_stmt (use_stmt);
1975 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
1976 && STMT_VINFO_RELATED_STMT (use_vinfo))
1977 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
1978 if (!STMT_SLP_TYPE (use_vinfo)
1979 && (STMT_VINFO_RELEVANT (use_vinfo)
1980 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
1981 && !(gimple_code (use_stmt) == GIMPLE_PHI
1982 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
1983 {
1984 if (dump_enabled_p ())
1985 {
1986 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
1987 "def in non-SLP stmt: ");
1988 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
1989 }
1990 stype = hybrid;
1991 }
29764870 1992 }
642fce57 1993 }
ebfd146a 1994
502f0263
RB
1995 if (stype == hybrid
1996 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
1997 {
1998 if (dump_enabled_p ())
1999 {
2000 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2001 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2002 }
2003 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2004 }
ebfd146a 2005
642fce57 2006 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
90dd6e3d
RB
2007 if (child)
2008 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2009}
f2c74cc4 2010
642fce57 2011/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2012
642fce57
RB
2013static tree
2014vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2015{
2016 walk_stmt_info *wi = (walk_stmt_info *)data;
2017 struct loop *loopp = (struct loop *)wi->info;
2018
2019 if (wi->is_lhs)
2020 return NULL_TREE;
2021
2022 if (TREE_CODE (*tp) == SSA_NAME
2023 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2024 {
355fe088 2025 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
642fce57
RB
2026 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2027 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
b1af7da6
RB
2028 {
2029 if (dump_enabled_p ())
2030 {
2031 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2032 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2033 }
2034 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2035 }
642fce57
RB
2036 }
2037
2038 return NULL_TREE;
ebfd146a
IR
2039}
2040
642fce57
RB
2041static tree
2042vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2043 walk_stmt_info *)
2044{
2045 /* If the stmt is in a SLP instance then this isn't a reason
2046 to mark use definitions in other SLP instances as hybrid. */
2047 if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2048 *handled = true;
2049 return NULL_TREE;
2050}
ebfd146a
IR
2051
2052/* Find stmts that must be both vectorized and SLPed. */
2053
2054void
2055vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2056{
2057 unsigned int i;
9771b263 2058 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2059 slp_instance instance;
2060
73fbfcad 2061 if (dump_enabled_p ())
e645e942
TJ
2062 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2063 "\n");
ebfd146a 2064
642fce57
RB
2065 /* First walk all pattern stmt in the loop and mark defs of uses as
2066 hybrid because immediate uses in them are not recorded. */
2067 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2068 {
2069 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2070 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2071 gsi_next (&gsi))
2072 {
355fe088 2073 gimple *stmt = gsi_stmt (gsi);
642fce57
RB
2074 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2075 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2076 {
2077 walk_stmt_info wi;
2078 memset (&wi, 0, sizeof (wi));
2079 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2080 gimple_stmt_iterator gsi2
2081 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2082 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2083 vect_detect_hybrid_slp_1, &wi);
2084 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2085 vect_detect_hybrid_slp_2,
2086 vect_detect_hybrid_slp_1, &wi);
2087 }
2088 }
2089 }
2090
2091 /* Then walk the SLP instance trees marking stmts with uses in
2092 non-SLP stmts as hybrid, also propagating hybrid down the
2093 SLP tree, collecting the above info on-the-fly. */
9771b263 2094 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2095 {
2096 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2097 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2098 i, pure_slp);
2099 }
ebfd146a
IR
2100}
2101
a70d6342
IR
2102
2103/* Create and initialize a new bb_vec_info struct for BB, as well as
2104 stmt_vec_info structs for all the stmts in it. */
b8698a0f 2105
a70d6342 2106static bb_vec_info
61d371eb
RB
2107new_bb_vec_info (gimple_stmt_iterator region_begin,
2108 gimple_stmt_iterator region_end)
a70d6342 2109{
61d371eb 2110 basic_block bb = gsi_bb (region_begin);
a70d6342
IR
2111 bb_vec_info res = NULL;
2112 gimple_stmt_iterator gsi;
2113
2114 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
310213d4 2115 res->kind = vec_info::bb;
a70d6342 2116 BB_VINFO_BB (res) = bb;
61d371eb
RB
2117 res->region_begin = region_begin;
2118 res->region_end = region_end;
a70d6342 2119
61d371eb
RB
2120 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2121 gsi_next (&gsi))
a70d6342 2122 {
355fe088 2123 gimple *stmt = gsi_stmt (gsi);
a70d6342 2124 gimple_set_uid (stmt, 0);
310213d4 2125 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
a70d6342
IR
2126 }
2127
9771b263
DN
2128 BB_VINFO_GROUPED_STORES (res).create (10);
2129 BB_VINFO_SLP_INSTANCES (res).create (2);
c3e7ee41 2130 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
a70d6342
IR
2131
2132 bb->aux = res;
2133 return res;
2134}
2135
2136
2137/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2138 stmts in the basic block. */
2139
2140static void
2141destroy_bb_vec_info (bb_vec_info bb_vinfo)
2142{
9771b263 2143 vec<slp_instance> slp_instances;
c7e62a26 2144 slp_instance instance;
a70d6342
IR
2145 basic_block bb;
2146 gimple_stmt_iterator si;
c7e62a26 2147 unsigned i;
a70d6342
IR
2148
2149 if (!bb_vinfo)
2150 return;
2151
2152 bb = BB_VINFO_BB (bb_vinfo);
2153
61d371eb
RB
2154 for (si = bb_vinfo->region_begin;
2155 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
a70d6342 2156 {
355fe088 2157 gimple *stmt = gsi_stmt (si);
a70d6342
IR
2158 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2159
2160 if (stmt_info)
2161 /* Free stmt_vec_info. */
2162 free_stmt_vec_info (stmt);
61d371eb
RB
2163
2164 /* Reset region marker. */
2165 gimple_set_uid (stmt, -1);
a70d6342
IR
2166 }
2167
310213d4 2168 vect_destroy_datarefs (bb_vinfo);
01be8516 2169 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
9771b263 2170 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
c7e62a26 2171 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
9771b263 2172 FOR_EACH_VEC_ELT (slp_instances, i, instance)
c7e62a26 2173 vect_free_slp_instance (instance);
9771b263 2174 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
c3e7ee41 2175 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
a70d6342
IR
2176 free (bb_vinfo);
2177 bb->aux = NULL;
2178}
2179
2180
2181/* Analyze statements contained in SLP tree node after recursively analyzing
2182 the subtree. Return TRUE if the operations are supported. */
2183
2184static bool
a12e42fc 2185vect_slp_analyze_node_operations (slp_tree node)
a70d6342
IR
2186{
2187 bool dummy;
2188 int i;
355fe088 2189 gimple *stmt;
d755c7ef 2190 slp_tree child;
a70d6342
IR
2191
2192 if (!node)
2193 return true;
2194
9771b263 2195 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a12e42fc 2196 if (!vect_slp_analyze_node_operations (child))
d092494c 2197 return false;
a70d6342 2198
9771b263 2199 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
2200 {
2201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2202 gcc_assert (stmt_info);
a12e42fc 2203 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
a70d6342
IR
2204
2205 if (!vect_analyze_stmt (stmt, &dummy, node))
a12e42fc 2206 return false;
a70d6342
IR
2207 }
2208
2209 return true;
2210}
2211
2212
ff802fa1 2213/* Analyze statements in SLP instances of the basic block. Return TRUE if the
a70d6342
IR
2214 operations are supported. */
2215
a12e42fc 2216bool
1a4b99c1 2217vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
a70d6342 2218{
a70d6342
IR
2219 slp_instance instance;
2220 int i;
2221
a12e42fc
RB
2222 if (dump_enabled_p ())
2223 dump_printf_loc (MSG_NOTE, vect_location,
2224 "=== vect_slp_analyze_operations ===\n");
2225
9771b263 2226 for (i = 0; slp_instances.iterate (i, &instance); )
a70d6342 2227 {
a12e42fc 2228 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
a70d6342 2229 {
a12e42fc
RB
2230 dump_printf_loc (MSG_NOTE, vect_location,
2231 "removing SLP instance operations starting from: ");
2232 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2233 SLP_TREE_SCALAR_STMTS
2234 (SLP_INSTANCE_TREE (instance))[0], 0);
2235 vect_free_slp_instance (instance);
9771b263 2236 slp_instances.ordered_remove (i);
a70d6342
IR
2237 }
2238 else
1a4b99c1
RB
2239 {
2240 /* Compute the costs of the SLP instance. */
2241 vect_analyze_slp_cost (instance, data);
2242 i++;
2243 }
b8698a0f
L
2244 }
2245
9771b263 2246 if (!slp_instances.length ())
a70d6342
IR
2247 return false;
2248
2249 return true;
2250}
2251
6eddf228
RB
2252
2253/* Compute the scalar cost of the SLP node NODE and its children
2254 and return it. Do not account defs that are marked in LIFE and
2255 update LIFE according to uses of NODE. */
2256
2257static unsigned
292cba13 2258vect_bb_slp_scalar_cost (basic_block bb,
ff4c81cc 2259 slp_tree node, vec<bool, va_heap> *life)
6eddf228
RB
2260{
2261 unsigned scalar_cost = 0;
2262 unsigned i;
355fe088 2263 gimple *stmt;
6eddf228
RB
2264 slp_tree child;
2265
2266 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2267 {
2268 unsigned stmt_cost;
2269 ssa_op_iter op_iter;
2270 def_operand_p def_p;
2271 stmt_vec_info stmt_info;
2272
ff4c81cc 2273 if ((*life)[i])
6eddf228
RB
2274 continue;
2275
2276 /* If there is a non-vectorized use of the defs then the scalar
2277 stmt is kept live in which case we do not account it or any
2278 required defs in the SLP children in the scalar cost. This
2279 way we make the vectorization more costly when compared to
2280 the scalar cost. */
2281 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2282 {
2283 imm_use_iterator use_iter;
355fe088 2284 gimple *use_stmt;
6eddf228 2285 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
f30a0ba5 2286 if (!is_gimple_debug (use_stmt)
61d371eb
RB
2287 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2288 use_stmt)
f30a0ba5 2289 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
6eddf228 2290 {
ff4c81cc 2291 (*life)[i] = true;
6eddf228
RB
2292 BREAK_FROM_IMM_USE_STMT (use_iter);
2293 }
2294 }
ff4c81cc 2295 if ((*life)[i])
6eddf228
RB
2296 continue;
2297
2298 stmt_info = vinfo_for_stmt (stmt);
2299 if (STMT_VINFO_DATA_REF (stmt_info))
2300 {
2301 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2302 stmt_cost = vect_get_stmt_cost (scalar_load);
2303 else
2304 stmt_cost = vect_get_stmt_cost (scalar_store);
2305 }
2306 else
2307 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2308
2309 scalar_cost += stmt_cost;
2310 }
2311
2312 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
90dd6e3d
RB
2313 if (child)
2314 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
6eddf228
RB
2315
2316 return scalar_cost;
2317}
2318
69f11a13
IR
2319/* Check if vectorization of the basic block is profitable. */
2320
2321static bool
2322vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2323{
9771b263 2324 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2325 slp_instance instance;
1a4b99c1 2326 int i;
c3e7ee41 2327 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2328 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2329
2330 /* Calculate scalar cost. */
6eddf228 2331 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2332 {
00f96dc9 2333 auto_vec<bool, 20> life;
ff4c81cc 2334 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
292cba13
RB
2335 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2336 SLP_INSTANCE_TREE (instance),
ff4c81cc 2337 &life);
69f11a13
IR
2338 }
2339
c3e7ee41 2340 /* Complete the target-specific cost calculation. */
92345349
BS
2341 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2342 &vec_inside_cost, &vec_epilogue_cost);
2343
2344 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2345
73fbfcad 2346 if (dump_enabled_p ())
69f11a13 2347 {
78c60e3d
SS
2348 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2349 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2350 vec_inside_cost);
2351 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2352 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2353 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2354 }
2355
a6524bba
RB
2356 /* Vectorization is profitable if its cost is more than the cost of scalar
2357 version. Note that we err on the vector side for equal cost because
2358 the cost estimate is otherwise quite pessimistic (constant uses are
2359 free on the scalar side but cost a load on the vector side for
2360 example). */
2361 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2362 return false;
2363
2364 return true;
2365}
2366
a5b50aa1
RB
2367/* Check if the basic block can be vectorized. Returns a bb_vec_info
2368 if so and sets fatal to true if failure is independent of
2369 current_vector_size. */
a70d6342 2370
8e19f5a1 2371static bb_vec_info
61d371eb
RB
2372vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2373 gimple_stmt_iterator region_end,
a5b50aa1
RB
2374 vec<data_reference_p> datarefs, int n_stmts,
2375 bool &fatal)
a70d6342
IR
2376{
2377 bb_vec_info bb_vinfo;
a70d6342 2378 slp_instance instance;
8e19f5a1 2379 int i;
777e1f09 2380 int min_vf = 2;
e4a707c4 2381
a5b50aa1
RB
2382 /* The first group of checks is independent of the vector size. */
2383 fatal = true;
2384
61d371eb
RB
2385 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2386 {
2387 if (dump_enabled_p ())
2388 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2389 "not vectorized: too many instructions in "
2390 "basic block.\n");
2391 free_data_refs (datarefs);
2392 return NULL;
2393 }
2394
2395 bb_vinfo = new_bb_vec_info (region_begin, region_end);
a70d6342
IR
2396 if (!bb_vinfo)
2397 return NULL;
2398
61d371eb 2399 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
428db0ba
RB
2400
2401 /* Analyze the data references. */
2402
2403 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
a70d6342 2404 {
73fbfcad 2405 if (dump_enabled_p ())
78c60e3d
SS
2406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2407 "not vectorized: unhandled data-ref in basic "
2408 "block.\n");
b8698a0f 2409
a70d6342
IR
2410 destroy_bb_vec_info (bb_vinfo);
2411 return NULL;
2412 }
2413
fcac74a1 2414 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2415 {
73fbfcad 2416 if (dump_enabled_p ())
78c60e3d
SS
2417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2418 "not vectorized: not enough data-refs in "
2419 "basic block.\n");
a70d6342
IR
2420
2421 destroy_bb_vec_info (bb_vinfo);
2422 return NULL;
2423 }
2424
310213d4 2425 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
2426 {
2427 if (dump_enabled_p ())
2428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2429 "not vectorized: unhandled data access in "
2430 "basic block.\n");
2431
2432 destroy_bb_vec_info (bb_vinfo);
2433 return NULL;
2434 }
2435
a5b50aa1
RB
2436 /* If there are no grouped stores in the region there is no need
2437 to continue with pattern recog as vect_analyze_slp will fail
2438 anyway. */
2439 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 2440 {
73fbfcad 2441 if (dump_enabled_p ())
a5b50aa1
RB
2442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2443 "not vectorized: no grouped stores in "
2444 "basic block.\n");
b8698a0f 2445
a70d6342
IR
2446 destroy_bb_vec_info (bb_vinfo);
2447 return NULL;
2448 }
b8698a0f 2449
a5b50aa1
RB
2450 /* While the rest of the analysis below depends on it in some way. */
2451 fatal = false;
2452
2453 vect_pattern_recog (bb_vinfo);
2454
a70d6342
IR
2455 /* Check the SLP opportunities in the basic block, analyze and build SLP
2456 trees. */
310213d4 2457 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 2458 {
73fbfcad 2459 if (dump_enabled_p ())
effb52da
RB
2460 {
2461 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2462 "Failed to SLP the basic block.\n");
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2464 "not vectorized: failed to find SLP opportunities "
2465 "in basic block.\n");
2466 }
a70d6342
IR
2467
2468 destroy_bb_vec_info (bb_vinfo);
2469 return NULL;
2470 }
b8698a0f 2471
c2a12ca0
RB
2472 /* Analyze and verify the alignment of data references and the
2473 dependence in the SLP instances. */
a5b50aa1
RB
2474 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2475 {
c2a12ca0
RB
2476 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2477 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1
RB
2478 {
2479 dump_printf_loc (MSG_NOTE, vect_location,
2480 "removing SLP instance operations starting from: ");
2481 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2482 SLP_TREE_SCALAR_STMTS
2483 (SLP_INSTANCE_TREE (instance))[0], 0);
2484 vect_free_slp_instance (instance);
2485 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2486 continue;
2487 }
c2a12ca0
RB
2488
2489 /* Mark all the statements that we want to vectorize as pure SLP and
2490 relevant. */
2491 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2492 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2493
a5b50aa1
RB
2494 i++;
2495 }
a5b50aa1
RB
2496 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2497 {
2498 destroy_bb_vec_info (bb_vinfo);
2499 return NULL;
2500 }
2501
5e6667b2 2502 /* Mark all the statements that we do not want to vectorize. */
61d371eb
RB
2503 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2504 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
5e6667b2
RB
2505 {
2506 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2507 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2508 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2509 }
2510
1a4b99c1
RB
2511 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2512 BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
a70d6342 2513 {
73fbfcad 2514 if (dump_enabled_p ())
e645e942 2515 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2516 "not vectorized: bad operation in basic block.\n");
a70d6342
IR
2517
2518 destroy_bb_vec_info (bb_vinfo);
2519 return NULL;
2520 }
2521
69f11a13 2522 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 2523 if (!unlimited_cost_model (NULL)
69f11a13
IR
2524 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2525 {
73fbfcad 2526 if (dump_enabled_p ())
78c60e3d
SS
2527 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2528 "not vectorized: vectorization is not "
2529 "profitable.\n");
69f11a13
IR
2530
2531 destroy_bb_vec_info (bb_vinfo);
2532 return NULL;
2533 }
2534
73fbfcad 2535 if (dump_enabled_p ())
78c60e3d
SS
2536 dump_printf_loc (MSG_NOTE, vect_location,
2537 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2538
2539 return bb_vinfo;
2540}
2541
2542
428db0ba
RB
2543/* Main entry for the BB vectorizer. Analyze and transform BB, returns
2544 true if anything in the basic-block was vectorized. */
2545
2546bool
2547vect_slp_bb (basic_block bb)
8e19f5a1
IR
2548{
2549 bb_vec_info bb_vinfo;
8e19f5a1
IR
2550 gimple_stmt_iterator gsi;
2551 unsigned int vector_sizes;
61d371eb 2552 bool any_vectorized = false;
8e19f5a1 2553
73fbfcad 2554 if (dump_enabled_p ())
78c60e3d 2555 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
8e19f5a1 2556
8e19f5a1
IR
2557 /* Autodetect first vector size we try. */
2558 current_vector_size = 0;
2559 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2560
61d371eb
RB
2561 gsi = gsi_start_bb (bb);
2562
8e19f5a1
IR
2563 while (1)
2564 {
61d371eb
RB
2565 if (gsi_end_p (gsi))
2566 break;
2567
2568 gimple_stmt_iterator region_begin = gsi;
2569 vec<data_reference_p> datarefs = vNULL;
2570 int insns = 0;
2571
2572 for (; !gsi_end_p (gsi); gsi_next (&gsi))
428db0ba 2573 {
61d371eb
RB
2574 gimple *stmt = gsi_stmt (gsi);
2575 if (is_gimple_debug (stmt))
2576 continue;
2577 insns++;
2578
2579 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2580 vect_location = gimple_location (stmt);
2581
2582 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2583 break;
2584 }
2585
2586 /* Skip leading unhandled stmts. */
2587 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2588 {
2589 gsi_next (&gsi);
2590 continue;
2591 }
428db0ba 2592
61d371eb
RB
2593 gimple_stmt_iterator region_end = gsi;
2594
2595 bool vectorized = false;
a5b50aa1 2596 bool fatal = false;
61d371eb 2597 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
a5b50aa1 2598 datarefs, insns, fatal);
61d371eb
RB
2599 if (bb_vinfo
2600 && dbg_cnt (vect_slp))
2601 {
428db0ba 2602 if (dump_enabled_p ())
61d371eb 2603 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
428db0ba
RB
2604
2605 vect_schedule_slp (bb_vinfo);
2606
2607 if (dump_enabled_p ())
2608 dump_printf_loc (MSG_NOTE, vect_location,
61d371eb 2609 "basic block part vectorized\n");
428db0ba
RB
2610
2611 destroy_bb_vec_info (bb_vinfo);
2612
61d371eb 2613 vectorized = true;
428db0ba 2614 }
61d371eb
RB
2615 else
2616 destroy_bb_vec_info (bb_vinfo);
8e19f5a1 2617
61d371eb 2618 any_vectorized |= vectorized;
8e19f5a1
IR
2619
2620 vector_sizes &= ~current_vector_size;
61d371eb
RB
2621 if (vectorized
2622 || vector_sizes == 0
a5b50aa1
RB
2623 || current_vector_size == 0
2624 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2625 vector sizes will fail do not bother iterating. */
2626 || fatal)
61d371eb
RB
2627 {
2628 if (gsi_end_p (region_end))
2629 break;
8e19f5a1 2630
61d371eb
RB
2631 /* Skip the unhandled stmt. */
2632 gsi_next (&gsi);
2633
2634 /* And reset vector sizes. */
2635 current_vector_size = 0;
2636 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2637 }
2638 else
2639 {
2640 /* Try the next biggest vector size. */
2641 current_vector_size = 1 << floor_log2 (vector_sizes);
2642 if (dump_enabled_p ())
2643 dump_printf_loc (MSG_NOTE, vect_location,
2644 "***** Re-trying analysis with "
2645 "vector size %d\n", current_vector_size);
2646
2647 /* Start over. */
2648 gsi = region_begin;
2649 }
8e19f5a1 2650 }
61d371eb
RB
2651
2652 return any_vectorized;
8e19f5a1
IR
2653}
2654
2655
e4af0bc4
IE
2656/* Return 1 if vector type of boolean constant which is OPNUM
2657 operand in statement STMT is a boolean vector. */
2658
2659static bool
2660vect_mask_constant_operand_p (gimple *stmt, int opnum)
2661{
2662 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2663 enum tree_code code = gimple_expr_code (stmt);
2664 tree op, vectype;
2665 gimple *def_stmt;
2666 enum vect_def_type dt;
2667
2668 /* For comparison and COND_EXPR type is chosen depending
2669 on the other comparison operand. */
2670 if (TREE_CODE_CLASS (code) == tcc_comparison)
2671 {
2672 if (opnum)
2673 op = gimple_assign_rhs1 (stmt);
2674 else
2675 op = gimple_assign_rhs2 (stmt);
2676
2677 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2678 &dt, &vectype))
2679 gcc_unreachable ();
2680
2681 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2682 }
2683
2684 if (code == COND_EXPR)
2685 {
2686 tree cond = gimple_assign_rhs1 (stmt);
2687
2688 if (TREE_CODE (cond) == SSA_NAME)
2689 return false;
2690
2691 if (opnum)
2692 op = TREE_OPERAND (cond, 1);
2693 else
2694 op = TREE_OPERAND (cond, 0);
2695
2696 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2697 &dt, &vectype))
2698 gcc_unreachable ();
2699
2700 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2701 }
2702
2703 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
2704}
2705
2706
b8698a0f
L
2707/* For constant and loop invariant defs of SLP_NODE this function returns
2708 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
2709 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2710 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
2711 REDUC_INDEX is the index of the reduction operand in the statements, unless
2712 it is -1. */
ebfd146a
IR
2713
2714static void
9dc3f7de 2715vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 2716 vec<tree> *vec_oprnds,
b5aeb3bb
IR
2717 unsigned int op_num, unsigned int number_of_vectors,
2718 int reduc_index)
ebfd146a 2719{
355fe088
TS
2720 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2721 gimple *stmt = stmts[0];
ebfd146a 2722 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
d2a12ae7 2723 unsigned nunits;
ebfd146a 2724 tree vec_cst;
d2a12ae7
RG
2725 tree *elts;
2726 unsigned j, number_of_places_left_in_vector;
ebfd146a 2727 tree vector_type;
9dc3f7de 2728 tree vop;
9771b263 2729 int group_size = stmts.length ();
ebfd146a 2730 unsigned int vec_num, i;
d2a12ae7 2731 unsigned number_of_copies = 1;
9771b263
DN
2732 vec<tree> voprnds;
2733 voprnds.create (number_of_vectors);
ebfd146a 2734 bool constant_p, is_store;
b5aeb3bb 2735 tree neutral_op = NULL;
bac430c9 2736 enum tree_code code = gimple_expr_code (stmt);
355fe088 2737 gimple *def_stmt;
0e93a64e 2738 struct loop *loop;
13396b6e 2739 gimple_seq ctor_seq = NULL;
b5aeb3bb 2740
42fd8198
IE
2741 /* Check if vector type is a boolean vector. */
2742 if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
e4af0bc4 2743 && vect_mask_constant_operand_p (stmt, op_num))
42fd8198
IE
2744 vector_type
2745 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
2746 else
2747 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
afbe6325
RB
2748 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2749
29ed4920
IR
2750 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2751 && reduc_index != -1)
b5aeb3bb 2752 {
afbe6325
RB
2753 op_num = reduc_index;
2754 op = gimple_op (stmt, op_num + 1);
b5aeb3bb 2755 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
ff802fa1 2756 we need either neutral operands or the original operands. See
b5aeb3bb
IR
2757 get_initial_def_for_reduction() for details. */
2758 switch (code)
2759 {
2760 case WIDEN_SUM_EXPR:
2761 case DOT_PROD_EXPR:
afbe6325 2762 case SAD_EXPR:
b5aeb3bb
IR
2763 case PLUS_EXPR:
2764 case MINUS_EXPR:
2765 case BIT_IOR_EXPR:
2766 case BIT_XOR_EXPR:
2767 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2768 neutral_op = build_real (TREE_TYPE (op), dconst0);
2769 else
2770 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2771
2772 break;
2773
2774 case MULT_EXPR:
b5aeb3bb
IR
2775 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2776 neutral_op = build_real (TREE_TYPE (op), dconst1);
2777 else
2778 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2779
2780 break;
2781
c1e822d5
IR
2782 case BIT_AND_EXPR:
2783 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2784 break;
2785
f1485e5b
RB
2786 /* For MIN/MAX we don't have an easy neutral operand but
2787 the initial values can be used fine here. Only for
2788 a reduction chain we have to force a neutral element. */
2789 case MAX_EXPR:
2790 case MIN_EXPR:
2791 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2792 neutral_op = NULL;
2793 else
2794 {
2795 def_stmt = SSA_NAME_DEF_STMT (op);
2796 loop = (gimple_bb (stmt))->loop_father;
2797 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2798 loop_preheader_edge (loop));
2799 }
2800 break;
0e93a64e 2801
b5aeb3bb 2802 default:
afbe6325 2803 gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
0e93a64e 2804 neutral_op = NULL;
b5aeb3bb
IR
2805 }
2806 }
ebfd146a
IR
2807
2808 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2809 {
2810 is_store = true;
2811 op = gimple_assign_rhs1 (stmt);
2812 }
2813 else
9dc3f7de
IR
2814 is_store = false;
2815
2816 gcc_assert (op);
ebfd146a
IR
2817
2818 if (CONSTANT_CLASS_P (op))
d59dc888 2819 constant_p = true;
ebfd146a 2820 else
d59dc888
IR
2821 constant_p = false;
2822
ebfd146a 2823 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 2824 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
2825
2826 For example, we have two scalar operands, s1 and s2 (e.g., group of
2827 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
2828 of this type can be packed in a vector). The output vector will contain
2829 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
2830 will be 2).
2831
b8698a0f 2832 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
2833 containing the operands.
2834
2835 For example, NUNITS is four as before, and the group size is 8
f7e531cf 2836 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 2837 {s5, s6, s7, s8}. */
b8698a0f 2838
14a61437 2839 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
2840
2841 number_of_places_left_in_vector = nunits;
d2a12ae7 2842 elts = XALLOCAVEC (tree, nunits);
90dd6e3d 2843 bool place_after_defs = false;
ebfd146a
IR
2844 for (j = 0; j < number_of_copies; j++)
2845 {
9771b263 2846 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
ebfd146a
IR
2847 {
2848 if (is_store)
2849 op = gimple_assign_rhs1 (stmt);
bac430c9 2850 else
f7e531cf 2851 {
bac430c9 2852 switch (code)
f7e531cf 2853 {
bac430c9 2854 case COND_EXPR:
a989bcc3
IE
2855 {
2856 tree cond = gimple_assign_rhs1 (stmt);
2857 if (TREE_CODE (cond) == SSA_NAME)
2858 op = gimple_op (stmt, op_num + 1);
2859 else if (op_num == 0 || op_num == 1)
bac430c9 2860 op = TREE_OPERAND (cond, op_num);
a989bcc3
IE
2861 else
2862 {
2863 if (op_num == 2)
2864 op = gimple_assign_rhs2 (stmt);
2865 else
2866 op = gimple_assign_rhs3 (stmt);
2867 }
2868 }
bac430c9
IR
2869 break;
2870
2871 case CALL_EXPR:
2872 op = gimple_call_arg (stmt, op_num);
2873 break;
2874
b84b294a
JJ
2875 case LSHIFT_EXPR:
2876 case RSHIFT_EXPR:
2877 case LROTATE_EXPR:
2878 case RROTATE_EXPR:
2879 op = gimple_op (stmt, op_num + 1);
2880 /* Unlike the other binary operators, shifts/rotates have
2881 the shift count being int, instead of the same type as
2882 the lhs, so make sure the scalar is the right type if
2883 we are dealing with vectors of
2884 long long/long/short/char. */
793d9a16 2885 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
2886 op = fold_convert (TREE_TYPE (vector_type), op);
2887 break;
2888
bac430c9
IR
2889 default:
2890 op = gimple_op (stmt, op_num + 1);
b84b294a 2891 break;
f7e531cf
IR
2892 }
2893 }
b8698a0f 2894
b5aeb3bb
IR
2895 if (reduc_index != -1)
2896 {
0e93a64e
IR
2897 loop = (gimple_bb (stmt))->loop_father;
2898 def_stmt = SSA_NAME_DEF_STMT (op);
b5aeb3bb
IR
2899
2900 gcc_assert (loop);
b010117a
IR
2901
2902 /* Get the def before the loop. In reduction chain we have only
2903 one initial value. */
2904 if ((j != (number_of_copies - 1)
2905 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2906 && i != 0))
2907 && neutral_op)
b5aeb3bb 2908 op = neutral_op;
b010117a
IR
2909 else
2910 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2911 loop_preheader_edge (loop));
b5aeb3bb
IR
2912 }
2913
ebfd146a 2914 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 2915 number_of_places_left_in_vector--;
90dd6e3d 2916 tree orig_op = op;
13396b6e 2917 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 2918 {
793d9a16 2919 if (CONSTANT_CLASS_P (op))
13396b6e 2920 {
42fd8198
IE
2921 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
2922 {
2923 /* Can't use VIEW_CONVERT_EXPR for booleans because
2924 of possibly different sizes of scalar value and
2925 vector element. */
2926 if (integer_zerop (op))
2927 op = build_int_cst (TREE_TYPE (vector_type), 0);
2928 else if (integer_onep (op))
2929 op = build_int_cst (TREE_TYPE (vector_type), 1);
2930 else
2931 gcc_unreachable ();
2932 }
2933 else
2934 op = fold_unary (VIEW_CONVERT_EXPR,
2935 TREE_TYPE (vector_type), op);
13396b6e
JJ
2936 gcc_assert (op && CONSTANT_CLASS_P (op));
2937 }
2938 else
2939 {
b731b390 2940 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 2941 gimple *init_stmt;
0d0e4a03 2942 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
13396b6e 2943 init_stmt
0d0e4a03 2944 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
13396b6e
JJ
2945 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2946 op = new_temp;
2947 }
50eeef09 2948 }
d2a12ae7 2949 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
2950 if (!CONSTANT_CLASS_P (op))
2951 constant_p = false;
90dd6e3d
RB
2952 if (TREE_CODE (orig_op) == SSA_NAME
2953 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2954 && STMT_VINFO_BB_VINFO (stmt_vinfo)
2955 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2956 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2957 place_after_defs = true;
ebfd146a
IR
2958
2959 if (number_of_places_left_in_vector == 0)
2960 {
2961 number_of_places_left_in_vector = nunits;
2962
2963 if (constant_p)
d2a12ae7 2964 vec_cst = build_vector (vector_type, elts);
ebfd146a 2965 else
d2a12ae7 2966 {
9771b263 2967 vec<constructor_elt, va_gc> *v;
d2a12ae7 2968 unsigned k;
9771b263 2969 vec_alloc (v, nunits);
d2a12ae7
RG
2970 for (k = 0; k < nunits; ++k)
2971 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2972 vec_cst = build_constructor (vector_type, v);
2973 }
90dd6e3d
RB
2974 tree init;
2975 gimple_stmt_iterator gsi;
2976 if (place_after_defs)
2977 {
2978 gsi = gsi_for_stmt
2979 (vect_find_last_scalar_stmt_in_slp (slp_node));
2980 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2981 }
2982 else
2983 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
13396b6e
JJ
2984 if (ctor_seq != NULL)
2985 {
90dd6e3d 2986 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
13396b6e
JJ
2987 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2988 GSI_SAME_STMT);
2989 ctor_seq = NULL;
2990 }
90dd6e3d
RB
2991 voprnds.quick_push (init);
2992 place_after_defs = false;
ebfd146a
IR
2993 }
2994 }
2995 }
2996
b8698a0f 2997 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 2998 them. */
9771b263 2999 vec_num = voprnds.length ();
d2a12ae7 3000 for (j = vec_num; j != 0; j--)
ebfd146a 3001 {
9771b263
DN
3002 vop = voprnds[j - 1];
3003 vec_oprnds->quick_push (vop);
ebfd146a
IR
3004 }
3005
9771b263 3006 voprnds.release ();
ebfd146a
IR
3007
3008 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3009 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3010 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3011 to replicate the vectors. */
9771b263 3012 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3013 {
b5aeb3bb
IR
3014 tree neutral_vec = NULL;
3015
3016 if (neutral_op)
3017 {
3018 if (!neutral_vec)
b9acc9f1 3019 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3020
9771b263 3021 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3022 }
3023 else
3024 {
9771b263
DN
3025 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3026 vec_oprnds->quick_push (vop);
b5aeb3bb 3027 }
ebfd146a
IR
3028 }
3029}
3030
3031
3032/* Get vectorized definitions from SLP_NODE that contains corresponding
3033 vectorized def-stmts. */
3034
3035static void
9771b263 3036vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
3037{
3038 tree vec_oprnd;
355fe088 3039 gimple *vec_def_stmt;
ebfd146a
IR
3040 unsigned int i;
3041
9771b263 3042 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3043
9771b263 3044 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
ebfd146a
IR
3045 {
3046 gcc_assert (vec_def_stmt);
3047 vec_oprnd = gimple_get_lhs (vec_def_stmt);
9771b263 3048 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
3049 }
3050}
3051
3052
b8698a0f
L
3053/* Get vectorized definitions for SLP_NODE.
3054 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
3055 call vect_get_constant_vectors() to create vector stmts.
3056 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
3057 must be stored in the corresponding child of SLP_NODE, and we call
3058 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 3059
ebfd146a 3060void
9771b263 3061vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
37b5ec8f 3062 vec<vec<tree> > *vec_oprnds, int reduc_index)
ebfd146a 3063{
355fe088 3064 gimple *first_stmt;
d092494c
IR
3065 int number_of_vects = 0, i;
3066 unsigned int child_index = 0;
b8698a0f 3067 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
d092494c 3068 slp_tree child = NULL;
37b5ec8f 3069 vec<tree> vec_defs;
e44978dc 3070 tree oprnd;
d092494c 3071 bool vectorized_defs;
ebfd146a 3072
9771b263
DN
3073 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3074 FOR_EACH_VEC_ELT (ops, i, oprnd)
ebfd146a 3075 {
d092494c
IR
3076 /* For each operand we check if it has vectorized definitions in a child
3077 node or we need to create them (for invariants and constants). We
3078 check if the LHS of the first stmt of the next child matches OPRND.
3079 If it does, we found the correct child. Otherwise, we call
3080 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3081 to check this child node for the next operand. */
3082 vectorized_defs = false;
9771b263 3083 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
ebfd146a 3084 {
01d8bf07 3085 child = SLP_TREE_CHILDREN (slp_node)[child_index];
d092494c 3086
e44978dc 3087 /* We have to check both pattern and original def, if available. */
90dd6e3d 3088 if (child)
e44978dc 3089 {
355fe088
TS
3090 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3091 gimple *related
90dd6e3d
RB
3092 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3093
3094 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
3095 || (related
3096 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3097 {
3098 /* The number of vector defs is determined by the number of
3099 vector statements in the node from which we get those
3100 statements. */
3101 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3102 vectorized_defs = true;
3103 child_index++;
3104 }
e44978dc 3105 }
90dd6e3d
RB
3106 else
3107 child_index++;
d092494c 3108 }
ebfd146a 3109
d092494c
IR
3110 if (!vectorized_defs)
3111 {
3112 if (i == 0)
3113 {
3114 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3115 /* Number of vector stmts was calculated according to LHS in
3116 vect_schedule_slp_instance (), fix it by replacing LHS with
3117 RHS, if necessary. See vect_get_smallest_scalar_type () for
3118 details. */
3119 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3120 &rhs_size_unit);
3121 if (rhs_size_unit != lhs_size_unit)
3122 {
3123 number_of_vects *= rhs_size_unit;
3124 number_of_vects /= lhs_size_unit;
3125 }
3126 }
3127 }
b5aeb3bb 3128
d092494c 3129 /* Allocate memory for vectorized defs. */
37b5ec8f
JJ
3130 vec_defs = vNULL;
3131 vec_defs.create (number_of_vects);
ebfd146a 3132
d092494c
IR
3133 /* For reduction defs we call vect_get_constant_vectors (), since we are
3134 looking for initial loop invariant values. */
3135 if (vectorized_defs && reduc_index == -1)
3136 /* The defs are already vectorized. */
37b5ec8f 3137 vect_get_slp_vect_defs (child, &vec_defs);
d092494c
IR
3138 else
3139 /* Build vectors from scalar defs. */
37b5ec8f 3140 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
d092494c 3141 number_of_vects, reduc_index);
ebfd146a 3142
37b5ec8f 3143 vec_oprnds->quick_push (vec_defs);
ebfd146a 3144
d092494c
IR
3145 /* For reductions, we only need initial values. */
3146 if (reduc_index != -1)
3147 return;
3148 }
ebfd146a
IR
3149}
3150
a70d6342 3151
b8698a0f 3152/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
ebfd146a
IR
3153 building a vector of type MASK_TYPE from it) and two input vectors placed in
3154 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
3155 shifting by STRIDE elements of DR_CHAIN for every copy.
3156 (STRIDE is the number of vectorized stmts for NODE divided by the number of
b8698a0f 3157 copies).
ebfd146a
IR
3158 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
3159 the created stmts must be inserted. */
3160
3161static inline void
355fe088 3162vect_create_mask_and_perm (gimple *stmt,
faf63e39 3163 tree mask, int first_vec_indx, int second_vec_indx,
b8698a0f 3164 gimple_stmt_iterator *gsi, slp_tree node,
9771b263 3165 tree vectype, vec<tree> dr_chain,
ebfd146a
IR
3166 int ncopies, int vect_stmts_counter)
3167{
faf63e39 3168 tree perm_dest;
355fe088 3169 gimple *perm_stmt = NULL;
7706cb01 3170 int i, stride_in, stride_out;
ebfd146a 3171 tree first_vec, second_vec, data_ref;
ebfd146a 3172
7706cb01
RB
3173 stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
3174 stride_in = dr_chain.length () / ncopies;
ebfd146a 3175
b8698a0f 3176 /* Initialize the vect stmts of NODE to properly insert the generated
ebfd146a 3177 stmts later. */
9771b263 3178 for (i = SLP_TREE_VEC_STMTS (node).length ();
ebfd146a 3179 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
9771b263 3180 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a
IR
3181
3182 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3183 for (i = 0; i < ncopies; i++)
3184 {
9771b263
DN
3185 first_vec = dr_chain[first_vec_indx];
3186 second_vec = dr_chain[second_vec_indx];
ebfd146a 3187
be377c80
RB
3188 /* Generate the permute statement if necessary. */
3189 if (mask)
3190 {
3191 perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3192 first_vec, second_vec, mask);
3193 data_ref = make_ssa_name (perm_dest, perm_stmt);
3194 gimple_set_lhs (perm_stmt, data_ref);
3195 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3196 }
3197 else
3198 /* If mask was NULL_TREE generate the requested identity transform. */
3199 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
ebfd146a 3200
b8698a0f 3201 /* Store the vector statement in NODE. */
7706cb01
RB
3202 SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
3203 = perm_stmt;
ebfd146a 3204
7706cb01
RB
3205 first_vec_indx += stride_in;
3206 second_vec_indx += stride_in;
ebfd146a 3207 }
ebfd146a
IR
3208}
3209
3210
ebfd146a
IR
3211/* Generate vector permute statements from a list of loads in DR_CHAIN.
3212 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3213 permute statements for the SLP node NODE of the SLP instance
3214 SLP_NODE_INSTANCE. */
3215
ebfd146a 3216bool
01d8bf07 3217vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
ebfd146a
IR
3218 gimple_stmt_iterator *gsi, int vf,
3219 slp_instance slp_node_instance, bool analyze_only)
3220{
355fe088 3221 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3222 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3223 tree mask_element_type = NULL_TREE, mask_type;
2ce27200 3224 int nunits, vec_index = 0;
2635892a 3225 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a 3226 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2ce27200 3227 int unroll_factor, mask_element, ncopies;
22e4dee7 3228 unsigned char *mask;
ef4bddc2 3229 machine_mode mode;
ebfd146a 3230
91ff1504
RB
3231 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3232 return false;
3233
3234 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3235
22e4dee7
RH
3236 mode = TYPE_MODE (vectype);
3237
2635892a
RH
3238 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3239 same size as the vector element being permuted. */
96f9265a
RG
3240 mask_element_type = lang_hooks.types.type_for_mode
3241 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
ebfd146a 3242 mask_type = get_vectype_for_scalar_type (mask_element_type);
ebfd146a 3243 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 3244 mask = XALLOCAVEC (unsigned char, nunits);
ebfd146a
IR
3245 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3246
b8698a0f 3247 /* Number of copies is determined by the final vectorization factor
ebfd146a 3248 relatively to SLP_NODE_INSTANCE unrolling factor. */
b8698a0f 3249 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
ebfd146a 3250
b8698a0f
L
3251 /* Generate permutation masks for every NODE. Number of masks for each NODE
3252 is equal to GROUP_SIZE.
3253 E.g., we have a group of three nodes with three loads from the same
3254 location in each node, and the vector size is 4. I.e., we have a
3255 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3256 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3257 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3258 ...
3259
2635892a 3260 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3261 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
3262 operation, and the mask element values can't be outside that range.
3263 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3264 For the first two permutations we need the first and the second input
ebfd146a 3265 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3266 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3267 {c2,a3,b3,c3}. */
3268
2ce27200
RB
3269 int vect_stmts_counter = 0;
3270 int index = 0;
3271 int first_vec_index = -1;
3272 int second_vec_index = -1;
be377c80 3273 bool noop_p = true;
ebfd146a 3274
2ce27200
RB
3275 for (int j = 0; j < unroll_factor; j++)
3276 {
3277 for (int k = 0; k < group_size; k++)
3278 {
3279 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3280 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3281 vec_index = i / nunits;
3282 mask_element = i % nunits;
3283 if (vec_index == first_vec_index
3284 || first_vec_index == -1)
3285 {
3286 first_vec_index = vec_index;
3287 }
3288 else if (vec_index == second_vec_index
3289 || second_vec_index == -1)
3290 {
3291 second_vec_index = vec_index;
3292 mask_element += nunits;
3293 }
3294 else
3295 {
3296 if (dump_enabled_p ())
3297 {
3298 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3299 "permutation requires at "
3300 "least three vectors ");
3301 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3302 stmt, 0);
3303 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3304 }
3305 return false;
3306 }
ebfd146a 3307
2ce27200
RB
3308 gcc_assert (mask_element >= 0
3309 && mask_element < 2 * nunits);
be377c80
RB
3310 if (mask_element != index)
3311 noop_p = false;
2ce27200
RB
3312 mask[index++] = mask_element;
3313
3314 if (index == nunits)
3315 {
be377c80
RB
3316 if (! noop_p
3317 && ! can_vec_perm_p (mode, false, mask))
2ce27200
RB
3318 {
3319 if (dump_enabled_p ())
22e4dee7 3320 {
2ce27200
RB
3321 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3322 vect_location,
3323 "unsupported vect permute { ");
3324 for (i = 0; i < nunits; ++i)
3325 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3326 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
22e4dee7 3327 }
2ce27200
RB
3328 return false;
3329 }
22e4dee7 3330
2ce27200
RB
3331 if (!analyze_only)
3332 {
be377c80
RB
3333 tree mask_vec = NULL_TREE;
3334
3335 if (! noop_p)
3336 {
3337 tree *mask_elts = XALLOCAVEC (tree, nunits);
3338 for (int l = 0; l < nunits; ++l)
3339 mask_elts[l] = build_int_cst (mask_element_type,
3340 mask[l]);
3341 mask_vec = build_vector (mask_type, mask_elts);
3342 }
2ce27200
RB
3343
3344 if (second_vec_index == -1)
3345 second_vec_index = first_vec_index;
3346 vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
3347 second_vec_index,
3348 gsi, node, vectype, dr_chain,
3349 ncopies, vect_stmts_counter++);
3350 }
ebfd146a 3351
2ce27200
RB
3352 index = 0;
3353 first_vec_index = -1;
3354 second_vec_index = -1;
be377c80 3355 noop_p = true;
2ce27200
RB
3356 }
3357 }
b8698a0f 3358 }
ebfd146a 3359
ebfd146a
IR
3360 return true;
3361}
3362
3363
3364
3365/* Vectorize SLP instance tree in postorder. */
3366
3367static bool
3368vect_schedule_slp_instance (slp_tree node, slp_instance instance,
a70d6342 3369 unsigned int vectorization_factor)
ebfd146a 3370{
355fe088 3371 gimple *stmt;
0d0293ac 3372 bool grouped_store, is_store;
ebfd146a
IR
3373 gimple_stmt_iterator si;
3374 stmt_vec_info stmt_info;
3375 unsigned int vec_stmts_size, nunits, group_size;
3376 tree vectype;
3377 int i;
d755c7ef 3378 slp_tree child;
ebfd146a
IR
3379
3380 if (!node)
3381 return false;
3382
9771b263 3383 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3384 vect_schedule_slp_instance (child, instance, vectorization_factor);
b8698a0f 3385
9771b263 3386 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3387 stmt_info = vinfo_for_stmt (stmt);
3388
3389 /* VECTYPE is the type of the destination. */
b690cc0f 3390 vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
3391 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3392 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3393
3394 /* For each SLP instance calculate number of vector stmts to be created
ff802fa1 3395 for the scalar stmts in each node of the SLP tree. Number of vector
ebfd146a
IR
3396 elements in one vector iteration is the number of scalar elements in
3397 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
14a61437
RB
3398 size.
3399 Unless this is a SLP reduction in which case the number of vector
3400 stmts is equal to the number of vector stmts of the children. */
3401 if (GROUP_FIRST_ELEMENT (stmt_info)
3402 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3403 vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3404 else
3405 vec_stmts_size = (vectorization_factor * group_size) / nunits;
ebfd146a 3406
9771b263 3407 if (!SLP_TREE_VEC_STMTS (node).exists ())
ebfd146a 3408 {
9771b263 3409 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
ebfd146a
IR
3410 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3411 }
3412
73fbfcad 3413 if (dump_enabled_p ())
ebfd146a 3414 {
78c60e3d
SS
3415 dump_printf_loc (MSG_NOTE,vect_location,
3416 "------>vectorizing SLP node starting from: ");
3417 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 3418 dump_printf (MSG_NOTE, "\n");
b8698a0f 3419 }
ebfd146a 3420
2e8ab70c
RB
3421 /* Vectorized stmts go before the last scalar stmt which is where
3422 all uses are ready. */
3423 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
e4a707c4 3424
b010117a
IR
3425 /* Mark the first element of the reduction chain as reduction to properly
3426 transform the node. In the analysis phase only the last element of the
3427 chain is marked as reduction. */
0d0293ac 3428 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
b010117a
IR
3429 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3430 {
3431 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3432 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3433 }
3434
6876e5bc
RB
3435 /* Handle two-operation SLP nodes by vectorizing the group with
3436 both operations and then performing a merge. */
3437 if (SLP_TREE_TWO_OPERATORS (node))
3438 {
3439 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3440 enum tree_code ocode;
355fe088 3441 gimple *ostmt;
6876e5bc
RB
3442 unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3443 bool allsame = true;
3444 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3445 if (gimple_assign_rhs_code (ostmt) != code0)
3446 {
3447 mask[i] = 1;
3448 allsame = false;
3449 ocode = gimple_assign_rhs_code (ostmt);
3450 }
3451 else
3452 mask[i] = 0;
3453 if (!allsame)
3454 {
355fe088
TS
3455 vec<gimple *> v0;
3456 vec<gimple *> v1;
6876e5bc
RB
3457 unsigned j;
3458 tree tmask = NULL_TREE;
3459 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3460 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3461 SLP_TREE_VEC_STMTS (node).truncate (0);
3462 gimple_assign_set_rhs_code (stmt, ocode);
3463 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3464 gimple_assign_set_rhs_code (stmt, code0);
3465 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3466 SLP_TREE_VEC_STMTS (node).truncate (0);
3467 tree meltype = build_nonstandard_integer_type
3468 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3469 tree mvectype = get_same_sized_vectype (meltype, vectype);
3470 unsigned k = 0, l;
3471 for (j = 0; j < v0.length (); ++j)
3472 {
3473 tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3474 for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3475 {
1ece8d4c 3476 if (k >= group_size)
6876e5bc
RB
3477 k = 0;
3478 melts[l] = build_int_cst
3479 (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3480 }
3481 tmask = build_vector (mvectype, melts);
3482
3483 /* ??? Not all targets support a VEC_PERM_EXPR with a
3484 constant mask that would translate to a vec_merge RTX
3485 (with their vec_perm_const_ok). We can either not
3486 vectorize in that case or let veclower do its job.
3487 Unfortunately that isn't too great and at least for
3488 plus/minus we'd eventually like to match targets
3489 vector addsub instructions. */
355fe088 3490 gimple *vstmt;
6876e5bc
RB
3491 vstmt = gimple_build_assign (make_ssa_name (vectype),
3492 VEC_PERM_EXPR,
3493 gimple_assign_lhs (v0[j]),
3494 gimple_assign_lhs (v1[j]), tmask);
3495 vect_finish_stmt_generation (stmt, vstmt, &si);
3496 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3497 }
3498 v0.release ();
3499 v1.release ();
3500 return false;
3501 }
3502 }
0d0293ac 3503 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
b5aeb3bb 3504 return is_store;
ebfd146a
IR
3505}
3506
dd34c087
JJ
3507/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3508 For loop vectorization this is done in vectorizable_call, but for SLP
3509 it needs to be deferred until end of vect_schedule_slp, because multiple
3510 SLP instances may refer to the same scalar stmt. */
3511
3512static void
3513vect_remove_slp_scalar_calls (slp_tree node)
3514{
355fe088 3515 gimple *stmt, *new_stmt;
dd34c087
JJ
3516 gimple_stmt_iterator gsi;
3517 int i;
d755c7ef 3518 slp_tree child;
dd34c087
JJ
3519 tree lhs;
3520 stmt_vec_info stmt_info;
3521
3522 if (!node)
3523 return;
3524
9771b263 3525 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3526 vect_remove_slp_scalar_calls (child);
dd34c087 3527
9771b263 3528 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
dd34c087
JJ
3529 {
3530 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3531 continue;
3532 stmt_info = vinfo_for_stmt (stmt);
3533 if (stmt_info == NULL
3534 || is_pattern_stmt_p (stmt_info)
3535 || !PURE_SLP_STMT (stmt_info))
3536 continue;
3537 lhs = gimple_call_lhs (stmt);
3538 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3539 set_vinfo_for_stmt (new_stmt, stmt_info);
3540 set_vinfo_for_stmt (stmt, NULL);
3541 STMT_VINFO_STMT (stmt_info) = new_stmt;
3542 gsi = gsi_for_stmt (stmt);
3543 gsi_replace (&gsi, new_stmt, false);
3544 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3545 }
3546}
ebfd146a 3547
ff802fa1
IR
3548/* Generate vector code for all SLP instances in the loop/basic block. */
3549
ebfd146a 3550bool
310213d4 3551vect_schedule_slp (vec_info *vinfo)
ebfd146a 3552{
9771b263 3553 vec<slp_instance> slp_instances;
ebfd146a 3554 slp_instance instance;
01d8bf07 3555 unsigned int i, vf;
ebfd146a
IR
3556 bool is_store = false;
3557
310213d4
RB
3558 slp_instances = vinfo->slp_instances;
3559 if (is_a <loop_vec_info> (vinfo))
3560 vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
a70d6342 3561 else
310213d4 3562 vf = 1;
a70d6342 3563
9771b263 3564 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
3565 {
3566 /* Schedule the tree of INSTANCE. */
3567 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
a70d6342 3568 instance, vf);
73fbfcad 3569 if (dump_enabled_p ())
78c60e3d 3570 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3571 "vectorizing stmts using SLP.\n");
ebfd146a
IR
3572 }
3573
9771b263 3574 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
3575 {
3576 slp_tree root = SLP_INSTANCE_TREE (instance);
355fe088 3577 gimple *store;
b5aeb3bb
IR
3578 unsigned int j;
3579 gimple_stmt_iterator gsi;
3580
c40eced0
RB
3581 /* Remove scalar call stmts. Do not do this for basic-block
3582 vectorization as not all uses may be vectorized.
3583 ??? Why should this be necessary? DCE should be able to
3584 remove the stmts itself.
3585 ??? For BB vectorization we can as well remove scalar
3586 stmts starting from the SLP tree root if they have no
3587 uses. */
310213d4 3588 if (is_a <loop_vec_info> (vinfo))
c40eced0 3589 vect_remove_slp_scalar_calls (root);
dd34c087 3590
9771b263 3591 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
b5aeb3bb
IR
3592 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3593 {
3594 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3595 break;
3596
a024e70e
IR
3597 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3598 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
b5aeb3bb
IR
3599 /* Free the attached stmt_vec_info and remove the stmt. */
3600 gsi = gsi_for_stmt (store);
3d3f2249 3601 unlink_stmt_vdef (store);
b5aeb3bb 3602 gsi_remove (&gsi, true);
3d3f2249 3603 release_defs (store);
b5aeb3bb
IR
3604 free_stmt_vec_info (store);
3605 }
3606 }
3607
ebfd146a
IR
3608 return is_store;
3609}