/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h" /* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
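  /* Record the new SLP use of each scalar stmt; vect_free_slp_tree
     decrements this count again.  */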
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}


/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
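      /* Accumulate GROUP_GAP so RESULT is the element position in the
         chain, not the statement index.  */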
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could end up with a different comparison code from that of the
         first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

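  /* A non-zero *SWAP on entry can only be set for a COND_EXPR that
     vect_build_slp_tree_1 classified as isomorphic to the first stmt
     after swapping or inverting its comparison.  */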
  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
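          /* maps[0] keeps the original order, maps[1] (*swap == 1) swaps
             the two comparison operands, and maps[2] (*swap == 2) swaps
             the then/else arms of the COND_EXPR.  */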
          int *map = maps[*swap];

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          /* We must already have set a vector size by now.  */
          gcc_checking_assert (maybe_ne (current_vector_size, 0U));
          if (!current_vector_size.is_constant ())
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: invalid type of def "
                                   "for variable-length SLP ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }
              return -1;
            }
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  *swap = swapped;
  return 0;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
                        tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type in ");
          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
          || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: unrolling required "
                       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
   to (B1 <= A1 ? X1 : Y1), or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, poly_uint64 *max_nunits,
                       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
                                   max_nunits))
        {
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned HOST_WIDE_INT count;
      if (!nunits.is_constant (&count))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: different operations "
                             "not allowed with variable-length SLP.\n");
          return false;
        }
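      /* Build the permute mask: lane I selects element I of the first
         operation's result vector, or element I of the alternate
         operation's result vector (indices >= COUNT select from the
         second permute input) when stmt I % GROUP_SIZE uses the
         alternate operation.  */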
      vec_perm_builder sel (count, count, 1);
      for (i = 0; i < count; ++i)
        {
          unsigned int elt = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            elt += count;
          sel.quick_push (elt);
        }
      vec_perm_indices indices (sel, 2, count);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
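/* Hash the stmt UIDs in order so that two identical stmt sequences
   get the same hash value.  */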
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

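/* Cache of stmt sets for which the SLP build already failed, used to
   avoid analyzing the same set more than once (see
   vect_build_slp_tree).  */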
typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     poly_uint64 *max_nunits, vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
                                        loads, matches, npermutes, tree_size,
                                        max_tree_size);
  /* When the SLP build fails for STMTS record this, otherwise the SLP
     build can be exponential in time when we allow building parts of the
     tree from scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return the built SLP tree; MATCHES[I] records whether stmt I of the
   group matched the first stmt.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       vec<slp_tree> *loads,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
                                   max_nunits))
        return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            if (stmt != stmts[0])
              return NULL;
        }
      else
        {
          /* Else def types have to match.  */
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            {
              /* But for reduction chains only check on the first stmt.  */
              if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
                continue;
              if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
                return NULL;
            }
        }
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  bool two_operators = false;
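  /* Per-stmt swap classification computed by vect_build_slp_tree_1 and
     consumed by vect_get_and_check_slp_defs (0: keep, 1: swap the
     comparison operands, 2: invert the comparison).  */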
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmt, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If all children of this child were built up from scalars then
             just throw the child away and build this node up from scalars
             instead.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commuted try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && ! two_operators
          /* Do so only if the number of unsuccessful permutes was no more
             than a cut-off, as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Verify if we can safely swap or if we committed to a specific
             operand order already.  */
          for (j = 0; j < group_size; ++j)
            if (!matches[j]
                && (swap[j] != 0
                    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: cannot swap operands "
                                     "of shared stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[j], 0);
                  }
                goto fail;
              }

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
                }
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  if (!matches[j])
                    {
                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))
                        continue;
                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);
                    }
                }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  {
                    gimple *stmt = stmts[j];
                    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
                  }

              /* If all children of this child were built up from scalars
                 then just throw the child away and build this node up from
                 scalars instead.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
          || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

1551/* Check if the required load permutations in the SLP instance
1552 SLP_INSTN are supported. */
ebfd146a
IR
1553
1554static bool
01d8bf07 1555vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1556{
01d8bf07
RB
1557 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1558 unsigned int i, j, k, next;
6983e6b5 1559 slp_tree node;
a5b50aa1 1560 gimple *stmt, *load, *next_load;
ebfd146a 1561
73fbfcad 1562 if (dump_enabled_p ())
ebfd146a 1563 {
78c60e3d 1564 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1565 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1566 if (node->load_permutation.exists ())
1567 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1568 dump_printf (MSG_NOTE, "%d ", next);
1569 else
bddc974e
TJ
1570 for (k = 0; k < group_size; ++k)
1571 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1572 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1573 }
1574
b5aeb3bb
IR
1575 /* In case of reduction every load permutation is allowed, since the order
1576 of the reduction statements is not important (as opposed to the case of
0d0293ac 1577 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1578 load nodes are of the same size and have the same permutation (and then
1579 rearrange all the nodes of the SLP instance according to this
1580 permutation). */
1581
1582 /* Check that all the load nodes are of the same size. */
01d8bf07 1583 /* ??? Can't we assert this? */
9771b263 1584 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1585 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1586 return false;
2200fc49 1587
b5aeb3bb 1588 node = SLP_INSTANCE_TREE (slp_instn);
9771b263 1589 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1590
b010117a 1591 /* Reduction (there are no data-refs in the root).
b266b968 1592 In reduction chain the order of the loads is not important. */
b010117a
IR
1593 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1594 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
c4e360f4 1595 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1596
6aa904c4
IR
1597 /* In basic block vectorization we allow any subchain of an interleaving
1598 chain.
1599 FORNOW: not supported in loop SLP because of realignment compications. */
01d8bf07 1600 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
6aa904c4 1601 {
240a94da
RB
1602 /* Check whether the loads in an instance form a subchain and thus
1603 no permutation is necessary. */
9771b263 1604 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1605 {
9626d143
RB
1606 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1607 continue;
240a94da 1608 bool subchain_p = true;
6aa904c4 1609 next_load = NULL;
9771b263 1610 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
6aa904c4 1611 {
5b5826c4
RB
1612 if (j != 0
1613 && (next_load != load
1614 || GROUP_GAP (vinfo_for_stmt (load)) != 1))
240a94da
RB
1615 {
1616 subchain_p = false;
1617 break;
1618 }
6aa904c4
IR
1619 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1620 }
240a94da
RB
1621 if (subchain_p)
1622 SLP_TREE_LOAD_PERMUTATION (node).release ();
1623 else
1624 {
fe73a332
RB
1625 stmt_vec_info group_info
1626 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1627 group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
1628 unsigned nunits
1629 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
1630 unsigned k, maxk = 0;
1631 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1632 if (k > maxk)
1633 maxk = k;
1634 /* In BB vectorization we may not actually use a loaded vector
1635 accessing elements in excess of GROUP_SIZE. */
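	      /* Illustrative numbers (assumed): GROUP_SIZE 6 and nunits 4
		 give 6 & ~3 == 4, so a permutation index of 4 or 5 would
		 read into the gap at the end and is rejected below.  */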
1636 if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
1637 {
1638 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1639 "BB vectorization with gaps at the end of "
1640 "a load is not supported\n");
1641 return false;
1642 }
1643
240a94da
RB
1644 /* Verify the permutation can be generated. */
1645 vec<tree> tem;
29afecdf 1646 unsigned n_perms;
240a94da 1647 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1648 1, slp_instn, true, &n_perms))
240a94da
RB
1649 {
1650 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1651 vect_location,
1652 "unsupported load permutation\n");
1653 return false;
1654 }
1655 }
6aa904c4 1656 }
01d8bf07 1657 return true;
6aa904c4
IR
1658 }
1659
31bee964
RB
1660 /* For loop vectorization verify we can generate the permutation. Be
 1661 conservative about the vectorization factor; there are permutations
1662 that will use three vector inputs only starting from a specific factor
1663 and the vectorization factor is not yet final.
1664 ??? The SLP instance unrolling factor might not be the maximum one. */
29afecdf 1665 unsigned n_perms;
d9f21f6a
RS
1666 poly_uint64 test_vf
1667 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
31bee964 1668 LOOP_VINFO_VECT_FACTOR
d9f21f6a 1669 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
01d8bf07
RB
1670 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1671 if (node->load_permutation.exists ()
31bee964
RB
1672 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1673 slp_instn, true, &n_perms))
01d8bf07 1674 return false;
9b999e8c 1675
01d8bf07 1676 return true;
ebfd146a
IR
1677}
1678
1679
e4a707c4 1680/* Find the last scalar statement in the SLP tree NODE. */
ff802fa1 1681
64900538 1682gimple *
2e8ab70c 1683vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1684{
355fe088 1685 gimple *last = NULL, *stmt;
e4a707c4 1686
2e8ab70c
RB
1687 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1688 {
1689 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1690 if (is_pattern_stmt_p (stmt_vinfo))
1691 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1692 else
1693 last = get_later_stmt (stmt, last);
1694 }
e4a707c4 1695
2e8ab70c 1696 return last;
e4a707c4
IR
1697}
1698
23847df4
RB
1699/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1700
1701static void
1a4b99c1 1702vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
23847df4 1703 stmt_vector_for_cost *prologue_cost_vec,
1a4b99c1 1704 stmt_vector_for_cost *body_cost_vec,
f7300fff
RB
1705 unsigned ncopies_for_cost,
1706 scalar_stmts_set_t* visited)
23847df4 1707{
603cca93 1708 unsigned i, j;
23847df4 1709 slp_tree child;
8155f4d8 1710 gimple *stmt;
23847df4
RB
1711 stmt_vec_info stmt_info;
1712 tree lhs;
23847df4 1713
f7300fff
RB
1714 /* If we already costed the exact same set of scalar stmts we're done.
1715 We share the generated vector stmts for those. */
1716 if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
1717 return;
1718
1719 visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());
1720
23847df4
RB
1721 /* Recurse down the SLP tree. */
1722 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
603cca93 1723 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
1a4b99c1 1724 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
f7300fff 1725 body_cost_vec, ncopies_for_cost, visited);
23847df4
RB
1726
1727 /* Look at the first scalar stmt to determine the cost. */
1728 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1729 stmt_info = vinfo_for_stmt (stmt);
1730 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1731 {
2de001ee
RS
1732 vect_memory_access_type memory_access_type
1733 = (STMT_VINFO_STRIDED_P (stmt_info)
1734 ? VMAT_STRIDED_SLP
1735 : VMAT_CONTIGUOUS);
23847df4 1736 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
2de001ee
RS
1737 vect_model_store_cost (stmt_info, ncopies_for_cost,
1738 memory_access_type, vect_uninitialized_def,
23847df4
RB
1739 node, prologue_cost_vec, body_cost_vec);
1740 else
1741 {
23847df4 1742 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
52eab378
RB
1743 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1744 {
8155f4d8
RB
1745 /* If the load is permuted then the alignment is determined by
1746 the first group element not by the first scalar stmt DR. */
52eab378
RB
1747 stmt = GROUP_FIRST_ELEMENT (stmt_info);
1748 stmt_info = vinfo_for_stmt (stmt);
8155f4d8 1749 /* Record the cost for the permutation. */
29afecdf
RB
1750 unsigned n_perms;
1751 vect_transform_slp_perm_load (node, vNULL, NULL,
1752 ncopies_for_cost, instance, true,
1753 &n_perms);
1754 record_stmt_cost (body_cost_vec, n_perms, vec_perm,
8155f4d8 1755 stmt_info, 0, vect_body);
c5126ce8
RS
1756 unsigned assumed_nunits
1757 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
29afecdf
RB
1758 /* And adjust the number of loads performed. This handles
1759 redundancies as well as loads that are later dead. */
1760 auto_sbitmap perm (GROUP_SIZE (stmt_info));
1761 bitmap_clear (perm);
1762 for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
1763 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
1764 ncopies_for_cost = 0;
1765 bool load_seen = false;
1766 for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
1767 {
c5126ce8 1768 if (i % assumed_nunits == 0)
29afecdf
RB
1769 {
1770 if (load_seen)
1771 ncopies_for_cost++;
1772 load_seen = false;
1773 }
1774 if (bitmap_bit_p (perm, i))
1775 load_seen = true;
1776 }
1777 if (load_seen)
1778 ncopies_for_cost++;
1779 gcc_assert (ncopies_for_cost
1780 <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
c5126ce8 1781 + assumed_nunits - 1) / assumed_nunits);
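	      /* Illustrative numbers (assumed): GROUP_SIZE 8,
		 assumed_nunits 4 and a load permutation of {0, 1, 0, 1}
		 set only bits 0 and 1, so of the two possible vector
		 loads only the first is counted and ncopies_for_cost
		 ends up as 1.  */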
d9f21f6a
RS
1782 poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1783 ncopies_for_cost *= estimated_poly_value (uf);
52eab378 1784 }
8155f4d8 1785 /* Record the cost for the vector loads. */
2de001ee
RS
1786 vect_model_load_cost (stmt_info, ncopies_for_cost,
1787 memory_access_type, node, prologue_cost_vec,
1788 body_cost_vec);
89483f99 1789 return;
23847df4
RB
1790 }
1791 }
e7baeb39
RB
1792 else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
1793 {
1794 /* ncopies_for_cost is the number of IVs we generate. */
1795 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1796 stmt_info, 0, vect_body);
1797
1798 /* Prologue cost for the initial values and step vector. */
1799 record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
1800 CONSTANT_CLASS_P
1801 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1802 (stmt_info))
1803 ? vector_load : vec_construct,
1804 stmt_info, 0, vect_prologue);
1805 record_stmt_cost (prologue_cost_vec, 1,
1806 CONSTANT_CLASS_P
1807 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
1808 ? vector_load : vec_construct,
1809 stmt_info, 0, vect_prologue);
1810
1811 /* ??? No easy way to get at the actual number of vector stmts
 1812 to be generated and thus the derived IVs. */
1813 }
89483f99 1814 else
6876e5bc
RB
1815 {
1816 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1817 stmt_info, 0, vect_body);
89483f99
RB
1818 if (SLP_TREE_TWO_OPERATORS (node))
1819 {
1820 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1821 stmt_info, 0, vect_body);
1822 record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1823 stmt_info, 0, vect_body);
1824 }
6876e5bc 1825 }
23847df4 1826
603cca93
RB
1827 /* Push SLP node def-type to stmts. */
1828 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1829 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1830 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1831 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
1832
23847df4
RB
1833 /* Scan operands and account for prologue cost of constants/externals.
1834 ??? This over-estimates cost for multiple uses and should be
1835 re-engineered. */
603cca93 1836 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
23847df4
RB
1837 lhs = gimple_get_lhs (stmt);
1838 for (i = 0; i < gimple_num_ops (stmt); ++i)
1839 {
81c40241 1840 tree op = gimple_op (stmt, i);
355fe088 1841 gimple *def_stmt;
23847df4
RB
1842 enum vect_def_type dt;
1843 if (!op || op == lhs)
1844 continue;
81c40241 1845 if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
2e8ab70c
RB
1846 {
1847 /* Without looking at the actual initializer a vector of
1848 constants can be implemented as load from the constant pool.
1849 ??? We need to pass down stmt_info for a vector type
1850 even if it points to the wrong stmt. */
1851 if (dt == vect_constant_def)
1852 record_stmt_cost (prologue_cost_vec, 1, vector_load,
1853 stmt_info, 0, vect_prologue);
1854 else if (dt == vect_external_def)
1855 record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1856 stmt_info, 0, vect_prologue);
1857 }
23847df4 1858 }
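      /* Illustrative example (assumed, not from the original source):
	 for x + 3 the constant operand can become a constant-pool load
	 (vector_load), while for x + n with n defined outside the
	 vectorized region the vector of n's must be built with a
	 vec_construct.  */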
603cca93
RB
1859
1860 /* Restore stmt def-types. */
1861 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1862 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1863 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1864 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
23847df4
RB
1865}
1866
1867/* Compute the cost for the SLP instance INSTANCE. */
1868
1869static void
1a4b99c1 1870vect_analyze_slp_cost (slp_instance instance, void *data)
23847df4
RB
1871{
1872 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1873 unsigned ncopies_for_cost;
1874 stmt_info_for_cost *si;
1875 unsigned i;
1876
b939ea86
RB
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_NOTE, vect_location,
1879 "=== vect_analyze_slp_cost ===\n");
1880
23847df4
RB
1881 /* Calculate the number of vector stmts to create based on the unrolling
1882 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
 1883 GROUP_SIZE / NUNITS otherwise). */
1884 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1a4b99c1
RB
1885 slp_tree node = SLP_INSTANCE_TREE (instance);
1886 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
d9f21f6a
RS
1887 /* Get the estimated vectorization factor, which is always one for
1888 basic-block vectorization. */
1889 unsigned int assumed_vf;
1a4b99c1 1890 if (STMT_VINFO_LOOP_VINFO (stmt_info))
d9f21f6a
RS
1891 assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
1892 else
1893 assumed_vf = 1;
1a4b99c1
RB
1894 /* For reductions look at a reduction operand in case the reduction
1895 operation is widening like DOT_PROD or SAD. */
c5126ce8 1896 tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
1a4b99c1
RB
1897 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1898 {
355fe088 1899 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1a4b99c1
RB
1900 switch (gimple_assign_rhs_code (stmt))
1901 {
1902 case DOT_PROD_EXPR:
1903 case SAD_EXPR:
c5126ce8
RS
1904 vectype_for_cost = get_vectype_for_scalar_type
1905 (TREE_TYPE (gimple_assign_rhs1 (stmt)));
1a4b99c1
RB
1906 break;
1907 default:;
1908 }
1909 }
c5126ce8
RS
1910 unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
1911 ncopies_for_cost = (least_common_multiple (assumed_nunits,
1912 group_size * assumed_vf)
1913 / assumed_nunits);
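  /* Illustrative numbers (assumed): for basic-block SLP (assumed_vf 1)
     with group_size 6 and assumed_nunits 4 this is
     least_common_multiple (4, 6) / 4 == 3 vector stmts.  */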
23847df4
RB
1914
1915 prologue_cost_vec.create (10);
1916 body_cost_vec.create (10);
f7300fff 1917 scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
1a4b99c1
RB
1918 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1919 &prologue_cost_vec, &body_cost_vec,
f7300fff
RB
1920 ncopies_for_cost, visited);
1921 delete visited;
23847df4
RB
1922
1923 /* Record the prologue costs, which were delayed until we were
1a4b99c1 1924 sure that SLP was successful. */
23847df4
RB
1925 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1926 {
1927 struct _stmt_vec_info *stmt_info
1928 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1929 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1930 si->misalign, vect_prologue);
1931 }
1932
1a4b99c1
RB
1933 /* Record the instance's instructions in the target cost model. */
1934 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1935 {
1936 struct _stmt_vec_info *stmt_info
1937 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1938 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1939 si->misalign, vect_body);
1940 }
1941
23847df4 1942 prologue_cost_vec.release ();
1a4b99c1 1943 body_cost_vec.release ();
23847df4 1944}
e4a707c4 1945
1ba91a49
AL
1946/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1947 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1948 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1949 containing the remainder.
1950 Return the first stmt in the second group. */
1951
1952static gimple *
1953vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1954{
1955 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1956 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1957 gcc_assert (group1_size > 0);
1958 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1959 gcc_assert (group2_size > 0);
1960 GROUP_SIZE (first_vinfo) = group1_size;
1961
1962 gimple *stmt = first_stmt;
1963 for (unsigned i = group1_size; i > 1; i--)
1964 {
1965 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1966 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1967 }
1968 /* STMT is now the last element of the first group. */
1969 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1970 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1971
1972 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1973 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1974 {
1975 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1976 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1977 }
1978
1979 /* For the second group, the GROUP_GAP is that before the original group,
1980 plus skipping over the first vector. */
1981 GROUP_GAP (vinfo_for_stmt (group2)) =
1982 GROUP_GAP (first_vinfo) + group1_size;
1983
1984 /* GROUP_GAP of the first group now has to skip over the second group too. */
1985 GROUP_GAP (first_vinfo) += group2_size;
1986
1987 if (dump_enabled_p ())
1988 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1989 group1_size, group2_size);
1990
1991 return group2;
1992}
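/* Illustrative example (assumed, not from the original source): splitting
   a group of 8 consecutive stores with GROUP_GAP 0 at GROUP1_SIZE 4
   leaves two groups of 4; the second group's leading GROUP_GAP becomes
   0 + 4 and the first group's GROUP_GAP also becomes 0 + 4, so each
   group skips over the elements now owned by the other.  */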
1993
4b6068ea
RS
1994/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1995 statements and a vector of NUNITS elements. */
1996
1997static poly_uint64
1998calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1999{
2000 return exact_div (common_multiple (nunits, group_size), group_size);
2001}
2002
0d0293ac 2003/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 2004 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
2005 Return FALSE if it's impossible to SLP any stmt in the loop. */
2006
2007static bool
310213d4 2008vect_analyze_slp_instance (vec_info *vinfo,
355fe088 2009 gimple *stmt, unsigned max_tree_size)
ebfd146a
IR
2010{
2011 slp_instance new_instance;
d092494c 2012 slp_tree node;
e14c1050 2013 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb 2014 tree vectype, scalar_type = NULL_TREE;
355fe088 2015 gimple *next;
1ba91a49 2016 unsigned int i;
9771b263 2017 vec<slp_tree> loads;
b5aeb3bb 2018 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
355fe088 2019 vec<gimple *> scalar_stmts;
b5aeb3bb 2020
b010117a 2021 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 2022 {
b010117a
IR
2023 if (dr)
2024 {
2025 scalar_type = TREE_TYPE (DR_REF (dr));
2026 vectype = get_vectype_for_scalar_type (scalar_type);
2027 }
2028 else
2029 {
310213d4 2030 gcc_assert (is_a <loop_vec_info> (vinfo));
b010117a
IR
2031 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2032 }
2033
e14c1050 2034 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
2035 }
2036 else
2037 {
310213d4 2038 gcc_assert (is_a <loop_vec_info> (vinfo));
b5aeb3bb 2039 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
310213d4 2040 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 2041 }
b8698a0f 2042
ebfd146a
IR
2043 if (!vectype)
2044 {
73fbfcad 2045 if (dump_enabled_p ())
ebfd146a 2046 {
78c60e3d
SS
2047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2048 "Build SLP failed: unsupported data-type ");
2049 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 2050 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 2051 }
b5aeb3bb 2052
ebfd146a
IR
2053 return false;
2054 }
4b6068ea 2055 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 2056
0d0293ac 2057 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 2058 scalar_stmts.create (group_size);
ebfd146a 2059 next = stmt;
b010117a 2060 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
ebfd146a 2061 {
b5aeb3bb
IR
2062 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2063 while (next)
2064 {
f7e531cf
IR
2065 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
2066 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
9771b263
DN
2067 scalar_stmts.safe_push (
2068 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f7e531cf 2069 else
9771b263 2070 scalar_stmts.safe_push (next);
e14c1050 2071 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb 2072 }
14a61437
RB
2073 /* Mark the first element of the reduction chain as reduction to properly
2074 transform the node. In the reduction analysis phase only the last
2075 element of the chain is marked as reduction. */
2076 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2077 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
b5aeb3bb
IR
2078 }
2079 else
2080 {
2081 /* Collect reduction statements. */
310213d4 2082 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
9771b263
DN
2083 for (i = 0; reductions.iterate (i, &next); i++)
2084 scalar_stmts.safe_push (next);
ebfd146a
IR
2085 }
2086
9771b263 2087 loads.create (group_size);
ebfd146a
IR
2088
2089 /* Build the tree for the SLP instance. */
89d390e5
RB
2090 bool *matches = XALLOCAVEC (bool, group_size);
2091 unsigned npermutes = 0;
f7300fff 2092 bst_fail = new scalar_stmts_set_t ();
4b6068ea 2093 poly_uint64 max_nunits = nunits;
e569db5f 2094 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
4b6068ea 2095 &max_nunits, &loads, matches, &npermutes,
e569db5f 2096 NULL, max_tree_size);
26d66f28 2097 delete bst_fail;
e569db5f 2098 if (node != NULL)
ebfd146a 2099 {
4ef69dfc 2100 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 2101 poly_uint64 unrolling_factor
4b6068ea 2102 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 2103
d9f21f6a 2104 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
2105 && is_a <bb_vec_info> (vinfo))
2106 {
4b6068ea
RS
2107 unsigned HOST_WIDE_INT const_max_nunits;
2108 if (!max_nunits.is_constant (&const_max_nunits)
2109 || const_max_nunits > group_size)
2110 {
2111 if (dump_enabled_p ())
2112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2113 "Build SLP failed: store group "
2114 "size not a multiple of the vector size "
2115 "in basic block SLP\n");
2116 vect_free_slp_tree (node);
2117 loads.release ();
2118 return false;
2119 }
e569db5f 2120 /* Fatal mismatch. */
4b6068ea 2121 matches[group_size / const_max_nunits * const_max_nunits] = false;
e569db5f
VK
2122 vect_free_slp_tree (node);
2123 loads.release ();
2124 }
2125 else
2126 {
4ef69dfc
IR
2127 /* Create a new SLP instance. */
2128 new_instance = XNEW (struct _slp_instance);
2129 SLP_INSTANCE_TREE (new_instance) = node;
2130 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 2131 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 2132 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
2133
2134 /* Compute the load permutation. */
2135 slp_tree load_node;
2136 bool loads_permuted = false;
abf9bfbc
RB
2137 FOR_EACH_VEC_ELT (loads, i, load_node)
2138 {
01d8bf07 2139 vec<unsigned> load_permutation;
abf9bfbc 2140 int j;
355fe088 2141 gimple *load, *first_stmt;
01d8bf07
RB
2142 bool this_load_permuted = false;
2143 load_permutation.create (group_size);
6983e6b5
RB
2144 first_stmt = GROUP_FIRST_ELEMENT
2145 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
abf9bfbc
RB
2146 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2147 {
e569db5f
VK
2148 int load_place = vect_get_place_in_interleaving_chain
2149 (load, first_stmt);
6983e6b5
RB
2150 gcc_assert (load_place != -1);
2151 if (load_place != j)
01d8bf07 2152 this_load_permuted = true;
abf9bfbc
RB
2153 load_permutation.safe_push (load_place);
2154 }
fe2bef71
RB
2155 if (!this_load_permuted
2156 /* The load requires permutation when unrolling exposes
2157 a gap either because the group is larger than the SLP
2158 group-size or because there is a gap between the groups. */
d9f21f6a 2159 && (known_eq (unrolling_factor, 1U)
fe2bef71
RB
2160 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2161 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
01d8bf07
RB
2162 {
2163 load_permutation.release ();
2164 continue;
2165 }
2166 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2167 loads_permuted = true;
abf9bfbc 2168 }
6aa904c4
IR
2169
2170 if (loads_permuted)
ebfd146a 2171 {
01d8bf07 2172 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 2173 {
73fbfcad 2174 if (dump_enabled_p ())
ebfd146a 2175 {
e645e942 2176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
2177 "Build SLP failed: unsupported load "
2178 "permutation ");
e569db5f
VK
2179 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2180 TDF_SLIM, stmt, 0);
ebfd146a 2181 }
ebfd146a
IR
2182 vect_free_slp_instance (new_instance);
2183 return false;
2184 }
ebfd146a 2185 }
ebfd146a 2186
e569db5f 2187 /* If the loads and stores can be handled with load/store-lanes
bb0f5ca7
AL
 2188 instructions, do not generate this SLP instance. */
2189 if (is_a <loop_vec_info> (vinfo)
2190 && loads_permuted
2191 && dr && vect_store_lanes_supported (vectype, group_size))
2192 {
2193 slp_tree load_node;
2194 FOR_EACH_VEC_ELT (loads, i, load_node)
2195 {
2196 gimple *first_stmt = GROUP_FIRST_ELEMENT
2197 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2198 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
e569db5f
VK
2199 /* Use SLP for strided accesses (or if we
2200 can't load-lanes). */
bb0f5ca7
AL
2201 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2202 || ! vect_load_lanes_supported
2203 (STMT_VINFO_VECTYPE (stmt_vinfo),
2204 GROUP_SIZE (stmt_vinfo)))
2205 break;
2206 }
2207 if (i == loads.length ())
2208 {
2209 if (dump_enabled_p ())
2210 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2211 "Built SLP cancelled: can use "
2212 "load/store-lanes\n");
2213 vect_free_slp_instance (new_instance);
2214 return false;
2215 }
2216 }
2217
310213d4 2218 vinfo->slp_instances.safe_push (new_instance);
b8698a0f 2219
73fbfcad 2220 if (dump_enabled_p ())
c2a12ca0
RB
2221 {
2222 dump_printf_loc (MSG_NOTE, vect_location,
2223 "Final SLP tree for instance:\n");
2224 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2225 }
ebfd146a
IR
2226
2227 return true;
2228 }
e569db5f
VK
2229 }
2230 else
2231 {
ebfd146a
IR
2232 /* Failed to SLP. */
2233 /* Free the allocated memory. */
e403d17e 2234 scalar_stmts.release ();
9771b263 2235 loads.release ();
e569db5f 2236 }
b8698a0f 2237
1ba91a49 2238 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2239 vector size. */
4b6068ea 2240 unsigned HOST_WIDE_INT const_nunits;
1ba91a49
AL
2241 if (is_a <bb_vec_info> (vinfo)
2242 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
4b6068ea
RS
2243 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2244 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2245 {
2246 /* We consider breaking the group only on VF boundaries from the existing
2247 start. */
2248 for (i = 0; i < group_size; i++)
2249 if (!matches[i]) break;
2250
4b6068ea 2251 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2252 {
2253 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2254 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2255 unsigned group1_size = i & ~(const_nunits - 1);
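	  /* Illustrative numbers (assumed): const_nunits 4 and a first
	     mismatch at i == 6 give group1_size == 6 & ~3 == 4, so the
	     first split-off group covers a whole number of vectors.  */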
1ba91a49
AL
2256
2257 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2258 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2259 /* If the first non-match was in the middle of a vector,
2260 skip the rest of that vector. */
2261 if (group1_size < i)
2262 {
4b6068ea 2263 i = group1_size + const_nunits;
1ba91a49 2264 if (i < group_size)
4b6068ea 2265 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2266 }
2267 if (i < group_size)
2268 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2269 return res;
2270 }
2271 /* Even though the first vector did not all match, we might be able to SLP
2272 (some) of the remainder. FORNOW ignore this possibility. */
2273 }
2274
a70d6342 2275 return false;
ebfd146a
IR
2276}
2277
2278
ff802fa1 2279/* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
ebfd146a
IR
2280 trees of packed scalar stmts if SLP is possible. */
2281
2282bool
310213d4 2283vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2284{
2285 unsigned int i;
355fe088 2286 gimple *first_element;
ebfd146a 2287
73fbfcad 2288 if (dump_enabled_p ())
e645e942 2289 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
ebfd146a 2290
0d0293ac 2291 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2292 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
0630a4ec 2293 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
ebfd146a 2294
310213d4 2295 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2296 {
310213d4
RB
2297 if (loop_vinfo->reduction_chains.length () > 0)
2298 {
2299 /* Find SLP sequences starting from reduction chains. */
2300 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
0630a4ec 2301 if (! vect_analyze_slp_instance (vinfo, first_element,
310213d4 2302 max_tree_size))
6b5e165b
RB
2303 {
2304 /* Dissolve reduction chain group. */
2305 gimple *next, *stmt = first_element;
2306 while (stmt)
2307 {
2308 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2309 next = GROUP_NEXT_ELEMENT (vinfo);
2310 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2311 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2312 stmt = next;
2313 }
2314 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2315 = vect_internal_def;
2316 }
310213d4 2317 }
b010117a 2318
310213d4 2319 /* Find SLP sequences starting from groups of reductions. */
0630a4ec
RB
2320 if (loop_vinfo->reductions.length () > 1)
2321 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2322 max_tree_size);
310213d4 2323 }
b5aeb3bb 2324
ebfd146a
IR
2325 return true;
2326}
2327
2328
2329/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2330 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2331 least one instance. */
ebfd146a 2332
437f4a00 2333bool
ebfd146a
IR
2334vect_make_slp_decision (loop_vec_info loop_vinfo)
2335{
d9f21f6a
RS
2336 unsigned int i;
2337 poly_uint64 unrolling_factor = 1;
9771b263 2338 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2339 slp_instance instance;
2340 int decided_to_slp = 0;
2341
73fbfcad 2342 if (dump_enabled_p ())
e645e942
TJ
2343 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2344 "\n");
ebfd146a 2345
9771b263 2346 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2347 {
2348 /* FORNOW: SLP if you can. */
d9f21f6a
RS
2349 /* All unroll factors have the form current_vector_size * X for some
2350 rational X, so they must have a common multiple. */
2351 unrolling_factor
2352 = force_common_multiple (unrolling_factor,
2353 SLP_INSTANCE_UNROLLING_FACTOR (instance));
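      /* Illustrative numbers (assumed): instances with unrolling
	 factors 2 and 4 combine to an overall factor of 4, their least
	 common multiple.  */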
ebfd146a 2354
ff802fa1 2355 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2356 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2357 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
2358 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2359 decided_to_slp++;
2360 }
2361
2362 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2363
73fbfcad 2364 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2365 {
2366 dump_printf_loc (MSG_NOTE, vect_location,
2367 "Decided to SLP %d instances. Unrolling factor ",
2368 decided_to_slp);
2369 dump_dec (MSG_NOTE, unrolling_factor);
2370 dump_printf (MSG_NOTE, "\n");
2371 }
437f4a00
IR
2372
2373 return (decided_to_slp > 0);
ebfd146a
IR
2374}
2375
2376
2377/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2378 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2379
2380static void
642fce57 2381vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 2382{
355fe088 2383 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2384 imm_use_iterator imm_iter;
355fe088 2385 gimple *use_stmt;
642fce57 2386 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
d755c7ef 2387 slp_tree child;
f2c74cc4 2388 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2389 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2390 int j;
2391
2392 /* Propagate hybrid down the SLP tree. */
2393 if (stype == hybrid)
2394 ;
2395 else if (HYBRID_SLP_STMT (stmt_vinfo))
2396 stype = hybrid;
2397 else
2398 {
2399 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2400 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2401 /* If we get a pattern stmt here we have to use the LHS of the
2402 original stmt for immediate uses. */
2403 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2404 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
29764870 2405 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
e7baeb39
RB
2406 tree def;
2407 if (gimple_code (stmt) == GIMPLE_PHI)
2408 def = gimple_phi_result (stmt);
2409 else
2410 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2411 if (def)
2412 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870
RB
2413 {
2414 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2415 continue;
2416 use_vinfo = vinfo_for_stmt (use_stmt);
2417 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2418 && STMT_VINFO_RELATED_STMT (use_vinfo))
2419 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2420 if (!STMT_SLP_TYPE (use_vinfo)
2421 && (STMT_VINFO_RELEVANT (use_vinfo)
2422 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2423 && !(gimple_code (use_stmt) == GIMPLE_PHI
2424 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2425 {
2426 if (dump_enabled_p ())
2427 {
2428 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2429 "def in non-SLP stmt: ");
2430 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2431 }
2432 stype = hybrid;
2433 }
29764870 2434 }
642fce57 2435 }
ebfd146a 2436
502f0263
RB
2437 if (stype == hybrid
2438 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2439 {
2440 if (dump_enabled_p ())
2441 {
2442 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2443 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2444 }
2445 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2446 }
ebfd146a 2447
642fce57 2448 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
603cca93 2449 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
90dd6e3d 2450 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2451}
f2c74cc4 2452
642fce57 2453/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2454
642fce57
RB
2455static tree
2456vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2457{
2458 walk_stmt_info *wi = (walk_stmt_info *)data;
2459 struct loop *loopp = (struct loop *)wi->info;
2460
2461 if (wi->is_lhs)
2462 return NULL_TREE;
2463
2464 if (TREE_CODE (*tp) == SSA_NAME
2465 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2466 {
355fe088 2467 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
642fce57
RB
2468 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2469 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
b1af7da6
RB
2470 {
2471 if (dump_enabled_p ())
2472 {
2473 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2474 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2475 }
2476 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2477 }
642fce57
RB
2478 }
2479
2480 return NULL_TREE;
ebfd146a
IR
2481}
2482
642fce57
RB
2483static tree
2484vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2485 walk_stmt_info *)
2486{
9186a9d3 2487 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
642fce57
RB
2488 /* If the stmt is in a SLP instance then this isn't a reason
2489 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2490 if (! STMT_SLP_TYPE (use_vinfo)
2491 && (STMT_VINFO_RELEVANT (use_vinfo)
2492 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2493 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2494 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2495 ;
2496 else
642fce57
RB
2497 *handled = true;
2498 return NULL_TREE;
2499}
ebfd146a
IR
2500
2501/* Find stmts that must be both vectorized and SLPed. */
2502
2503void
2504vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2505{
2506 unsigned int i;
9771b263 2507 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2508 slp_instance instance;
2509
73fbfcad 2510 if (dump_enabled_p ())
e645e942
TJ
2511 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2512 "\n");
ebfd146a 2513
642fce57
RB
 2514 /* First walk all pattern stmts in the loop and mark defs of uses as
2515 hybrid because immediate uses in them are not recorded. */
2516 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2517 {
2518 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2519 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2520 gsi_next (&gsi))
2521 {
355fe088 2522 gimple *stmt = gsi_stmt (gsi);
642fce57
RB
2523 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2524 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2525 {
2526 walk_stmt_info wi;
2527 memset (&wi, 0, sizeof (wi));
2528 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2529 gimple_stmt_iterator gsi2
2530 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2531 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2532 vect_detect_hybrid_slp_1, &wi);
2533 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2534 vect_detect_hybrid_slp_2,
2535 vect_detect_hybrid_slp_1, &wi);
2536 }
2537 }
2538 }
2539
2540 /* Then walk the SLP instance trees marking stmts with uses in
2541 non-SLP stmts as hybrid, also propagating hybrid down the
2542 SLP tree, collecting the above info on-the-fly. */
9771b263 2543 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2544 {
2545 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2546 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2547 i, pure_slp);
2548 }
ebfd146a
IR
2549}
2550
a70d6342 2551
2c515559
RS
2552/* Initialize a bb_vec_info struct for the statements between
2553 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2554
2555_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2556 gimple_stmt_iterator region_end_in)
2557 : vec_info (vec_info::bb, init_cost (NULL)),
2558 bb (gsi_bb (region_begin_in)),
2559 region_begin (region_begin_in),
2560 region_end (region_end_in)
a70d6342 2561{
a70d6342
IR
2562 gimple_stmt_iterator gsi;
2563
61d371eb
RB
2564 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2565 gsi_next (&gsi))
a70d6342 2566 {
355fe088 2567 gimple *stmt = gsi_stmt (gsi);
a70d6342 2568 gimple_set_uid (stmt, 0);
2c515559 2569 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
a70d6342
IR
2570 }
2571
2c515559 2572 bb->aux = this;
a70d6342
IR
2573}
2574
2575
2576/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2577 stmts in the basic block. */
2578
2c515559 2579_bb_vec_info::~_bb_vec_info ()
a70d6342 2580{
2c515559
RS
2581 for (gimple_stmt_iterator si = region_begin;
2582 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
a70d6342 2583 {
355fe088 2584 gimple *stmt = gsi_stmt (si);
a70d6342
IR
2585 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2586
2587 if (stmt_info)
2588 /* Free stmt_vec_info. */
2589 free_stmt_vec_info (stmt);
61d371eb
RB
2590
2591 /* Reset region marker. */
2592 gimple_set_uid (stmt, -1);
a70d6342
IR
2593 }
2594
2c515559 2595 bb->aux = NULL;
a70d6342
IR
2596}
2597
2598
8b7e9dba
RS
2599/* Analyze statements contained in SLP tree NODE after recursively analyzing
 2600 the subtree. NODE_INSTANCE contains NODE and VINFO contains NODE_INSTANCE.
2601
2602 Return true if the operations are supported. */
a70d6342
IR
2603
2604static bool
8b7e9dba
RS
2605vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2606 slp_instance node_instance)
a70d6342
IR
2607{
2608 bool dummy;
603cca93 2609 int i, j;
355fe088 2610 gimple *stmt;
d755c7ef 2611 slp_tree child;
a70d6342 2612
603cca93 2613 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
2614 return true;
2615
9771b263 2616 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
8b7e9dba 2617 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
d092494c 2618 return false;
a70d6342 2619
bd2f172f
RB
2620 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2621 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2622 gcc_assert (stmt_info);
2623 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2624
2625 /* For BB vectorization vector types are assigned here.
2626 Memory accesses already got their vector type assigned
2627 in vect_analyze_data_refs. */
2628 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2629 if (bb_vinfo
2630 && ! STMT_VINFO_DATA_REF (stmt_info))
a70d6342 2631 {
bd2f172f
RB
2632 gcc_assert (PURE_SLP_STMT (stmt_info));
2633
2634 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2635 if (dump_enabled_p ())
2636 {
2637 dump_printf_loc (MSG_NOTE, vect_location,
2638 "get vectype for scalar type: ");
2639 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2640 dump_printf (MSG_NOTE, "\n");
2641 }
2642
2643 tree vectype = get_vectype_for_scalar_type (scalar_type);
2644 if (!vectype)
2645 {
2646 if (dump_enabled_p ())
2647 {
2648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2649 "not SLPed: unsupported data-type ");
2650 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2651 scalar_type);
2652 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2653 }
2654 return false;
2655 }
2656
2657 if (dump_enabled_p ())
2658 {
2659 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2660 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2661 dump_printf (MSG_NOTE, "\n");
2662 }
2663
2664 gimple *sstmt;
2665 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2666 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
a70d6342
IR
2667 }
2668
8b7e9dba
RS
2669 /* Calculate the number of vector statements to be created for the
2670 scalar stmts in this node. For SLP reductions it is equal to the
2671 number of vector statements in the children (which has already been
2672 calculated by the recursive call). Otherwise it is the number of
2673 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
2674 VF divided by the number of elements in a vector. */
2675 if (GROUP_FIRST_ELEMENT (stmt_info)
2676 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2677 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2678 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2679 else
2680 {
d9f21f6a 2681 poly_uint64 vf;
8b7e9dba
RS
2682 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2683 vf = loop_vinfo->vectorization_factor;
2684 else
2685 vf = 1;
2686 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2687 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2688 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2689 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2690 }
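  /* Illustrative numbers (assumed): VF 4, GROUP_SIZE 2 and a vector
     type with 8 elements give 4 * 2 / 8 == 1 vector stmt.  */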
2691
bd2f172f
RB
2692 /* Push SLP node def-type to stmt operands. */
2693 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2694 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2695 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2696 = SLP_TREE_DEF_TYPE (child);
891ad31c 2697 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
bd2f172f
RB
2698 /* Restore def-types. */
2699 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2700 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2701 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2702 = vect_internal_def;
2703 if (! res)
2704 return false;
2705
2706 return true;
a70d6342
IR
2707}
2708
2709
8b7e9dba 2710/* Analyze statements in SLP instances of VINFO. Return true if the
a70d6342
IR
2711 operations are supported. */
2712
a12e42fc 2713bool
8b7e9dba 2714vect_slp_analyze_operations (vec_info *vinfo)
a70d6342 2715{
a70d6342
IR
2716 slp_instance instance;
2717 int i;
2718
a12e42fc
RB
2719 if (dump_enabled_p ())
2720 dump_printf_loc (MSG_NOTE, vect_location,
2721 "=== vect_slp_analyze_operations ===\n");
2722
8b7e9dba 2723 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
a70d6342 2724 {
8b7e9dba
RS
2725 if (!vect_slp_analyze_node_operations (vinfo,
2726 SLP_INSTANCE_TREE (instance),
891ad31c 2727 instance))
a70d6342 2728 {
a12e42fc
RB
2729 dump_printf_loc (MSG_NOTE, vect_location,
2730 "removing SLP instance operations starting from: ");
2731 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2732 SLP_TREE_SCALAR_STMTS
2733 (SLP_INSTANCE_TREE (instance))[0], 0);
2734 vect_free_slp_instance (instance);
8b7e9dba 2735 vinfo->slp_instances.ordered_remove (i);
a70d6342
IR
2736 }
2737 else
1a4b99c1
RB
2738 {
2739 /* Compute the costs of the SLP instance. */
8b7e9dba 2740 vect_analyze_slp_cost (instance, vinfo->target_cost_data);
1a4b99c1
RB
2741 i++;
2742 }
b8698a0f
L
2743 }
2744
8b7e9dba 2745 return !vinfo->slp_instances.is_empty ();
a70d6342
IR
2746}
2747
6eddf228
RB
2748
2749/* Compute the scalar cost of the SLP node NODE and its children
 2750 and return it. Do not account for defs that are marked in LIFE and
2751 update LIFE according to uses of NODE. */
2752
2753static unsigned
292cba13 2754vect_bb_slp_scalar_cost (basic_block bb,
ff4c81cc 2755 slp_tree node, vec<bool, va_heap> *life)
6eddf228
RB
2756{
2757 unsigned scalar_cost = 0;
2758 unsigned i;
355fe088 2759 gimple *stmt;
6eddf228
RB
2760 slp_tree child;
2761
2762 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2763 {
2764 unsigned stmt_cost;
2765 ssa_op_iter op_iter;
2766 def_operand_p def_p;
2767 stmt_vec_info stmt_info;
2768
ff4c81cc 2769 if ((*life)[i])
6eddf228
RB
2770 continue;
2771
2772 /* If there is a non-vectorized use of the defs then the scalar
2773 stmt is kept live in which case we do not account it or any
2774 required defs in the SLP children in the scalar cost. This
2775 way we make the vectorization more costly when compared to
2776 the scalar cost. */
2777 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2778 {
2779 imm_use_iterator use_iter;
355fe088 2780 gimple *use_stmt;
6eddf228 2781 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
f30a0ba5 2782 if (!is_gimple_debug (use_stmt)
61d371eb
RB
2783 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2784 use_stmt)
603cca93 2785 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
6eddf228 2786 {
ff4c81cc 2787 (*life)[i] = true;
6eddf228
RB
2788 BREAK_FROM_IMM_USE_STMT (use_iter);
2789 }
2790 }
ff4c81cc 2791 if ((*life)[i])
6eddf228
RB
2792 continue;
2793
b555a2e4
RB
2794 /* Count scalar stmts only once. */
2795 if (gimple_visited_p (stmt))
2796 continue;
2797 gimple_set_visited (stmt, true);
2798
6eddf228
RB
2799 stmt_info = vinfo_for_stmt (stmt);
2800 if (STMT_VINFO_DATA_REF (stmt_info))
2801 {
2802 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2803 stmt_cost = vect_get_stmt_cost (scalar_load);
2804 else
2805 stmt_cost = vect_get_stmt_cost (scalar_store);
2806 }
2807 else
2808 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2809
2810 scalar_cost += stmt_cost;
2811 }
2812
faa5399b 2813 auto_vec<bool, 20> subtree_life;
6eddf228 2814 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
2815 {
2816 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2817 {
2818 /* Do not directly pass LIFE to the recursive call, copy it to
2819 confine changes in the callee to the current child/subtree. */
2820 subtree_life.safe_splice (*life);
2821 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
2822 subtree_life.truncate (0);
2823 }
2824 }
6eddf228
RB
2825
2826 return scalar_cost;
2827}
2828
69f11a13
IR
2829/* Check if vectorization of the basic block is profitable. */
2830
2831static bool
2832vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2833{
9771b263 2834 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2835 slp_instance instance;
1a4b99c1 2836 int i;
c3e7ee41 2837 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2838 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2839
2840 /* Calculate scalar cost. */
6eddf228 2841 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2842 {
00f96dc9 2843 auto_vec<bool, 20> life;
ff4c81cc 2844 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
292cba13
RB
2845 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2846 SLP_INSTANCE_TREE (instance),
ff4c81cc 2847 &life);
69f11a13
IR
2848 }
2849
b555a2e4
RB
2850 /* Unset visited flag. */
2851 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2852 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2853 gimple_set_visited (gsi_stmt (gsi), false);
2854
c3e7ee41 2855 /* Complete the target-specific cost calculation. */
92345349
BS
2856 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2857 &vec_inside_cost, &vec_epilogue_cost);
2858
2859 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2860
73fbfcad 2861 if (dump_enabled_p ())
69f11a13 2862 {
78c60e3d
SS
2863 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2864 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2865 vec_inside_cost);
2866 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2867 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2868 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2869 }
2870
a6524bba
RB
 2871 /* Vectorization is profitable if its cost is no more than the cost of the scalar
2872 version. Note that we err on the vector side for equal cost because
2873 the cost estimate is otherwise quite pessimistic (constant uses are
2874 free on the scalar side but cost a load on the vector side for
2875 example). */
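  /* Illustrative numbers (assumed): vec_inside_cost 8, vec_prologue_cost
     2 and vec_epilogue_cost 0 give a vector cost of 10; with scalar_cost
     12 the block is vectorized, and at an equal scalar_cost of 10 it
     still is.  */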
2876 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2877 return false;
2878
2879 return true;
2880}
2881
a5b50aa1
RB
2882/* Check if the basic block can be vectorized. Returns a bb_vec_info
2883 if so and sets fatal to true if failure is independent of
2884 current_vector_size. */
a70d6342 2885
8e19f5a1 2886static bb_vec_info
61d371eb
RB
2887vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2888 gimple_stmt_iterator region_end,
a5b50aa1
RB
2889 vec<data_reference_p> datarefs, int n_stmts,
2890 bool &fatal)
a70d6342
IR
2891{
2892 bb_vec_info bb_vinfo;
a70d6342 2893 slp_instance instance;
8e19f5a1 2894 int i;
d9f21f6a 2895 poly_uint64 min_vf = 2;
e4a707c4 2896
a5b50aa1
RB
2897 /* The first group of checks is independent of the vector size. */
2898 fatal = true;
2899
61d371eb
RB
2900 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2901 {
2902 if (dump_enabled_p ())
2903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2904 "not vectorized: too many instructions in "
2905 "basic block.\n");
2906 free_data_refs (datarefs);
2907 return NULL;
2908 }
2909
2c515559 2910 bb_vinfo = new _bb_vec_info (region_begin, region_end);
a70d6342
IR
2911 if (!bb_vinfo)
2912 return NULL;
2913
61d371eb 2914 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
428db0ba
RB
2915
2916 /* Analyze the data references. */
2917
2918 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
a70d6342 2919 {
73fbfcad 2920 if (dump_enabled_p ())
78c60e3d
SS
2921 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2922 "not vectorized: unhandled data-ref in basic "
2923 "block.\n");
b8698a0f 2924
2c515559 2925 delete bb_vinfo;
a70d6342
IR
2926 return NULL;
2927 }
2928
fcac74a1 2929 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2930 {
73fbfcad 2931 if (dump_enabled_p ())
78c60e3d
SS
2932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2933 "not vectorized: not enough data-refs in "
2934 "basic block.\n");
a70d6342 2935
2c515559 2936 delete bb_vinfo;
a70d6342
IR
2937 return NULL;
2938 }
2939
310213d4 2940 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
2941 {
2942 if (dump_enabled_p ())
2943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2944 "not vectorized: unhandled data access in "
2945 "basic block.\n");
2946
2c515559 2947 delete bb_vinfo;
5abe1e05
RB
2948 return NULL;
2949 }
2950
a5b50aa1
RB
2951 /* If there are no grouped stores in the region there is no need
2952 to continue with pattern recog as vect_analyze_slp will fail
2953 anyway. */
2954 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 2955 {
73fbfcad 2956 if (dump_enabled_p ())
a5b50aa1
RB
2957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2958 "not vectorized: no grouped stores in "
2959 "basic block.\n");
b8698a0f 2960
2c515559 2961 delete bb_vinfo;
a70d6342
IR
2962 return NULL;
2963 }
b8698a0f 2964
a5b50aa1
RB
2965 /* While the rest of the analysis below depends on it in some way. */
2966 fatal = false;
2967
2968 vect_pattern_recog (bb_vinfo);
2969
a70d6342
IR
2970 /* Check the SLP opportunities in the basic block, analyze and build SLP
2971 trees. */
310213d4 2972 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 2973 {
73fbfcad 2974 if (dump_enabled_p ())
effb52da
RB
2975 {
2976 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2977 "Failed to SLP the basic block.\n");
2978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2979 "not vectorized: failed to find SLP opportunities "
2980 "in basic block.\n");
2981 }
a70d6342 2982
2c515559 2983 delete bb_vinfo;
a70d6342
IR
2984 return NULL;
2985 }
b8698a0f 2986
62c8a2cf
RS
2987 vect_record_base_alignments (bb_vinfo);
2988
c2a12ca0
RB
2989 /* Analyze and verify the alignment of data references and the
2990 dependence in the SLP instances. */
a5b50aa1
RB
2991 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2992 {
c2a12ca0
RB
2993 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2994 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1
RB
2995 {
2996 dump_printf_loc (MSG_NOTE, vect_location,
2997 "removing SLP instance operations starting from: ");
2998 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2999 SLP_TREE_SCALAR_STMTS
3000 (SLP_INSTANCE_TREE (instance))[0], 0);
3001 vect_free_slp_instance (instance);
3002 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3003 continue;
3004 }
c2a12ca0
RB
3005
3006 /* Mark all the statements that we want to vectorize as pure SLP and
3007 relevant. */
3008 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3009 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3010
a5b50aa1
RB
3011 i++;
3012 }
a5b50aa1
RB
3013 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3014 {
2c515559 3015 delete bb_vinfo;
a5b50aa1
RB
3016 return NULL;
3017 }
3018
8b7e9dba 3019 if (!vect_slp_analyze_operations (bb_vinfo))
a70d6342 3020 {
73fbfcad 3021 if (dump_enabled_p ())
e645e942 3022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 3023 "not vectorized: bad operation in basic block.\n");
a70d6342 3024
2c515559 3025 delete bb_vinfo;
a70d6342
IR
3026 return NULL;
3027 }
3028
69f11a13 3029 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 3030 if (!unlimited_cost_model (NULL)
69f11a13
IR
3031 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3032 {
73fbfcad 3033 if (dump_enabled_p ())
78c60e3d
SS
3034 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3035 "not vectorized: vectorization is not "
3036 "profitable.\n");
69f11a13 3037
2c515559 3038 delete bb_vinfo;
69f11a13
IR
3039 return NULL;
3040 }
3041
73fbfcad 3042 if (dump_enabled_p ())
78c60e3d
SS
3043 dump_printf_loc (MSG_NOTE, vect_location,
3044 "Basic block will be vectorized using SLP\n");
a70d6342
IR
3045
3046 return bb_vinfo;
3047}
3048
3049
428db0ba
RB
 3050/* Main entry for the BB vectorizer. Analyze and transform BB, returning
3051 true if anything in the basic-block was vectorized. */
3052
bool
vect_slp_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  gimple_stmt_iterator gsi;
  bool any_vectorized = false;
  auto_vector_sizes vector_sizes;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
  unsigned int next_size = 0;

  gsi = gsi_start_bb (bb);

  poly_uint64 autodetected_vector_size = 0;
  while (1)
    {
      if (gsi_end_p (gsi))
        break;

      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      for (; !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;
          insns++;

          if (gimple_location (stmt) != UNKNOWN_LOCATION)
            vect_location = gimple_location (stmt);

          if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
            break;
        }

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
        {
          gsi_next (&gsi);
          continue;
        }

      gimple_stmt_iterator region_end = gsi;

      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
                                        datarefs, insns, fatal);
      if (bb_vinfo
          && dbg_cnt (vect_slp))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");

          vect_schedule_slp (bb_vinfo);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "basic block part vectorized\n");

          vectorized = true;
        }
      delete bb_vinfo;

      any_vectorized |= vectorized;

      if (next_size == 0)
        autodetected_vector_size = current_vector_size;

      if (next_size < vector_sizes.length ()
          && known_eq (vector_sizes[next_size], autodetected_vector_size))
        next_size += 1;

      if (vectorized
          || next_size == vector_sizes.length ()
          || known_eq (current_vector_size, 0U)
          /* If vect_slp_analyze_bb_1 signaled that analysis for all
             vector sizes will fail do not bother iterating.  */
          || fatal)
        {
          if (gsi_end_p (region_end))
            break;

          /* Skip the unhandled stmt.  */
          gsi_next (&gsi);

          /* And reset vector sizes.  */
          current_vector_size = 0;
          next_size = 0;
        }
      else
        {
          /* Try the next biggest vector size.  */
          current_vector_size = vector_sizes[next_size++];
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "***** Re-trying analysis with "
                               "vector size ");
              dump_dec (MSG_NOTE, current_vector_size);
              dump_printf (MSG_NOTE, "\n");
            }

          /* Start over.  */
          gsi = region_begin;
        }
    }

  return any_vectorized;
}


/* Return true if the vector type to be used for the boolean constant
   that is operand OPNUM of statement STMT is a boolean vector.  */

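/* E.g. (an illustrative sketch, not from the sources): for a statement
   operating on masks, such as "c_3 = c_1 & 1" with boolean-typed
   operands, the constant 1 must be vectorized as a boolean vector
   because STMT's vectype is one; for a comparison or COND_EXPR the
   decision is instead based on the vector type of the other comparison
   operand.  */
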
static bool
vect_mask_constant_operand_p (gimple *stmt, int opnum)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  enum tree_code code = gimple_expr_code (stmt);
  tree op, vectype;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* For comparisons and COND_EXPRs the type is chosen depending
     on the other comparison operand.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    {
      if (opnum)
        op = gimple_assign_rhs1 (stmt);
      else
        op = gimple_assign_rhs2 (stmt);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
                               &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  if (code == COND_EXPR)
    {
      tree cond = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (cond) == SSA_NAME)
        op = cond;
      else if (opnum)
        op = TREE_OPERAND (cond, 1);
      else
        op = TREE_OPERAND (cond, 0);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
                               &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}


/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines which operand of the RHS of the scalar stmts we
   gather defs for.  NUMBER_OF_VECTORS is the number of vector defs to
   create.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple *stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple_seq ctor_seq = NULL;

  /* Check if vector type is a boolean vector.  */
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (stmt, op_num))
    vector_type
      = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
  else
    vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  /* Enforced by vect_get_and_check_slp_defs.  */
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = nunits * number_of_vectors / group_size;
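
  /* Plugging the comment's examples into the formula above: with
     group_size == 2, nunits == 4 and a single vector to fill,
     number_of_copies is 4 * 1 / 2 == 2 ({s1, s2, s1, s2}); with
     group_size == 8 and two vectors it is 4 * 2 / 8 == 1.  */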

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            {
              switch (code)
                {
                case COND_EXPR:
                  {
                    tree cond = gimple_assign_rhs1 (stmt);
                    if (TREE_CODE (cond) == SSA_NAME)
                      op = gimple_op (stmt, op_num + 1);
                    else if (op_num == 0 || op_num == 1)
                      op = TREE_OPERAND (cond, op_num);
                    else
                      {
                        if (op_num == 2)
                          op = gimple_assign_rhs2 (stmt);
                        else
                          op = gimple_assign_rhs3 (stmt);
                      }
                  }
                  break;

                case CALL_EXPR:
                  op = gimple_call_arg (stmt, op_num);
                  break;

                case LSHIFT_EXPR:
                case RSHIFT_EXPR:
                case LROTATE_EXPR:
                case RROTATE_EXPR:
                  op = gimple_op (stmt, op_num + 1);
                  /* Unlike the other binary operators, shifts/rotates have
                     the shift count being int, instead of the same type as
                     the lhs, so make sure the scalar is the right type if
                     we are dealing with vectors of
                     long long/long/short/char.  */
                  if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
                    op = fold_convert (TREE_TYPE (vector_type), op);
                  break;

                default:
                  op = gimple_op (stmt, op_num + 1);
                  break;
                }
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
          tree orig_op = op;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
                {
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      /* Can't use VIEW_CONVERT_EXPR for booleans because
                         of possibly different sizes of scalar value and
                         vector element.  */
                      if (integer_zerop (op))
                        op = build_int_cst (TREE_TYPE (vector_type), 0);
                      else if (integer_onep (op))
                        op = build_all_ones_cst (TREE_TYPE (vector_type));
                      else
                        gcc_unreachable ();
                    }
                  else
                    op = fold_unary (VIEW_CONVERT_EXPR,
                                     TREE_TYPE (vector_type), op);
                  gcc_assert (op && CONSTANT_CLASS_P (op));
                }
              else
                {
                  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
                  gimple *init_stmt;
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      tree true_val
                        = build_all_ones_cst (TREE_TYPE (vector_type));
                      tree false_val
                        = build_zero_cst (TREE_TYPE (vector_type));
                      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
                      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                       op, true_val,
                                                       false_val);
                    }
                  else
                    {
                      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
                                   op);
                      init_stmt
                        = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
                                               op);
                    }
                  gimple_seq_add_stmt (&ctor_seq, init_stmt);
                  op = new_temp;
                }
            }
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
          if (TREE_CODE (orig_op) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
              && STMT_VINFO_BB_VINFO (stmt_vinfo)
              && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
                  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
            place_after_defs = true;

          if (number_of_places_left_in_vector == 0)
            {
              if (constant_p)
                vec_cst = elts.build ();
              else
                {
                  vec<constructor_elt, va_gc> *v;
                  unsigned k;
                  vec_alloc (v, nunits);
                  for (k = 0; k < nunits; ++k)
                    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
                  vec_cst = build_constructor (vector_type, v);
                }
              tree init;
              gimple_stmt_iterator gsi;
              if (place_after_defs)
                {
                  gsi = gsi_for_stmt
                    (vect_find_last_scalar_stmt_in_slp (slp_node));
                  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
                }
              else
                init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
              if (ctor_seq != NULL)
                {
                  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
                  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
                                                        GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
              voprnds.quick_push (init);
              place_after_defs = false;
              number_of_places_left_in_vector = nunits;
              constant_p = true;
              elts.new_vector (vector_type, nunits, 1);
              elts.quick_grow (nunits);
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* If VF is greater than the unrolling factor needed for the SLP group of
     stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
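  /* E.g. (illustrative): if {v0, v1} were created but NUMBER_OF_VECTORS
     is 4, the loop below pushes v0 and v1 again, yielding
     {v0, v1, v0, v1}; if a neutral operand were known, splats of it
     would be pushed instead.  */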
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}


/* Get the vectorized definitions from SLP_NODE, whose vectorized
   def-stmts must already have been created.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  tree vec_oprnd;
  gimple *vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
        vec_oprnd = gimple_phi_result (vec_def_stmt);
      else
        vec_oprnd = gimple_get_lhs (vec_def_stmt);
      vec_oprnds->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors () to create vector stmts.
   Otherwise, the def-stmts must already be vectorized and the vectorized
   stmts must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

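/* E.g. (an illustrative sketch): for a node of two-operand stmts, OPS
   holds the two scalar operands of the node's first stmt; on return
   (*VEC_OPRNDS)[0] holds the vector defs for operand 0 and
   (*VEC_OPRNDS)[1] those for operand 1, each coming either from an
   already-vectorized child node or from constant/invariant vectors
   built here.  */
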
void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
                   vec<vec<tree> > *vec_oprnds)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
         this child node is checked again for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

          /* We have to check both pattern and original def, if available.  */
          if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
            {
              gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
              gimple *related
                = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
              tree first_def_op;

              if (gimple_code (first_def) == GIMPLE_PHI)
                first_def_op = gimple_phi_result (first_def);
              else
                first_def_op = gimple_get_lhs (first_def);
              if (operand_equal_p (oprnd, first_def_op, 0)
                  || (related
                      && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
                {
                  /* The number of vector defs is determined by the number of
                     vector statements in the node from which we get those
                     statements.  */
                  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
                  vectorized_defs = true;
                  child_index++;
                }
            }
          else
            child_index++;
        }

      if (!vectorized_defs)
        {
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
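              /* E.g. (illustrative): for a conversion from char to int,
                 the LHS unit is 4 bytes and the RHS unit 1 byte, so four
                 times fewer vectors are needed for the RHS operands.  */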
              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects);

      vec_oprnds->quick_push (vec_defs);
    }
}

/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, poly_uint64 vf,
                              slp_instance slp_node_instance, bool analyze_only,
                              unsigned *n_perms)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int nunits, vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int mask_element;
  machine_mode mode;
  unsigned HOST_WIDE_INT const_vf;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));

  mode = TYPE_MODE (vectype);

  /* At the moment, all permutations are represented using per-element
     indices, so we can't cope with variable vectorization factors.  */
  if (!vf.is_constant (&const_vf))
    return false;

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  vec_perm_builder mask (nunits, nunits, 1);
  mask.quick_grow (nunits);
  vec_perm_indices indices;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
         i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE.  Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for a permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

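  /* A worked instance of the index computation below, using the example
     above: for the a's node the load permutation is {0,0,0} and the
     interleaving group size is 3.  With nunits == 4, copy j == 1 and
     element k == 0 give i = 0 + 1 * 3 = 3, hence vec_index == 0 and
     mask_element == 3: the final 3 in the first mask {0,0,0,3}.  At
     i == 6 a second input vector is needed, so mask_element becomes
     6 % 4 + nunits == 6, as in {3,3,6,6}.  */
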
  int vect_stmts_counter = 0;
  int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  for (unsigned int j = 0; j < const_vf; j++)
    {
      for (int k = 0; k < group_size; k++)
        {
          int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
                   + j * STMT_VINFO_GROUP_SIZE (stmt_info));
          vec_index = i / nunits;
          mask_element = i % nunits;
          if (vec_index == first_vec_index
              || first_vec_index == -1)
            {
              first_vec_index = vec_index;
            }
          else if (vec_index == second_vec_index
                   || second_vec_index == -1)
            {
              second_vec_index = vec_index;
              mask_element += nunits;
            }
          else
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "permutation requires at "
                                   "least three vectors ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              gcc_assert (analyze_only);
              return false;
            }

          gcc_assert (mask_element >= 0
                      && mask_element < 2 * nunits);
          if (mask_element != index)
            noop_p = false;
          mask[index++] = mask_element;

          if (index == nunits && !noop_p)
            {
              indices.new_vector (mask, 2, nunits);
              if (!can_vec_perm_const_p (mode, indices))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported vect permute { ");
                      for (int i = 0; i < nunits; ++i)
                        {
                          dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
                          dump_printf (MSG_MISSED_OPTIMIZATION, " ");
                        }
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
                  gcc_assert (analyze_only);
                  return false;
                }

              ++*n_perms;
            }

          if (index == nunits)
            {
              if (!analyze_only)
                {
                  tree mask_vec = NULL_TREE;

                  if (! noop_p)
                    mask_vec = vec_perm_indices_to_tree (mask_type, indices);

                  if (second_vec_index == -1)
                    second_vec_index = first_vec_index;

                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index];
                  tree second_vec = dr_chain[second_vec_index];
                  gimple *perm_stmt;
                  if (! noop_p)
                    {
                      tree perm_dest
                        = vect_create_destination_var (gimple_assign_lhs (stmt),
                                                       vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      perm_stmt = gimple_build_assign (perm_dest,
                                                       VEC_PERM_EXPR,
                                                       first_vec, second_vec,
                                                       mask_vec);
                      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
                    }
                  else
                    /* If mask was NULL_TREE generate the requested
                       identity transform.  */
                    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
                }

              index = 0;
              first_vec_index = -1;
              second_vec_index = -1;
              noop_p = true;
            }
        }
    }

  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
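  /* E.g. (illustrative): if two instances share a subtree of identical
     scalar stmts, the first visit records this node as LEADER in BST_MAP
     and later visits splice the leader's SLP_TREE_VEC_STMTS instead of
     emitting the vector stmts a second time.  */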
  slp_tree &leader
    = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
  if (leader)
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
      return false;
    }

  leader = node;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
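  /* E.g. (an illustrative sketch, not from the sources): for a group
     {x0 = a0 + b0; x1 = a1 - b1} the node is transformed once with
     PLUS_EXPR and once with MINUS_EXPR, and the two result vectors are
     combined by a VEC_PERM_EXPR whose mask selects even elements from
     the plus vector and odd elements from the minus vector (the addsub
     pattern).  */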
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
        if (gimple_assign_rhs_code (ostmt) != code0)
          {
            mask.quick_push (1);
            ocode = gimple_assign_rhs_code (ostmt);
          }
        else
          mask.quick_push (0);
      if (ocode != ERROR_MARK)
        {
          vec<gimple *> v0;
          vec<gimple *> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              /* Enforced by vect_build_slp_tree, which rejects variable-length
                 vectors for SLP_TREE_TWO_OPERATORS.  */
              unsigned int const_nunits = nunits.to_constant ();
              tree_vector_builder melts (mvectype, const_nunits, 1);
              for (l = 0; l < const_nunits; ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  tree t = build_int_cst (meltype,
                                          mask[k++] * const_nunits + l);
                  melts.quick_push (t);
                }
              tmask = melts.build ();

              /* ???  Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]),
                                           gimple_assign_lhs (v1[j]), tmask);
              vect_finish_stmt_generation (stmt, vstmt, &si);
              SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
            }
          v0.release ();
          v1.release ();
          return false;
        }
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until the end of vect_schedule_slp, because
   multiple SLP instances may refer to the same scalar stmt.  */

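/* E.g. (illustrative): once "x_1 = sqrtf (a_2)" has been vectorized as
   part of an SLP instance, the scalar call is replaced below by
   "x_1 = 0.0" so that DCE can remove it; its stmt_vec_info is moved to
   the new assignment.  */
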
static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      scalar_stmts_to_slp_tree_map_t *bst_map
        = new scalar_stmts_to_slp_tree_map_t ();
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, bst_map);
      delete bst_map;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ???  Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ???  For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}