]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
PR c++/86171 - ICE with recursive alias instantiation.
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
85ec4feb 2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
957060b5 35#include "params.h"
40e23961 36#include "fold-const.h"
d8a2d370 37#include "stor-layout.h"
5be5c238 38#include "gimple-iterator.h"
ebfd146a 39#include "cfgloop.h"
ebfd146a 40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
642fce57 42#include "gimple-walk.h"
428db0ba 43#include "dbgcnt.h"
5ebaa477 44#include "tree-vector-builder.h"
f151c9e1 45#include "vec-perm-indices.h"
018b2744
RS
46#include "gimple-fold.h"
47#include "internal-fn.h"
a70d6342
IR
48
49
ebfd146a
IR
50/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
51
52static void
53vect_free_slp_tree (slp_tree node)
54{
d092494c 55 int i;
d755c7ef 56 slp_tree child;
d092494c 57
9771b263 58 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 59 vect_free_slp_tree (child);
b8698a0f 60
78810bd3
RB
61 gimple *stmt;
62 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
63 /* After transform some stmts are removed and thus their vinfo is gone. */
64 if (vinfo_for_stmt (stmt))
65 {
66 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
67 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
68 }
69
9771b263
DN
70 SLP_TREE_CHILDREN (node).release ();
71 SLP_TREE_SCALAR_STMTS (node).release ();
72 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 73 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
74
75 free (node);
76}
77
78
79/* Free the memory allocated for the SLP instance. */
80
81void
82vect_free_slp_instance (slp_instance instance)
83{
84 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
9771b263 85 SLP_INSTANCE_LOADS (instance).release ();
c7e62a26 86 free (instance);
ebfd146a
IR
87}
88
89
d092494c
IR
90/* Create an SLP node for SCALAR_STMTS. */
91
92static slp_tree
355fe088 93vect_create_new_slp_node (vec<gimple *> scalar_stmts)
d092494c 94{
d3cfd39e 95 slp_tree node;
355fe088 96 gimple *stmt = scalar_stmts[0];
d092494c
IR
97 unsigned int nops;
98
99 if (is_gimple_call (stmt))
100 nops = gimple_call_num_args (stmt);
101 else if (is_gimple_assign (stmt))
f7e531cf
IR
102 {
103 nops = gimple_num_ops (stmt) - 1;
104 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
105 nops++;
106 }
e7baeb39
RB
107 else if (gimple_code (stmt) == GIMPLE_PHI)
108 nops = 0;
d092494c
IR
109 else
110 return NULL;
111
d3cfd39e 112 node = XNEW (struct _slp_tree);
d092494c 113 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263 114 SLP_TREE_VEC_STMTS (node).create (0);
68435eb2 115 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
9771b263 116 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 117 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 118 SLP_TREE_TWO_OPERATORS (node) = false;
603cca93 119 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
d092494c 120
78810bd3
RB
121 unsigned i;
122 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
123 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
124
d092494c
IR
125 return node;
126}
127
128
ddf56386
RB
129/* This structure is used in creation of an SLP tree. Each instance
130 corresponds to the same operand in a group of scalar stmts in an SLP
131 node. */
132typedef struct _slp_oprnd_info
133{
134 /* Def-stmts for the operands. */
135 vec<gimple *> def_stmts;
136 /* Information about the first statement, its vector def-type, type, the
137 operand itself in case it's constant, and an indication if it's a pattern
138 stmt. */
ddf56386 139 tree first_op_type;
34e82342 140 enum vect_def_type first_dt;
ddf56386
RB
141 bool first_pattern;
142 bool second_pattern;
143} *slp_oprnd_info;
144
145
d092494c
IR
146/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
147 operand. */
9771b263 148static vec<slp_oprnd_info>
d092494c
IR
149vect_create_oprnd_info (int nops, int group_size)
150{
151 int i;
152 slp_oprnd_info oprnd_info;
9771b263 153 vec<slp_oprnd_info> oprnds_info;
d092494c 154
9771b263 155 oprnds_info.create (nops);
d092494c
IR
156 for (i = 0; i < nops; i++)
157 {
158 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 159 oprnd_info->def_stmts.create (group_size);
d092494c 160 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 161 oprnd_info->first_op_type = NULL_TREE;
d092494c 162 oprnd_info->first_pattern = false;
effb52da 163 oprnd_info->second_pattern = false;
9771b263 164 oprnds_info.quick_push (oprnd_info);
d092494c
IR
165 }
166
167 return oprnds_info;
168}
169
170
d3cfd39e
JJ
171/* Free operands info. */
172
d092494c 173static void
9771b263 174vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
175{
176 int i;
177 slp_oprnd_info oprnd_info;
178
9771b263 179 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 180 {
9771b263 181 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
182 XDELETE (oprnd_info);
183 }
d092494c 184
9771b263 185 oprnds_info.release ();
d092494c
IR
186}
187
188
d755c7ef
RB
189/* Find the place of the data-ref in STMT in the interleaving chain that starts
190 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
191
b210f45f 192int
355fe088 193vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
d755c7ef 194{
355fe088 195 gimple *next_stmt = first_stmt;
d755c7ef
RB
196 int result = 0;
197
2c53b149 198 if (first_stmt != DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
d755c7ef
RB
199 return -1;
200
201 do
202 {
203 if (next_stmt == stmt)
204 return result;
2c53b149 205 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
c8047699 206 if (next_stmt)
2c53b149 207 result += DR_GROUP_GAP (vinfo_for_stmt (next_stmt));
d755c7ef
RB
208 }
209 while (next_stmt);
210
211 return -1;
212}
213
018b2744
RS
214/* Check whether it is possible to load COUNT elements of type ELT_MODE
215 using the method implemented by duplicate_and_interleave. Return true
216 if so, returning the number of intermediate vectors in *NVECTORS_OUT
217 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
218 (if nonnull). */
219
f1739b48 220bool
018b2744 221can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
f1739b48
RS
222 unsigned int *nvectors_out,
223 tree *vector_type_out,
224 tree *permutes)
018b2744
RS
225{
226 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
227 poly_int64 nelts;
228 unsigned int nvectors = 1;
229 for (;;)
230 {
231 scalar_int_mode int_mode;
232 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
233 if (multiple_p (current_vector_size, elt_bytes, &nelts)
234 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
235 {
236 tree int_type = build_nonstandard_integer_type
237 (GET_MODE_BITSIZE (int_mode), 1);
238 tree vector_type = build_vector_type (int_type, nelts);
239 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
240 {
241 vec_perm_builder sel1 (nelts, 2, 3);
242 vec_perm_builder sel2 (nelts, 2, 3);
243 poly_int64 half_nelts = exact_div (nelts, 2);
244 for (unsigned int i = 0; i < 3; ++i)
245 {
246 sel1.quick_push (i);
247 sel1.quick_push (i + nelts);
248 sel2.quick_push (half_nelts + i);
249 sel2.quick_push (half_nelts + i + nelts);
250 }
251 vec_perm_indices indices1 (sel1, 2, nelts);
252 vec_perm_indices indices2 (sel2, 2, nelts);
253 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
254 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
255 {
256 if (nvectors_out)
257 *nvectors_out = nvectors;
258 if (vector_type_out)
259 *vector_type_out = vector_type;
260 if (permutes)
261 {
262 permutes[0] = vect_gen_perm_mask_checked (vector_type,
263 indices1);
264 permutes[1] = vect_gen_perm_mask_checked (vector_type,
265 indices2);
266 }
267 return true;
268 }
269 }
270 }
271 if (!multiple_p (elt_bytes, 2, &elt_bytes))
272 return false;
273 nvectors *= 2;
274 }
275}
d755c7ef 276
d092494c
IR
277/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
278 they are of a valid type and that they match the defs of the first stmt of
4cecd659 279 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
018b2744
RS
280 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
281 indicates swap is required for cond_expr stmts. Specifically, *SWAP
282 is 1 if STMT is cond and operands of comparison need to be swapped;
283 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
284 If there is any operand swap in this function, *SWAP is set to non-zero
285 value.
4cecd659
BC
286 If there was a fatal error return -1; if the error could be corrected by
287 swapping operands of father node of this one, return 1; if everything is
288 ok return 0. */
4cecd659
BC
289static int
290vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
018b2744 291 vec<gimple *> stmts, unsigned stmt_num,
4cecd659 292 vec<slp_oprnd_info> *oprnds_info)
ebfd146a 293{
018b2744 294 gimple *stmt = stmts[stmt_num];
ebfd146a
IR
295 tree oprnd;
296 unsigned int i, number_of_oprnds;
355fe088 297 gimple *def_stmt;
d092494c 298 enum vect_def_type dt = vect_uninitialized_def;
d092494c 299 bool pattern = false;
abf9bfbc 300 slp_oprnd_info oprnd_info;
b0b4483e
RB
301 int first_op_idx = 1;
302 bool commutative = false;
303 bool first_op_cond = false;
effb52da
RB
304 bool first = stmt_num == 0;
305 bool second = stmt_num == 1;
b8698a0f 306
d092494c 307 if (is_gimple_call (stmt))
190c2236
JJ
308 {
309 number_of_oprnds = gimple_call_num_args (stmt);
b0b4483e 310 first_op_idx = 3;
190c2236 311 }
f7e531cf
IR
312 else if (is_gimple_assign (stmt))
313 {
b0b4483e 314 enum tree_code code = gimple_assign_rhs_code (stmt);
f7e531cf 315 number_of_oprnds = gimple_num_ops (stmt) - 1;
4cecd659
BC
316 /* Swap can only be done for cond_expr if asked to, otherwise we
317 could result in different comparison code to the first stmt. */
a414c77f
IE
318 if (gimple_assign_rhs_code (stmt) == COND_EXPR
319 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
b0b4483e
RB
320 {
321 first_op_cond = true;
b0b4483e
RB
322 number_of_oprnds++;
323 }
324 else
325 commutative = commutative_tree_code (code);
f7e531cf 326 }
d092494c 327 else
b0b4483e 328 return -1;
ebfd146a 329
4cecd659
BC
330 bool swapped = (*swap != 0);
331 gcc_assert (!swapped || first_op_cond);
ebfd146a
IR
332 for (i = 0; i < number_of_oprnds; i++)
333 {
b0b4483e
RB
334again:
335 if (first_op_cond)
f7e531cf 336 {
4cecd659
BC
337 /* Map indicating how operands of cond_expr should be swapped. */
338 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
339 int *map = maps[*swap];
340
341 if (i < 2)
342 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
b0b4483e 343 else
4cecd659 344 oprnd = gimple_op (stmt, map[i]);
f7e531cf
IR
345 }
346 else
4cecd659 347 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
f7e531cf 348
9771b263 349 oprnd_info = (*oprnds_info)[i];
ebfd146a 350
81c40241 351 if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
ebfd146a 352 {
73fbfcad 353 if (dump_enabled_p ())
ebfd146a 354 {
78c60e3d 355 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3fc356dc 356 "Build SLP failed: can't analyze def for ");
78c60e3d 357 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 358 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
359 }
360
b0b4483e 361 return -1;
ebfd146a
IR
362 }
363
a70d6342 364 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
ff802fa1 365 from the pattern. Check that all the stmts of the node are in the
ebfd146a 366 pattern. */
f5709183 367 if (def_stmt && gimple_bb (def_stmt)
61d371eb 368 && vect_stmt_in_region_p (vinfo, def_stmt)
ebfd146a 369 && vinfo_for_stmt (def_stmt)
83197f37 370 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
f5709183
IR
371 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
372 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
ebfd146a 373 {
d092494c 374 pattern = true;
effb52da
RB
375 if (!first && !oprnd_info->first_pattern
376 /* Allow different pattern state for the defs of the
377 first stmt in reduction chains. */
378 && (oprnd_info->first_dt != vect_reduction_def
379 || (!second && !oprnd_info->second_pattern)))
d092494c 380 {
b0b4483e
RB
381 if (i == 0
382 && !swapped
383 && commutative)
384 {
385 swapped = true;
386 goto again;
387 }
388
73fbfcad 389 if (dump_enabled_p ())
d092494c 390 {
78c60e3d
SS
391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
392 "Build SLP failed: some of the stmts"
393 " are in a pattern, and others are not ");
394 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 395 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
d092494c 396 }
ebfd146a 397
b0b4483e 398 return 1;
ebfd146a
IR
399 }
400
401 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
d092494c 402 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
ebfd146a 403
f7e531cf 404 if (dt == vect_unknown_def_type)
ebfd146a 405 {
73fbfcad 406 if (dump_enabled_p ())
78c60e3d 407 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 408 "Unsupported pattern.\n");
b0b4483e 409 return -1;
ebfd146a
IR
410 }
411
412 switch (gimple_code (def_stmt))
413 {
81c40241
RB
414 case GIMPLE_PHI:
415 case GIMPLE_ASSIGN:
416 break;
417
418 default:
419 if (dump_enabled_p ())
420 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
421 "unsupported defining stmt:\n");
422 return -1;
ebfd146a
IR
423 }
424 }
425
effb52da
RB
426 if (second)
427 oprnd_info->second_pattern = pattern;
428
d092494c 429 if (first)
ebfd146a 430 {
d092494c
IR
431 oprnd_info->first_dt = dt;
432 oprnd_info->first_pattern = pattern;
793d9a16 433 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 434 }
ebfd146a
IR
435 else
436 {
d092494c
IR
437 /* Not first stmt of the group, check that the def-stmt/s match
438 the def-stmt/s of the first stmt. Allow different definition
439 types for reduction chains: the first stmt must be a
440 vect_reduction_def (a phi node), and the rest
441 vect_internal_def. */
018b2744
RS
442 tree type = TREE_TYPE (oprnd);
443 if ((oprnd_info->first_dt != dt
444 && !(oprnd_info->first_dt == vect_reduction_def
445 && dt == vect_internal_def)
446 && !((oprnd_info->first_dt == vect_external_def
447 || oprnd_info->first_dt == vect_constant_def)
448 && (dt == vect_external_def
449 || dt == vect_constant_def)))
450 || !types_compatible_p (oprnd_info->first_op_type, type))
ebfd146a 451 {
b0b4483e
RB
452 /* Try swapping operands if we got a mismatch. */
453 if (i == 0
454 && !swapped
455 && commutative)
456 {
457 swapped = true;
458 goto again;
459 }
460
abf9bfbc
RB
461 if (dump_enabled_p ())
462 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 463 "Build SLP failed: different types\n");
d092494c 464
b0b4483e 465 return 1;
ebfd146a 466 }
018b2744
RS
467 if ((dt == vect_constant_def
468 || dt == vect_external_def)
469 && !current_vector_size.is_constant ()
470 && (TREE_CODE (type) == BOOLEAN_TYPE
471 || !can_duplicate_and_interleave_p (stmts.length (),
472 TYPE_MODE (type))))
a23644f2
RS
473 {
474 if (dump_enabled_p ())
475 {
476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
477 "Build SLP failed: invalid type of def "
478 "for variable-length SLP ");
479 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
480 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
481 }
482 return -1;
483 }
018b2744
RS
484 }
485
486 /* Check the types of the definitions. */
487 switch (dt)
488 {
489 case vect_constant_def:
490 case vect_external_def:
ebfd146a 491 break;
b8698a0f 492
c78e3652 493 case vect_reduction_def:
e7baeb39 494 case vect_induction_def:
8644a673 495 case vect_internal_def:
abf9bfbc 496 oprnd_info->def_stmts.quick_push (def_stmt);
ebfd146a
IR
497 break;
498
499 default:
500 /* FORNOW: Not supported. */
73fbfcad 501 if (dump_enabled_p ())
ebfd146a 502 {
78c60e3d
SS
503 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
504 "Build SLP failed: illegal type of def ");
81c40241 505 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 506 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
507 }
508
b0b4483e 509 return -1;
ebfd146a
IR
510 }
511 }
512
b0b4483e
RB
513 /* Swap operands. */
514 if (swapped)
515 {
78810bd3
RB
516 /* If there are already uses of this stmt in a SLP instance then
517 we've committed to the operand order and can't swap it. */
518 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
519 {
520 if (dump_enabled_p ())
521 {
522 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
523 "Build SLP failed: cannot swap operands of "
524 "shared stmt ");
525 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
526 }
527 return -1;
528 }
529
b0b4483e
RB
530 if (first_op_cond)
531 {
532 tree cond = gimple_assign_rhs1 (stmt);
4cecd659
BC
533 enum tree_code code = TREE_CODE (cond);
534
535 /* Swap. */
536 if (*swap == 1)
537 {
538 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
539 &TREE_OPERAND (cond, 1));
540 TREE_SET_CODE (cond, swap_tree_comparison (code));
541 }
542 /* Invert. */
543 else
544 {
545 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
546 gimple_assign_rhs3_ptr (stmt));
547 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
548 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
549 gcc_assert (code != ERROR_MARK);
550 TREE_SET_CODE (cond, code);
551 }
b0b4483e
RB
552 }
553 else
554 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
555 gimple_assign_rhs2_ptr (stmt));
78810bd3
RB
556 if (dump_enabled_p ())
557 {
558 dump_printf_loc (MSG_NOTE, vect_location,
559 "swapped operands to match def types in ");
560 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
561 }
b0b4483e
RB
562 }
563
4cecd659 564 *swap = swapped;
b0b4483e 565 return 0;
ebfd146a
IR
566}
567
b161f2c9
RS
568/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
569 caller's attempt to find the vector type in STMT with the narrowest
570 element type. Return true if VECTYPE is nonnull and if it is valid
571 for VINFO. When returning true, update MAX_NUNITS to reflect the
572 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
573 as for vect_build_slp_tree. */
574
575static bool
576vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
4b6068ea 577 tree vectype, poly_uint64 *max_nunits)
b161f2c9
RS
578{
579 if (!vectype)
580 {
581 if (dump_enabled_p ())
582 {
583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
584 "Build SLP failed: unsupported data-type in ");
585 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
586 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
587 }
588 /* Fatal mismatch. */
589 return false;
590 }
591
592 /* If populating the vector type requires unrolling then fail
593 before adjusting *max_nunits for basic-block vectorization. */
4b6068ea
RS
594 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
595 unsigned HOST_WIDE_INT const_nunits;
b161f2c9 596 if (is_a <bb_vec_info> (vinfo)
4b6068ea
RS
597 && (!nunits.is_constant (&const_nunits)
598 || const_nunits > group_size))
b161f2c9
RS
599 {
600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
601 "Build SLP failed: unrolling required "
602 "in basic block SLP\n");
603 /* Fatal mismatch. */
604 return false;
605 }
606
607 /* In case of multiple types we need to detect the smallest type. */
4b6068ea 608 vect_update_max_nunits (max_nunits, vectype);
b161f2c9
RS
609 return true;
610}
ebfd146a 611
1f3cb663
RS
612/* STMTS is a group of GROUP_SIZE SLP statements in which some
613 statements do the same operation as the first statement and in which
614 the others do ALT_STMT_CODE. Return true if we can take one vector
615 of the first operation and one vector of the second and permute them
616 to get the required result. VECTYPE is the type of the vector that
617 would be permuted. */
618
619static bool
620vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
621 tree vectype, tree_code alt_stmt_code)
622{
623 unsigned HOST_WIDE_INT count;
624 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
625 return false;
626
627 vec_perm_builder sel (count, count, 1);
628 for (unsigned int i = 0; i < count; ++i)
629 {
630 unsigned int elt = i;
631 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
632 elt += count;
633 sel.quick_push (elt);
634 }
635 vec_perm_indices indices (sel, 2, count);
636 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
637}
638
6983e6b5
RB
639/* Verify if the scalar stmts STMTS are isomorphic, require data
640 permutation or are of unsupported types of operation. Return
641 true if they are, otherwise return false and indicate in *MATCHES
642 which stmts are not isomorphic to the first one. If MATCHES[0]
643 is false then this indicates the comparison could not be
4cecd659
BC
644 carried out or the stmts will never be vectorized by SLP.
645
646 Note COND_EXPR is possibly ismorphic to another one after swapping its
647 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
648 the first stmt by swapping the two operands of comparison; set SWAP[i]
649 to 2 if stmt I is isormorphic to the first stmt by inverting the code
650 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
651 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
ebfd146a
IR
652
653static bool
4cecd659 654vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
355fe088 655 vec<gimple *> stmts, unsigned int group_size,
4b6068ea 656 unsigned nops, poly_uint64 *max_nunits,
97a1a642 657 bool *matches, bool *two_operators)
ebfd146a 658{
ebfd146a 659 unsigned int i;
355fe088 660 gimple *first_stmt = stmts[0], *stmt = stmts[0];
6876e5bc
RB
661 enum tree_code first_stmt_code = ERROR_MARK;
662 enum tree_code alt_stmt_code = ERROR_MARK;
663 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 664 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 665 tree lhs;
6983e6b5 666 bool need_same_oprnds = false;
1f3cb663 667 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
ebfd146a
IR
668 optab optab;
669 int icode;
ef4bddc2
RS
670 machine_mode optab_op2_mode;
671 machine_mode vec_mode;
355fe088 672 gimple *first_load = NULL, *prev_first_load = NULL;
d092494c 673
ebfd146a 674 /* For every stmt in NODE find its def stmt/s. */
9771b263 675 FOR_EACH_VEC_ELT (stmts, i, stmt)
ebfd146a 676 {
1f3cb663 677 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4cecd659 678 swap[i] = 0;
6983e6b5
RB
679 matches[i] = false;
680
73fbfcad 681 if (dump_enabled_p ())
ebfd146a 682 {
78c60e3d
SS
683 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
684 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
685 }
686
4b5caab7
IR
687 /* Fail to vectorize statements marked as unvectorizable. */
688 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
689 {
73fbfcad 690 if (dump_enabled_p ())
4b5caab7 691 {
78c60e3d
SS
692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
693 "Build SLP failed: unvectorizable statement ");
694 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4b5caab7 695 }
6983e6b5
RB
696 /* Fatal mismatch. */
697 matches[0] = false;
4b5caab7
IR
698 return false;
699 }
700
ebfd146a
IR
701 lhs = gimple_get_lhs (stmt);
702 if (lhs == NULL_TREE)
703 {
73fbfcad 704 if (dump_enabled_p ())
ebfd146a 705 {
78c60e3d
SS
706 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
707 "Build SLP failed: not GIMPLE_ASSIGN nor "
708 "GIMPLE_CALL ");
709 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 710 }
6983e6b5
RB
711 /* Fatal mismatch. */
712 matches[0] = false;
ebfd146a
IR
713 return false;
714 }
715
1f3cb663
RS
716 tree nunits_vectype;
717 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
718 &nunits_vectype)
719 || (nunits_vectype
720 && !vect_record_max_nunits (vinfo, stmt, group_size,
721 nunits_vectype, max_nunits)))
b161f2c9 722 {
6983e6b5
RB
723 /* Fatal mismatch. */
724 matches[0] = false;
1f3cb663
RS
725 return false;
726 }
727
728 gcc_assert (vectype);
b8698a0f 729
538dd0b7 730 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
731 {
732 rhs_code = CALL_EXPR;
538dd0b7
DM
733 if (gimple_call_internal_p (call_stmt)
734 || gimple_call_tail_p (call_stmt)
735 || gimple_call_noreturn_p (call_stmt)
736 || !gimple_call_nothrow_p (call_stmt)
737 || gimple_call_chain (call_stmt))
190c2236 738 {
73fbfcad 739 if (dump_enabled_p ())
190c2236 740 {
78c60e3d
SS
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
742 "Build SLP failed: unsupported call type ");
538dd0b7
DM
743 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
744 call_stmt, 0);
190c2236 745 }
6983e6b5
RB
746 /* Fatal mismatch. */
747 matches[0] = false;
190c2236
JJ
748 return false;
749 }
750 }
ebfd146a
IR
751 else
752 rhs_code = gimple_assign_rhs_code (stmt);
753
754 /* Check the operation. */
755 if (i == 0)
756 {
757 first_stmt_code = rhs_code;
758
b8698a0f 759 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
760 vector shift with scalar shift operand. */
761 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
762 || rhs_code == LROTATE_EXPR
763 || rhs_code == RROTATE_EXPR)
764 {
1f3cb663
RS
765 if (vectype == boolean_type_node)
766 {
767 if (dump_enabled_p ())
768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
769 "Build SLP failed: shift of a"
770 " boolean.\n");
771 /* Fatal mismatch. */
772 matches[0] = false;
773 return false;
774 }
775
ebfd146a
IR
776 vec_mode = TYPE_MODE (vectype);
777
778 /* First see if we have a vector/vector shift. */
779 optab = optab_for_tree_code (rhs_code, vectype,
780 optab_vector);
781
782 if (!optab
947131ba 783 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
784 {
785 /* No vector/vector shift, try for a vector/scalar shift. */
786 optab = optab_for_tree_code (rhs_code, vectype,
787 optab_scalar);
788
789 if (!optab)
790 {
73fbfcad 791 if (dump_enabled_p ())
78c60e3d 792 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 793 "Build SLP failed: no optab.\n");
6983e6b5
RB
794 /* Fatal mismatch. */
795 matches[0] = false;
ebfd146a
IR
796 return false;
797 }
947131ba 798 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
799 if (icode == CODE_FOR_nothing)
800 {
73fbfcad 801 if (dump_enabled_p ())
78c60e3d
SS
802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
803 "Build SLP failed: "
e645e942 804 "op not supported by target.\n");
6983e6b5
RB
805 /* Fatal mismatch. */
806 matches[0] = false;
ebfd146a
IR
807 return false;
808 }
809 optab_op2_mode = insn_data[icode].operand[2].mode;
810 if (!VECTOR_MODE_P (optab_op2_mode))
811 {
812 need_same_oprnds = true;
813 first_op1 = gimple_assign_rhs2 (stmt);
814 }
815 }
816 }
36ba4aae
IR
817 else if (rhs_code == WIDEN_LSHIFT_EXPR)
818 {
819 need_same_oprnds = true;
820 first_op1 = gimple_assign_rhs2 (stmt);
821 }
ebfd146a
IR
822 }
823 else
824 {
6876e5bc
RB
825 if (first_stmt_code != rhs_code
826 && alt_stmt_code == ERROR_MARK)
827 alt_stmt_code = rhs_code;
ebfd146a
IR
828 if (first_stmt_code != rhs_code
829 && (first_stmt_code != IMAGPART_EXPR
830 || rhs_code != REALPART_EXPR)
831 && (first_stmt_code != REALPART_EXPR
69f11a13 832 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
833 /* Handle mismatches in plus/minus by computing both
834 and merging the results. */
835 && !((first_stmt_code == PLUS_EXPR
836 || first_stmt_code == MINUS_EXPR)
837 && (alt_stmt_code == PLUS_EXPR
838 || alt_stmt_code == MINUS_EXPR)
839 && rhs_code == alt_stmt_code)
0d0293ac 840 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
69f11a13 841 && (first_stmt_code == ARRAY_REF
38000232 842 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
843 || first_stmt_code == INDIRECT_REF
844 || first_stmt_code == COMPONENT_REF
845 || first_stmt_code == MEM_REF)))
ebfd146a 846 {
73fbfcad 847 if (dump_enabled_p ())
ebfd146a 848 {
78c60e3d
SS
849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
850 "Build SLP failed: different operation "
851 "in stmt ");
852 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
6876e5bc
RB
853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
854 "original stmt ");
855 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
856 first_stmt, 0);
ebfd146a 857 }
6983e6b5
RB
858 /* Mismatch. */
859 continue;
ebfd146a 860 }
b8698a0f
L
861
862 if (need_same_oprnds
ebfd146a
IR
863 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
864 {
73fbfcad 865 if (dump_enabled_p ())
ebfd146a 866 {
78c60e3d
SS
867 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
868 "Build SLP failed: different shift "
869 "arguments in ");
870 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 871 }
6983e6b5
RB
872 /* Mismatch. */
873 continue;
ebfd146a 874 }
190c2236
JJ
875
876 if (rhs_code == CALL_EXPR)
877 {
355fe088 878 gimple *first_stmt = stmts[0];
190c2236
JJ
879 if (gimple_call_num_args (stmt) != nops
880 || !operand_equal_p (gimple_call_fn (first_stmt),
881 gimple_call_fn (stmt), 0)
882 || gimple_call_fntype (first_stmt)
883 != gimple_call_fntype (stmt))
884 {
73fbfcad 885 if (dump_enabled_p ())
190c2236 886 {
78c60e3d
SS
887 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
888 "Build SLP failed: different calls in ");
889 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
890 stmt, 0);
190c2236 891 }
6983e6b5
RB
892 /* Mismatch. */
893 continue;
190c2236
JJ
894 }
895 }
ebfd146a
IR
896 }
897
0d0293ac
MM
898 /* Grouped store or load. */
899 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
ebfd146a
IR
900 {
901 if (REFERENCE_CLASS_P (lhs))
902 {
903 /* Store. */
6983e6b5 904 ;
ebfd146a 905 }
b5aeb3bb
IR
906 else
907 {
908 /* Load. */
2c53b149 909 first_load = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
b5aeb3bb
IR
910 if (prev_first_load)
911 {
912 /* Check that there are no loads from different interleaving
6983e6b5
RB
913 chains in the same node. */
914 if (prev_first_load != first_load)
78c60e3d 915 {
73fbfcad 916 if (dump_enabled_p ())
b5aeb3bb 917 {
78c60e3d
SS
918 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
919 vect_location,
920 "Build SLP failed: different "
921 "interleaving chains in one node ");
922 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
923 stmt, 0);
b5aeb3bb 924 }
6983e6b5
RB
925 /* Mismatch. */
926 continue;
b5aeb3bb
IR
927 }
928 }
929 else
930 prev_first_load = first_load;
ebfd146a 931 }
0d0293ac 932 } /* Grouped access. */
ebfd146a
IR
933 else
934 {
935 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
936 {
0d0293ac 937 /* Not grouped load. */
73fbfcad 938 if (dump_enabled_p ())
ebfd146a 939 {
78c60e3d
SS
940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
941 "Build SLP failed: not grouped load ");
942 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a
IR
943 }
944
0d0293ac 945 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
946 /* Fatal mismatch. */
947 matches[0] = false;
ebfd146a
IR
948 return false;
949 }
950
951 /* Not memory operation. */
952 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 953 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 954 && TREE_CODE_CLASS (rhs_code) != tcc_expression
42fd8198 955 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
190c2236 956 && rhs_code != CALL_EXPR)
ebfd146a 957 {
73fbfcad 958 if (dump_enabled_p ())
ebfd146a 959 {
78c60e3d
SS
960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
961 "Build SLP failed: operation");
962 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
963 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 964 }
6983e6b5
RB
965 /* Fatal mismatch. */
966 matches[0] = false;
ebfd146a
IR
967 return false;
968 }
969
4cecd659
BC
970 if (rhs_code == COND_EXPR)
971 {
972 tree cond_expr = gimple_assign_rhs1 (stmt);
973 enum tree_code cond_code = TREE_CODE (cond_expr);
974 enum tree_code swap_code = ERROR_MARK;
975 enum tree_code invert_code = ERROR_MARK;
f7e531cf
IR
976
977 if (i == 0)
978 first_cond_code = TREE_CODE (cond_expr);
4cecd659
BC
979 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
980 {
981 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
982 swap_code = swap_tree_comparison (cond_code);
983 invert_code = invert_tree_comparison (cond_code, honor_nans);
984 }
985
986 if (first_cond_code == cond_code)
987 ;
988 /* Isomorphic can be achieved by swapping. */
989 else if (first_cond_code == swap_code)
990 swap[i] = 1;
991 /* Isomorphic can be achieved by inverting. */
992 else if (first_cond_code == invert_code)
993 swap[i] = 2;
994 else
995 {
996 if (dump_enabled_p ())
997 {
998 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
999 "Build SLP failed: different"
1000 " operation");
4cecd659 1001 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
78c60e3d 1002 stmt, 0);
4cecd659 1003 }
6983e6b5
RB
1004 /* Mismatch. */
1005 continue;
f7e531cf 1006 }
4cecd659 1007 }
ebfd146a 1008 }
6983e6b5
RB
1009
1010 matches[i] = true;
1011 }
1012
1013 for (i = 0; i < group_size; ++i)
1014 if (!matches[i])
1015 return false;
1016
6876e5bc
RB
1017 /* If we allowed a two-operation SLP node verify the target can cope
1018 with the permute we are going to use. */
1019 if (alt_stmt_code != ERROR_MARK
1020 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1021 {
1f3cb663
RS
1022 if (vectype == boolean_type_node
1023 || !vect_two_operations_perm_ok_p (stmts, group_size,
1024 vectype, alt_stmt_code))
6876e5bc
RB
1025 {
1026 for (i = 0; i < group_size; ++i)
1027 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
1028 {
1029 matches[i] = false;
1030 if (dump_enabled_p ())
1031 {
1032 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1033 "Build SLP failed: different operation "
1034 "in stmt ");
1035 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1036 stmts[i], 0);
1037 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1038 "original stmt ");
1039 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1040 first_stmt, 0);
1041 }
1042 }
1043 return false;
1044 }
1045 *two_operators = true;
1046 }
1047
6983e6b5
RB
1048 return true;
1049}
1050
26d66f28
RB
1051/* Traits for the hash_set to record failed SLP builds for a stmt set.
1052 Note we never remove apart from at destruction time so we do not
1053 need a special value for deleted that differs from empty. */
1054struct bst_traits
1055{
1056 typedef vec <gimple *> value_type;
1057 typedef vec <gimple *> compare_type;
1058 static inline hashval_t hash (value_type);
1059 static inline bool equal (value_type existing, value_type candidate);
1060 static inline bool is_empty (value_type x) { return !x.exists (); }
1061 static inline bool is_deleted (value_type x) { return !x.exists (); }
1062 static inline void mark_empty (value_type &x) { x.release (); }
1063 static inline void mark_deleted (value_type &x) { x.release (); }
1064 static inline void remove (value_type &x) { x.release (); }
1065};
1066inline hashval_t
1067bst_traits::hash (value_type x)
1068{
1069 inchash::hash h;
1070 for (unsigned i = 0; i < x.length (); ++i)
1071 h.add_int (gimple_uid (x[i]));
1072 return h.end ();
1073}
1074inline bool
1075bst_traits::equal (value_type existing, value_type candidate)
1076{
1077 if (existing.length () != candidate.length ())
1078 return false;
1079 for (unsigned i = 0; i < existing.length (); ++i)
1080 if (existing[i] != candidate[i])
1081 return false;
1082 return true;
1083}
1084
f7300fff
RB
1085typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
1086static scalar_stmts_set_t *bst_fail;
26d66f28 1087
68435eb2
RB
1088typedef hash_map <vec <gimple *>, slp_tree,
1089 simple_hashmap_traits <bst_traits, slp_tree> >
1090 scalar_stmts_to_slp_tree_map_t;
1091
26d66f28
RB
1092static slp_tree
1093vect_build_slp_tree_2 (vec_info *vinfo,
1094 vec<gimple *> stmts, unsigned int group_size,
4b6068ea 1095 poly_uint64 *max_nunits,
26d66f28
RB
1096 vec<slp_tree> *loads,
1097 bool *matches, unsigned *npermutes, unsigned *tree_size,
1098 unsigned max_tree_size);
6983e6b5 1099
e403d17e 1100static slp_tree
310213d4 1101vect_build_slp_tree (vec_info *vinfo,
4b6068ea
RS
1102 vec<gimple *> stmts, unsigned int group_size,
1103 poly_uint64 *max_nunits, vec<slp_tree> *loads,
1428105c
RB
1104 bool *matches, unsigned *npermutes, unsigned *tree_size,
1105 unsigned max_tree_size)
26d66f28
RB
1106{
1107 if (bst_fail->contains (stmts))
1108 return NULL;
1109 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
1110 loads, matches, npermutes, tree_size,
1111 max_tree_size);
1112 /* When SLP build fails for stmts record this, otherwise SLP build
1113 can be exponential in time when we allow to construct parts from
1114 scalars, see PR81723. */
1115 if (! res)
1116 {
1117 vec <gimple *> x;
1118 x.create (stmts.length ());
1119 x.splice (stmts);
1120 bst_fail->add (x);
1121 }
1122 return res;
1123}
1124
1125/* Recursively build an SLP tree starting from NODE.
1126 Fail (and return a value not equal to zero) if def-stmts are not
1127 isomorphic, require data permutation or are of unsupported types of
1128 operation. Otherwise, return 0.
1129 The value returned is the depth in the SLP tree where a mismatch
1130 was found. */
1131
1132static slp_tree
1133vect_build_slp_tree_2 (vec_info *vinfo,
1134 vec<gimple *> stmts, unsigned int group_size,
4b6068ea 1135 poly_uint64 *max_nunits,
26d66f28
RB
1136 vec<slp_tree> *loads,
1137 bool *matches, unsigned *npermutes, unsigned *tree_size,
1138 unsigned max_tree_size)
6983e6b5 1139{
4b6068ea
RS
1140 unsigned nops, i, this_tree_size = 0;
1141 poly_uint64 this_max_nunits = *max_nunits;
355fe088 1142 gimple *stmt;
e403d17e 1143 slp_tree node;
6983e6b5 1144
6983e6b5
RB
1145 matches[0] = false;
1146
e403d17e 1147 stmt = stmts[0];
6983e6b5
RB
1148 if (is_gimple_call (stmt))
1149 nops = gimple_call_num_args (stmt);
1150 else if (is_gimple_assign (stmt))
1151 {
1152 nops = gimple_num_ops (stmt) - 1;
1153 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1154 nops++;
ebfd146a 1155 }
e7baeb39
RB
1156 else if (gimple_code (stmt) == GIMPLE_PHI)
1157 nops = 0;
6983e6b5 1158 else
e403d17e 1159 return NULL;
6983e6b5 1160
c78e3652
RB
1161 /* If the SLP node is a PHI (induction or reduction), terminate
1162 the recursion. */
e7baeb39
RB
1163 if (gimple_code (stmt) == GIMPLE_PHI)
1164 {
b161f2c9
RS
1165 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1166 tree vectype = get_vectype_for_scalar_type (scalar_type);
1167 if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
1168 max_nunits))
1169 return NULL;
1170
719488f8 1171 vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
c78e3652 1172 /* Induction from different IVs is not supported. */
719488f8
RB
1173 if (def_type == vect_induction_def)
1174 {
1175 FOR_EACH_VEC_ELT (stmts, i, stmt)
1176 if (stmt != stmts[0])
1177 return NULL;
1178 }
1179 else
1180 {
1181 /* Else def types have to match. */
1182 FOR_EACH_VEC_ELT (stmts, i, stmt)
1183 {
1184 /* But for reduction chains only check on the first stmt. */
2c53b149
RB
1185 if (REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1186 && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
719488f8
RB
1187 continue;
1188 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
1189 return NULL;
1190 }
1191 }
e7baeb39
RB
1192 node = vect_create_new_slp_node (stmts);
1193 return node;
1194 }
1195
1196
6876e5bc 1197 bool two_operators = false;
4cecd659
BC
1198 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1199 if (!vect_build_slp_tree_1 (vinfo, swap,
e403d17e
RB
1200 stmts, group_size, nops,
1201 &this_max_nunits, matches, &two_operators))
1202 return NULL;
ebfd146a 1203
6983e6b5
RB
1204 /* If the SLP node is a load, terminate the recursion. */
1205 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
1206 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
ebfd146a 1207 {
e403d17e
RB
1208 *max_nunits = this_max_nunits;
1209 node = vect_create_new_slp_node (stmts);
1210 loads->safe_push (node);
1211 return node;
ebfd146a
IR
1212 }
1213
6983e6b5
RB
1214 /* Get at the operands, verifying they are compatible. */
1215 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1216 slp_oprnd_info oprnd_info;
e403d17e 1217 FOR_EACH_VEC_ELT (stmts, i, stmt)
6983e6b5 1218 {
4cecd659 1219 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
018b2744 1220 stmts, i, &oprnds_info);
4cecd659
BC
1221 if (res != 0)
1222 matches[(res == -1) ? 0 : i] = false;
1223 if (!matches[0])
1224 break;
6983e6b5 1225 }
b0b4483e
RB
1226 for (i = 0; i < group_size; ++i)
1227 if (!matches[i])
1228 {
1229 vect_free_oprnd_info (oprnds_info);
e403d17e 1230 return NULL;
b0b4483e 1231 }
6983e6b5 1232
e403d17e
RB
1233 auto_vec<slp_tree, 4> children;
1234 auto_vec<slp_tree> this_loads;
1235
1236 stmt = stmts[0];
6983e6b5 1237
26d66f28
RB
1238 if (tree_size)
1239 max_tree_size -= *tree_size;
1240
b8698a0f 1241 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 1242 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 1243 {
d092494c 1244 slp_tree child;
e403d17e
RB
1245 unsigned old_nloads = this_loads.length ();
1246 unsigned old_tree_size = this_tree_size;
1247 unsigned int j;
b8698a0f 1248
e7baeb39 1249 if (oprnd_info->first_dt != vect_internal_def
c78e3652 1250 && oprnd_info->first_dt != vect_reduction_def
e7baeb39 1251 && oprnd_info->first_dt != vect_induction_def)
d092494c 1252 continue;
ebfd146a 1253
1428105c
RB
1254 if (++this_tree_size > max_tree_size)
1255 {
e403d17e
RB
1256 FOR_EACH_VEC_ELT (children, j, child)
1257 vect_free_slp_tree (child);
1428105c 1258 vect_free_oprnd_info (oprnds_info);
e403d17e 1259 return NULL;
1428105c
RB
1260 }
1261
e403d17e
RB
1262 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1263 group_size, &this_max_nunits,
1264 &this_loads, matches, npermutes,
1265 &this_tree_size,
1266 max_tree_size)) != NULL)
6983e6b5 1267 {
3fc356dc
RB
1268 /* If we have all children of child built up from scalars then just
1269 throw that away and build it up this node from scalars. */
995b6fe0
RB
1270 if (!SLP_TREE_CHILDREN (child).is_empty ()
1271 /* ??? Rejecting patterns this way doesn't work. We'd have to
1272 do extra work to cancel the pattern so the uses see the
1273 scalar version. */
1274 && !is_pattern_stmt_p
1275 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
3fc356dc 1276 {
3fc356dc
RB
1277 slp_tree grandchild;
1278
1279 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1280 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
3fc356dc
RB
1281 break;
1282 if (!grandchild)
1283 {
1284 /* Roll back. */
e403d17e
RB
1285 this_loads.truncate (old_nloads);
1286 this_tree_size = old_tree_size;
3fc356dc 1287 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1288 vect_free_slp_tree (grandchild);
3fc356dc
RB
1289 SLP_TREE_CHILDREN (child).truncate (0);
1290
1291 dump_printf_loc (MSG_NOTE, vect_location,
1292 "Building parent vector operands from "
1293 "scalars instead\n");
1294 oprnd_info->def_stmts = vNULL;
603cca93 1295 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1296 children.safe_push (child);
3fc356dc
RB
1297 continue;
1298 }
1299 }
1300
6983e6b5 1301 oprnd_info->def_stmts = vNULL;
e403d17e 1302 children.safe_push (child);
6983e6b5
RB
1303 continue;
1304 }
1305
90dd6e3d
RB
1306 /* If the SLP build failed fatally and we analyze a basic-block
1307 simply treat nodes we fail to build as externally defined
1308 (and thus build vectors from the scalar defs).
1309 The cost model will reject outright expensive cases.
1310 ??? This doesn't treat cases where permutation ultimatively
1311 fails (or we don't try permutation below). Ideally we'd
1312 even compute a permutation that will end up with the maximum
1313 SLP tree size... */
310213d4 1314 if (is_a <bb_vec_info> (vinfo)
90dd6e3d
RB
1315 && !matches[0]
1316 /* ??? Rejecting patterns this way doesn't work. We'd have to
1317 do extra work to cancel the pattern so the uses see the
1318 scalar version. */
1319 && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1320 {
1321 dump_printf_loc (MSG_NOTE, vect_location,
1322 "Building vector operands from scalars\n");
e403d17e 1323 child = vect_create_new_slp_node (oprnd_info->def_stmts);
603cca93 1324 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e
RB
1325 children.safe_push (child);
1326 oprnd_info->def_stmts = vNULL;
90dd6e3d
RB
1327 continue;
1328 }
1329
6983e6b5
RB
1330 /* If the SLP build for operand zero failed and operand zero
1331 and one can be commutated try that for the scalar stmts
1332 that failed the match. */
1333 if (i == 0
1334 /* A first scalar stmt mismatch signals a fatal mismatch. */
1335 && matches[0]
1336 /* ??? For COND_EXPRs we can swap the comparison operands
1337 as well as the arms under some constraints. */
1338 && nops == 2
1339 && oprnds_info[1]->first_dt == vect_internal_def
1340 && is_gimple_assign (stmt)
6983e6b5
RB
1341 /* Do so only if the number of not successful permutes was nor more
1342 than a cut-ff as re-trying the recursive match on
1343 possibly each level of the tree would expose exponential
1344 behavior. */
1345 && *npermutes < 4)
1346 {
85c5e2f5
RB
1347 /* See whether we can swap the matching or the non-matching
1348 stmt operands. */
1349 bool swap_not_matching = true;
1350 do
1351 {
1352 for (j = 0; j < group_size; ++j)
1353 {
1354 if (matches[j] != !swap_not_matching)
1355 continue;
1356 gimple *stmt = stmts[j];
1357 /* Verify if we can swap operands of this stmt. */
1358 if (!is_gimple_assign (stmt)
1359 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1360 {
1361 if (!swap_not_matching)
1362 goto fail;
1363 swap_not_matching = false;
1364 break;
1365 }
1366 /* Verify if we can safely swap or if we committed to a
1367 specific operand order already.
1368 ??? Instead of modifying GIMPLE stmts here we could
1369 record whether we want to swap operands in the SLP
1370 node and temporarily do that when processing it
1371 (or wrap operand accessors in a helper). */
1372 else if (swap[j] != 0
1373 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)))
1374 {
1375 if (!swap_not_matching)
1376 {
1377 if (dump_enabled_p ())
1378 {
1379 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1380 vect_location,
1381 "Build SLP failed: cannot swap "
1382 "operands of shared stmt ");
1383 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
1384 TDF_SLIM, stmts[j], 0);
1385 }
1386 goto fail;
1387 }
1388 swap_not_matching = false;
1389 break;
1390 }
1391 }
1392 }
1393 while (j != group_size);
78810bd3 1394
6983e6b5 1395 /* Swap mismatched definition stmts. */
b0b4483e
RB
1396 dump_printf_loc (MSG_NOTE, vect_location,
1397 "Re-trying with swapped operands of stmts ");
e72baed7 1398 for (j = 0; j < group_size; ++j)
85c5e2f5 1399 if (matches[j] == !swap_not_matching)
6983e6b5 1400 {
6b4db501
MM
1401 std::swap (oprnds_info[0]->def_stmts[j],
1402 oprnds_info[1]->def_stmts[j]);
b0b4483e 1403 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1404 }
b0b4483e 1405 dump_printf (MSG_NOTE, "\n");
74574669
RB
1406 /* And try again with scratch 'matches' ... */
1407 bool *tem = XALLOCAVEC (bool, group_size);
e403d17e
RB
1408 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1409 group_size, &this_max_nunits,
1410 &this_loads, tem, npermutes,
1411 &this_tree_size,
1412 max_tree_size)) != NULL)
6983e6b5 1413 {
60f2b864
RB
1414 /* ... so if successful we can apply the operand swapping
1415 to the GIMPLE IL. This is necessary because for example
1416 vect_get_slp_defs uses operand indexes and thus expects
1417 canonical operand order. This is also necessary even
1418 if we end up building the operand from scalars as
1419 we'll continue to process swapped operand two. */
1420 for (j = 0; j < group_size; ++j)
f47cda24 1421 {
e403d17e 1422 gimple *stmt = stmts[j];
f47cda24
RB
1423 gimple_set_plf (stmt, GF_PLF_1, false);
1424 }
1425 for (j = 0; j < group_size; ++j)
1426 {
e403d17e 1427 gimple *stmt = stmts[j];
85c5e2f5 1428 if (matches[j] == !swap_not_matching)
f47cda24
RB
1429 {
1430 /* Avoid swapping operands twice. */
1431 if (gimple_plf (stmt, GF_PLF_1))
1432 continue;
1433 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1434 gimple_assign_rhs2_ptr (stmt));
1435 gimple_set_plf (stmt, GF_PLF_1, true);
1436 }
1437 }
1438 /* Verify we swap all duplicates or none. */
1439 if (flag_checking)
1440 for (j = 0; j < group_size; ++j)
60f2b864 1441 {
e403d17e 1442 gimple *stmt = stmts[j];
85c5e2f5
RB
1443 gcc_assert (gimple_plf (stmt, GF_PLF_1)
1444 == (matches[j] == !swap_not_matching));
60f2b864
RB
1445 }
1446
85c69b0b
RB
1447 /* If we have all children of child built up from scalars then
1448 just throw that away and build it up this node from scalars. */
995b6fe0
RB
1449 if (!SLP_TREE_CHILDREN (child).is_empty ()
1450 /* ??? Rejecting patterns this way doesn't work. We'd have
1451 to do extra work to cancel the pattern so the uses see the
1452 scalar version. */
1453 && !is_pattern_stmt_p
1454 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
85c69b0b
RB
1455 {
1456 unsigned int j;
1457 slp_tree grandchild;
1458
1459 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1460 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
85c69b0b
RB
1461 break;
1462 if (!grandchild)
1463 {
1464 /* Roll back. */
e403d17e
RB
1465 this_loads.truncate (old_nloads);
1466 this_tree_size = old_tree_size;
85c69b0b
RB
1467 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1468 vect_free_slp_tree (grandchild);
1469 SLP_TREE_CHILDREN (child).truncate (0);
1470
1471 dump_printf_loc (MSG_NOTE, vect_location,
1472 "Building parent vector operands from "
1473 "scalars instead\n");
1474 oprnd_info->def_stmts = vNULL;
603cca93 1475 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1476 children.safe_push (child);
85c69b0b
RB
1477 continue;
1478 }
1479 }
1480
6983e6b5 1481 oprnd_info->def_stmts = vNULL;
e403d17e 1482 children.safe_push (child);
6983e6b5
RB
1483 continue;
1484 }
1485
1486 ++*npermutes;
1487 }
1488
78810bd3 1489fail:
e403d17e
RB
1490 gcc_assert (child == NULL);
1491 FOR_EACH_VEC_ELT (children, j, child)
1492 vect_free_slp_tree (child);
6983e6b5 1493 vect_free_oprnd_info (oprnds_info);
e403d17e 1494 return NULL;
ebfd146a
IR
1495 }
1496
e403d17e
RB
1497 vect_free_oprnd_info (oprnds_info);
1498
1428105c
RB
1499 if (tree_size)
1500 *tree_size += this_tree_size;
e403d17e
RB
1501 *max_nunits = this_max_nunits;
1502 loads->safe_splice (this_loads);
1428105c 1503
e403d17e
RB
1504 node = vect_create_new_slp_node (stmts);
1505 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1506 SLP_TREE_CHILDREN (node).splice (children);
1507 return node;
ebfd146a
IR
1508}
1509
78c60e3d 1510/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1511
1512static void
1a817418 1513vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
ebfd146a
IR
1514{
1515 int i;
355fe088 1516 gimple *stmt;
d755c7ef 1517 slp_tree child;
ebfd146a 1518
603cca93
RB
1519 dump_printf_loc (dump_kind, loc, "node%s\n",
1520 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1521 ? " (external)" : "");
9771b263 1522 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a 1523 {
c2a12ca0 1524 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
78c60e3d 1525 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
ebfd146a 1526 }
9771b263 1527 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
c2a12ca0 1528 vect_print_slp_tree (dump_kind, loc, child);
ebfd146a
IR
1529}
1530
1531
b8698a0f
L
1532/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1533 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1534 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1535 stmts in NODE are to be marked. */
1536
1537static void
1538vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1539{
1540 int i;
355fe088 1541 gimple *stmt;
d755c7ef 1542 slp_tree child;
ebfd146a 1543
603cca93 1544 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
1545 return;
1546
9771b263 1547 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
1548 if (j < 0 || i == j)
1549 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1550
9771b263 1551 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1552 vect_mark_slp_stmts (child, mark, j);
ebfd146a
IR
1553}
1554
1555
a70d6342
IR
1556/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1557
1558static void
1559vect_mark_slp_stmts_relevant (slp_tree node)
1560{
1561 int i;
355fe088 1562 gimple *stmt;
a70d6342 1563 stmt_vec_info stmt_info;
d755c7ef 1564 slp_tree child;
a70d6342 1565
603cca93 1566 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
1567 return;
1568
9771b263 1569 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
1570 {
1571 stmt_info = vinfo_for_stmt (stmt);
b8698a0f 1572 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1573 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1574 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1575 }
1576
9771b263 1577 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1578 vect_mark_slp_stmts_relevant (child);
a70d6342
IR
1579}
1580
1581
b5aeb3bb
IR
1582/* Rearrange the statements of NODE according to PERMUTATION. */
1583
1584static void
1585vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
01d8bf07 1586 vec<unsigned> permutation)
b5aeb3bb 1587{
355fe088
TS
1588 gimple *stmt;
1589 vec<gimple *> tmp_stmts;
d755c7ef
RB
1590 unsigned int i;
1591 slp_tree child;
b5aeb3bb 1592
9771b263 1593 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1594 vect_slp_rearrange_stmts (child, group_size, permutation);
b5aeb3bb 1595
9771b263
DN
1596 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1597 tmp_stmts.create (group_size);
d755c7ef 1598 tmp_stmts.quick_grow_cleared (group_size);
b5aeb3bb 1599
9771b263 1600 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
d755c7ef 1601 tmp_stmts[permutation[i]] = stmt;
b5aeb3bb 1602
9771b263 1603 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1604 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1605}
1606
1607
b266b968
RB
1608/* Attempt to reorder stmts in a reduction chain so that we don't
1609 require any load permutation. Return true if that was possible,
1610 otherwise return false. */
1611
1612static bool
1613vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1614{
1615 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1616 unsigned int i, j;
b266b968
RB
1617 unsigned int lidx;
1618 slp_tree node, load;
1619
1620 /* Compare all the permutation sequences to the first one. We know
1621 that at least one load is permuted. */
1622 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1623 if (!node->load_permutation.exists ())
1624 return false;
1625 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1626 {
1627 if (!load->load_permutation.exists ())
1628 return false;
1629 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1630 if (lidx != node->load_permutation[j])
1631 return false;
1632 }
1633
1634 /* Check that the loads in the first sequence are different and there
1635 are no gaps between them. */
7ba9e72d 1636 auto_sbitmap load_index (group_size);
b266b968
RB
1637 bitmap_clear (load_index);
1638 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1639 {
41eefe13 1640 if (lidx >= group_size)
7ba9e72d 1641 return false;
b266b968 1642 if (bitmap_bit_p (load_index, lidx))
7ba9e72d
TS
1643 return false;
1644
b266b968
RB
1645 bitmap_set_bit (load_index, lidx);
1646 }
1647 for (i = 0; i < group_size; i++)
1648 if (!bitmap_bit_p (load_index, i))
7ba9e72d 1649 return false;
b266b968
RB
1650
1651 /* This permutation is valid for reduction. Since the order of the
1652 statements in the nodes is not important unless they are memory
1653 accesses, we can rearrange the statements in all the nodes
1654 according to the order of the loads. */
1655 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1656 node->load_permutation);
1657
1658 /* We are done, no actual permutations need to be generated. */
d9f21f6a 1659 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
b266b968 1660 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
c4e360f4
RB
1661 {
1662 gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2c53b149 1663 first_stmt = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
c4e360f4
RB
1664 /* But we have to keep those permutations that are required because
1665 of handling of gaps. */
d9f21f6a 1666 if (known_eq (unrolling_factor, 1U)
2c53b149
RB
1667 || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
1668 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
c4e360f4 1669 SLP_TREE_LOAD_PERMUTATION (node).release ();
cbd400b4
RB
1670 else
1671 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1672 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
c4e360f4
RB
1673 }
1674
b266b968
RB
1675 return true;
1676}
1677
01d8bf07
RB
1678/* Check if the required load permutations in the SLP instance
1679 SLP_INSTN are supported. */
ebfd146a
IR
1680
1681static bool
01d8bf07 1682vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1683{
01d8bf07
RB
1684 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1685 unsigned int i, j, k, next;
6983e6b5 1686 slp_tree node;
a5b50aa1 1687 gimple *stmt, *load, *next_load;
ebfd146a 1688
73fbfcad 1689 if (dump_enabled_p ())
ebfd146a 1690 {
78c60e3d 1691 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1692 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1693 if (node->load_permutation.exists ())
1694 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1695 dump_printf (MSG_NOTE, "%d ", next);
1696 else
bddc974e
TJ
1697 for (k = 0; k < group_size; ++k)
1698 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1699 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1700 }
1701
b5aeb3bb
IR
1702 /* In case of reduction every load permutation is allowed, since the order
1703 of the reduction statements is not important (as opposed to the case of
0d0293ac 1704 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1705 load nodes are of the same size and have the same permutation (and then
1706 rearrange all the nodes of the SLP instance according to this
1707 permutation). */
1708
1709 /* Check that all the load nodes are of the same size. */
01d8bf07 1710 /* ??? Can't we assert this? */
9771b263 1711 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1712 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1713 return false;
2200fc49 1714
b5aeb3bb 1715 node = SLP_INSTANCE_TREE (slp_instn);
9771b263 1716 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1717
b010117a 1718 /* Reduction (there are no data-refs in the root).
b266b968 1719 In reduction chain the order of the loads is not important. */
b010117a 1720 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
2c53b149 1721 && !REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
c4e360f4 1722 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1723
6aa904c4
IR
1724 /* In basic block vectorization we allow any subchain of an interleaving
1725 chain.
1726 FORNOW: not supported in loop SLP because of realignment compications. */
01d8bf07 1727 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
6aa904c4 1728 {
240a94da
RB
1729 /* Check whether the loads in an instance form a subchain and thus
1730 no permutation is necessary. */
9771b263 1731 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1732 {
9626d143
RB
1733 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1734 continue;
240a94da 1735 bool subchain_p = true;
6aa904c4 1736 next_load = NULL;
9771b263 1737 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
6aa904c4 1738 {
5b5826c4
RB
1739 if (j != 0
1740 && (next_load != load
2c53b149 1741 || DR_GROUP_GAP (vinfo_for_stmt (load)) != 1))
240a94da
RB
1742 {
1743 subchain_p = false;
1744 break;
1745 }
2c53b149 1746 next_load = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
6aa904c4 1747 }
240a94da
RB
1748 if (subchain_p)
1749 SLP_TREE_LOAD_PERMUTATION (node).release ();
1750 else
1751 {
fe73a332
RB
1752 stmt_vec_info group_info
1753 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
2c53b149 1754 group_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (group_info));
928686b1 1755 unsigned HOST_WIDE_INT nunits;
fe73a332
RB
1756 unsigned k, maxk = 0;
1757 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1758 if (k > maxk)
1759 maxk = k;
1760 /* In BB vectorization we may not actually use a loaded vector
2c53b149 1761 accessing elements in excess of DR_GROUP_SIZE. */
928686b1
RS
1762 tree vectype = STMT_VINFO_VECTYPE (group_info);
1763 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2c53b149 1764 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
fe73a332
RB
1765 {
1766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1767 "BB vectorization with gaps at the end of "
1768 "a load is not supported\n");
1769 return false;
1770 }
1771
240a94da
RB
1772 /* Verify the permutation can be generated. */
1773 vec<tree> tem;
29afecdf 1774 unsigned n_perms;
240a94da 1775 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1776 1, slp_instn, true, &n_perms))
240a94da
RB
1777 {
1778 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1779 vect_location,
1780 "unsupported load permutation\n");
1781 return false;
1782 }
1783 }
6aa904c4 1784 }
01d8bf07 1785 return true;
6aa904c4
IR
1786 }
1787
31bee964
RB
1788 /* For loop vectorization verify we can generate the permutation. Be
1789 conservative about the vectorization factor, there are permutations
1790 that will use three vector inputs only starting from a specific factor
1791 and the vectorization factor is not yet final.
1792 ??? The SLP instance unrolling factor might not be the maximum one. */
29afecdf 1793 unsigned n_perms;
d9f21f6a
RS
1794 poly_uint64 test_vf
1795 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
31bee964 1796 LOOP_VINFO_VECT_FACTOR
d9f21f6a 1797 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
01d8bf07
RB
1798 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1799 if (node->load_permutation.exists ()
31bee964
RB
1800 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1801 slp_instn, true, &n_perms))
01d8bf07 1802 return false;
9b999e8c 1803
01d8bf07 1804 return true;
ebfd146a
IR
1805}
1806
1807
e4a707c4 1808/* Find the last store in SLP INSTANCE. */
ff802fa1 1809
64900538 1810gimple *
2e8ab70c 1811vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1812{
355fe088 1813 gimple *last = NULL, *stmt;
e4a707c4 1814
2e8ab70c
RB
1815 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1816 {
1817 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1818 if (is_pattern_stmt_p (stmt_vinfo))
1819 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1820 else
1821 last = get_later_stmt (stmt, last);
1822 }
e4a707c4 1823
2e8ab70c 1824 return last;
e4a707c4
IR
1825}
1826
1ba91a49
AL
1827/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1828 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1829 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1830 containing the remainder.
1831 Return the first stmt in the second group. */
1832
1833static gimple *
1834vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1835{
1836 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
2c53b149 1837 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1ba91a49 1838 gcc_assert (group1_size > 0);
2c53b149 1839 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1ba91a49 1840 gcc_assert (group2_size > 0);
2c53b149 1841 DR_GROUP_SIZE (first_vinfo) = group1_size;
1ba91a49
AL
1842
1843 gimple *stmt = first_stmt;
1844 for (unsigned i = group1_size; i > 1; i--)
1845 {
2c53b149
RB
1846 stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1847 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1ba91a49
AL
1848 }
1849 /* STMT is now the last element of the first group. */
2c53b149
RB
1850 gimple *group2 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1851 DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1ba91a49 1852
2c53b149
RB
1853 DR_GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1854 for (stmt = group2; stmt; stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1ba91a49 1855 {
2c53b149
RB
1856 DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1857 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1ba91a49
AL
1858 }
1859
2c53b149 1860 /* For the second group, the DR_GROUP_GAP is that before the original group,
1ba91a49 1861 plus skipping over the first vector. */
2c53b149
RB
1862 DR_GROUP_GAP (vinfo_for_stmt (group2))
1863 = DR_GROUP_GAP (first_vinfo) + group1_size;
1ba91a49 1864
2c53b149
RB
1865 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1866 DR_GROUP_GAP (first_vinfo) += group2_size;
1ba91a49
AL
1867
1868 if (dump_enabled_p ())
1869 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1870 group1_size, group2_size);
1871
1872 return group2;
1873}
1874
4b6068ea
RS
1875/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1876 statements and a vector of NUNITS elements. */
1877
1878static poly_uint64
1879calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1880{
1881 return exact_div (common_multiple (nunits, group_size), group_size);
1882}
1883
0d0293ac 1884/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1885 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1886 Return FALSE if it's impossible to SLP any stmt in the loop. */
1887
1888static bool
310213d4 1889vect_analyze_slp_instance (vec_info *vinfo,
355fe088 1890 gimple *stmt, unsigned max_tree_size)
ebfd146a
IR
1891{
1892 slp_instance new_instance;
d092494c 1893 slp_tree node;
2c53b149 1894 unsigned int group_size;
b5aeb3bb 1895 tree vectype, scalar_type = NULL_TREE;
355fe088 1896 gimple *next;
1ba91a49 1897 unsigned int i;
9771b263 1898 vec<slp_tree> loads;
b5aeb3bb 1899 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
355fe088 1900 vec<gimple *> scalar_stmts;
b5aeb3bb 1901
2c53b149 1902 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
b5aeb3bb 1903 {
2c53b149
RB
1904 scalar_type = TREE_TYPE (DR_REF (dr));
1905 vectype = get_vectype_for_scalar_type (scalar_type);
1906 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1907 }
1908 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1909 {
1910 gcc_assert (is_a <loop_vec_info> (vinfo));
1911 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1912 group_size = REDUC_GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
1913 }
1914 else
1915 {
310213d4 1916 gcc_assert (is_a <loop_vec_info> (vinfo));
b5aeb3bb 1917 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
310213d4 1918 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 1919 }
b8698a0f 1920
ebfd146a
IR
1921 if (!vectype)
1922 {
73fbfcad 1923 if (dump_enabled_p ())
ebfd146a 1924 {
78c60e3d
SS
1925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1926 "Build SLP failed: unsupported data-type ");
1927 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 1928 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1929 }
b5aeb3bb 1930
ebfd146a
IR
1931 return false;
1932 }
4b6068ea 1933 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 1934
0d0293ac 1935 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1936 scalar_stmts.create (group_size);
ebfd146a 1937 next = stmt;
2c53b149 1938 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
ebfd146a 1939 {
b5aeb3bb
IR
1940 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1941 while (next)
1942 {
f7e531cf
IR
1943 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1944 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
9771b263
DN
1945 scalar_stmts.safe_push (
1946 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f7e531cf 1947 else
9771b263 1948 scalar_stmts.safe_push (next);
2c53b149
RB
1949 next = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1950 }
1951 }
1952 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1953 {
1954 /* Collect the reduction stmts and store them in
1955 SLP_TREE_SCALAR_STMTS. */
1956 while (next)
1957 {
1958 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1959 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1960 scalar_stmts.safe_push (
1961 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1962 else
1963 scalar_stmts.safe_push (next);
1964 next = REDUC_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb 1965 }
14a61437
RB
1966 /* Mark the first element of the reduction chain as reduction to properly
1967 transform the node. In the reduction analysis phase only the last
1968 element of the chain is marked as reduction. */
2c53b149 1969 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
b5aeb3bb
IR
1970 }
1971 else
1972 {
1973 /* Collect reduction statements. */
310213d4 1974 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
9771b263
DN
1975 for (i = 0; reductions.iterate (i, &next); i++)
1976 scalar_stmts.safe_push (next);
ebfd146a
IR
1977 }
1978
9771b263 1979 loads.create (group_size);
ebfd146a
IR
1980
1981 /* Build the tree for the SLP instance. */
89d390e5
RB
1982 bool *matches = XALLOCAVEC (bool, group_size);
1983 unsigned npermutes = 0;
f7300fff 1984 bst_fail = new scalar_stmts_set_t ();
4b6068ea 1985 poly_uint64 max_nunits = nunits;
e569db5f 1986 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
4b6068ea 1987 &max_nunits, &loads, matches, &npermutes,
e569db5f 1988 NULL, max_tree_size);
26d66f28 1989 delete bst_fail;
e569db5f 1990 if (node != NULL)
ebfd146a 1991 {
4ef69dfc 1992 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 1993 poly_uint64 unrolling_factor
4b6068ea 1994 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 1995
d9f21f6a 1996 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
1997 && is_a <bb_vec_info> (vinfo))
1998 {
4b6068ea
RS
1999 unsigned HOST_WIDE_INT const_max_nunits;
2000 if (!max_nunits.is_constant (&const_max_nunits)
2001 || const_max_nunits > group_size)
2002 {
2003 if (dump_enabled_p ())
2004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2005 "Build SLP failed: store group "
2006 "size not a multiple of the vector size "
2007 "in basic block SLP\n");
2008 vect_free_slp_tree (node);
2009 loads.release ();
2010 return false;
2011 }
e569db5f 2012 /* Fatal mismatch. */
4b6068ea 2013 matches[group_size / const_max_nunits * const_max_nunits] = false;
e569db5f
VK
2014 vect_free_slp_tree (node);
2015 loads.release ();
2016 }
2017 else
2018 {
4ef69dfc
IR
2019 /* Create a new SLP instance. */
2020 new_instance = XNEW (struct _slp_instance);
2021 SLP_INSTANCE_TREE (new_instance) = node;
2022 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 2023 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 2024 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
2025
2026 /* Compute the load permutation. */
2027 slp_tree load_node;
2028 bool loads_permuted = false;
abf9bfbc
RB
2029 FOR_EACH_VEC_ELT (loads, i, load_node)
2030 {
01d8bf07 2031 vec<unsigned> load_permutation;
abf9bfbc 2032 int j;
355fe088 2033 gimple *load, *first_stmt;
01d8bf07
RB
2034 bool this_load_permuted = false;
2035 load_permutation.create (group_size);
2c53b149 2036 first_stmt = DR_GROUP_FIRST_ELEMENT
6983e6b5 2037 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
abf9bfbc
RB
2038 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2039 {
e569db5f
VK
2040 int load_place = vect_get_place_in_interleaving_chain
2041 (load, first_stmt);
6983e6b5
RB
2042 gcc_assert (load_place != -1);
2043 if (load_place != j)
01d8bf07 2044 this_load_permuted = true;
abf9bfbc
RB
2045 load_permutation.safe_push (load_place);
2046 }
fe2bef71
RB
2047 if (!this_load_permuted
2048 /* The load requires permutation when unrolling exposes
2049 a gap either because the group is larger than the SLP
2050 group-size or because there is a gap between the groups. */
d9f21f6a 2051 && (known_eq (unrolling_factor, 1U)
2c53b149
RB
2052 || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2053 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
01d8bf07
RB
2054 {
2055 load_permutation.release ();
2056 continue;
2057 }
2058 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2059 loads_permuted = true;
abf9bfbc 2060 }
6aa904c4
IR
2061
2062 if (loads_permuted)
ebfd146a 2063 {
01d8bf07 2064 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 2065 {
73fbfcad 2066 if (dump_enabled_p ())
ebfd146a 2067 {
e645e942 2068 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
2069 "Build SLP failed: unsupported load "
2070 "permutation ");
e569db5f
VK
2071 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2072 TDF_SLIM, stmt, 0);
ebfd146a 2073 }
ebfd146a
IR
2074 vect_free_slp_instance (new_instance);
2075 return false;
2076 }
ebfd146a 2077 }
ebfd146a 2078
e569db5f 2079 /* If the loads and stores can be handled with load/store-lan
bb0f5ca7
AL
2080 instructions do not generate this SLP instance. */
2081 if (is_a <loop_vec_info> (vinfo)
2082 && loads_permuted
7e11fc7f 2083 && dr && vect_store_lanes_supported (vectype, group_size, false))
bb0f5ca7
AL
2084 {
2085 slp_tree load_node;
2086 FOR_EACH_VEC_ELT (loads, i, load_node)
2087 {
2c53b149 2088 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT
bb0f5ca7
AL
2089 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2090 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
e569db5f
VK
2091 /* Use SLP for strided accesses (or if we
2092 can't load-lanes). */
bb0f5ca7
AL
2093 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2094 || ! vect_load_lanes_supported
2095 (STMT_VINFO_VECTYPE (stmt_vinfo),
2c53b149 2096 DR_GROUP_SIZE (stmt_vinfo), false))
bb0f5ca7
AL
2097 break;
2098 }
2099 if (i == loads.length ())
2100 {
2101 if (dump_enabled_p ())
2102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2103 "Built SLP cancelled: can use "
2104 "load/store-lanes\n");
2105 vect_free_slp_instance (new_instance);
2106 return false;
2107 }
2108 }
2109
310213d4 2110 vinfo->slp_instances.safe_push (new_instance);
b8698a0f 2111
73fbfcad 2112 if (dump_enabled_p ())
c2a12ca0
RB
2113 {
2114 dump_printf_loc (MSG_NOTE, vect_location,
2115 "Final SLP tree for instance:\n");
2116 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2117 }
ebfd146a
IR
2118
2119 return true;
2120 }
e569db5f
VK
2121 }
2122 else
2123 {
ebfd146a
IR
2124 /* Failed to SLP. */
2125 /* Free the allocated memory. */
e403d17e 2126 scalar_stmts.release ();
9771b263 2127 loads.release ();
e569db5f 2128 }
b8698a0f 2129
1ba91a49 2130 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2131 vector size. */
4b6068ea 2132 unsigned HOST_WIDE_INT const_nunits;
1ba91a49 2133 if (is_a <bb_vec_info> (vinfo)
4b6068ea 2134 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2c53b149 2135 && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
4b6068ea 2136 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2137 {
2138 /* We consider breaking the group only on VF boundaries from the existing
2139 start. */
2140 for (i = 0; i < group_size; i++)
2141 if (!matches[i]) break;
2142
4b6068ea 2143 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2144 {
2145 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2146 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2147 unsigned group1_size = i & ~(const_nunits - 1);
1ba91a49
AL
2148
2149 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2150 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2151 /* If the first non-match was in the middle of a vector,
2152 skip the rest of that vector. */
2153 if (group1_size < i)
2154 {
4b6068ea 2155 i = group1_size + const_nunits;
1ba91a49 2156 if (i < group_size)
4b6068ea 2157 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2158 }
2159 if (i < group_size)
2160 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2161 return res;
2162 }
2163 /* Even though the first vector did not all match, we might be able to SLP
2164 (some) of the remainder. FORNOW ignore this possibility. */
2165 }
2166
a70d6342 2167 return false;
ebfd146a
IR
2168}
2169
2170
ff802fa1 2171/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
2172 trees of packed scalar stmts if SLP is possible. */
2173
2174bool
310213d4 2175vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2176{
2177 unsigned int i;
355fe088 2178 gimple *first_element;
ebfd146a 2179
73fbfcad 2180 if (dump_enabled_p ())
e645e942 2181 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
ebfd146a 2182
0d0293ac 2183 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2184 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
0630a4ec 2185 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
ebfd146a 2186
310213d4 2187 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2188 {
310213d4
RB
2189 if (loop_vinfo->reduction_chains.length () > 0)
2190 {
2191 /* Find SLP sequences starting from reduction chains. */
2192 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
0630a4ec 2193 if (! vect_analyze_slp_instance (vinfo, first_element,
310213d4 2194 max_tree_size))
6b5e165b
RB
2195 {
2196 /* Dissolve reduction chain group. */
2197 gimple *next, *stmt = first_element;
2198 while (stmt)
2199 {
2200 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2c53b149
RB
2201 next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2202 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2203 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
6b5e165b
RB
2204 stmt = next;
2205 }
2206 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2207 = vect_internal_def;
2208 }
310213d4 2209 }
b010117a 2210
310213d4 2211 /* Find SLP sequences starting from groups of reductions. */
0630a4ec
RB
2212 if (loop_vinfo->reductions.length () > 1)
2213 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2214 max_tree_size);
310213d4 2215 }
b5aeb3bb 2216
ebfd146a
IR
2217 return true;
2218}
2219
2220
2221/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2222 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2223 least one instance. */
ebfd146a 2224
437f4a00 2225bool
ebfd146a
IR
2226vect_make_slp_decision (loop_vec_info loop_vinfo)
2227{
d9f21f6a
RS
2228 unsigned int i;
2229 poly_uint64 unrolling_factor = 1;
9771b263 2230 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2231 slp_instance instance;
2232 int decided_to_slp = 0;
2233
73fbfcad 2234 if (dump_enabled_p ())
e645e942
TJ
2235 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2236 "\n");
ebfd146a 2237
9771b263 2238 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2239 {
2240 /* FORNOW: SLP if you can. */
d9f21f6a
RS
2241 /* All unroll factors have the form current_vector_size * X for some
2242 rational X, so they must have a common multiple. */
2243 unrolling_factor
2244 = force_common_multiple (unrolling_factor,
2245 SLP_INSTANCE_UNROLLING_FACTOR (instance));
ebfd146a 2246
ff802fa1 2247 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2248 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2249 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
2250 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2251 decided_to_slp++;
2252 }
2253
2254 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2255
73fbfcad 2256 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2257 {
2258 dump_printf_loc (MSG_NOTE, vect_location,
2259 "Decided to SLP %d instances. Unrolling factor ",
2260 decided_to_slp);
2261 dump_dec (MSG_NOTE, unrolling_factor);
2262 dump_printf (MSG_NOTE, "\n");
2263 }
437f4a00
IR
2264
2265 return (decided_to_slp > 0);
ebfd146a
IR
2266}
2267
2268
2269/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2270 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2271
2272static void
642fce57 2273vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 2274{
355fe088 2275 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2276 imm_use_iterator imm_iter;
355fe088 2277 gimple *use_stmt;
642fce57 2278 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
d755c7ef 2279 slp_tree child;
f2c74cc4 2280 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2281 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2282 int j;
2283
2284 /* Propagate hybrid down the SLP tree. */
2285 if (stype == hybrid)
2286 ;
2287 else if (HYBRID_SLP_STMT (stmt_vinfo))
2288 stype = hybrid;
2289 else
2290 {
2291 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2292 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2293 /* If we get a pattern stmt here we have to use the LHS of the
2294 original stmt for immediate uses. */
2295 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2296 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
29764870 2297 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
e7baeb39
RB
2298 tree def;
2299 if (gimple_code (stmt) == GIMPLE_PHI)
2300 def = gimple_phi_result (stmt);
2301 else
2302 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2303 if (def)
2304 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870
RB
2305 {
2306 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2307 continue;
2308 use_vinfo = vinfo_for_stmt (use_stmt);
2309 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2310 && STMT_VINFO_RELATED_STMT (use_vinfo))
2311 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2312 if (!STMT_SLP_TYPE (use_vinfo)
2313 && (STMT_VINFO_RELEVANT (use_vinfo)
2314 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2315 && !(gimple_code (use_stmt) == GIMPLE_PHI
2316 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2317 {
2318 if (dump_enabled_p ())
2319 {
2320 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2321 "def in non-SLP stmt: ");
2322 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2323 }
2324 stype = hybrid;
2325 }
29764870 2326 }
642fce57 2327 }
ebfd146a 2328
502f0263
RB
2329 if (stype == hybrid
2330 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2331 {
2332 if (dump_enabled_p ())
2333 {
2334 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2335 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2336 }
2337 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2338 }
ebfd146a 2339
642fce57 2340 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
603cca93 2341 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
90dd6e3d 2342 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2343}
f2c74cc4 2344
642fce57 2345/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2346
642fce57
RB
2347static tree
2348vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2349{
2350 walk_stmt_info *wi = (walk_stmt_info *)data;
2351 struct loop *loopp = (struct loop *)wi->info;
2352
2353 if (wi->is_lhs)
2354 return NULL_TREE;
2355
2356 if (TREE_CODE (*tp) == SSA_NAME
2357 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2358 {
355fe088 2359 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
642fce57
RB
2360 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2361 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
b1af7da6
RB
2362 {
2363 if (dump_enabled_p ())
2364 {
2365 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2366 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2367 }
2368 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2369 }
642fce57
RB
2370 }
2371
2372 return NULL_TREE;
ebfd146a
IR
2373}
2374
642fce57
RB
2375static tree
2376vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2377 walk_stmt_info *)
2378{
9186a9d3 2379 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
642fce57
RB
2380 /* If the stmt is in a SLP instance then this isn't a reason
2381 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2382 if (! STMT_SLP_TYPE (use_vinfo)
2383 && (STMT_VINFO_RELEVANT (use_vinfo)
2384 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2385 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2386 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2387 ;
2388 else
642fce57
RB
2389 *handled = true;
2390 return NULL_TREE;
2391}
ebfd146a
IR
2392
2393/* Find stmts that must be both vectorized and SLPed. */
2394
2395void
2396vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2397{
2398 unsigned int i;
9771b263 2399 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2400 slp_instance instance;
2401
73fbfcad 2402 if (dump_enabled_p ())
e645e942
TJ
2403 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2404 "\n");
ebfd146a 2405
642fce57
RB
2406 /* First walk all pattern stmt in the loop and mark defs of uses as
2407 hybrid because immediate uses in them are not recorded. */
2408 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2409 {
2410 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2411 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2412 gsi_next (&gsi))
2413 {
355fe088 2414 gimple *stmt = gsi_stmt (gsi);
642fce57
RB
2415 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2416 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2417 {
2418 walk_stmt_info wi;
2419 memset (&wi, 0, sizeof (wi));
2420 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2421 gimple_stmt_iterator gsi2
2422 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2423 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2424 vect_detect_hybrid_slp_1, &wi);
2425 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2426 vect_detect_hybrid_slp_2,
2427 vect_detect_hybrid_slp_1, &wi);
2428 }
2429 }
2430 }
2431
2432 /* Then walk the SLP instance trees marking stmts with uses in
2433 non-SLP stmts as hybrid, also propagating hybrid down the
2434 SLP tree, collecting the above info on-the-fly. */
9771b263 2435 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2436 {
2437 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2438 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2439 i, pure_slp);
2440 }
ebfd146a
IR
2441}
2442
a70d6342 2443
2c515559
RS
2444/* Initialize a bb_vec_info struct for the statements between
2445 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2446
2447_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2448 gimple_stmt_iterator region_end_in)
2449 : vec_info (vec_info::bb, init_cost (NULL)),
2450 bb (gsi_bb (region_begin_in)),
2451 region_begin (region_begin_in),
2452 region_end (region_end_in)
a70d6342 2453{
a70d6342
IR
2454 gimple_stmt_iterator gsi;
2455
61d371eb
RB
2456 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2457 gsi_next (&gsi))
a70d6342 2458 {
355fe088 2459 gimple *stmt = gsi_stmt (gsi);
a70d6342 2460 gimple_set_uid (stmt, 0);
2c515559 2461 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
a70d6342
IR
2462 }
2463
2c515559 2464 bb->aux = this;
a70d6342
IR
2465}
2466
2467
2468/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2469 stmts in the basic block. */
2470
2c515559 2471_bb_vec_info::~_bb_vec_info ()
a70d6342 2472{
2c515559
RS
2473 for (gimple_stmt_iterator si = region_begin;
2474 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
a70d6342 2475 {
355fe088 2476 gimple *stmt = gsi_stmt (si);
a70d6342
IR
2477 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2478
2479 if (stmt_info)
2480 /* Free stmt_vec_info. */
2481 free_stmt_vec_info (stmt);
61d371eb
RB
2482
2483 /* Reset region marker. */
2484 gimple_set_uid (stmt, -1);
a70d6342
IR
2485 }
2486
2c515559 2487 bb->aux = NULL;
a70d6342
IR
2488}
2489
15944069
RS
2490/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2491 given then that child nodes have already been processed, and that
2492 their def types currently match their SLP node's def type. */
a70d6342
IR
2493
2494static bool
15944069
RS
2495vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2496 slp_instance node_instance,
2497 stmt_vector_for_cost *cost_vec)
a70d6342 2498{
15944069 2499 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
bd2f172f
RB
2500 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2501 gcc_assert (stmt_info);
2502 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2503
2504 /* For BB vectorization vector types are assigned here.
2505 Memory accesses already got their vector type assigned
2506 in vect_analyze_data_refs. */
2507 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2508 if (bb_vinfo
2509 && ! STMT_VINFO_DATA_REF (stmt_info))
a70d6342 2510 {
1f3cb663
RS
2511 tree vectype, nunits_vectype;
2512 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2513 &nunits_vectype))
2514 /* We checked this when building the node. */
2515 gcc_unreachable ();
2516 if (vectype == boolean_type_node)
bd2f172f 2517 {
1f3cb663
RS
2518 vectype = vect_get_mask_type_for_stmt (stmt_info);
2519 if (!vectype)
2520 /* vect_get_mask_type_for_stmt has already explained the
2521 failure. */
2522 return false;
bd2f172f
RB
2523 }
2524
2525 gimple *sstmt;
15944069 2526 unsigned int i;
bd2f172f
RB
2527 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2528 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
a70d6342
IR
2529 }
2530
8b7e9dba
RS
2531 /* Calculate the number of vector statements to be created for the
2532 scalar stmts in this node. For SLP reductions it is equal to the
2533 number of vector statements in the children (which has already been
2534 calculated by the recursive call). Otherwise it is the number of
2c53b149 2535 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
8b7e9dba 2536 VF divided by the number of elements in a vector. */
2c53b149
RB
2537 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2538 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
8b7e9dba
RS
2539 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2540 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2541 else
2542 {
d9f21f6a 2543 poly_uint64 vf;
8b7e9dba
RS
2544 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2545 vf = loop_vinfo->vectorization_factor;
2546 else
2547 vf = 1;
2548 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2549 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2550 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2551 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2552 }
2553
15944069
RS
2554 bool dummy;
2555 return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
2556}
2557
2558/* Analyze statements contained in SLP tree NODE after recursively analyzing
2559 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2560
2561 Return true if the operations are supported. */
2562
2563static bool
2564vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2565 slp_instance node_instance,
2566 scalar_stmts_to_slp_tree_map_t *visited,
2567 scalar_stmts_to_slp_tree_map_t *lvisited,
2568 stmt_vector_for_cost *cost_vec)
2569{
2570 int i, j;
2571 slp_tree child;
2572
2573 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2574 return true;
2575
2576 /* If we already analyzed the exact same set of scalar stmts we're done.
2577 We share the generated vector stmts for those. */
2578 slp_tree *leader;
2579 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2580 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2581 {
2582 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2583 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2584 return true;
2585 }
2586
2587 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2588 doesn't result in any issue since we throw away the lvisited set
2589 when we fail. */
2590 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2591
2592 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2593 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2594 visited, lvisited, cost_vec))
2595 return false;
2596
bd2f172f
RB
2597 /* Push SLP node def-type to stmt operands. */
2598 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2599 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2600 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2601 = SLP_TREE_DEF_TYPE (child);
15944069
RS
2602 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2603 cost_vec);
bd2f172f
RB
2604 /* Restore def-types. */
2605 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2606 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2607 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2608 = vect_internal_def;
2609 if (! res)
2610 return false;
2611
2612 return true;
a70d6342
IR
2613}
2614
2615
8b7e9dba 2616/* Analyze statements in SLP instances of VINFO. Return true if the
a70d6342
IR
2617 operations are supported. */
2618
a12e42fc 2619bool
8b7e9dba 2620vect_slp_analyze_operations (vec_info *vinfo)
a70d6342 2621{
a70d6342
IR
2622 slp_instance instance;
2623 int i;
2624
a12e42fc
RB
2625 if (dump_enabled_p ())
2626 dump_printf_loc (MSG_NOTE, vect_location,
2627 "=== vect_slp_analyze_operations ===\n");
2628
68435eb2
RB
2629 scalar_stmts_to_slp_tree_map_t *visited
2630 = new scalar_stmts_to_slp_tree_map_t ();
8b7e9dba 2631 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
a70d6342 2632 {
68435eb2
RB
2633 scalar_stmts_to_slp_tree_map_t lvisited;
2634 stmt_vector_for_cost cost_vec;
2635 cost_vec.create (2);
8b7e9dba
RS
2636 if (!vect_slp_analyze_node_operations (vinfo,
2637 SLP_INSTANCE_TREE (instance),
68435eb2
RB
2638 instance, visited, &lvisited,
2639 &cost_vec))
a70d6342 2640 {
a12e42fc
RB
2641 dump_printf_loc (MSG_NOTE, vect_location,
2642 "removing SLP instance operations starting from: ");
2643 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2644 SLP_TREE_SCALAR_STMTS
2645 (SLP_INSTANCE_TREE (instance))[0], 0);
2646 vect_free_slp_instance (instance);
8b7e9dba 2647 vinfo->slp_instances.ordered_remove (i);
68435eb2 2648 cost_vec.release ();
a70d6342
IR
2649 }
2650 else
68435eb2
RB
2651 {
2652 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2653 x != lvisited.end(); ++x)
2654 visited->put ((*x).first.copy (), (*x).second);
2655 i++;
78604de0 2656
68435eb2
RB
2657 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2658 cost_vec.release ();
2659 }
2660 }
78604de0
RB
2661 delete visited;
2662
8b7e9dba 2663 return !vinfo->slp_instances.is_empty ();
a70d6342
IR
2664}
2665
6eddf228
RB
2666
2667/* Compute the scalar cost of the SLP node NODE and its children
2668 and return it. Do not account defs that are marked in LIFE and
2669 update LIFE according to uses of NODE. */
2670
a296d6d3 2671static void
292cba13 2672vect_bb_slp_scalar_cost (basic_block bb,
a296d6d3
RB
2673 slp_tree node, vec<bool, va_heap> *life,
2674 stmt_vector_for_cost *cost_vec)
6eddf228 2675{
6eddf228 2676 unsigned i;
355fe088 2677 gimple *stmt;
6eddf228
RB
2678 slp_tree child;
2679
2680 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2681 {
6eddf228
RB
2682 ssa_op_iter op_iter;
2683 def_operand_p def_p;
2684 stmt_vec_info stmt_info;
2685
ff4c81cc 2686 if ((*life)[i])
6eddf228
RB
2687 continue;
2688
2689 /* If there is a non-vectorized use of the defs then the scalar
2690 stmt is kept live in which case we do not account it or any
2691 required defs in the SLP children in the scalar cost. This
2692 way we make the vectorization more costly when compared to
2693 the scalar cost. */
2694 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2695 {
2696 imm_use_iterator use_iter;
355fe088 2697 gimple *use_stmt;
6eddf228 2698 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
f30a0ba5 2699 if (!is_gimple_debug (use_stmt)
61d371eb
RB
2700 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2701 use_stmt)
603cca93 2702 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
6eddf228 2703 {
ff4c81cc 2704 (*life)[i] = true;
6eddf228
RB
2705 BREAK_FROM_IMM_USE_STMT (use_iter);
2706 }
2707 }
ff4c81cc 2708 if ((*life)[i])
6eddf228
RB
2709 continue;
2710
b555a2e4
RB
2711 /* Count scalar stmts only once. */
2712 if (gimple_visited_p (stmt))
2713 continue;
2714 gimple_set_visited (stmt, true);
2715
6eddf228 2716 stmt_info = vinfo_for_stmt (stmt);
a296d6d3 2717 vect_cost_for_stmt kind;
6eddf228
RB
2718 if (STMT_VINFO_DATA_REF (stmt_info))
2719 {
2720 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
a296d6d3 2721 kind = scalar_load;
6eddf228 2722 else
a296d6d3 2723 kind = scalar_store;
6eddf228
RB
2724 }
2725 else
a296d6d3
RB
2726 kind = scalar_stmt;
2727 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
6eddf228
RB
2728 }
2729
faa5399b 2730 auto_vec<bool, 20> subtree_life;
6eddf228 2731 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
2732 {
2733 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2734 {
2735 /* Do not directly pass LIFE to the recursive call, copy it to
2736 confine changes in the callee to the current child/subtree. */
2737 subtree_life.safe_splice (*life);
a296d6d3 2738 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
faa5399b
RB
2739 subtree_life.truncate (0);
2740 }
2741 }
6eddf228
RB
2742}
2743
69f11a13
IR
2744/* Check if vectorization of the basic block is profitable. */
2745
2746static bool
2747vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2748{
9771b263 2749 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2750 slp_instance instance;
1a4b99c1 2751 int i;
c3e7ee41 2752 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2753 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2754
2755 /* Calculate scalar cost. */
a296d6d3
RB
2756 stmt_vector_for_cost scalar_costs;
2757 scalar_costs.create (0);
6eddf228 2758 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2759 {
00f96dc9 2760 auto_vec<bool, 20> life;
ff4c81cc 2761 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
a296d6d3
RB
2762 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2763 SLP_INSTANCE_TREE (instance),
2764 &life, &scalar_costs);
2765 }
2766 void *target_cost_data = init_cost (NULL);
68435eb2 2767 add_stmt_costs (target_cost_data, &scalar_costs);
a296d6d3
RB
2768 scalar_costs.release ();
2769 unsigned dummy;
2770 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2771 destroy_cost_data (target_cost_data);
69f11a13 2772
b555a2e4
RB
2773 /* Unset visited flag. */
2774 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2775 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2776 gimple_set_visited (gsi_stmt (gsi), false);
2777
c3e7ee41 2778 /* Complete the target-specific cost calculation. */
92345349
BS
2779 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2780 &vec_inside_cost, &vec_epilogue_cost);
2781
2782 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2783
73fbfcad 2784 if (dump_enabled_p ())
69f11a13 2785 {
78c60e3d
SS
2786 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2787 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2788 vec_inside_cost);
2789 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2790 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2791 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2792 }
2793
a6524bba
RB
2794 /* Vectorization is profitable if its cost is more than the cost of scalar
2795 version. Note that we err on the vector side for equal cost because
2796 the cost estimate is otherwise quite pessimistic (constant uses are
2797 free on the scalar side but cost a load on the vector side for
2798 example). */
2799 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2800 return false;
2801
2802 return true;
2803}
2804
a5b50aa1
RB
2805/* Check if the basic block can be vectorized. Returns a bb_vec_info
2806 if so and sets fatal to true if failure is independent of
2807 current_vector_size. */
a70d6342 2808
8e19f5a1 2809static bb_vec_info
61d371eb
RB
2810vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2811 gimple_stmt_iterator region_end,
a5b50aa1
RB
2812 vec<data_reference_p> datarefs, int n_stmts,
2813 bool &fatal)
a70d6342
IR
2814{
2815 bb_vec_info bb_vinfo;
a70d6342 2816 slp_instance instance;
8e19f5a1 2817 int i;
d9f21f6a 2818 poly_uint64 min_vf = 2;
e4a707c4 2819
a5b50aa1
RB
2820 /* The first group of checks is independent of the vector size. */
2821 fatal = true;
2822
61d371eb
RB
2823 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2824 {
2825 if (dump_enabled_p ())
2826 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2827 "not vectorized: too many instructions in "
2828 "basic block.\n");
2829 free_data_refs (datarefs);
2830 return NULL;
2831 }
2832
2c515559 2833 bb_vinfo = new _bb_vec_info (region_begin, region_end);
a70d6342
IR
2834 if (!bb_vinfo)
2835 return NULL;
2836
61d371eb 2837 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
428db0ba
RB
2838
2839 /* Analyze the data references. */
2840
2841 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
a70d6342 2842 {
73fbfcad 2843 if (dump_enabled_p ())
78c60e3d
SS
2844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2845 "not vectorized: unhandled data-ref in basic "
2846 "block.\n");
b8698a0f 2847
2c515559 2848 delete bb_vinfo;
a70d6342
IR
2849 return NULL;
2850 }
2851
fcac74a1 2852 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2853 {
73fbfcad 2854 if (dump_enabled_p ())
78c60e3d
SS
2855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2856 "not vectorized: not enough data-refs in "
2857 "basic block.\n");
a70d6342 2858
2c515559 2859 delete bb_vinfo;
a70d6342
IR
2860 return NULL;
2861 }
2862
310213d4 2863 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
2864 {
2865 if (dump_enabled_p ())
2866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2867 "not vectorized: unhandled data access in "
2868 "basic block.\n");
2869
2c515559 2870 delete bb_vinfo;
5abe1e05
RB
2871 return NULL;
2872 }
2873
a5b50aa1
RB
2874 /* If there are no grouped stores in the region there is no need
2875 to continue with pattern recog as vect_analyze_slp will fail
2876 anyway. */
2877 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 2878 {
73fbfcad 2879 if (dump_enabled_p ())
a5b50aa1
RB
2880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2881 "not vectorized: no grouped stores in "
2882 "basic block.\n");
b8698a0f 2883
2c515559 2884 delete bb_vinfo;
a70d6342
IR
2885 return NULL;
2886 }
b8698a0f 2887
a5b50aa1
RB
2888 /* While the rest of the analysis below depends on it in some way. */
2889 fatal = false;
2890
2891 vect_pattern_recog (bb_vinfo);
2892
a70d6342
IR
2893 /* Check the SLP opportunities in the basic block, analyze and build SLP
2894 trees. */
310213d4 2895 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 2896 {
73fbfcad 2897 if (dump_enabled_p ())
effb52da
RB
2898 {
2899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2900 "Failed to SLP the basic block.\n");
2901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2902 "not vectorized: failed to find SLP opportunities "
2903 "in basic block.\n");
2904 }
a70d6342 2905
2c515559 2906 delete bb_vinfo;
a70d6342
IR
2907 return NULL;
2908 }
b8698a0f 2909
62c8a2cf
RS
2910 vect_record_base_alignments (bb_vinfo);
2911
c2a12ca0
RB
2912 /* Analyze and verify the alignment of data references and the
2913 dependence in the SLP instances. */
a5b50aa1
RB
2914 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2915 {
c2a12ca0
RB
2916 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2917 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1
RB
2918 {
2919 dump_printf_loc (MSG_NOTE, vect_location,
2920 "removing SLP instance operations starting from: ");
2921 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2922 SLP_TREE_SCALAR_STMTS
2923 (SLP_INSTANCE_TREE (instance))[0], 0);
2924 vect_free_slp_instance (instance);
2925 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2926 continue;
2927 }
c2a12ca0
RB
2928
2929 /* Mark all the statements that we want to vectorize as pure SLP and
2930 relevant. */
2931 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2932 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2933
a5b50aa1
RB
2934 i++;
2935 }
a5b50aa1
RB
2936 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2937 {
2c515559 2938 delete bb_vinfo;
a5b50aa1
RB
2939 return NULL;
2940 }
2941
8b7e9dba 2942 if (!vect_slp_analyze_operations (bb_vinfo))
a70d6342 2943 {
73fbfcad 2944 if (dump_enabled_p ())
e645e942 2945 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2946 "not vectorized: bad operation in basic block.\n");
a70d6342 2947
2c515559 2948 delete bb_vinfo;
a70d6342
IR
2949 return NULL;
2950 }
2951
69f11a13 2952 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 2953 if (!unlimited_cost_model (NULL)
69f11a13
IR
2954 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2955 {
73fbfcad 2956 if (dump_enabled_p ())
78c60e3d
SS
2957 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2958 "not vectorized: vectorization is not "
2959 "profitable.\n");
69f11a13 2960
2c515559 2961 delete bb_vinfo;
69f11a13
IR
2962 return NULL;
2963 }
2964
73fbfcad 2965 if (dump_enabled_p ())
78c60e3d
SS
2966 dump_printf_loc (MSG_NOTE, vect_location,
2967 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2968
2969 return bb_vinfo;
2970}
2971
2972
428db0ba
RB
2973/* Main entry for the BB vectorizer. Analyze and transform BB, returns
2974 true if anything in the basic-block was vectorized. */
2975
2976bool
2977vect_slp_bb (basic_block bb)
8e19f5a1
IR
2978{
2979 bb_vec_info bb_vinfo;
8e19f5a1 2980 gimple_stmt_iterator gsi;
61d371eb 2981 bool any_vectorized = false;
86e36728 2982 auto_vector_sizes vector_sizes;
8e19f5a1 2983
73fbfcad 2984 if (dump_enabled_p ())
78c60e3d 2985 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
8e19f5a1 2986
8e19f5a1
IR
2987 /* Autodetect first vector size we try. */
2988 current_vector_size = 0;
86e36728
RS
2989 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2990 unsigned int next_size = 0;
8e19f5a1 2991
61d371eb
RB
2992 gsi = gsi_start_bb (bb);
2993
86e36728 2994 poly_uint64 autodetected_vector_size = 0;
8e19f5a1
IR
2995 while (1)
2996 {
61d371eb
RB
2997 if (gsi_end_p (gsi))
2998 break;
2999
3000 gimple_stmt_iterator region_begin = gsi;
3001 vec<data_reference_p> datarefs = vNULL;
3002 int insns = 0;
3003
3004 for (; !gsi_end_p (gsi); gsi_next (&gsi))
428db0ba 3005 {
61d371eb
RB
3006 gimple *stmt = gsi_stmt (gsi);
3007 if (is_gimple_debug (stmt))
3008 continue;
3009 insns++;
3010
3011 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3012 vect_location = gimple_location (stmt);
3013
8e846c66 3014 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
61d371eb
RB
3015 break;
3016 }
3017
3018 /* Skip leading unhandled stmts. */
3019 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3020 {
3021 gsi_next (&gsi);
3022 continue;
3023 }
428db0ba 3024
61d371eb
RB
3025 gimple_stmt_iterator region_end = gsi;
3026
3027 bool vectorized = false;
a5b50aa1 3028 bool fatal = false;
61d371eb 3029 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
a5b50aa1 3030 datarefs, insns, fatal);
61d371eb
RB
3031 if (bb_vinfo
3032 && dbg_cnt (vect_slp))
3033 {
428db0ba 3034 if (dump_enabled_p ())
61d371eb 3035 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
428db0ba
RB
3036
3037 vect_schedule_slp (bb_vinfo);
3038
d1ac60d5
RB
3039 unsigned HOST_WIDE_INT bytes;
3040 if (current_vector_size.is_constant (&bytes))
3041 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3042 "basic block part vectorized using "
3043 HOST_WIDE_INT_PRINT_UNSIGNED " byte "
3044 "vectors\n", bytes);
3045 else
3046 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3047 "basic block part vectorized using variable "
3048 "length vectors\n");
428db0ba 3049
61d371eb 3050 vectorized = true;
428db0ba 3051 }
2c515559 3052 delete bb_vinfo;
8e19f5a1 3053
61d371eb 3054 any_vectorized |= vectorized;
8e19f5a1 3055
86e36728
RS
3056 if (next_size == 0)
3057 autodetected_vector_size = current_vector_size;
3058
3059 if (next_size < vector_sizes.length ()
3060 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3061 next_size += 1;
3062
61d371eb 3063 if (vectorized
86e36728
RS
3064 || next_size == vector_sizes.length ()
3065 || known_eq (current_vector_size, 0U)
a5b50aa1
RB
3066 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3067 vector sizes will fail do not bother iterating. */
3068 || fatal)
61d371eb
RB
3069 {
3070 if (gsi_end_p (region_end))
3071 break;
8e19f5a1 3072
61d371eb
RB
3073 /* Skip the unhandled stmt. */
3074 gsi_next (&gsi);
3075
3076 /* And reset vector sizes. */
3077 current_vector_size = 0;
86e36728 3078 next_size = 0;
61d371eb
RB
3079 }
3080 else
3081 {
3082 /* Try the next biggest vector size. */
86e36728 3083 current_vector_size = vector_sizes[next_size++];
61d371eb 3084 if (dump_enabled_p ())
86e36728
RS
3085 {
3086 dump_printf_loc (MSG_NOTE, vect_location,
3087 "***** Re-trying analysis with "
3088 "vector size ");
3089 dump_dec (MSG_NOTE, current_vector_size);
3090 dump_printf (MSG_NOTE, "\n");
3091 }
61d371eb
RB
3092
3093 /* Start over. */
3094 gsi = region_begin;
3095 }
8e19f5a1 3096 }
61d371eb
RB
3097
3098 return any_vectorized;
8e19f5a1
IR
3099}
3100
3101
e4af0bc4
IE
3102/* Return 1 if vector type of boolean constant which is OPNUM
3103 operand in statement STMT is a boolean vector. */
3104
3105static bool
3106vect_mask_constant_operand_p (gimple *stmt, int opnum)
3107{
3108 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3109 enum tree_code code = gimple_expr_code (stmt);
3110 tree op, vectype;
3111 gimple *def_stmt;
3112 enum vect_def_type dt;
3113
3114 /* For comparison and COND_EXPR type is chosen depending
3115 on the other comparison operand. */
3116 if (TREE_CODE_CLASS (code) == tcc_comparison)
3117 {
3118 if (opnum)
3119 op = gimple_assign_rhs1 (stmt);
3120 else
3121 op = gimple_assign_rhs2 (stmt);
3122
3123 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3124 &dt, &vectype))
3125 gcc_unreachable ();
3126
3127 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3128 }
3129
3130 if (code == COND_EXPR)
3131 {
3132 tree cond = gimple_assign_rhs1 (stmt);
3133
3134 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1
RB
3135 op = cond;
3136 else if (opnum)
e4af0bc4
IE
3137 op = TREE_OPERAND (cond, 1);
3138 else
3139 op = TREE_OPERAND (cond, 0);
3140
3141 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3142 &dt, &vectype))
3143 gcc_unreachable ();
3144
3145 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3146 }
3147
3148 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3149}
3150
018b2744
RS
3151/* Build a variable-length vector in which the elements in ELTS are repeated
3152 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3153 RESULTS and add any new instructions to SEQ.
3154
3155 The approach we use is:
3156
3157 (1) Find a vector mode VM with integer elements of mode IM.
3158
3159 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3160 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3161 from small vectors to IM.
3162
3163 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3164
3165 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3166 correct byte contents.
3167
3168 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3169
3170 We try to find the largest IM for which this sequence works, in order
3171 to cut down on the number of interleaves. */
3172
f1739b48 3173void
018b2744
RS
3174duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3175 unsigned int nresults, vec<tree> &results)
3176{
3177 unsigned int nelts = elts.length ();
3178 tree element_type = TREE_TYPE (vector_type);
3179
3180 /* (1) Find a vector mode VM with integer elements of mode IM. */
3181 unsigned int nvectors = 1;
3182 tree new_vector_type;
3183 tree permutes[2];
3184 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3185 &nvectors, &new_vector_type,
3186 permutes))
3187 gcc_unreachable ();
3188
3189 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3190 unsigned int partial_nelts = nelts / nvectors;
3191 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3192
3193 tree_vector_builder partial_elts;
3194 auto_vec<tree, 32> pieces (nvectors * 2);
3195 pieces.quick_grow (nvectors * 2);
3196 for (unsigned int i = 0; i < nvectors; ++i)
3197 {
3198 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3199 ELTS' has mode IM. */
3200 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3201 for (unsigned int j = 0; j < partial_nelts; ++j)
3202 partial_elts.quick_push (elts[i * partial_nelts + j]);
3203 tree t = gimple_build_vector (seq, &partial_elts);
3204 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3205 TREE_TYPE (new_vector_type), t);
3206
3207 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3208 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3209 }
3210
3211 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3212 correct byte contents.
3213
3214 We need to repeat the following operation log2(nvectors) times:
3215
3216 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3217 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3218
3219 However, if each input repeats every N elements and the VF is
3220 a multiple of N * 2, the HI result is the same as the LO. */
3221 unsigned int in_start = 0;
3222 unsigned int out_start = nvectors;
3223 unsigned int hi_start = nvectors / 2;
3224 /* A bound on the number of outputs needed to produce NRESULTS results
3225 in the final iteration. */
3226 unsigned int noutputs_bound = nvectors * nresults;
3227 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3228 {
3229 noutputs_bound /= 2;
3230 unsigned int limit = MIN (noutputs_bound, nvectors);
3231 for (unsigned int i = 0; i < limit; ++i)
3232 {
3233 if ((i & 1) != 0
3234 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3235 2 * in_repeat))
3236 {
3237 pieces[out_start + i] = pieces[out_start + i - 1];
3238 continue;
3239 }
3240
3241 tree output = make_ssa_name (new_vector_type);
3242 tree input1 = pieces[in_start + (i / 2)];
3243 tree input2 = pieces[in_start + (i / 2) + hi_start];
3244 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3245 input1, input2,
3246 permutes[i & 1]);
3247 gimple_seq_add_stmt (seq, stmt);
3248 pieces[out_start + i] = output;
3249 }
3250 std::swap (in_start, out_start);
3251 }
3252
3253 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3254 results.reserve (nresults);
3255 for (unsigned int i = 0; i < nresults; ++i)
3256 if (i < nvectors)
3257 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3258 pieces[in_start + i]));
3259 else
3260 results.quick_push (results[i - nvectors]);
3261}
3262
e4af0bc4 3263
b8698a0f
L
3264/* For constant and loop invariant defs of SLP_NODE this function returns
3265 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
3266 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3267 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
3268 REDUC_INDEX is the index of the reduction operand in the statements, unless
3269 it is -1. */
ebfd146a
IR
3270
3271static void
9dc3f7de 3272vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 3273 vec<tree> *vec_oprnds,
306b0c92 3274 unsigned int op_num, unsigned int number_of_vectors)
ebfd146a 3275{
355fe088
TS
3276 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3277 gimple *stmt = stmts[0];
ebfd146a 3278 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
018b2744 3279 unsigned HOST_WIDE_INT nunits;
ebfd146a 3280 tree vec_cst;
d2a12ae7 3281 unsigned j, number_of_places_left_in_vector;
ebfd146a 3282 tree vector_type;
9dc3f7de 3283 tree vop;
9771b263 3284 int group_size = stmts.length ();
ebfd146a 3285 unsigned int vec_num, i;
d2a12ae7 3286 unsigned number_of_copies = 1;
9771b263
DN
3287 vec<tree> voprnds;
3288 voprnds.create (number_of_vectors);
ebfd146a 3289 bool constant_p, is_store;
b5aeb3bb 3290 tree neutral_op = NULL;
bac430c9 3291 enum tree_code code = gimple_expr_code (stmt);
13396b6e 3292 gimple_seq ctor_seq = NULL;
018b2744 3293 auto_vec<tree, 16> permute_results;
b5aeb3bb 3294
42fd8198 3295 /* Check if vector type is a boolean vector. */
2568d8a1 3296 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
e4af0bc4 3297 && vect_mask_constant_operand_p (stmt, op_num))
42fd8198
IE
3298 vector_type
3299 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3300 else
3301 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
afbe6325 3302
ebfd146a
IR
3303 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3304 {
3305 is_store = true;
3306 op = gimple_assign_rhs1 (stmt);
3307 }
3308 else
9dc3f7de
IR
3309 is_store = false;
3310
3311 gcc_assert (op);
ebfd146a 3312
ebfd146a 3313 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3314 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3315
3316 For example, we have two scalar operands, s1 and s2 (e.g., group of
3317 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3318 of this type can be packed in a vector). The output vector will contain
3319 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3320 will be 2).
3321
b8698a0f 3322 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3323 containing the operands.
3324
3325 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3326 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3327 {s5, s6, s7, s8}. */
b8698a0f 3328
018b2744
RS
3329 /* When using duplicate_and_interleave, we just need one element for
3330 each scalar statement. */
3331 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3332 nunits = group_size;
3333
14a61437 3334 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3335
3336 number_of_places_left_in_vector = nunits;
62cf7335 3337 constant_p = true;
5ebaa477 3338 tree_vector_builder elts (vector_type, nunits, 1);
794e3180 3339 elts.quick_grow (nunits);
90dd6e3d 3340 bool place_after_defs = false;
ebfd146a
IR
3341 for (j = 0; j < number_of_copies; j++)
3342 {
9771b263 3343 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
ebfd146a
IR
3344 {
3345 if (is_store)
3346 op = gimple_assign_rhs1 (stmt);
bac430c9 3347 else
f7e531cf 3348 {
bac430c9 3349 switch (code)
f7e531cf 3350 {
bac430c9 3351 case COND_EXPR:
a989bcc3
IE
3352 {
3353 tree cond = gimple_assign_rhs1 (stmt);
3354 if (TREE_CODE (cond) == SSA_NAME)
3355 op = gimple_op (stmt, op_num + 1);
3356 else if (op_num == 0 || op_num == 1)
bac430c9 3357 op = TREE_OPERAND (cond, op_num);
a989bcc3
IE
3358 else
3359 {
3360 if (op_num == 2)
3361 op = gimple_assign_rhs2 (stmt);
3362 else
3363 op = gimple_assign_rhs3 (stmt);
3364 }
3365 }
bac430c9
IR
3366 break;
3367
3368 case CALL_EXPR:
3369 op = gimple_call_arg (stmt, op_num);
3370 break;
3371
b84b294a
JJ
3372 case LSHIFT_EXPR:
3373 case RSHIFT_EXPR:
3374 case LROTATE_EXPR:
3375 case RROTATE_EXPR:
3376 op = gimple_op (stmt, op_num + 1);
3377 /* Unlike the other binary operators, shifts/rotates have
3378 the shift count being int, instead of the same type as
3379 the lhs, so make sure the scalar is the right type if
3380 we are dealing with vectors of
3381 long long/long/short/char. */
793d9a16 3382 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
3383 op = fold_convert (TREE_TYPE (vector_type), op);
3384 break;
3385
bac430c9
IR
3386 default:
3387 op = gimple_op (stmt, op_num + 1);
b84b294a 3388 break;
f7e531cf
IR
3389 }
3390 }
b8698a0f 3391
ebfd146a 3392 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3393 number_of_places_left_in_vector--;
90dd6e3d 3394 tree orig_op = op;
13396b6e 3395 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3396 {
793d9a16 3397 if (CONSTANT_CLASS_P (op))
13396b6e 3398 {
42fd8198
IE
3399 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3400 {
3401 /* Can't use VIEW_CONVERT_EXPR for booleans because
3402 of possibly different sizes of scalar value and
3403 vector element. */
3404 if (integer_zerop (op))
3405 op = build_int_cst (TREE_TYPE (vector_type), 0);
3406 else if (integer_onep (op))
158beb4a 3407 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3408 else
3409 gcc_unreachable ();
3410 }
3411 else
3412 op = fold_unary (VIEW_CONVERT_EXPR,
3413 TREE_TYPE (vector_type), op);
13396b6e
JJ
3414 gcc_assert (op && CONSTANT_CLASS_P (op));
3415 }
3416 else
3417 {
b731b390 3418 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3419 gimple *init_stmt;
262a363f
JJ
3420 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3421 {
158beb4a
JJ
3422 tree true_val
3423 = build_all_ones_cst (TREE_TYPE (vector_type));
3424 tree false_val
3425 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3426 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3427 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3428 op, true_val,
3429 false_val);
262a363f 3430 }
262a363f
JJ
3431 else
3432 {
3433 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3434 op);
3435 init_stmt
3436 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3437 op);
3438 }
13396b6e
JJ
3439 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3440 op = new_temp;
3441 }
50eeef09 3442 }
d2a12ae7 3443 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3444 if (!CONSTANT_CLASS_P (op))
3445 constant_p = false;
90dd6e3d
RB
3446 if (TREE_CODE (orig_op) == SSA_NAME
3447 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3448 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3449 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3450 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3451 place_after_defs = true;
ebfd146a
IR
3452
3453 if (number_of_places_left_in_vector == 0)
3454 {
018b2744
RS
3455 if (constant_p
3456 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3457 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3458 vec_cst = gimple_build_vector (&ctor_seq, &elts);
ebfd146a 3459 else
d2a12ae7 3460 {
018b2744
RS
3461 if (vec_oprnds->is_empty ())
3462 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3463 number_of_vectors,
3464 permute_results);
3465 vec_cst = permute_results[number_of_vectors - j - 1];
d2a12ae7 3466 }
90dd6e3d
RB
3467 tree init;
3468 gimple_stmt_iterator gsi;
3469 if (place_after_defs)
3470 {
3471 gsi = gsi_for_stmt
3472 (vect_find_last_scalar_stmt_in_slp (slp_node));
3473 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3474 }
3475 else
3476 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
13396b6e
JJ
3477 if (ctor_seq != NULL)
3478 {
90dd6e3d 3479 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
018b2744 3480 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
13396b6e
JJ
3481 ctor_seq = NULL;
3482 }
90dd6e3d
RB
3483 voprnds.quick_push (init);
3484 place_after_defs = false;
62cf7335
RB
3485 number_of_places_left_in_vector = nunits;
3486 constant_p = true;
5ebaa477
RS
3487 elts.new_vector (vector_type, nunits, 1);
3488 elts.quick_grow (nunits);
ebfd146a
IR
3489 }
3490 }
3491 }
3492
b8698a0f 3493 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 3494 them. */
9771b263 3495 vec_num = voprnds.length ();
d2a12ae7 3496 for (j = vec_num; j != 0; j--)
ebfd146a 3497 {
9771b263
DN
3498 vop = voprnds[j - 1];
3499 vec_oprnds->quick_push (vop);
ebfd146a
IR
3500 }
3501
9771b263 3502 voprnds.release ();
ebfd146a
IR
3503
3504 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3505 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3506 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3507 to replicate the vectors. */
9771b263 3508 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3509 {
b5aeb3bb
IR
3510 tree neutral_vec = NULL;
3511
3512 if (neutral_op)
3513 {
3514 if (!neutral_vec)
b9acc9f1 3515 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3516
9771b263 3517 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3518 }
3519 else
3520 {
9771b263
DN
3521 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3522 vec_oprnds->quick_push (vop);
b5aeb3bb 3523 }
ebfd146a
IR
3524 }
3525}
3526
3527
3528/* Get vectorized definitions from SLP_NODE that contains corresponding
3529 vectorized def-stmts. */
3530
3531static void
9771b263 3532vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
3533{
3534 tree vec_oprnd;
355fe088 3535 gimple *vec_def_stmt;
ebfd146a
IR
3536 unsigned int i;
3537
9771b263 3538 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3539
9771b263 3540 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
ebfd146a
IR
3541 {
3542 gcc_assert (vec_def_stmt);
e7baeb39
RB
3543 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3544 vec_oprnd = gimple_phi_result (vec_def_stmt);
3545 else
3546 vec_oprnd = gimple_get_lhs (vec_def_stmt);
9771b263 3547 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
3548 }
3549}
3550
3551
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.

   OPS are the scalar operands of the first scalar stmt of SLP_NODE; for each
   operand one vec<tree> of vectorized defs is pushed onto *VEC_OPRNDS.  The
   caller owns the pushed vectors.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
		   vec<vec<tree> > *vec_oprnds)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
	 node or we need to create them (for invariants and constants).  We
	 check if the LHS of the first stmt of the next child matches OPRND.
	 If it does, we found the correct child.  Otherwise, we call
	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
	 to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
	{
	  child = SLP_TREE_CHILDREN (slp_node)[child_index];

	  /* We have to check both pattern and original def, if available.  */
	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	    {
	      gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
	      gimple *related
		= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
	      tree first_def_op;

	      /* As above, PHIs expose their def via the PHI result.  */
	      if (gimple_code (first_def) == GIMPLE_PHI)
		first_def_op = gimple_phi_result (first_def);
	      else
		first_def_op = gimple_get_lhs (first_def);
	      if (operand_equal_p (oprnd, first_def_op, 0)
		  || (related
		      && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
		{
		  /* The number of vector defs is determined by the number of
		     vector statements in the node from which we get those
		     statements.  */
		  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
		  vectorized_defs = true;
		  child_index++;
		}
	    }
	  else
	    /* External/constant child: consume it and fall through to
	       building constant vectors below.  */
	    child_index++;
	}

      if (!vectorized_defs)
	{
	  if (i == 0)
	    {
	      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	      /* Number of vector stmts was calculated according to LHS in
		 vect_schedule_slp_instance (), fix it by replacing LHS with
		 RHS, if necessary.  See vect_get_smallest_scalar_type () for
		 details.  */
	      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
					     &rhs_size_unit);
	      if (rhs_size_unit != lhs_size_unit)
		{
		  number_of_vects *= rhs_size_unit;
		  number_of_vects /= lhs_size_unit;
		}
	    }
	}

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
	 looking for initial loop invariant values.  */
      if (vectorized_defs)
	/* The defs are already vectorized.  */
	vect_get_slp_vect_defs (child, &vec_defs);
      else
	/* Build vectors from scalar defs.  */
	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
				   number_of_vects);

      vec_oprnds->quick_push (vec_defs);
    }
}
3650
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.

   VF is the vectorization factor; GSI is where generated statements are
   inserted (unused when ANALYZE_ONLY).  On success, *N_PERMS is set to the
   number of non-identity VEC_PERM_EXPRs that are (or would be) emitted.
   Returns false when the load is not grouped, when the vector length or VF
   is not a compile-time constant (per-element mask indices are required),
   when a permutation would need more than two input vectors, or when the
   target cannot perform the constant permutation.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
			      gimple_stmt_iterator *gsi, poly_uint64 vf,
			      slp_instance slp_node_instance, bool analyze_only,
			      unsigned *n_perms)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  unsigned int mask_element;
  machine_mode mode;
  unsigned HOST_WIDE_INT nunits, const_vf;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  /* Work relative to the first element of the interleaving group.  */
  stmt_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));

  mode = TYPE_MODE (vectype);

  /* At the moment, all permutations are represented using per-element
     indices, so we can't cope with variable vector lengths or
     vectorization factors.  */
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
      || !vf.is_constant (&const_vf))
    return false;

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  vec_perm_builder mask (nunits, nunits, 1);
  mask.quick_grow (nunits);
  vec_perm_indices indices;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  unsigned int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  for (unsigned int j = 0; j < const_vf; j++)
    {
      for (int k = 0; k < group_size; k++)
	{
	  /* Absolute element index of this load within the unrolled
	     interleaving group.  */
	  unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
			    + j * DR_GROUP_SIZE (stmt_info));
	  vec_index = i / nunits;
	  mask_element = i % nunits;
	  if (vec_index == first_vec_index
	      || first_vec_index == -1)
	    {
	      first_vec_index = vec_index;
	    }
	  else if (vec_index == second_vec_index
		   || second_vec_index == -1)
	    {
	      second_vec_index = vec_index;
	      /* Elements of the second operand are addressed as
		 NUNITS .. 2*NUNITS-1 in a two-input permute.  */
	      mask_element += nunits;
	    }
	  else
	    {
	      /* A single output vector would draw from three or more
		 inputs; VEC_PERM_EXPR only takes two.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "permutation requires at "
				   "least three vectors ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    stmt, 0);
		}
	      gcc_assert (analyze_only);
	      return false;
	    }

	  gcc_assert (mask_element < 2 * nunits);
	  if (mask_element != index)
	    noop_p = false;
	  mask[index++] = mask_element;

	  /* A full mask has been accumulated; if it is not the identity,
	     validate it against the target.  */
	  if (index == nunits && !noop_p)
	    {
	      indices.new_vector (mask, 2, nunits);
	      if (!can_vec_perm_const_p (mode, indices))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				       vect_location,
				       "unsupported vect permute { ");
		      for (i = 0; i < nunits; ++i)
			{
			  dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
			  dump_printf (MSG_MISSED_OPTIMIZATION, " ");
			}
		      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		    }
		  gcc_assert (analyze_only);
		  return false;
		}

	      ++*n_perms;
	    }

	  if (index == nunits)
	    {
	      if (!analyze_only)
		{
		  tree mask_vec = NULL_TREE;

		  if (! noop_p)
		    mask_vec = vec_perm_indices_to_tree (mask_type, indices);

		  /* Only one input vector was referenced: duplicate it.  */
		  if (second_vec_index == -1)
		    second_vec_index = first_vec_index;

		  /* Generate the permute statement if necessary.  */
		  tree first_vec = dr_chain[first_vec_index];
		  tree second_vec = dr_chain[second_vec_index];
		  gimple *perm_stmt;
		  if (! noop_p)
		    {
		      tree perm_dest
			= vect_create_destination_var (gimple_assign_lhs (stmt),
						       vectype);
		      perm_dest = make_ssa_name (perm_dest);
		      perm_stmt = gimple_build_assign (perm_dest,
						       VEC_PERM_EXPR,
						       first_vec, second_vec,
						       mask_vec);
		      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
		    }
		  else
		    /* If mask was NULL_TREE generate the requested
		       identity transform.  */
		    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

		  /* Store the vector statement in NODE.  */
		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
		}

	      /* Reset accumulation state for the next output vector.  */
	      index = 0;
	      first_vec_index = -1;
	      second_vec_index = -1;
	      noop_p = true;
	    }
	}
    }

  return true;
}
3836
ebfd146a
IR
/* Vectorize SLP instance tree in postorder.

   NODE is the (sub)tree to vectorize for SLP instance INSTANCE.  BST_MAP
   maps already-vectorized scalar-stmt sets to their SLP nodes so that
   shared subtrees are code-generated only once and their vector stmts
   reused.  Returns true iff the vectorized statement is a store.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
			    scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  /* External and constant defs are materialized on demand, not here.  */
  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
      return false;
    }

  bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
  /* Postorder: vectorize all children first.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE,vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      /* MASK records, per group lane, which of the two operations the
	 lane uses (0 = CODE0, 1 = OCODE).  */
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
	if (gimple_assign_rhs_code (ostmt) != code0)
	  {
	    mask.quick_push (1);
	    ocode = gimple_assign_rhs_code (ostmt);
	  }
	else
	  mask.quick_push (0);
      if (ocode != ERROR_MARK)
	{
	  vec<gimple *> v0;
	  vec<gimple *> v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  /* Vectorize the whole group with CODE0, stash the result (V0),
	     then re-vectorize with OCODE (V1); blend below.  */
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
	  for (j = 0; j < v0.length (); ++j)
	    {
	      /* Enforced by vect_build_slp_tree, which rejects variable-length
		 vectors for SLP_TREE_TWO_OPERATORS.  */
	      unsigned int const_nunits = nunits.to_constant ();
	      tree_vector_builder melts (mvectype, const_nunits, 1);
	      for (l = 0; l < const_nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  /* Select lane L from V0[j] (mask 0) or V1[j] (mask 1).  */
		  tree t = build_int_cst (meltype,
					  mask[k++] * const_nunits + l);
		  melts.quick_push (t);
		}
	      tmask = melts.build ();

	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]),
					   gimple_assign_lhs (v1[j]), tmask);
	      vect_finish_stmt_generation (stmt, vstmt, &si);
	      SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
	    }
	  v0.release ();
	  v1.release ();
	  return false;
	}
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}
3987
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.

   Recurses over the whole subtree; only pure-SLP, non-pattern calls that
   are still in the IL are rewritten.  The stmt_vec_info is transferred to
   the replacement assignment.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      /* gimple_bb == NULL means the stmt was already removed.  */
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
	continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
	  || is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      /* Replace the call with "lhs = 0" and move the stmt_vec_info over
	 to the new assignment.  */
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}
ebfd146a 4028
/* Generate vector code for all SLP instances in the loop/basic block.

   VINFO is the loop or basic-block vectorization info holding the SLP
   instances.  After scheduling, the now-dead scalar group stmts (stores)
   are removed.  Returns the is_store result of the last scheduled
   instance.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  /* Shared across instances so identical scalar-stmt sets are only
     vectorized once (see vect_schedule_slp_instance).  */
  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();
  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
					     instance, bst_map);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }
  delete bst_map;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ??? Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ??? For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (root);

      /* Remove the now-replaced scalar stores of the root group.  */
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
		  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
	{
	  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
	    break;

	  if (is_pattern_stmt_p (vinfo_for_stmt (store)))
	    store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  gsi = gsi_for_stmt (store);
	  unlink_stmt_vdef (store);
	  gsi_remove (&gsi, true);
	  release_defs (store);
	  free_stmt_vec_info (store);
	}
    }

  return is_store;
}