/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
	gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
	STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}


/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;


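/* Illustrative example (not from the original source): for the two-stmt
   group { x0 = a + b;  x1 = c + d; } two slp_oprnd_info instances are
   created; the one for operand 0 collects the defining stmts of a and c
   in DEF_STMTS, the one for operand 1 those of b and d.  FIRST_DT and
   FIRST_OP_TYPE record the def-type and type seen in the first stmt,
   which all later stmts of the group must match.  */
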
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
	return result;
      next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
	result += DR_GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}

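/* For example (illustrative): in the interleaving chain
   FIRST_STMT -> s1 -> s2 where each DR_GROUP_GAP is 1, the call
   vect_get_place_in_interleaving_chain (s2, FIRST_STMT) walks the
   chain and returns 2, while any stmt whose DR_GROUP_FIRST_ELEMENT
   is not FIRST_STMT yields -1.  */
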
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
				unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
	{
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type = build_vector_type (int_type, nelts);
	  if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
	    {
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}

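/* Worked example (illustrative): for a constant vector size holding
   NELTS == 4 elements the two masks built above expand to

     indices1 = { 0, 4, 1, 5 }   (interleave the low halves)
     indices2 = { 2, 6, 3, 7 }   (interleave the high halves)

   i.e. the usual lo/hi interleave of two input vectors, which
   duplicate_and_interleave then uses to spread COUNT scalar values
   across the NVECTORS intermediate vectors.  */
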
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the parent node of this one, return 1; if everything
   is ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<gimple *> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  gimple *stmt = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && is_pattern_stmt_p (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && dt == vect_internal_def)
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !current_vector_size.is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (stmts.length (),
						      TYPE_MODE (type))))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: invalid type of def "
				   "for variable-length SLP ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  *swap = swapped;
  return 0;
}

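/* Illustrative example (not from the original source): for the group
   { x0 = a + 1;  x1 = 1 + b; } operand 0 of x1 is a constant while
   operand 0 of x0 is an SSA name, so the def types mismatch.  Because
   PLUS_EXPR is commutative the function above retries via the "again"
   label with SWAPPED set, matches, and finally swaps the two rhs
   operands of x1 in the GIMPLE IL so both stmts read name + constant.  */
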
/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
	return false;
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))
	  return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))
	return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
	return false;
    }
  return true;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type in ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: unrolling required "
		       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */

static bool
vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
			       tree vectype, tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}

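/* Worked example (illustrative): for the two-stmt group { PLUS_EXPR,
   MINUS_EXPR } and a vector type with four elements the mask built
   above is

     sel = { 0, 5, 2, 7 }

   i.e. even elements are taken from the all-PLUS vector (input 0 of
   the permute) and odd elements from the all-MINUS vector (input 1),
   the blend an addsub-style target instruction has to support.  */
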
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
					   &nunits_vectype)
	  || (nunits_vectype
	      && !vect_record_max_nunits (vinfo, stmt, group_size,
					  nunits_vectype, max_nunits)))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if ((gimple_call_internal_p (call_stmt)
	       && (!vectorizable_internal_fn_p
		   (gimple_call_internal_fn (call_stmt))))
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    call_stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      if (vectype == boolean_type_node)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: shift of a"
				     " boolean.\n");
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple *first_stmt = stmts[0];
	      if (!compatible_calls_p (as_a <gcall *> (first_stmt),
				       as_a <gcall *> (stmt)))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
	  || !vect_two_operations_perm_ok_p (stmts, group_size,
					     vectype, alt_stmt_code))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

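/* Example use (illustrative): vect_build_slp_tree below enters every
   stmt sequence it failed to build into a hash_set keyed by these
   traits; hashing is over the gimple UIDs of the stmts, so a later
   attempt over the same sequence is rejected up front instead of
   re-running the recursive match.  */
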
typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits, vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return the built SLP tree; *MATCHES records which stmts matched
   the first one.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    if (stmt != stmts[0])
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
		  && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
		continue;
	      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  /* Do so only if the number of not successful permutes was not
	     more than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  gimple *stmt = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  if (!is_gimple_assign (stmt)
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		  /* Verify if we can safely swap or if we committed to a
		     specific operand order already.
		     ??? Instead of modifying GIMPLE stmts here we could
		     record whether we want to swap operands in the SLP
		     node and temporarily do that when processing it
		     (or wrap operand accessors in a helper).  */
		  else if (swap[j] != 0
			   || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)))
		    {
		      if (!swap_not_matching)
			{
			  if (dump_enabled_p ())
			    {
			      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					       vect_location,
					       "Build SLP failed: cannot swap "
					       "operands of shared stmt ");
			      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
						TDF_SLIM, stmts[j], 0);
			    }
			  goto fail;
			}
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ...  */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (matches[j] == !swap_not_matching)
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1)
				== (matches[j] == !swap_not_matching));
		  }

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

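/* Illustrative example (not from the original source): for the basic
   block

     a[0] = b[0] + c[0];
     a[1] = b[1] + c[1];

   the recursion above produces

     { store a[0], store a[1] }		<-- instance root
       { _1 + _2, _3 + _4 }		<-- vect_internal_def node
	 { load b[0], load b[1] }	<-- load node, terminates recursion
	 { load c[0], load c[1] }	<-- load node, terminates recursion

   with both load nodes also pushed onto LOADS.  */
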
/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
		     slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


b266b968
RB
1639/* Attempt to reorder stmts in a reduction chain so that we don't
1640 require any load permutation. Return true if that was possible,
1641 otherwise return false. */
1642
1643static bool
1644vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1645{
1646 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1647 unsigned int i, j;
b266b968
RB
1648 unsigned int lidx;
1649 slp_tree node, load;
1650
1651 /* Compare all the permutation sequences to the first one. We know
1652 that at least one load is permuted. */
1653 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1654 if (!node->load_permutation.exists ())
1655 return false;
1656 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1657 {
1658 if (!load->load_permutation.exists ())
1659 return false;
1660 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1661 if (lidx != node->load_permutation[j])
1662 return false;
1663 }
1664
1665 /* Check that the loads in the first sequence are different and there
1666 are no gaps between them. */
7ba9e72d 1667 auto_sbitmap load_index (group_size);
b266b968
RB
1668 bitmap_clear (load_index);
1669 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1670 {
41eefe13 1671 if (lidx >= group_size)
7ba9e72d 1672 return false;
b266b968 1673 if (bitmap_bit_p (load_index, lidx))
7ba9e72d
TS
1674 return false;
1675
b266b968
RB
1676 bitmap_set_bit (load_index, lidx);
1677 }
1678 for (i = 0; i < group_size; i++)
1679 if (!bitmap_bit_p (load_index, i))
7ba9e72d 1680 return false;
b266b968
RB
1681
1682 /* This permutation is valid for reduction. Since the order of the
1683 statements in the nodes is not important unless they are memory
1684 accesses, we can rearrange the statements in all the nodes
1685 according to the order of the loads. */
1686 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1687 node->load_permutation);
1688
1689 /* We are done, no actual permutations need to be generated. */
d9f21f6a 1690 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
b266b968 1691 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
c4e360f4
RB
1692 {
1693 gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2c53b149 1694 first_stmt = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
c4e360f4
RB
1695 /* But we have to keep those permutations that are required because
1696 of handling of gaps. */
d9f21f6a 1697 if (known_eq (unrolling_factor, 1U)
2c53b149
RB
1698 || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
1699 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
c4e360f4 1700 SLP_TREE_LOAD_PERMUTATION (node).release ();
cbd400b4
RB
1701 else
1702 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1703 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
c4e360f4
RB
1704 }
1705
b266b968
RB
1706 return true;
1707}
1708
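/* A minimal stand-alone sketch (illustrative, not part of GCC) of the
   validity check above, using a plain bool array in place of an sbitmap:

     static bool
     perm_is_valid (const unsigned *perm, unsigned group_size)
     {
       bool *seen = XCNEWVEC (bool, group_size);
       bool ok = true;
       for (unsigned i = 0; ok && i < group_size; ++i)
         {
           if (perm[i] >= group_size || seen[perm[i]])
             ok = false;              // out of range or duplicate
           else
             seen[perm[i]] = true;
         }
       free (seen);
       return ok;
     }

   GROUP_SIZE distinct in-range indices necessarily cover
   0 .. GROUP_SIZE - 1, so the final gap scan above re-checks a property
   the first loop already guarantees.  */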
01d8bf07
RB
1709/* Check if the required load permutations in the SLP instance
1710 SLP_INSTN are supported. */
ebfd146a
IR
1711
1712static bool
01d8bf07 1713vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1714{
01d8bf07
RB
1715 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1716 unsigned int i, j, k, next;
6983e6b5 1717 slp_tree node;
a5b50aa1 1718 gimple *stmt, *load, *next_load;
ebfd146a 1719
73fbfcad 1720 if (dump_enabled_p ())
ebfd146a 1721 {
78c60e3d 1722 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1723 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1724 if (node->load_permutation.exists ())
1725 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1726 dump_printf (MSG_NOTE, "%d ", next);
1727 else
bddc974e
TJ
1728 for (k = 0; k < group_size; ++k)
1729 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1730 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1731 }
1732
b5aeb3bb
IR
1733 /* In case of reduction every load permutation is allowed, since the order
1734 of the reduction statements is not important (as opposed to the case of
0d0293ac 1735 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1736 load nodes are of the same size and have the same permutation (and then
1737 rearrange all the nodes of the SLP instance according to this
1738 permutation). */
1739
1740 /* Check that all the load nodes are of the same size. */
01d8bf07 1741 /* ??? Can't we assert this? */
9771b263 1742 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1743 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1744 return false;
2200fc49 1745
b5aeb3bb 1746 node = SLP_INSTANCE_TREE (slp_instn);
9771b263 1747 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1748
b010117a 1749 /* Reduction (there are no data-refs in the root).
b266b968 1750 In reduction chain the order of the loads is not important. */
b010117a 1751 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
2c53b149 1752 && !REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
c4e360f4 1753 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1754
6aa904c4
IR
1755 /* In basic block vectorization we allow any subchain of an interleaving
1756 chain.
1757 FORNOW: not supported in loop SLP because of realignment complications. */
01d8bf07 1758 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
6aa904c4 1759 {
240a94da
RB
1760 /* Check whether the loads in an instance form a subchain and thus
1761 no permutation is necessary. */
9771b263 1762 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1763 {
9626d143
RB
1764 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1765 continue;
240a94da 1766 bool subchain_p = true;
6aa904c4 1767 next_load = NULL;
9771b263 1768 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
6aa904c4 1769 {
5b5826c4
RB
1770 if (j != 0
1771 && (next_load != load
2c53b149 1772 || DR_GROUP_GAP (vinfo_for_stmt (load)) != 1))
240a94da
RB
1773 {
1774 subchain_p = false;
1775 break;
1776 }
2c53b149 1777 next_load = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
6aa904c4 1778 }
240a94da
RB
1779 if (subchain_p)
1780 SLP_TREE_LOAD_PERMUTATION (node).release ();
1781 else
1782 {
fe73a332
RB
1783 stmt_vec_info group_info
1784 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
2c53b149 1785 group_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (group_info));
928686b1 1786 unsigned HOST_WIDE_INT nunits;
fe73a332
RB
1787 unsigned k, maxk = 0;
1788 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1789 if (k > maxk)
1790 maxk = k;
1791 /* In BB vectorization we may not actually use a loaded vector
2c53b149 1792 accessing elements in excess of DR_GROUP_SIZE. */
928686b1
RS
1793 tree vectype = STMT_VINFO_VECTYPE (group_info);
1794 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2c53b149 1795 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
fe73a332
RB
1796 {
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1798 "BB vectorization with gaps at the end of "
1799 "a load is not supported\n");
1800 return false;
1801 }
1802
240a94da
RB
1803 /* Verify the permutation can be generated. */
1804 vec<tree> tem;
29afecdf 1805 unsigned n_perms;
240a94da 1806 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1807 1, slp_instn, true, &n_perms))
240a94da
RB
1808 {
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1810 vect_location,
1811 "unsupported load permutation\n");
1812 return false;
1813 }
1814 }
6aa904c4 1815 }
01d8bf07 1816 return true;
6aa904c4
IR
1817 }
1818
31bee964
RB
1819 /* For loop vectorization verify we can generate the permutation. Be
1820 conservative about the vectorization factor; there are permutations
1821 that will use three vector inputs only starting from a specific factor,
1822 and the vectorization factor is not yet final.
1823 ??? The SLP instance unrolling factor might not be the maximum one. */
29afecdf 1824 unsigned n_perms;
d9f21f6a
RS
1825 poly_uint64 test_vf
1826 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
31bee964 1827 LOOP_VINFO_VECT_FACTOR
d9f21f6a 1828 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
01d8bf07
RB
1829 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1830 if (node->load_permutation.exists ()
31bee964
RB
1831 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1832 slp_instn, true, &n_perms))
01d8bf07 1833 return false;
9b999e8c 1834
01d8bf07 1835 return true;
ebfd146a
IR
1836}
1837
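/* Illustrative example (not from the sources): loads of a[1], a[2], a[3],
   a[4] taken from a load group starting at a[0] form a subchain (each
   element follows its predecessor with DR_GROUP_GAP 1), so the node's
   load permutation can be dropped; the order a[3], a[1], a[2], a[4]
   would instead require a real permutation to be generated.  */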
1838
e4a707c4 1839/* Find the last scalar statement in the SLP node NODE. */
ff802fa1 1840
64900538 1841gimple *
2e8ab70c 1842vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1843{
355fe088 1844 gimple *last = NULL, *stmt;
e4a707c4 1845
2e8ab70c
RB
1846 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1847 {
1848 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1849 if (is_pattern_stmt_p (stmt_vinfo))
1850 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1851 else
1852 last = get_later_stmt (stmt, last);
1853 }
e4a707c4 1854
2e8ab70c 1855 return last;
e4a707c4
IR
1856}
1857
1ba91a49
AL
1858/* Split a group of stores, currently beginning at FIRST_STMT, into two groups:
1859 one (still beginning at FIRST_STMT) containing the first GROUP1_SIZE
1860 stmts (the stores are consecutive, so this is a prefix of the group),
1861 and a second group containing the remainder.
1862 Return the first stmt in the second group. */
1863
1864static gimple *
1865vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1866{
1867 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
2c53b149 1868 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1ba91a49 1869 gcc_assert (group1_size > 0);
2c53b149 1870 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1ba91a49 1871 gcc_assert (group2_size > 0);
2c53b149 1872 DR_GROUP_SIZE (first_vinfo) = group1_size;
1ba91a49
AL
1873
1874 gimple *stmt = first_stmt;
1875 for (unsigned i = group1_size; i > 1; i--)
1876 {
2c53b149
RB
1877 stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1878 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1ba91a49
AL
1879 }
1880 /* STMT is now the last element of the first group. */
2c53b149
RB
1881 gimple *group2 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1882 DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1ba91a49 1883
2c53b149
RB
1884 DR_GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1885 for (stmt = group2; stmt; stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1ba91a49 1886 {
2c53b149
RB
1887 DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1888 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1ba91a49
AL
1889 }
1890
2c53b149 1891 /* For the second group, the DR_GROUP_GAP is that before the original group,
1ba91a49 1892 plus skipping over the first vector. */
2c53b149
RB
1893 DR_GROUP_GAP (vinfo_for_stmt (group2))
1894 = DR_GROUP_GAP (first_vinfo) + group1_size;
1ba91a49 1895
2c53b149
RB
1896 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1897 DR_GROUP_GAP (first_vinfo) += group2_size;
1ba91a49
AL
1898
1899 if (dump_enabled_p ())
1900 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1901 group1_size, group2_size);
1902
1903 return group2;
1904}
1905
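/* Worked example (illustrative, not in the sources): splitting a store
   group with DR_GROUP_SIZE 7 and DR_GROUP_GAP 0 at GROUP1_SIZE 4 yields
   GROUP2_SIZE 3 and afterwards

     DR_GROUP_GAP (group2) = 0 + 4 = 4   (skip over the first subgroup)
     DR_GROUP_GAP (group1) = 0 + 3 = 3   (skip over the second subgroup)

   so each subgroup still advances over all 7 elements per iteration.  */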
4b6068ea
RS
1906/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1907 statements and a vector of NUNITS elements. */
1908
1909static poly_uint64
1910calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1911{
1912 return exact_div (common_multiple (nunits, group_size), group_size);
1913}
1914
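/* For example (illustrative): with NUNITS 4 and GROUP_SIZE 6 the common
   multiple is 12, so the unrolling factor is 12 / 6 = 2; two scalar
   iterations of the 6-stmt group exactly fill three 4-element vectors.  */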
0d0293ac 1915/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1916 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1917 Return FALSE if it's impossible to SLP any stmt in the loop. */
1918
1919static bool
310213d4 1920vect_analyze_slp_instance (vec_info *vinfo,
355fe088 1921 gimple *stmt, unsigned max_tree_size)
ebfd146a
IR
1922{
1923 slp_instance new_instance;
d092494c 1924 slp_tree node;
2c53b149 1925 unsigned int group_size;
b5aeb3bb 1926 tree vectype, scalar_type = NULL_TREE;
355fe088 1927 gimple *next;
1ba91a49 1928 unsigned int i;
9771b263 1929 vec<slp_tree> loads;
b5aeb3bb 1930 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
355fe088 1931 vec<gimple *> scalar_stmts;
b5aeb3bb 1932
2c53b149 1933 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
b5aeb3bb 1934 {
2c53b149
RB
1935 scalar_type = TREE_TYPE (DR_REF (dr));
1936 vectype = get_vectype_for_scalar_type (scalar_type);
1937 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1938 }
1939 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1940 {
1941 gcc_assert (is_a <loop_vec_info> (vinfo));
1942 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1943 group_size = REDUC_GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
1944 }
1945 else
1946 {
310213d4 1947 gcc_assert (is_a <loop_vec_info> (vinfo));
b5aeb3bb 1948 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
310213d4 1949 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 1950 }
b8698a0f 1951
ebfd146a
IR
1952 if (!vectype)
1953 {
73fbfcad 1954 if (dump_enabled_p ())
ebfd146a 1955 {
78c60e3d
SS
1956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1957 "Build SLP failed: unsupported data-type ");
1958 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 1959 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1960 }
b5aeb3bb 1961
ebfd146a
IR
1962 return false;
1963 }
4b6068ea 1964 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 1965
0d0293ac 1966 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1967 scalar_stmts.create (group_size);
ebfd146a 1968 next = stmt;
2c53b149 1969 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
ebfd146a 1970 {
b5aeb3bb
IR
1971 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1972 while (next)
1973 {
f7e531cf
IR
1974 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1975 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
9771b263
DN
1976 scalar_stmts.safe_push (
1977 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f7e531cf 1978 else
9771b263 1979 scalar_stmts.safe_push (next);
2c53b149
RB
1980 next = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1981 }
1982 }
1983 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1984 {
1985 /* Collect the reduction stmts and store them in
1986 SLP_TREE_SCALAR_STMTS. */
1987 while (next)
1988 {
1989 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1990 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
1991 scalar_stmts.safe_push (
1992 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
1993 else
1994 scalar_stmts.safe_push (next);
1995 next = REDUC_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb 1996 }
14a61437
RB
1997 /* Mark the first element of the reduction chain as reduction to properly
1998 transform the node. In the reduction analysis phase only the last
1999 element of the chain is marked as reduction. */
2c53b149 2000 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
b5aeb3bb
IR
2001 }
2002 else
2003 {
2004 /* Collect reduction statements. */
310213d4 2005 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
9771b263
DN
2006 for (i = 0; reductions.iterate (i, &next); i++)
2007 scalar_stmts.safe_push (next);
ebfd146a
IR
2008 }
2009
9771b263 2010 loads.create (group_size);
ebfd146a
IR
2011
2012 /* Build the tree for the SLP instance. */
89d390e5
RB
2013 bool *matches = XALLOCAVEC (bool, group_size);
2014 unsigned npermutes = 0;
f7300fff 2015 bst_fail = new scalar_stmts_set_t ();
4b6068ea 2016 poly_uint64 max_nunits = nunits;
e569db5f 2017 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
4b6068ea 2018 &max_nunits, &loads, matches, &npermutes,
e569db5f 2019 NULL, max_tree_size);
26d66f28 2020 delete bst_fail;
e569db5f 2021 if (node != NULL)
ebfd146a 2022 {
4ef69dfc 2023 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 2024 poly_uint64 unrolling_factor
4b6068ea 2025 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 2026
d9f21f6a 2027 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
2028 && is_a <bb_vec_info> (vinfo))
2029 {
4b6068ea
RS
2030 unsigned HOST_WIDE_INT const_max_nunits;
2031 if (!max_nunits.is_constant (&const_max_nunits)
2032 || const_max_nunits > group_size)
2033 {
2034 if (dump_enabled_p ())
2035 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2036 "Build SLP failed: store group "
2037 "size not a multiple of the vector size "
2038 "in basic block SLP\n");
2039 vect_free_slp_tree (node);
2040 loads.release ();
2041 return false;
2042 }
e569db5f 2043 /* Fatal mismatch. */
4b6068ea 2044 matches[group_size / const_max_nunits * const_max_nunits] = false;
e569db5f
VK
2045 vect_free_slp_tree (node);
2046 loads.release ();
2047 }
2048 else
2049 {
4ef69dfc
IR
2050 /* Create a new SLP instance. */
2051 new_instance = XNEW (struct _slp_instance);
2052 SLP_INSTANCE_TREE (new_instance) = node;
2053 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 2054 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 2055 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
2056
2057 /* Compute the load permutation. */
2058 slp_tree load_node;
2059 bool loads_permuted = false;
abf9bfbc
RB
2060 FOR_EACH_VEC_ELT (loads, i, load_node)
2061 {
01d8bf07 2062 vec<unsigned> load_permutation;
abf9bfbc 2063 int j;
355fe088 2064 gimple *load, *first_stmt;
01d8bf07
RB
2065 bool this_load_permuted = false;
2066 load_permutation.create (group_size);
2c53b149 2067 first_stmt = DR_GROUP_FIRST_ELEMENT
6983e6b5 2068 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
abf9bfbc
RB
2069 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2070 {
e569db5f
VK
2071 int load_place = vect_get_place_in_interleaving_chain
2072 (load, first_stmt);
6983e6b5
RB
2073 gcc_assert (load_place != -1);
2074 if (load_place != j)
01d8bf07 2075 this_load_permuted = true;
abf9bfbc
RB
2076 load_permutation.safe_push (load_place);
2077 }
fe2bef71
RB
2078 if (!this_load_permuted
2079 /* The load requires permutation when unrolling exposes
2080 a gap either because the group is larger than the SLP
2081 group-size or because there is a gap between the groups. */
d9f21f6a 2082 && (known_eq (unrolling_factor, 1U)
2c53b149
RB
2083 || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2084 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
01d8bf07
RB
2085 {
2086 load_permutation.release ();
2087 continue;
2088 }
2089 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2090 loads_permuted = true;
abf9bfbc 2091 }
6aa904c4
IR
2092
2093 if (loads_permuted)
ebfd146a 2094 {
01d8bf07 2095 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 2096 {
73fbfcad 2097 if (dump_enabled_p ())
ebfd146a 2098 {
e645e942 2099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
2100 "Build SLP failed: unsupported load "
2101 "permutation ");
e569db5f
VK
2102 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2103 TDF_SLIM, stmt, 0);
ebfd146a 2104 }
ebfd146a
IR
2105 vect_free_slp_instance (new_instance);
2106 return false;
2107 }
ebfd146a 2108 }
ebfd146a 2109
e569db5f 2110 /* If the loads and stores can be handled with load/store-lanes
bb0f5ca7
AL
2111 instructions, do not generate this SLP instance. */
2112 if (is_a <loop_vec_info> (vinfo)
2113 && loads_permuted
7e11fc7f 2114 && dr && vect_store_lanes_supported (vectype, group_size, false))
bb0f5ca7
AL
2115 {
2116 slp_tree load_node;
2117 FOR_EACH_VEC_ELT (loads, i, load_node)
2118 {
2c53b149 2119 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT
bb0f5ca7
AL
2120 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2121 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
e569db5f
VK
2122 /* Use SLP for strided accesses (or if we
2123 can't use load-lanes). */
bb0f5ca7
AL
2124 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2125 || ! vect_load_lanes_supported
2126 (STMT_VINFO_VECTYPE (stmt_vinfo),
2c53b149 2127 DR_GROUP_SIZE (stmt_vinfo), false))
bb0f5ca7
AL
2128 break;
2129 }
2130 if (i == loads.length ())
2131 {
2132 if (dump_enabled_p ())
2133 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2134 "Built SLP cancelled: can use "
2135 "load/store-lanes\n");
2136 vect_free_slp_instance (new_instance);
2137 return false;
2138 }
2139 }
2140
310213d4 2141 vinfo->slp_instances.safe_push (new_instance);
b8698a0f 2142
73fbfcad 2143 if (dump_enabled_p ())
c2a12ca0
RB
2144 {
2145 dump_printf_loc (MSG_NOTE, vect_location,
2146 "Final SLP tree for instance:\n");
2147 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2148 }
ebfd146a
IR
2149
2150 return true;
2151 }
e569db5f
VK
2152 }
2153 else
2154 {
ebfd146a
IR
2155 /* Failed to SLP. */
2156 /* Free the allocated memory. */
e403d17e 2157 scalar_stmts.release ();
9771b263 2158 loads.release ();
e569db5f 2159 }
b8698a0f 2160
1ba91a49 2161 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2162 vector size. */
4b6068ea 2163 unsigned HOST_WIDE_INT const_nunits;
1ba91a49 2164 if (is_a <bb_vec_info> (vinfo)
4b6068ea 2165 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2c53b149 2166 && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
4b6068ea 2167 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2168 {
2169 /* We consider breaking the group only on VF boundaries from the existing
2170 start. */
2171 for (i = 0; i < group_size; i++)
2172 if (!matches[i]) break;
2173
4b6068ea 2174 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2175 {
2176 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2177 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2178 unsigned group1_size = i & ~(const_nunits - 1);
1ba91a49
AL
2179
2180 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2181 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2182 /* If the first non-match was in the middle of a vector,
2183 skip the rest of that vector. */
2184 if (group1_size < i)
2185 {
4b6068ea 2186 i = group1_size + const_nunits;
1ba91a49 2187 if (i < group_size)
4b6068ea 2188 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2189 }
2190 if (i < group_size)
2191 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2192 return res;
2193 }
2194 /* Even though the first vector did not match completely, we might be able
2195 to SLP (some of) the remainder. FORNOW ignore this possibility. */
2196 }
2197
a70d6342 2198 return false;
ebfd146a
IR
2199}
2200
2201
ff802fa1 2202/* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
ebfd146a
IR
2203 trees of packed scalar stmts if SLP is possible. */
2204
2205bool
310213d4 2206vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2207{
2208 unsigned int i;
355fe088 2209 gimple *first_element;
ebfd146a 2210
adac3a68 2211 DUMP_VECT_SCOPE ("vect_analyze_slp");
ebfd146a 2212
0d0293ac 2213 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2214 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
0630a4ec 2215 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
ebfd146a 2216
310213d4 2217 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2218 {
310213d4
RB
2219 if (loop_vinfo->reduction_chains.length () > 0)
2220 {
2221 /* Find SLP sequences starting from reduction chains. */
2222 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
0630a4ec 2223 if (! vect_analyze_slp_instance (vinfo, first_element,
310213d4 2224 max_tree_size))
6b5e165b
RB
2225 {
2226 /* Dissolve reduction chain group. */
2227 gimple *next, *stmt = first_element;
2228 while (stmt)
2229 {
2230 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2c53b149
RB
2231 next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2232 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2233 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
6b5e165b
RB
2234 stmt = next;
2235 }
2236 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2237 = vect_internal_def;
2238 }
310213d4 2239 }
b010117a 2240
310213d4 2241 /* Find SLP sequences starting from groups of reductions. */
0630a4ec
RB
2242 if (loop_vinfo->reductions.length () > 1)
2243 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2244 max_tree_size);
310213d4 2245 }
b5aeb3bb 2246
ebfd146a
IR
2247 return true;
2248}
2249
2250
2251/* For each possible SLP instance decide whether to SLP it and calculate the overall
437f4a00
IR
2252 unrolling factor needed to SLP the loop. Return TRUE if we decided to SLP at
2253 least one instance. */
ebfd146a 2254
437f4a00 2255bool
ebfd146a
IR
2256vect_make_slp_decision (loop_vec_info loop_vinfo)
2257{
d9f21f6a
RS
2258 unsigned int i;
2259 poly_uint64 unrolling_factor = 1;
9771b263 2260 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2261 slp_instance instance;
2262 int decided_to_slp = 0;
2263
adac3a68 2264 DUMP_VECT_SCOPE ("vect_make_slp_decision");
ebfd146a 2265
9771b263 2266 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2267 {
2268 /* FORNOW: SLP if you can. */
d9f21f6a
RS
2269 /* All unroll factors have the form current_vector_size * X for some
2270 rational X, so they must have a common multiple. */
2271 unrolling_factor
2272 = force_common_multiple (unrolling_factor,
2273 SLP_INSTANCE_UNROLLING_FACTOR (instance));
ebfd146a 2274
ff802fa1 2275 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2276 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2277 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
2278 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2279 decided_to_slp++;
2280 }
2281
2282 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2283
73fbfcad 2284 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2285 {
2286 dump_printf_loc (MSG_NOTE, vect_location,
2287 "Decided to SLP %d instances. Unrolling factor ",
2288 decided_to_slp);
2289 dump_dec (MSG_NOTE, unrolling_factor);
2290 dump_printf (MSG_NOTE, "\n");
2291 }
437f4a00
IR
2292
2293 return (decided_to_slp > 0);
ebfd146a
IR
2294}
2295
2296
2297/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2298 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2299
2300static void
642fce57 2301vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 2302{
355fe088 2303 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2304 imm_use_iterator imm_iter;
355fe088 2305 gimple *use_stmt;
642fce57 2306 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
d755c7ef 2307 slp_tree child;
f2c74cc4 2308 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2309 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2310 int j;
2311
2312 /* Propagate hybrid down the SLP tree. */
2313 if (stype == hybrid)
2314 ;
2315 else if (HYBRID_SLP_STMT (stmt_vinfo))
2316 stype = hybrid;
2317 else
2318 {
2319 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2320 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2321 /* If we get a pattern stmt here we have to use the LHS of the
2322 original stmt for immediate uses. */
2323 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2324 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
29764870 2325 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
e7baeb39
RB
2326 tree def;
2327 if (gimple_code (stmt) == GIMPLE_PHI)
2328 def = gimple_phi_result (stmt);
2329 else
2330 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2331 if (def)
2332 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870
RB
2333 {
2334 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2335 continue;
2336 use_vinfo = vinfo_for_stmt (use_stmt);
2337 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2338 && STMT_VINFO_RELATED_STMT (use_vinfo))
2339 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2340 if (!STMT_SLP_TYPE (use_vinfo)
2341 && (STMT_VINFO_RELEVANT (use_vinfo)
2342 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2343 && !(gimple_code (use_stmt) == GIMPLE_PHI
2344 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2345 {
2346 if (dump_enabled_p ())
2347 {
2348 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2349 "def in non-SLP stmt: ");
2350 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2351 }
2352 stype = hybrid;
2353 }
29764870 2354 }
642fce57 2355 }
ebfd146a 2356
502f0263
RB
2357 if (stype == hybrid
2358 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2359 {
2360 if (dump_enabled_p ())
2361 {
2362 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2363 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2364 }
2365 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2366 }
ebfd146a 2367
642fce57 2368 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
603cca93 2369 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
90dd6e3d 2370 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2371}
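/* Illustrative scenario (not from the sources): a stmt becomes hybrid
   when its def is used both inside and outside the SLP parts of the
   loop, e.g.

     tem_1 = x_1 + y_1;       // in an SLP tree feeding a store group
     a[i_2] = tem_1;          // pure SLP store
     sum_3 = sum_2 + tem_1;   // non-SLP loop-vectorized reduction use

   here tem_1's stmt needs both an SLP copy and a loop-vectorized copy.  */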
f2c74cc4 2372
642fce57 2373/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2374
642fce57
RB
2375static tree
2376vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2377{
2378 walk_stmt_info *wi = (walk_stmt_info *)data;
2379 struct loop *loopp = (struct loop *)wi->info;
2380
2381 if (wi->is_lhs)
2382 return NULL_TREE;
2383
2384 if (TREE_CODE (*tp) == SSA_NAME
2385 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2386 {
355fe088 2387 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
642fce57
RB
2388 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2389 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
b1af7da6
RB
2390 {
2391 if (dump_enabled_p ())
2392 {
2393 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2394 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2395 }
2396 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2397 }
642fce57
RB
2398 }
2399
2400 return NULL_TREE;
ebfd146a
IR
2401}
2402
642fce57
RB
2403static tree
2404vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2405 walk_stmt_info *)
2406{
9186a9d3 2407 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
642fce57
RB
2408 /* If the stmt is in a SLP instance then this isn't a reason
2409 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2410 if (! STMT_SLP_TYPE (use_vinfo)
2411 && (STMT_VINFO_RELEVANT (use_vinfo)
2412 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2413 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2414 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2415 ;
2416 else
642fce57
RB
2417 *handled = true;
2418 return NULL_TREE;
2419}
ebfd146a
IR
2420
2421/* Find stmts that must be both vectorized and SLPed. */
2422
2423void
2424vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2425{
2426 unsigned int i;
9771b263 2427 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2428 slp_instance instance;
2429
adac3a68 2430 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
ebfd146a 2431
642fce57
RB
2432 /* First walk all pattern stmts in the loop and mark defs of uses as
2433 hybrid because immediate uses in them are not recorded. */
2434 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2435 {
2436 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2437 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2438 gsi_next (&gsi))
2439 {
355fe088 2440 gimple *stmt = gsi_stmt (gsi);
642fce57
RB
2441 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2442 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2443 {
2444 walk_stmt_info wi;
2445 memset (&wi, 0, sizeof (wi));
2446 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2447 gimple_stmt_iterator gsi2
2448 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2449 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2450 vect_detect_hybrid_slp_1, &wi);
2451 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2452 vect_detect_hybrid_slp_2,
2453 vect_detect_hybrid_slp_1, &wi);
2454 }
2455 }
2456 }
2457
2458 /* Then walk the SLP instance trees marking stmts with uses in
2459 non-SLP stmts as hybrid, also propagating hybrid down the
2460 SLP tree, collecting the above info on-the-fly. */
9771b263 2461 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2462 {
2463 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2464 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2465 i, pure_slp);
2466 }
ebfd146a
IR
2467}
2468
a70d6342 2469
2c515559
RS
2470/* Initialize a bb_vec_info struct for the statements between
2471 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2472
2473_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
ca823c85
RB
2474 gimple_stmt_iterator region_end_in,
2475 vec_info_shared *shared)
2476 : vec_info (vec_info::bb, init_cost (NULL), shared),
2c515559
RS
2477 bb (gsi_bb (region_begin_in)),
2478 region_begin (region_begin_in),
2479 region_end (region_end_in)
a70d6342 2480{
a70d6342
IR
2481 gimple_stmt_iterator gsi;
2482
61d371eb
RB
2483 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2484 gsi_next (&gsi))
a70d6342 2485 {
355fe088 2486 gimple *stmt = gsi_stmt (gsi);
a70d6342 2487 gimple_set_uid (stmt, 0);
2c515559 2488 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
a70d6342
IR
2489 }
2490
2c515559 2491 bb->aux = this;
a70d6342
IR
2492}
2493
2494
2495/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2496 stmts in the basic block. */
2497
2c515559 2498_bb_vec_info::~_bb_vec_info ()
a70d6342 2499{
2c515559
RS
2500 for (gimple_stmt_iterator si = region_begin;
2501 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
a70d6342 2502 {
355fe088 2503 gimple *stmt = gsi_stmt (si);
a70d6342
IR
2504 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2505
2506 if (stmt_info)
2507 /* Free stmt_vec_info. */
2508 free_stmt_vec_info (stmt);
61d371eb
RB
2509
2510 /* Reset region marker. */
2511 gimple_set_uid (stmt, -1);
a70d6342
IR
2512 }
2513
2c515559 2514 bb->aux = NULL;
a70d6342
IR
2515}
2516
15944069
RS
2517/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2518 given that its child nodes have already been processed, and that
2519 their def types currently match their SLP node's def type. */
a70d6342
IR
2520
2521static bool
15944069
RS
2522vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2523 slp_instance node_instance,
2524 stmt_vector_for_cost *cost_vec)
a70d6342 2525{
15944069 2526 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
bd2f172f
RB
2527 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2528 gcc_assert (stmt_info);
2529 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2530
2531 /* For BB vectorization vector types are assigned here.
2532 Memory accesses already got their vector type assigned
2533 in vect_analyze_data_refs. */
2534 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2535 if (bb_vinfo
2536 && ! STMT_VINFO_DATA_REF (stmt_info))
a70d6342 2537 {
1f3cb663
RS
2538 tree vectype, nunits_vectype;
2539 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2540 &nunits_vectype))
2541 /* We checked this when building the node. */
2542 gcc_unreachable ();
2543 if (vectype == boolean_type_node)
bd2f172f 2544 {
1f3cb663
RS
2545 vectype = vect_get_mask_type_for_stmt (stmt_info);
2546 if (!vectype)
2547 /* vect_get_mask_type_for_stmt has already explained the
2548 failure. */
2549 return false;
bd2f172f
RB
2550 }
2551
2552 gimple *sstmt;
15944069 2553 unsigned int i;
bd2f172f
RB
2554 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2555 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
a70d6342
IR
2556 }
2557
8b7e9dba
RS
2558 /* Calculate the number of vector statements to be created for the
2559 scalar stmts in this node. For SLP reductions it is equal to the
2560 number of vector statements in the children (which has already been
2561 calculated by the recursive call). Otherwise it is the number of
2c53b149 2562 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
8b7e9dba 2563 VF divided by the number of elements in a vector. */
2c53b149
RB
2564 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2565 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
8b7e9dba
RS
2566 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2567 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2568 else
2569 {
d9f21f6a 2570 poly_uint64 vf;
8b7e9dba
RS
2571 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2572 vf = loop_vinfo->vectorization_factor;
2573 else
2574 vf = 1;
2575 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2576 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2577 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2578 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2579 }
2580
15944069
RS
2581 bool dummy;
2582 return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
2583}
2584
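/* Worked example (illustrative): for a store group with group size 4,
   a vectorization factor of 8 and a V8SI vector type,

     SLP_TREE_NUMBER_OF_VEC_STMTS = (8 * 4) / 8 = 4

   i.e. the 32 scalar stores of eight iterations are covered by four
   8-element vector stores.  */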
2585/* Analyze statements contained in SLP tree NODE after recursively analyzing
2586 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2587
2588 Return true if the operations are supported. */
2589
2590static bool
2591vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2592 slp_instance node_instance,
2593 scalar_stmts_to_slp_tree_map_t *visited,
2594 scalar_stmts_to_slp_tree_map_t *lvisited,
2595 stmt_vector_for_cost *cost_vec)
2596{
2597 int i, j;
2598 slp_tree child;
2599
2600 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2601 return true;
2602
2603 /* If we already analyzed the exact same set of scalar stmts we're done.
2604 We share the generated vector stmts for those. */
2605 slp_tree *leader;
2606 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2607 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2608 {
2609 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2610 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2611 return true;
2612 }
2613
2614 /* The SLP graph is acyclic, so not caching whether we failed or succeeded
2615 causes no problem: we simply throw away the lvisited set
2616 when we fail. */
2617 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2618
2619 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2620 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2621 visited, lvisited, cost_vec))
2622 return false;
2623
bd2f172f
RB
2624 /* Push SLP node def-type to stmt operands. */
2625 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2626 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2627 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2628 = SLP_TREE_DEF_TYPE (child);
15944069
RS
2629 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2630 cost_vec);
bd2f172f
RB
2631 /* Restore def-types. */
2632 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2633 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2634 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2635 = vect_internal_def;
2636 if (! res)
2637 return false;
2638
2639 return true;
a70d6342
IR
2640}
2641
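/* A sketch of the memoization scheme above (hypothetical helper names,
   illustrative only):

     bool
     analyze_node_cached (map_t *visited, map_t *lvisited, slp_tree node)
     {
       if (slp_tree *leader = lookup_either (visited, lvisited, node))
         return share_vec_stmts (node, *leader);
       lvisited->put (key_of (node), node);   // record before recursing
       return analyze_children_and_self (node);
     }

   The caller merges LVISITED into VISITED only when the whole instance
   analyzes successfully; on failure LVISITED is discarded, so the
   persistent cache never contains entries from failed instances.  */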
2642
8b7e9dba 2643/* Analyze statements in SLP instances of VINFO. Return true if the
a70d6342
IR
2644 operations are supported. */
2645
a12e42fc 2646bool
8b7e9dba 2647vect_slp_analyze_operations (vec_info *vinfo)
a70d6342 2648{
a70d6342
IR
2649 slp_instance instance;
2650 int i;
2651
adac3a68 2652 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
a12e42fc 2653
68435eb2
RB
2654 scalar_stmts_to_slp_tree_map_t *visited
2655 = new scalar_stmts_to_slp_tree_map_t ();
8b7e9dba 2656 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
a70d6342 2657 {
68435eb2
RB
2658 scalar_stmts_to_slp_tree_map_t lvisited;
2659 stmt_vector_for_cost cost_vec;
2660 cost_vec.create (2);
8b7e9dba
RS
2661 if (!vect_slp_analyze_node_operations (vinfo,
2662 SLP_INSTANCE_TREE (instance),
68435eb2
RB
2663 instance, visited, &lvisited,
2664 &cost_vec))
a70d6342 2665 {
a12e42fc
RB
2666 dump_printf_loc (MSG_NOTE, vect_location,
2667 "removing SLP instance operations starting from: ");
2668 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2669 SLP_TREE_SCALAR_STMTS
2670 (SLP_INSTANCE_TREE (instance))[0], 0);
2671 vect_free_slp_instance (instance);
8b7e9dba 2672 vinfo->slp_instances.ordered_remove (i);
68435eb2 2673 cost_vec.release ();
a70d6342
IR
2674 }
2675 else
68435eb2
RB
2676 {
2677 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2678 x != lvisited.end(); ++x)
2679 visited->put ((*x).first.copy (), (*x).second);
2680 i++;
78604de0 2681
68435eb2
RB
2682 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2683 cost_vec.release ();
2684 }
2685 }
78604de0
RB
2686 delete visited;
2687
8b7e9dba 2688 return !vinfo->slp_instances.is_empty ();
a70d6342
IR
2689}
2690
6eddf228
RB
2691
2692/* Compute the scalar cost of the SLP node NODE and its children
2693 and record it in COST_VEC. Do not account defs that are marked in LIFE
2694 and update LIFE according to uses of NODE. */
2695
a296d6d3 2696static void
292cba13 2697vect_bb_slp_scalar_cost (basic_block bb,
a296d6d3
RB
2698 slp_tree node, vec<bool, va_heap> *life,
2699 stmt_vector_for_cost *cost_vec)
6eddf228 2700{
6eddf228 2701 unsigned i;
355fe088 2702 gimple *stmt;
6eddf228
RB
2703 slp_tree child;
2704
2705 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2706 {
6eddf228
RB
2707 ssa_op_iter op_iter;
2708 def_operand_p def_p;
2709 stmt_vec_info stmt_info;
2710
ff4c81cc 2711 if ((*life)[i])
6eddf228
RB
2712 continue;
2713
2714 /* If there is a non-vectorized use of the defs then the scalar
2715 stmt is kept live in which case we do not account it or any
2716 required defs in the SLP children in the scalar cost. This
2717 way we make the vectorization more costly when compared to
2718 the scalar cost. */
2719 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2720 {
2721 imm_use_iterator use_iter;
355fe088 2722 gimple *use_stmt;
6eddf228 2723 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
f30a0ba5 2724 if (!is_gimple_debug (use_stmt)
61d371eb
RB
2725 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2726 use_stmt)
603cca93 2727 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
6eddf228 2728 {
ff4c81cc 2729 (*life)[i] = true;
6eddf228
RB
2730 BREAK_FROM_IMM_USE_STMT (use_iter);
2731 }
2732 }
ff4c81cc 2733 if ((*life)[i])
6eddf228
RB
2734 continue;
2735
b555a2e4
RB
2736 /* Count scalar stmts only once. */
2737 if (gimple_visited_p (stmt))
2738 continue;
2739 gimple_set_visited (stmt, true);
2740
6eddf228 2741 stmt_info = vinfo_for_stmt (stmt);
a296d6d3 2742 vect_cost_for_stmt kind;
6eddf228
RB
2743 if (STMT_VINFO_DATA_REF (stmt_info))
2744 {
2745 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
a296d6d3 2746 kind = scalar_load;
6eddf228 2747 else
a296d6d3 2748 kind = scalar_store;
6eddf228
RB
2749 }
2750 else
a296d6d3
RB
2751 kind = scalar_stmt;
2752 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
6eddf228
RB
2753 }
2754
faa5399b 2755 auto_vec<bool, 20> subtree_life;
6eddf228 2756 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
2757 {
2758 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2759 {
2760 /* Do not directly pass LIFE to the recursive call, copy it to
2761 confine changes in the callee to the current child/subtree. */
2762 subtree_life.safe_splice (*life);
a296d6d3 2763 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
faa5399b
RB
2764 subtree_life.truncate (0);
2765 }
2766 }
6eddf228
RB
2767}
2768
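/* Illustrative example (not from the sources): given the SLP subtree

     tem_1 = b[i_2] + c[i_2];
     a[i_2] = tem_1;

   if tem_1 also has a use outside the vectorized region, its stmt stays
   live; neither it nor the defs it requires from the SLP children are
   counted, so the scalar baseline shrinks and the vector cost must be
   justified without them.  */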
69f11a13
IR
2769/* Check if vectorization of the basic block is profitable. */
2770
2771static bool
2772vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2773{
9771b263 2774 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2775 slp_instance instance;
1a4b99c1 2776 int i;
c3e7ee41 2777 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2778 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2779
2780 /* Calculate scalar cost. */
a296d6d3
RB
2781 stmt_vector_for_cost scalar_costs;
2782 scalar_costs.create (0);
6eddf228 2783 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2784 {
00f96dc9 2785 auto_vec<bool, 20> life;
ff4c81cc 2786 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
a296d6d3
RB
2787 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2788 SLP_INSTANCE_TREE (instance),
2789 &life, &scalar_costs);
2790 }
2791 void *target_cost_data = init_cost (NULL);
68435eb2 2792 add_stmt_costs (target_cost_data, &scalar_costs);
a296d6d3
RB
2793 scalar_costs.release ();
2794 unsigned dummy;
2795 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2796 destroy_cost_data (target_cost_data);
69f11a13 2797
b555a2e4
RB
2798 /* Unset visited flag. */
2799 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2800 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2801 gimple_set_visited (gsi_stmt (gsi), false);
2802
c3e7ee41 2803 /* Complete the target-specific cost calculation. */
92345349
BS
2804 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2805 &vec_inside_cost, &vec_epilogue_cost);
2806
2807 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2808
73fbfcad 2809 if (dump_enabled_p ())
69f11a13 2810 {
78c60e3d
SS
2811 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2812 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2813 vec_inside_cost);
2814 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2815 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2816 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2817 }
2818
a6524bba
RB
2819 /* Vectorization is profitable if its cost is less than or equal to the
2820 cost of the scalar version. Note that we err on the vector side for
2821 equal cost because the cost estimate is otherwise quite pessimistic
2822 (constant uses are free on the scalar side but cost a load on the
2823 vector side, for example). */
2824 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2825 return false;
2826
2827 return true;
2828}
2829
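/* Worked example (illustrative): with vec_prologue_cost 2,
   vec_epilogue_cost 1 and vec_inside_cost 10 the vector total is
   2 + 1 + 10 = 13; against scalar_cost 16 the test 13 > 16 fails,
   so the block is deemed profitable to vectorize.  */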
a5b50aa1
RB
2830/* Check if the basic block can be vectorized. Returns a bb_vec_info
2831 if so and sets fatal to true if failure is independent of
2832 current_vector_size. */
a70d6342 2833
8e19f5a1 2834static bb_vec_info
61d371eb
RB
2835vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2836 gimple_stmt_iterator region_end,
a5b50aa1 2837 vec<data_reference_p> datarefs, int n_stmts,
ca823c85 2838 bool &fatal, vec_info_shared *shared)
a70d6342
IR
2839{
2840 bb_vec_info bb_vinfo;
a70d6342 2841 slp_instance instance;
8e19f5a1 2842 int i;
d9f21f6a 2843 poly_uint64 min_vf = 2;
e4a707c4 2844
a5b50aa1
RB
2845 /* The first group of checks is independent of the vector size. */
2846 fatal = true;
2847
61d371eb
RB
2848 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2849 {
2850 if (dump_enabled_p ())
2851 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2852 "not vectorized: too many instructions in "
2853 "basic block.\n");
2854 free_data_refs (datarefs);
2855 return NULL;
2856 }
2857
ca823c85 2858 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
a70d6342
IR
2859 if (!bb_vinfo)
2860 return NULL;
2861
61d371eb 2862 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
ca823c85 2863 bb_vinfo->shared->save_datarefs ();
428db0ba
RB
2864
2865 /* Analyze the data references. */
2866
2867 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
a70d6342 2868 {
73fbfcad 2869 if (dump_enabled_p ())
78c60e3d
SS
2870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2871 "not vectorized: unhandled data-ref in basic "
2872 "block.\n");
b8698a0f 2873
2c515559 2874 delete bb_vinfo;
a70d6342
IR
2875 return NULL;
2876 }
2877
fcac74a1 2878 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2879 {
73fbfcad 2880 if (dump_enabled_p ())
78c60e3d
SS
2881 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2882 "not vectorized: not enough data-refs in "
2883 "basic block.\n");
a70d6342 2884
2c515559 2885 delete bb_vinfo;
a70d6342
IR
2886 return NULL;
2887 }
2888
310213d4 2889 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
2890 {
2891 if (dump_enabled_p ())
2892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2893 "not vectorized: unhandled data access in "
2894 "basic block.\n");
2895
2c515559 2896 delete bb_vinfo;
5abe1e05
RB
2897 return NULL;
2898 }
2899
a5b50aa1
RB
2900 /* If there are no grouped stores in the region there is no need
2901 to continue with pattern recog as vect_analyze_slp will fail
2902 anyway. */
2903 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 2904 {
73fbfcad 2905 if (dump_enabled_p ())
a5b50aa1
RB
2906 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2907 "not vectorized: no grouped stores in "
2908 "basic block.\n");
b8698a0f 2909
2c515559 2910 delete bb_vinfo;
a70d6342
IR
2911 return NULL;
2912 }
b8698a0f 2913
a5b50aa1
RB
2914 /* The rest of the analysis below depends on the current vector size in some way, so failures are no longer fatal. */
2915 fatal = false;
2916
2917 vect_pattern_recog (bb_vinfo);
2918
a70d6342
IR
2919 /* Check the SLP opportunities in the basic block, analyze and build SLP
2920 trees. */
310213d4 2921 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 2922 {
73fbfcad 2923 if (dump_enabled_p ())
effb52da
RB
2924 {
2925 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2926 "Failed to SLP the basic block.\n");
2927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2928 "not vectorized: failed to find SLP opportunities "
2929 "in basic block.\n");
2930 }
a70d6342 2931
2c515559 2932 delete bb_vinfo;
a70d6342
IR
2933 return NULL;
2934 }
b8698a0f 2935
62c8a2cf
RS
2936 vect_record_base_alignments (bb_vinfo);
2937
c2a12ca0
RB
2938 /* Analyze and verify the alignment of data references and the
2939 dependence in the SLP instances. */
a5b50aa1
RB
2940 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2941 {
c2a12ca0
RB
2942 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2943 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1
RB
2944 {
2945 dump_printf_loc (MSG_NOTE, vect_location,
2946 "removing SLP instance operations starting from: ");
2947 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2948 SLP_TREE_SCALAR_STMTS
2949 (SLP_INSTANCE_TREE (instance))[0], 0);
2950 vect_free_slp_instance (instance);
2951 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2952 continue;
2953 }
c2a12ca0
RB
2954
2955 /* Mark all the statements that we want to vectorize as pure SLP and
2956 relevant. */
2957 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2958 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2959
a5b50aa1
RB
2960 i++;
2961 }
a5b50aa1
RB
2962 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2963 {
2c515559 2964 delete bb_vinfo;
a5b50aa1
RB
2965 return NULL;
2966 }
2967
8b7e9dba 2968 if (!vect_slp_analyze_operations (bb_vinfo))
a70d6342 2969 {
73fbfcad 2970 if (dump_enabled_p ())
e645e942 2971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2972 "not vectorized: bad operation in basic block.\n");
a70d6342 2973
2c515559 2974 delete bb_vinfo;
a70d6342
IR
2975 return NULL;
2976 }
2977
69f11a13 2978 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 2979 if (!unlimited_cost_model (NULL)
69f11a13
IR
2980 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2981 {
73fbfcad 2982 if (dump_enabled_p ())
78c60e3d
SS
2983 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2984 "not vectorized: vectorization is not "
2985 "profitable.\n");
69f11a13 2986
2c515559 2987 delete bb_vinfo;
69f11a13
IR
2988 return NULL;
2989 }
2990
73fbfcad 2991 if (dump_enabled_p ())
78c60e3d
SS
2992 dump_printf_loc (MSG_NOTE, vect_location,
2993 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2994
2995 return bb_vinfo;
2996}
2997
2998
428db0ba
RB
2999/* Main entry for the BB vectorizer. Analyze and transform BB, returns
3000 true if anything in the basic-block was vectorized. */
3001
3002bool
3003vect_slp_bb (basic_block bb)
8e19f5a1
IR
3004{
3005 bb_vec_info bb_vinfo;
8e19f5a1 3006 gimple_stmt_iterator gsi;
61d371eb 3007 bool any_vectorized = false;
86e36728 3008 auto_vector_sizes vector_sizes;
8e19f5a1 3009
adac3a68 3010 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
8e19f5a1 3011
8e19f5a1
IR
3012 /* Autodetect the first vector size we try. */
3013 current_vector_size = 0;
86e36728
RS
3014 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
3015 unsigned int next_size = 0;
8e19f5a1 3016
61d371eb
RB
3017 gsi = gsi_start_bb (bb);
3018
86e36728 3019 poly_uint64 autodetected_vector_size = 0;
8e19f5a1
IR
3020 while (1)
3021 {
61d371eb
RB
3022 if (gsi_end_p (gsi))
3023 break;
3024
3025 gimple_stmt_iterator region_begin = gsi;
3026 vec<data_reference_p> datarefs = vNULL;
3027 int insns = 0;
3028
3029 for (; !gsi_end_p (gsi); gsi_next (&gsi))
428db0ba 3030 {
61d371eb
RB
3031 gimple *stmt = gsi_stmt (gsi);
3032 if (is_gimple_debug (stmt))
3033 continue;
3034 insns++;
3035
3036 if (gimple_location (stmt) != UNKNOWN_LOCATION)
4f5b9c80 3037 vect_location = stmt;
61d371eb 3038
8e846c66 3039 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
61d371eb
RB
3040 break;
3041 }
3042
3043 /* Skip leading unhandled stmts. */
3044 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3045 {
3046 gsi_next (&gsi);
3047 continue;
3048 }
428db0ba 3049
61d371eb
RB
3050 gimple_stmt_iterator region_end = gsi;
3051
3052 bool vectorized = false;
a5b50aa1 3053 bool fatal = false;
ca823c85 3054 vec_info_shared shared;
61d371eb 3055 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
ca823c85 3056 datarefs, insns, fatal, &shared);
61d371eb
RB
3057 if (bb_vinfo
3058 && dbg_cnt (vect_slp))
3059 {
428db0ba 3060 if (dump_enabled_p ())
61d371eb 3061 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
428db0ba 3062
ca823c85 3063 bb_vinfo->shared->check_datarefs ();
428db0ba
RB
3064 vect_schedule_slp (bb_vinfo);
3065
d1ac60d5
RB
3066 unsigned HOST_WIDE_INT bytes;
3067 if (current_vector_size.is_constant (&bytes))
3068 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3069 "basic block part vectorized using "
3070 HOST_WIDE_INT_PRINT_UNSIGNED " byte "
3071 "vectors\n", bytes);
3072 else
3073 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3074 "basic block part vectorized using variable "
3075 "length vectors\n");
428db0ba 3076
61d371eb 3077 vectorized = true;
428db0ba 3078 }
2c515559 3079 delete bb_vinfo;
8e19f5a1 3080
61d371eb 3081 any_vectorized |= vectorized;
8e19f5a1 3082
86e36728
RS
3083 if (next_size == 0)
3084 autodetected_vector_size = current_vector_size;
3085
3086 if (next_size < vector_sizes.length ()
3087 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3088 next_size += 1;
3089
61d371eb 3090 if (vectorized
86e36728
RS
3091 || next_size == vector_sizes.length ()
3092 || known_eq (current_vector_size, 0U)
a5b50aa1
RB
3093 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3094 vector sizes will fail, do not bother iterating. */
3095 || fatal)
61d371eb
RB
3096 {
3097 if (gsi_end_p (region_end))
3098 break;
8e19f5a1 3099
61d371eb
RB
3100 /* Skip the unhandled stmt. */
3101 gsi_next (&gsi);
3102
3103 /* And reset vector sizes. */
3104 current_vector_size = 0;
86e36728 3105 next_size = 0;
61d371eb
RB
3106 }
3107 else
3108 {
3109 /* Try the next biggest vector size. */
86e36728 3110 current_vector_size = vector_sizes[next_size++];
61d371eb 3111 if (dump_enabled_p ())
86e36728
RS
3112 {
3113 dump_printf_loc (MSG_NOTE, vect_location,
3114 "***** Re-trying analysis with "
3115 "vector size ");
3116 dump_dec (MSG_NOTE, current_vector_size);
3117 dump_printf (MSG_NOTE, "\n");
3118 }
61d371eb
RB
3119
3120 /* Start over. */
3121 gsi = region_begin;
3122 }
8e19f5a1 3123 }
61d371eb
RB
3124
3125 return any_vectorized;
8e19f5a1
IR
3126}
3127
3128
e4af0bc4
IE
3129/* Return true if the vector type of the boolean constant that is operand
3130 number OPNUM of statement STMT is a boolean vector. */
3131
3132static bool
3133vect_mask_constant_operand_p (gimple *stmt, int opnum)
3134{
3135 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3136 enum tree_code code = gimple_expr_code (stmt);
3137 tree op, vectype;
e4af0bc4
IE
3138 enum vect_def_type dt;
3139
3140 /* For comparisons and COND_EXPR the type is chosen depending
3141 on the other comparison operand. */
3142 if (TREE_CODE_CLASS (code) == tcc_comparison)
3143 {
3144 if (opnum)
3145 op = gimple_assign_rhs1 (stmt);
3146 else
3147 op = gimple_assign_rhs2 (stmt);
3148
894dd753 3149 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3150 gcc_unreachable ();
3151
3152 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3153 }
3154
3155 if (code == COND_EXPR)
3156 {
3157 tree cond = gimple_assign_rhs1 (stmt);
3158
3159 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1
RB
3160 op = cond;
3161 else if (opnum)
e4af0bc4
IE
3162 op = TREE_OPERAND (cond, 1);
3163 else
3164 op = TREE_OPERAND (cond, 0);
3165
894dd753 3166 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3167 gcc_unreachable ();
3168
3169 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3170 }
3171
3172 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3173}
3174
018b2744
RS
3175/* Build a variable-length vector in which the elements in ELTS are repeated
3176 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3177 RESULTS and add any new instructions to SEQ.
3178
3179 The approach we use is:
3180
3181 (1) Find a vector mode VM with integer elements of mode IM.
3182
3183 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3184 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3185 from small vectors to IM.
3186
3187 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3188
3189 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3190 correct byte contents.
3191
3192 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3193
3194 We try to find the largest IM for which this sequence works, in order
3195 to cut down on the number of interleaves. */
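/* Hedged worked example (the modes are assumed for illustration): take
   ELTS = {a, b, c, d} with 32-bit elements on a target whose VM is a
   variable-length vector of 64-bit integers (IM). Then the code's
   NVECTORS is 2 and each partial vector holds two elements: step (2)
   builds {a, b} and {c, d} and VIEW_CONVERTs each pair into a single IM
   scalar "ab" / "cd"; step (3) duplicates those into {ab, ab, ...} and
   {cd, cd, ...}; step (4) needs one interleave, giving
   {ab, cd, ab, cd, ...}; and step (5) VIEW_CONVERTs the result back so
   that the lanes read {a, b, c, d, a, b, c, d, ...}, i.e. ELTS repeated
   to fill the vector. */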
3196
f1739b48 3197void
018b2744
RS
3198duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3199 unsigned int nresults, vec<tree> &results)
3200{
3201 unsigned int nelts = elts.length ();
3202 tree element_type = TREE_TYPE (vector_type);
3203
3204 /* (1) Find a vector mode VM with integer elements of mode IM. */
3205 unsigned int nvectors = 1;
3206 tree new_vector_type;
3207 tree permutes[2];
3208 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3209 &nvectors, &new_vector_type,
3210 permutes))
3211 gcc_unreachable ();
3212
3213 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3214 unsigned int partial_nelts = nelts / nvectors;
3215 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3216
3217 tree_vector_builder partial_elts;
3218 auto_vec<tree, 32> pieces (nvectors * 2);
3219 pieces.quick_grow (nvectors * 2);
3220 for (unsigned int i = 0; i < nvectors; ++i)
3221 {
3222 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3223 ELTS' has mode IM. */
3224 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3225 for (unsigned int j = 0; j < partial_nelts; ++j)
3226 partial_elts.quick_push (elts[i * partial_nelts + j]);
3227 tree t = gimple_build_vector (seq, &partial_elts);
3228 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3229 TREE_TYPE (new_vector_type), t);
3230
3231 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3232 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3233 }
3234
3235 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3236 correct byte contents.
3237
3238 We need to repeat the following operation log2(nvectors) times:
3239
3240 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3241 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3242
3243 However, if each input repeats every N elements and the VF is
3244 a multiple of N * 2, the HI result is the same as the LO. */
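 /* Illustration (NVECTORS == 4 assumed), with the four inputs repeating
    A, B, C and D respectively: the first round interleaves pieces 0/2
    and 1/3 into {A, C, A, C, ...} and {B, D, B, D, ...} (here each HI
    output equals its LO partner, per the note above); the second round
    then interleaves those into {A, B, C, D, A, B, C, D, ...}. */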
3245 unsigned int in_start = 0;
3246 unsigned int out_start = nvectors;
3247 unsigned int hi_start = nvectors / 2;
3248 /* A bound on the number of outputs needed to produce NRESULTS results
3249 in the final iteration. */
3250 unsigned int noutputs_bound = nvectors * nresults;
3251 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3252 {
3253 noutputs_bound /= 2;
3254 unsigned int limit = MIN (noutputs_bound, nvectors);
3255 for (unsigned int i = 0; i < limit; ++i)
3256 {
3257 if ((i & 1) != 0
3258 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3259 2 * in_repeat))
3260 {
3261 pieces[out_start + i] = pieces[out_start + i - 1];
3262 continue;
3263 }
3264
3265 tree output = make_ssa_name (new_vector_type);
3266 tree input1 = pieces[in_start + (i / 2)];
3267 tree input2 = pieces[in_start + (i / 2) + hi_start];
3268 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3269 input1, input2,
3270 permutes[i & 1]);
3271 gimple_seq_add_stmt (seq, stmt);
3272 pieces[out_start + i] = output;
3273 }
3274 std::swap (in_start, out_start);
3275 }
3276
3277 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3278 results.reserve (nresults);
3279 for (unsigned int i = 0; i < nresults; ++i)
3280 if (i < nvectors)
3281 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3282 pieces[in_start + i]));
3283 else
3284 results.quick_push (results[i - nvectors]);
3285}
3286
e4af0bc4 3287
b8698a0f
L
3288/* For constant and loop invariant defs of SLP_NODE this function returns
3289 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
3290 OP_NUM determines which operand of the RHS of the scalar stmts we
3291 gather defs for. NUMBER_OF_VECTORS is the number of vector defs to create. */
ebfd146a
IR
3294
3295static void
9dc3f7de 3296vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 3297 vec<tree> *vec_oprnds,
306b0c92 3298 unsigned int op_num, unsigned int number_of_vectors)
ebfd146a 3299{
355fe088
TS
3300 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3301 gimple *stmt = stmts[0];
ebfd146a 3302 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
018b2744 3303 unsigned HOST_WIDE_INT nunits;
ebfd146a 3304 tree vec_cst;
d2a12ae7 3305 unsigned j, number_of_places_left_in_vector;
ebfd146a 3306 tree vector_type;
9dc3f7de 3307 tree vop;
9771b263 3308 int group_size = stmts.length ();
ebfd146a 3309 unsigned int vec_num, i;
d2a12ae7 3310 unsigned number_of_copies = 1;
9771b263
DN
3311 vec<tree> voprnds;
3312 voprnds.create (number_of_vectors);
ebfd146a 3313 bool constant_p, is_store;
b5aeb3bb 3314 tree neutral_op = NULL;
bac430c9 3315 enum tree_code code = gimple_expr_code (stmt);
13396b6e 3316 gimple_seq ctor_seq = NULL;
018b2744 3317 auto_vec<tree, 16> permute_results;
b5aeb3bb 3318
42fd8198 3319 /* Check if vector type is a boolean vector. */
2568d8a1 3320 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
e4af0bc4 3321 && vect_mask_constant_operand_p (stmt, op_num))
42fd8198
IE
3322 vector_type
3323 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3324 else
3325 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
afbe6325 3326
ebfd146a
IR
3327 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3328 {
3329 is_store = true;
3330 op = gimple_assign_rhs1 (stmt);
3331 }
3332 else
9dc3f7de
IR
3333 is_store = false;
3334
3335 gcc_assert (op);
ebfd146a 3336
ebfd146a 3337 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3338 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3339
3340 For example, we have two scalar operands, s1 and s2 (e.g., group of
3341 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3342 of this type can be packed in a vector). The output vector will contain
3343 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3344 will be 2).
3345
b8698a0f 3346 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3347 containing the operands.
3348
3349 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3350 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3351 {s5, s6, s7, s8}. */
b8698a0f 3352
018b2744
RS
3353 /* When using duplicate_and_interleave, we just need one element for
3354 each scalar statement. */
3355 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3356 nunits = group_size;
3357
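 /* For instance, reusing the example above: with NUNITS == 4,
    GROUP_SIZE == 2 and NUMBER_OF_VECTORS == 1 the formula below gives
    4 * 1 / 2 == 2 copies, matching the {s1, s2, s1, s2} vector. */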
14a61437 3358 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3359
3360 number_of_places_left_in_vector = nunits;
62cf7335 3361 constant_p = true;
5ebaa477 3362 tree_vector_builder elts (vector_type, nunits, 1);
794e3180 3363 elts.quick_grow (nunits);
90dd6e3d 3364 bool place_after_defs = false;
ebfd146a
IR
3365 for (j = 0; j < number_of_copies; j++)
3366 {
9771b263 3367 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
ebfd146a
IR
3368 {
3369 if (is_store)
3370 op = gimple_assign_rhs1 (stmt);
bac430c9 3371 else
f7e531cf 3372 {
bac430c9 3373 switch (code)
f7e531cf 3374 {
bac430c9 3375 case COND_EXPR:
a989bcc3
IE
3376 {
3377 tree cond = gimple_assign_rhs1 (stmt);
3378 if (TREE_CODE (cond) == SSA_NAME)
3379 op = gimple_op (stmt, op_num + 1);
3380 else if (op_num == 0 || op_num == 1)
bac430c9 3381 op = TREE_OPERAND (cond, op_num);
a989bcc3
IE
3382 else
3383 {
3384 if (op_num == 2)
3385 op = gimple_assign_rhs2 (stmt);
3386 else
3387 op = gimple_assign_rhs3 (stmt);
3388 }
3389 }
bac430c9
IR
3390 break;
3391
3392 case CALL_EXPR:
3393 op = gimple_call_arg (stmt, op_num);
3394 break;
3395
b84b294a
JJ
3396 case LSHIFT_EXPR:
3397 case RSHIFT_EXPR:
3398 case LROTATE_EXPR:
3399 case RROTATE_EXPR:
3400 op = gimple_op (stmt, op_num + 1);
3401 /* Unlike the other binary operators, shifts/rotates take an
3402 int shift count instead of one with the same type as the
3403 lhs, so make sure the scalar has the right type if we are
3404 dealing with vectors of
3405 long long/long/short/char. */
793d9a16 3406 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
3407 op = fold_convert (TREE_TYPE (vector_type), op);
3408 break;
3409
bac430c9
IR
3410 default:
3411 op = gimple_op (stmt, op_num + 1);
b84b294a 3412 break;
f7e531cf
IR
3413 }
3414 }
b8698a0f 3415
ebfd146a 3416 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3417 number_of_places_left_in_vector--;
90dd6e3d 3418 tree orig_op = op;
13396b6e 3419 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3420 {
793d9a16 3421 if (CONSTANT_CLASS_P (op))
13396b6e 3422 {
42fd8198
IE
3423 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3424 {
3425 /* Can't use VIEW_CONVERT_EXPR for booleans because
3426 of possibly different sizes of scalar value and
3427 vector element. */
3428 if (integer_zerop (op))
3429 op = build_int_cst (TREE_TYPE (vector_type), 0);
3430 else if (integer_onep (op))
158beb4a 3431 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3432 else
3433 gcc_unreachable ();
3434 }
3435 else
3436 op = fold_unary (VIEW_CONVERT_EXPR,
3437 TREE_TYPE (vector_type), op);
13396b6e
JJ
3438 gcc_assert (op && CONSTANT_CLASS_P (op));
3439 }
3440 else
3441 {
b731b390 3442 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3443 gimple *init_stmt;
262a363f
JJ
3444 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3445 {
158beb4a
JJ
3446 tree true_val
3447 = build_all_ones_cst (TREE_TYPE (vector_type));
3448 tree false_val
3449 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3450 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3451 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3452 op, true_val,
3453 false_val);
262a363f 3454 }
262a363f
JJ
3455 else
3456 {
3457 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3458 op);
3459 init_stmt
3460 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3461 op);
3462 }
13396b6e
JJ
3463 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3464 op = new_temp;
3465 }
50eeef09 3466 }
d2a12ae7 3467 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3468 if (!CONSTANT_CLASS_P (op))
3469 constant_p = false;
90dd6e3d
RB
3470 if (TREE_CODE (orig_op) == SSA_NAME
3471 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3472 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3473 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3474 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3475 place_after_defs = true;
ebfd146a
IR
3476
3477 if (number_of_places_left_in_vector == 0)
3478 {
018b2744
RS
3479 if (constant_p
3480 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3481 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3482 vec_cst = gimple_build_vector (&ctor_seq, &elts);
ebfd146a 3483 else
d2a12ae7 3484 {
018b2744
RS
3485 if (vec_oprnds->is_empty ())
3486 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3487 number_of_vectors,
3488 permute_results);
3489 vec_cst = permute_results[number_of_vectors - j - 1];
d2a12ae7 3490 }
90dd6e3d
RB
3491 tree init;
3492 gimple_stmt_iterator gsi;
3493 if (place_after_defs)
3494 {
3495 gsi = gsi_for_stmt
3496 (vect_find_last_scalar_stmt_in_slp (slp_node));
3497 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3498 }
3499 else
3500 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
13396b6e
JJ
3501 if (ctor_seq != NULL)
3502 {
90dd6e3d 3503 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
018b2744 3504 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
13396b6e
JJ
3505 ctor_seq = NULL;
3506 }
90dd6e3d
RB
3507 voprnds.quick_push (init);
3508 place_after_defs = false;
62cf7335
RB
3509 number_of_places_left_in_vector = nunits;
3510 constant_p = true;
5ebaa477
RS
3511 elts.new_vector (vector_type, nunits, 1);
3512 elts.quick_grow (nunits);
ebfd146a
IR
3513 }
3514 }
3515 }
3516
b8698a0f 3517 /* Since the vectors were created in reverse order, reverse them
ebfd146a 3518 here. */
9771b263 3519 vec_num = voprnds.length ();
d2a12ae7 3520 for (j = vec_num; j != 0; j--)
ebfd146a 3521 {
9771b263
DN
3522 vop = voprnds[j - 1];
3523 vec_oprnds->quick_push (vop);
ebfd146a
IR
3524 }
3525
9771b263 3526 voprnds.release ();
ebfd146a
IR
3527
3528 /* In case the VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3529 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3530 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3531 to replicate the vectors. */
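 /* For instance, if NUMBER_OF_VECTORS is 4 but only {v0, v1} were
    created above, the loop below pushes v0 and v1 again (or a vector
    built from NEUTRAL_OP when one is available), yielding the sequence
    v0, v1, v0, v1. */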
9771b263 3532 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3533 {
b5aeb3bb
IR
3534 tree neutral_vec = NULL;
3535
3536 if (neutral_op)
3537 {
3538 if (!neutral_vec)
b9acc9f1 3539 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3540
9771b263 3541 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3542 }
3543 else
3544 {
9771b263
DN
3545 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3546 vec_oprnds->quick_push (vop);
b5aeb3bb 3547 }
ebfd146a
IR
3548 }
3549}
3550
3551
3552/* Get the vectorized definitions from SLP_NODE, which already contains
3553 the corresponding vectorized def-stmts. */
3554
3555static void
9771b263 3556vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
3557{
3558 tree vec_oprnd;
355fe088 3559 gimple *vec_def_stmt;
ebfd146a
IR
3560 unsigned int i;
3561
9771b263 3562 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3563
9771b263 3564 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
ebfd146a
IR
3565 {
3566 gcc_assert (vec_def_stmt);
e7baeb39
RB
3567 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3568 vec_oprnd = gimple_phi_result (vec_def_stmt);
3569 else
3570 vec_oprnd = gimple_get_lhs (vec_def_stmt);
9771b263 3571 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
3572 }
3573}
3574
3575
b8698a0f
L
3576/* Get vectorized definitions for SLP_NODE.
3577 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
3578 call vect_get_constant_vectors() to create vector stmts.
3579 Otherwise, the def-stmts must already be vectorized, and the vectorized stmts
d092494c
IR
3580 must be stored in the corresponding child of SLP_NODE, and we call
3581 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 3582
ebfd146a 3583void
9771b263 3584vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
306b0c92 3585 vec<vec<tree> > *vec_oprnds)
ebfd146a 3586{
355fe088 3587 gimple *first_stmt;
d092494c 3588 int number_of_vects = 0, i;
77eefb71 3589 unsigned int child_index = 0;
b8698a0f 3590 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
d092494c 3591 slp_tree child = NULL;
37b5ec8f 3592 vec<tree> vec_defs;
e44978dc 3593 tree oprnd;
77eefb71 3594 bool vectorized_defs;
ebfd146a 3595
9771b263
DN
3596 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3597 FOR_EACH_VEC_ELT (ops, i, oprnd)
ebfd146a 3598 {
d092494c
IR
3599 /* For each operand we check if it has vectorized definitions in a child
3600 node or we need to create them (for invariants and constants). We
3601 check if the LHS of the first stmt of the next child matches OPRND.
3602 If it does, we found the correct child. Otherwise, we call
77eefb71
RB
3603 vect_get_constant_vectors () and do not advance CHILD_INDEX, in order
3604 to check this child node for the next operand. */
3605 vectorized_defs = false;
3606 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
ebfd146a 3607 {
01d8bf07 3608 child = SLP_TREE_CHILDREN (slp_node)[child_index];
d092494c 3609
e44978dc 3610 /* We have to check both pattern and original def, if available. */
603cca93 3611 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
e44978dc 3612 {
355fe088
TS
3613 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3614 gimple *related
90dd6e3d 3615 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
e7baeb39 3616 tree first_def_op;
90dd6e3d 3617
e7baeb39
RB
3618 if (gimple_code (first_def) == GIMPLE_PHI)
3619 first_def_op = gimple_phi_result (first_def);
3620 else
3621 first_def_op = gimple_get_lhs (first_def);
3622 if (operand_equal_p (oprnd, first_def_op, 0)
90dd6e3d
RB
3623 || (related
3624 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3625 {
3626 /* The number of vector defs is determined by the number of
3627 vector statements in the node from which we get those
3628 statements. */
3629 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3630 vectorized_defs = true;
77eefb71 3631 child_index++;
90dd6e3d 3632 }
e44978dc 3633 }
77eefb71
RB
3634 else
3635 child_index++;
d092494c 3636 }
ebfd146a 3637
77eefb71
RB
3638 if (!vectorized_defs)
3639 {
3640 if (i == 0)
3641 {
3642 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3643 /* Number of vector stmts was calculated according to LHS in
3644 vect_schedule_slp_instance (), fix it by replacing LHS with
3645 RHS, if necessary. See vect_get_smallest_scalar_type () for
3646 details. */
3647 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3648 &rhs_size_unit);
3649 if (rhs_size_unit != lhs_size_unit)
3650 {
3651 number_of_vects *= rhs_size_unit;
3652 number_of_vects /= lhs_size_unit;
3653 }
3654 }
d092494c 3655 }
b5aeb3bb 3656
d092494c 3657 /* Allocate memory for vectorized defs. */
37b5ec8f
JJ
3658 vec_defs = vNULL;
3659 vec_defs.create (number_of_vects);
ebfd146a 3660
d092494c
IR
3661 /* For reduction defs we call vect_get_constant_vectors (), since we are
3662 looking for initial loop invariant values. */
306b0c92 3663 if (vectorized_defs)
d092494c 3664 /* The defs are already vectorized. */
37b5ec8f 3665 vect_get_slp_vect_defs (child, &vec_defs);
d092494c 3666 else
e7baeb39 3667 /* Build vectors from scalar defs. */
37b5ec8f 3668 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
306b0c92 3669 number_of_vects);
ebfd146a 3670
37b5ec8f 3671 vec_oprnds->quick_push (vec_defs);
d092494c 3672 }
ebfd146a
IR
3673}
3674
ebfd146a
IR
3675/* Generate vector permute statements from a list of loads in DR_CHAIN.
3676 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3677 permute statements for the SLP node NODE of the SLP instance
3678 SLP_NODE_INSTANCE. */
3679
ebfd146a 3680bool
01d8bf07 3681vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
d9f21f6a
RS
3682 gimple_stmt_iterator *gsi, poly_uint64 vf,
3683 slp_instance slp_node_instance, bool analyze_only,
29afecdf 3684 unsigned *n_perms)
ebfd146a 3685{
355fe088 3686 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3687 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3688 tree mask_element_type = NULL_TREE, mask_type;
928686b1 3689 int vec_index = 0;
2635892a 3690 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a 3691 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
928686b1 3692 unsigned int mask_element;
ef4bddc2 3693 machine_mode mode;
928686b1 3694 unsigned HOST_WIDE_INT nunits, const_vf;
ebfd146a 3695
91ff1504
RB
3696 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3697 return false;
3698
2c53b149 3699 stmt_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
91ff1504 3700
22e4dee7
RH
3701 mode = TYPE_MODE (vectype);
3702
d9f21f6a 3703 /* At the moment, all permutations are represented using per-element
928686b1
RS
3704 indices, so we can't cope with variable vector lengths or
3705 vectorization factors. */
3706 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
3707 || !vf.is_constant (&const_vf))
d9f21f6a
RS
3708 return false;
3709
2635892a
RH
3710 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3711 same size as the vector element being permuted. */
96f9265a 3712 mask_element_type = lang_hooks.types.type_for_mode
304b9962 3713 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
ebfd146a 3714 mask_type = get_vectype_for_scalar_type (mask_element_type);
e3342de4 3715 vec_perm_builder mask (nunits, nunits, 1);
908a1a16 3716 mask.quick_grow (nunits);
e3342de4 3717 vec_perm_indices indices;
ebfd146a 3718
61fdfd8c
RB
3719 /* Initialize the vect stmts of NODE to properly insert the generated
3720 stmts later. */
3721 if (! analyze_only)
3722 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3723 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3724 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a 3725
b8698a0f
L
3726 /* Generate permutation masks for every NODE. Number of masks for each NODE
3727 is equal to GROUP_SIZE.
3728 E.g., we have a group of three nodes with three loads from the same
3729 location in each node, and the vector size is 4. I.e., we have an
3730 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3731 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3732 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3733 ...
3734
2635892a 3735 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3736 The last mask is illegal since we assume two input vectors per permute
ff802fa1
IR
3737 operation, and the mask element values can't be outside the range they cover.
3738 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3739 For the first two permutations we need the first and the second input
ebfd146a 3740 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3741 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3742 {c2,a3,b3,c3}. */
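/* Tying this to the index computation below: for the a's each
   load-permutation entry is 0 and DR_GROUP_SIZE is 3, so the loop
   produces the flat indices 0,0,0, 3,3,3, 6,6,6, 9,9,9; chopped into
   NUNITS-sized groups these are exactly {0,0,0,3} {3,3,6,6} {6,9,9,9}.
   Each index is then reduced modulo NUNITS and biased by NUNITS when it
   selects from the round's second input vector, which is how the last
   mask becomes {2,5,5,5}. */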
3743
2ce27200 3744 int vect_stmts_counter = 0;
928686b1 3745 unsigned int index = 0;
2ce27200
RB
3746 int first_vec_index = -1;
3747 int second_vec_index = -1;
be377c80 3748 bool noop_p = true;
29afecdf 3749 *n_perms = 0;
ebfd146a 3750
d9f21f6a 3751 for (unsigned int j = 0; j < const_vf; j++)
2ce27200
RB
3752 {
3753 for (int k = 0; k < group_size; k++)
3754 {
928686b1 3755 unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
2c53b149 3756 + j * DR_GROUP_SIZE (stmt_info));
2ce27200
RB
3757 vec_index = i / nunits;
3758 mask_element = i % nunits;
3759 if (vec_index == first_vec_index
3760 || first_vec_index == -1)
3761 {
3762 first_vec_index = vec_index;
3763 }
3764 else if (vec_index == second_vec_index
3765 || second_vec_index == -1)
3766 {
3767 second_vec_index = vec_index;
3768 mask_element += nunits;
3769 }
3770 else
3771 {
3772 if (dump_enabled_p ())
3773 {
3774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3775 "permutation requires at "
3776 "least three vectors ");
3777 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3778 stmt, 0);
2ce27200 3779 }
31bee964 3780 gcc_assert (analyze_only);
2ce27200
RB
3781 return false;
3782 }
ebfd146a 3783
928686b1 3784 gcc_assert (mask_element < 2 * nunits);
be377c80
RB
3785 if (mask_element != index)
3786 noop_p = false;
2ce27200
RB
3787 mask[index++] = mask_element;
3788
e3342de4 3789 if (index == nunits && !noop_p)
2ce27200 3790 {
e3342de4
RS
3791 indices.new_vector (mask, 2, nunits);
3792 if (!can_vec_perm_const_p (mode, indices))
2ce27200
RB
3793 {
3794 if (dump_enabled_p ())
22e4dee7 3795 {
2ce27200
RB
3796 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3797 vect_location,
3798 "unsupported vect permute { ");
3799 for (i = 0; i < nunits; ++i)
6b0630fb
RS
3800 {
3801 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3802 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3803 }
2ce27200 3804 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
22e4dee7 3805 }
31bee964 3806 gcc_assert (analyze_only);
2ce27200
RB
3807 return false;
3808 }
22e4dee7 3809
e3342de4
RS
3810 ++*n_perms;
3811 }
29afecdf 3812
e3342de4
RS
3813 if (index == nunits)
3814 {
2ce27200
RB
3815 if (!analyze_only)
3816 {
be377c80
RB
3817 tree mask_vec = NULL_TREE;
3818
3819 if (! noop_p)
736d0f28 3820 mask_vec = vec_perm_indices_to_tree (mask_type, indices);
2ce27200
RB
3821
3822 if (second_vec_index == -1)
3823 second_vec_index = first_vec_index;
61fdfd8c
RB
3824
3825 /* Generate the permute statement if necessary. */
3826 tree first_vec = dr_chain[first_vec_index];
3827 tree second_vec = dr_chain[second_vec_index];
3828 gimple *perm_stmt;
3829 if (! noop_p)
3830 {
3831 tree perm_dest
3832 = vect_create_destination_var (gimple_assign_lhs (stmt),
3833 vectype);
3834 perm_dest = make_ssa_name (perm_dest);
3835 perm_stmt = gimple_build_assign (perm_dest,
3836 VEC_PERM_EXPR,
3837 first_vec, second_vec,
3838 mask_vec);
3839 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3840 }
3841 else
3842 /* If mask was NULL_TREE generate the requested
3843 identity transform. */
3844 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3845
3846 /* Store the vector statement in NODE. */
3847 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
2ce27200 3848 }
ebfd146a 3849
2ce27200
RB
3850 index = 0;
3851 first_vec_index = -1;
3852 second_vec_index = -1;
be377c80 3853 noop_p = true;
2ce27200
RB
3854 }
3855 }
b8698a0f 3856 }
ebfd146a 3857
ebfd146a
IR
3858 return true;
3859}
3860
ebfd146a
IR
3861/* Vectorize SLP instance tree in postorder. */
3862
3863static bool
f7300fff
RB
3864vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3865 scalar_stmts_to_slp_tree_map_t *bst_map)
ebfd146a 3866{
355fe088 3867 gimple *stmt;
0d0293ac 3868 bool grouped_store, is_store;
ebfd146a
IR
3869 gimple_stmt_iterator si;
3870 stmt_vec_info stmt_info;
8b7e9dba 3871 unsigned int group_size;
ebfd146a 3872 tree vectype;
603cca93 3873 int i, j;
d755c7ef 3874 slp_tree child;
ebfd146a 3875
603cca93 3876 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
3877 return false;
3878
f7300fff
RB
3879 /* See if we have already vectorized the same set of stmts and reuse their
3880 vectorized stmts. */
dd172744 3881 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
f7300fff 3882 {
dd172744 3883 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
f7300fff
RB
3884 return false;
3885 }
3886
dd172744 3887 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
9771b263 3888 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
f7300fff 3889 vect_schedule_slp_instance (child, instance, bst_map);
b8698a0f 3890
603cca93
RB
3891 /* Push SLP node def-type to stmts. */
3892 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3893 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3894 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3895 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3896
9771b263 3897 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3898 stmt_info = vinfo_for_stmt (stmt);
3899
3900 /* VECTYPE is the type of the destination. */
b690cc0f 3901 vectype = STMT_VINFO_VECTYPE (stmt_info);
dad55d70 3902 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
3903 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3904
68435eb2 3905 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
9771b263 3906 if (!SLP_TREE_VEC_STMTS (node).exists ())
8b7e9dba 3907 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
ebfd146a 3908
73fbfcad 3909 if (dump_enabled_p ())
ebfd146a 3910 {
78c60e3d
SS
3911 dump_printf_loc (MSG_NOTE,vect_location,
3912 "------>vectorizing SLP node starting from: ");
3913 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
b8698a0f 3914 }
ebfd146a 3915
2e8ab70c
RB
3916 /* Vectorized stmts go before the last scalar stmt which is where
3917 all uses are ready. */
3918 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
e4a707c4 3919
b010117a
IR
3920 /* Mark the first element of the reduction chain as reduction to properly
3921 transform the node. In the analysis phase only the last element of the
3922 chain is marked as reduction. */
2c53b149
RB
3923 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3924 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
3925 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt)
b010117a
IR
3926 {
3927 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3928 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3929 }
3930
6876e5bc
RB
3931 /* Handle two-operation SLP nodes by vectorizing the group with
3932 both operations and then performing a merge. */
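 /* Illustrative example: for a group { a0+b0, a1-b1, a2+b2, a3-b3 } the
    node is vectorized once with CODE0 (PLUS_EXPR) and once with OCODE
    (MINUS_EXPR); the VEC_PERM_EXPR built below then selects each lane
    from one result or the other according to MASK, the shape a target
    addsub instruction could match. */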
3933 if (SLP_TREE_TWO_OPERATORS (node))
3934 {
3935 enum tree_code code0 = gimple_assign_rhs_code (stmt);
567a3691 3936 enum tree_code ocode = ERROR_MARK;
355fe088 3937 gimple *ostmt;
e3342de4 3938 vec_perm_builder mask (group_size, group_size, 1);
6876e5bc
RB
3939 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3940 if (gimple_assign_rhs_code (ostmt) != code0)
3941 {
908a1a16 3942 mask.quick_push (1);
6876e5bc
RB
3943 ocode = gimple_assign_rhs_code (ostmt);
3944 }
3945 else
908a1a16 3946 mask.quick_push (0);
567a3691 3947 if (ocode != ERROR_MARK)
6876e5bc 3948 {
355fe088
TS
3949 vec<gimple *> v0;
3950 vec<gimple *> v1;
6876e5bc
RB
3951 unsigned j;
3952 tree tmask = NULL_TREE;
3953 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3954 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3955 SLP_TREE_VEC_STMTS (node).truncate (0);
3956 gimple_assign_set_rhs_code (stmt, ocode);
3957 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3958 gimple_assign_set_rhs_code (stmt, code0);
3959 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3960 SLP_TREE_VEC_STMTS (node).truncate (0);
3961 tree meltype = build_nonstandard_integer_type
b397965c 3962 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
6876e5bc
RB
3963 tree mvectype = get_same_sized_vectype (meltype, vectype);
3964 unsigned k = 0, l;
3965 for (j = 0; j < v0.length (); ++j)
3966 {
dad55d70
RS
3967 /* Enforced by vect_build_slp_tree, which rejects variable-length
3968 vectors for SLP_TREE_TWO_OPERATORS. */
3969 unsigned int const_nunits = nunits.to_constant ();
3970 tree_vector_builder melts (mvectype, const_nunits, 1);
3971 for (l = 0; l < const_nunits; ++l)
6876e5bc 3972 {
1ece8d4c 3973 if (k >= group_size)
6876e5bc 3974 k = 0;
dad55d70
RS
3975 tree t = build_int_cst (meltype,
3976 mask[k++] * const_nunits + l);
794e3180 3977 melts.quick_push (t);
6876e5bc 3978 }
5ebaa477 3979 tmask = melts.build ();
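 /* E.g. (illustrative), with GROUP_SIZE == 4, MASK == {0,1,0,1} and
    CONST_NUNITS == 4 this builds the selector {0, 5, 2, 7}: take
    lanes 0 and 2 from the CODE0 result and lanes 1 and 3 from the
    OCODE result. */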
6876e5bc
RB
3980
3981 /* ??? Not all targets support a VEC_PERM_EXPR with a
3982 constant mask that would translate to a vec_merge RTX
3983 (with their vec_perm_const_ok). We can either not
3984 vectorize in that case or let veclower do its job.
3985 Unfortunately that isn't too great and at least for
3986 plus/minus we'd eventually like to match targets
3987 vector addsub instructions. */
355fe088 3988 gimple *vstmt;
6876e5bc
RB
3989 vstmt = gimple_build_assign (make_ssa_name (vectype),
3990 VEC_PERM_EXPR,
3991 gimple_assign_lhs (v0[j]),
3992 gimple_assign_lhs (v1[j]), tmask);
3993 vect_finish_stmt_generation (stmt, vstmt, &si);
3994 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3995 }
3996 v0.release ();
3997 v1.release ();
3998 return false;
3999 }
4000 }
0d0293ac 4001 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
603cca93
RB
4002
4003 /* Restore stmt def-types. */
4004 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4005 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4006 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
4007 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
4008
b5aeb3bb 4009 return is_store;
ebfd146a
IR
4010}
4011
dd34c087
JJ
4012/* Replace scalar calls from SLP node NODE by setting their lhs to zero.
4013 For loop vectorization this is done in vectorizable_call, but for SLP
4014 it needs to be deferred until end of vect_schedule_slp, because multiple
4015 SLP instances may refer to the same scalar stmt. */
4016
4017static void
4018vect_remove_slp_scalar_calls (slp_tree node)
4019{
355fe088 4020 gimple *stmt, *new_stmt;
dd34c087
JJ
4021 gimple_stmt_iterator gsi;
4022 int i;
d755c7ef 4023 slp_tree child;
dd34c087
JJ
4024 tree lhs;
4025 stmt_vec_info stmt_info;
4026
603cca93 4027 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
dd34c087
JJ
4028 return;
4029
9771b263 4030 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 4031 vect_remove_slp_scalar_calls (child);
dd34c087 4032
9771b263 4033 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
dd34c087
JJ
4034 {
4035 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
4036 continue;
4037 stmt_info = vinfo_for_stmt (stmt);
4038 if (stmt_info == NULL
4039 || is_pattern_stmt_p (stmt_info)
4040 || !PURE_SLP_STMT (stmt_info))
4041 continue;
4042 lhs = gimple_call_lhs (stmt);
4043 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4044 set_vinfo_for_stmt (new_stmt, stmt_info);
4045 set_vinfo_for_stmt (stmt, NULL);
4046 STMT_VINFO_STMT (stmt_info) = new_stmt;
4047 gsi = gsi_for_stmt (stmt);
4048 gsi_replace (&gsi, new_stmt, false);
4049 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4050 }
4051}
ebfd146a 4052
ff802fa1
IR
4053/* Generate vector code for all SLP instances in the loop/basic block. */
4054
ebfd146a 4055bool
310213d4 4056vect_schedule_slp (vec_info *vinfo)
ebfd146a 4057{
9771b263 4058 vec<slp_instance> slp_instances;
ebfd146a 4059 slp_instance instance;
8b7e9dba 4060 unsigned int i;
ebfd146a
IR
4061 bool is_store = false;
4062
78604de0
RB
4063
4064 scalar_stmts_to_slp_tree_map_t *bst_map
4065 = new scalar_stmts_to_slp_tree_map_t ();
310213d4 4066 slp_instances = vinfo->slp_instances;
9771b263 4067 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
4068 {
4069 /* Schedule the tree of INSTANCE. */
4070 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
f7300fff 4071 instance, bst_map);
73fbfcad 4072 if (dump_enabled_p ())
78c60e3d 4073 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4074 "vectorizing stmts using SLP.\n");
ebfd146a 4075 }
78604de0 4076 delete bst_map;
ebfd146a 4077
9771b263 4078 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
4079 {
4080 slp_tree root = SLP_INSTANCE_TREE (instance);
355fe088 4081 gimple *store;
b5aeb3bb
IR
4082 unsigned int j;
4083 gimple_stmt_iterator gsi;
4084
c40eced0
RB
4085 /* Remove scalar call stmts. Do not do this for basic-block
4086 vectorization as not all uses may be vectorized.
4087 ??? Why should this be necessary? DCE should be able to
4088 remove the stmts itself.
4089 ??? For BB vectorization we can as well remove scalar
4090 stmts starting from the SLP tree root if they have no
4091 uses. */
310213d4 4092 if (is_a <loop_vec_info> (vinfo))
c40eced0 4093 vect_remove_slp_scalar_calls (root);
dd34c087 4094
9771b263 4095 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
b5aeb3bb
IR
4096 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4097 {
4098 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
4099 break;
4100
a024e70e
IR
4101 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
4102 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
b5aeb3bb
IR
4103 /* Free the attached stmt_vec_info and remove the stmt. */
4104 gsi = gsi_for_stmt (store);
3d3f2249 4105 unlink_stmt_vdef (store);
b5aeb3bb 4106 gsi_remove (&gsi, true);
3d3f2249 4107 release_defs (store);
b5aeb3bb
IR
4108 free_stmt_vec_info (store);
4109 }
4110 }
4111
ebfd146a
IR
4112 return is_store;
4113}