]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
[26/46] Make more use of dyn_cast in tree-vect*
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
957060b5 35#include "params.h"
40e23961 36#include "fold-const.h"
d8a2d370 37#include "stor-layout.h"
5be5c238 38#include "gimple-iterator.h"
ebfd146a 39#include "cfgloop.h"
ebfd146a 40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
642fce57 42#include "gimple-walk.h"
428db0ba 43#include "dbgcnt.h"
5ebaa477 44#include "tree-vector-builder.h"
f151c9e1 45#include "vec-perm-indices.h"
018b2744
RS
46#include "gimple-fold.h"
47#include "internal-fn.h"
a70d6342
IR
48
49
6e2dd807
RS
50/* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
ebfd146a
IR
53
54static void
6e2dd807 55vect_free_slp_tree (slp_tree node, bool final_p)
ebfd146a 56{
d092494c 57 int i;
d755c7ef 58 slp_tree child;
d092494c 59
9771b263 60 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6e2dd807 61 vect_free_slp_tree (child, final_p);
b8698a0f 62
6e2dd807
RS
63 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
64 Some statements might no longer exist, after having been
65 removed by vect_transform_stmt. Updating the remaining
66 statements would be redundant. */
67 if (!final_p)
68 {
b9787581
RS
69 stmt_vec_info stmt_info;
70 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6e2dd807 71 {
b9787581
RS
72 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
73 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
6e2dd807
RS
74 }
75 }
78810bd3 76
9771b263
DN
77 SLP_TREE_CHILDREN (node).release ();
78 SLP_TREE_SCALAR_STMTS (node).release ();
79 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 80 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
81
82 free (node);
83}
84
85
6e2dd807
RS
86/* Free the memory allocated for the SLP instance. FINAL_P is true if we
87 have vectorized the instance or if we have made a final decision not
88 to vectorize the statements in any way. */
ebfd146a
IR
89
90void
6e2dd807 91vect_free_slp_instance (slp_instance instance, bool final_p)
ebfd146a 92{
6e2dd807 93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
9771b263 94 SLP_INSTANCE_LOADS (instance).release ();
c7e62a26 95 free (instance);
ebfd146a
IR
96}
97
98
d092494c
IR
99/* Create an SLP node for SCALAR_STMTS. */
100
101static slp_tree
b9787581 102vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
d092494c 103{
d3cfd39e 104 slp_tree node;
b9787581 105 stmt_vec_info stmt_info = scalar_stmts[0];
d092494c
IR
106 unsigned int nops;
107
b9787581 108 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
d092494c 109 nops = gimple_call_num_args (stmt);
b9787581 110 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
f7e531cf
IR
111 {
112 nops = gimple_num_ops (stmt) - 1;
113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114 nops++;
115 }
b9787581 116 else if (is_a <gphi *> (stmt_info->stmt))
e7baeb39 117 nops = 0;
d092494c
IR
118 else
119 return NULL;
120
d3cfd39e 121 node = XNEW (struct _slp_tree);
d092494c 122 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263 123 SLP_TREE_VEC_STMTS (node).create (0);
68435eb2 124 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
9771b263 125 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 126 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 127 SLP_TREE_TWO_OPERATORS (node) = false;
603cca93 128 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
d092494c 129
78810bd3 130 unsigned i;
b9787581
RS
131 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
132 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
78810bd3 133
d092494c
IR
134 return node;
135}
136
137
ddf56386
RB
138/* This structure is used in creation of an SLP tree. Each instance
139 corresponds to the same operand in a group of scalar stmts in an SLP
140 node. */
141typedef struct _slp_oprnd_info
142{
143 /* Def-stmts for the operands. */
b9787581 144 vec<stmt_vec_info> def_stmts;
ddf56386
RB
145 /* Information about the first statement, its vector def-type, type, the
146 operand itself in case it's constant, and an indication if it's a pattern
147 stmt. */
ddf56386 148 tree first_op_type;
34e82342 149 enum vect_def_type first_dt;
ddf56386
RB
150 bool first_pattern;
151 bool second_pattern;
152} *slp_oprnd_info;
153
154
d092494c
IR
155/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
156 operand. */
9771b263 157static vec<slp_oprnd_info>
d092494c
IR
158vect_create_oprnd_info (int nops, int group_size)
159{
160 int i;
161 slp_oprnd_info oprnd_info;
9771b263 162 vec<slp_oprnd_info> oprnds_info;
d092494c 163
9771b263 164 oprnds_info.create (nops);
d092494c
IR
165 for (i = 0; i < nops; i++)
166 {
167 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 168 oprnd_info->def_stmts.create (group_size);
d092494c 169 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 170 oprnd_info->first_op_type = NULL_TREE;
d092494c 171 oprnd_info->first_pattern = false;
effb52da 172 oprnd_info->second_pattern = false;
9771b263 173 oprnds_info.quick_push (oprnd_info);
d092494c
IR
174 }
175
176 return oprnds_info;
177}
178
179
d3cfd39e
JJ
180/* Free operands info. */
181
d092494c 182static void
9771b263 183vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
184{
185 int i;
186 slp_oprnd_info oprnd_info;
187
9771b263 188 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 189 {
9771b263 190 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
191 XDELETE (oprnd_info);
192 }
d092494c 193
9771b263 194 oprnds_info.release ();
d092494c
IR
195}
196
197
d755c7ef
RB
198/* Find the place of the data-ref in STMT in the interleaving chain that starts
199 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
200
b210f45f 201int
355fe088 202vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
d755c7ef 203{
355fe088 204 gimple *next_stmt = first_stmt;
d755c7ef
RB
205 int result = 0;
206
2c53b149 207 if (first_stmt != DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
d755c7ef
RB
208 return -1;
209
210 do
211 {
212 if (next_stmt == stmt)
213 return result;
2c53b149 214 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
c8047699 215 if (next_stmt)
2c53b149 216 result += DR_GROUP_GAP (vinfo_for_stmt (next_stmt));
d755c7ef
RB
217 }
218 while (next_stmt);
219
220 return -1;
221}
222
018b2744
RS
223/* Check whether it is possible to load COUNT elements of type ELT_MODE
224 using the method implemented by duplicate_and_interleave. Return true
225 if so, returning the number of intermediate vectors in *NVECTORS_OUT
226 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
227 (if nonnull). */
228
f1739b48 229bool
018b2744 230can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
f1739b48
RS
231 unsigned int *nvectors_out,
232 tree *vector_type_out,
233 tree *permutes)
018b2744
RS
234{
235 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
236 poly_int64 nelts;
237 unsigned int nvectors = 1;
238 for (;;)
239 {
240 scalar_int_mode int_mode;
241 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
242 if (multiple_p (current_vector_size, elt_bytes, &nelts)
243 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
244 {
245 tree int_type = build_nonstandard_integer_type
246 (GET_MODE_BITSIZE (int_mode), 1);
247 tree vector_type = build_vector_type (int_type, nelts);
248 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
249 {
250 vec_perm_builder sel1 (nelts, 2, 3);
251 vec_perm_builder sel2 (nelts, 2, 3);
252 poly_int64 half_nelts = exact_div (nelts, 2);
253 for (unsigned int i = 0; i < 3; ++i)
254 {
255 sel1.quick_push (i);
256 sel1.quick_push (i + nelts);
257 sel2.quick_push (half_nelts + i);
258 sel2.quick_push (half_nelts + i + nelts);
259 }
260 vec_perm_indices indices1 (sel1, 2, nelts);
261 vec_perm_indices indices2 (sel2, 2, nelts);
262 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
263 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
264 {
265 if (nvectors_out)
266 *nvectors_out = nvectors;
267 if (vector_type_out)
268 *vector_type_out = vector_type;
269 if (permutes)
270 {
271 permutes[0] = vect_gen_perm_mask_checked (vector_type,
272 indices1);
273 permutes[1] = vect_gen_perm_mask_checked (vector_type,
274 indices2);
275 }
276 return true;
277 }
278 }
279 }
280 if (!multiple_p (elt_bytes, 2, &elt_bytes))
281 return false;
282 nvectors *= 2;
283 }
284}
d755c7ef 285
d092494c
IR
286/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
287 they are of a valid type and that they match the defs of the first stmt of
4cecd659 288 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
018b2744
RS
289 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
290 indicates swap is required for cond_expr stmts. Specifically, *SWAP
291 is 1 if STMT is cond and operands of comparison need to be swapped;
292 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
293 If there is any operand swap in this function, *SWAP is set to non-zero
294 value.
4cecd659
BC
295 If there was a fatal error return -1; if the error could be corrected by
296 swapping operands of father node of this one, return 1; if everything is
297 ok return 0. */
4cecd659
BC
298static int
299vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
b9787581 300 vec<stmt_vec_info> stmts, unsigned stmt_num,
4cecd659 301 vec<slp_oprnd_info> *oprnds_info)
ebfd146a 302{
b9787581 303 stmt_vec_info stmt_info = stmts[stmt_num];
ebfd146a
IR
304 tree oprnd;
305 unsigned int i, number_of_oprnds;
d092494c 306 enum vect_def_type dt = vect_uninitialized_def;
d092494c 307 bool pattern = false;
abf9bfbc 308 slp_oprnd_info oprnd_info;
b0b4483e
RB
309 int first_op_idx = 1;
310 bool commutative = false;
311 bool first_op_cond = false;
effb52da
RB
312 bool first = stmt_num == 0;
313 bool second = stmt_num == 1;
b8698a0f 314
b9787581 315 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
190c2236
JJ
316 {
317 number_of_oprnds = gimple_call_num_args (stmt);
b0b4483e 318 first_op_idx = 3;
190c2236 319 }
b9787581 320 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
f7e531cf 321 {
b0b4483e 322 enum tree_code code = gimple_assign_rhs_code (stmt);
f7e531cf 323 number_of_oprnds = gimple_num_ops (stmt) - 1;
4cecd659
BC
324 /* Swap can only be done for cond_expr if asked to, otherwise we
325 could result in different comparison code to the first stmt. */
a414c77f
IE
326 if (gimple_assign_rhs_code (stmt) == COND_EXPR
327 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
b0b4483e
RB
328 {
329 first_op_cond = true;
b0b4483e
RB
330 number_of_oprnds++;
331 }
332 else
333 commutative = commutative_tree_code (code);
f7e531cf 334 }
d092494c 335 else
b0b4483e 336 return -1;
ebfd146a 337
4cecd659
BC
338 bool swapped = (*swap != 0);
339 gcc_assert (!swapped || first_op_cond);
ebfd146a
IR
340 for (i = 0; i < number_of_oprnds; i++)
341 {
b0b4483e
RB
342again:
343 if (first_op_cond)
f7e531cf 344 {
4cecd659
BC
345 /* Map indicating how operands of cond_expr should be swapped. */
346 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
347 int *map = maps[*swap];
348
349 if (i < 2)
b9787581
RS
350 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
351 first_op_idx), map[i]);
b0b4483e 352 else
b9787581 353 oprnd = gimple_op (stmt_info->stmt, map[i]);
f7e531cf
IR
354 }
355 else
b9787581 356 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
f7e531cf 357
9771b263 358 oprnd_info = (*oprnds_info)[i];
ebfd146a 359
fef96d8e
RS
360 stmt_vec_info def_stmt_info;
361 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
ebfd146a 362 {
73fbfcad 363 if (dump_enabled_p ())
ebfd146a 364 {
78c60e3d 365 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3fc356dc 366 "Build SLP failed: can't analyze def for ");
78c60e3d 367 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 368 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
369 }
370
b0b4483e 371 return -1;
ebfd146a
IR
372 }
373
fef96d8e
RS
374 /* Check if DEF_STMT_INFO is a part of a pattern in LOOP and get
375 the def stmt from the pattern. Check that all the stmts of the
376 node are in the pattern. */
377 if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
ebfd146a 378 {
d092494c 379 pattern = true;
effb52da
RB
380 if (!first && !oprnd_info->first_pattern
381 /* Allow different pattern state for the defs of the
382 first stmt in reduction chains. */
383 && (oprnd_info->first_dt != vect_reduction_def
384 || (!second && !oprnd_info->second_pattern)))
d092494c 385 {
b0b4483e
RB
386 if (i == 0
387 && !swapped
388 && commutative)
389 {
390 swapped = true;
391 goto again;
392 }
393
73fbfcad 394 if (dump_enabled_p ())
d092494c 395 {
78c60e3d
SS
396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
397 "Build SLP failed: some of the stmts"
398 " are in a pattern, and others are not ");
399 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 400 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
d092494c 401 }
ebfd146a 402
b0b4483e 403 return 1;
ebfd146a
IR
404 }
405
fef96d8e 406 dt = STMT_VINFO_DEF_TYPE (def_stmt_info);
ebfd146a 407
f7e531cf 408 if (dt == vect_unknown_def_type)
ebfd146a 409 {
73fbfcad 410 if (dump_enabled_p ())
78c60e3d 411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 412 "Unsupported pattern.\n");
b0b4483e 413 return -1;
ebfd146a
IR
414 }
415
fef96d8e 416 switch (gimple_code (def_stmt_info->stmt))
ebfd146a 417 {
81c40241
RB
418 case GIMPLE_PHI:
419 case GIMPLE_ASSIGN:
420 break;
421
422 default:
423 if (dump_enabled_p ())
424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
425 "unsupported defining stmt:\n");
426 return -1;
ebfd146a
IR
427 }
428 }
429
effb52da
RB
430 if (second)
431 oprnd_info->second_pattern = pattern;
432
d092494c 433 if (first)
ebfd146a 434 {
d092494c
IR
435 oprnd_info->first_dt = dt;
436 oprnd_info->first_pattern = pattern;
793d9a16 437 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 438 }
ebfd146a
IR
439 else
440 {
d092494c
IR
441 /* Not first stmt of the group, check that the def-stmt/s match
442 the def-stmt/s of the first stmt. Allow different definition
443 types for reduction chains: the first stmt must be a
444 vect_reduction_def (a phi node), and the rest
445 vect_internal_def. */
018b2744
RS
446 tree type = TREE_TYPE (oprnd);
447 if ((oprnd_info->first_dt != dt
448 && !(oprnd_info->first_dt == vect_reduction_def
449 && dt == vect_internal_def)
450 && !((oprnd_info->first_dt == vect_external_def
451 || oprnd_info->first_dt == vect_constant_def)
452 && (dt == vect_external_def
453 || dt == vect_constant_def)))
454 || !types_compatible_p (oprnd_info->first_op_type, type))
ebfd146a 455 {
b0b4483e
RB
456 /* Try swapping operands if we got a mismatch. */
457 if (i == 0
458 && !swapped
459 && commutative)
460 {
461 swapped = true;
462 goto again;
463 }
464
abf9bfbc
RB
465 if (dump_enabled_p ())
466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 467 "Build SLP failed: different types\n");
d092494c 468
b0b4483e 469 return 1;
ebfd146a 470 }
018b2744
RS
471 if ((dt == vect_constant_def
472 || dt == vect_external_def)
473 && !current_vector_size.is_constant ()
474 && (TREE_CODE (type) == BOOLEAN_TYPE
475 || !can_duplicate_and_interleave_p (stmts.length (),
476 TYPE_MODE (type))))
a23644f2
RS
477 {
478 if (dump_enabled_p ())
479 {
480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
481 "Build SLP failed: invalid type of def "
482 "for variable-length SLP ");
483 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
484 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
485 }
486 return -1;
487 }
018b2744
RS
488 }
489
490 /* Check the types of the definitions. */
491 switch (dt)
492 {
493 case vect_constant_def:
494 case vect_external_def:
ebfd146a 495 break;
b8698a0f 496
c78e3652 497 case vect_reduction_def:
e7baeb39 498 case vect_induction_def:
8644a673 499 case vect_internal_def:
fef96d8e 500 oprnd_info->def_stmts.quick_push (def_stmt_info);
ebfd146a
IR
501 break;
502
503 default:
504 /* FORNOW: Not supported. */
73fbfcad 505 if (dump_enabled_p ())
ebfd146a 506 {
78c60e3d
SS
507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
508 "Build SLP failed: illegal type of def ");
81c40241 509 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 510 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
511 }
512
b0b4483e 513 return -1;
ebfd146a
IR
514 }
515 }
516
b0b4483e
RB
517 /* Swap operands. */
518 if (swapped)
519 {
78810bd3
RB
520 /* If there are already uses of this stmt in a SLP instance then
521 we've committed to the operand order and can't swap it. */
b9787581 522 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
78810bd3
RB
523 {
524 if (dump_enabled_p ())
525 {
526 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
527 "Build SLP failed: cannot swap operands of "
528 "shared stmt ");
b9787581
RS
529 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
530 stmt_info->stmt, 0);
78810bd3
RB
531 }
532 return -1;
533 }
534
b9787581 535 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
b0b4483e
RB
536 if (first_op_cond)
537 {
538 tree cond = gimple_assign_rhs1 (stmt);
4cecd659
BC
539 enum tree_code code = TREE_CODE (cond);
540
541 /* Swap. */
542 if (*swap == 1)
543 {
544 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
545 &TREE_OPERAND (cond, 1));
546 TREE_SET_CODE (cond, swap_tree_comparison (code));
547 }
548 /* Invert. */
549 else
550 {
551 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
552 gimple_assign_rhs3_ptr (stmt));
553 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
554 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
555 gcc_assert (code != ERROR_MARK);
556 TREE_SET_CODE (cond, code);
557 }
b0b4483e
RB
558 }
559 else
560 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
561 gimple_assign_rhs2_ptr (stmt));
78810bd3
RB
562 if (dump_enabled_p ())
563 {
564 dump_printf_loc (MSG_NOTE, vect_location,
565 "swapped operands to match def types in ");
566 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
567 }
b0b4483e
RB
568 }
569
4cecd659 570 *swap = swapped;
b0b4483e 571 return 0;
ebfd146a
IR
572}
573
5249ee4d
RS
574/* Return true if call statements CALL1 and CALL2 are similar enough
575 to be combined into the same SLP group. */
576
577static bool
578compatible_calls_p (gcall *call1, gcall *call2)
579{
580 unsigned int nargs = gimple_call_num_args (call1);
581 if (nargs != gimple_call_num_args (call2))
582 return false;
583
584 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
585 return false;
586
587 if (gimple_call_internal_p (call1))
588 {
589 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
590 TREE_TYPE (gimple_call_lhs (call2))))
591 return false;
592 for (unsigned int i = 0; i < nargs; ++i)
593 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
594 TREE_TYPE (gimple_call_arg (call2, i))))
595 return false;
596 }
597 else
598 {
599 if (!operand_equal_p (gimple_call_fn (call1),
600 gimple_call_fn (call2), 0))
601 return false;
602
603 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
604 return false;
605 }
606 return true;
607}
608
b161f2c9
RS
609/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
610 caller's attempt to find the vector type in STMT with the narrowest
611 element type. Return true if VECTYPE is nonnull and if it is valid
612 for VINFO. When returning true, update MAX_NUNITS to reflect the
613 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
614 as for vect_build_slp_tree. */
615
616static bool
617vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
4b6068ea 618 tree vectype, poly_uint64 *max_nunits)
b161f2c9
RS
619{
620 if (!vectype)
621 {
622 if (dump_enabled_p ())
623 {
624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
625 "Build SLP failed: unsupported data-type in ");
626 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
627 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
628 }
629 /* Fatal mismatch. */
630 return false;
631 }
632
633 /* If populating the vector type requires unrolling then fail
634 before adjusting *max_nunits for basic-block vectorization. */
4b6068ea
RS
635 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
636 unsigned HOST_WIDE_INT const_nunits;
b161f2c9 637 if (is_a <bb_vec_info> (vinfo)
4b6068ea
RS
638 && (!nunits.is_constant (&const_nunits)
639 || const_nunits > group_size))
b161f2c9
RS
640 {
641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
642 "Build SLP failed: unrolling required "
643 "in basic block SLP\n");
644 /* Fatal mismatch. */
645 return false;
646 }
647
648 /* In case of multiple types we need to detect the smallest type. */
4b6068ea 649 vect_update_max_nunits (max_nunits, vectype);
b161f2c9
RS
650 return true;
651}
ebfd146a 652
1f3cb663
RS
653/* STMTS is a group of GROUP_SIZE SLP statements in which some
654 statements do the same operation as the first statement and in which
655 the others do ALT_STMT_CODE. Return true if we can take one vector
656 of the first operation and one vector of the second and permute them
657 to get the required result. VECTYPE is the type of the vector that
658 would be permuted. */
659
660static bool
b9787581
RS
661vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
662 unsigned int group_size, tree vectype,
663 tree_code alt_stmt_code)
1f3cb663
RS
664{
665 unsigned HOST_WIDE_INT count;
666 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
667 return false;
668
669 vec_perm_builder sel (count, count, 1);
670 for (unsigned int i = 0; i < count; ++i)
671 {
672 unsigned int elt = i;
b9787581
RS
673 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
674 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
1f3cb663
RS
675 elt += count;
676 sel.quick_push (elt);
677 }
678 vec_perm_indices indices (sel, 2, count);
679 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
680}
681
6983e6b5
RB
682/* Verify if the scalar stmts STMTS are isomorphic, require data
683 permutation or are of unsupported types of operation. Return
684 true if they are, otherwise return false and indicate in *MATCHES
685 which stmts are not isomorphic to the first one. If MATCHES[0]
686 is false then this indicates the comparison could not be
4cecd659
BC
687 carried out or the stmts will never be vectorized by SLP.
688
689 Note COND_EXPR is possibly ismorphic to another one after swapping its
690 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
691 the first stmt by swapping the two operands of comparison; set SWAP[i]
692 to 2 if stmt I is isormorphic to the first stmt by inverting the code
693 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
694 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
ebfd146a
IR
695
696static bool
4cecd659 697vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
b9787581 698 vec<stmt_vec_info> stmts, unsigned int group_size,
5249ee4d
RS
699 poly_uint64 *max_nunits, bool *matches,
700 bool *two_operators)
ebfd146a 701{
ebfd146a 702 unsigned int i;
b9787581 703 stmt_vec_info first_stmt_info = stmts[0];
6876e5bc
RB
704 enum tree_code first_stmt_code = ERROR_MARK;
705 enum tree_code alt_stmt_code = ERROR_MARK;
706 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 707 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 708 tree lhs;
6983e6b5 709 bool need_same_oprnds = false;
1f3cb663 710 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
ebfd146a
IR
711 optab optab;
712 int icode;
ef4bddc2
RS
713 machine_mode optab_op2_mode;
714 machine_mode vec_mode;
bffb8014 715 stmt_vec_info first_load = NULL, prev_first_load = NULL;
d092494c 716
ebfd146a 717 /* For every stmt in NODE find its def stmt/s. */
b9787581
RS
718 stmt_vec_info stmt_info;
719 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
ebfd146a 720 {
b9787581 721 gimple *stmt = stmt_info->stmt;
4cecd659 722 swap[i] = 0;
6983e6b5
RB
723 matches[i] = false;
724
73fbfcad 725 if (dump_enabled_p ())
ebfd146a 726 {
78c60e3d
SS
727 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
728 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
729 }
730
4b5caab7 731 /* Fail to vectorize statements marked as unvectorizable. */
b9787581 732 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
4b5caab7 733 {
73fbfcad 734 if (dump_enabled_p ())
4b5caab7 735 {
78c60e3d
SS
736 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
737 "Build SLP failed: unvectorizable statement ");
738 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4b5caab7 739 }
6983e6b5
RB
740 /* Fatal mismatch. */
741 matches[0] = false;
4b5caab7
IR
742 return false;
743 }
744
ebfd146a
IR
745 lhs = gimple_get_lhs (stmt);
746 if (lhs == NULL_TREE)
747 {
73fbfcad 748 if (dump_enabled_p ())
ebfd146a 749 {
78c60e3d
SS
750 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
751 "Build SLP failed: not GIMPLE_ASSIGN nor "
752 "GIMPLE_CALL ");
753 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 754 }
6983e6b5
RB
755 /* Fatal mismatch. */
756 matches[0] = false;
ebfd146a
IR
757 return false;
758 }
759
1f3cb663
RS
760 tree nunits_vectype;
761 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
762 &nunits_vectype)
763 || (nunits_vectype
b9787581 764 && !vect_record_max_nunits (vinfo, stmt_info, group_size,
1f3cb663 765 nunits_vectype, max_nunits)))
b161f2c9 766 {
6983e6b5
RB
767 /* Fatal mismatch. */
768 matches[0] = false;
1f3cb663
RS
769 return false;
770 }
771
772 gcc_assert (vectype);
b8698a0f 773
538dd0b7 774 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
775 {
776 rhs_code = CALL_EXPR;
5249ee4d
RS
777 if ((gimple_call_internal_p (call_stmt)
778 && (!vectorizable_internal_fn_p
779 (gimple_call_internal_fn (call_stmt))))
538dd0b7
DM
780 || gimple_call_tail_p (call_stmt)
781 || gimple_call_noreturn_p (call_stmt)
782 || !gimple_call_nothrow_p (call_stmt)
783 || gimple_call_chain (call_stmt))
190c2236 784 {
73fbfcad 785 if (dump_enabled_p ())
190c2236 786 {
78c60e3d
SS
787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
788 "Build SLP failed: unsupported call type ");
538dd0b7
DM
789 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
790 call_stmt, 0);
190c2236 791 }
6983e6b5
RB
792 /* Fatal mismatch. */
793 matches[0] = false;
190c2236
JJ
794 return false;
795 }
796 }
ebfd146a
IR
797 else
798 rhs_code = gimple_assign_rhs_code (stmt);
799
800 /* Check the operation. */
801 if (i == 0)
802 {
803 first_stmt_code = rhs_code;
804
b8698a0f 805 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
806 vector shift with scalar shift operand. */
807 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
808 || rhs_code == LROTATE_EXPR
809 || rhs_code == RROTATE_EXPR)
810 {
1f3cb663
RS
811 if (vectype == boolean_type_node)
812 {
813 if (dump_enabled_p ())
814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
815 "Build SLP failed: shift of a"
816 " boolean.\n");
817 /* Fatal mismatch. */
818 matches[0] = false;
819 return false;
820 }
821
ebfd146a
IR
822 vec_mode = TYPE_MODE (vectype);
823
824 /* First see if we have a vector/vector shift. */
825 optab = optab_for_tree_code (rhs_code, vectype,
826 optab_vector);
827
828 if (!optab
947131ba 829 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
830 {
831 /* No vector/vector shift, try for a vector/scalar shift. */
832 optab = optab_for_tree_code (rhs_code, vectype,
833 optab_scalar);
834
835 if (!optab)
836 {
73fbfcad 837 if (dump_enabled_p ())
78c60e3d 838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 839 "Build SLP failed: no optab.\n");
6983e6b5
RB
840 /* Fatal mismatch. */
841 matches[0] = false;
ebfd146a
IR
842 return false;
843 }
947131ba 844 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
845 if (icode == CODE_FOR_nothing)
846 {
73fbfcad 847 if (dump_enabled_p ())
78c60e3d
SS
848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
849 "Build SLP failed: "
e645e942 850 "op not supported by target.\n");
6983e6b5
RB
851 /* Fatal mismatch. */
852 matches[0] = false;
ebfd146a
IR
853 return false;
854 }
855 optab_op2_mode = insn_data[icode].operand[2].mode;
856 if (!VECTOR_MODE_P (optab_op2_mode))
857 {
858 need_same_oprnds = true;
859 first_op1 = gimple_assign_rhs2 (stmt);
860 }
861 }
862 }
36ba4aae
IR
863 else if (rhs_code == WIDEN_LSHIFT_EXPR)
864 {
865 need_same_oprnds = true;
866 first_op1 = gimple_assign_rhs2 (stmt);
867 }
ebfd146a
IR
868 }
869 else
870 {
6876e5bc
RB
871 if (first_stmt_code != rhs_code
872 && alt_stmt_code == ERROR_MARK)
873 alt_stmt_code = rhs_code;
ebfd146a
IR
874 if (first_stmt_code != rhs_code
875 && (first_stmt_code != IMAGPART_EXPR
876 || rhs_code != REALPART_EXPR)
877 && (first_stmt_code != REALPART_EXPR
69f11a13 878 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
879 /* Handle mismatches in plus/minus by computing both
880 and merging the results. */
881 && !((first_stmt_code == PLUS_EXPR
882 || first_stmt_code == MINUS_EXPR)
883 && (alt_stmt_code == PLUS_EXPR
884 || alt_stmt_code == MINUS_EXPR)
885 && rhs_code == alt_stmt_code)
b9787581 886 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
69f11a13 887 && (first_stmt_code == ARRAY_REF
38000232 888 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
889 || first_stmt_code == INDIRECT_REF
890 || first_stmt_code == COMPONENT_REF
891 || first_stmt_code == MEM_REF)))
ebfd146a 892 {
73fbfcad 893 if (dump_enabled_p ())
ebfd146a 894 {
78c60e3d
SS
895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
896 "Build SLP failed: different operation "
897 "in stmt ");
898 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
6876e5bc
RB
899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
900 "original stmt ");
901 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
b9787581 902 first_stmt_info->stmt, 0);
ebfd146a 903 }
6983e6b5
RB
904 /* Mismatch. */
905 continue;
ebfd146a 906 }
b8698a0f
L
907
908 if (need_same_oprnds
ebfd146a
IR
909 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
910 {
73fbfcad 911 if (dump_enabled_p ())
ebfd146a 912 {
78c60e3d
SS
913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
914 "Build SLP failed: different shift "
915 "arguments in ");
916 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 917 }
6983e6b5
RB
918 /* Mismatch. */
919 continue;
ebfd146a 920 }
190c2236
JJ
921
922 if (rhs_code == CALL_EXPR)
923 {
b9787581 924 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
5249ee4d 925 as_a <gcall *> (stmt)))
190c2236 926 {
73fbfcad 927 if (dump_enabled_p ())
190c2236 928 {
78c60e3d
SS
929 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
930 "Build SLP failed: different calls in ");
931 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
932 stmt, 0);
190c2236 933 }
6983e6b5
RB
934 /* Mismatch. */
935 continue;
190c2236
JJ
936 }
937 }
ebfd146a
IR
938 }
939
0d0293ac 940 /* Grouped store or load. */
b9787581 941 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a
IR
942 {
943 if (REFERENCE_CLASS_P (lhs))
944 {
945 /* Store. */
6983e6b5 946 ;
ebfd146a 947 }
b5aeb3bb
IR
948 else
949 {
950 /* Load. */
b9787581 951 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
b5aeb3bb
IR
952 if (prev_first_load)
953 {
954 /* Check that there are no loads from different interleaving
6983e6b5
RB
955 chains in the same node. */
956 if (prev_first_load != first_load)
78c60e3d 957 {
73fbfcad 958 if (dump_enabled_p ())
b5aeb3bb 959 {
78c60e3d
SS
960 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
961 vect_location,
962 "Build SLP failed: different "
963 "interleaving chains in one node ");
964 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
965 stmt, 0);
b5aeb3bb 966 }
6983e6b5
RB
967 /* Mismatch. */
968 continue;
b5aeb3bb
IR
969 }
970 }
971 else
972 prev_first_load = first_load;
ebfd146a 973 }
0d0293ac 974 } /* Grouped access. */
ebfd146a
IR
975 else
976 {
977 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
978 {
0d0293ac 979 /* Not grouped load. */
73fbfcad 980 if (dump_enabled_p ())
ebfd146a 981 {
78c60e3d
SS
982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
983 "Build SLP failed: not grouped load ");
984 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a
IR
985 }
986
0d0293ac 987 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
988 /* Fatal mismatch. */
989 matches[0] = false;
ebfd146a
IR
990 return false;
991 }
992
993 /* Not memory operation. */
994 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 995 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 996 && TREE_CODE_CLASS (rhs_code) != tcc_expression
42fd8198 997 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
190c2236 998 && rhs_code != CALL_EXPR)
ebfd146a 999 {
73fbfcad 1000 if (dump_enabled_p ())
ebfd146a 1001 {
78c60e3d
SS
1002 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1003 "Build SLP failed: operation");
1004 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
1005 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 1006 }
6983e6b5
RB
1007 /* Fatal mismatch. */
1008 matches[0] = false;
ebfd146a
IR
1009 return false;
1010 }
1011
4cecd659
BC
1012 if (rhs_code == COND_EXPR)
1013 {
1014 tree cond_expr = gimple_assign_rhs1 (stmt);
1015 enum tree_code cond_code = TREE_CODE (cond_expr);
1016 enum tree_code swap_code = ERROR_MARK;
1017 enum tree_code invert_code = ERROR_MARK;
f7e531cf
IR
1018
1019 if (i == 0)
1020 first_cond_code = TREE_CODE (cond_expr);
4cecd659
BC
1021 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1022 {
1023 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1024 swap_code = swap_tree_comparison (cond_code);
1025 invert_code = invert_tree_comparison (cond_code, honor_nans);
1026 }
1027
1028 if (first_cond_code == cond_code)
1029 ;
1030 /* Isomorphic can be achieved by swapping. */
1031 else if (first_cond_code == swap_code)
1032 swap[i] = 1;
1033 /* Isomorphic can be achieved by inverting. */
1034 else if (first_cond_code == invert_code)
1035 swap[i] = 2;
1036 else
1037 {
1038 if (dump_enabled_p ())
1039 {
1040 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
1041 "Build SLP failed: different"
1042 " operation");
4cecd659 1043 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
78c60e3d 1044 stmt, 0);
4cecd659 1045 }
6983e6b5
RB
1046 /* Mismatch. */
1047 continue;
f7e531cf 1048 }
4cecd659 1049 }
ebfd146a 1050 }
6983e6b5
RB
1051
1052 matches[i] = true;
1053 }
1054
1055 for (i = 0; i < group_size; ++i)
1056 if (!matches[i])
1057 return false;
1058
6876e5bc
RB
1059 /* If we allowed a two-operation SLP node verify the target can cope
1060 with the permute we are going to use. */
1061 if (alt_stmt_code != ERROR_MARK
1062 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1063 {
1f3cb663
RS
1064 if (vectype == boolean_type_node
1065 || !vect_two_operations_perm_ok_p (stmts, group_size,
1066 vectype, alt_stmt_code))
6876e5bc
RB
1067 {
1068 for (i = 0; i < group_size; ++i)
b9787581 1069 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
6876e5bc
RB
1070 {
1071 matches[i] = false;
1072 if (dump_enabled_p ())
1073 {
1074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1075 "Build SLP failed: different operation "
1076 "in stmt ");
1077 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
b9787581 1078 stmts[i]->stmt, 0);
6876e5bc
RB
1079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1080 "original stmt ");
1081 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
b9787581 1082 first_stmt_info->stmt, 0);
6876e5bc
RB
1083 }
1084 }
1085 return false;
1086 }
1087 *two_operators = true;
1088 }
1089
6983e6b5
RB
1090 return true;
1091}
1092
26d66f28
RB
1093/* Traits for the hash_set to record failed SLP builds for a stmt set.
1094 Note we never remove apart from at destruction time so we do not
1095 need a special value for deleted that differs from empty. */
1096struct bst_traits
1097{
b9787581
RS
1098 typedef vec <stmt_vec_info> value_type;
1099 typedef vec <stmt_vec_info> compare_type;
26d66f28
RB
1100 static inline hashval_t hash (value_type);
1101 static inline bool equal (value_type existing, value_type candidate);
1102 static inline bool is_empty (value_type x) { return !x.exists (); }
1103 static inline bool is_deleted (value_type x) { return !x.exists (); }
1104 static inline void mark_empty (value_type &x) { x.release (); }
1105 static inline void mark_deleted (value_type &x) { x.release (); }
1106 static inline void remove (value_type &x) { x.release (); }
1107};
1108inline hashval_t
1109bst_traits::hash (value_type x)
1110{
1111 inchash::hash h;
1112 for (unsigned i = 0; i < x.length (); ++i)
b9787581 1113 h.add_int (gimple_uid (x[i]->stmt));
26d66f28
RB
1114 return h.end ();
1115}
1116inline bool
1117bst_traits::equal (value_type existing, value_type candidate)
1118{
1119 if (existing.length () != candidate.length ())
1120 return false;
1121 for (unsigned i = 0; i < existing.length (); ++i)
1122 if (existing[i] != candidate[i])
1123 return false;
1124 return true;
1125}
1126
f7300fff
RB
1127typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
1128static scalar_stmts_set_t *bst_fail;
26d66f28 1129
68435eb2
RB
1130typedef hash_map <vec <gimple *>, slp_tree,
1131 simple_hashmap_traits <bst_traits, slp_tree> >
1132 scalar_stmts_to_slp_tree_map_t;
1133
26d66f28
RB
1134static slp_tree
1135vect_build_slp_tree_2 (vec_info *vinfo,
b9787581 1136 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1137 poly_uint64 *max_nunits,
26d66f28
RB
1138 vec<slp_tree> *loads,
1139 bool *matches, unsigned *npermutes, unsigned *tree_size,
1140 unsigned max_tree_size);
6983e6b5 1141
e403d17e 1142static slp_tree
310213d4 1143vect_build_slp_tree (vec_info *vinfo,
b9787581 1144 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1145 poly_uint64 *max_nunits, vec<slp_tree> *loads,
1428105c
RB
1146 bool *matches, unsigned *npermutes, unsigned *tree_size,
1147 unsigned max_tree_size)
26d66f28
RB
1148{
1149 if (bst_fail->contains (stmts))
1150 return NULL;
1151 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
1152 loads, matches, npermutes, tree_size,
1153 max_tree_size);
1154 /* When SLP build fails for stmts record this, otherwise SLP build
1155 can be exponential in time when we allow to construct parts from
1156 scalars, see PR81723. */
1157 if (! res)
1158 {
b9787581 1159 vec <stmt_vec_info> x;
26d66f28
RB
1160 x.create (stmts.length ());
1161 x.splice (stmts);
1162 bst_fail->add (x);
1163 }
1164 return res;
1165}
1166
1167/* Recursively build an SLP tree starting from NODE.
1168 Fail (and return a value not equal to zero) if def-stmts are not
1169 isomorphic, require data permutation or are of unsupported types of
1170 operation. Otherwise, return 0.
1171 The value returned is the depth in the SLP tree where a mismatch
1172 was found. */
1173
1174static slp_tree
1175vect_build_slp_tree_2 (vec_info *vinfo,
b9787581 1176 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1177 poly_uint64 *max_nunits,
26d66f28
RB
1178 vec<slp_tree> *loads,
1179 bool *matches, unsigned *npermutes, unsigned *tree_size,
1180 unsigned max_tree_size)
6983e6b5 1181{
4b6068ea
RS
1182 unsigned nops, i, this_tree_size = 0;
1183 poly_uint64 this_max_nunits = *max_nunits;
e403d17e 1184 slp_tree node;
6983e6b5 1185
6983e6b5
RB
1186 matches[0] = false;
1187
b9787581
RS
1188 stmt_vec_info stmt_info = stmts[0];
1189 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
6983e6b5 1190 nops = gimple_call_num_args (stmt);
b9787581 1191 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6983e6b5
RB
1192 {
1193 nops = gimple_num_ops (stmt) - 1;
1194 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1195 nops++;
ebfd146a 1196 }
b9787581 1197 else if (is_a <gphi *> (stmt_info->stmt))
e7baeb39 1198 nops = 0;
6983e6b5 1199 else
e403d17e 1200 return NULL;
6983e6b5 1201
c78e3652
RB
1202 /* If the SLP node is a PHI (induction or reduction), terminate
1203 the recursion. */
b9787581 1204 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
e7baeb39 1205 {
b161f2c9
RS
1206 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1207 tree vectype = get_vectype_for_scalar_type (scalar_type);
b9787581 1208 if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
b161f2c9
RS
1209 max_nunits))
1210 return NULL;
1211
b9787581 1212 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
c78e3652 1213 /* Induction from different IVs is not supported. */
719488f8
RB
1214 if (def_type == vect_induction_def)
1215 {
b9787581
RS
1216 stmt_vec_info other_info;
1217 FOR_EACH_VEC_ELT (stmts, i, other_info)
1218 if (stmt_info != other_info)
719488f8
RB
1219 return NULL;
1220 }
1221 else
1222 {
1223 /* Else def types have to match. */
b9787581
RS
1224 stmt_vec_info other_info;
1225 FOR_EACH_VEC_ELT (stmts, i, other_info)
719488f8
RB
1226 {
1227 /* But for reduction chains only check on the first stmt. */
b9787581
RS
1228 if (REDUC_GROUP_FIRST_ELEMENT (other_info)
1229 && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
719488f8 1230 continue;
b9787581 1231 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
719488f8
RB
1232 return NULL;
1233 }
1234 }
e7baeb39
RB
1235 node = vect_create_new_slp_node (stmts);
1236 return node;
1237 }
1238
1239
6876e5bc 1240 bool two_operators = false;
4cecd659 1241 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
5249ee4d 1242 if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
e403d17e
RB
1243 &this_max_nunits, matches, &two_operators))
1244 return NULL;
ebfd146a 1245
6983e6b5 1246 /* If the SLP node is a load, terminate the recursion. */
b9787581
RS
1247 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1248 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
ebfd146a 1249 {
e403d17e
RB
1250 *max_nunits = this_max_nunits;
1251 node = vect_create_new_slp_node (stmts);
1252 loads->safe_push (node);
1253 return node;
ebfd146a
IR
1254 }
1255
6983e6b5
RB
1256 /* Get at the operands, verifying they are compatible. */
1257 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1258 slp_oprnd_info oprnd_info;
b9787581 1259 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
6983e6b5 1260 {
4cecd659 1261 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
018b2744 1262 stmts, i, &oprnds_info);
4cecd659
BC
1263 if (res != 0)
1264 matches[(res == -1) ? 0 : i] = false;
1265 if (!matches[0])
1266 break;
6983e6b5 1267 }
b0b4483e
RB
1268 for (i = 0; i < group_size; ++i)
1269 if (!matches[i])
1270 {
1271 vect_free_oprnd_info (oprnds_info);
e403d17e 1272 return NULL;
b0b4483e 1273 }
6983e6b5 1274
e403d17e
RB
1275 auto_vec<slp_tree, 4> children;
1276 auto_vec<slp_tree> this_loads;
1277
b9787581 1278 stmt_info = stmts[0];
6983e6b5 1279
26d66f28
RB
1280 if (tree_size)
1281 max_tree_size -= *tree_size;
1282
b8698a0f 1283 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 1284 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 1285 {
d092494c 1286 slp_tree child;
e403d17e
RB
1287 unsigned old_nloads = this_loads.length ();
1288 unsigned old_tree_size = this_tree_size;
1289 unsigned int j;
b8698a0f 1290
e7baeb39 1291 if (oprnd_info->first_dt != vect_internal_def
c78e3652 1292 && oprnd_info->first_dt != vect_reduction_def
e7baeb39 1293 && oprnd_info->first_dt != vect_induction_def)
d092494c 1294 continue;
ebfd146a 1295
1428105c
RB
1296 if (++this_tree_size > max_tree_size)
1297 {
e403d17e 1298 FOR_EACH_VEC_ELT (children, j, child)
6e2dd807 1299 vect_free_slp_tree (child, false);
1428105c 1300 vect_free_oprnd_info (oprnds_info);
e403d17e 1301 return NULL;
1428105c
RB
1302 }
1303
e403d17e
RB
1304 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1305 group_size, &this_max_nunits,
1306 &this_loads, matches, npermutes,
1307 &this_tree_size,
1308 max_tree_size)) != NULL)
6983e6b5 1309 {
3fc356dc
RB
1310 /* If we have all children of child built up from scalars then just
1311 throw that away and build it up this node from scalars. */
995b6fe0
RB
1312 if (!SLP_TREE_CHILDREN (child).is_empty ()
1313 /* ??? Rejecting patterns this way doesn't work. We'd have to
1314 do extra work to cancel the pattern so the uses see the
1315 scalar version. */
b9787581 1316 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
3fc356dc 1317 {
3fc356dc
RB
1318 slp_tree grandchild;
1319
1320 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1321 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
3fc356dc
RB
1322 break;
1323 if (!grandchild)
1324 {
1325 /* Roll back. */
e403d17e
RB
1326 this_loads.truncate (old_nloads);
1327 this_tree_size = old_tree_size;
3fc356dc 1328 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1329 vect_free_slp_tree (grandchild, false);
3fc356dc
RB
1330 SLP_TREE_CHILDREN (child).truncate (0);
1331
1332 dump_printf_loc (MSG_NOTE, vect_location,
1333 "Building parent vector operands from "
1334 "scalars instead\n");
1335 oprnd_info->def_stmts = vNULL;
603cca93 1336 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1337 children.safe_push (child);
3fc356dc
RB
1338 continue;
1339 }
1340 }
1341
6983e6b5 1342 oprnd_info->def_stmts = vNULL;
e403d17e 1343 children.safe_push (child);
6983e6b5
RB
1344 continue;
1345 }
1346
90dd6e3d
RB
1347 /* If the SLP build failed fatally and we analyze a basic-block
1348 simply treat nodes we fail to build as externally defined
1349 (and thus build vectors from the scalar defs).
1350 The cost model will reject outright expensive cases.
1351 ??? This doesn't treat cases where permutation ultimatively
1352 fails (or we don't try permutation below). Ideally we'd
1353 even compute a permutation that will end up with the maximum
1354 SLP tree size... */
310213d4 1355 if (is_a <bb_vec_info> (vinfo)
90dd6e3d
RB
1356 && !matches[0]
1357 /* ??? Rejecting patterns this way doesn't work. We'd have to
1358 do extra work to cancel the pattern so the uses see the
1359 scalar version. */
b9787581 1360 && !is_pattern_stmt_p (stmt_info))
90dd6e3d
RB
1361 {
1362 dump_printf_loc (MSG_NOTE, vect_location,
1363 "Building vector operands from scalars\n");
e403d17e 1364 child = vect_create_new_slp_node (oprnd_info->def_stmts);
603cca93 1365 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e
RB
1366 children.safe_push (child);
1367 oprnd_info->def_stmts = vNULL;
90dd6e3d
RB
1368 continue;
1369 }
1370
6983e6b5
RB
1371 /* If the SLP build for operand zero failed and operand zero
1372 and one can be commutated try that for the scalar stmts
1373 that failed the match. */
1374 if (i == 0
1375 /* A first scalar stmt mismatch signals a fatal mismatch. */
1376 && matches[0]
1377 /* ??? For COND_EXPRs we can swap the comparison operands
1378 as well as the arms under some constraints. */
1379 && nops == 2
1380 && oprnds_info[1]->first_dt == vect_internal_def
b9787581 1381 && is_gimple_assign (stmt_info->stmt)
6983e6b5
RB
1382 /* Do so only if the number of not successful permutes was nor more
1383 than a cut-ff as re-trying the recursive match on
1384 possibly each level of the tree would expose exponential
1385 behavior. */
1386 && *npermutes < 4)
1387 {
85c5e2f5
RB
1388 /* See whether we can swap the matching or the non-matching
1389 stmt operands. */
1390 bool swap_not_matching = true;
1391 do
1392 {
1393 for (j = 0; j < group_size; ++j)
1394 {
1395 if (matches[j] != !swap_not_matching)
1396 continue;
b9787581 1397 stmt_vec_info stmt_info = stmts[j];
85c5e2f5 1398 /* Verify if we can swap operands of this stmt. */
b9787581
RS
1399 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1400 if (!stmt
85c5e2f5
RB
1401 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1402 {
1403 if (!swap_not_matching)
1404 goto fail;
1405 swap_not_matching = false;
1406 break;
1407 }
1408 /* Verify if we can safely swap or if we committed to a
1409 specific operand order already.
1410 ??? Instead of modifying GIMPLE stmts here we could
1411 record whether we want to swap operands in the SLP
1412 node and temporarily do that when processing it
1413 (or wrap operand accessors in a helper). */
1414 else if (swap[j] != 0
b9787581 1415 || STMT_VINFO_NUM_SLP_USES (stmt_info))
85c5e2f5
RB
1416 {
1417 if (!swap_not_matching)
1418 {
1419 if (dump_enabled_p ())
1420 {
1421 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1422 vect_location,
1423 "Build SLP failed: cannot swap "
1424 "operands of shared stmt ");
1425 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
b9787581 1426 TDF_SLIM, stmts[j]->stmt, 0);
85c5e2f5
RB
1427 }
1428 goto fail;
1429 }
1430 swap_not_matching = false;
1431 break;
1432 }
1433 }
1434 }
1435 while (j != group_size);
78810bd3 1436
6983e6b5 1437 /* Swap mismatched definition stmts. */
b0b4483e
RB
1438 dump_printf_loc (MSG_NOTE, vect_location,
1439 "Re-trying with swapped operands of stmts ");
e72baed7 1440 for (j = 0; j < group_size; ++j)
85c5e2f5 1441 if (matches[j] == !swap_not_matching)
6983e6b5 1442 {
6b4db501
MM
1443 std::swap (oprnds_info[0]->def_stmts[j],
1444 oprnds_info[1]->def_stmts[j]);
b0b4483e 1445 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1446 }
b0b4483e 1447 dump_printf (MSG_NOTE, "\n");
74574669
RB
1448 /* And try again with scratch 'matches' ... */
1449 bool *tem = XALLOCAVEC (bool, group_size);
e403d17e
RB
1450 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1451 group_size, &this_max_nunits,
1452 &this_loads, tem, npermutes,
1453 &this_tree_size,
1454 max_tree_size)) != NULL)
6983e6b5 1455 {
60f2b864
RB
1456 /* ... so if successful we can apply the operand swapping
1457 to the GIMPLE IL. This is necessary because for example
1458 vect_get_slp_defs uses operand indexes and thus expects
1459 canonical operand order. This is also necessary even
1460 if we end up building the operand from scalars as
1461 we'll continue to process swapped operand two. */
1462 for (j = 0; j < group_size; ++j)
b9787581 1463 gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
f47cda24 1464 for (j = 0; j < group_size; ++j)
b9787581
RS
1465 if (matches[j] == !swap_not_matching)
1466 {
1467 gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
1468 /* Avoid swapping operands twice. */
1469 if (gimple_plf (stmt, GF_PLF_1))
1470 continue;
1471 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1472 gimple_assign_rhs2_ptr (stmt));
1473 gimple_set_plf (stmt, GF_PLF_1, true);
1474 }
f47cda24
RB
1475 /* Verify we swap all duplicates or none. */
1476 if (flag_checking)
1477 for (j = 0; j < group_size; ++j)
b9787581
RS
1478 gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
1479 == (matches[j] == !swap_not_matching));
60f2b864 1480
85c69b0b
RB
1481 /* If we have all children of child built up from scalars then
1482 just throw that away and build it up this node from scalars. */
995b6fe0
RB
1483 if (!SLP_TREE_CHILDREN (child).is_empty ()
1484 /* ??? Rejecting patterns this way doesn't work. We'd have
1485 to do extra work to cancel the pattern so the uses see the
1486 scalar version. */
b9787581 1487 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
85c69b0b
RB
1488 {
1489 unsigned int j;
1490 slp_tree grandchild;
1491
1492 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1493 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
85c69b0b
RB
1494 break;
1495 if (!grandchild)
1496 {
1497 /* Roll back. */
e403d17e
RB
1498 this_loads.truncate (old_nloads);
1499 this_tree_size = old_tree_size;
85c69b0b 1500 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1501 vect_free_slp_tree (grandchild, false);
85c69b0b
RB
1502 SLP_TREE_CHILDREN (child).truncate (0);
1503
1504 dump_printf_loc (MSG_NOTE, vect_location,
1505 "Building parent vector operands from "
1506 "scalars instead\n");
1507 oprnd_info->def_stmts = vNULL;
603cca93 1508 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1509 children.safe_push (child);
85c69b0b
RB
1510 continue;
1511 }
1512 }
1513
6983e6b5 1514 oprnd_info->def_stmts = vNULL;
e403d17e 1515 children.safe_push (child);
6983e6b5
RB
1516 continue;
1517 }
1518
1519 ++*npermutes;
1520 }
1521
78810bd3 1522fail:
e403d17e
RB
1523 gcc_assert (child == NULL);
1524 FOR_EACH_VEC_ELT (children, j, child)
6e2dd807 1525 vect_free_slp_tree (child, false);
6983e6b5 1526 vect_free_oprnd_info (oprnds_info);
e403d17e 1527 return NULL;
ebfd146a
IR
1528 }
1529
e403d17e
RB
1530 vect_free_oprnd_info (oprnds_info);
1531
1428105c
RB
1532 if (tree_size)
1533 *tree_size += this_tree_size;
e403d17e
RB
1534 *max_nunits = this_max_nunits;
1535 loads->safe_splice (this_loads);
1428105c 1536
e403d17e
RB
1537 node = vect_create_new_slp_node (stmts);
1538 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1539 SLP_TREE_CHILDREN (node).splice (children);
1540 return node;
ebfd146a
IR
1541}
1542
78c60e3d 1543/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1544
1545static void
4f5b9c80
DM
1546vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1547 slp_tree node)
ebfd146a
IR
1548{
1549 int i;
b9787581 1550 stmt_vec_info stmt_info;
d755c7ef 1551 slp_tree child;
ebfd146a 1552
603cca93
RB
1553 dump_printf_loc (dump_kind, loc, "node%s\n",
1554 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1555 ? " (external)" : "");
b9787581 1556 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
ebfd146a 1557 {
c2a12ca0 1558 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
b9787581 1559 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt_info->stmt, 0);
ebfd146a 1560 }
9771b263 1561 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
c2a12ca0 1562 vect_print_slp_tree (dump_kind, loc, child);
ebfd146a
IR
1563}
1564
1565
b8698a0f
L
1566/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1567 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1568 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1569 stmts in NODE are to be marked. */
1570
1571static void
1572vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1573{
1574 int i;
b9787581 1575 stmt_vec_info stmt_info;
d755c7ef 1576 slp_tree child;
ebfd146a 1577
603cca93 1578 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
1579 return;
1580
b9787581 1581 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
ebfd146a 1582 if (j < 0 || i == j)
b9787581 1583 STMT_SLP_TYPE (stmt_info) = mark;
ebfd146a 1584
9771b263 1585 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1586 vect_mark_slp_stmts (child, mark, j);
ebfd146a
IR
1587}
1588
1589
a70d6342
IR
1590/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1591
1592static void
1593vect_mark_slp_stmts_relevant (slp_tree node)
1594{
1595 int i;
a70d6342 1596 stmt_vec_info stmt_info;
d755c7ef 1597 slp_tree child;
a70d6342 1598
603cca93 1599 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
1600 return;
1601
b9787581 1602 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
a70d6342 1603 {
b8698a0f 1604 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1605 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1606 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1607 }
1608
9771b263 1609 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1610 vect_mark_slp_stmts_relevant (child);
a70d6342
IR
1611}
1612
1613
b5aeb3bb
IR
1614/* Rearrange the statements of NODE according to PERMUTATION. */
1615
1616static void
1617vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
01d8bf07 1618 vec<unsigned> permutation)
b5aeb3bb 1619{
b9787581
RS
1620 stmt_vec_info stmt_info;
1621 vec<stmt_vec_info> tmp_stmts;
d755c7ef
RB
1622 unsigned int i;
1623 slp_tree child;
b5aeb3bb 1624
9771b263 1625 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1626 vect_slp_rearrange_stmts (child, group_size, permutation);
b5aeb3bb 1627
9771b263
DN
1628 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1629 tmp_stmts.create (group_size);
d755c7ef 1630 tmp_stmts.quick_grow_cleared (group_size);
b5aeb3bb 1631
b9787581
RS
1632 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1633 tmp_stmts[permutation[i]] = stmt_info;
b5aeb3bb 1634
9771b263 1635 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1636 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1637}
1638
1639
b266b968
RB
1640/* Attempt to reorder stmts in a reduction chain so that we don't
1641 require any load permutation. Return true if that was possible,
1642 otherwise return false. */
1643
1644static bool
1645vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1646{
1647 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1648 unsigned int i, j;
b266b968
RB
1649 unsigned int lidx;
1650 slp_tree node, load;
1651
1652 /* Compare all the permutation sequences to the first one. We know
1653 that at least one load is permuted. */
1654 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1655 if (!node->load_permutation.exists ())
1656 return false;
1657 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1658 {
1659 if (!load->load_permutation.exists ())
1660 return false;
1661 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1662 if (lidx != node->load_permutation[j])
1663 return false;
1664 }
1665
1666 /* Check that the loads in the first sequence are different and there
1667 are no gaps between them. */
7ba9e72d 1668 auto_sbitmap load_index (group_size);
b266b968
RB
1669 bitmap_clear (load_index);
1670 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1671 {
41eefe13 1672 if (lidx >= group_size)
7ba9e72d 1673 return false;
b266b968 1674 if (bitmap_bit_p (load_index, lidx))
7ba9e72d
TS
1675 return false;
1676
b266b968
RB
1677 bitmap_set_bit (load_index, lidx);
1678 }
1679 for (i = 0; i < group_size; i++)
1680 if (!bitmap_bit_p (load_index, i))
7ba9e72d 1681 return false;
b266b968
RB
1682
1683 /* This permutation is valid for reduction. Since the order of the
1684 statements in the nodes is not important unless they are memory
1685 accesses, we can rearrange the statements in all the nodes
1686 according to the order of the loads. */
1687 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1688 node->load_permutation);
1689
1690 /* We are done, no actual permutations need to be generated. */
d9f21f6a 1691 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
b266b968 1692 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
c4e360f4 1693 {
b9787581 1694 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1695 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
c4e360f4
RB
1696 /* But we have to keep those permutations that are required because
1697 of handling of gaps. */
d9f21f6a 1698 if (known_eq (unrolling_factor, 1U)
b9787581
RS
1699 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1700 && DR_GROUP_GAP (first_stmt_info) == 0))
c4e360f4 1701 SLP_TREE_LOAD_PERMUTATION (node).release ();
cbd400b4
RB
1702 else
1703 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1704 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
c4e360f4
RB
1705 }
1706
b266b968
RB
1707 return true;
1708}
1709
01d8bf07
RB
1710/* Check if the required load permutations in the SLP instance
1711 SLP_INSTN are supported. */
ebfd146a
IR
1712
1713static bool
01d8bf07 1714vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1715{
01d8bf07
RB
1716 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1717 unsigned int i, j, k, next;
6983e6b5 1718 slp_tree node;
ebfd146a 1719
73fbfcad 1720 if (dump_enabled_p ())
ebfd146a 1721 {
78c60e3d 1722 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1723 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1724 if (node->load_permutation.exists ())
1725 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1726 dump_printf (MSG_NOTE, "%d ", next);
1727 else
bddc974e
TJ
1728 for (k = 0; k < group_size; ++k)
1729 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1730 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1731 }
1732
b5aeb3bb
IR
1733 /* In case of reduction every load permutation is allowed, since the order
1734 of the reduction statements is not important (as opposed to the case of
0d0293ac 1735 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1736 load nodes are of the same size and have the same permutation (and then
1737 rearrange all the nodes of the SLP instance according to this
1738 permutation). */
1739
1740 /* Check that all the load nodes are of the same size. */
01d8bf07 1741 /* ??? Can't we assert this? */
9771b263 1742 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1743 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1744 return false;
2200fc49 1745
b5aeb3bb 1746 node = SLP_INSTANCE_TREE (slp_instn);
b9787581 1747 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1748
b010117a 1749 /* Reduction (there are no data-refs in the root).
b266b968 1750 In reduction chain the order of the loads is not important. */
b9787581
RS
1751 if (!STMT_VINFO_DATA_REF (stmt_info)
1752 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
c4e360f4 1753 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1754
6aa904c4
IR
1755 /* In basic block vectorization we allow any subchain of an interleaving
1756 chain.
1757 FORNOW: not supported in loop SLP because of realignment compications. */
b9787581 1758 if (STMT_VINFO_BB_VINFO (stmt_info))
6aa904c4 1759 {
240a94da
RB
1760 /* Check whether the loads in an instance form a subchain and thus
1761 no permutation is necessary. */
9771b263 1762 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1763 {
9626d143
RB
1764 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1765 continue;
240a94da 1766 bool subchain_p = true;
bffb8014 1767 stmt_vec_info next_load_info = NULL;
b9787581
RS
1768 stmt_vec_info load_info;
1769 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1770 {
1771 if (j != 0
bffb8014 1772 && (next_load_info != load_info
b9787581 1773 || DR_GROUP_GAP (load_info) != 1))
240a94da
RB
1774 {
1775 subchain_p = false;
1776 break;
1777 }
bffb8014 1778 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
b9787581 1779 }
240a94da
RB
1780 if (subchain_p)
1781 SLP_TREE_LOAD_PERMUTATION (node).release ();
1782 else
1783 {
b9787581 1784 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1785 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
928686b1 1786 unsigned HOST_WIDE_INT nunits;
fe73a332
RB
1787 unsigned k, maxk = 0;
1788 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1789 if (k > maxk)
1790 maxk = k;
1791 /* In BB vectorization we may not actually use a loaded vector
2c53b149 1792 accessing elements in excess of DR_GROUP_SIZE. */
928686b1
RS
1793 tree vectype = STMT_VINFO_VECTYPE (group_info);
1794 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2c53b149 1795 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
fe73a332
RB
1796 {
1797 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1798 "BB vectorization with gaps at the end of "
1799 "a load is not supported\n");
1800 return false;
1801 }
1802
240a94da
RB
1803 /* Verify the permutation can be generated. */
1804 vec<tree> tem;
29afecdf 1805 unsigned n_perms;
240a94da 1806 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1807 1, slp_instn, true, &n_perms))
240a94da
RB
1808 {
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1810 vect_location,
1811 "unsupported load permutation\n");
1812 return false;
1813 }
1814 }
6aa904c4 1815 }
01d8bf07 1816 return true;
6aa904c4
IR
1817 }
1818
31bee964
RB
1819 /* For loop vectorization verify we can generate the permutation. Be
1820 conservative about the vectorization factor, there are permutations
1821 that will use three vector inputs only starting from a specific factor
1822 and the vectorization factor is not yet final.
1823 ??? The SLP instance unrolling factor might not be the maximum one. */
29afecdf 1824 unsigned n_perms;
d9f21f6a
RS
1825 poly_uint64 test_vf
1826 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
31bee964 1827 LOOP_VINFO_VECT_FACTOR
b9787581 1828 (STMT_VINFO_LOOP_VINFO (stmt_info)));
01d8bf07
RB
1829 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1830 if (node->load_permutation.exists ()
31bee964
RB
1831 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1832 slp_instn, true, &n_perms))
01d8bf07 1833 return false;
9b999e8c 1834
01d8bf07 1835 return true;
ebfd146a
IR
1836}
1837
1838
e4a707c4 1839/* Find the last store in SLP INSTANCE. */
ff802fa1 1840
95c68311 1841stmt_vec_info
2e8ab70c 1842vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1843{
95c68311 1844 stmt_vec_info last = NULL;
b9787581 1845 stmt_vec_info stmt_vinfo;
e4a707c4 1846
b9787581 1847 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2e8ab70c 1848 {
2e8ab70c 1849 if (is_pattern_stmt_p (stmt_vinfo))
95c68311
RS
1850 stmt_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
1851 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2e8ab70c 1852 }
e4a707c4 1853
2e8ab70c 1854 return last;
e4a707c4
IR
1855}
1856
1ba91a49
AL
1857/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1858 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1859 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1860 containing the remainder.
1861 Return the first stmt in the second group. */
1862
1863static gimple *
1864vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1865{
1866 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
bffb8014 1867 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1ba91a49 1868 gcc_assert (group1_size > 0);
2c53b149 1869 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1ba91a49 1870 gcc_assert (group2_size > 0);
2c53b149 1871 DR_GROUP_SIZE (first_vinfo) = group1_size;
1ba91a49 1872
bffb8014 1873 stmt_vec_info stmt_info = first_vinfo;
1ba91a49
AL
1874 for (unsigned i = group1_size; i > 1; i--)
1875 {
bffb8014
RS
1876 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1877 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1878 }
1879 /* STMT is now the last element of the first group. */
bffb8014
RS
1880 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1881 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1ba91a49 1882
bffb8014
RS
1883 DR_GROUP_SIZE (group2) = group2_size;
1884 for (stmt_info = group2; stmt_info;
1885 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1ba91a49 1886 {
bffb8014
RS
1887 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1888 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1889 }
1890
2c53b149 1891 /* For the second group, the DR_GROUP_GAP is that before the original group,
1ba91a49 1892 plus skipping over the first vector. */
bffb8014 1893 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1ba91a49 1894
2c53b149
RB
1895 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1896 DR_GROUP_GAP (first_vinfo) += group2_size;
1ba91a49
AL
1897
1898 if (dump_enabled_p ())
1899 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1900 group1_size, group2_size);
1901
1902 return group2;
1903}
1904
4b6068ea
RS
1905/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1906 statements and a vector of NUNITS elements. */
1907
1908static poly_uint64
1909calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1910{
1911 return exact_div (common_multiple (nunits, group_size), group_size);
1912}
1913
0d0293ac 1914/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1915 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1916 Return FALSE if it's impossible to SLP any stmt in the loop. */
1917
1918static bool
310213d4 1919vect_analyze_slp_instance (vec_info *vinfo,
355fe088 1920 gimple *stmt, unsigned max_tree_size)
ebfd146a 1921{
b9787581 1922 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 1923 slp_instance new_instance;
d092494c 1924 slp_tree node;
2c53b149 1925 unsigned int group_size;
b5aeb3bb 1926 tree vectype, scalar_type = NULL_TREE;
1ba91a49 1927 unsigned int i;
9771b263 1928 vec<slp_tree> loads;
b9787581
RS
1929 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1930 vec<stmt_vec_info> scalar_stmts;
b5aeb3bb 1931
b9787581 1932 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
b5aeb3bb 1933 {
2c53b149
RB
1934 scalar_type = TREE_TYPE (DR_REF (dr));
1935 vectype = get_vectype_for_scalar_type (scalar_type);
b9787581 1936 group_size = DR_GROUP_SIZE (stmt_info);
2c53b149 1937 }
b9787581 1938 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
1939 {
1940 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581
RS
1941 vectype = STMT_VINFO_VECTYPE (stmt_info);
1942 group_size = REDUC_GROUP_SIZE (stmt_info);
b5aeb3bb
IR
1943 }
1944 else
1945 {
310213d4 1946 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581 1947 vectype = STMT_VINFO_VECTYPE (stmt_info);
310213d4 1948 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 1949 }
b8698a0f 1950
ebfd146a
IR
1951 if (!vectype)
1952 {
73fbfcad 1953 if (dump_enabled_p ())
ebfd146a 1954 {
78c60e3d
SS
1955 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1956 "Build SLP failed: unsupported data-type ");
1957 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 1958 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1959 }
b5aeb3bb 1960
ebfd146a
IR
1961 return false;
1962 }
4b6068ea 1963 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 1964
0d0293ac 1965 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1966 scalar_stmts.create (group_size);
bffb8014 1967 stmt_vec_info next_info = stmt_info;
b9787581 1968 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 1969 {
b5aeb3bb 1970 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
bffb8014 1971 while (next_info)
b5aeb3bb 1972 {
b9787581
RS
1973 if (STMT_VINFO_IN_PATTERN_P (next_info)
1974 && STMT_VINFO_RELATED_STMT (next_info))
1975 scalar_stmts.safe_push (STMT_VINFO_RELATED_STMT (next_info));
f7e531cf 1976 else
b9787581 1977 scalar_stmts.safe_push (next_info);
bffb8014 1978 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2c53b149
RB
1979 }
1980 }
b9787581 1981 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
1982 {
1983 /* Collect the reduction stmts and store them in
1984 SLP_TREE_SCALAR_STMTS. */
bffb8014 1985 while (next_info)
2c53b149 1986 {
b9787581
RS
1987 if (STMT_VINFO_IN_PATTERN_P (next_info)
1988 && STMT_VINFO_RELATED_STMT (next_info))
1989 scalar_stmts.safe_push (STMT_VINFO_RELATED_STMT (next_info));
2c53b149 1990 else
b9787581 1991 scalar_stmts.safe_push (next_info);
bffb8014 1992 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
b5aeb3bb 1993 }
14a61437
RB
1994 /* Mark the first element of the reduction chain as reduction to properly
1995 transform the node. In the reduction analysis phase only the last
1996 element of the chain is marked as reduction. */
b9787581 1997 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
b5aeb3bb
IR
1998 }
1999 else
2000 {
2001 /* Collect reduction statements. */
32c91dfc
RS
2002 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2003 for (i = 0; reductions.iterate (i, &next_info); i++)
2004 scalar_stmts.safe_push (next_info);
ebfd146a
IR
2005 }
2006
9771b263 2007 loads.create (group_size);
ebfd146a
IR
2008
2009 /* Build the tree for the SLP instance. */
89d390e5
RB
2010 bool *matches = XALLOCAVEC (bool, group_size);
2011 unsigned npermutes = 0;
f7300fff 2012 bst_fail = new scalar_stmts_set_t ();
4b6068ea 2013 poly_uint64 max_nunits = nunits;
e569db5f 2014 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
4b6068ea 2015 &max_nunits, &loads, matches, &npermutes,
e569db5f 2016 NULL, max_tree_size);
26d66f28 2017 delete bst_fail;
e569db5f 2018 if (node != NULL)
ebfd146a 2019 {
4ef69dfc 2020 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 2021 poly_uint64 unrolling_factor
4b6068ea 2022 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 2023
d9f21f6a 2024 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
2025 && is_a <bb_vec_info> (vinfo))
2026 {
4b6068ea
RS
2027 unsigned HOST_WIDE_INT const_max_nunits;
2028 if (!max_nunits.is_constant (&const_max_nunits)
2029 || const_max_nunits > group_size)
2030 {
2031 if (dump_enabled_p ())
2032 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2033 "Build SLP failed: store group "
2034 "size not a multiple of the vector size "
2035 "in basic block SLP\n");
6e2dd807 2036 vect_free_slp_tree (node, false);
4b6068ea
RS
2037 loads.release ();
2038 return false;
2039 }
e569db5f 2040 /* Fatal mismatch. */
4b6068ea 2041 matches[group_size / const_max_nunits * const_max_nunits] = false;
6e2dd807 2042 vect_free_slp_tree (node, false);
e569db5f
VK
2043 loads.release ();
2044 }
2045 else
2046 {
4ef69dfc
IR
2047 /* Create a new SLP instance. */
2048 new_instance = XNEW (struct _slp_instance);
2049 SLP_INSTANCE_TREE (new_instance) = node;
2050 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 2051 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 2052 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
2053
2054 /* Compute the load permutation. */
2055 slp_tree load_node;
2056 bool loads_permuted = false;
abf9bfbc
RB
2057 FOR_EACH_VEC_ELT (loads, i, load_node)
2058 {
01d8bf07 2059 vec<unsigned> load_permutation;
abf9bfbc 2060 int j;
b9787581 2061 stmt_vec_info load_info;
01d8bf07
RB
2062 bool this_load_permuted = false;
2063 load_permutation.create (group_size);
bffb8014 2064 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
b9787581
RS
2065 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2066 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
abf9bfbc 2067 {
b9787581 2068 int load_place = vect_get_place_in_interleaving_chain
bffb8014 2069 (load_info, first_stmt_info);
6983e6b5
RB
2070 gcc_assert (load_place != -1);
2071 if (load_place != j)
01d8bf07 2072 this_load_permuted = true;
abf9bfbc
RB
2073 load_permutation.safe_push (load_place);
2074 }
fe2bef71
RB
2075 if (!this_load_permuted
2076 /* The load requires permutation when unrolling exposes
2077 a gap either because the group is larger than the SLP
2078 group-size or because there is a gap between the groups. */
d9f21f6a 2079 && (known_eq (unrolling_factor, 1U)
bffb8014
RS
2080 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2081 && DR_GROUP_GAP (first_stmt_info) == 0)))
01d8bf07
RB
2082 {
2083 load_permutation.release ();
2084 continue;
2085 }
2086 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2087 loads_permuted = true;
abf9bfbc 2088 }
6aa904c4
IR
2089
2090 if (loads_permuted)
ebfd146a 2091 {
01d8bf07 2092 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 2093 {
73fbfcad 2094 if (dump_enabled_p ())
ebfd146a 2095 {
e645e942 2096 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
2097 "Build SLP failed: unsupported load "
2098 "permutation ");
e569db5f
VK
2099 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2100 TDF_SLIM, stmt, 0);
ebfd146a 2101 }
6e2dd807 2102 vect_free_slp_instance (new_instance, false);
ebfd146a
IR
2103 return false;
2104 }
ebfd146a 2105 }
ebfd146a 2106
e569db5f 2107 /* If the loads and stores can be handled with load/store-lan
bb0f5ca7
AL
2108 instructions do not generate this SLP instance. */
2109 if (is_a <loop_vec_info> (vinfo)
2110 && loads_permuted
7e11fc7f 2111 && dr && vect_store_lanes_supported (vectype, group_size, false))
bb0f5ca7
AL
2112 {
2113 slp_tree load_node;
2114 FOR_EACH_VEC_ELT (loads, i, load_node)
2115 {
bffb8014 2116 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
b9787581 2117 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
bffb8014 2118 /* Use SLP for strided accesses (or if we can't load-lanes). */
bb0f5ca7
AL
2119 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2120 || ! vect_load_lanes_supported
2121 (STMT_VINFO_VECTYPE (stmt_vinfo),
2c53b149 2122 DR_GROUP_SIZE (stmt_vinfo), false))
bb0f5ca7
AL
2123 break;
2124 }
2125 if (i == loads.length ())
2126 {
2127 if (dump_enabled_p ())
2128 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2129 "Built SLP cancelled: can use "
2130 "load/store-lanes\n");
6e2dd807 2131 vect_free_slp_instance (new_instance, false);
bb0f5ca7
AL
2132 return false;
2133 }
2134 }
2135
310213d4 2136 vinfo->slp_instances.safe_push (new_instance);
b8698a0f 2137
73fbfcad 2138 if (dump_enabled_p ())
c2a12ca0
RB
2139 {
2140 dump_printf_loc (MSG_NOTE, vect_location,
2141 "Final SLP tree for instance:\n");
2142 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2143 }
ebfd146a
IR
2144
2145 return true;
2146 }
e569db5f
VK
2147 }
2148 else
2149 {
ebfd146a
IR
2150 /* Failed to SLP. */
2151 /* Free the allocated memory. */
e403d17e 2152 scalar_stmts.release ();
9771b263 2153 loads.release ();
e569db5f 2154 }
b8698a0f 2155
1ba91a49 2156 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2157 vector size. */
4b6068ea 2158 unsigned HOST_WIDE_INT const_nunits;
1ba91a49 2159 if (is_a <bb_vec_info> (vinfo)
4b6068ea 2160 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2c53b149 2161 && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
4b6068ea 2162 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2163 {
2164 /* We consider breaking the group only on VF boundaries from the existing
2165 start. */
2166 for (i = 0; i < group_size; i++)
2167 if (!matches[i]) break;
2168
4b6068ea 2169 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2170 {
2171 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2172 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2173 unsigned group1_size = i & ~(const_nunits - 1);
1ba91a49
AL
2174
2175 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2176 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2177 /* If the first non-match was in the middle of a vector,
2178 skip the rest of that vector. */
2179 if (group1_size < i)
2180 {
4b6068ea 2181 i = group1_size + const_nunits;
1ba91a49 2182 if (i < group_size)
4b6068ea 2183 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2184 }
2185 if (i < group_size)
2186 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2187 return res;
2188 }
2189 /* Even though the first vector did not all match, we might be able to SLP
2190 (some) of the remainder. FORNOW ignore this possibility. */
2191 }
2192
a70d6342 2193 return false;
ebfd146a
IR
2194}
2195
2196
ff802fa1 2197/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
2198 trees of packed scalar stmts if SLP is possible. */
2199
2200bool
310213d4 2201vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2202{
2203 unsigned int i;
f698fccf 2204 stmt_vec_info first_element;
ebfd146a 2205
adac3a68 2206 DUMP_VECT_SCOPE ("vect_analyze_slp");
ebfd146a 2207
0d0293ac 2208 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2209 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
0630a4ec 2210 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
ebfd146a 2211
310213d4 2212 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2213 {
310213d4
RB
2214 if (loop_vinfo->reduction_chains.length () > 0)
2215 {
2216 /* Find SLP sequences starting from reduction chains. */
2217 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
0630a4ec 2218 if (! vect_analyze_slp_instance (vinfo, first_element,
310213d4 2219 max_tree_size))
6b5e165b
RB
2220 {
2221 /* Dissolve reduction chain group. */
f698fccf
RS
2222 stmt_vec_info vinfo = first_element;
2223 while (vinfo)
6b5e165b 2224 {
bffb8014 2225 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2c53b149
RB
2226 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2227 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
f698fccf 2228 vinfo = next;
6b5e165b 2229 }
f698fccf 2230 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
6b5e165b 2231 }
310213d4 2232 }
b010117a 2233
310213d4 2234 /* Find SLP sequences starting from groups of reductions. */
0630a4ec
RB
2235 if (loop_vinfo->reductions.length () > 1)
2236 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2237 max_tree_size);
310213d4 2238 }
b5aeb3bb 2239
ebfd146a
IR
2240 return true;
2241}
2242
2243
2244/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2245 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2246 least one instance. */
ebfd146a 2247
437f4a00 2248bool
ebfd146a
IR
2249vect_make_slp_decision (loop_vec_info loop_vinfo)
2250{
d9f21f6a
RS
2251 unsigned int i;
2252 poly_uint64 unrolling_factor = 1;
9771b263 2253 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2254 slp_instance instance;
2255 int decided_to_slp = 0;
2256
adac3a68 2257 DUMP_VECT_SCOPE ("vect_make_slp_decision");
ebfd146a 2258
9771b263 2259 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2260 {
2261 /* FORNOW: SLP if you can. */
d9f21f6a
RS
2262 /* All unroll factors have the form current_vector_size * X for some
2263 rational X, so they must have a common multiple. */
2264 unrolling_factor
2265 = force_common_multiple (unrolling_factor,
2266 SLP_INSTANCE_UNROLLING_FACTOR (instance));
ebfd146a 2267
ff802fa1 2268 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2269 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2270 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
2271 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2272 decided_to_slp++;
2273 }
2274
2275 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2276
73fbfcad 2277 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2278 {
2279 dump_printf_loc (MSG_NOTE, vect_location,
2280 "Decided to SLP %d instances. Unrolling factor ",
2281 decided_to_slp);
2282 dump_dec (MSG_NOTE, unrolling_factor);
2283 dump_printf (MSG_NOTE, "\n");
2284 }
437f4a00
IR
2285
2286 return (decided_to_slp > 0);
ebfd146a
IR
2287}
2288
2289
2290/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2291 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2292
2293static void
642fce57 2294vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 2295{
b9787581 2296 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2297 imm_use_iterator imm_iter;
355fe088 2298 gimple *use_stmt;
b9787581 2299 stmt_vec_info use_vinfo;
d755c7ef 2300 slp_tree child;
f2c74cc4 2301 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2302 int j;
2303
2304 /* Propagate hybrid down the SLP tree. */
2305 if (stype == hybrid)
2306 ;
2307 else if (HYBRID_SLP_STMT (stmt_vinfo))
2308 stype = hybrid;
2309 else
2310 {
2311 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2312 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2313 /* If we get a pattern stmt here we have to use the LHS of the
2314 original stmt for immediate uses. */
b9787581 2315 gimple *stmt = stmt_vinfo->stmt;
2935d994
RB
2316 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2317 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
10681ce8 2318 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo)->stmt;
e7baeb39
RB
2319 tree def;
2320 if (gimple_code (stmt) == GIMPLE_PHI)
2321 def = gimple_phi_result (stmt);
2322 else
2323 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2324 if (def)
2325 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870 2326 {
6585ff8f
RS
2327 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2328 if (!use_vinfo)
29764870 2329 continue;
29764870
RB
2330 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2331 && STMT_VINFO_RELATED_STMT (use_vinfo))
10681ce8 2332 use_vinfo = STMT_VINFO_RELATED_STMT (use_vinfo);
29764870
RB
2333 if (!STMT_SLP_TYPE (use_vinfo)
2334 && (STMT_VINFO_RELEVANT (use_vinfo)
2335 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2336 && !(gimple_code (use_stmt) == GIMPLE_PHI
2337 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2338 {
2339 if (dump_enabled_p ())
2340 {
2341 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2342 "def in non-SLP stmt: ");
2343 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2344 }
2345 stype = hybrid;
2346 }
29764870 2347 }
642fce57 2348 }
ebfd146a 2349
502f0263
RB
2350 if (stype == hybrid
2351 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2352 {
2353 if (dump_enabled_p ())
2354 {
2355 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
b9787581 2356 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_vinfo->stmt, 0);
b1af7da6
RB
2357 }
2358 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2359 }
ebfd146a 2360
642fce57 2361 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
603cca93 2362 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
90dd6e3d 2363 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2364}
f2c74cc4 2365
642fce57 2366/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2367
642fce57
RB
2368static tree
2369vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2370{
2371 walk_stmt_info *wi = (walk_stmt_info *)data;
6585ff8f 2372 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
642fce57
RB
2373
2374 if (wi->is_lhs)
2375 return NULL_TREE;
2376
c98d0595
RS
2377 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2378 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
642fce57 2379 {
6585ff8f 2380 if (dump_enabled_p ())
b1af7da6 2381 {
6585ff8f
RS
2382 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2383 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt_info->stmt, 0);
b1af7da6 2384 }
6585ff8f 2385 STMT_SLP_TYPE (def_stmt_info) = hybrid;
642fce57
RB
2386 }
2387
2388 return NULL_TREE;
ebfd146a
IR
2389}
2390
642fce57
RB
2391static tree
2392vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
6585ff8f 2393 walk_stmt_info *wi)
642fce57 2394{
6585ff8f
RS
2395 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2396 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
642fce57
RB
2397 /* If the stmt is in a SLP instance then this isn't a reason
2398 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2399 if (! STMT_SLP_TYPE (use_vinfo)
2400 && (STMT_VINFO_RELEVANT (use_vinfo)
2401 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2402 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2403 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2404 ;
2405 else
642fce57
RB
2406 *handled = true;
2407 return NULL_TREE;
2408}
ebfd146a
IR
2409
2410/* Find stmts that must be both vectorized and SLPed. */
2411
2412void
2413vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2414{
2415 unsigned int i;
9771b263 2416 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2417 slp_instance instance;
2418
adac3a68 2419 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
ebfd146a 2420
642fce57
RB
2421 /* First walk all pattern stmt in the loop and mark defs of uses as
2422 hybrid because immediate uses in them are not recorded. */
2423 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2424 {
2425 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2426 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2427 gsi_next (&gsi))
2428 {
355fe088 2429 gimple *stmt = gsi_stmt (gsi);
6585ff8f 2430 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
642fce57
RB
2431 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2432 {
2433 walk_stmt_info wi;
2434 memset (&wi, 0, sizeof (wi));
6585ff8f 2435 wi.info = loop_vinfo;
642fce57 2436 gimple_stmt_iterator gsi2
10681ce8 2437 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
642fce57
RB
2438 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2439 vect_detect_hybrid_slp_1, &wi);
2440 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2441 vect_detect_hybrid_slp_2,
2442 vect_detect_hybrid_slp_1, &wi);
2443 }
2444 }
2445 }
2446
2447 /* Then walk the SLP instance trees marking stmts with uses in
2448 non-SLP stmts as hybrid, also propagating hybrid down the
2449 SLP tree, collecting the above info on-the-fly. */
9771b263 2450 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2451 {
2452 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2453 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2454 i, pure_slp);
2455 }
ebfd146a
IR
2456}
2457
a70d6342 2458
2c515559
RS
2459/* Initialize a bb_vec_info struct for the statements between
2460 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2461
2462_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
ca823c85
RB
2463 gimple_stmt_iterator region_end_in,
2464 vec_info_shared *shared)
2465 : vec_info (vec_info::bb, init_cost (NULL), shared),
2c515559
RS
2466 bb (gsi_bb (region_begin_in)),
2467 region_begin (region_begin_in),
2468 region_end (region_end_in)
a70d6342 2469{
a70d6342
IR
2470 gimple_stmt_iterator gsi;
2471
61d371eb
RB
2472 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2473 gsi_next (&gsi))
a70d6342 2474 {
355fe088 2475 gimple *stmt = gsi_stmt (gsi);
a70d6342 2476 gimple_set_uid (stmt, 0);
4fbeb363 2477 add_stmt (stmt);
a70d6342
IR
2478 }
2479
2c515559 2480 bb->aux = this;
a70d6342
IR
2481}
2482
2483
2484/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2485 stmts in the basic block. */
2486
2c515559 2487_bb_vec_info::~_bb_vec_info ()
a70d6342 2488{
2c515559
RS
2489 for (gimple_stmt_iterator si = region_begin;
2490 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
a70d6342 2491 {
355fe088 2492 gimple *stmt = gsi_stmt (si);
a70d6342
IR
2493 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2494
2495 if (stmt_info)
2496 /* Free stmt_vec_info. */
2497 free_stmt_vec_info (stmt);
61d371eb
RB
2498
2499 /* Reset region marker. */
2500 gimple_set_uid (stmt, -1);
a70d6342
IR
2501 }
2502
2c515559 2503 bb->aux = NULL;
a70d6342
IR
2504}
2505
15944069
RS
2506/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2507 given then that child nodes have already been processed, and that
2508 their def types currently match their SLP node's def type. */
a70d6342
IR
2509
2510static bool
15944069
RS
2511vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2512 slp_instance node_instance,
2513 stmt_vector_for_cost *cost_vec)
a70d6342 2514{
b9787581
RS
2515 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2516 gimple *stmt = stmt_info->stmt;
bd2f172f
RB
2517 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2518
2519 /* For BB vectorization vector types are assigned here.
2520 Memory accesses already got their vector type assigned
2521 in vect_analyze_data_refs. */
2522 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2523 if (bb_vinfo
2524 && ! STMT_VINFO_DATA_REF (stmt_info))
a70d6342 2525 {
1f3cb663
RS
2526 tree vectype, nunits_vectype;
2527 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2528 &nunits_vectype))
2529 /* We checked this when building the node. */
2530 gcc_unreachable ();
2531 if (vectype == boolean_type_node)
bd2f172f 2532 {
1f3cb663
RS
2533 vectype = vect_get_mask_type_for_stmt (stmt_info);
2534 if (!vectype)
2535 /* vect_get_mask_type_for_stmt has already explained the
2536 failure. */
2537 return false;
bd2f172f
RB
2538 }
2539
b9787581 2540 stmt_vec_info sstmt_info;
15944069 2541 unsigned int i;
b9787581
RS
2542 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2543 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
a70d6342
IR
2544 }
2545
8b7e9dba
RS
2546 /* Calculate the number of vector statements to be created for the
2547 scalar stmts in this node. For SLP reductions it is equal to the
2548 number of vector statements in the children (which has already been
2549 calculated by the recursive call). Otherwise it is the number of
2c53b149 2550 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
8b7e9dba 2551 VF divided by the number of elements in a vector. */
2c53b149
RB
2552 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2553 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
8b7e9dba
RS
2554 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2555 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2556 else
2557 {
d9f21f6a 2558 poly_uint64 vf;
8b7e9dba
RS
2559 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2560 vf = loop_vinfo->vectorization_factor;
2561 else
2562 vf = 1;
2563 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2564 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2565 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2566 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2567 }
2568
15944069
RS
2569 bool dummy;
2570 return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
2571}
2572
/* Analyze statements contained in SLP tree NODE after recursively analyzing
   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.

   Return true if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
				  slp_instance node_instance,
				  scalar_stmts_to_slp_tree_map_t *visited,
				  scalar_stmts_to_slp_tree_map_t *lvisited,
				  stmt_vector_for_cost *cost_vec)
{
  int i, j;
  slp_tree child;

  /* External and constant defs need no analysis of their own.  */
  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  /* If we already analyzed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.  VISITED holds nodes
     from previously analyzed instances, LVISITED those from the instance
     currently being analyzed.  */
  slp_tree *leader;
  if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
      || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
    {
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
	= SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
      return true;
    }

  /* The SLP graph is acyclic so not caching whether we failed or succeeded
     doesn't result in any issue since we throw away the lvisited set
     when we fail.  */
  lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);

  /* Recurse into the children first; their analysis results (e.g. the
     number of vector stmts) are needed below.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
					   visited, lvisited, cost_vec))
      return false;

  /* Push SLP node def-type to stmt operands.  The representative scalar
     stmt of each non-internal child temporarily gets the child's def-type
     so vect_analyze_stmt sees operands as the SLP tree classifies them.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
	= SLP_TREE_DEF_TYPE (child);
  bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
						 cost_vec);
  /* Restore def-types.  This must happen even on failure, hence the
     push/analyze/restore sequence rather than an early return.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
	= vect_internal_def;
  if (! res)
    return false;

  return true;
}
2629
2630
/* Analyze statements in SLP instances of VINFO.  Return true if the
   operations are supported. */

bool
vect_slp_analyze_operations (vec_info *vinfo)
{
  slp_instance instance;
  int i;

  DUMP_VECT_SCOPE ("vect_slp_analyze_operations");

  /* Map of scalar stmt sets to the SLP node that was analyzed for them,
     shared across all instances so equal nodes are analyzed only once.  */
  scalar_stmts_to_slp_tree_map_t *visited
    = new scalar_stmts_to_slp_tree_map_t ();
  /* Note: I is only advanced on success; on failure ordered_remove
     shifts the next instance into slot I.  */
  for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
    {
      scalar_stmts_to_slp_tree_map_t lvisited;
      stmt_vector_for_cost cost_vec;
      cost_vec.create (2);
      if (!vect_slp_analyze_node_operations (vinfo,
					     SLP_INSTANCE_TREE (instance),
					     instance, visited, &lvisited,
					     &cost_vec))
	{
	  /* Analysis failed: drop the whole instance and discard both the
	     locally visited nodes (lvisited) and the gathered costs.  */
	  slp_tree node = SLP_INSTANCE_TREE (instance);
	  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "removing SLP instance operations starting from: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
	  vect_free_slp_instance (instance, false);
	  vinfo->slp_instances.ordered_remove (i);
	  cost_vec.release ();
	}
      else
	{
	  /* Success: promote the locally visited nodes into the global
	     map (copying the key vectors) and commit the costs.  */
	  for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
	       x != lvisited.end(); ++x)
	    visited->put ((*x).first.copy (), (*x).second);
	  i++;

	  add_stmt_costs (vinfo->target_cost_data, &cost_vec);
	  cost_vec.release ();
	}
    }
  delete visited;

  /* Vectorization is worthwhile only if at least one instance survived.  */
  return !vinfo->slp_instances.is_empty ();
}
2678
6eddf228
RB
2679
/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  */

static void
vect_bb_slp_scalar_cost (basic_block bb,
			 slp_tree node, vec<bool, va_heap> *life,
			 stmt_vector_for_cost *cost_vec)
{
  unsigned i;
  stmt_vec_info stmt_info;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      ssa_op_iter op_iter;
      def_operand_p def_p;

      /* Already known to be kept live by an earlier sibling — skip.  */
      if ((*life)[i])
	continue;

      /* If there is a non-vectorized use of the defs then the scalar
	 stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
	{
	  imm_use_iterator use_iter;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
	    if (!is_gimple_debug (use_stmt)
		&& (! vect_stmt_in_region_p (stmt_info->vinfo, use_stmt)
		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
	      {
		(*life)[i] = true;
		BREAK_FROM_IMM_USE_STMT (use_iter);
	      }
	}
      if ((*life)[i])
	continue;

      /* Count scalar stmts only once.  A stmt can appear in several SLP
	 nodes; the visited flag (reset by the caller afterwards) ensures
	 a single cost entry.  */
      if (gimple_visited_p (stmt))
	continue;
      gimple_set_visited (stmt, true);

      /* Classify the scalar stmt for the target cost model.  */
      vect_cost_for_stmt kind;
      if (STMT_VINFO_DATA_REF (stmt_info))
	{
	  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	    kind = scalar_load;
	  else
	    kind = scalar_store;
	}
      else
	kind = scalar_stmt;
      record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
    }

  auto_vec<bool, 20> subtree_life;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	{
	  /* Do not directly pass LIFE to the recursive call, copy it to
	     confine changes in the callee to the current child/subtree.  */
	  subtree_life.safe_splice (*life);
	  vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
	  subtree_life.truncate (0);
	}
    }
}
2754
69f11a13
IR
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  Accumulate per-stmt scalar costs of all
     instances and feed them through a fresh target cost model.  */
  stmt_vector_for_cost scalar_costs;
  scalar_costs.create (0);
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* LIFE tracks which scalar stmts stay live after vectorization
	 (and hence are not counted); starts all-false per instance.  */
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
			       SLP_INSTANCE_TREE (instance),
			       &life, &scalar_costs);
    }
  void *target_cost_data = init_cost (NULL);
  add_stmt_costs (target_cost_data, &scalar_costs);
  scalar_costs.release ();
  unsigned dummy;
  finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
  destroy_cost_data (target_cost_data);

  /* Unset visited flag.  vect_bb_slp_scalar_cost used it to count each
     scalar stmt only once; clear it for subsequent passes.  */
  for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    gimple_set_visited (gsi_stmt (gsi), false);

  /* Complete the target-specific cost calculation.  The vector costs
     were accumulated in BB_VINFO's cost data during analysis.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
	       &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
		   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is more than the cost of scalar
     version.  Note that we err on the vector side for equal cost because
     the cost estimate is otherwise quite pessimistic (constant uses are
     free on the scalar side but cost a load on the vector side for
     example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}
2815
a5b50aa1
RB
/* Check if the basic block can be vectorized.  Returns a bb_vec_info
   if so and sets fatal to true if failure is independent of
   current_vector_size.  */

static bb_vec_info
vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
		       gimple_stmt_iterator region_end,
		       vec<data_reference_p> datarefs, int n_stmts,
		       bool &fatal, vec_info_shared *shared)
{
  bb_vec_info bb_vinfo;
  slp_instance instance;
  int i;
  poly_uint64 min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: too many instructions in "
			 "basic block.\n");
      /* DATAREFS ownership has not yet passed to a bb_vinfo; free here.  */
      free_data_refs (datarefs);
      return NULL;
    }

  bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
  if (!bb_vinfo)
    return NULL;

  /* From here on BB_VINFO owns DATAREFS; 'delete bb_vinfo' cleans up.  */
  BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
  bb_vinfo->shared->save_datarefs ();

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data-ref in basic "
			 "block.\n");

      delete bb_vinfo;
      return NULL;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: not enough data-refs in "
			 "basic block.\n");

      delete bb_vinfo;
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data access in "
			 "basic block.\n");

      delete bb_vinfo;
      return NULL;
    }

  /* If there are no grouped stores in the region there is no need
     to continue with pattern recog as vect_analyze_slp will fail
     anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: no grouped stores in "
			 "basic block.\n");

      delete bb_vinfo;
      return NULL;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Failed to SLP the basic block.\n");
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: failed to find SLP opportunities "
			   "in basic block.\n");
	}

      delete bb_vinfo;
      return NULL;
    }

  vect_record_base_alignments (bb_vinfo);

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  Instances failing either check
     are removed in place (I only advances on success).  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      if (! vect_slp_analyze_and_verify_instance_alignment (instance)
	  || ! vect_slp_analyze_instance_dependence (instance))
	{
	  slp_tree node = SLP_INSTANCE_TREE (instance);
	  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "removing SLP instance operations starting from: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
	  vect_free_slp_instance (instance, false);
	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
	  continue;
	}

      /* Mark all the statements that we want to vectorize as pure SLP and
	 relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    {
      delete bb_vinfo;
      return NULL;
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: bad operation in basic block.\n");

      delete bb_vinfo;
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization is not "
			 "profitable.\n");

      delete bb_vinfo;
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}
2983
2984
428db0ba
RB
/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  gimple_stmt_iterator gsi;
  bool any_vectorized = false;
  auto_vector_sizes vector_sizes;

  DUMP_VECT_SCOPE ("vect_slp_analyze_bb");

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
  unsigned int next_size = 0;

  gsi = gsi_start_bb (bb);

  poly_uint64 autodetected_vector_size = 0;
  /* Walk BB, carving it into maximal regions of stmts whose data refs
     we can handle, and try to SLP-vectorize each region, possibly
     re-trying with different vector sizes.  */
  while (1)
    {
      if (gsi_end_p (gsi))
	break;

      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      /* Extend the region until a stmt whose data reference we cannot
	 handle (or the end of BB); debug stmts are skipped entirely.  */
      for (; !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (is_gimple_debug (stmt))
	    continue;
	  insns++;

	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
	    vect_location = stmt;

	  if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
	    break;
	}

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
	{
	  gsi_next (&gsi);
	  continue;
	}

      gimple_stmt_iterator region_end = gsi;

      bool vectorized = false;
      bool fatal = false;
      vec_info_shared shared;
      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
					datarefs, insns, fatal, &shared);
      if (bb_vinfo
	  && dbg_cnt (vect_slp))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");

	  bb_vinfo->shared->check_datarefs ();
	  vect_schedule_slp (bb_vinfo);

	  unsigned HOST_WIDE_INT bytes;
	  if (current_vector_size.is_constant (&bytes))
	    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			     "basic block part vectorized using "
			     HOST_WIDE_INT_PRINT_UNSIGNED " byte "
			     "vectors\n", bytes);
	  else
	    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			     "basic block part vectorized using variable "
			     "length vectors\n");

	  vectorized = true;
	}
      delete bb_vinfo;

      any_vectorized |= vectorized;

      /* Remember the size the first (auto-detected) attempt used so we
	 do not retry it from the explicit size list.  */
      if (next_size == 0)
	autodetected_vector_size = current_vector_size;

      if (next_size < vector_sizes.length ()
	  && known_eq (vector_sizes[next_size], autodetected_vector_size))
	next_size += 1;

      if (vectorized
	  || next_size == vector_sizes.length ()
	  || known_eq (current_vector_size, 0U)
	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
	     vector sizes will fail do not bother iterating.  */
	  || fatal)
	{
	  if (gsi_end_p (region_end))
	    break;

	  /* Skip the unhandled stmt.  */
	  gsi_next (&gsi);

	  /* And reset vector sizes.  */
	  current_vector_size = 0;
	  next_size = 0;
	}
      else
	{
	  /* Try the next biggest vector size.  */
	  current_vector_size = vector_sizes[next_size++];
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "***** Re-trying analysis with "
			       "vector size ");
	      dump_dec (MSG_NOTE, current_vector_size);
	      dump_printf (MSG_NOTE, "\n");
	    }

	  /* Start over.  */
	  gsi = region_begin;
	}
    }

  return any_vectorized;
}
3113
3114
e4af0bc4
IE
3115/* Return 1 if vector type of boolean constant which is OPNUM
3116 operand in statement STMT is a boolean vector. */
3117
3118static bool
3119vect_mask_constant_operand_p (gimple *stmt, int opnum)
3120{
3121 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3122 enum tree_code code = gimple_expr_code (stmt);
3123 tree op, vectype;
e4af0bc4
IE
3124 enum vect_def_type dt;
3125
3126 /* For comparison and COND_EXPR type is chosen depending
3127 on the other comparison operand. */
3128 if (TREE_CODE_CLASS (code) == tcc_comparison)
3129 {
3130 if (opnum)
3131 op = gimple_assign_rhs1 (stmt);
3132 else
3133 op = gimple_assign_rhs2 (stmt);
3134
894dd753 3135 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3136 gcc_unreachable ();
3137
3138 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3139 }
3140
3141 if (code == COND_EXPR)
3142 {
3143 tree cond = gimple_assign_rhs1 (stmt);
3144
3145 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1
RB
3146 op = cond;
3147 else if (opnum)
e4af0bc4
IE
3148 op = TREE_OPERAND (cond, 1);
3149 else
3150 op = TREE_OPERAND (cond, 0);
3151
894dd753 3152 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3153 gcc_unreachable ();
3154
3155 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3156 }
3157
3158 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3159}
3160
018b2744
RS
/* Build a variable-length vector in which the elements in ELTS are repeated
   to a fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
   RESULTS and add any new instructions to SEQ.

   The approach we use is:

   (1) Find a vector mode VM with integer elements of mode IM.

   (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
       ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
       from small vectors to IM.

   (3) Duplicate each ELTS'[I] into a vector of mode VM.

   (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
       correct byte contents.

   (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.

   We try to find the largest IM for which this sequence works, in order
   to cut down on the number of interleaves.  */

void
duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
			  unsigned int nresults, vec<tree> &results)
{
  unsigned int nelts = elts.length ();
  tree element_type = TREE_TYPE (vector_type);

  /* (1) Find a vector mode VM with integer elements of mode IM.  */
  unsigned int nvectors = 1;
  tree new_vector_type;
  tree permutes[2];
  if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
				       &nvectors, &new_vector_type,
				       permutes))
    gcc_unreachable ();

  /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
  unsigned int partial_nelts = nelts / nvectors;
  tree partial_vector_type = build_vector_type (element_type, partial_nelts);

  tree_vector_builder partial_elts;
  auto_vec<tree, 32> pieces (nvectors * 2);
  pieces.quick_grow (nvectors * 2);
  for (unsigned int i = 0; i < nvectors; ++i)
    {
      /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
	     ELTS' has mode IM.  */
      partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
      for (unsigned int j = 0; j < partial_nelts; ++j)
	partial_elts.quick_push (elts[i * partial_nelts + j]);
      tree t = gimple_build_vector (seq, &partial_elts);
      t = gimple_build (seq, VIEW_CONVERT_EXPR,
			TREE_TYPE (new_vector_type), t);

      /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
      pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
    }

  /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
	 correct byte contents.

     We need to repeat the following operation log2(nvectors) times:

	out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
	out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);

     However, if each input repeats every N elements and the VF is
     a multiple of N * 2, the HI result is the same as the LO.  */
  unsigned int in_start = 0;
  unsigned int out_start = nvectors;
  unsigned int hi_start = nvectors / 2;
  /* A bound on the number of outputs needed to produce NRESULTS results
     in the final iteration.  */
  unsigned int noutputs_bound = nvectors * nresults;
  for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
    {
      noutputs_bound /= 2;
      unsigned int limit = MIN (noutputs_bound, nvectors);
      for (unsigned int i = 0; i < limit; ++i)
	{
	  /* Odd output whose repeat period already divides the vector:
	     identical to the preceding even output, so reuse it.  */
	  if ((i & 1) != 0
	      && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
			     2 * in_repeat))
	    {
	      pieces[out_start + i] = pieces[out_start + i - 1];
	      continue;
	    }

	  tree output = make_ssa_name (new_vector_type);
	  tree input1 = pieces[in_start + (i / 2)];
	  tree input2 = pieces[in_start + (i / 2) + hi_start];
	  gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
					       input1, input2,
					       permutes[i & 1]);
	  gimple_seq_add_stmt (seq, stmt);
	  pieces[out_start + i] = output;
	}
      /* PIECES is used double-buffered: swap input and output halves.  */
      std::swap (in_start, out_start);
    }

  /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.  */
  results.reserve (nresults);
  for (unsigned int i = 0; i < nresults; ++i)
    if (i < nvectors)
      results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
					pieces[in_start + i]));
    else
      results.quick_push (results[i - nvectors]);
}
3272
e4af0bc4 3273
b8698a0f
L
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
			   unsigned int op_num, unsigned int number_of_vectors)
{
  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  stmt_vec_info stmt_vinfo = stmts[0];
  gimple *stmt = stmt_vinfo->stmt;
  unsigned HOST_WIDE_INT nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  /* Statements building non-constant element conversions; emitted just
     before the vector def they feed.  */
  gimple_seq ctor_seq = NULL;
  auto_vec<tree, 16> permute_results;

  /* Check if vector type is a boolean vector.  */
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (stmt_vinfo, op_num))
    vector_type
      = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
  else
    vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  /* When using duplicate_and_interleave, we just need one element for
     each scalar statement.  */
  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
    nunits = group_size;

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      /* Walk the group in reverse; ELTS is filled from the back so the
	 final vector has the scalars in group order.  */
      for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
        {
	  stmt = stmt_vinfo->stmt;
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
	    {
	      /* Extract operand OP_NUM of this scalar stmt; the position
		 depends on the statement kind.  */
	      switch (code)
		{
		  case COND_EXPR:
		    {
		      tree cond = gimple_assign_rhs1 (stmt);
		      if (TREE_CODE (cond) == SSA_NAME)
			op = gimple_op (stmt, op_num + 1);
		      else if (op_num == 0 || op_num == 1)
			/* Operand of the embedded comparison.  */
			op = TREE_OPERAND (cond, op_num);
		      else
			{
			  if (op_num == 2)
			    op = gimple_assign_rhs2 (stmt);
			  else
			    op = gimple_assign_rhs3 (stmt);
			}
		    }
		    break;

		  case CALL_EXPR:
		    op = gimple_call_arg (stmt, op_num);
		    break;

		  case LSHIFT_EXPR:
		  case RSHIFT_EXPR:
		  case LROTATE_EXPR:
		  case RROTATE_EXPR:
		    op = gimple_op (stmt, op_num + 1);
		    /* Unlike the other binary operators, shifts/rotates have
		       the shift count being int, instead of the same type as
		       the lhs, so make sure the scalar is the right type if
		       we are dealing with vectors of
		       long long/long/short/char.  */
		    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
		      op = fold_convert (TREE_TYPE (vector_type), op);
		    break;

		  default:
		    op = gimple_op (stmt, op_num + 1);
		    break;
		}
	    }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
	  tree orig_op = op;
	  /* If the scalar type differs from the vector element type,
	     convert OP — by folding for constants, or by emitting an
	     explicit conversion stmt into CTOR_SEQ otherwise.  */
	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
	    {
	      if (CONSTANT_CLASS_P (op))
		{
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      /* Can't use VIEW_CONVERT_EXPR for booleans because
			 of possibly different sizes of scalar value and
			 vector element.  */
		      if (integer_zerop (op))
			op = build_int_cst (TREE_TYPE (vector_type), 0);
		      else if (integer_onep (op))
			op = build_all_ones_cst (TREE_TYPE (vector_type));
		      else
			gcc_unreachable ();
		    }
		  else
		    op = fold_unary (VIEW_CONVERT_EXPR,
				     TREE_TYPE (vector_type), op);
		  gcc_assert (op && CONSTANT_CLASS_P (op));
		}
	      else
		{
		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
		  gimple *init_stmt;
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      tree true_val
			= build_all_ones_cst (TREE_TYPE (vector_type));
		      tree false_val
			= build_zero_cst (TREE_TYPE (vector_type));
		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						       op, true_val,
						       false_val);
		    }
		  else
		    {
		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
				   op);
		      init_stmt
			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
					       op);
		    }
		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
		  op = new_temp;
		}
	    }
	  elts[number_of_places_left_in_vector] = op;
	  if (!CONSTANT_CLASS_P (op))
	    constant_p = false;
	  /* If an element is defined inside the BB region being
	     vectorized the vector def must be placed after the last
	     scalar def, not at region start.  */
	  if (TREE_CODE (orig_op) == SSA_NAME
	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
	      && STMT_VINFO_BB_VINFO (stmt_vinfo)
	      && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
		  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
	    place_after_defs = true;

          if (number_of_places_left_in_vector == 0)
            {
	      /* The vector is full: materialize it.  */
	      if (constant_p
		  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
		  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
		vec_cst = gimple_build_vector (&ctor_seq, &elts);
	      else
		{
		  /* Variable-length vector: build all results at once via
		     interleaving and pick the one for this copy.  */
		  if (vec_oprnds->is_empty ())
		    duplicate_and_interleave (&ctor_seq, vector_type, elts,
					      number_of_vectors,
					      permute_results);
		  vec_cst = permute_results[number_of_vectors - j - 1];
		}
	      tree init;
	      gimple_stmt_iterator gsi;
	      if (place_after_defs)
		{
		  stmt_vec_info last_stmt_info
		    = vect_find_last_scalar_stmt_in_slp (slp_node);
		  gsi = gsi_for_stmt (last_stmt_info->stmt);
		  init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
					   &gsi);
		}
	      else
		init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
					 NULL);
	      if (ctor_seq != NULL)
		{
		  /* Emit pending conversion stmts right before the def of
		     INIT so they dominate it.  */
		  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
		  gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
		  ctor_seq = NULL;
		}
	      voprnds.quick_push (init);
	      place_after_defs = false;
	      /* Reset state for the next vector.  */
	      number_of_places_left_in_vector = nunits;
	      constant_p = true;
	      elts.new_vector (vector_type, nunits, 1);
	      elts.quick_grow (nunits);
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
	    neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
	  /* No neutral op: repeat the vectors already created.  */
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
3540
3541
3542/* Get vectorized definitions from SLP_NODE that contains corresponding
3543 vectorized def-stmts. */
3544
3545static void
9771b263 3546vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
3547{
3548 tree vec_oprnd;
16edaeb8 3549 stmt_vec_info vec_def_stmt_info;
ebfd146a
IR
3550 unsigned int i;
3551
9771b263 3552 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3553
16edaeb8 3554 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
ebfd146a 3555 {
16edaeb8
RS
3556 gcc_assert (vec_def_stmt_info);
3557 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3558 vec_oprnd = gimple_phi_result (vec_def_phi);
e7baeb39 3559 else
16edaeb8 3560 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
9771b263 3561 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
3562 }
3563}
3564
3565
/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.
   OPS holds one scalar operand per operand position of the node's stmts;
   for each one a vec<tree> of vector defs is pushed onto *VEC_OPRNDS.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
		   vec<vec<tree> > *vec_oprnds)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
	 node or we need to create them (for invariants and constants).  We
	 check if the LHS of the first stmt of the next child matches OPRND.
	 If it does, we found the correct child.  Otherwise, we call
	 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
	 to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
	{
	  child = SLP_TREE_CHILDREN (slp_node)[child_index];

	  /* We have to check both pattern and original def, if available.  */
	  if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	    {
	      stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
	      stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
	      tree first_def_op;

	      /* PHIs expose their definition via the PHI result rather
		 than a stmt LHS.  */
	      if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
		first_def_op = gimple_phi_result (first_def);
	      else
		first_def_op = gimple_get_lhs (first_def_info->stmt);
	      if (operand_equal_p (oprnd, first_def_op, 0)
		  || (related
		      && operand_equal_p (oprnd,
					  gimple_get_lhs (related->stmt), 0)))
		{
		  /* The number of vector defs is determined by the number of
		     vector statements in the node from which we get those
		     statements.  */
		  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
		  vectorized_defs = true;
		  child_index++;
		}
	    }
	  else
	    /* Non-internal (constant/external) children never provide
	       vectorized defs; just move past them.  */
	    child_index++;
	}

      if (!vectorized_defs)
	{
	  /* NUMBER_OF_VECTS is only (re)computed for the first operand;
	     subsequent invariant operands reuse the same count.  */
	  if (i == 0)
	    {
	      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
	      /* Number of vector stmts was calculated according to LHS in
		 vect_schedule_slp_instance (), fix it by replacing LHS with
		 RHS, if necessary.  See vect_get_smallest_scalar_type () for
		 details.  */
	      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
					     &rhs_size_unit);
	      if (rhs_size_unit != lhs_size_unit)
		{
		  number_of_vects *= rhs_size_unit;
		  number_of_vects /= lhs_size_unit;
		}
	    }
	}

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
	 looking for initial loop invariant values.  */
      if (vectorized_defs)
	/* The defs are already vectorized.  */
	vect_get_slp_vect_defs (child, &vec_defs);
      else
	/* Build vectors from scalar defs.  */
	vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
				   number_of_vects);

      vec_oprnds->quick_push (vec_defs);
    }
}
3664
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  VF is the vectorization factor; *N_PERMS is set to
   the number of distinct VEC_PERM_EXPRs that are (or would be) emitted.
   Returns false if the permutation cannot be implemented.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
			      gimple_stmt_iterator *gsi, poly_uint64 vf,
			      slp_instance slp_node_instance, bool analyze_only,
			      unsigned *n_perms)
{
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  vec_info *vinfo = stmt_info->vinfo;
  tree mask_element_type = NULL_TREE, mask_type;
  int vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  unsigned int mask_element;
  machine_mode mode;
  unsigned HOST_WIDE_INT nunits, const_vf;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  /* Work relative to the first element of the load group.  */
  stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  mode = TYPE_MODE (vectype);

  /* At the moment, all permutations are represented using per-element
     indices, so we can't cope with variable vector lengths or
     vectorization factors.  */
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
      || !vf.is_constant (&const_vf))
    return false;

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  vec_perm_builder mask (nunits, nunits, 1);
  mask.quick_grow (nunits);
  vec_perm_indices indices;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  unsigned int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  /* True while the mask built so far is the identity permutation.  */
  bool noop_p = true;
  *n_perms = 0;

  for (unsigned int j = 0; j < const_vf; j++)
    {
      for (int k = 0; k < group_size; k++)
	{
	  /* I is the flat index of the required scalar element; split it
	     into the vector it lives in and the lane within that vector.  */
	  unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
			    + j * DR_GROUP_SIZE (stmt_info));
	  vec_index = i / nunits;
	  mask_element = i % nunits;
	  if (vec_index == first_vec_index
	      || first_vec_index == -1)
	    {
	      first_vec_index = vec_index;
	    }
	  else if (vec_index == second_vec_index
		   || second_vec_index == -1)
	    {
	      second_vec_index = vec_index;
	      /* Lanes of the second operand are numbered after those of
		 the first in a two-operand VEC_PERM_EXPR mask.  */
	      mask_element += nunits;
	    }
	  else
	    {
	      /* A VEC_PERM_EXPR only takes two input vectors; needing a
		 third means the permutation is not representable.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "permutation requires at "
				   "least three vectors ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    stmt_info->stmt, 0);
		}
	      gcc_assert (analyze_only);
	      return false;
	    }

	  gcc_assert (mask_element < 2 * nunits);
	  if (mask_element != index)
	    noop_p = false;
	  mask[index++] = mask_element;

	  if (index == nunits && !noop_p)
	    {
	      /* Mask for one output vector is complete and non-trivial;
		 ask the target whether it can do this permutation.  */
	      indices.new_vector (mask, 2, nunits);
	      if (!can_vec_perm_const_p (mode, indices))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				       vect_location,
				       "unsupported vect permute { ");
		      for (i = 0; i < nunits; ++i)
			{
			  dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
			  dump_printf (MSG_MISSED_OPTIMIZATION, " ");
			}
		      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		    }
		  gcc_assert (analyze_only);
		  return false;
		}

	      ++*n_perms;
	    }

	  if (index == nunits)
	    {
	      if (!analyze_only)
		{
		  tree mask_vec = NULL_TREE;

		  if (! noop_p)
		    mask_vec = vec_perm_indices_to_tree (mask_type, indices);

		  if (second_vec_index == -1)
		    second_vec_index = first_vec_index;

		  /* Generate the permute statement if necessary.  */
		  tree first_vec = dr_chain[first_vec_index];
		  tree second_vec = dr_chain[second_vec_index];
		  stmt_vec_info perm_stmt_info;
		  if (! noop_p)
		    {
		      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
		      tree perm_dest
			= vect_create_destination_var (gimple_assign_lhs (stmt),
						       vectype);
		      perm_dest = make_ssa_name (perm_dest);
		      gassign *perm_stmt
			= gimple_build_assign (perm_dest, VEC_PERM_EXPR,
					       first_vec, second_vec,
					       mask_vec);
		      perm_stmt_info
			= vect_finish_stmt_generation (stmt_info, perm_stmt,
						       gsi);
		    }
		  else
		    /* If mask was NULL_TREE generate the requested
		       identity transform.  */
		    perm_stmt_info = vinfo->lookup_def (first_vec);

		  /* Store the vector statement in NODE.  */
		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
		    = perm_stmt_info;
		}

	      /* Reset state for the next output vector's mask.  */
	      index = 0;
	      first_vec_index = -1;
	      second_vec_index = -1;
	      noop_p = true;
	    }
	}
    }

  return true;
}
3854
/* Vectorize SLP instance tree in postorder.  Returns true if the root
   stmt of NODE was a store.  BST_MAP maps already-vectorized scalar stmt
   sets to their SLP node so shared subtrees are vectorized only once.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
			    scalar_stmts_to_slp_tree_map_t *bst_map)
{
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  /* Only internal-def nodes carry stmts to vectorize.  */
  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
      return false;
    }

  bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
  /* Postorder: children first, so their vector defs exist when this
     node's stmts are transformed.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
	stmt_vec_info child_stmt_info;
	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
	  STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
      }

  stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE,vect_location,
		       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
  si = gsi_for_stmt (last_stmt_info->stmt);

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      stmt_vec_info ostmt_info;
      /* MASK records, per group lane, which of the two operations that
	 lane performs (0 = CODE0, 1 = OCODE).  */
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
	{
	  gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
	  if (gimple_assign_rhs_code (ostmt) != code0)
	    {
	      mask.quick_push (1);
	      ocode = gimple_assign_rhs_code (ostmt);
	    }
	  else
	    mask.quick_push (0);
	}
      if (ocode != ERROR_MARK)
	{
	  vec<stmt_vec_info> v0;
	  vec<stmt_vec_info> v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  /* Vectorize the whole group with CODE0 (into V0), then
	     temporarily rewrite the first stmt's RHS code to OCODE and
	     vectorize again (into V1); the original code is restored
	     afterwards.  */
	  vect_transform_stmt (stmt_info, &si, &grouped_store, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt_info, &si, &grouped_store, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
	  for (j = 0; j < v0.length (); ++j)
	    {
	      /* Enforced by vect_build_slp_tree, which rejects variable-length
		 vectors for SLP_TREE_TWO_OPERATORS.  */
	      unsigned int const_nunits = nunits.to_constant ();
	      tree_vector_builder melts (mvectype, const_nunits, 1);
	      for (l = 0; l < const_nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  /* Select lane L from V0[j] (mask 0) or V1[j] (mask 1).  */
		  tree t = build_int_cst (meltype,
					  mask[k++] * const_nunits + l);
		  melts.quick_push (t);
		}
	      tmask = melts.build ();

	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]->stmt),
					   gimple_assign_lhs (v1[j]->stmt),
					   tmask);
	      SLP_TREE_VEC_STMTS (node).quick_push
		(vect_finish_stmt_generation (stmt_info, vstmt, &si));
	    }
	  v0.release ();
	  v1.release ();
	  return false;
	}
    }
  is_store = vect_transform_stmt (stmt_info, &si, &grouped_store, node,
				  instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
	stmt_vec_info child_stmt_info;
	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
	  STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
      }

  return is_store;
}
4016
dd34c087
JJ
4017/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4018 For loop vectorization this is done in vectorizable_call, but for SLP
4019 it needs to be deferred until end of vect_schedule_slp, because multiple
4020 SLP instances may refer to the same scalar stmt. */
4021
4022static void
4023vect_remove_slp_scalar_calls (slp_tree node)
4024{
b9787581 4025 gimple *new_stmt;
dd34c087
JJ
4026 gimple_stmt_iterator gsi;
4027 int i;
d755c7ef 4028 slp_tree child;
dd34c087
JJ
4029 tree lhs;
4030 stmt_vec_info stmt_info;
4031
603cca93 4032 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
dd34c087
JJ
4033 return;
4034
9771b263 4035 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 4036 vect_remove_slp_scalar_calls (child);
dd34c087 4037
b9787581 4038 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
dd34c087 4039 {
b9787581
RS
4040 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4041 if (!stmt || gimple_bb (stmt) == NULL)
dd34c087 4042 continue;
b9787581 4043 if (is_pattern_stmt_p (stmt_info)
dd34c087
JJ
4044 || !PURE_SLP_STMT (stmt_info))
4045 continue;
4046 lhs = gimple_call_lhs (stmt);
4047 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4048 set_vinfo_for_stmt (new_stmt, stmt_info);
4049 set_vinfo_for_stmt (stmt, NULL);
4050 STMT_VINFO_STMT (stmt_info) = new_stmt;
4051 gsi = gsi_for_stmt (stmt);
4052 gsi_replace (&gsi, new_stmt, false);
4053 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4054 }
4055}
ebfd146a 4056
/* Generate vector code for all SLP instances in the loop/basic block.
   Returns the IS_STORE result of the last scheduled instance; scalar
   grouped stores are removed from the IL afterwards.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;


  /* Shared across instances so identical scalar stmt sets are only
     vectorized once.  */
  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();
  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
					     instance, bst_map);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }
  delete bst_map;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ??? Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ??? For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (root);

      /* Remove the now-vectorized scalar store stmts of the root node.  */
      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
	   && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
	{
	  /* Stop at the first non-data-ref stmt; only stores carry a
	     data reference here.  */
	  if (!STMT_VINFO_DATA_REF (store_info))
	    break;

	  if (is_pattern_stmt_p (store_info))
	    store_info = STMT_VINFO_RELATED_STMT (store_info);
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  gsi = gsi_for_stmt (store_info);
	  unlink_stmt_vdef (store_info);
	  gsi_remove (&gsi, true);
	  release_defs (store_info);
	  free_stmt_vec_info (store_info);
	}
    }

  return is_store;
}