gcc/tree-vect-slp.c
/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2019 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

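  /* SLP nodes are reference-counted because sub-trees can be shared
     between parents; only tear NODE down once the last reference to it
     goes away.  */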
  if (--node->refcnt != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	{
	  gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
	  STMT_VINFO_NUM_SLP_USES (stmt_info)--;
	}
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_OPS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_SCALAR_OPS (node) = vNULL;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}

/* Create an SLP node for OPS.  */

static slp_tree
vect_create_new_slp_node (vec<tree> ops)
{
  slp_tree node;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = vNULL;
  SLP_TREE_SCALAR_OPS (node) = ops;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node) = vNULL;
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  return node;
}


/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Operands.  */
  vec<tree> ops;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool any_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->ops.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->any_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      oprnd_info->ops.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Return true if STMTS contains a pattern statement.  */

static bool
vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
{
  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (is_pattern_stmt_p (stmt_info))
      return true;
  return false;
}

/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */

int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
				      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
	return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
	result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}

/* Check whether it is possible to load COUNT elements of type ELT_TYPE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
				tree elt_type, unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
    return false;

  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
	{
	  /* Get the natural vector type for this SLP group size.  */
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type
	    = get_vectype_for_scalar_type (vinfo, int_type, count);
	  if (vector_type
	      && VECTOR_MODE_P (TYPE_MODE (vector_type))
	      && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
			   GET_MODE_SIZE (base_vector_mode)))
	    {
	      /* Try fusing consecutive sequences of COUNT / NVECTORS elements
		 together into elements of type INT_TYPE and using the result
		 to build NVECTORS vectors.  */
	      poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
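	      /* The two selectors below describe an interleave-low and an
		 interleave-high permute of two such vectors; two elements
		 per pattern and three patterns are enough for the stepped
		 vec_perm_builder encoding to cover the whole mask.  */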
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<stmt_vec_info> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  commutative_op = first_commutative_argument (ifn);

	  /* Masked load, only look at mask.  */
	  if (ifn == IFN_MASK_LOAD)
	    {
	      number_of_oprnds = 1;
	      /* Mask operand index.  */
	      first_op_idx = 5;
	    }
	}
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (code == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
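	  /* *SWAP selects the row: row 0 reads the operands in their
	     original order, row 1 reads the two comparison operands
	     swapped, and row 2 reads the then/else arms swapped (the
	     inverted-comparison case).  */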
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
					     first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt_info->stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
      if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
	oprnd = TREE_OPERAND (oprnd, 0);

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: can't analyze def for %T\n",
			     oprnd);

	  return -1;
	}

      if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
	oprnd_info->any_pattern = true;

      if (first)
	{
	  /* For the swapping logic below force vect_reduction_def
	     for the reduction op in a SLP reduction group.  */
	  if (!STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
	      && def_stmt_info)
	    dt = vect_reduction_def;
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     end in the reduction chain.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && !STMT_VINFO_DATA_REF (stmt_info)
		    && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		    && def_stmt_info
		    && !STMT_VINFO_DATA_REF (def_stmt_info)
		    && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
			== REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type)
	      || (!STMT_VINFO_DATA_REF (stmt_info)
		  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		  && ((!def_stmt_info
		       || STMT_VINFO_DATA_REF (def_stmt_info)
		       || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
			   != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
		      != (oprnd_info->first_dt != vect_reduction_def))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == commutative_op && !swapped)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "trying swapped operands\n");
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
						      type)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: invalid type of def "
				 "for variable-length SLP %T\n", oprnd);
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_external_def:
	  /* Make sure to demote the overall operand to external.  */
	  oprnd_info->first_dt = vect_external_def;
	  /* Fallthru.  */
	case vect_constant_def:
	  oprnd_info->def_stmts.quick_push (NULL);
	  oprnd_info->ops.quick_push (oprnd);
	  break;

	case vect_internal_def:
	case vect_reduction_def:
	  if (oprnd_info->first_dt == vect_reduction_def
	      && !STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && !STMT_VINFO_DATA_REF (def_stmt_info)
	      && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
		  == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	    {
	      /* For a SLP reduction chain we want to duplicate the
		 reduction to each of the chain members.  That gets
		 us a sane SLP graph (still the stmts are not 100%
		 correct wrt the initial values).  */
	      gcc_assert (!first);
	      oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
	      oprnd_info->ops.quick_push (oprnd_info->ops[0]);
	      break;
	    }
	  /* Fallthru.  */
	case vect_induction_def:
	  oprnd_info->def_stmts.quick_push (def_stmt_info);
	  oprnd_info->ops.quick_push (oprnd);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: illegal type of def %T\n",
			     oprnd);

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (first_op_cond)
	{
	  /* If there are already uses of this stmt in a SLP instance then
	     we've committed to the operand order and can't swap it.  */
	  if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: cannot swap operands of "
				 "shared stmt %G", stmt_info->stmt);
	      return -1;
	    }

	  /* To get rid of this swapping we have to move the stmt code
	     to the SLP tree as well (and gather it here per stmt).  */
	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      if (STMT_VINFO_REDUC_IDX (stmt_info) == 1)
		STMT_VINFO_REDUC_IDX (stmt_info) = 2;
	      else if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
		STMT_VINFO_REDUC_IDX (stmt_info) = 1;
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	{
	  /* Commutative ops need not reflect swapping, ops are in
	     the SLP tree.  */
	}
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "swapped operands to match def types in %G",
			 stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}

/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
   Return true if we can, meaning that this choice doesn't conflict with
   existing SLP nodes that use STMT_INFO.  */

static bool
vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
{
  tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
    return true;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      /* We maintain the invariant that if any statement in the group is
	 used, all other members of the group have the same vector type.  */
      stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info member_info = first_info;
      for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
	    || is_pattern_stmt_p (member_info))
	  break;

      if (!member_info)
	{
	  for (member_info = first_info; member_info;
	       member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	    STMT_VINFO_VECTYPE (member_info) = vectype;
	  return true;
	}
    }
  else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
	   && !is_pattern_stmt_p (stmt_info))
    {
      STMT_VINFO_VECTYPE (stmt_info) = vectype;
      return true;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: incompatible vector"
		       " types for: %G", stmt_info->stmt);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "  old vector type: %T\n", old_vectype);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "  new vector type: %T\n", vectype);
    }
  return false;
}

/* Try to infer and assign a vector type to all the statements in STMTS.
   Used only for BB vectorization.  */

static bool
vect_update_all_shared_vectypes (vec<stmt_vec_info> stmts)
{
  tree vectype, nunits_vectype;
  if (!vect_get_vector_types_for_stmt (stmts[0], &vectype,
				       &nunits_vectype, stmts.length ()))
    return false;

  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (!vect_update_shared_vectype (stmt_info, vectype))
      return false;

  return true;
}

/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
	return false;
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))
	  return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))
	return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
	return false;
    }
  return true;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

static bool
vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type in %G\n",
			 stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (STMT_VINFO_BB_VINFO (stmt_info)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unrolling required "
			 "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */

static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
			       unsigned int group_size, tree vectype,
			       tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
	elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}

/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (unsigned char *swap,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;
  bool load_p = false;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      vec_info *vinfo = stmt_info->vinfo;
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: unvectorizable statement %G",
			     stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: not GIMPLE_ASSIGN nor "
			     "GIMPLE_CALL %G", stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
					   &nunits_vectype, group_size)
	  || (nunits_vectype
	      && !vect_record_max_nunits (stmt_info, group_size,
					  nunits_vectype, max_nunits)))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      if (is_a <bb_vec_info> (vinfo)
	  && !vect_update_shared_vectype (stmt_info, vectype))
	continue;

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;

	  if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
	    load_p = true;
	  else if ((gimple_call_internal_p (call_stmt)
		    && (!vectorizable_internal_fn_p
			  (gimple_call_internal_fn (call_stmt))))
		   || gimple_call_tail_p (call_stmt)
		   || gimple_call_noreturn_p (call_stmt)
		   || !gimple_call_nothrow_p (call_stmt)
		   || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: unsupported call type %G",
				 call_stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	{
	  rhs_code = gimple_assign_rhs_code (stmt);
	  load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
	}

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      if (vectype == boolean_type_node)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: shift of a"
				     " boolean.\n");
		  /* Fatal mismatch.  */
		  matches[0] = false;
		  return false;
		}

	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt %G", stmt);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt %G", first_stmt_info->stmt);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different shift "
				 "arguments in %G", stmt);
	      /* Mismatch.  */
	      continue;
	    }

	  if (!load_p && rhs_code == CALL_EXPR)
	    {
	      if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
				       as_a <gcall *> (stmt)))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different calls in %G",
				     stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "Build SLP failed: different "
					 "interleaving chains in one node %G",
					 stmt);
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (load_p)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: not grouped load %G", stmt);

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != VIEW_CONVERT_EXPR
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: operation unsupported %G",
				 stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different"
				     " operation %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (vectype == boolean_type_node
	  || !vect_two_operations_perm_ok_p (stmts, group_size,
					     vectype, alt_stmt_code))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt %G", stmts[i]->stmt);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt %G", first_stmt_info->stmt);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

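/* A cache mapping a vector of scalar stmts to the SLP (sub-)tree built for
   it, or to NULL when building it failed; vect_build_slp_tree consults this
   map first so that shared sub-graphs are reused rather than rebuilt.  */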
typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<stmt_vec_info> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     scalar_stmts_to_slp_tree_map_t *bst_map)
{
  if (slp_tree *leader = bst_map->get (stmts))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
			 *leader ? "" : "failed ", *leader);
      if (*leader)
	{
	  (*leader)->refcnt++;
	  vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
	}
      return *leader;
    }
  poly_uint64 this_max_nunits = 1;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
					&this_max_nunits,
					matches, npermutes, tree_size, bst_map);
  if (res)
    {
      res->max_nunits = this_max_nunits;
      vect_update_max_nunits (max_nunits, this_max_nunits);
      /* Keep a reference for the bst_map use.  */
      res->refcnt++;
    }
  bst_map->put (stmts.copy (), res);
  return res;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return a value not equal to zero) if def-stmts are not
   isomorphic, require data permutation or are of unsupported types of
   operation.  Otherwise, return 0.
   The value returned is the depth in the SLP tree where a mismatch
   was found.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
      if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (stmt_info != other_info)
	      return NULL;
	}
      else if (def_type == vect_reduction_def
	       || def_type == vect_double_reduction_def
	       || def_type == vect_nested_cycle)
	{
	  /* Else def types have to match.  */
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
	      return NULL;
	}
      else
	return NULL;
      (*tree_size)++;
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion unless masked.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
	{
	  /* Masked load.  */
	  gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
	  nops = 1;
	}
      else
	{
	  *max_nunits = this_max_nunits;
	  (*tree_size)++;
	  node = vect_create_new_slp_node (stmts);
	  return node;
	}
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;

  stmt_info = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt == vect_uninitialized_def)
	{
	  /* COND_EXPR have one too many eventually if the condition
	     is a SSA name.  */
	  gcc_assert (i == 3 && nops == 4);
	  continue;
	}

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	{
	  slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
	  SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
	  oprnd_info->ops = vNULL;
	  children.safe_push (invnode);
	  continue;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					matches, npermutes,
					&this_tree_size, bst_map)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (is_a <bb_vec_info> (vinfo)
	      && !SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !oprnd_info->any_pattern)
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
		  break;
	      if (!grandchild
		  && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
		{
		  /* Roll back.  */
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild, false);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "Building parent vector operands from "
				     "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
		  oprnd_info->ops = vNULL;
		  ++this_tree_size;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (stmt_info)
	  && !oprnd_info->any_pattern
	  && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Building vector operands from scalars\n");
	  this_tree_size++;
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
	  children.safe_push (child);
	  oprnd_info->ops = vNULL;
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt_info->stmt)
	  /* Swapping operands for reductions breaks assumptions later on.  */
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
	  /* Do so only if the number of not successful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  stmt_vec_info stmt_info = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
		  if (!stmt
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		std::swap (oprnds_info[0]->ops[j],
			   oprnds_info[1]->ops[j]);
		if (dump_enabled_p ())
		  dump_printf (MSG_NOTE, "%d ", j);
	      }
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ...  */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    tem, npermutes,
					    &this_tree_size, bst_map)) != NULL)
	    {
	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (is_a <bb_vec_info> (vinfo)
		  && !SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !oprnd_info->any_pattern)
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
		      break;
		  if (!grandchild
		      && (vect_update_all_shared_vectypes
			  (oprnd_info->def_stmts)))
		    {
		      /* Roll back.  */
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild, false);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      if (dump_enabled_p ())
			dump_printf_loc (MSG_NOTE, vect_location,
					 "Building parent vector operands from "
					 "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
		      oprnd_info->ops = vNULL;
		      ++this_tree_size;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  *tree_size += this_tree_size + 1;
  *max_nunits = this_max_nunits;

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
1616
78c60e3d 1617/* Dump an SLP tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1618
1619static void
4f5b9c80 1620vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
a1f072e2 1621 slp_tree node, hash_set<slp_tree> &visited)
ebfd146a 1622{
6c7b0df8 1623 unsigned i;
b9787581 1624 stmt_vec_info stmt_info;
d755c7ef 1625 slp_tree child;
6c7b0df8 1626 tree op;
ebfd146a 1627
a1f072e2
RB
1628 if (visited.add (node))
1629 return;
1630
3da39f52
DM
1631 dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
1632 dump_user_location_t user_loc = loc.get_user_location ();
f48e4da3 1633 dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
6c7b0df8
RB
1634 SLP_TREE_DEF_TYPE (node) == vect_external_def
1635 ? " (external)"
1636 : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
1637 ? " (constant)"
1638 : ""), node,
f48e4da3 1639 estimated_poly_value (node->max_nunits));
6c7b0df8
RB
1640 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1641 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1642 dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
1643 else
1644 {
1645 dump_printf_loc (metadata, user_loc, "\t{ ");
1646 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1647 dump_printf (metadata, "%T%s ", op,
1648 i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
1649 dump_printf (metadata, "}\n");
1650 }
a1f072e2
RB
1651 if (SLP_TREE_CHILDREN (node).is_empty ())
1652 return;
3da39f52 1653 dump_printf_loc (metadata, user_loc, "\tchildren");
9771b263 1654 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a1f072e2
RB
1655 dump_printf (dump_kind, " %p", (void *)child);
1656 dump_printf (dump_kind, "\n");
1657 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1658 vect_print_slp_tree (dump_kind, loc, child, visited);
ebfd146a
IR
1659}
1660
a1f072e2
RB
1661static void
1662vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1663 slp_tree node)
1664{
1665 hash_set<slp_tree> visited;
1666 vect_print_slp_tree (dump_kind, loc, node, visited);
1667}
ebfd146a 1668
6c7e3b1f 1669/* Mark the tree rooted at NODE with PURE_SLP. */
ebfd146a
IR
1670
1671static void
6c7e3b1f 1672vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
ebfd146a
IR
1673{
1674 int i;
b9787581 1675 stmt_vec_info stmt_info;
d755c7ef 1676 slp_tree child;
ebfd146a 1677
603cca93 1678 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
1679 return;
1680
4bfcf879
RB
1681 if (visited.add (node))
1682 return;
1683
b9787581 1684 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6c7e3b1f 1685 STMT_SLP_TYPE (stmt_info) = pure_slp;
ebfd146a 1686
9771b263 1687 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6c7e3b1f 1688 vect_mark_slp_stmts (child, visited);
ebfd146a
IR
1689}
1690
4bfcf879 1691static void
6c7e3b1f 1692vect_mark_slp_stmts (slp_tree node)
4bfcf879
RB
1693{
1694 hash_set<slp_tree> visited;
6c7e3b1f 1695 vect_mark_slp_stmts (node, visited);
4bfcf879 1696}
ebfd146a 1697
a70d6342
IR
1698/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1699
1700static void
4bfcf879 1701vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
a70d6342
IR
1702{
1703 int i;
a70d6342 1704 stmt_vec_info stmt_info;
d755c7ef 1705 slp_tree child;
a70d6342 1706
603cca93 1707 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
1708 return;
1709
4bfcf879
RB
1710 if (visited.add (node))
1711 return;
1712
b9787581 1713 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
a70d6342 1714 {
b8698a0f 1715 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1716 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1717 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1718 }
1719
9771b263 1720 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4bfcf879
RB
1721 vect_mark_slp_stmts_relevant (child, visited);
1722}
1723
1724static void
1725vect_mark_slp_stmts_relevant (slp_tree node)
1726{
1727 hash_set<slp_tree> visited;
1728 vect_mark_slp_stmts_relevant (node, visited);
a70d6342
IR
1729}
1730
1731
b5aeb3bb
IR
1732/* Rearrange the statements of NODE according to PERMUTATION. */
1733
1734static void
1735vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
a1f072e2
RB
1736 vec<unsigned> permutation,
1737 hash_set<slp_tree> &visited)
b5aeb3bb 1738{
d755c7ef
RB
1739 unsigned int i;
1740 slp_tree child;
b5aeb3bb 1741
a1f072e2
RB
1742 if (visited.add (node))
1743 return;
1744
9771b263 1745 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a1f072e2 1746 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
b5aeb3bb 1747
30c0d1e3
RB
1748 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1749 {
1750 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1751 vec<stmt_vec_info> tmp_stmts;
1752 tmp_stmts.create (group_size);
1753 tmp_stmts.quick_grow (group_size);
1754 stmt_vec_info stmt_info;
1755 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1756 tmp_stmts[permutation[i]] = stmt_info;
1757 SLP_TREE_SCALAR_STMTS (node).release ();
1758 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1759 }
1760 if (SLP_TREE_SCALAR_OPS (node).exists ())
1761 {
1762 gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1763 vec<tree> tmp_ops;
1764 tmp_ops.create (group_size);
1765 tmp_ops.quick_grow (group_size);
1766 tree op;
1767 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1768 tmp_ops[permutation[i]] = op;
1769 SLP_TREE_SCALAR_OPS (node).release ();
1770 SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1771 }
b5aeb3bb
IR
1772}
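/* Illustrative note (example values are hypothetical): the loops above
   scatter element I to position PERMUTATION[I].  With scalar stmts
   {a, b, c} and permutation {1, 2, 0} the node ends up holding
   {c, a, b}: a moves to slot 1, b to slot 2 and c to slot 0.  */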
1773
1774
b266b968
RB
1775/* Attempt to reorder stmts in a reduction chain so that we don't
1776 require any load permutation. Return true if that was possible,
1777 otherwise return false. */
1778
1779static bool
1780vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1781{
1782 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1783 unsigned int i, j;
b266b968
RB
1784 unsigned int lidx;
1785 slp_tree node, load;
1786
1787 /* Compare all the permutation sequences to the first one. We know
1788 that at least one load is permuted. */
1789 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1790 if (!node->load_permutation.exists ())
1791 return false;
1792 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1793 {
1794 if (!load->load_permutation.exists ())
1795 return false;
1796 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1797 if (lidx != node->load_permutation[j])
1798 return false;
1799 }
1800
1801 /* Check that the loads in the first sequence are different and there
1802 are no gaps between them. */
7ba9e72d 1803 auto_sbitmap load_index (group_size);
b266b968
RB
1804 bitmap_clear (load_index);
1805 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1806 {
41eefe13 1807 if (lidx >= group_size)
7ba9e72d 1808 return false;
b266b968 1809 if (bitmap_bit_p (load_index, lidx))
7ba9e72d
TS
1810 return false;
1811
b266b968
RB
1812 bitmap_set_bit (load_index, lidx);
1813 }
1814 for (i = 0; i < group_size; i++)
1815 if (!bitmap_bit_p (load_index, i))
7ba9e72d 1816 return false;
b266b968
RB
1817
1818 /* This permutation is valid for reduction. Since the order of the
1819 statements in the nodes is not important unless they are memory
1820 accesses, we can rearrange the statements in all the nodes
1821 according to the order of the loads. */
a1f072e2 1822 hash_set<slp_tree> visited;
b266b968 1823 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
a1f072e2 1824 node->load_permutation, visited);
b266b968
RB
1825
1826 /* We are done, no actual permutations need to be generated. */
d9f21f6a 1827 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
b266b968 1828 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
c4e360f4 1829 {
b9787581 1830 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1831 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
c4e360f4
RB
1832 /* But we have to keep those permutations that are required because
1833 of handling of gaps. */
d9f21f6a 1834 if (known_eq (unrolling_factor, 1U)
b9787581
RS
1835 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1836 && DR_GROUP_GAP (first_stmt_info) == 0))
c4e360f4 1837 SLP_TREE_LOAD_PERMUTATION (node).release ();
cbd400b4
RB
1838 else
1839 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1840 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
c4e360f4
RB
1841 }
1842
b266b968
RB
1843 return true;
1844}
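/* A minimal standalone sketch (illustration only, kept disabled; the
   function name is made up) of the validity test performed above with the
   load_index bitmap: a permutation is usable when every index is below
   GROUP_SIZE and no index repeats.  */
#if 0
static bool
example_permutation_is_valid (const unsigned int *perm, unsigned int group_size)
{
  for (unsigned int i = 0; i < group_size; i++)
    {
      /* Out-of-range index.  */
      if (perm[i] >= group_size)
        return false;
      /* Duplicate index.  */
      for (unsigned int j = 0; j < i; j++)
        if (perm[j] == perm[i])
          return false;
    }
  return true;
}
#endif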
1845
5d8c32cb
RB
1846/* Gather loads in the SLP graph rooted at NODE and populate the INST loads array. */
1847
1848static void
1849vect_gather_slp_loads (slp_instance inst, slp_tree node,
1850 hash_set<slp_tree> &visited)
1851{
1852 if (visited.add (node))
1853 return;
1854
1855 if (SLP_TREE_CHILDREN (node).length () == 0)
1856 {
30c0d1e3
RB
1857 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1858 return;
5d8c32cb 1859 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
30c0d1e3 1860 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
5d8c32cb
RB
1861 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1862 SLP_INSTANCE_LOADS (inst).safe_push (node);
1863 }
1864 else
1865 {
1866 unsigned i;
1867 slp_tree child;
1868 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1869 vect_gather_slp_loads (inst, child, visited);
1870 }
1871}
1872
1873static void
1874vect_gather_slp_loads (slp_instance inst, slp_tree node)
1875{
1876 hash_set<slp_tree> visited;
1877 vect_gather_slp_loads (inst, node, visited);
1878}
1879
01d8bf07
RB
1880/* Check if the required load permutations in the SLP instance
1881 SLP_INSTN are supported. */
ebfd146a
IR
1882
1883static bool
01d8bf07 1884vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1885{
01d8bf07
RB
1886 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1887 unsigned int i, j, k, next;
6983e6b5 1888 slp_tree node;
ebfd146a 1889
73fbfcad 1890 if (dump_enabled_p ())
ebfd146a 1891 {
78c60e3d 1892 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1893 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1894 if (node->load_permutation.exists ())
1895 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1896 dump_printf (MSG_NOTE, "%d ", next);
1897 else
bddc974e
TJ
1898 for (k = 0; k < group_size; ++k)
1899 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1900 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1901 }
1902
b5aeb3bb
IR
1903 /* In case of reduction every load permutation is allowed, since the order
1904 of the reduction statements is not important (as opposed to the case of
0d0293ac 1905 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1906 load nodes are of the same size and have the same permutation (and then
1907 rearrange all the nodes of the SLP instance according to this
1908 permutation). */
1909
1910 /* Check that all the load nodes are of the same size. */
01d8bf07 1911 /* ??? Can't we assert this? */
9771b263 1912 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1913 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1914 return false;
2200fc49 1915
b5aeb3bb 1916 node = SLP_INSTANCE_TREE (slp_instn);
b9787581 1917 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1918
b010117a 1919 /* Reduction (there are no data-refs in the root).
b266b968 1920 In reduction chain the order of the loads is not important. */
b9787581
RS
1921 if (!STMT_VINFO_DATA_REF (stmt_info)
1922 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
c4e360f4 1923 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1924
6aa904c4
IR
1925 /* In basic block vectorization we allow any subchain of an interleaving
1926 chain.
1927 FORNOW: not supported in loop SLP because of realignment complications. */
b9787581 1928 if (STMT_VINFO_BB_VINFO (stmt_info))
6aa904c4 1929 {
240a94da
RB
1930 /* Check whether the loads in an instance form a subchain and thus
1931 no permutation is necessary. */
9771b263 1932 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1933 {
9626d143
RB
1934 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1935 continue;
240a94da 1936 bool subchain_p = true;
bffb8014 1937 stmt_vec_info next_load_info = NULL;
b9787581
RS
1938 stmt_vec_info load_info;
1939 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1940 {
1941 if (j != 0
bffb8014 1942 && (next_load_info != load_info
b9787581 1943 || DR_GROUP_GAP (load_info) != 1))
240a94da
RB
1944 {
1945 subchain_p = false;
1946 break;
1947 }
bffb8014 1948 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
b9787581 1949 }
240a94da
RB
1950 if (subchain_p)
1951 SLP_TREE_LOAD_PERMUTATION (node).release ();
1952 else
1953 {
b9787581 1954 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1955 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
928686b1 1956 unsigned HOST_WIDE_INT nunits;
fe73a332
RB
1957 unsigned k, maxk = 0;
1958 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1959 if (k > maxk)
1960 maxk = k;
1961 /* In BB vectorization we may not actually use a loaded vector
2c53b149 1962 accessing elements in excess of DR_GROUP_SIZE. */
928686b1
RS
1963 tree vectype = STMT_VINFO_VECTYPE (group_info);
1964 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2c53b149 1965 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
fe73a332 1966 {
bbeeac91
DM
1967 if (dump_enabled_p ())
1968 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1969 "BB vectorization with gaps at the end of "
1970 "a load is not supported\n");
fe73a332
RB
1971 return false;
1972 }
1973
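 /* Worked example (hypothetical numbers): with DR_GROUP_SIZE 7 and
    nunits 4, only the first 7 & ~3 = 4 elements are covered by whole
    vectors, so a maximum permutation index of 4 or more would read into
    the trailing gap and the check above rejects it.  */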
240a94da
RB
1974 /* Verify the permutation can be generated. */
1975 vec<tree> tem;
29afecdf 1976 unsigned n_perms;
240a94da 1977 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1978 1, slp_instn, true, &n_perms))
240a94da 1979 {
bbeeac91
DM
1980 if (dump_enabled_p ())
1981 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1982 vect_location,
1983 "unsupported load permutation\n");
240a94da
RB
1984 return false;
1985 }
1986 }
6aa904c4 1987 }
01d8bf07 1988 return true;
6aa904c4
IR
1989 }
1990
31bee964
RB
1991 /* For loop vectorization verify we can generate the permutation. Be
1992 conservative about the vectorization factor: there are permutations
1993 that will use three vector inputs only starting from a specific factor
1994 and the vectorization factor is not yet final.
1995 ??? The SLP instance unrolling factor might not be the maximum one. */
29afecdf 1996 unsigned n_perms;
d9f21f6a
RS
1997 poly_uint64 test_vf
1998 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
31bee964 1999 LOOP_VINFO_VECT_FACTOR
b9787581 2000 (STMT_VINFO_LOOP_VINFO (stmt_info)));
01d8bf07
RB
2001 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
2002 if (node->load_permutation.exists ()
31bee964
RB
2003 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
2004 slp_instn, true, &n_perms))
01d8bf07 2005 return false;
9b999e8c 2006
01d8bf07 2007 return true;
ebfd146a
IR
2008}
2009
2010
e4a707c4 2011/* Find the last scalar stmt in the SLP tree NODE. */
ff802fa1 2012
95c68311 2013stmt_vec_info
2e8ab70c 2014vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 2015{
95c68311 2016 stmt_vec_info last = NULL;
b9787581 2017 stmt_vec_info stmt_vinfo;
e4a707c4 2018
b9787581 2019 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2e8ab70c 2020 {
211cd1e2 2021 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
95c68311 2022 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2e8ab70c 2023 }
e4a707c4 2024
2e8ab70c 2025 return last;
e4a707c4
IR
2026}
2027
82570274
RS
2028/* Splits a group of stores, currently beginning at FIRST_VINFO, into
2029 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
2030 (also containing the first GROUP1_SIZE stmts, since stores are
2031 consecutive), the second containing the remainder.
1ba91a49
AL
2032 Return the first stmt in the second group. */
2033
82570274
RS
2034static stmt_vec_info
2035vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1ba91a49 2036{
bffb8014 2037 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1ba91a49 2038 gcc_assert (group1_size > 0);
2c53b149 2039 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1ba91a49 2040 gcc_assert (group2_size > 0);
2c53b149 2041 DR_GROUP_SIZE (first_vinfo) = group1_size;
1ba91a49 2042
bffb8014 2043 stmt_vec_info stmt_info = first_vinfo;
1ba91a49
AL
2044 for (unsigned i = group1_size; i > 1; i--)
2045 {
bffb8014
RS
2046 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2047 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
2048 }
2049 /* STMT is now the last element of the first group. */
bffb8014
RS
2050 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
2051 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1ba91a49 2052
bffb8014
RS
2053 DR_GROUP_SIZE (group2) = group2_size;
2054 for (stmt_info = group2; stmt_info;
2055 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1ba91a49 2056 {
bffb8014
RS
2057 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
2058 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
2059 }
2060
2c53b149 2061 /* For the second group, the DR_GROUP_GAP is that before the original group,
1ba91a49 2062 plus skipping over the first vector. */
bffb8014 2063 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1ba91a49 2064
2c53b149
RB
2065 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
2066 DR_GROUP_GAP (first_vinfo) += group2_size;
1ba91a49
AL
2067
2068 if (dump_enabled_p ())
2069 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
2070 group1_size, group2_size);
2071
2072 return group2;
2073}
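/* Worked example (values are hypothetical): splitting a store group of
   DR_GROUP_SIZE 6 with GROUP1_SIZE 4 leaves the first group with size 4
   and creates a second group of size 2.  The second group's gap becomes
   the original gap plus 4 (it must skip the first group), and the first
   group's gap grows by 2 (it must skip the second group).  */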
2074
4b6068ea
RS
2075/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
2076 statements and a vector of NUNITS elements. */
2077
2078static poly_uint64
2079calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
2080{
2081 return exact_div (common_multiple (nunits, group_size), group_size);
2082}
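/* Worked example (values are hypothetical): with NUNITS = 4 and
   GROUP_SIZE = 6 the common multiple is 12, so the unrolling factor is
   12 / 6 = 2; with GROUP_SIZE = 8 it is 8 / 8 = 1.  A standalone sketch
   of the same arithmetic for constant sizes, kept disabled and using a
   made-up name, would be:  */
#if 0
static unsigned int
example_unrolling_factor (unsigned int nunits, unsigned int group_size)
{
  /* gcd by Euclid's algorithm, then lcm (nunits, group_size) / group_size.  */
  unsigned int a = nunits, b = group_size;
  while (b != 0)
    {
      unsigned int t = a % b;
      a = b;
      b = t;
    }
  return (nunits / a * group_size) / group_size;
}
#endif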
2083
0d0293ac 2084/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 2085 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
2086 Return FALSE if it's impossible to SLP any stmt in the loop. */
2087
2088static bool
310213d4 2089vect_analyze_slp_instance (vec_info *vinfo,
10a73df7 2090 scalar_stmts_to_slp_tree_map_t *bst_map,
32e8e429 2091 stmt_vec_info stmt_info, unsigned max_tree_size)
ebfd146a
IR
2092{
2093 slp_instance new_instance;
d092494c 2094 slp_tree node;
2c53b149 2095 unsigned int group_size;
b5aeb3bb 2096 tree vectype, scalar_type = NULL_TREE;
1ba91a49 2097 unsigned int i;
b9787581
RS
2098 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2099 vec<stmt_vec_info> scalar_stmts;
818b3293 2100 bool constructor = false;
b5aeb3bb 2101
b9787581 2102 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
b5aeb3bb 2103 {
2c53b149 2104 scalar_type = TREE_TYPE (DR_REF (dr));
b9787581 2105 group_size = DR_GROUP_SIZE (stmt_info);
9b75f56d 2106 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
2c53b149 2107 }
b9787581 2108 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
2109 {
2110 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581
RS
2111 vectype = STMT_VINFO_VECTYPE (stmt_info);
2112 group_size = REDUC_GROUP_SIZE (stmt_info);
b5aeb3bb 2113 }
818b3293
JH
2114 else if (is_gimple_assign (stmt_info->stmt)
2115 && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2116 {
2117 vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2118 group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2119 constructor = true;
2120 }
b5aeb3bb
IR
2121 else
2122 {
310213d4 2123 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581 2124 vectype = STMT_VINFO_VECTYPE (stmt_info);
310213d4 2125 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 2126 }
b8698a0f 2127
ebfd146a
IR
2128 if (!vectype)
2129 {
73fbfcad 2130 if (dump_enabled_p ())
3c2a8ed0
DM
2131 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2132 "Build SLP failed: unsupported data-type %T\n",
2133 scalar_type);
b5aeb3bb 2134
ebfd146a
IR
2135 return false;
2136 }
4b6068ea 2137 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 2138
0d0293ac 2139 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 2140 scalar_stmts.create (group_size);
bffb8014 2141 stmt_vec_info next_info = stmt_info;
b9787581 2142 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 2143 {
b5aeb3bb 2144 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
bffb8014 2145 while (next_info)
b5aeb3bb 2146 {
6e6b18e5 2147 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 2148 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2c53b149
RB
2149 }
2150 }
b9787581 2151 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
2152 {
2153 /* Collect the reduction stmts and store them in
2154 SLP_TREE_SCALAR_STMTS. */
bffb8014 2155 while (next_info)
2c53b149 2156 {
6e6b18e5 2157 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 2158 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
b5aeb3bb 2159 }
14a61437
RB
2160 /* Mark the first element of the reduction chain as reduction to properly
2161 transform the node. In the reduction analysis phase only the last
2162 element of the chain is marked as reduction. */
b4673569
RB
2163 STMT_VINFO_DEF_TYPE (stmt_info)
2164 = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
ef6e6914
RB
2165 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2166 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
b5aeb3bb 2167 }
818b3293
JH
2168 else if (constructor)
2169 {
2170 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2171 tree val;
2172 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2173 {
2174 if (TREE_CODE (val) == SSA_NAME)
2175 {
2176 gimple* def = SSA_NAME_DEF_STMT (val);
2177 stmt_vec_info def_info = vinfo->lookup_stmt (def);
2178 /* Value is defined in another basic block. */
2179 if (!def_info)
2180 return false;
2181 scalar_stmts.safe_push (def_info);
2182 }
2183 else
2184 return false;
2185 }
2186 }
b5aeb3bb
IR
2187 else
2188 {
2189 /* Collect reduction statements. */
32c91dfc
RS
2190 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2191 for (i = 0; reductions.iterate (i, &next_info); i++)
2192 scalar_stmts.safe_push (next_info);
ebfd146a
IR
2193 }
2194
ebfd146a 2195 /* Build the tree for the SLP instance. */
89d390e5
RB
2196 bool *matches = XALLOCAVEC (bool, group_size);
2197 unsigned npermutes = 0;
4b6068ea 2198 poly_uint64 max_nunits = nunits;
9f708a84 2199 unsigned tree_size = 0;
e569db5f 2200 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
5d8c32cb 2201 &max_nunits, matches, &npermutes,
9f708a84 2202 &tree_size, bst_map);
e569db5f 2203 if (node != NULL)
ebfd146a 2204 {
b4673569
RB
2205 /* If this is a reduction chain with a conversion in front
2206 amend the SLP tree with a node for that. */
2207 if (!dr
2208 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2209 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2210 {
2211 /* Get at the conversion stmt - we know it's the single use
2212 of the last stmt of the reduction chain. */
2213 gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2214 use_operand_p use_p;
2215 gimple *use_stmt;
2216 bool r = single_imm_use (gimple_assign_lhs (tem), &use_p, &use_stmt);
2217 gcc_assert (r);
2218 next_info = vinfo->lookup_stmt (use_stmt);
2219 next_info = vect_stmt_to_vectorize (next_info);
2220 scalar_stmts = vNULL;
2221 scalar_stmts.create (group_size);
2222 for (unsigned i = 0; i < group_size; ++i)
2223 scalar_stmts.quick_push (next_info);
2224 slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2225 SLP_TREE_CHILDREN (conv).quick_push (node);
2226 node = conv;
2227 /* We also have to fake this conversion stmt as SLP reduction group
2228 so we don't have to mess with too much code elsewhere. */
2229 REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2230 REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2231 }
2232
4ef69dfc 2233 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 2234 poly_uint64 unrolling_factor
4b6068ea 2235 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 2236
d9f21f6a 2237 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
2238 && is_a <bb_vec_info> (vinfo))
2239 {
4b6068ea
RS
2240 unsigned HOST_WIDE_INT const_max_nunits;
2241 if (!max_nunits.is_constant (&const_max_nunits)
2242 || const_max_nunits > group_size)
2243 {
2244 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2246 "Build SLP failed: store group "
2247 "size not a multiple of the vector size "
2248 "in basic block SLP\n");
6e2dd807 2249 vect_free_slp_tree (node, false);
4b6068ea
RS
2250 return false;
2251 }
e569db5f 2252 /* Fatal mismatch. */
4b6068ea 2253 matches[group_size / const_max_nunits * const_max_nunits] = false;
6e2dd807 2254 vect_free_slp_tree (node, false);
e569db5f
VK
2255 }
2256 else
2257 {
a1f072e2 2258 /* Create a new SLP instance. */
99b1c316 2259 new_instance = XNEW (class _slp_instance);
a1f072e2
RB
2260 SLP_INSTANCE_TREE (new_instance) = node;
2261 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2262 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
5d8c32cb 2263 SLP_INSTANCE_LOADS (new_instance) = vNULL;
818b3293
JH
2264 SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2265
5d8c32cb 2266 vect_gather_slp_loads (new_instance, node);
9f708a84
RB
2267 if (dump_enabled_p ())
2268 dump_printf_loc (MSG_NOTE, vect_location,
2269 "SLP size %u vs. limit %u.\n",
2270 tree_size, max_tree_size);
a1f072e2
RB
2271
2272 /* Compute the load permutation. */
2273 slp_tree load_node;
2274 bool loads_permuted = false;
5d8c32cb 2275 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
abf9bfbc 2276 {
a1f072e2
RB
2277 vec<unsigned> load_permutation;
2278 int j;
2279 stmt_vec_info load_info;
2280 bool this_load_permuted = false;
2281 load_permutation.create (group_size);
2282 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2283 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2284 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2285 {
2286 int load_place = vect_get_place_in_interleaving_chain
2287 (load_info, first_stmt_info);
2288 gcc_assert (load_place != -1);
2289 if (load_place != j)
2290 this_load_permuted = true;
2291 load_permutation.safe_push (load_place);
2292 }
2293 if (!this_load_permuted
2294 /* The load requires permutation when unrolling exposes
2295 a gap either because the group is larger than the SLP
2296 group-size or because there is a gap between the groups. */
2297 && (known_eq (unrolling_factor, 1U)
2298 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2299 && DR_GROUP_GAP (first_stmt_info) == 0)))
2300 {
2301 load_permutation.release ();
2302 continue;
2303 }
2304 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2305 loads_permuted = true;
abf9bfbc 2306 }
a1f072e2
RB
2307
2308 if (loads_permuted)
01d8bf07 2309 {
a1f072e2
RB
2310 if (!vect_supported_load_permutation_p (new_instance))
2311 {
2312 if (dump_enabled_p ())
2313 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2314 "Build SLP failed: unsupported load "
2315 "permutation %G", stmt_info->stmt);
2316 vect_free_slp_instance (new_instance, false);
2317 return false;
2318 }
01d8bf07 2319 }
ebfd146a 2320
e569db5f 2321 /* If the loads and stores can be handled with load/store-lanes
a1f072e2
RB
2322 instructions do not generate this SLP instance. */
2323 if (is_a <loop_vec_info> (vinfo)
2324 && loads_permuted
2325 && dr && vect_store_lanes_supported (vectype, group_size, false))
bb0f5ca7 2326 {
a1f072e2 2327 slp_tree load_node;
5d8c32cb 2328 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
a1f072e2
RB
2329 {
2330 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2331 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2332 /* Use SLP for strided accesses (or if we can't load-lanes). */
2333 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2334 || ! vect_load_lanes_supported
2335 (STMT_VINFO_VECTYPE (stmt_vinfo),
2336 DR_GROUP_SIZE (stmt_vinfo), false))
2337 break;
2338 }
5d8c32cb 2339 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
a1f072e2
RB
2340 {
2341 if (dump_enabled_p ())
2342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2343 "Built SLP cancelled: can use "
2344 "load/store-lanes\n");
2345 vect_free_slp_instance (new_instance, false);
2346 return false;
2347 }
bb0f5ca7 2348 }
a1f072e2
RB
2349
2350 vinfo->slp_instances.safe_push (new_instance);
2351
2352 if (dump_enabled_p ())
bb0f5ca7 2353 {
a1f072e2
RB
2354 dump_printf_loc (MSG_NOTE, vect_location,
2355 "Final SLP tree for instance:\n");
2356 vect_print_slp_tree (MSG_NOTE, vect_location, node);
bb0f5ca7 2357 }
bb0f5ca7 2358
a1f072e2 2359 return true;
c2a12ca0 2360 }
e569db5f
VK
2361 }
2362 else
2363 {
a1f072e2
RB
2364 /* Failed to SLP. */
2365 /* Free the allocated memory. */
2366 scalar_stmts.release ();
e569db5f 2367 }
b8698a0f 2368
1ba91a49 2369 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2370 vector size. */
4b6068ea 2371 unsigned HOST_WIDE_INT const_nunits;
1ba91a49 2372 if (is_a <bb_vec_info> (vinfo)
91987857
RS
2373 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2374 && DR_GROUP_FIRST_ELEMENT (stmt_info)
4b6068ea 2375 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2376 {
2377 /* We consider breaking the group only on VF boundaries from the existing
2378 start. */
2379 for (i = 0; i < group_size; i++)
2380 if (!matches[i]) break;
2381
4b6068ea 2382 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2383 {
2384 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2385 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2386 unsigned group1_size = i & ~(const_nunits - 1);
1ba91a49 2387
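 /* For instance (illustrative values): if the first mismatch is at i = 6
    and const_nunits is 4, group1_size becomes 6 & ~3 = 4, i.e. the group
    is split at the last full vector boundary before the mismatch.  */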
82570274
RS
2388 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2389 group1_size);
10a73df7 2390 bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
86a91c0a 2391 max_tree_size);
1ba91a49
AL
2392 /* If the first non-match was in the middle of a vector,
2393 skip the rest of that vector. */
2394 if (group1_size < i)
2395 {
4b6068ea 2396 i = group1_size + const_nunits;
1ba91a49 2397 if (i < group_size)
4b6068ea 2398 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2399 }
2400 if (i < group_size)
10a73df7
RB
2401 res |= vect_analyze_slp_instance (vinfo, bst_map,
2402 rest, max_tree_size);
1ba91a49
AL
2403 return res;
2404 }
2405 /* Even though the first vector did not all match, we might be able to SLP
2406 (some) of the remainder. FORNOW ignore this possibility. */
2407 }
2408
a70d6342 2409 return false;
ebfd146a
IR
2410}
2411
2412
ff802fa1 2413/* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
ebfd146a
IR
2414 trees of packed scalar stmts if SLP is possible. */
2415
f4ebbd24 2416opt_result
310213d4 2417vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2418{
2419 unsigned int i;
f698fccf 2420 stmt_vec_info first_element;
ebfd146a 2421
adac3a68 2422 DUMP_VECT_SCOPE ("vect_analyze_slp");
ebfd146a 2423
10a73df7
RB
2424 scalar_stmts_to_slp_tree_map_t *bst_map
2425 = new scalar_stmts_to_slp_tree_map_t ();
2426
0d0293ac 2427 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2428 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
10a73df7 2429 vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);
ebfd146a 2430
310213d4 2431 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2432 {
310213d4
RB
2433 if (loop_vinfo->reduction_chains.length () > 0)
2434 {
2435 /* Find SLP sequences starting from reduction chains. */
2436 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
10a73df7 2437 if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
310213d4 2438 max_tree_size))
6b5e165b
RB
2439 {
2440 /* Dissolve reduction chain group. */
f698fccf 2441 stmt_vec_info vinfo = first_element;
0214d31a 2442 stmt_vec_info last = NULL;
f698fccf 2443 while (vinfo)
6b5e165b 2444 {
bffb8014 2445 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2c53b149
RB
2446 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2447 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
0214d31a 2448 last = vinfo;
f698fccf 2449 vinfo = next;
6b5e165b 2450 }
f698fccf 2451 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
0214d31a
RB
2452 /* It can be still vectorized as part of an SLP reduction. */
2453 loop_vinfo->reductions.safe_push (last);
6b5e165b 2454 }
310213d4 2455 }
b010117a 2456
310213d4 2457 /* Find SLP sequences starting from groups of reductions. */
0630a4ec 2458 if (loop_vinfo->reductions.length () > 1)
10a73df7 2459 vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
0630a4ec 2460 max_tree_size);
310213d4 2461 }
b5aeb3bb 2462
10a73df7
RB
2463 /* The map keeps a reference on SLP nodes built, release that. */
2464 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2465 it != bst_map->end (); ++it)
2466 if ((*it).second)
2467 vect_free_slp_tree ((*it).second, false);
2468 delete bst_map;
2469
f4ebbd24 2470 return opt_result::success ();
ebfd146a
IR
2471}
2472
2473
2474/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2475 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2476 least one instance. */
ebfd146a 2477
437f4a00 2478bool
ebfd146a
IR
2479vect_make_slp_decision (loop_vec_info loop_vinfo)
2480{
d9f21f6a
RS
2481 unsigned int i;
2482 poly_uint64 unrolling_factor = 1;
9771b263 2483 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2484 slp_instance instance;
2485 int decided_to_slp = 0;
2486
adac3a68 2487 DUMP_VECT_SCOPE ("vect_make_slp_decision");
ebfd146a 2488
9771b263 2489 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2490 {
2491 /* FORNOW: SLP if you can. */
1c84a2d2
RS
2492 /* All unroll factors have the form:
2493
2494 GET_MODE_SIZE (vinfo->vector_mode) * X
2495
2496 for some rational X, so they must have a common multiple. */
d9f21f6a
RS
2497 unrolling_factor
2498 = force_common_multiple (unrolling_factor,
2499 SLP_INSTANCE_UNROLLING_FACTOR (instance));
ebfd146a 2500
ff802fa1 2501 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2502 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2503 loop-based vectorization. Such stmts will be marked as HYBRID. */
6c7e3b1f 2504 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
ebfd146a
IR
2505 decided_to_slp++;
2506 }
2507
2508 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2509
73fbfcad 2510 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2511 {
2512 dump_printf_loc (MSG_NOTE, vect_location,
2513 "Decided to SLP %d instances. Unrolling factor ",
2514 decided_to_slp);
2515 dump_dec (MSG_NOTE, unrolling_factor);
2516 dump_printf (MSG_NOTE, "\n");
2517 }
437f4a00
IR
2518
2519 return (decided_to_slp > 0);
ebfd146a
IR
2520}
2521
2522
2523/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2524 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2525
2526static void
4bfcf879 2527vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
6c7e3b1f 2528 hash_map<slp_tree, unsigned> &visited)
ebfd146a 2529{
b9787581 2530 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2531 imm_use_iterator imm_iter;
355fe088 2532 gimple *use_stmt;
b9787581 2533 stmt_vec_info use_vinfo;
d755c7ef 2534 slp_tree child;
f2c74cc4 2535 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2536 int j;
2537
6c7e3b1f
RB
2538 /* We need to union stype over the incoming graph edges but we still
2539 want to limit recursion to stay O(N+E). */
2540 bool only_edge = (++visited.get_or_insert (node) < node->refcnt);
4bfcf879 2541
642fce57
RB
2542 /* Propagate hybrid down the SLP tree. */
2543 if (stype == hybrid)
2544 ;
2545 else if (HYBRID_SLP_STMT (stmt_vinfo))
2546 stype = hybrid;
6c7e3b1f 2547 else if (!only_edge)
642fce57
RB
2548 {
2549 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2550 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2551 /* If we get a pattern stmt here we have to use the LHS of the
2552 original stmt for immediate uses. */
211cd1e2 2553 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
e7baeb39
RB
2554 tree def;
2555 if (gimple_code (stmt) == GIMPLE_PHI)
2556 def = gimple_phi_result (stmt);
2557 else
2558 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2559 if (def)
2560 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870 2561 {
6585ff8f
RS
2562 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2563 if (!use_vinfo)
29764870 2564 continue;
6e6b18e5 2565 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
29764870
RB
2566 if (!STMT_SLP_TYPE (use_vinfo)
2567 && (STMT_VINFO_RELEVANT (use_vinfo)
2568 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2569 && !(gimple_code (use_stmt) == GIMPLE_PHI
2570 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2571 {
2572 if (dump_enabled_p ())
3c2a8ed0
DM
2573 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2574 "def in non-SLP stmt: %G", use_stmt);
502f0263
RB
2575 stype = hybrid;
2576 }
29764870 2577 }
642fce57 2578 }
ebfd146a 2579
502f0263
RB
2580 if (stype == hybrid
2581 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2582 {
2583 if (dump_enabled_p ())
3c2a8ed0
DM
2584 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2585 stmt_vinfo->stmt);
b1af7da6
RB
2586 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2587 }
ebfd146a 2588
6c7e3b1f
RB
2589 if (!only_edge)
2590 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2591 if (SLP_TREE_DEF_TYPE (child) != vect_external_def
2592 && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
6c7e3b1f 2593 vect_detect_hybrid_slp_stmts (child, i, stype, visited);
4bfcf879
RB
2594}
2595
642fce57 2596/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2597
642fce57
RB
2598static tree
2599vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2600{
2601 walk_stmt_info *wi = (walk_stmt_info *)data;
6585ff8f 2602 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
642fce57
RB
2603
2604 if (wi->is_lhs)
2605 return NULL_TREE;
2606
c98d0595
RS
2607 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2608 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
642fce57 2609 {
6585ff8f 2610 if (dump_enabled_p ())
3c2a8ed0
DM
2611 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2612 def_stmt_info->stmt);
6585ff8f 2613 STMT_SLP_TYPE (def_stmt_info) = hybrid;
642fce57
RB
2614 }
2615
2616 return NULL_TREE;
ebfd146a
IR
2617}
2618
642fce57
RB
2619static tree
2620vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
6585ff8f 2621 walk_stmt_info *wi)
642fce57 2622{
6585ff8f
RS
2623 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2624 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
642fce57
RB
2625 /* If the stmt is in a SLP instance then this isn't a reason
2626 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2627 if (! STMT_SLP_TYPE (use_vinfo)
2628 && (STMT_VINFO_RELEVANT (use_vinfo)
2629 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2630 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2631 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2632 ;
2633 else
642fce57
RB
2634 *handled = true;
2635 return NULL_TREE;
2636}
ebfd146a
IR
2637
2638/* Find stmts that must be both vectorized and SLPed. */
2639
2640void
2641vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2642{
2643 unsigned int i;
9771b263 2644 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2645 slp_instance instance;
2646
adac3a68 2647 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
ebfd146a 2648
642fce57
RB
2649 /* First walk all pattern stmts in the loop and mark defs of uses as
2650 hybrid because immediate uses in them are not recorded. */
2651 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2652 {
2653 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2654 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2655 gsi_next (&gsi))
2656 {
355fe088 2657 gimple *stmt = gsi_stmt (gsi);
6585ff8f 2658 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
642fce57
RB
2659 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2660 {
2661 walk_stmt_info wi;
2662 memset (&wi, 0, sizeof (wi));
6585ff8f 2663 wi.info = loop_vinfo;
642fce57 2664 gimple_stmt_iterator gsi2
10681ce8 2665 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
642fce57
RB
2666 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2667 vect_detect_hybrid_slp_1, &wi);
2668 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2669 vect_detect_hybrid_slp_2,
2670 vect_detect_hybrid_slp_1, &wi);
2671 }
2672 }
2673 }
2674
2675 /* Then walk the SLP instance trees marking stmts with uses in
2676 non-SLP stmts as hybrid, also propagating hybrid down the
2677 SLP tree, collecting the above info on-the-fly. */
10a73df7 2678 hash_map<slp_tree, unsigned> visited;
9771b263 2679 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2680 {
2681 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2682 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
10a73df7 2683 i, pure_slp, visited);
642fce57 2684 }
ebfd146a
IR
2685}
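/* By way of illustration (hypothetical stmt): if a pure-SLP stmt computes
   t = a + b and t is additionally used by a relevant stmt that is not part
   of any SLP instance, the definition of t is marked hybrid above so that
   it is also handled by loop-based vectorization.  */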
2686
a70d6342 2687
2c515559
RS
2688/* Initialize a bb_vec_info struct for the statements between
2689 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2690
2691_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
ca823c85
RB
2692 gimple_stmt_iterator region_end_in,
2693 vec_info_shared *shared)
2694 : vec_info (vec_info::bb, init_cost (NULL), shared),
2c515559
RS
2695 bb (gsi_bb (region_begin_in)),
2696 region_begin (region_begin_in),
2697 region_end (region_end_in)
a70d6342 2698{
a70d6342
IR
2699 gimple_stmt_iterator gsi;
2700
61d371eb
RB
2701 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2702 gsi_next (&gsi))
a70d6342 2703 {
355fe088 2704 gimple *stmt = gsi_stmt (gsi);
a70d6342 2705 gimple_set_uid (stmt, 0);
4fbeb363 2706 add_stmt (stmt);
a70d6342
IR
2707 }
2708
2c515559 2709 bb->aux = this;
a70d6342
IR
2710}
2711
2712
2713/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2714 stmts in the basic block. */
2715
2c515559 2716_bb_vec_info::~_bb_vec_info ()
a70d6342 2717{
2c515559
RS
2718 for (gimple_stmt_iterator si = region_begin;
2719 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
458135c0
RS
2720 /* Reset region marker. */
2721 gimple_set_uid (gsi_stmt (si), -1);
a70d6342 2722
2c515559 2723 bb->aux = NULL;
a70d6342
IR
2724}
2725
15944069
RS
2726/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2727 given that child nodes have already been processed, and that
2728 their def types currently match their SLP node's def type. */
a70d6342
IR
2729
2730static bool
15944069
RS
2731vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2732 slp_instance node_instance,
2733 stmt_vector_for_cost *cost_vec)
a70d6342 2734{
b9787581 2735 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bd2f172f
RB
2736 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2737
2738 /* For BB vectorization vector types are assigned here.
2739 Memory accesses already got their vector type assigned
2740 in vect_analyze_data_refs. */
2741 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
9b75f56d 2742 if (bb_vinfo && STMT_VINFO_VECTYPE (stmt_info) == boolean_type_node)
a70d6342 2743 {
9b75f56d
RS
2744 tree vectype = vect_get_mask_type_for_stmt (stmt_info, node);
2745 if (!vectype)
2746 /* vect_get_mask_type_for_stmt has already explained the
2747 failure. */
2748 return false;
bd2f172f 2749
b9787581 2750 stmt_vec_info sstmt_info;
15944069 2751 unsigned int i;
b9787581
RS
2752 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2753 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
a70d6342
IR
2754 }
2755
8b7e9dba
RS
2756 /* Calculate the number of vector statements to be created for the
2757 scalar stmts in this node. For SLP reductions it is equal to the
2758 number of vector statements in the children (which has already been
2759 calculated by the recursive call). Otherwise it is the number of
2c53b149 2760 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
8b7e9dba 2761 VF divided by the number of elements in a vector. */
2c53b149
RB
2762 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2763 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
30c0d1e3
RB
2764 {
2765 for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2766 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2767 {
2768 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2769 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2770 break;
2771 }
2772 }
8b7e9dba
RS
2773 else
2774 {
d9f21f6a 2775 poly_uint64 vf;
8b7e9dba
RS
2776 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2777 vf = loop_vinfo->vectorization_factor;
2778 else
2779 vf = 1;
2780 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2781 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2782 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2783 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2784 }
2785
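 /* Worked example (hypothetical numbers): with a loop vectorization factor
    of 4, a group of 2 scalar stmts and a vector type holding 8 elements,
    the computation above yields 4 * 2 / 8 = 1 vector stmt for this node.  */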
15944069 2786 bool dummy;
86a91c0a 2787 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
15944069
RS
2788}
2789
60838d63
RS
2790/* Try to build NODE from scalars, returning true on success.
2791 NODE_INSTANCE is the SLP instance that contains NODE. */
2792
2793static bool
2794vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
2795 slp_instance node_instance)
2796{
2797 stmt_vec_info stmt_info;
2798 unsigned int i;
2799
2800 if (!is_a <bb_vec_info> (vinfo)
2801 || node == SLP_INSTANCE_TREE (node_instance)
2802 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
2803 return false;
2804
2805 if (dump_enabled_p ())
2806 dump_printf_loc (MSG_NOTE, vect_location,
2807 "Building vector operands from scalars instead\n");
2808
2809 /* Don't remove and free the child nodes here, since they could be
2810 referenced by other structures. The analysis and scheduling phases
2811 (need to) ignore child nodes of anything that isn't vect_internal_def. */
2812 unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
2813 SLP_TREE_DEF_TYPE (node) = vect_external_def;
2814 SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
2815 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2816 {
2817 tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
2818 SLP_TREE_SCALAR_OPS (node)[i] = lhs;
2819 }
2820 return true;
2821}
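/* For example (hypothetical SSA names): a child node whose two scalar
   stmts define x_1 and x_2 but cannot be vectorized is turned into an
   external node with SLP_TREE_SCALAR_OPS {x_1, x_2}; the parent then
   builds its vector operand from those scalars instead of from a
   vectorized child node.  */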
2822
15944069
RS
2823/* Analyze statements contained in SLP tree NODE after recursively analyzing
2824 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2825
2826 Return true if the operations are supported. */
2827
2828static bool
2829vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2830 slp_instance node_instance,
10a73df7
RB
2831 hash_set<slp_tree> &visited,
2832 hash_set<slp_tree> &lvisited,
15944069
RS
2833 stmt_vector_for_cost *cost_vec)
2834{
2835 int i, j;
2836 slp_tree child;
2837
2838 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2839 return true;
2840
2841 /* If we already analyzed the exact same set of scalar stmts we're done.
10a73df7
RB
2842 We share the generated vector stmts for those.
2843 The SLP graph is acyclic, so not caching whether we failed or succeeded
15944069
RS
2844 doesn't result in any issue since we throw away the lvisited set
2845 when we fail. */
10a73df7
RB
2846 if (visited.contains (node)
2847 || lvisited.add (node))
2848 return true;
15944069
RS
2849
2850 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2851 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2852 visited, lvisited, cost_vec))
2853 return false;
2854
f7b94dec
RB
2855 /* ??? We have to catch late the case where two first scalar stmts appear
2856 in multiple SLP children with different def types and fail. Remember
2857 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2858 match it when that is vect_internal_def. */
2859 auto_vec<vect_def_type, 4> dt;
2860 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2861 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2862 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2863 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
f7b94dec 2864
bd2f172f
RB
2865 /* Push SLP node def-type to stmt operands. */
2866 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2867 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2868 && SLP_TREE_SCALAR_STMTS (child).length () != 0)
b9787581 2869 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
bd2f172f 2870 = SLP_TREE_DEF_TYPE (child);
f7b94dec
RB
2871
2872 /* Check everything worked out. */
2873 bool res = true;
bd2f172f 2874 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2875 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2876 {
2877 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2878 {
2879 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2880 != SLP_TREE_DEF_TYPE (child))
2881 res = false;
2882 }
2883 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2884 != dt[j])
2885 res = false;
2886 }
f7b94dec
RB
2887 if (!res && dump_enabled_p ())
2888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2889 "not vectorized: same operand with different "
2890 "def type in stmt.\n");
bd2f172f 2891
f7b94dec
RB
2892 if (res)
2893 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2894 cost_vec);
2895
2896 /* Restore def-types. */
2897 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2898 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2899 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
f7b94dec 2900
60838d63
RS
2901 /* If this node can't be vectorized, try pruning the tree here rather
2902 than felling the whole thing. */
2903 if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
2904 res = true;
2905
f7b94dec 2906 return res;
a70d6342
IR
2907}
2908
2909
8b7e9dba 2910/* Analyze statements in SLP instances of VINFO. Return true if the
a70d6342
IR
2911 operations are supported. */
2912
a12e42fc 2913bool
8b7e9dba 2914vect_slp_analyze_operations (vec_info *vinfo)
a70d6342 2915{
a70d6342
IR
2916 slp_instance instance;
2917 int i;
2918
adac3a68 2919 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
a12e42fc 2920
10a73df7 2921 hash_set<slp_tree> visited;
8b7e9dba 2922 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
a70d6342 2923 {
10a73df7 2924 hash_set<slp_tree> lvisited;
68435eb2
RB
2925 stmt_vector_for_cost cost_vec;
2926 cost_vec.create (2);
8b7e9dba
RS
2927 if (!vect_slp_analyze_node_operations (vinfo,
2928 SLP_INSTANCE_TREE (instance),
10a73df7 2929 instance, visited, lvisited,
2439d584
RB
2930 &cost_vec)
2931 /* Instances with a root stmt require vectorized defs for the
2932 SLP tree root. */
2933 || (SLP_INSTANCE_ROOT_STMT (instance)
2934 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
2935 != vect_internal_def)))
a70d6342 2936 {
b9787581
RS
2937 slp_tree node = SLP_INSTANCE_TREE (instance);
2938 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bbeeac91
DM
2939 if (dump_enabled_p ())
2940 dump_printf_loc (MSG_NOTE, vect_location,
2941 "removing SLP instance operations starting from: %G",
2942 stmt_info->stmt);
6e2dd807 2943 vect_free_slp_instance (instance, false);
8b7e9dba 2944 vinfo->slp_instances.ordered_remove (i);
68435eb2 2945 cost_vec.release ();
a70d6342
IR
2946 }
2947 else
68435eb2 2948 {
10a73df7 2949 for (hash_set<slp_tree>::iterator x = lvisited.begin();
68435eb2 2950 x != lvisited.end(); ++x)
10a73df7 2951 visited.add (*x);
68435eb2 2952 i++;
78604de0 2953
68435eb2
RB
2954 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2955 cost_vec.release ();
2956 }
2957 }
78604de0 2958
8b7e9dba 2959 return !vinfo->slp_instances.is_empty ();
a70d6342
IR
2960}
2961
6eddf228
RB
2962
2963/* Compute the scalar cost of the SLP node NODE and its children
2964 and record it in COST_VEC. Do not account defs that are marked in LIFE and
2965 update LIFE according to uses of NODE. */
2966
a296d6d3 2967static void
292cba13 2968vect_bb_slp_scalar_cost (basic_block bb,
a296d6d3 2969 slp_tree node, vec<bool, va_heap> *life,
4bfcf879
RB
2970 stmt_vector_for_cost *cost_vec,
2971 hash_set<slp_tree> &visited)
6eddf228 2972{
6eddf228 2973 unsigned i;
b9787581 2974 stmt_vec_info stmt_info;
6eddf228
RB
2975 slp_tree child;
2976
4bfcf879
RB
2977 if (visited.add (node))
2978 return;
2979
b9787581 2980 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6eddf228 2981 {
b9787581 2982 gimple *stmt = stmt_info->stmt;
91987857 2983 vec_info *vinfo = stmt_info->vinfo;
6eddf228
RB
2984 ssa_op_iter op_iter;
2985 def_operand_p def_p;
6eddf228 2986
ff4c81cc 2987 if ((*life)[i])
6eddf228
RB
2988 continue;
2989
2990 /* If there is a non-vectorized use of the defs then the scalar
2991 stmt is kept live in which case we do not account it or any
2992 required defs in the SLP children in the scalar cost. This
2993 way we make the vectorization more costly when compared to
2994 the scalar cost. */
2995 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2996 {
2997 imm_use_iterator use_iter;
355fe088 2998 gimple *use_stmt;
6eddf228 2999 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
91987857 3000 if (!is_gimple_debug (use_stmt))
6eddf228 3001 {
91987857
RS
3002 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
3003 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
3004 {
3005 (*life)[i] = true;
3006 BREAK_FROM_IMM_USE_STMT (use_iter);
3007 }
6eddf228
RB
3008 }
3009 }
ff4c81cc 3010 if ((*life)[i])
6eddf228
RB
3011 continue;
3012
b555a2e4
RB
3013 /* Count scalar stmts only once. */
3014 if (gimple_visited_p (stmt))
3015 continue;
3016 gimple_set_visited (stmt, true);
3017
a296d6d3 3018 vect_cost_for_stmt kind;
6eddf228
RB
3019 if (STMT_VINFO_DATA_REF (stmt_info))
3020 {
3021 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
a296d6d3 3022 kind = scalar_load;
6eddf228 3023 else
a296d6d3 3024 kind = scalar_store;
6eddf228 3025 }
e4020b28
RS
3026 else if (vect_nop_conversion_p (stmt_info))
3027 continue;
6eddf228 3028 else
a296d6d3
RB
3029 kind = scalar_stmt;
3030 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
6eddf228
RB
3031 }
3032
faa5399b 3033 auto_vec<bool, 20> subtree_life;
6eddf228 3034 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
3035 {
3036 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3037 {
3038 /* Do not directly pass LIFE to the recursive call, copy it to
3039 confine changes in the callee to the current child/subtree. */
3040 subtree_life.safe_splice (*life);
4bfcf879
RB
3041 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
3042 visited);
faa5399b
RB
3043 subtree_life.truncate (0);
3044 }
3045 }
6eddf228
RB
3046}
3047
69f11a13
IR
3048/* Check if vectorization of the basic block is profitable. */
3049
3050static bool
3051vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
3052{
9771b263 3053 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 3054 slp_instance instance;
1a4b99c1 3055 int i;
c3e7ee41 3056 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 3057 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
3058
3059 /* Calculate scalar cost. */
a296d6d3
RB
3060 stmt_vector_for_cost scalar_costs;
3061 scalar_costs.create (0);
10a73df7 3062 hash_set<slp_tree> visited;
6eddf228 3063 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 3064 {
00f96dc9 3065 auto_vec<bool, 20> life;
ff4c81cc 3066 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
a296d6d3
RB
3067 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
3068 SLP_INSTANCE_TREE (instance),
10a73df7 3069 &life, &scalar_costs, visited);
a296d6d3
RB
3070 }
3071 void *target_cost_data = init_cost (NULL);
68435eb2 3072 add_stmt_costs (target_cost_data, &scalar_costs);
a296d6d3
RB
3073 scalar_costs.release ();
3074 unsigned dummy;
3075 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
3076 destroy_cost_data (target_cost_data);
69f11a13 3077
b555a2e4
RB
3078 /* Unset visited flag. */
3079 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
3080 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3081 gimple_set_visited (gsi_stmt (gsi), false);
3082
c3e7ee41 3083 /* Complete the target-specific cost calculation. */
92345349
BS
3084 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
3085 &vec_inside_cost, &vec_epilogue_cost);
3086
3087 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 3088
73fbfcad 3089 if (dump_enabled_p ())
69f11a13 3090 {
78c60e3d
SS
3091 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3092 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
3093 vec_inside_cost);
3094 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
3095 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 3096 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
3097 }
3098
a6524bba
RB
 3099 /* Vectorization is profitable only if its cost is less than the cost of
 3100 the scalar version. Note that we err on the vector side for equal cost because
3101 the cost estimate is otherwise quite pessimistic (constant uses are
3102 free on the scalar side but cost a load on the vector side for
3103 example). */
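  /* Purely illustrative numbers (not taken from any target): with a
     vector inside cost of 10, a prologue cost of 2 and an epilogue cost
     of 1 the vector total is 13, so a scalar cost of 12 fails the check
     below and the region stays scalar, while a scalar cost of 13 still
     vectorizes because ties go to the vector side.  */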
3104 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
3105 return false;
3106
3107 return true;
3108}
3109
818b3293
JH
 3110/* Find any vectorizable constructors and add them to the grouped_stores
3111 array. */
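/* A hedged example of the kind of statement this looks for (the GIMPLE
   spelling below is illustrative only):

     vector(4) int v_3;
     v_3 = {_5, _6, _7, _8};

   Only constructors whose element count matches TYPE_VECTOR_SUBPARTS
   of the LHS vector type are queued as grouped stores.  */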
3112
3113static void
3114vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
3115{
3116 gimple_stmt_iterator gsi;
3117
3118 for (gsi = bb_vinfo->region_begin;
3119 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3120 {
3121 gimple *stmt = gsi_stmt (gsi);
3122
3123 if (is_gimple_assign (stmt)
3124 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
3125 && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
3126 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE)
3127 {
3128 tree rhs = gimple_assign_rhs1 (stmt);
3129
3130 if (CONSTRUCTOR_NELTS (rhs) == 0)
3131 continue;
3132
3133 poly_uint64 subparts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs));
3134
3135 if (maybe_ne (subparts, CONSTRUCTOR_NELTS (rhs)))
3136 continue;
3137
3138 if (dump_enabled_p ())
3139 dump_printf_loc (MSG_NOTE, vect_location,
3140 "Found vectorizable constructor: %G\n", stmt);
3141 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3142 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3143 }
3144 }
3145}
3146
1d778697
RS
3147/* Check if the region described by BB_VINFO can be vectorized, returning
3148 true if so. When returning false, set FATAL to true if the same failure
3149 would prevent vectorization at other vector sizes, false if it is still
3150 worth trying other sizes. N_STMTS is the number of statements in the
3151 region. */
3152
3153static bool
3154vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
a70d6342 3155{
bfb9d798
RB
3156 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3157
a70d6342 3158 slp_instance instance;
8e19f5a1 3159 int i;
d9f21f6a 3160 poly_uint64 min_vf = 2;
e4a707c4 3161
a5b50aa1
RB
3162 /* The first group of checks is independent of the vector size. */
3163 fatal = true;
3164
428db0ba
RB
3165 /* Analyze the data references. */
3166
a7b3509e 3167 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
a70d6342 3168 {
73fbfcad 3169 if (dump_enabled_p ())
78c60e3d
SS
3170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3171 "not vectorized: unhandled data-ref in basic "
3172 "block.\n");
1d778697 3173 return false;
a70d6342
IR
3174 }
3175
fcac74a1 3176 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 3177 {
73fbfcad 3178 if (dump_enabled_p ())
78c60e3d
SS
3179 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3180 "not vectorized: not enough data-refs in "
3181 "basic block.\n");
1d778697 3182 return false;
a70d6342
IR
3183 }
3184
310213d4 3185 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
3186 {
3187 if (dump_enabled_p ())
3188 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3189 "not vectorized: unhandled data access in "
3190 "basic block.\n");
1d778697 3191 return false;
5abe1e05
RB
3192 }
3193
818b3293
JH
3194 vect_slp_check_for_constructors (bb_vinfo);
3195
a5b50aa1
RB
3196 /* If there are no grouped stores in the region there is no need
3197 to continue with pattern recog as vect_analyze_slp will fail
3198 anyway. */
3199 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 3200 {
73fbfcad 3201 if (dump_enabled_p ())
a5b50aa1
RB
3202 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3203 "not vectorized: no grouped stores in "
3204 "basic block.\n");
1d778697 3205 return false;
a70d6342 3206 }
b8698a0f 3207
a5b50aa1
RB
3208 /* While the rest of the analysis below depends on it in some way. */
3209 fatal = false;
3210
3211 vect_pattern_recog (bb_vinfo);
3212
a70d6342
IR
3213 /* Check the SLP opportunities in the basic block, analyze and build SLP
3214 trees. */
310213d4 3215 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 3216 {
73fbfcad 3217 if (dump_enabled_p ())
effb52da
RB
3218 {
3219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3220 "Failed to SLP the basic block.\n");
3221 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3222 "not vectorized: failed to find SLP opportunities "
3223 "in basic block.\n");
3224 }
1d778697 3225 return false;
a70d6342 3226 }
b8698a0f 3227
62c8a2cf
RS
3228 vect_record_base_alignments (bb_vinfo);
3229
c2a12ca0
RB
3230 /* Analyze and verify the alignment of data references and the
3231 dependence in the SLP instances. */
a5b50aa1
RB
3232 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3233 {
c2a12ca0
RB
3234 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
3235 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1 3236 {
b9787581
RS
3237 slp_tree node = SLP_INSTANCE_TREE (instance);
3238 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bbeeac91
DM
3239 if (dump_enabled_p ())
3240 dump_printf_loc (MSG_NOTE, vect_location,
3241 "removing SLP instance operations starting from: %G",
3242 stmt_info->stmt);
6e2dd807 3243 vect_free_slp_instance (instance, false);
a5b50aa1
RB
3244 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3245 continue;
3246 }
c2a12ca0
RB
3247
3248 /* Mark all the statements that we want to vectorize as pure SLP and
3249 relevant. */
6c7e3b1f 3250 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
c2a12ca0 3251 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
818b3293
JH
3252 if (SLP_INSTANCE_ROOT_STMT (instance))
3253 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
c2a12ca0 3254
a5b50aa1
RB
3255 i++;
3256 }
a5b50aa1 3257 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
1d778697 3258 return false;
a5b50aa1 3259
8b7e9dba 3260 if (!vect_slp_analyze_operations (bb_vinfo))
a70d6342 3261 {
73fbfcad 3262 if (dump_enabled_p ())
e645e942 3263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 3264 "not vectorized: bad operation in basic block.\n");
1d778697 3265 return false;
a70d6342
IR
3266 }
3267
69f11a13 3268 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 3269 if (!unlimited_cost_model (NULL)
69f11a13
IR
3270 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3271 {
73fbfcad 3272 if (dump_enabled_p ())
78c60e3d
SS
3273 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3274 "not vectorized: vectorization is not "
3275 "profitable.\n");
1d778697 3276 return false;
69f11a13
IR
3277 }
3278
73fbfcad 3279 if (dump_enabled_p ())
78c60e3d
SS
3280 dump_printf_loc (MSG_NOTE, vect_location,
3281 "Basic block will be vectorized using SLP\n");
1d778697 3282 return true;
a70d6342
IR
3283}
3284
fa0c8df7
RS
3285/* Subroutine of vect_slp_bb. Try to vectorize the statements between
3286 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3287 on success. The region has N_STMTS statements and has the datarefs
3288 given by DATAREFS. */
a70d6342 3289
fa0c8df7
RS
3290static bool
3291vect_slp_bb_region (gimple_stmt_iterator region_begin,
3292 gimple_stmt_iterator region_end,
3293 vec<data_reference_p> datarefs,
3294 unsigned int n_stmts)
8e19f5a1
IR
3295{
3296 bb_vec_info bb_vinfo;
e021fb86 3297 auto_vector_modes vector_modes;
8e19f5a1 3298
8e19f5a1 3299 /* Autodetect first vector size we try. */
e021fb86
RS
3300 machine_mode next_vector_mode = VOIDmode;
3301 targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3302 unsigned int mode_i = 0;
8e19f5a1 3303
fa0c8df7 3304 vec_info_shared shared;
61d371eb 3305
1c84a2d2 3306 machine_mode autodetected_vector_mode = VOIDmode;
8e19f5a1
IR
3307 while (1)
3308 {
61d371eb 3309 bool vectorized = false;
a5b50aa1 3310 bool fatal = false;
1d778697
RS
3311 bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3312
3313 bool first_time_p = shared.datarefs.is_empty ();
3314 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3315 if (first_time_p)
3316 bb_vinfo->shared->save_datarefs ();
3317 else
3318 bb_vinfo->shared->check_datarefs ();
1c84a2d2 3319 bb_vinfo->vector_mode = next_vector_mode;
1d778697
RS
3320
3321 if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
61d371eb
RB
3322 && dbg_cnt (vect_slp))
3323 {
428db0ba 3324 if (dump_enabled_p ())
df7c2283
RS
3325 {
3326 dump_printf_loc (MSG_NOTE, vect_location,
3327 "***** Analysis succeeded with vector mode"
3328 " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
3329 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3330 }
428db0ba 3331
ca823c85 3332 bb_vinfo->shared->check_datarefs ();
428db0ba
RB
3333 vect_schedule_slp (bb_vinfo);
3334
d1ac60d5 3335 unsigned HOST_WIDE_INT bytes;
bbeeac91
DM
3336 if (dump_enabled_p ())
3337 {
1c84a2d2 3338 if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
bbeeac91
DM
3339 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3340 "basic block part vectorized using %wu byte "
3341 "vectors\n", bytes);
3342 else
3343 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3344 "basic block part vectorized using variable "
3345 "length vectors\n");
3346 }
428db0ba 3347
61d371eb 3348 vectorized = true;
428db0ba 3349 }
df7c2283
RS
3350 else
3351 {
3352 if (dump_enabled_p ())
3353 dump_printf_loc (MSG_NOTE, vect_location,
3354 "***** Analysis failed with vector mode %s\n",
3355 GET_MODE_NAME (bb_vinfo->vector_mode));
3356 }
8e19f5a1 3357
e021fb86 3358 if (mode_i == 0)
1c84a2d2 3359 autodetected_vector_mode = bb_vinfo->vector_mode;
ba7f76dd 3360
a55d8232
RS
3361 if (!fatal)
3362 while (mode_i < vector_modes.length ()
3363 && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
3364 {
3365 if (dump_enabled_p ())
3366 dump_printf_loc (MSG_NOTE, vect_location,
3367 "***** The result for vector mode %s would"
3368 " be the same\n",
3369 GET_MODE_NAME (vector_modes[mode_i]));
3370 mode_i += 1;
3371 }
3372
ba7f76dd 3373 delete bb_vinfo;
86e36728 3374
e021fb86 3375 if (mode_i < vector_modes.length ()
df7c2283
RS
3376 && VECTOR_MODE_P (autodetected_vector_mode)
3377 && (related_vector_mode (vector_modes[mode_i],
3378 GET_MODE_INNER (autodetected_vector_mode))
3379 == autodetected_vector_mode)
3380 && (related_vector_mode (autodetected_vector_mode,
3381 GET_MODE_INNER (vector_modes[mode_i]))
3382 == vector_modes[mode_i]))
3383 {
3384 if (dump_enabled_p ())
3385 dump_printf_loc (MSG_NOTE, vect_location,
3386 "***** Skipping vector mode %s, which would"
3387 " repeat the analysis for %s\n",
3388 GET_MODE_NAME (vector_modes[mode_i]),
3389 GET_MODE_NAME (autodetected_vector_mode));
3390 mode_i += 1;
3391 }
86e36728 3392
61d371eb 3393 if (vectorized
e021fb86 3394 || mode_i == vector_modes.length ()
1c84a2d2 3395 || autodetected_vector_mode == VOIDmode
a5b50aa1
RB
3396 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3397 vector sizes will fail do not bother iterating. */
3398 || fatal)
fa0c8df7
RS
3399 return vectorized;
3400
3401 /* Try the next biggest vector size. */
e021fb86 3402 next_vector_mode = vector_modes[mode_i++];
fa0c8df7 3403 if (dump_enabled_p ())
e021fb86
RS
3404 dump_printf_loc (MSG_NOTE, vect_location,
3405 "***** Re-trying analysis with vector mode %s\n",
3406 GET_MODE_NAME (next_vector_mode));
fa0c8df7
RS
3407 }
3408}
8e19f5a1 3409
fa0c8df7
RS
3410/* Main entry for the BB vectorizer. Analyze and transform BB, returns
3411 true if anything in the basic-block was vectorized. */
61d371eb 3412
fa0c8df7
RS
3413bool
3414vect_slp_bb (basic_block bb)
3415{
3416 gimple_stmt_iterator gsi;
3417 bool any_vectorized = false;
3418
3419 gsi = gsi_start_bb (bb);
3420 while (!gsi_end_p (gsi))
3421 {
3422 gimple_stmt_iterator region_begin = gsi;
3423 vec<data_reference_p> datarefs = vNULL;
3424 int insns = 0;
3425
3426 for (; !gsi_end_p (gsi); gsi_next (&gsi))
61d371eb 3427 {
fa0c8df7
RS
3428 gimple *stmt = gsi_stmt (gsi);
3429 if (is_gimple_debug (stmt))
3430 continue;
3431 insns++;
3432
3433 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3434 vect_location = stmt;
3435
3436 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3437 break;
3438 }
61d371eb 3439
fa0c8df7
RS
3440 /* Skip leading unhandled stmts. */
3441 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3442 {
3443 gsi_next (&gsi);
3444 continue;
61d371eb 3445 }
fa0c8df7
RS
3446
3447 gimple_stmt_iterator region_end = gsi;
3448
028d4092 3449 if (insns > param_slp_max_insns_in_bb)
1d778697
RS
3450 {
3451 if (dump_enabled_p ())
3452 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3453 "not vectorized: too many instructions in "
3454 "basic block.\n");
3455 }
3456 else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
fa0c8df7
RS
3457 any_vectorized = true;
3458
3459 if (gsi_end_p (region_end))
3460 break;
3461
3462 /* Skip the unhandled stmt. */
3463 gsi_next (&gsi);
8e19f5a1 3464 }
61d371eb
RB
3465
3466 return any_vectorized;
8e19f5a1
IR
3467}
3468
3469
c73e7656 3470/* Return true if the vector type chosen for the constant mask operand of STMT_VINFO is a boolean vector. */
e4af0bc4
IE
3471
3472static bool
c73e7656 3473vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo)
e4af0bc4 3474{
32e8e429 3475 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
e4af0bc4 3476 tree op, vectype;
e4af0bc4
IE
3477 enum vect_def_type dt;
3478
3479 /* For comparison and COND_EXPR type is chosen depending
c73e7656 3480 on the non-constant other comparison operand. */
e4af0bc4
IE
3481 if (TREE_CODE_CLASS (code) == tcc_comparison)
3482 {
32e8e429 3483 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
c73e7656 3484 op = gimple_assign_rhs1 (stmt);
e4af0bc4 3485
894dd753 3486 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3487 gcc_unreachable ();
3488
3489 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3490 }
3491
3492 if (code == COND_EXPR)
3493 {
32e8e429 3494 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
e4af0bc4
IE
3495 tree cond = gimple_assign_rhs1 (stmt);
3496
3497 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1 3498 op = cond;
e4af0bc4
IE
3499 else
3500 op = TREE_OPERAND (cond, 0);
3501
894dd753 3502 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3503 gcc_unreachable ();
3504
3505 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3506 }
3507
3508 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3509}
3510
018b2744
RS
3511/* Build a variable-length vector in which the elements in ELTS are repeated
 3512 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3513 RESULTS and add any new instructions to SEQ.
3514
3515 The approach we use is:
3516
3517 (1) Find a vector mode VM with integer elements of mode IM.
3518
3519 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3520 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3521 from small vectors to IM.
3522
3523 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3524
3525 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3526 correct byte contents.
3527
3528 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3529
3530 We try to find the largest IM for which this sequence works, in order
3531 to cut down on the number of interleaves. */
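
   A purely illustrative walk-through with assumed modes and values:
   for ELTS = {a, b, c, d} of a 16-bit element type, NRESULTS = 2 and a
   choice of VM = V4SI with IM = SI, step (2) view-converts {a, b} and
   {c, d} into two SImode scalars, step (3) splats each into a V4SI, a
   single interleaving VEC_PERM_EXPR in step (4) then produces a vector
   whose byte image repeats a b c d, and step (5) view-converts that
   back to the requested VECTOR_TYPE (endianness details ignored).  */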
3532
f1739b48 3533void
43fdde57 3534duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
cdbe6e9b
RS
3535 vec<tree> elts, unsigned int nresults,
3536 vec<tree> &results)
018b2744
RS
3537{
3538 unsigned int nelts = elts.length ();
3539 tree element_type = TREE_TYPE (vector_type);
3540
3541 /* (1) Find a vector mode VM with integer elements of mode IM. */
3542 unsigned int nvectors = 1;
3543 tree new_vector_type;
3544 tree permutes[2];
f884cd2f 3545 if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
018b2744
RS
3546 &nvectors, &new_vector_type,
3547 permutes))
3548 gcc_unreachable ();
3549
3550 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3551 unsigned int partial_nelts = nelts / nvectors;
3552 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3553
3554 tree_vector_builder partial_elts;
3555 auto_vec<tree, 32> pieces (nvectors * 2);
3556 pieces.quick_grow (nvectors * 2);
3557 for (unsigned int i = 0; i < nvectors; ++i)
3558 {
3559 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3560 ELTS' has mode IM. */
3561 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3562 for (unsigned int j = 0; j < partial_nelts; ++j)
3563 partial_elts.quick_push (elts[i * partial_nelts + j]);
3564 tree t = gimple_build_vector (seq, &partial_elts);
3565 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3566 TREE_TYPE (new_vector_type), t);
3567
3568 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3569 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3570 }
3571
3572 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3573 correct byte contents.
3574
3575 We need to repeat the following operation log2(nvectors) times:
3576
3577 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3578 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3579
3580 However, if each input repeats every N elements and the VF is
3581 a multiple of N * 2, the HI result is the same as the LO. */
3582 unsigned int in_start = 0;
3583 unsigned int out_start = nvectors;
3584 unsigned int hi_start = nvectors / 2;
3585 /* A bound on the number of outputs needed to produce NRESULTS results
3586 in the final iteration. */
3587 unsigned int noutputs_bound = nvectors * nresults;
3588 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3589 {
3590 noutputs_bound /= 2;
3591 unsigned int limit = MIN (noutputs_bound, nvectors);
3592 for (unsigned int i = 0; i < limit; ++i)
3593 {
3594 if ((i & 1) != 0
3595 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3596 2 * in_repeat))
3597 {
3598 pieces[out_start + i] = pieces[out_start + i - 1];
3599 continue;
3600 }
3601
3602 tree output = make_ssa_name (new_vector_type);
3603 tree input1 = pieces[in_start + (i / 2)];
3604 tree input2 = pieces[in_start + (i / 2) + hi_start];
3605 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3606 input1, input2,
3607 permutes[i & 1]);
3608 gimple_seq_add_stmt (seq, stmt);
3609 pieces[out_start + i] = output;
3610 }
3611 std::swap (in_start, out_start);
3612 }
3613
3614 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3615 results.reserve (nresults);
3616 for (unsigned int i = 0; i < nresults; ++i)
3617 if (i < nvectors)
3618 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3619 pieces[in_start + i]));
3620 else
3621 results.quick_push (results[i - nvectors]);
3622}
3623
e4af0bc4 3624
b8698a0f
L
3625/* For constant and loop invariant defs of SLP_NODE this function returns
3626 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
30c0d1e3
RB
3627 OP_NODE determines the node for the operand containing the scalar
3628 operands. */
ebfd146a
IR
3629
3630static void
30c0d1e3
RB
3631vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node,
3632 vec<tree> *vec_oprnds)
ebfd146a 3633{
30c0d1e3 3634 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7ed54790 3635 vec_info *vinfo = stmt_vinfo->vinfo;
018b2744 3636 unsigned HOST_WIDE_INT nunits;
ebfd146a 3637 tree vec_cst;
d2a12ae7 3638 unsigned j, number_of_places_left_in_vector;
ebfd146a 3639 tree vector_type;
9dc3f7de 3640 tree vop;
30c0d1e3 3641 int group_size = op_node->ops.length ();
ebfd146a 3642 unsigned int vec_num, i;
d2a12ae7 3643 unsigned number_of_copies = 1;
30c0d1e3 3644 bool constant_p;
b5aeb3bb 3645 tree neutral_op = NULL;
13396b6e 3646 gimple_seq ctor_seq = NULL;
018b2744 3647 auto_vec<tree, 16> permute_results;
b5aeb3bb 3648
30c0d1e3
RB
3649 /* ??? SLP analysis should compute the vector type for the
3650 constant / invariant and store it in the SLP node. */
3651 tree op = op_node->ops[0];
42fd8198 3652 /* Check if vector type is a boolean vector. */
30c0d1e3 3653 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2568d8a1 3654 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c73e7656 3655 && vect_mask_constant_operand_p (stmt_vinfo))
e8738f4e 3656 vector_type = truth_type_for (stmt_vectype);
42fd8198 3657 else
9b75f56d 3658 vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
afbe6325 3659
318bd8c6
RB
3660 /* ??? For lane-reducing ops we should also have the required number
3661 of vector stmts initialized rather than second-guessing here. */
3662 unsigned int number_of_vectors;
3663 if (is_gimple_assign (stmt_vinfo->stmt)
3664 && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3665 || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3666 || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3667 number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3668 else
3669 number_of_vectors
3670 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3671 * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3672 vector_type);
30c0d1e3
RB
3673 vec_oprnds->create (number_of_vectors);
3674 auto_vec<tree> voprnds (number_of_vectors);
ebfd146a 3675
ebfd146a 3676 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3677 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3678
3679 For example, we have two scalar operands, s1 and s2 (e.g., group of
3680 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3681 of this type can be packed in a vector). The output vector will contain
3682 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3683 will be 2).
3684
b8698a0f 3685 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3686 containing the operands.
3687
3688 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3689 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3690 {s5, s6, s7, s8}. */
b8698a0f 3691
018b2744
RS
3692 /* When using duplicate_and_interleave, we just need one element for
3693 each scalar statement. */
3694 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3695 nunits = group_size;
3696
14a61437 3697 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3698
3699 number_of_places_left_in_vector = nunits;
62cf7335 3700 constant_p = true;
5ebaa477 3701 tree_vector_builder elts (vector_type, nunits, 1);
794e3180 3702 elts.quick_grow (nunits);
90dd6e3d 3703 bool place_after_defs = false;
ebfd146a
IR
3704 for (j = 0; j < number_of_copies; j++)
3705 {
30c0d1e3 3706 for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
ebfd146a 3707 {
ebfd146a 3708 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3709 number_of_places_left_in_vector--;
90dd6e3d 3710 tree orig_op = op;
13396b6e 3711 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3712 {
793d9a16 3713 if (CONSTANT_CLASS_P (op))
13396b6e 3714 {
42fd8198
IE
3715 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3716 {
3717 /* Can't use VIEW_CONVERT_EXPR for booleans because
3718 of possibly different sizes of scalar value and
3719 vector element. */
3720 if (integer_zerop (op))
3721 op = build_int_cst (TREE_TYPE (vector_type), 0);
3722 else if (integer_onep (op))
158beb4a 3723 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3724 else
3725 gcc_unreachable ();
3726 }
3727 else
3728 op = fold_unary (VIEW_CONVERT_EXPR,
3729 TREE_TYPE (vector_type), op);
13396b6e
JJ
3730 gcc_assert (op && CONSTANT_CLASS_P (op));
3731 }
3732 else
3733 {
b731b390 3734 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3735 gimple *init_stmt;
262a363f
JJ
3736 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3737 {
158beb4a
JJ
3738 tree true_val
3739 = build_all_ones_cst (TREE_TYPE (vector_type));
3740 tree false_val
3741 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3742 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3743 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3744 op, true_val,
3745 false_val);
262a363f 3746 }
262a363f
JJ
3747 else
3748 {
3749 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3750 op);
3751 init_stmt
3752 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3753 op);
3754 }
13396b6e
JJ
3755 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3756 op = new_temp;
3757 }
50eeef09 3758 }
d2a12ae7 3759 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3760 if (!CONSTANT_CLASS_P (op))
3761 constant_p = false;
90dd6e3d
RB
3762 if (TREE_CODE (orig_op) == SSA_NAME
3763 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3764 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3765 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3766 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3767 place_after_defs = true;
ebfd146a
IR
3768
3769 if (number_of_places_left_in_vector == 0)
3770 {
018b2744
RS
3771 if (constant_p
3772 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3773 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3774 vec_cst = gimple_build_vector (&ctor_seq, &elts);
ebfd146a 3775 else
d2a12ae7 3776 {
30c0d1e3 3777 if (permute_results.is_empty ())
cdbe6e9b
RS
3778 duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3779 elts, number_of_vectors,
018b2744
RS
3780 permute_results);
3781 vec_cst = permute_results[number_of_vectors - j - 1];
d2a12ae7 3782 }
90dd6e3d
RB
3783 tree init;
3784 gimple_stmt_iterator gsi;
3785 if (place_after_defs)
3786 {
95c68311
RS
3787 stmt_vec_info last_stmt_info
3788 = vect_find_last_scalar_stmt_in_slp (slp_node);
3789 gsi = gsi_for_stmt (last_stmt_info->stmt);
b9787581
RS
3790 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3791 &gsi);
90dd6e3d
RB
3792 }
3793 else
b9787581
RS
3794 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3795 NULL);
13396b6e
JJ
3796 if (ctor_seq != NULL)
3797 {
90dd6e3d 3798 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
018b2744 3799 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
13396b6e
JJ
3800 ctor_seq = NULL;
3801 }
90dd6e3d
RB
3802 voprnds.quick_push (init);
3803 place_after_defs = false;
62cf7335
RB
3804 number_of_places_left_in_vector = nunits;
3805 constant_p = true;
5ebaa477
RS
3806 elts.new_vector (vector_type, nunits, 1);
3807 elts.quick_grow (nunits);
ebfd146a
IR
3808 }
3809 }
3810 }
3811
b8698a0f 3812 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 3813 them. */
9771b263 3814 vec_num = voprnds.length ();
d2a12ae7 3815 for (j = vec_num; j != 0; j--)
ebfd146a 3816 {
9771b263
DN
3817 vop = voprnds[j - 1];
3818 vec_oprnds->quick_push (vop);
ebfd146a
IR
3819 }
3820
ebfd146a 3821 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3822 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3823 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3824 to replicate the vectors. */
9771b263 3825 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3826 {
b5aeb3bb
IR
3827 tree neutral_vec = NULL;
3828
3829 if (neutral_op)
3830 {
3831 if (!neutral_vec)
b9acc9f1 3832 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3833
9771b263 3834 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3835 }
3836 else
3837 {
9771b263
DN
3838 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3839 vec_oprnds->quick_push (vop);
b5aeb3bb 3840 }
ebfd146a
IR
3841 }
3842}
3843
3844
3845/* Get vectorized definitions from SLP_NODE that contains corresponding
3846 vectorized def-stmts. */
3847
3848static void
9771b263 3849vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a 3850{
16edaeb8 3851 stmt_vec_info vec_def_stmt_info;
ebfd146a
IR
3852 unsigned int i;
3853
9771b263 3854 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3855
16edaeb8 3856 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
30c0d1e3 3857 vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
ebfd146a
IR
3858}
3859
3860
30c0d1e3 3861/* Get N vectorized definitions for SLP_NODE.
b8698a0f 3862 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
3863 call vect_get_constant_vectors() to create vector stmts.
3864 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
3865 must be stored in the corresponding child of SLP_NODE, and we call
3866 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 3867
ebfd146a 3868void
30c0d1e3 3869vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
ebfd146a 3870{
30c0d1e3
RB
3871 if (n == -1U)
3872 n = SLP_TREE_CHILDREN (slp_node).length ();
ebfd146a 3873
30c0d1e3 3874 for (unsigned i = 0; i < n; ++i)
ebfd146a 3875 {
30c0d1e3 3876 slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
b5aeb3bb 3877
30c0d1e3 3878 vec<tree> vec_defs = vNULL;
ebfd146a 3879
30c0d1e3
RB
3880 /* For each operand we check if it has vectorized definitions in a child
3881 node or we need to create them (for invariants and constants). */
3882 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3883 {
3884 vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3885 vect_get_slp_vect_defs (child, &vec_defs);
3886 }
d092494c 3887 else
30c0d1e3 3888 vect_get_constant_vectors (child, slp_node, &vec_defs);
ebfd146a 3889
37b5ec8f 3890 vec_oprnds->quick_push (vec_defs);
d092494c 3891 }
ebfd146a
IR
3892}
3893
ebfd146a
IR
3894/* Generate vector permute statements from a list of loads in DR_CHAIN.
3895 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3896 permute statements for the SLP node NODE of the SLP instance
3897 SLP_NODE_INSTANCE. */
3898
ebfd146a 3899bool
01d8bf07 3900vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
d9f21f6a
RS
3901 gimple_stmt_iterator *gsi, poly_uint64 vf,
3902 slp_instance slp_node_instance, bool analyze_only,
29afecdf 3903 unsigned *n_perms)
ebfd146a 3904{
b9787581 3905 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
16edaeb8 3906 vec_info *vinfo = stmt_info->vinfo;
928686b1 3907 int vec_index = 0;
2635892a 3908 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ab7e60ce 3909 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
928686b1 3910 unsigned int mask_element;
ef4bddc2 3911 machine_mode mode;
ebfd146a 3912
91ff1504
RB
3913 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3914 return false;
3915
bffb8014 3916 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
91ff1504 3917
22e4dee7 3918 mode = TYPE_MODE (vectype);
ab7e60ce 3919 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 3920
61fdfd8c
RB
3921 /* Initialize the vect stmts of NODE to properly insert the generated
3922 stmts later. */
3923 if (! analyze_only)
3924 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3925 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3926 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a 3927
b8698a0f
L
3928 /* Generate permutation masks for every NODE. Number of masks for each NODE
3929 is equal to GROUP_SIZE.
3930 E.g., we have a group of three nodes with three loads from the same
3931 location in each node, and the vector size is 4. I.e., we have a
3932 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3933 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3934 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3935 ...
3936
2635892a 3937 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3938 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
3939 operation, and the mask element values can't be outside that range.
3940 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3941 For the first two permutations we need the first and the second input
ebfd146a 3942 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3943 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3944 {c2,a3,b3,c3}. */
3945
2ce27200 3946 int vect_stmts_counter = 0;
928686b1 3947 unsigned int index = 0;
2ce27200
RB
3948 int first_vec_index = -1;
3949 int second_vec_index = -1;
be377c80 3950 bool noop_p = true;
29afecdf 3951 *n_perms = 0;
ebfd146a 3952
ab7e60ce
RS
3953 vec_perm_builder mask;
3954 unsigned int nelts_to_build;
3955 unsigned int nvectors_per_build;
3956 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3957 && multiple_p (nunits, group_size));
3958 if (repeating_p)
2ce27200 3959 {
ab7e60ce
RS
3960 /* A single vector contains a whole number of copies of the node, so:
3961 (a) all permutes can use the same mask; and
3962 (b) the permutes only need a single vector input. */
3963 mask.new_vector (nunits, group_size, 3);
3964 nelts_to_build = mask.encoded_nelts ();
3965 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3966 }
3967 else
3968 {
3969 /* We need to construct a separate mask for each vector statement. */
3970 unsigned HOST_WIDE_INT const_nunits, const_vf;
3971 if (!nunits.is_constant (&const_nunits)
3972 || !vf.is_constant (&const_vf))
3973 return false;
3974 mask.new_vector (const_nunits, const_nunits, 1);
3975 nelts_to_build = const_vf * group_size;
3976 nvectors_per_build = 1;
3977 }
3978
3979 unsigned int count = mask.encoded_nelts ();
3980 mask.quick_grow (count);
3981 vec_perm_indices indices;
3982
3983 for (unsigned int j = 0; j < nelts_to_build; j++)
3984 {
3985 unsigned int iter_num = j / group_size;
3986 unsigned int stmt_num = j % group_size;
3987 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3988 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3989 if (repeating_p)
2ce27200 3990 {
ab7e60ce
RS
3991 first_vec_index = 0;
3992 mask_element = i;
3993 }
3994 else
3995 {
3996 /* Enforced before the loop when !repeating_p. */
3997 unsigned int const_nunits = nunits.to_constant ();
3998 vec_index = i / const_nunits;
3999 mask_element = i % const_nunits;
2ce27200
RB
4000 if (vec_index == first_vec_index
4001 || first_vec_index == -1)
4002 {
4003 first_vec_index = vec_index;
4004 }
4005 else if (vec_index == second_vec_index
4006 || second_vec_index == -1)
4007 {
4008 second_vec_index = vec_index;
ab7e60ce 4009 mask_element += const_nunits;
2ce27200
RB
4010 }
4011 else
4012 {
4013 if (dump_enabled_p ())
3c2a8ed0
DM
4014 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4015 "permutation requires at "
4016 "least three vectors %G",
4017 stmt_info->stmt);
31bee964 4018 gcc_assert (analyze_only);
2ce27200
RB
4019 return false;
4020 }
ebfd146a 4021
ab7e60ce
RS
4022 gcc_assert (mask_element < 2 * const_nunits);
4023 }
4024
4025 if (mask_element != index)
4026 noop_p = false;
4027 mask[index++] = mask_element;
2ce27200 4028
ab7e60ce
RS
4029 if (index == count && !noop_p)
4030 {
4031 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
4032 if (!can_vec_perm_const_p (mode, indices))
2ce27200 4033 {
ab7e60ce 4034 if (dump_enabled_p ())
2ce27200 4035 {
ab7e60ce
RS
4036 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4037 vect_location,
4038 "unsupported vect permute { ");
4039 for (i = 0; i < count; ++i)
22e4dee7 4040 {
ab7e60ce
RS
4041 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
4042 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
22e4dee7 4043 }
ab7e60ce 4044 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2ce27200 4045 }
ab7e60ce
RS
4046 gcc_assert (analyze_only);
4047 return false;
e3342de4 4048 }
29afecdf 4049
ab7e60ce
RS
4050 ++*n_perms;
4051 }
4052
4053 if (index == count)
4054 {
4055 if (!analyze_only)
e3342de4 4056 {
ab7e60ce 4057 tree mask_vec = NULL_TREE;
be377c80 4058
ab7e60ce
RS
4059 if (! noop_p)
4060 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
2ce27200 4061
ab7e60ce
RS
4062 if (second_vec_index == -1)
4063 second_vec_index = first_vec_index;
61fdfd8c 4064
ab7e60ce
RS
4065 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
4066 {
61fdfd8c 4067 /* Generate the permute statement if necessary. */
ab7e60ce
RS
4068 tree first_vec = dr_chain[first_vec_index + ri];
4069 tree second_vec = dr_chain[second_vec_index + ri];
16edaeb8 4070 stmt_vec_info perm_stmt_info;
61fdfd8c
RB
4071 if (! noop_p)
4072 {
b9787581 4073 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
61fdfd8c
RB
4074 tree perm_dest
4075 = vect_create_destination_var (gimple_assign_lhs (stmt),
4076 vectype);
4077 perm_dest = make_ssa_name (perm_dest);
16edaeb8
RS
4078 gassign *perm_stmt
4079 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
4080 first_vec, second_vec,
4081 mask_vec);
4082 perm_stmt_info
b9787581
RS
4083 = vect_finish_stmt_generation (stmt_info, perm_stmt,
4084 gsi);
61fdfd8c
RB
4085 }
4086 else
4087 /* If mask was NULL_TREE generate the requested
4088 identity transform. */
16edaeb8 4089 perm_stmt_info = vinfo->lookup_def (first_vec);
61fdfd8c
RB
4090
4091 /* Store the vector statement in NODE. */
16edaeb8
RS
4092 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
4093 = perm_stmt_info;
2ce27200 4094 }
2ce27200 4095 }
ab7e60ce
RS
4096
4097 index = 0;
4098 first_vec_index = -1;
4099 second_vec_index = -1;
4100 noop_p = true;
2ce27200 4101 }
b8698a0f 4102 }
ebfd146a 4103
ebfd146a
IR
4104 return true;
4105}
4106
ebfd146a
IR
4107/* Vectorize SLP instance tree in postorder. */
4108
8fe1bd30 4109static void
10a73df7 4110vect_schedule_slp_instance (slp_tree node, slp_instance instance)
ebfd146a 4111{
ebfd146a
IR
4112 gimple_stmt_iterator si;
4113 stmt_vec_info stmt_info;
8b7e9dba 4114 unsigned int group_size;
ebfd146a 4115 tree vectype;
603cca93 4116 int i, j;
d755c7ef 4117 slp_tree child;
ebfd146a 4118
603cca93 4119 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
8fe1bd30 4120 return;
ebfd146a 4121
a1f072e2
RB
4122 /* See if we have already vectorized the node in the graph of the
4123 SLP instance. */
4124 if (SLP_TREE_VEC_STMTS (node).exists ())
4125 return;
4126
9771b263 4127 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
10a73df7 4128 vect_schedule_slp_instance (child, instance);
b8698a0f 4129
603cca93
RB
4130 /* Push SLP node def-type to stmts. */
4131 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4132 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
4133 {
4134 stmt_vec_info child_stmt_info;
4135 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4136 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
4137 }
603cca93 4138
b9787581 4139 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
4140
4141 /* VECTYPE is the type of the destination. */
b690cc0f 4142 vectype = STMT_VINFO_VECTYPE (stmt_info);
dad55d70 4143 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
4144 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
4145
68435eb2 4146 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
a1f072e2 4147 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
ebfd146a 4148
73fbfcad 4149 if (dump_enabled_p ())
3c2a8ed0
DM
4150 dump_printf_loc (MSG_NOTE, vect_location,
4151 "------>vectorizing SLP node starting from: %G",
4152 stmt_info->stmt);
ebfd146a 4153
2e8ab70c
RB
4154 /* Vectorized stmts go before the last scalar stmt which is where
4155 all uses are ready. */
95c68311
RS
4156 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
4157 si = gsi_for_stmt (last_stmt_info->stmt);
e4a707c4 4158
6876e5bc
RB
4159 /* Handle two-operation SLP nodes by vectorizing the group with
4160 both operations and then performing a merge. */
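  /* Illustrative example (assumed, not from the sources): for a group
     { x0 + y0, x1 - y1, x2 + y2, x3 - y3 } the group is vectorized once
     as all PLUS_EXPR and once as all MINUS_EXPR, and the 0/1 mask built
     below ({0, 1, 0, 1} in this case) selects per lane which of the two
     result vectors feeds the final VEC_PERM_EXPR merge.  */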
4161 if (SLP_TREE_TWO_OPERATORS (node))
4162 {
b9787581 4163 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
6876e5bc 4164 enum tree_code code0 = gimple_assign_rhs_code (stmt);
567a3691 4165 enum tree_code ocode = ERROR_MARK;
b9787581 4166 stmt_vec_info ostmt_info;
e3342de4 4167 vec_perm_builder mask (group_size, group_size, 1);
b9787581
RS
4168 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
4169 {
4170 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
4171 if (gimple_assign_rhs_code (ostmt) != code0)
4172 {
4173 mask.quick_push (1);
4174 ocode = gimple_assign_rhs_code (ostmt);
4175 }
4176 else
4177 mask.quick_push (0);
4178 }
567a3691 4179 if (ocode != ERROR_MARK)
6876e5bc 4180 {
16edaeb8
RS
4181 vec<stmt_vec_info> v0;
4182 vec<stmt_vec_info> v1;
6876e5bc
RB
4183 unsigned j;
4184 tree tmask = NULL_TREE;
b0b45e58 4185 vect_transform_stmt (stmt_info, &si, node, instance);
6876e5bc
RB
4186 v0 = SLP_TREE_VEC_STMTS (node).copy ();
4187 SLP_TREE_VEC_STMTS (node).truncate (0);
4188 gimple_assign_set_rhs_code (stmt, ocode);
b0b45e58 4189 vect_transform_stmt (stmt_info, &si, node, instance);
6876e5bc
RB
4190 gimple_assign_set_rhs_code (stmt, code0);
4191 v1 = SLP_TREE_VEC_STMTS (node).copy ();
4192 SLP_TREE_VEC_STMTS (node).truncate (0);
4193 tree meltype = build_nonstandard_integer_type
b397965c 4194 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
6876e5bc
RB
4195 tree mvectype = get_same_sized_vectype (meltype, vectype);
4196 unsigned k = 0, l;
4197 for (j = 0; j < v0.length (); ++j)
4198 {
dad55d70
RS
4199 /* Enforced by vect_build_slp_tree, which rejects variable-length
4200 vectors for SLP_TREE_TWO_OPERATORS. */
4201 unsigned int const_nunits = nunits.to_constant ();
4202 tree_vector_builder melts (mvectype, const_nunits, 1);
4203 for (l = 0; l < const_nunits; ++l)
6876e5bc 4204 {
1ece8d4c 4205 if (k >= group_size)
6876e5bc 4206 k = 0;
dad55d70
RS
4207 tree t = build_int_cst (meltype,
4208 mask[k++] * const_nunits + l);
794e3180 4209 melts.quick_push (t);
6876e5bc 4210 }
5ebaa477 4211 tmask = melts.build ();
6876e5bc
RB
4212
4213 /* ??? Not all targets support a VEC_PERM_EXPR with a
4214 constant mask that would translate to a vec_merge RTX
4215 (with their vec_perm_const_ok). We can either not
4216 vectorize in that case or let veclower do its job.
4217 Unfortunately that isn't too great and at least for
4218 plus/minus we'd eventually like to match targets
4219 vector addsub instructions. */
355fe088 4220 gimple *vstmt;
6876e5bc
RB
4221 vstmt = gimple_build_assign (make_ssa_name (vectype),
4222 VEC_PERM_EXPR,
16edaeb8
RS
4223 gimple_assign_lhs (v0[j]->stmt),
4224 gimple_assign_lhs (v1[j]->stmt),
4225 tmask);
4226 SLP_TREE_VEC_STMTS (node).quick_push
b9787581 4227 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
6876e5bc
RB
4228 }
4229 v0.release ();
4230 v1.release ();
8fe1bd30 4231 return;
6876e5bc
RB
4232 }
4233 }
b0b45e58 4234 vect_transform_stmt (stmt_info, &si, node, instance);
603cca93
RB
4235
4236 /* Restore stmt def-types. */
4237 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4238 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
4239 {
4240 stmt_vec_info child_stmt_info;
4241 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4242 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4243 }
ebfd146a
IR
4244}
4245
dd34c087
JJ
4246/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4247 For loop vectorization this is done in vectorizable_call, but for SLP
4248 it needs to be deferred until end of vect_schedule_slp, because multiple
4249 SLP instances may refer to the same scalar stmt. */
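/* Illustrative example (names assumed): a group member
   x_7 = foo (a_1); whose call has been vectorized as part of the SLP
   tree is rewritten here to x_7 = 0; of the appropriate type, leaving
   it to later DCE to remove the dead scalar assignment.  */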
4250
4251static void
4bfcf879 4252vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
dd34c087 4253{
b9787581 4254 gimple *new_stmt;
dd34c087
JJ
4255 gimple_stmt_iterator gsi;
4256 int i;
d755c7ef 4257 slp_tree child;
dd34c087
JJ
4258 tree lhs;
4259 stmt_vec_info stmt_info;
4260
603cca93 4261 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
dd34c087
JJ
4262 return;
4263
4bfcf879
RB
4264 if (visited.add (node))
4265 return;
4266
9771b263 4267 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4bfcf879 4268 vect_remove_slp_scalar_calls (child, visited);
dd34c087 4269
b9787581 4270 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
dd34c087 4271 {
b9787581
RS
4272 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4273 if (!stmt || gimple_bb (stmt) == NULL)
dd34c087 4274 continue;
b9787581 4275 if (is_pattern_stmt_p (stmt_info)
dd34c087
JJ
4276 || !PURE_SLP_STMT (stmt_info))
4277 continue;
4278 lhs = gimple_call_lhs (stmt);
4279 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
dd34c087 4280 gsi = gsi_for_stmt (stmt);
9d97912b 4281 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
dd34c087
JJ
4282 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4283 }
4284}
ebfd146a 4285
4bfcf879
RB
4286static void
4287vect_remove_slp_scalar_calls (slp_tree node)
4288{
4289 hash_set<slp_tree> visited;
4290 vect_remove_slp_scalar_calls (node, visited);
4291}
4292
818b3293
JH
4293/* Vectorize the instance root. */
4294
4295void
4296vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4297{
0ec77a6c 4298 gassign *rstmt = NULL;
818b3293
JH
4299
4300 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4301 {
4302 stmt_vec_info child_stmt_info;
4303 int j;
4304
4305 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4306 {
4307 tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4308 tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
4309 rstmt = gimple_build_assign (root_lhs, vect_lhs);
4310 break;
4311 }
4312 }
4313 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4314 {
4315 int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4316 stmt_vec_info child_stmt_info;
4317 int j;
4318 vec<constructor_elt, va_gc> *v;
4319 vec_alloc (v, nelts);
4320
4321 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4322 {
4323 CONSTRUCTOR_APPEND_ELT (v,
4324 NULL_TREE,
4325 gimple_get_lhs (child_stmt_info->stmt));
4326 }
4327 tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4328 tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4329 tree r_constructor = build_constructor (rtype, v);
4330 rstmt = gimple_build_assign (lhs, r_constructor);
4331 }
0ec77a6c
TC
4332
4333 gcc_assert (rstmt);
4334
818b3293
JH
4335 gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4336 gsi_replace (&rgsi, rstmt, true);
4337}
4338
ff802fa1
IR
4339/* Generate vector code for all SLP instances in the loop/basic block. */
4340
8fe1bd30 4341void
310213d4 4342vect_schedule_slp (vec_info *vinfo)
ebfd146a 4343{
9771b263 4344 vec<slp_instance> slp_instances;
ebfd146a 4345 slp_instance instance;
8b7e9dba 4346 unsigned int i;
78604de0 4347
310213d4 4348 slp_instances = vinfo->slp_instances;
9771b263 4349 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a 4350 {
818b3293 4351 slp_tree node = SLP_INSTANCE_TREE (instance);
ebfd146a 4352 /* Schedule the tree of INSTANCE. */
10a73df7 4353 vect_schedule_slp_instance (node, instance);
818b3293
JH
4354
4355 if (SLP_INSTANCE_ROOT_STMT (instance))
4356 vectorize_slp_instance_root_stmt (node, instance);
4357
73fbfcad 4358 if (dump_enabled_p ())
78c60e3d 4359 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4360 "vectorizing stmts using SLP.\n");
ebfd146a
IR
4361 }
4362
9771b263 4363 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
4364 {
4365 slp_tree root = SLP_INSTANCE_TREE (instance);
b9787581 4366 stmt_vec_info store_info;
b5aeb3bb 4367 unsigned int j;
b5aeb3bb 4368
c40eced0
RB
4369 /* Remove scalar call stmts. Do not do this for basic-block
4370 vectorization as not all uses may be vectorized.
4371 ??? Why should this be necessary? DCE should be able to
4372 remove the stmts itself.
4373 ??? For BB vectorization we can as well remove scalar
4374 stmts starting from the SLP tree root if they have no
4375 uses. */
310213d4 4376 if (is_a <loop_vec_info> (vinfo))
c40eced0 4377 vect_remove_slp_scalar_calls (root);
dd34c087 4378
b9787581 4379 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
b5aeb3bb
IR
4380 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4381 {
b9787581
RS
4382 if (!STMT_VINFO_DATA_REF (store_info))
4383 break;
4384
818b3293
JH
4385 if (SLP_INSTANCE_ROOT_STMT (instance))
4386 continue;
4387
211cd1e2 4388 store_info = vect_orig_stmt (store_info);
b9787581 4389 /* Free the attached stmt_vec_info and remove the stmt. */
b5b56c2a 4390 vinfo->remove_stmt (store_info);
b5aeb3bb
IR
4391 }
4392 }
ebfd146a 4393}