/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  if (--node->refcnt != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	{
	  gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
	  STMT_VINFO_NUM_SLP_USES (stmt_info)--;
	}
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_OPS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_SCALAR_OPS (node) = vNULL;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}

/* Create an SLP node for OPS.  */

static slp_tree
vect_create_new_slp_node (vec<tree> ops)
{
  slp_tree node;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = vNULL;
  SLP_TREE_SCALAR_OPS (node) = ops;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node) = vNULL;
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  return node;
}
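
/* The two overloads above create the two kinds of SLP node used below:
   nodes wrapping a group of scalar statements (vect_internal_def, with
   child slots for their operands) and operand-only nodes wrapping a
   vector of trees (used for constant and external defs, which have no
   children).  As a purely illustrative sketch, for the group
   { a[0] = x[0] + c;  a[1] = x[1] + c; } the statement node would get one
   child built from the loads of x[0] and x[1] and one operand-only child
   holding { c, c }.  */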

/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Operands.  */
  vec<tree> ops;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool any_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->ops.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->any_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      oprnd_info->ops.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Return true if STMTS contains a pattern statement.  */

static bool
vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
{
  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (is_pattern_stmt_p (stmt_info))
      return true;
  return false;
}

/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */

int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
				      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
	return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
	result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}
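
/* For example (an illustrative trace of the loop above): if the chain is
   S1 -> S2 -> S3 with DR_GROUP_GAP (S2) == 2 and DR_GROUP_GAP (S3) == 1,
   the function returns 0 for S1, 2 for S2 and 3 for S3; each place is the
   sum of the gaps of the chain members that follow FIRST_STMT_INFO up to
   and including the queried statement.  */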

/* Check whether it is possible to load COUNT elements of type ELT_TYPE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
				tree elt_type, unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
    return false;

  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
	{
	  /* Get the natural vector type for this SLP group size.  */
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type
	    = get_vectype_for_scalar_type (vinfo, int_type, count);
	  if (vector_type
	      && VECTOR_MODE_P (TYPE_MODE (vector_type))
	      && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
			   GET_MODE_SIZE (base_vector_mode)))
	    {
	      /* Try fusing consecutive sequences of COUNT / NVECTORS elements
		 together into elements of type INT_TYPE and using the result
		 to build NVECTORS vectors.  */
	      poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}
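
/* Illustration of the permutations built above: when the chosen
   VECTOR_TYPE has a constant number of elements NELTS == 4, INDICES1
   expands to { 0, 4, 1, 5 } and INDICES2 to { 2, 6, 3, 7 }, i.e. the
   low-part and high-part interleave of two input vectors; these are the
   two permute masks handed back to duplicate_and_interleave via
   PERMUTES.  */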

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to a
   non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<stmt_vec_info> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
	{
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  commutative_op = first_commutative_argument (ifn);

	  /* Masked load, only look at mask.  */
	  if (ifn == IFN_MASK_LOAD)
	    {
	      number_of_oprnds = 1;
	      /* Mask operand index.  */
	      first_op_idx = 5;
	    }
	}
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could end up with a comparison code different from the first
	 stmt's.  */
      if (code == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
					     first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt_info->stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
      if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
	oprnd = TREE_OPERAND (oprnd, 0);

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: can't analyze def for %T\n",
			     oprnd);

	  return -1;
	}

      if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
	oprnd_info->any_pattern = true;

      if (first)
	{
	  /* For the swapping logic below force vect_reduction_def
	     for the reduction op in a SLP reduction group.  */
	  if (!STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
	      && def_stmt_info)
	    dt = vect_reduction_def;
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     end in the reduction chain.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && !STMT_VINFO_DATA_REF (stmt_info)
		    && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		    && def_stmt_info
		    && !STMT_VINFO_DATA_REF (def_stmt_info)
		    && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
			== REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type)
	      || (!STMT_VINFO_DATA_REF (stmt_info)
		  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		  && ((!def_stmt_info
		       || STMT_VINFO_DATA_REF (def_stmt_info)
		       || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
			   != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
		      != (oprnd_info->first_dt != vect_reduction_def))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == commutative_op && !swapped)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_NOTE, vect_location,
				     "trying swapped operands\n");
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
						      type)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: invalid type of def "
				 "for variable-length SLP %T\n", oprnd);
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_external_def:
	  /* Make sure to demote the overall operand to external.  */
	  oprnd_info->first_dt = vect_external_def;
	  /* Fallthru.  */
	case vect_constant_def:
	  oprnd_info->def_stmts.quick_push (NULL);
	  oprnd_info->ops.quick_push (oprnd);
	  break;

	case vect_internal_def:
	case vect_reduction_def:
	  if (oprnd_info->first_dt == vect_reduction_def
	      && !STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && !STMT_VINFO_DATA_REF (def_stmt_info)
	      && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
		  == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	    {
	      /* For a SLP reduction chain we want to duplicate the
		 reduction to each of the chain members.  That gets
		 us a sane SLP graph (still the stmts are not 100%
		 correct wrt the initial values).  */
	      gcc_assert (!first);
	      oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
	      oprnd_info->ops.quick_push (oprnd_info->ops[0]);
	      break;
	    }
	  /* Fallthru.  */
	case vect_induction_def:
	  oprnd_info->def_stmts.quick_push (def_stmt_info);
	  oprnd_info->ops.quick_push (oprnd);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: illegal type of def %T\n",
			     oprnd);

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "swapped operands to match def types in %G",
			 stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}

/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
   Return true if we can, meaning that this choice doesn't conflict with
   existing SLP nodes that use STMT_INFO.  */

static bool
vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
{
  tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
    return true;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      /* We maintain the invariant that if any statement in the group is
	 used, all other members of the group have the same vector type.  */
      stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info member_info = first_info;
      for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
	    || is_pattern_stmt_p (member_info))
	  break;

      if (!member_info)
	{
	  for (member_info = first_info; member_info;
	       member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	    STMT_VINFO_VECTYPE (member_info) = vectype;
	  return true;
	}
    }
  else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
	   && !is_pattern_stmt_p (stmt_info))
    {
      STMT_VINFO_VECTYPE (stmt_info) = vectype;
      return true;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: incompatible vector"
		       " types for: %G", stmt_info->stmt);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "  old vector type: %T\n", old_vectype);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "  new vector type: %T\n", vectype);
    }
  return false;
}
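
/* As a hypothetical example of the check above: if an earlier SLP
   instance already recorded vector type V4SI for a member of a load
   group, a later instance that would need an incompatible type such as
   V8HI for the same group fails here (and the dump shows the old and
   new vector types); only a type that agrees with the recorded one up
   to useless conversions, or a group none of whose members is in use
   yet, is accepted.  */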

/* Try to infer and assign a vector type to all the statements in STMTS.
   Used only for BB vectorization.  */

static bool
vect_update_all_shared_vectypes (vec_info *vinfo, vec<stmt_vec_info> stmts)
{
  tree vectype, nunits_vectype;
  if (!vect_get_vector_types_for_stmt (vinfo, stmts[0], &vectype,
				       &nunits_vectype, stmts.length ()))
    return false;

  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (!vect_update_shared_vectype (stmt_info, vectype))
      return false;

  return true;
}

/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
	return false;
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))
	  return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))
	return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
	return false;
    }
  return true;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
			unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type in %G\n",
			 stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unrolling required "
			 "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */

static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
			       unsigned int group_size, tree vectype,
			       tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
	elt += count;
      sel.quick_push (elt);
    }
  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}
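
/* Worked example for the check above (values illustrative): for the
   group { a0 + b0, a1 - b1 } with GROUP_SIZE == 2, ALT_STMT_CODE ==
   MINUS_EXPR and a V4SI VECTYPE, the selector becomes { 0, 5, 2, 7 }:
   even lanes are taken from the vector of first-operation results and
   odd lanes from the vector of ALT_STMT_CODE results (lane I + COUNT
   selects from the second input).  This is the permute that the
   SLP_TREE_TWO_OPERATORS handling later relies on.  */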

/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;
  bool load_p = false;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: unvectorizable statement %G",
			     stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: not GIMPLE_ASSIGN nor "
			     "GIMPLE_CALL %G", stmt);
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (vinfo, stmt_info, &vectype,
					   &nunits_vectype, group_size)
	  || (nunits_vectype
	      && !vect_record_max_nunits (vinfo, stmt_info, group_size,
					  nunits_vectype, max_nunits)))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      gcc_assert (vectype);

      if (is_a <bb_vec_info> (vinfo)
	  && !vect_update_shared_vectype (stmt_info, vectype))
	continue;

      gcall *call_stmt = dyn_cast <gcall *> (stmt);
      if (call_stmt)
	{
	  rhs_code = CALL_EXPR;

	  if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
	    load_p = true;
	  else if ((gimple_call_internal_p (call_stmt)
		    && (!vectorizable_internal_fn_p
			  (gimple_call_internal_fn (call_stmt))))
		   || gimple_call_tail_p (call_stmt)
		   || gimple_call_noreturn_p (call_stmt)
		   || !gimple_call_nothrow_p (call_stmt)
		   || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: unsupported call type %G",
				 call_stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	{
	  rhs_code = gimple_assign_rhs_code (stmt);
	  load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
	}

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	  else if (call_stmt
		   && gimple_call_internal_p (call_stmt, IFN_DIV_POW2))
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_call_arg (call_stmt, 1);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt %G", stmt);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt %G", first_stmt_info->stmt);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds)
	    {
	      tree other_op1 = (call_stmt
				? gimple_call_arg (call_stmt, 1)
				: gimple_assign_rhs2 (stmt));
	      if (!operand_equal_p (first_op1, other_op1, 0))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different shift "
				     "arguments in %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }

	  if (!load_p && rhs_code == CALL_EXPR)
	    {
	      if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
				       as_a <gcall *> (stmt)))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different calls in %G",
				     stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 vect_location,
					 "Build SLP failed: different "
					 "interleaving chains in one node %G",
					 stmt);
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (load_p)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: not grouped load %G", stmt);

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != VIEW_CONVERT_EXPR
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: operation unsupported %G",
				 stmt);
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different"
				     " operation %G", stmt);
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (!vect_two_operations_perm_ok_p (stmts, group_size,
					  vectype, alt_stmt_code))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt %G", stmts[i]->stmt);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt %G", first_stmt_info->stmt);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static const bool empty_zero_p = true;
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<stmt_vec_info> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     scalar_stmts_to_slp_tree_map_t *bst_map)
{
  if (slp_tree *leader = bst_map->get (stmts))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
			 *leader ? "" : "failed ", *leader);
      if (*leader)
	{
	  (*leader)->refcnt++;
	  vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
	}
      return *leader;
    }
  poly_uint64 this_max_nunits = 1;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
					&this_max_nunits,
					matches, npermutes, tree_size, bst_map);
  if (res)
    {
      res->max_nunits = this_max_nunits;
      vect_update_max_nunits (max_nunits, this_max_nunits);
      /* Keep a reference for the bst_map use.  */
      res->refcnt++;
    }
  bst_map->put (stmts.copy (), res);
  return res;
}
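
/* Note on the memoization above: BST_MAP caches both successful and
   failed builds keyed on the exact scalar statement set, so a statement
   set reached a second time (for example through a diamond in the SLP
   graph) re-uses the existing node, bumping its reference count and
   folding its max_nunits into the caller's, while a cached failure
   (NULL) is returned without re-analysis.  The extra reference taken
   when the entry is created keeps the node live for the cache.  */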

/* Recursively build an SLP tree for the group of scalar statements STMTS.
   Fail (and return NULL) if the def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation, recording in
   MATCHES which statements failed to match the first one.  Otherwise,
   return the built SLP node.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (stmt_info != other_info)
	      return NULL;
	}
      else if (def_type == vect_reduction_def
	       || def_type == vect_double_reduction_def
	       || def_type == vect_nested_cycle)
	{
	  /* Else def types have to match.  */
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
	      return NULL;
	}
      else
	return NULL;
      (*tree_size)++;
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion unless masked.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
	{
	  /* Masked load.  */
	  gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
	  nops = 1;
	}
      else
	{
	  *max_nunits = this_max_nunits;
	  (*tree_size)++;
	  node = vect_create_new_slp_node (stmts);
	  /* And compute the load permutation.  Whether it is actually
	     a permutation depends on the unrolling factor which is
	     decided later.  */
	  vec<unsigned> load_permutation;
	  int j;
	  stmt_vec_info load_info;
	  load_permutation.create (group_size);
	  stmt_vec_info first_stmt_info
	    = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
	    {
	      int load_place = vect_get_place_in_interleaving_chain
		(load_info, first_stmt_info);
	      gcc_assert (load_place != -1);
	      load_permutation.safe_push (load_place);
	    }
	  SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
	  return node;
	}
    }
1331
6983e6b5
RB
1332 /* Get at the operands, verifying they are compatible. */
1333 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1334 slp_oprnd_info oprnd_info;
b9787581 1335 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
6983e6b5 1336 {
4cecd659 1337 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
018b2744 1338 stmts, i, &oprnds_info);
4cecd659
BC
1339 if (res != 0)
1340 matches[(res == -1) ? 0 : i] = false;
1341 if (!matches[0])
1342 break;
6983e6b5 1343 }
b0b4483e
RB
1344 for (i = 0; i < group_size; ++i)
1345 if (!matches[i])
1346 {
1347 vect_free_oprnd_info (oprnds_info);
e403d17e 1348 return NULL;
b0b4483e 1349 }
6983e6b5 1350
e403d17e 1351 auto_vec<slp_tree, 4> children;
e403d17e 1352
b9787581 1353 stmt_info = stmts[0];
6983e6b5 1354
b8698a0f 1355 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 1356 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 1357 {
d092494c 1358 slp_tree child;
e403d17e
RB
1359 unsigned old_tree_size = this_tree_size;
1360 unsigned int j;
b8698a0f 1361
30c0d1e3
RB
1362 if (oprnd_info->first_dt == vect_uninitialized_def)
1363 {
1364 /* COND_EXPR have one too many eventually if the condition
1365 is a SSA name. */
1366 gcc_assert (i == 3 && nops == 4);
1367 continue;
1368 }
1369
e7baeb39 1370 if (oprnd_info->first_dt != vect_internal_def
c78e3652 1371 && oprnd_info->first_dt != vect_reduction_def
e7baeb39 1372 && oprnd_info->first_dt != vect_induction_def)
30c0d1e3
RB
1373 {
1374 slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
1375 SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
1376 oprnd_info->ops = vNULL;
1377 children.safe_push (invnode);
1378 continue;
1379 }
ebfd146a 1380
e403d17e
RB
1381 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1382 group_size, &this_max_nunits,
5d8c32cb 1383 matches, npermutes,
9f708a84 1384 &this_tree_size, bst_map)) != NULL)
6983e6b5 1385 {
f4a74d27
RB
1386 /* If we have all children of a non-unary child built up from
1387 scalars then just throw that away and build it up this node
1388 from scalars. */
30c0d1e3 1389 if (is_a <bb_vec_info> (vinfo)
f4a74d27 1390 && SLP_TREE_CHILDREN (child).length () > 1
995b6fe0
RB
1391 /* ??? Rejecting patterns this way doesn't work. We'd have to
1392 do extra work to cancel the pattern so the uses see the
1393 scalar version. */
7098ab48 1394 && !oprnd_info->any_pattern)
3fc356dc 1395 {
3fc356dc
RB
1396 slp_tree grandchild;
1397
1398 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
f99d6262 1399 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
3fc356dc 1400 break;
9b75f56d 1401 if (!grandchild
308bc496
RB
1402 && vect_update_all_shared_vectypes (vinfo,
1403 oprnd_info->def_stmts))
3fc356dc
RB
1404 {
1405 /* Roll back. */
e403d17e 1406 this_tree_size = old_tree_size;
3fc356dc 1407 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1408 vect_free_slp_tree (grandchild, false);
3fc356dc
RB
1409 SLP_TREE_CHILDREN (child).truncate (0);
1410
bbeeac91
DM
1411 if (dump_enabled_p ())
1412 dump_printf_loc (MSG_NOTE, vect_location,
1413 "Building parent vector operands from "
1414 "scalars instead\n");
3fc356dc 1415 oprnd_info->def_stmts = vNULL;
603cca93 1416 SLP_TREE_DEF_TYPE (child) = vect_external_def;
30c0d1e3
RB
1417 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1418 oprnd_info->ops = vNULL;
9f708a84 1419 ++this_tree_size;
e403d17e 1420 children.safe_push (child);
3fc356dc
RB
1421 continue;
1422 }
1423 }
1424
6983e6b5 1425 oprnd_info->def_stmts = vNULL;
e403d17e 1426 children.safe_push (child);
6983e6b5
RB
1427 continue;
1428 }
1429
90dd6e3d
RB
1430 /* If the SLP build failed fatally and we analyze a basic-block
1431 simply treat nodes we fail to build as externally defined
1432 (and thus build vectors from the scalar defs).
1433 The cost model will reject outright expensive cases.
1434 ??? This doesn't treat cases where permutation ultimatively
1435 fails (or we don't try permutation below). Ideally we'd
1436 even compute a permutation that will end up with the maximum
1437 SLP tree size... */
310213d4 1438 if (is_a <bb_vec_info> (vinfo)
90dd6e3d
RB
1439 && !matches[0]
1440 /* ??? Rejecting patterns this way doesn't work. We'd have to
1441 do extra work to cancel the pattern so the uses see the
1442 scalar version. */
7098ab48 1443 && !is_pattern_stmt_p (stmt_info)
9b75f56d 1444 && !oprnd_info->any_pattern
308bc496 1445 && vect_update_all_shared_vectypes (vinfo, oprnd_info->def_stmts))
90dd6e3d 1446 {
bbeeac91
DM
1447 if (dump_enabled_p ())
1448 dump_printf_loc (MSG_NOTE, vect_location,
1449 "Building vector operands from scalars\n");
9f708a84 1450 this_tree_size++;
e403d17e 1451 child = vect_create_new_slp_node (oprnd_info->def_stmts);
603cca93 1452 SLP_TREE_DEF_TYPE (child) = vect_external_def;
30c0d1e3 1453 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
e403d17e 1454 children.safe_push (child);
30c0d1e3 1455 oprnd_info->ops = vNULL;
e403d17e 1456 oprnd_info->def_stmts = vNULL;
90dd6e3d
RB
1457 continue;
1458 }
1459
6983e6b5
RB
1460 /* If the SLP build for operand zero failed and operand zero
1461 and one can be commutated try that for the scalar stmts
1462 that failed the match. */
1463 if (i == 0
1464 /* A first scalar stmt mismatch signals a fatal mismatch. */
1465 && matches[0]
1466 /* ??? For COND_EXPRs we can swap the comparison operands
1467 as well as the arms under some constraints. */
1468 && nops == 2
1469 && oprnds_info[1]->first_dt == vect_internal_def
b9787581 1470 && is_gimple_assign (stmt_info->stmt)
2153fa7b
RB
1471 /* Swapping operands for reductions breaks assumptions later on. */
1472 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
1473 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
6983e6b5
RB
1474 /* Do so only if the number of not successful permutes was nor more
1475 than a cut-ff as re-trying the recursive match on
1476 possibly each level of the tree would expose exponential
1477 behavior. */
1478 && *npermutes < 4)
1479 {
85c5e2f5
RB
1480 /* See whether we can swap the matching or the non-matching
1481 stmt operands. */
1482 bool swap_not_matching = true;
1483 do
1484 {
1485 for (j = 0; j < group_size; ++j)
1486 {
1487 if (matches[j] != !swap_not_matching)
1488 continue;
b9787581 1489 stmt_vec_info stmt_info = stmts[j];
85c5e2f5 1490 /* Verify if we can swap operands of this stmt. */
b9787581
RS
1491 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1492 if (!stmt
85c5e2f5
RB
1493 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1494 {
1495 if (!swap_not_matching)
1496 goto fail;
1497 swap_not_matching = false;
1498 break;
1499 }
85c5e2f5
RB
1500 }
1501 }
1502 while (j != group_size);
78810bd3 1503
6983e6b5 1504 /* Swap mismatched definition stmts. */
bbeeac91
DM
1505 if (dump_enabled_p ())
1506 dump_printf_loc (MSG_NOTE, vect_location,
1507 "Re-trying with swapped operands of stmts ");
e72baed7 1508 for (j = 0; j < group_size; ++j)
85c5e2f5 1509 if (matches[j] == !swap_not_matching)
6983e6b5 1510 {
6b4db501
MM
1511 std::swap (oprnds_info[0]->def_stmts[j],
1512 oprnds_info[1]->def_stmts[j]);
30c0d1e3
RB
1513 std::swap (oprnds_info[0]->ops[j],
1514 oprnds_info[1]->ops[j]);
bbeeac91
DM
1515 if (dump_enabled_p ())
1516 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1517 }
bbeeac91
DM
1518 if (dump_enabled_p ())
1519 dump_printf (MSG_NOTE, "\n");
74574669
RB
1520 /* And try again with scratch 'matches' ... */
1521 bool *tem = XALLOCAVEC (bool, group_size);
e403d17e
RB
1522 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1523 group_size, &this_max_nunits,
5d8c32cb 1524 tem, npermutes,
9f708a84 1525 &this_tree_size, bst_map)) != NULL)
6983e6b5 1526 {
f4a74d27
RB
1527 /* If we have all children of a non-unary child built up from
1528 scalars then just throw that away and build it up this node
1529 from scalars. */
30c0d1e3 1530 if (is_a <bb_vec_info> (vinfo)
f4a74d27 1531 && SLP_TREE_CHILDREN (child).length () > 1
995b6fe0
RB
1532 /* ??? Rejecting patterns this way doesn't work. We'd have
1533 to do extra work to cancel the pattern so the uses see the
1534 scalar version. */
7098ab48 1535 && !oprnd_info->any_pattern)
85c69b0b
RB
1536 {
1537 unsigned int j;
1538 slp_tree grandchild;
1539
1540 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
f99d6262 1541 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
85c69b0b 1542 break;
9b75f56d
RS
1543 if (!grandchild
1544 && (vect_update_all_shared_vectypes
308bc496 1545 (vinfo, oprnd_info->def_stmts)))
85c69b0b
RB
1546 {
1547 /* Roll back. */
e403d17e 1548 this_tree_size = old_tree_size;
85c69b0b 1549 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1550 vect_free_slp_tree (grandchild, false);
85c69b0b
RB
1551 SLP_TREE_CHILDREN (child).truncate (0);
1552
bbeeac91
DM
1553 if (dump_enabled_p ())
1554 dump_printf_loc (MSG_NOTE, vect_location,
1555 "Building parent vector operands from "
1556 "scalars instead\n");
85c69b0b 1557 oprnd_info->def_stmts = vNULL;
603cca93 1558 SLP_TREE_DEF_TYPE (child) = vect_external_def;
30c0d1e3
RB
1559 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1560 oprnd_info->ops = vNULL;
9f708a84 1561 ++this_tree_size;
e403d17e 1562 children.safe_push (child);
85c69b0b
RB
1563 continue;
1564 }
1565 }
1566
6983e6b5 1567 oprnd_info->def_stmts = vNULL;
e403d17e 1568 children.safe_push (child);
6983e6b5
RB
1569 continue;
1570 }
1571
1572 ++*npermutes;
1573 }
1574
78810bd3 1575fail:
e403d17e
RB
1576 gcc_assert (child == NULL);
1577 FOR_EACH_VEC_ELT (children, j, child)
6e2dd807 1578 vect_free_slp_tree (child, false);
6983e6b5 1579 vect_free_oprnd_info (oprnds_info);
e403d17e 1580 return NULL;
ebfd146a
IR
1581 }
1582
e403d17e
RB
1583 vect_free_oprnd_info (oprnds_info);
1584
9f708a84 1585 *tree_size += this_tree_size + 1;
e403d17e 1586 *max_nunits = this_max_nunits;
1428105c 1587
e403d17e
RB
1588 node = vect_create_new_slp_node (stmts);
1589 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1590 SLP_TREE_CHILDREN (node).splice (children);
1591 return node;
ebfd146a
IR
1592}
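/* Editor's note (illustrative sketch, not part of the original source):
   the operand-swapping retry above only applies to commutative
   operations.  A hypothetical group of two scalar stmts such as

     a[0] = b[0] + c[0];
     a[1] = c[1] + b[1];

   first fails to match because the operands of the second add come in
   the opposite order; swapping oprnds_info[0]/oprnds_info[1] for the
   mismatched lane makes both lanes read { b, c }, and the SLP build is
   retried with a scratch 'matches' array.  */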
1593
78c60e3d 1594/* Dump an SLP tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1595
1596static void
4f5b9c80 1597vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
a1f072e2 1598 slp_tree node, hash_set<slp_tree> &visited)
ebfd146a 1599{
759bd406 1600 unsigned i, j;
b9787581 1601 stmt_vec_info stmt_info;
d755c7ef 1602 slp_tree child;
6c7b0df8 1603 tree op;
ebfd146a 1604
a1f072e2
RB
1605 if (visited.add (node))
1606 return;
1607
3da39f52
DM
1608 dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
1609 dump_user_location_t user_loc = loc.get_user_location ();
759bd406 1610 dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n",
6c7b0df8
RB
1611 SLP_TREE_DEF_TYPE (node) == vect_external_def
1612 ? " (external)"
1613 : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
1614 ? " (constant)"
1615 : ""), node,
759bd406 1616 estimated_poly_value (node->max_nunits), node->refcnt);
6c7b0df8
RB
1617 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1618 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1619 dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
1620 else
1621 {
1622 dump_printf_loc (metadata, user_loc, "\t{ ");
1623 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1624 dump_printf (metadata, "%T%s ", op,
1625 i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
1626 dump_printf (metadata, "}\n");
1627 }
759bd406
RB
1628 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1629 {
1630 dump_printf_loc (metadata, user_loc, "\tload permutation {");
1631 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), i, j)
1632 dump_printf (dump_kind, " %u", j);
1633 dump_printf (dump_kind, " }\n");
1634 }
a1f072e2
RB
1635 if (SLP_TREE_CHILDREN (node).is_empty ())
1636 return;
3da39f52 1637 dump_printf_loc (metadata, user_loc, "\tchildren");
9771b263 1638 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a1f072e2
RB
1639 dump_printf (dump_kind, " %p", (void *)child);
1640 dump_printf (dump_kind, "\n");
1641 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1642 vect_print_slp_tree (dump_kind, loc, child, visited);
ebfd146a
IR
1643}
1644
a1f072e2
RB
1645static void
1646vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1647 slp_tree node)
1648{
1649 hash_set<slp_tree> visited;
1650 vect_print_slp_tree (dump_kind, loc, node, visited);
1651}
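/* Editor's note (illustrative, addresses and counts are made up): the
   output produced by vect_print_slp_tree roughly follows the format
   strings above, e.g.

     node 0x2f4a3e0 (max_nunits=4, refcnt=1)
	stmt 0 a[0] = _5;
	stmt 1 a[1] = _6;
	children 0x2f4a470
     node (external) 0x2f4a470 (max_nunits=1, refcnt=1)
	{ _5, _6 }

   Each node is printed once; the visited set guards against dumping a
   shared subtree more than once.  */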
ebfd146a 1652
6c7e3b1f 1653/* Mark the tree rooted at NODE with PURE_SLP. */
ebfd146a
IR
1654
1655static void
6c7e3b1f 1656vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
ebfd146a
IR
1657{
1658 int i;
b9787581 1659 stmt_vec_info stmt_info;
d755c7ef 1660 slp_tree child;
ebfd146a 1661
603cca93 1662 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
1663 return;
1664
4bfcf879
RB
1665 if (visited.add (node))
1666 return;
1667
b9787581 1668 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6c7e3b1f 1669 STMT_SLP_TYPE (stmt_info) = pure_slp;
ebfd146a 1670
9771b263 1671 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6c7e3b1f 1672 vect_mark_slp_stmts (child, visited);
ebfd146a
IR
1673}
1674
4bfcf879 1675static void
6c7e3b1f 1676vect_mark_slp_stmts (slp_tree node)
4bfcf879
RB
1677{
1678 hash_set<slp_tree> visited;
6c7e3b1f 1679 vect_mark_slp_stmts (node, visited);
4bfcf879 1680}
ebfd146a 1681
a70d6342
IR
1682/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1683
1684static void
4bfcf879 1685vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
a70d6342
IR
1686{
1687 int i;
a70d6342 1688 stmt_vec_info stmt_info;
d755c7ef 1689 slp_tree child;
a70d6342 1690
603cca93 1691 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
1692 return;
1693
4bfcf879
RB
1694 if (visited.add (node))
1695 return;
1696
b9787581 1697 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
a70d6342 1698 {
b8698a0f 1699 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1700 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1701 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1702 }
1703
9771b263 1704 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4bfcf879
RB
1705 vect_mark_slp_stmts_relevant (child, visited);
1706}
1707
1708static void
1709vect_mark_slp_stmts_relevant (slp_tree node)
1710{
1711 hash_set<slp_tree> visited;
1712 vect_mark_slp_stmts_relevant (node, visited);
a70d6342
IR
1713}
1714
81c833b3
RB
1715/* Copy the SLP subtree rooted at NODE. */
1716
1717static slp_tree
1718slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
1719{
1720 unsigned i;
1721
1722 bool existed_p;
e840185b 1723 slp_tree &copy_ref = map.get_or_insert (node, &existed_p);
81c833b3 1724 if (existed_p)
e840185b 1725 return copy_ref;
81c833b3 1726
e840185b
RB
1727 copy_ref = XNEW (_slp_tree);
1728 slp_tree copy = copy_ref;
81c833b3
RB
1729 memcpy (copy, node, sizeof (_slp_tree));
1730 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1731 {
1732 SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
1733 stmt_vec_info stmt_info;
1734 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1735 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
1736 }
1737 if (SLP_TREE_SCALAR_OPS (node).exists ())
1738 SLP_TREE_SCALAR_OPS (copy) = SLP_TREE_SCALAR_OPS (node).copy ();
1739 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1740 SLP_TREE_LOAD_PERMUTATION (copy) = SLP_TREE_LOAD_PERMUTATION (node).copy ();
1741 if (SLP_TREE_CHILDREN (node).exists ())
1742 SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
1743 gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());
1744 copy->refcnt = 0;
1745
1746 slp_tree child;
1747 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
1748 {
1749 SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map);
1750 SLP_TREE_CHILDREN (copy)[i]->refcnt++;
1751 }
1752 return copy;
1753}
a70d6342 1754
b5aeb3bb
IR
1755/* Rearrange the statements of NODE according to PERMUTATION. */
1756
1757static void
1758vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
a1f072e2
RB
1759 vec<unsigned> permutation,
1760 hash_set<slp_tree> &visited)
b5aeb3bb 1761{
d755c7ef
RB
1762 unsigned int i;
1763 slp_tree child;
b5aeb3bb 1764
a1f072e2
RB
1765 if (visited.add (node))
1766 return;
1767
9771b263 1768 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a1f072e2 1769 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
b5aeb3bb 1770
30c0d1e3
RB
1771 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1772 {
1773 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
4dcc4502
RB
1774 /* ??? Computation nodes are isomorphic and need no rearrangement.
1775 This is a quick hack to cover those where rearrangement breaks
1776 semantics because only the first stmt is guaranteed to have the
1777 correct operation code due to others being swapped or inverted. */
1778 stmt_vec_info first = SLP_TREE_SCALAR_STMTS (node)[0];
1779 if (is_gimple_assign (first->stmt)
1780 && gimple_assign_rhs_code (first->stmt) == COND_EXPR)
1781 return;
30c0d1e3
RB
1782 vec<stmt_vec_info> tmp_stmts;
1783 tmp_stmts.create (group_size);
1784 tmp_stmts.quick_grow (group_size);
1785 stmt_vec_info stmt_info;
1786 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1787 tmp_stmts[permutation[i]] = stmt_info;
1788 SLP_TREE_SCALAR_STMTS (node).release ();
1789 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1790 }
1791 if (SLP_TREE_SCALAR_OPS (node).exists ())
1792 {
1793 gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1794 vec<tree> tmp_ops;
1795 tmp_ops.create (group_size);
1796 tmp_ops.quick_grow (group_size);
1797 tree op;
1798 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1799 tmp_ops[permutation[i]] = op;
1800 SLP_TREE_SCALAR_OPS (node).release ();
1801 SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1802 }
b5aeb3bb
IR
1803}
1804
1805
b266b968
RB
1806/* Attempt to reorder stmts in a reduction chain so that we don't
1807 require any load permutation. Return true if that was possible,
1808 otherwise return false. */
1809
1810static bool
1811vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1812{
b266b968 1813 unsigned int i, j;
b266b968
RB
1814 unsigned int lidx;
1815 slp_tree node, load;
1816
bc484e25
RB
1817 if (SLP_INSTANCE_LOADS (slp_instn).is_empty ())
1818 return false;
1819
b266b968
RB
1820 /* Compare all the permutation sequences to the first one. We know
1821 that at least one load is permuted. */
1822 node = SLP_INSTANCE_LOADS (slp_instn)[0];
ab5934a8 1823 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
b266b968 1824 return false;
ab5934a8 1825 unsigned int group_size = SLP_TREE_LOAD_PERMUTATION (node).length ();
b266b968
RB
1826 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1827 {
ab5934a8
RB
1828 if (!SLP_TREE_LOAD_PERMUTATION (load).exists ()
1829 || SLP_TREE_LOAD_PERMUTATION (load).length () != group_size)
b266b968 1830 return false;
ab5934a8
RB
1831 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load), j, lidx)
1832 if (lidx != SLP_TREE_LOAD_PERMUTATION (node)[j])
b266b968
RB
1833 return false;
1834 }
1835
1836 /* Check that the loads in the first sequence are different and there
1837 are no gaps between them. */
7ba9e72d 1838 auto_sbitmap load_index (group_size);
b266b968
RB
1839 bitmap_clear (load_index);
1840 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1841 {
41eefe13 1842 if (lidx >= group_size)
7ba9e72d 1843 return false;
b266b968 1844 if (bitmap_bit_p (load_index, lidx))
7ba9e72d
TS
1845 return false;
1846
b266b968
RB
1847 bitmap_set_bit (load_index, lidx);
1848 }
1849 for (i = 0; i < group_size; i++)
1850 if (!bitmap_bit_p (load_index, i))
7ba9e72d 1851 return false;
b266b968
RB
1852
1853 /* This permutation is valid for reduction. Since the order of the
1854 statements in the nodes is not important unless they are memory
1855 accesses, we can rearrange the statements in all the nodes
1856 according to the order of the loads. */
81c833b3
RB
1857
1858 /* We have to unshare the SLP tree we modify. */
1859 hash_map<slp_tree, slp_tree> map;
1860 slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map);
1861 vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn), false);
1862 unshared->refcnt++;
1863 SLP_INSTANCE_TREE (slp_instn) = unshared;
1864 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1865 SLP_INSTANCE_LOADS (slp_instn)[i] = *map.get (node);
1866 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1867
1868 /* Do the actual re-arrangement. */
a1f072e2 1869 hash_set<slp_tree> visited;
b266b968 1870 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
a1f072e2 1871 node->load_permutation, visited);
b266b968
RB
1872
1873 /* We are done, no actual permutations need to be generated. */
d9f21f6a 1874 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
b266b968 1875 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
c4e360f4 1876 {
b9787581 1877 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1878 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
c4e360f4
RB
1879 /* But we have to keep those permutations that are required because
1880 of handling of gaps. */
d9f21f6a 1881 if (known_eq (unrolling_factor, 1U)
b9787581
RS
1882 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1883 && DR_GROUP_GAP (first_stmt_info) == 0))
c4e360f4 1884 SLP_TREE_LOAD_PERMUTATION (node).release ();
cbd400b4
RB
1885 else
1886 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1887 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
c4e360f4
RB
1888 }
1889
b266b968
RB
1890 return true;
1891}
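/* Editor's note (illustrative): for a reduction whose single load node
   carries load permutation { 2, 0, 3, 1 }, the checks above verify that
   every load node uses the same permutation and that it covers indices
   0..3 exactly once.  The scalar stmts of all nodes in the (unshared)
   tree are then permuted the same way, the stmt in lane i moving to
   lane permutation[i], after which the load permutations themselves can
   be released, or reset to the identity where a permutation is still
   required for gap handling.  */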
1892
5d8c32cb
RB
1893/* Gather loads in the SLP graph NODE and populate the INST loads array. */
1894
1895static void
bc484e25 1896vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
5d8c32cb
RB
1897 hash_set<slp_tree> &visited)
1898{
1899 if (visited.add (node))
1900 return;
1901
1902 if (SLP_TREE_CHILDREN (node).length () == 0)
1903 {
30c0d1e3
RB
1904 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1905 return;
5d8c32cb 1906 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
30c0d1e3 1907 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
5d8c32cb 1908 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
bc484e25 1909 loads.safe_push (node);
5d8c32cb
RB
1910 }
1911 else
1912 {
1913 unsigned i;
1914 slp_tree child;
1915 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
bc484e25 1916 vect_gather_slp_loads (loads, child, visited);
5d8c32cb
RB
1917 }
1918}
1919
1920static void
1921vect_gather_slp_loads (slp_instance inst, slp_tree node)
1922{
1923 hash_set<slp_tree> visited;
bc484e25 1924 vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst), node, visited);
ebfd146a
IR
1925}
1926
1927
e4a707c4 1928/* Find the last (latest) scalar statement among the scalar stmts of SLP node NODE. */
ff802fa1 1929
95c68311 1930stmt_vec_info
2e8ab70c 1931vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1932{
95c68311 1933 stmt_vec_info last = NULL;
b9787581 1934 stmt_vec_info stmt_vinfo;
e4a707c4 1935
b9787581 1936 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2e8ab70c 1937 {
211cd1e2 1938 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
95c68311 1939 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2e8ab70c 1940 }
e4a707c4 1941
2e8ab70c 1942 return last;
e4a707c4
IR
1943}
1944
82570274
RS
1945/* Splits a group of stores, currently beginning at FIRST_VINFO, into
1946 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1947 (also containing the first GROUP1_SIZE stmts, since stores are
1948 consecutive), the second containing the remainder.
1ba91a49
AL
1949 Return the first stmt in the second group. */
1950
82570274
RS
1951static stmt_vec_info
1952vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1ba91a49 1953{
bffb8014 1954 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1ba91a49 1955 gcc_assert (group1_size > 0);
2c53b149 1956 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1ba91a49 1957 gcc_assert (group2_size > 0);
2c53b149 1958 DR_GROUP_SIZE (first_vinfo) = group1_size;
1ba91a49 1959
bffb8014 1960 stmt_vec_info stmt_info = first_vinfo;
1ba91a49
AL
1961 for (unsigned i = group1_size; i > 1; i--)
1962 {
bffb8014
RS
1963 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1964 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1965 }
1966 /* STMT is now the last element of the first group. */
bffb8014
RS
1967 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1968 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1ba91a49 1969
bffb8014
RS
1970 DR_GROUP_SIZE (group2) = group2_size;
1971 for (stmt_info = group2; stmt_info;
1972 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1ba91a49 1973 {
bffb8014
RS
1974 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1975 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1976 }
1977
2c53b149 1978 /* For the second group, the DR_GROUP_GAP is that before the original group,
1ba91a49 1979 plus skipping over the first vector. */
bffb8014 1980 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1ba91a49 1981
2c53b149
RB
1982 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1983 DR_GROUP_GAP (first_vinfo) += group2_size;
1ba91a49
AL
1984
1985 if (dump_enabled_p ())
1986 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1987 group1_size, group2_size);
1988
1989 return group2;
1990}
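/* Editor's note (hypothetical example): splitting a store group of
   DR_GROUP_SIZE 8 with GROUP1_SIZE 6 yields a first group of size 6,
   still headed by FIRST_VINFO, and a second group of size 2.  The
   second group's DR_GROUP_GAP becomes the original gap plus 6, so that
   it skips over the elements now owned by the first group, while the
   first group's gap grows by 2 to skip over the second group.  */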
1991
4b6068ea
RS
1992/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1993 statements and a vector of NUNITS elements. */
1994
1995static poly_uint64
1996calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1997{
1998 return exact_div (common_multiple (nunits, group_size), group_size);
1999}
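/* Editor's note (worked example): for a constant NUNITS of 4 and a
   GROUP_SIZE of 6, common_multiple (4, 6) is 12 and the unrolling
   factor is 12 / 6 = 2, i.e. two copies of the group are needed to fill
   whole vectors.  With NUNITS 4 and GROUP_SIZE 8 the result is
   exact_div (8, 8) = 1 and no unrolling is required.  */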
2000
0d0293ac 2001/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 2002 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
2003 Return FALSE if it's impossible to SLP any stmt in the loop. */
2004
2005static bool
310213d4 2006vect_analyze_slp_instance (vec_info *vinfo,
10a73df7 2007 scalar_stmts_to_slp_tree_map_t *bst_map,
32e8e429 2008 stmt_vec_info stmt_info, unsigned max_tree_size)
ebfd146a
IR
2009{
2010 slp_instance new_instance;
d092494c 2011 slp_tree node;
2c53b149 2012 unsigned int group_size;
b5aeb3bb 2013 tree vectype, scalar_type = NULL_TREE;
1ba91a49 2014 unsigned int i;
b9787581
RS
2015 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2016 vec<stmt_vec_info> scalar_stmts;
818b3293 2017 bool constructor = false;
b5aeb3bb 2018
b9787581 2019 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
b5aeb3bb 2020 {
2c53b149 2021 scalar_type = TREE_TYPE (DR_REF (dr));
b9787581 2022 group_size = DR_GROUP_SIZE (stmt_info);
9b75f56d 2023 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
2c53b149 2024 }
b9787581 2025 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
2026 {
2027 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581
RS
2028 vectype = STMT_VINFO_VECTYPE (stmt_info);
2029 group_size = REDUC_GROUP_SIZE (stmt_info);
b5aeb3bb 2030 }
818b3293
JH
2031 else if (is_gimple_assign (stmt_info->stmt)
2032 && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2033 {
2034 vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2035 group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2036 constructor = true;
2037 }
b5aeb3bb
IR
2038 else
2039 {
310213d4 2040 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581 2041 vectype = STMT_VINFO_VECTYPE (stmt_info);
310213d4 2042 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 2043 }
b8698a0f 2044
ebfd146a
IR
2045 if (!vectype)
2046 {
73fbfcad 2047 if (dump_enabled_p ())
3c2a8ed0
DM
2048 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2049 "Build SLP failed: unsupported data-type %T\n",
2050 scalar_type);
b5aeb3bb 2051
ebfd146a
IR
2052 return false;
2053 }
4b6068ea 2054 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 2055
0d0293ac 2056 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 2057 scalar_stmts.create (group_size);
bffb8014 2058 stmt_vec_info next_info = stmt_info;
b9787581 2059 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 2060 {
b5aeb3bb 2061 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
bffb8014 2062 while (next_info)
b5aeb3bb 2063 {
6e6b18e5 2064 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 2065 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2c53b149
RB
2066 }
2067 }
b9787581 2068 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
2069 {
2070 /* Collect the reduction stmts and store them in
2071 SLP_TREE_SCALAR_STMTS. */
bffb8014 2072 while (next_info)
2c53b149 2073 {
6e6b18e5 2074 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 2075 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
b5aeb3bb 2076 }
14a61437
RB
2077 /* Mark the first element of the reduction chain as reduction to properly
2078 transform the node. In the reduction analysis phase only the last
2079 element of the chain is marked as reduction. */
b4673569
RB
2080 STMT_VINFO_DEF_TYPE (stmt_info)
2081 = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
ef6e6914
RB
2082 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2083 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
b5aeb3bb 2084 }
818b3293
JH
2085 else if (constructor)
2086 {
2087 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2088 tree val;
2089 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2090 {
2091 if (TREE_CODE (val) == SSA_NAME)
2092 {
2093 gimple* def = SSA_NAME_DEF_STMT (val);
2094 stmt_vec_info def_info = vinfo->lookup_stmt (def);
2095 /* Value is defined in another basic block. */
2096 if (!def_info)
2097 return false;
3d42842c 2098 def_info = vect_stmt_to_vectorize (def_info);
818b3293
JH
2099 scalar_stmts.safe_push (def_info);
2100 }
2101 else
2102 return false;
2103 }
140ee00a
RB
2104 if (dump_enabled_p ())
2105 dump_printf_loc (MSG_NOTE, vect_location,
2106 "Analyzing vectorizable constructor: %G\n",
2107 stmt_info->stmt);
818b3293 2108 }
b5aeb3bb
IR
2109 else
2110 {
2111 /* Collect reduction statements. */
32c91dfc
RS
2112 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2113 for (i = 0; reductions.iterate (i, &next_info); i++)
2114 scalar_stmts.safe_push (next_info);
ebfd146a
IR
2115 }
2116
ebfd146a 2117 /* Build the tree for the SLP instance. */
89d390e5
RB
2118 bool *matches = XALLOCAVEC (bool, group_size);
2119 unsigned npermutes = 0;
4b6068ea 2120 poly_uint64 max_nunits = nunits;
9f708a84 2121 unsigned tree_size = 0;
e569db5f 2122 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
5d8c32cb 2123 &max_nunits, matches, &npermutes,
9f708a84 2124 &tree_size, bst_map);
e569db5f 2125 if (node != NULL)
ebfd146a 2126 {
4ef69dfc 2127 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 2128 poly_uint64 unrolling_factor
4b6068ea 2129 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 2130
d9f21f6a 2131 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
2132 && is_a <bb_vec_info> (vinfo))
2133 {
4b6068ea
RS
2134 unsigned HOST_WIDE_INT const_max_nunits;
2135 if (!max_nunits.is_constant (&const_max_nunits)
2136 || const_max_nunits > group_size)
2137 {
2138 if (dump_enabled_p ())
2139 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2140 "Build SLP failed: store group "
2141 "size not a multiple of the vector size "
2142 "in basic block SLP\n");
6e2dd807 2143 vect_free_slp_tree (node, false);
4b6068ea
RS
2144 return false;
2145 }
e569db5f 2146 /* Fatal mismatch. */
4b6068ea 2147 matches[group_size / const_max_nunits * const_max_nunits] = false;
6e2dd807 2148 vect_free_slp_tree (node, false);
e569db5f
VK
2149 }
2150 else
2151 {
a1f072e2 2152 /* Create a new SLP instance. */
99b1c316 2153 new_instance = XNEW (class _slp_instance);
a1f072e2 2154 SLP_INSTANCE_TREE (new_instance) = node;
a1f072e2 2155 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
5d8c32cb 2156 SLP_INSTANCE_LOADS (new_instance) = vNULL;
818b3293
JH
2157 SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2158
5d8c32cb 2159 vect_gather_slp_loads (new_instance, node);
9f708a84
RB
2160 if (dump_enabled_p ())
2161 dump_printf_loc (MSG_NOTE, vect_location,
2162 "SLP size %u vs. limit %u.\n",
2163 tree_size, max_tree_size);
a1f072e2 2164
bc484e25 2165 /* Check whether any load is possibly permuted. */
a1f072e2
RB
2166 slp_tree load_node;
2167 bool loads_permuted = false;
5d8c32cb 2168 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
abf9bfbc 2169 {
148018bc
RB
2170 if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
2171 continue;
2172 unsigned j;
a1f072e2 2173 stmt_vec_info load_info;
a1f072e2 2174 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
148018bc
RB
2175 if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
2176 {
bc484e25 2177 loads_permuted = true;
148018bc
RB
2178 break;
2179 }
01d8bf07 2180 }
ebfd146a 2181
bc484e25 2182 /* If the loads and stores can be handled with load/store-lane
a1f072e2
RB
2183 instructions do not generate this SLP instance. */
2184 if (is_a <loop_vec_info> (vinfo)
2185 && loads_permuted
2186 && dr && vect_store_lanes_supported (vectype, group_size, false))
bb0f5ca7 2187 {
a1f072e2 2188 slp_tree load_node;
5d8c32cb 2189 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
a1f072e2
RB
2190 {
2191 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2192 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2193 /* Use SLP for strided accesses (or if we can't load-lanes). */
2194 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2195 || ! vect_load_lanes_supported
2196 (STMT_VINFO_VECTYPE (stmt_vinfo),
2197 DR_GROUP_SIZE (stmt_vinfo), false))
2198 break;
2199 }
5d8c32cb 2200 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
a1f072e2
RB
2201 {
2202 if (dump_enabled_p ())
2203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2204 "Built SLP cancelled: can use "
2205 "load/store-lanes\n");
2206 vect_free_slp_instance (new_instance, false);
2207 return false;
2208 }
bb0f5ca7 2209 }
a1f072e2 2210
1442bc31
RB
2211 /* If this is a reduction chain with a conversion in front
2212 amend the SLP tree with a node for that. */
2213 if (!dr
2214 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2215 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2216 {
2217 /* Get at the conversion stmt - we know it's the single use
2218 of the last stmt of the reduction chain. */
2219 gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2220 use_operand_p use_p;
2221 gimple *use_stmt;
2222 bool r = single_imm_use (gimple_assign_lhs (tem),
2223 &use_p, &use_stmt);
2224 gcc_assert (r);
2225 next_info = vinfo->lookup_stmt (use_stmt);
2226 next_info = vect_stmt_to_vectorize (next_info);
2227 scalar_stmts = vNULL;
2228 scalar_stmts.create (group_size);
2229 for (unsigned i = 0; i < group_size; ++i)
2230 scalar_stmts.quick_push (next_info);
2231 slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2232 SLP_TREE_CHILDREN (conv).quick_push (node);
2233 SLP_INSTANCE_TREE (new_instance) = conv;
2234 /* We also have to fake this conversion stmt as SLP reduction
2235 group so we don't have to mess with too much code
2236 elsewhere. */
2237 REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2238 REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2239 }
2240
a1f072e2
RB
2241 vinfo->slp_instances.safe_push (new_instance);
2242
ab5934a8
RB
2243 /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
2244 the number of scalar stmts in the root in a few places.
2245 Verify that assumption holds. */
2246 gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
2247 .length () == group_size);
2248
a1f072e2 2249 if (dump_enabled_p ())
bb0f5ca7 2250 {
a1f072e2
RB
2251 dump_printf_loc (MSG_NOTE, vect_location,
2252 "Final SLP tree for instance:\n");
a89349e6
RB
2253 vect_print_slp_tree (MSG_NOTE, vect_location,
2254 SLP_INSTANCE_TREE (new_instance));
bb0f5ca7 2255 }
bb0f5ca7 2256
a1f072e2 2257 return true;
c2a12ca0 2258 }
e569db5f
VK
2259 }
2260 else
2261 {
a1f072e2
RB
2262 /* Failed to SLP. */
2263 /* Free the allocated memory. */
2264 scalar_stmts.release ();
e569db5f 2265 }
b8698a0f 2266
1ba91a49 2267 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2268 vector size. */
4b6068ea 2269 unsigned HOST_WIDE_INT const_nunits;
1ba91a49 2270 if (is_a <bb_vec_info> (vinfo)
91987857
RS
2271 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2272 && DR_GROUP_FIRST_ELEMENT (stmt_info)
4b6068ea 2273 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2274 {
2275 /* We consider breaking the group only on VF boundaries from the existing
2276 start. */
2277 for (i = 0; i < group_size; i++)
2278 if (!matches[i]) break;
2279
4b6068ea 2280 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2281 {
2282 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2283 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2284 unsigned group1_size = i & ~(const_nunits - 1);
1ba91a49 2285
82570274
RS
2286 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2287 group1_size);
10a73df7 2288 bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
86a91c0a 2289 max_tree_size);
1ba91a49
AL
2290 /* If the first non-match was in the middle of a vector,
2291 skip the rest of that vector. */
2292 if (group1_size < i)
2293 {
4b6068ea 2294 i = group1_size + const_nunits;
1ba91a49 2295 if (i < group_size)
4b6068ea 2296 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2297 }
2298 if (i < group_size)
10a73df7
RB
2299 res |= vect_analyze_slp_instance (vinfo, bst_map,
2300 rest, max_tree_size);
1ba91a49
AL
2301 return res;
2302 }
2303 /* Even though the first vector did not all match, we might be able to SLP
2304 (some) of the remainder. FORNOW ignore this possibility. */
2305 }
2306
a70d6342 2307 return false;
ebfd146a
IR
2308}
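/* Editor's note (illustrative): for basic-block SLP the splitting logic
   above only breaks a store group on vector-size boundaries.  With a
   group of 16 stores, const_nunits 4 and the first mismatch at i = 10,
   group1_size becomes 10 & ~3 = 8; the first 8 stores are re-analyzed
   as their own group, the 4 stores containing the mismatch are skipped,
   and only the remaining 4 are retried.  */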
2309
2310
ff802fa1 2311/* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
ebfd146a
IR
2312 trees of packed scalar stmts if SLP is possible. */
2313
f4ebbd24 2314opt_result
310213d4 2315vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2316{
2317 unsigned int i;
f698fccf 2318 stmt_vec_info first_element;
ebfd146a 2319
adac3a68 2320 DUMP_VECT_SCOPE ("vect_analyze_slp");
ebfd146a 2321
10a73df7
RB
2322 scalar_stmts_to_slp_tree_map_t *bst_map
2323 = new scalar_stmts_to_slp_tree_map_t ();
2324
0d0293ac 2325 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2326 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
10a73df7 2327 vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);
ebfd146a 2328
310213d4 2329 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2330 {
310213d4
RB
2331 if (loop_vinfo->reduction_chains.length () > 0)
2332 {
2333 /* Find SLP sequences starting from reduction chains. */
2334 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
10a73df7 2335 if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
310213d4 2336 max_tree_size))
6b5e165b
RB
2337 {
2338 /* Dissolve reduction chain group. */
f698fccf 2339 stmt_vec_info vinfo = first_element;
0214d31a 2340 stmt_vec_info last = NULL;
f698fccf 2341 while (vinfo)
6b5e165b 2342 {
bffb8014 2343 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2c53b149
RB
2344 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2345 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
0214d31a 2346 last = vinfo;
f698fccf 2347 vinfo = next;
6b5e165b 2348 }
f698fccf 2349 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
0214d31a
RB
2350 /* It can be still vectorized as part of an SLP reduction. */
2351 loop_vinfo->reductions.safe_push (last);
6b5e165b 2352 }
310213d4 2353 }
b010117a 2354
310213d4 2355 /* Find SLP sequences starting from groups of reductions. */
0630a4ec 2356 if (loop_vinfo->reductions.length () > 1)
10a73df7 2357 vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
0630a4ec 2358 max_tree_size);
310213d4 2359 }
b5aeb3bb 2360
10a73df7
RB
2361 /* The map keeps a reference on SLP nodes built, release that. */
2362 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2363 it != bst_map->end (); ++it)
2364 if ((*it).second)
2365 vect_free_slp_tree ((*it).second, false);
2366 delete bst_map;
2367
bc484e25
RB
2368 /* Optimize permutations in SLP reductions. */
2369 slp_instance instance;
2370 FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
2371 {
2372 slp_tree node = SLP_INSTANCE_TREE (instance);
2373 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2374 /* Reduction (there are no data-refs in the root).
2375 In reduction chain the order of the loads is not important. */
2376 if (!STMT_VINFO_DATA_REF (stmt_info)
2377 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2378 vect_attempt_slp_rearrange_stmts (instance);
2379 }
2380
2381 /* Gather all loads in the SLP graph. */
2382 hash_set<slp_tree> visited;
2383 FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
2384 vect_gather_slp_loads (vinfo->slp_loads, SLP_INSTANCE_TREE (instance),
2385 visited);
2386
f4ebbd24 2387 return opt_result::success ();
ebfd146a
IR
2388}
2389
bc484e25
RB
2390void
2391vect_optimize_slp (vec_info *vinfo)
2392{
2393 slp_tree node;
2394 unsigned i;
2395 FOR_EACH_VEC_ELT (vinfo->slp_loads, i, node)
2396 {
2397 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
2398 continue;
2399
2400 /* In basic block vectorization we allow any subchain of an interleaving
2401 chain.
2402 FORNOW: not in loop SLP because of realignment complications. */
2403 if (is_a <bb_vec_info> (vinfo))
2404 {
2405 bool subchain_p = true;
2406 stmt_vec_info next_load_info = NULL;
2407 stmt_vec_info load_info;
2408 unsigned j;
2409 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
2410 {
2411 if (j != 0
2412 && (next_load_info != load_info
2413 || DR_GROUP_GAP (load_info) != 1))
2414 {
2415 subchain_p = false;
2416 break;
2417 }
2418 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
2419 }
2420 if (subchain_p)
2421 {
2422 SLP_TREE_LOAD_PERMUTATION (node).release ();
2423 continue;
2424 }
2425 }
2426 else
2427 {
2428 stmt_vec_info load_info;
2429 bool this_load_permuted = false;
2430 unsigned j;
2431 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
2432 if (SLP_TREE_LOAD_PERMUTATION (node)[j] != j)
2433 {
2434 this_load_permuted = true;
2435 break;
2436 }
2437 stmt_vec_info first_stmt_info
2438 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
2439 if (!this_load_permuted
2440 /* The load requires permutation when unrolling exposes
2441 a gap either because the group is larger than the SLP
2442 group-size or because there is a gap between the groups. */
2443 && (known_eq (LOOP_VINFO_VECT_FACTOR (as_a <loop_vec_info> (vinfo)), 1U)
2444 || ((SLP_TREE_SCALAR_STMTS (node).length ()
2445 == DR_GROUP_SIZE (first_stmt_info))
2446 && DR_GROUP_GAP (first_stmt_info) == 0)))
2447 {
2448 SLP_TREE_LOAD_PERMUTATION (node).release ();
2449 continue;
2450 }
2451 }
2452 }
2453}
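/* Editor's note (illustrative): in the basic-block case handled above, a
   load node whose scalar stmts form a contiguous subchain of their
   interleaving chain, e.g. loads of a[2] and a[3] out of the group
   a[0]..a[3], needs no permutation and its load permutation is released.
   In the loop case the permutation is only released when the loads are
   already in order and either the vectorization factor is 1 or the node
   covers the whole DR_GROUP with no gap, since unrolling would otherwise
   expose a gap that still requires permuting.  */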
2454
ebfd146a
IR
2455
2456/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2457 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2458 least one instance. */
ebfd146a 2459
437f4a00 2460bool
ebfd146a
IR
2461vect_make_slp_decision (loop_vec_info loop_vinfo)
2462{
d9f21f6a
RS
2463 unsigned int i;
2464 poly_uint64 unrolling_factor = 1;
9771b263 2465 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2466 slp_instance instance;
2467 int decided_to_slp = 0;
2468
adac3a68 2469 DUMP_VECT_SCOPE ("vect_make_slp_decision");
ebfd146a 2470
9771b263 2471 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2472 {
2473 /* FORNOW: SLP if you can. */
1c84a2d2
RS
2474 /* All unroll factors have the form:
2475
2476 GET_MODE_SIZE (vinfo->vector_mode) * X
2477
2478 for some rational X, so they must have a common multiple. */
d9f21f6a
RS
2479 unrolling_factor
2480 = force_common_multiple (unrolling_factor,
2481 SLP_INSTANCE_UNROLLING_FACTOR (instance));
ebfd146a 2482
ff802fa1 2483 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2484 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2485 loop-based vectorization. Such stmts will be marked as HYBRID. */
6c7e3b1f 2486 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
ebfd146a
IR
2487 decided_to_slp++;
2488 }
2489
2490 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2491
73fbfcad 2492 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2493 {
2494 dump_printf_loc (MSG_NOTE, vect_location,
2495 "Decided to SLP %d instances. Unrolling factor ",
2496 decided_to_slp);
2497 dump_dec (MSG_NOTE, unrolling_factor);
2498 dump_printf (MSG_NOTE, "\n");
2499 }
437f4a00
IR
2500
2501 return (decided_to_slp > 0);
ebfd146a
IR
2502}
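/* Editor's note (hypothetical numbers): if one SLP instance needs an
   unrolling factor of 2 and another needs 3, force_common_multiple
   yields 6, so the loop has to be unrolled by a factor of 6 for both
   instances to operate on whole vectors; a single instance with factor
   1 leaves the loop unchanged.  */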
2503
fae545fb
RB
2504/* Private data for vect_detect_hybrid_slp. */
2505struct vdhs_data
ebfd146a 2506{
fae545fb
RB
2507 loop_vec_info loop_vinfo;
2508 vec<stmt_vec_info> *worklist;
2509};
4bfcf879 2510
fae545fb 2511/* Walker for walk_gimple_op. */
ebfd146a 2512
642fce57 2513static tree
fae545fb 2514vect_detect_hybrid_slp (tree *tp, int *, void *data)
642fce57
RB
2515{
2516 walk_stmt_info *wi = (walk_stmt_info *)data;
fae545fb 2517 vdhs_data *dat = (vdhs_data *)wi->info;
642fce57
RB
2518
2519 if (wi->is_lhs)
2520 return NULL_TREE;
2521
fae545fb
RB
2522 stmt_vec_info def_stmt_info = dat->loop_vinfo->lookup_def (*tp);
2523 if (!def_stmt_info)
2524 return NULL_TREE;
2525 def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
2526 if (PURE_SLP_STMT (def_stmt_info))
642fce57 2527 {
6585ff8f 2528 if (dump_enabled_p ())
3c2a8ed0
DM
2529 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2530 def_stmt_info->stmt);
6585ff8f 2531 STMT_SLP_TYPE (def_stmt_info) = hybrid;
fae545fb 2532 dat->worklist->safe_push (def_stmt_info);
642fce57
RB
2533 }
2534
2535 return NULL_TREE;
ebfd146a
IR
2536}
2537
ebfd146a
IR
2538/* Find stmts that must be both vectorized and SLPed. */
2539
2540void
2541vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2542{
adac3a68 2543 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
ebfd146a 2544
fae545fb
RB
2545 /* All stmts participating in SLP are marked pure_slp, all other
2546 stmts are loop_vect.
2547 First collect all loop_vect stmts into a worklist. */
2548 auto_vec<stmt_vec_info> worklist;
2549 for (unsigned i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
642fce57
RB
2550 {
2551 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
fae545fb
RB
2552 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
2553 gsi_next (&gsi))
2554 {
2555 gphi *phi = gsi.phi ();
2556 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (phi);
2557 if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
2558 worklist.safe_push (stmt_info);
2559 }
642fce57
RB
2560 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2561 gsi_next (&gsi))
2562 {
355fe088 2563 gimple *stmt = gsi_stmt (gsi);
6585ff8f 2564 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
642fce57
RB
2565 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2566 {
fae545fb
RB
2567 for (gimple_stmt_iterator gsi2
2568 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2569 !gsi_end_p (gsi2); gsi_next (&gsi2))
2570 {
2571 stmt_vec_info patt_info
2572 = loop_vinfo->lookup_stmt (gsi_stmt (gsi2));
2573 if (!STMT_SLP_TYPE (patt_info)
2574 && STMT_VINFO_RELEVANT (patt_info))
2575 worklist.safe_push (patt_info);
2576 }
2577 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
642fce57 2578 }
fae545fb
RB
2579 if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
2580 worklist.safe_push (stmt_info);
642fce57
RB
2581 }
2582 }
2583
fae545fb
RB
2584 /* Now we have a worklist of non-SLP stmts, follow use->def chains and
2585 mark any SLP vectorized stmt as hybrid.
2586 ??? We're visiting def stmts N times (once for each non-SLP and
2587 once for each hybrid-SLP use). */
2588 walk_stmt_info wi;
2589 vdhs_data dat;
2590 dat.worklist = &worklist;
2591 dat.loop_vinfo = loop_vinfo;
2592 memset (&wi, 0, sizeof (wi));
2593 wi.info = (void *)&dat;
2594 while (!worklist.is_empty ())
2595 {
2596 stmt_vec_info stmt_info = worklist.pop ();
2597 /* Since SSA operands are not set up for pattern stmts we need
2598 to use walk_gimple_op. */
2599 wi.is_lhs = 0;
2600 walk_gimple_op (stmt_info->stmt, vect_detect_hybrid_slp, &wi);
642fce57 2601 }
ebfd146a
IR
2602}
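/* Editor's note (illustrative): a stmt is "hybrid" when its result is
   needed both inside an SLP instance and by a non-SLP, loop-vectorized
   use.  For example, if _1 = a[i] * 2 feeds a pure-SLP store group but
   also a stmt that is vectorized outside of SLP, the worklist walk
   above reaches _1 from the non-SLP user and marks it hybrid, so it is
   additionally handled by loop-based vectorization.  */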
2603
a70d6342 2604
2c515559
RS
2605/* Initialize a bb_vec_info struct for the statements between
2606 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2607
2608_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
ca823c85
RB
2609 gimple_stmt_iterator region_end_in,
2610 vec_info_shared *shared)
2611 : vec_info (vec_info::bb, init_cost (NULL), shared),
2c515559
RS
2612 bb (gsi_bb (region_begin_in)),
2613 region_begin (region_begin_in),
2614 region_end (region_end_in)
a70d6342 2615{
a70d6342
IR
2616 gimple_stmt_iterator gsi;
2617
61d371eb
RB
2618 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2619 gsi_next (&gsi))
a70d6342 2620 {
355fe088 2621 gimple *stmt = gsi_stmt (gsi);
a70d6342 2622 gimple_set_uid (stmt, 0);
4fbeb363 2623 add_stmt (stmt);
a70d6342
IR
2624 }
2625
2c515559 2626 bb->aux = this;
a70d6342
IR
2627}
2628
2629
2630/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2631 stmts in the basic block. */
2632
2c515559 2633_bb_vec_info::~_bb_vec_info ()
a70d6342 2634{
2c515559
RS
2635 for (gimple_stmt_iterator si = region_begin;
2636 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
458135c0
RS
2637 /* Reset region marker. */
2638 gimple_set_uid (gsi_stmt (si), -1);
a70d6342 2639
2c515559 2640 bb->aux = NULL;
a70d6342
IR
2641}
2642
15944069
RS
2643/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2644 given that child nodes have already been processed, and that
2645 their def types currently match their SLP node's def type. */
a70d6342
IR
2646
2647static bool
15944069
RS
2648vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2649 slp_instance node_instance,
2650 stmt_vector_for_cost *cost_vec)
a70d6342 2651{
b9787581 2652 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bd2f172f
RB
2653 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2654
8b7e9dba
RS
2655 /* Calculate the number of vector statements to be created for the
2656 scalar stmts in this node. For SLP reductions it is equal to the
2657 number of vector statements in the children (which has already been
2658 calculated by the recursive call). Otherwise it is the number of
2c53b149 2659 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
8b7e9dba 2660 VF divided by the number of elements in a vector. */
2c53b149
RB
2661 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2662 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
30c0d1e3
RB
2663 {
2664 for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2665 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2666 {
2667 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2668 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2669 break;
2670 }
2671 }
8b7e9dba
RS
2672 else
2673 {
d9f21f6a 2674 poly_uint64 vf;
8b7e9dba
RS
2675 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2676 vf = loop_vinfo->vectorization_factor;
2677 else
2678 vf = 1;
ab5934a8 2679 unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
8b7e9dba
RS
2680 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2681 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2682 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2683 }
2684
15944069 2685 bool dummy;
308bc496
RB
2686 return vect_analyze_stmt (vinfo, stmt_info, &dummy,
2687 node, node_instance, cost_vec);
15944069
RS
2688}
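/* Editor's note (worked example): for a non-reduction node of two
   scalar stmts (group_size 2) in a loop with vectorization factor 8 and
   a 4-element vectype, the computation above yields
   vect_get_num_vectors (8 * 2, vectype) = 16 / 4 = 4 vector stmts; for
   an SLP reduction the count is instead copied from the first
   internal-def child.  */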
2689
60838d63
RS
2690/* Try to build NODE from scalars, returning true on success.
2691 NODE_INSTANCE is the SLP instance that contains NODE. */
2692
2693static bool
2694vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
2695 slp_instance node_instance)
2696{
2697 stmt_vec_info stmt_info;
2698 unsigned int i;
2699
2700 if (!is_a <bb_vec_info> (vinfo)
2701 || node == SLP_INSTANCE_TREE (node_instance)
2702 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
2703 return false;
2704
2705 if (dump_enabled_p ())
2706 dump_printf_loc (MSG_NOTE, vect_location,
2707 "Building vector operands from scalars instead\n");
2708
2709 /* Don't remove and free the child nodes here, since they could be
2710 referenced by other structures. The analysis and scheduling phases
2711 (need to) ignore child nodes of anything that isn't vect_internal_def. */
2712 unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
2713 SLP_TREE_DEF_TYPE (node) = vect_external_def;
2714 SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
2715 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2716 {
2717 tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
2718 SLP_TREE_SCALAR_OPS (node)[i] = lhs;
2719 }
2720 return true;
2721}
2722
15944069
RS
2723/* Analyze statements contained in SLP tree NODE after recursively analyzing
2724 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2725
2726 Return true if the operations are supported. */
2727
2728static bool
2729vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2730 slp_instance node_instance,
10a73df7
RB
2731 hash_set<slp_tree> &visited,
2732 hash_set<slp_tree> &lvisited,
15944069
RS
2733 stmt_vector_for_cost *cost_vec)
2734{
2735 int i, j;
2736 slp_tree child;
2737
2738 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2739 return true;
2740
2741 /* If we already analyzed the exact same set of scalar stmts we're done.
10a73df7
RB
2742 We share the generated vector stmts for those.
2743 The SLP graph is acyclic so not caching whether we failed or succeeded
15944069
RS
2744 doesn't result in any issue since we throw away the lvisited set
2745 when we fail. */
10a73df7
RB
2746 if (visited.contains (node)
2747 || lvisited.add (node))
2748 return true;
15944069
RS
2749
2750 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2751 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2752 visited, lvisited, cost_vec))
2753 return false;
2754
f7b94dec
RB
2755 /* ??? We have to catch the case late where two first scalar stmts appear
2756 in multiple SLP children with different def type and fail. Remember
2757 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2758 match it when that is vect_internal_def. */
2759 auto_vec<vect_def_type, 4> dt;
2760 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2761 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2762 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2763 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
f7b94dec 2764
bd2f172f
RB
2765 /* Push SLP node def-type to stmt operands. */
2766 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2767 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2768 && SLP_TREE_SCALAR_STMTS (child).length () != 0)
b9787581 2769 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
bd2f172f 2770 = SLP_TREE_DEF_TYPE (child);
f7b94dec
RB
2771
2772 /* Check everything worked out. */
2773 bool res = true;
bd2f172f 2774 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2775 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2776 {
2777 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2778 {
2779 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2780 != SLP_TREE_DEF_TYPE (child))
2781 res = false;
2782 }
2783 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2784 != dt[j])
2785 res = false;
2786 }
f7b94dec
RB
2787 if (!res && dump_enabled_p ())
2788 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2789 "not vectorized: same operand with different "
2790 "def type in stmt.\n");
bd2f172f 2791
f7b94dec
RB
2792 if (res)
2793 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2794 cost_vec);
2795
2796 /* Restore def-types. */
2797 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2798 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2799 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
f7b94dec 2800
60838d63
RS
2801 /* If this node can't be vectorized, try pruning the tree here rather
2802 than felling the whole thing. */
2803 if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
2804 res = true;
2805
f7b94dec 2806 return res;
a70d6342
IR
2807}
2808
2809
8b7e9dba 2810/* Analyze statements in SLP instances of VINFO. Return true if the
a70d6342
IR
2811 operations are supported. */
2812
a12e42fc 2813bool
8b7e9dba 2814vect_slp_analyze_operations (vec_info *vinfo)
a70d6342 2815{
a70d6342
IR
2816 slp_instance instance;
2817 int i;
2818
adac3a68 2819 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
a12e42fc 2820
10a73df7 2821 hash_set<slp_tree> visited;
8b7e9dba 2822 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
a70d6342 2823 {
10a73df7 2824 hash_set<slp_tree> lvisited;
68435eb2
RB
2825 stmt_vector_for_cost cost_vec;
2826 cost_vec.create (2);
8b7e9dba
RS
2827 if (!vect_slp_analyze_node_operations (vinfo,
2828 SLP_INSTANCE_TREE (instance),
10a73df7 2829 instance, visited, lvisited,
2439d584
RB
2830 &cost_vec)
2831 /* Instances with a root stmt require vectorized defs for the
2832 SLP tree root. */
2833 || (SLP_INSTANCE_ROOT_STMT (instance)
2834 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
2835 != vect_internal_def)))
a70d6342 2836 {
b9787581
RS
2837 slp_tree node = SLP_INSTANCE_TREE (instance);
2838 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bbeeac91
DM
2839 if (dump_enabled_p ())
2840 dump_printf_loc (MSG_NOTE, vect_location,
2841 "removing SLP instance operations starting from: %G",
2842 stmt_info->stmt);
6e2dd807 2843 vect_free_slp_instance (instance, false);
8b7e9dba 2844 vinfo->slp_instances.ordered_remove (i);
68435eb2 2845 cost_vec.release ();
a70d6342
IR
2846 }
2847 else
68435eb2 2848 {
10a73df7 2849 for (hash_set<slp_tree>::iterator x = lvisited.begin();
68435eb2 2850 x != lvisited.end(); ++x)
10a73df7 2851 visited.add (*x);
68435eb2 2852 i++;
78604de0 2853
308bc496 2854 add_stmt_costs (vinfo, vinfo->target_cost_data, &cost_vec);
68435eb2
RB
2855 cost_vec.release ();
2856 }
2857 }
78604de0 2858
8b7e9dba 2859 return !vinfo->slp_instances.is_empty ();
a70d6342
IR
2860}
2861
6eddf228
RB
2862
2863/* Compute the scalar cost of the SLP node NODE and its children
2864 and record it in COST_VEC. Do not account defs that are marked in LIFE and
2865 update LIFE according to uses of NODE. */
2866
a296d6d3 2867static void
4e849a74 2868vect_bb_slp_scalar_cost (vec_info *vinfo,
a296d6d3 2869 slp_tree node, vec<bool, va_heap> *life,
4bfcf879
RB
2870 stmt_vector_for_cost *cost_vec,
2871 hash_set<slp_tree> &visited)
6eddf228 2872{
6eddf228 2873 unsigned i;
b9787581 2874 stmt_vec_info stmt_info;
6eddf228
RB
2875 slp_tree child;
2876
4bfcf879
RB
2877 if (visited.add (node))
2878 return;
2879
b9787581 2880 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6eddf228 2881 {
b9787581 2882 gimple *stmt = stmt_info->stmt;
6eddf228
RB
2883 ssa_op_iter op_iter;
2884 def_operand_p def_p;
6eddf228 2885
ff4c81cc 2886 if ((*life)[i])
6eddf228
RB
2887 continue;
2888
2889 /* If there is a non-vectorized use of the defs then the scalar
2890 stmt is kept live in which case we do not account it or any
2891 required defs in the SLP children in the scalar cost. This
2892 way we make the vectorization more costly when compared to
2893 the scalar cost. */
2894 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2895 {
2896 imm_use_iterator use_iter;
355fe088 2897 gimple *use_stmt;
6eddf228 2898 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
91987857 2899 if (!is_gimple_debug (use_stmt))
6eddf228 2900 {
91987857
RS
2901 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2902 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2903 {
2904 (*life)[i] = true;
2905 BREAK_FROM_IMM_USE_STMT (use_iter);
2906 }
6eddf228
RB
2907 }
2908 }
ff4c81cc 2909 if ((*life)[i])
6eddf228
RB
2910 continue;
2911
b555a2e4
RB
2912 /* Count scalar stmts only once. */
2913 if (gimple_visited_p (stmt))
2914 continue;
2915 gimple_set_visited (stmt, true);
2916
a296d6d3 2917 vect_cost_for_stmt kind;
6eddf228
RB
2918 if (STMT_VINFO_DATA_REF (stmt_info))
2919 {
2920 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
a296d6d3 2921 kind = scalar_load;
6eddf228 2922 else
a296d6d3 2923 kind = scalar_store;
6eddf228 2924 }
e4020b28
RS
2925 else if (vect_nop_conversion_p (stmt_info))
2926 continue;
6eddf228 2927 else
a296d6d3
RB
2928 kind = scalar_stmt;
2929 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
6eddf228
RB
2930 }
2931
faa5399b 2932 auto_vec<bool, 20> subtree_life;
6eddf228 2933 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
2934 {
2935 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2936 {
2937 /* Do not directly pass LIFE to the recursive call, copy it to
2938 confine changes in the callee to the current child/subtree. */
2939 subtree_life.safe_splice (*life);
4e849a74 2940 vect_bb_slp_scalar_cost (vinfo, child, &subtree_life, cost_vec,
4bfcf879 2941 visited);
faa5399b
RB
2942 subtree_life.truncate (0);
2943 }
2944 }
6eddf228
RB
2945}
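/* Editor's note (illustrative): if one lane of the node computes
   _1 = b[0] + c[0] and _1 is also used by a stmt outside the SLP
   instance, that lane is marked live in LIFE and neither it nor the
   defs it requires in the SLP children are counted as saved scalar
   cost, which makes the vector-vs-scalar comparison in the caller more
   conservative.  The gimple 'visited' flag keeps a scalar stmt from
   being counted twice when it appears in more than one node.  */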
2946
69f11a13
IR
2947/* Check if vectorization of the basic block is profitable. */
2948
2949static bool
2950vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2951{
9771b263 2952 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2953 slp_instance instance;
1a4b99c1 2954 int i;
c3e7ee41 2955 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2956 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2957
2958 /* Calculate scalar cost. */
a296d6d3
RB
2959 stmt_vector_for_cost scalar_costs;
2960 scalar_costs.create (0);
10a73df7 2961 hash_set<slp_tree> visited;
6eddf228 2962 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2963 {
00f96dc9 2964 auto_vec<bool, 20> life;
ab5934a8
RB
2965 life.safe_grow_cleared
2966 (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance)).length ());
4e849a74 2967 vect_bb_slp_scalar_cost (bb_vinfo,
a296d6d3 2968 SLP_INSTANCE_TREE (instance),
10a73df7 2969 &life, &scalar_costs, visited);
a296d6d3
RB
2970 }
2971 void *target_cost_data = init_cost (NULL);
308bc496 2972 add_stmt_costs (bb_vinfo, target_cost_data, &scalar_costs);
a296d6d3
RB
2973 scalar_costs.release ();
2974 unsigned dummy;
2975 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2976 destroy_cost_data (target_cost_data);
69f11a13 2977
b555a2e4
RB
2978 /* Unset visited flag. */
2979 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2980 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2981 gimple_set_visited (gsi_stmt (gsi), false);
2982
c3e7ee41 2983 /* Complete the target-specific cost calculation. */
92345349
BS
2984 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2985 &vec_inside_cost, &vec_epilogue_cost);
2986
2987 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2988
73fbfcad 2989 if (dump_enabled_p ())
69f11a13 2990 {
78c60e3d
SS
2991 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2992 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2993 vec_inside_cost);
2994 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2995 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2996 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2997 }
2998
a6524bba
RB
2999 /* Vectorization is profitable if its cost does not exceed the cost of the
3000 scalar version. Note that we err on the vector side for equal cost because
3001 the cost estimate is otherwise quite pessimistic (constant uses are
3002 free on the scalar side but cost a load on the vector side for
3003 example). */
3004 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
3005 return false;
3006
3007 return true;
3008}
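/* Editor's note (hypothetical numbers): with a scalar cost of 8 and
   vector costs of 2 (inside) + 1 (prologue) + 0 (epilogue), the vector
   sum of 3 does not exceed 8 and the block is vectorized; with vector
   costs summing to 9 the function returns false.  Equal costs favour
   the vector side, as noted above.  */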
3009
818b3293
JH
3010/* Find any vectorizable constructors and add them to the grouped_store
3011 array. */
3012
3013static void
3014vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
3015{
3016 gimple_stmt_iterator gsi;
3017
3018 for (gsi = bb_vinfo->region_begin;
140ee00a 3019 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
818b3293 3020 {
140ee00a
RB
3021 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
3022 if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
3023 continue;
818b3293 3024
140ee00a
RB
3025 tree rhs = gimple_assign_rhs1 (stmt);
3026 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
3027 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
3028 CONSTRUCTOR_NELTS (rhs))
3029 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
3030 || uniform_vector_p (rhs))
3031 continue;
818b3293 3032
140ee00a
RB
3033 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3034 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
818b3293
JH
3035 }
3036}
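/* Editor's note (illustrative): the loop above looks for assignments of
   the form

     v_4 = {_1, _2, _3, _5};

   where the left-hand side has a vector type, the CONSTRUCTOR has one
   scalar element per vector lane and is not a uniform (splat)
   constructor.  Such a statement is queued like a grouped store so that
   an SLP instance rooted at the constructor can be built from the
   definitions of its elements.  */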
3037
1d778697
RS
3038/* Check if the region described by BB_VINFO can be vectorized, returning
3039 true if so. When returning false, set FATAL to true if the same failure
3040 would prevent vectorization at other vector sizes, false if it is still
3041 worth trying other sizes. N_STMTS is the number of statements in the
3042 region. */
3043
3044static bool
3045vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
a70d6342 3046{
bfb9d798
RB
3047 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3048
a70d6342 3049 slp_instance instance;
8e19f5a1 3050 int i;
d9f21f6a 3051 poly_uint64 min_vf = 2;
e4a707c4 3052
a5b50aa1
RB
3053 /* The first group of checks is independent of the vector size. */
3054 fatal = true;
3055
428db0ba
RB
3056 /* Analyze the data references. */
3057
a7b3509e 3058 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
a70d6342 3059 {
73fbfcad 3060 if (dump_enabled_p ())
78c60e3d
SS
3061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3062 "not vectorized: unhandled data-ref in basic "
3063 "block.\n");
1d778697 3064 return false;
a70d6342
IR
3065 }
3066
fcac74a1 3067 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 3068 {
73fbfcad 3069 if (dump_enabled_p ())
78c60e3d
SS
3070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3071 "not vectorized: not enough data-refs in "
3072 "basic block.\n");
1d778697 3073 return false;
a70d6342
IR
3074 }
3075
310213d4 3076 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
3077 {
3078 if (dump_enabled_p ())
3079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3080 "not vectorized: unhandled data access in "
3081 "basic block.\n");
1d778697 3082 return false;
5abe1e05
RB
3083 }
3084
818b3293
JH
3085 vect_slp_check_for_constructors (bb_vinfo);
3086
a5b50aa1
RB
3087 /* If there are no grouped stores in the region there is no need
3088 to continue with pattern recog as vect_analyze_slp will fail
3089 anyway. */
3090 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 3091 {
73fbfcad 3092 if (dump_enabled_p ())
a5b50aa1
RB
3093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3094 "not vectorized: no grouped stores in "
3095 "basic block.\n");
1d778697 3096 return false;
a70d6342 3097 }
b8698a0f 3098
a5b50aa1
RB
3099 /* The rest of the analysis below depends on the chosen vector size, so failures are no longer fatal. */
3100 fatal = false;
3101
3102 vect_pattern_recog (bb_vinfo);
3103
a70d6342
IR
3104 /* Check the SLP opportunities in the basic block, analyze and build SLP
3105 trees. */
310213d4 3106 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 3107 {
73fbfcad 3108 if (dump_enabled_p ())
effb52da
RB
3109 {
3110 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3111 "Failed to SLP the basic block.\n");
3112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3113 "not vectorized: failed to find SLP opportunities "
3114 "in basic block.\n");
3115 }
1d778697 3116 return false;
a70d6342 3117 }
b8698a0f 3118
bc484e25
RB
3119 /* Optimize permutations. */
3120 vect_optimize_slp (bb_vinfo);
3121
62c8a2cf
RS
3122 vect_record_base_alignments (bb_vinfo);
3123
c2a12ca0
RB
3124 /* Analyze and verify the alignment of data references and the
3125 dependence in the SLP instances. */
a5b50aa1
RB
3126 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3127 {
308bc496
RB
3128 if (! vect_slp_analyze_and_verify_instance_alignment (bb_vinfo, instance)
3129 || ! vect_slp_analyze_instance_dependence (bb_vinfo, instance))
a5b50aa1 3130 {
b9787581
RS
3131 slp_tree node = SLP_INSTANCE_TREE (instance);
3132 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bbeeac91
DM
3133 if (dump_enabled_p ())
3134 dump_printf_loc (MSG_NOTE, vect_location,
3135 "removing SLP instance operations starting from: %G",
3136 stmt_info->stmt);
6e2dd807 3137 vect_free_slp_instance (instance, false);
a5b50aa1
RB
3138 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3139 continue;
3140 }
c2a12ca0
RB
3141
3142 /* Mark all the statements that we want to vectorize as pure SLP and
3143 relevant. */
6c7e3b1f 3144 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
c2a12ca0 3145 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
818b3293
JH
3146 if (SLP_INSTANCE_ROOT_STMT (instance))
3147 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
c2a12ca0 3148
a5b50aa1
RB
3149 i++;
3150 }
a5b50aa1 3151 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
1d778697 3152 return false;
a5b50aa1 3153
8b7e9dba 3154 if (!vect_slp_analyze_operations (bb_vinfo))
a70d6342 3155 {
73fbfcad 3156 if (dump_enabled_p ())
e645e942 3157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 3158 "not vectorized: bad operation in basic block.\n");
1d778697 3159 return false;
a70d6342
IR
3160 }
3161
69f11a13 3162 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 3163 if (!unlimited_cost_model (NULL)
69f11a13
IR
3164 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3165 {
73fbfcad 3166 if (dump_enabled_p ())
78c60e3d
SS
3167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3168 "not vectorized: vectorization is not "
3169 "profitable.\n");
1d778697 3170 return false;
69f11a13
IR
3171 }
3172
73fbfcad 3173 if (dump_enabled_p ())
78c60e3d
SS
3174 dump_printf_loc (MSG_NOTE, vect_location,
3175 "Basic block will be vectorized using SLP\n");
1d778697 3176 return true;
a70d6342
IR
3177}
3178
fa0c8df7
RS
3179/* Subroutine of vect_slp_bb. Try to vectorize the statements between
3180 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3181 on success. The region has N_STMTS statements and has the datarefs
3182 given by DATAREFS. */
a70d6342 3183
fa0c8df7
RS
3184static bool
3185vect_slp_bb_region (gimple_stmt_iterator region_begin,
3186 gimple_stmt_iterator region_end,
3187 vec<data_reference_p> datarefs,
3188 unsigned int n_stmts)
8e19f5a1
IR
3189{
3190 bb_vec_info bb_vinfo;
e021fb86 3191 auto_vector_modes vector_modes;
8e19f5a1 3192
8e19f5a1 3193 /* Autodetect first vector size we try. */
e021fb86
RS
3194 machine_mode next_vector_mode = VOIDmode;
3195 targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3196 unsigned int mode_i = 0;
8e19f5a1 3197
fa0c8df7 3198 vec_info_shared shared;
61d371eb 3199
1c84a2d2 3200 machine_mode autodetected_vector_mode = VOIDmode;
8e19f5a1
IR
3201 while (1)
3202 {
61d371eb 3203 bool vectorized = false;
a5b50aa1 3204 bool fatal = false;
1d778697
RS
3205 bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3206
3207 bool first_time_p = shared.datarefs.is_empty ();
3208 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3209 if (first_time_p)
3210 bb_vinfo->shared->save_datarefs ();
3211 else
3212 bb_vinfo->shared->check_datarefs ();
1c84a2d2 3213 bb_vinfo->vector_mode = next_vector_mode;
1d778697
RS
3214
3215 if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
61d371eb
RB
3216 && dbg_cnt (vect_slp))
3217 {
428db0ba 3218 if (dump_enabled_p ())
df7c2283
RS
3219 {
3220 dump_printf_loc (MSG_NOTE, vect_location,
3221 "***** Analysis succeeded with vector mode"
3222 " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
3223 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3224 }
428db0ba 3225
ca823c85 3226 bb_vinfo->shared->check_datarefs ();
428db0ba
RB
3227 vect_schedule_slp (bb_vinfo);
3228
d1ac60d5 3229 unsigned HOST_WIDE_INT bytes;
bbeeac91
DM
3230 if (dump_enabled_p ())
3231 {
1c84a2d2 3232 if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
bbeeac91
DM
3233 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3234 "basic block part vectorized using %wu byte "
3235 "vectors\n", bytes);
3236 else
3237 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3238 "basic block part vectorized using variable "
3239 "length vectors\n");
3240 }
428db0ba 3241
61d371eb 3242 vectorized = true;
428db0ba 3243 }
df7c2283
RS
3244 else
3245 {
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_NOTE, vect_location,
3248 "***** Analysis failed with vector mode %s\n",
3249 GET_MODE_NAME (bb_vinfo->vector_mode));
3250 }
8e19f5a1 3251
e021fb86 3252 if (mode_i == 0)
1c84a2d2 3253 autodetected_vector_mode = bb_vinfo->vector_mode;
ba7f76dd 3254
a55d8232
RS
3255 if (!fatal)
3256 while (mode_i < vector_modes.length ()
3257 && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
3258 {
3259 if (dump_enabled_p ())
3260 dump_printf_loc (MSG_NOTE, vect_location,
3261 "***** The result for vector mode %s would"
3262 " be the same\n",
3263 GET_MODE_NAME (vector_modes[mode_i]));
3264 mode_i += 1;
3265 }
3266
ba7f76dd 3267 delete bb_vinfo;
86e36728 3268
e021fb86 3269 if (mode_i < vector_modes.length ()
df7c2283
RS
3270 && VECTOR_MODE_P (autodetected_vector_mode)
3271 && (related_vector_mode (vector_modes[mode_i],
3272 GET_MODE_INNER (autodetected_vector_mode))
3273 == autodetected_vector_mode)
3274 && (related_vector_mode (autodetected_vector_mode,
3275 GET_MODE_INNER (vector_modes[mode_i]))
3276 == vector_modes[mode_i]))
3277 {
3278 if (dump_enabled_p ())
3279 dump_printf_loc (MSG_NOTE, vect_location,
3280 "***** Skipping vector mode %s, which would"
3281 " repeat the analysis for %s\n",
3282 GET_MODE_NAME (vector_modes[mode_i]),
3283 GET_MODE_NAME (autodetected_vector_mode));
3284 mode_i += 1;
3285 }
86e36728 3286
61d371eb 3287 if (vectorized
e021fb86 3288 || mode_i == vector_modes.length ()
1c84a2d2 3289 || autodetected_vector_mode == VOIDmode
a5b50aa1
RB
3290 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3291 vector sizes will fail do not bother iterating. */
3292 || fatal)
fa0c8df7
RS
3293 return vectorized;
3294
3295 /* Try the next biggest vector size. */
e021fb86 3296 next_vector_mode = vector_modes[mode_i++];
fa0c8df7 3297 if (dump_enabled_p ())
e021fb86
RS
3298 dump_printf_loc (MSG_NOTE, vect_location,
3299 "***** Re-trying analysis with vector mode %s\n",
3300 GET_MODE_NAME (next_vector_mode));
fa0c8df7
RS
3301 }
3302}
8e19f5a1 3303
fa0c8df7
RS
3304/* Main entry for the BB vectorizer. Analyze and transform BB, returns
3305 true if anything in the basic-block was vectorized. */
61d371eb 3306
fa0c8df7
RS
3307bool
3308vect_slp_bb (basic_block bb)
3309{
3310 gimple_stmt_iterator gsi;
3311 bool any_vectorized = false;
3312
3313 gsi = gsi_start_bb (bb);
3314 while (!gsi_end_p (gsi))
3315 {
3316 gimple_stmt_iterator region_begin = gsi;
3317 vec<data_reference_p> datarefs = vNULL;
3318 int insns = 0;
3319
3320 for (; !gsi_end_p (gsi); gsi_next (&gsi))
61d371eb 3321 {
fa0c8df7
RS
3322 gimple *stmt = gsi_stmt (gsi);
3323 if (is_gimple_debug (stmt))
3324 continue;
3325 insns++;
3326
3327 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3328 vect_location = stmt;
3329
3330 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3331 break;
3332 }
61d371eb 3333
fa0c8df7
RS
3334 /* Skip leading unhandled stmts. */
3335 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3336 {
3337 gsi_next (&gsi);
3338 continue;
61d371eb 3339 }
fa0c8df7
RS
3340
3341 gimple_stmt_iterator region_end = gsi;
3342
028d4092 3343 if (insns > param_slp_max_insns_in_bb)
1d778697
RS
3344 {
3345 if (dump_enabled_p ())
3346 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3347 "not vectorized: too many instructions in "
3348 "basic block.\n");
3349 }
3350 else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
fa0c8df7
RS
3351 any_vectorized = true;
3352
3353 if (gsi_end_p (region_end))
3354 break;
3355
3356 /* Skip the unhandled stmt. */
3357 gsi_next (&gsi);
8e19f5a1 3358 }
61d371eb
RB
3359
3360 return any_vectorized;
8e19f5a1
IR
3361}
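/* Editor's illustration (standalone C++ sketch, not part of tree-vect-slp.c):
   vect_slp_bb above grows a region of analyzable statements, hands it to
   vect_slp_bb_region, then skips the offending statement and starts a new
   region.  Here "statements" are ints and a negative value stands for a
   statement whose data reference cannot be handled.  */
#include <cstdio>
#include <vector>

int
main ()
{
  std::vector<int> stmts = { 1, 2, 3, -1, 4, 5, -1, -1, 6 };
  size_t i = 0;
  while (i < stmts.size ())
    {
      size_t region_begin = i;
      while (i < stmts.size () && stmts[i] >= 0)
	i++;
      if (i > region_begin)
	/* This region would be passed to vect_slp_bb_region.  */
	printf ("region [%zu, %zu) with %zu stmts\n",
		region_begin, i, i - region_begin);
      if (i < stmts.size ())
	i++;	/* Skip the unhandled statement, like the gsi_next above.  */
    }
  return 0;
}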
3362
3363
c73e7656 3364/* Return true if the constant or invariant operand OP_NUM of STMT_VINFO needs a boolean (mask) vector type. */
e4af0bc4
IE
3365
3366static bool
308bc496
RB
3367vect_mask_constant_operand_p (vec_info *vinfo,
3368 stmt_vec_info stmt_vinfo, unsigned op_num)
e4af0bc4 3369{
32e8e429 3370 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
e4af0bc4 3371 tree op, vectype;
e4af0bc4
IE
3372 enum vect_def_type dt;
3373
3374 /* For comparisons and COND_EXPRs the type is chosen depending
c73e7656 3375 on the other, non-constant comparison operand. */
e4af0bc4
IE
3376 if (TREE_CODE_CLASS (code) == tcc_comparison)
3377 {
32e8e429 3378 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
c73e7656 3379 op = gimple_assign_rhs1 (stmt);
e4af0bc4 3380
308bc496 3381 if (!vect_is_simple_use (op, vinfo, &dt, &vectype))
e4af0bc4
IE
3382 gcc_unreachable ();
3383
3384 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3385 }
3386
3387 if (code == COND_EXPR)
3388 {
32e8e429 3389 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
e4af0bc4
IE
3390 tree cond = gimple_assign_rhs1 (stmt);
3391
3392 if (TREE_CODE (cond) == SSA_NAME)
d005f61e
RB
3393 {
3394 if (op_num > 0)
3395 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3396 op = cond;
3397 }
e4af0bc4 3398 else
d005f61e
RB
3399 {
3400 if (op_num > 1)
3401 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3402 op = TREE_OPERAND (cond, 0);
3403 }
e4af0bc4 3404
308bc496 3405 if (!vect_is_simple_use (op, vinfo, &dt, &vectype))
e4af0bc4
IE
3406 gcc_unreachable ();
3407
3408 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3409 }
3410
3411 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3412}
3413
018b2744
RS
3414/* Build a variable-length vector in which the elements in ELTS are repeated
3415 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3416 RESULTS and add any new instructions to SEQ.
3417
3418 The approach we use is:
3419
3420 (1) Find a vector mode VM with integer elements of mode IM.
3421
3422 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3423 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3424 from small vectors to IM.
3425
3426 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3427
3428 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3429 correct byte contents.
3430
3431 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3432
3433 We try to find the largest IM for which this sequence works, in order
3434 to cut down on the number of interleaves. */
3435
f1739b48 3436void
43fdde57 3437duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
cdbe6e9b
RS
3438 vec<tree> elts, unsigned int nresults,
3439 vec<tree> &results)
018b2744
RS
3440{
3441 unsigned int nelts = elts.length ();
3442 tree element_type = TREE_TYPE (vector_type);
3443
3444 /* (1) Find a vector mode VM with integer elements of mode IM. */
3445 unsigned int nvectors = 1;
3446 tree new_vector_type;
3447 tree permutes[2];
f884cd2f 3448 if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
018b2744
RS
3449 &nvectors, &new_vector_type,
3450 permutes))
3451 gcc_unreachable ();
3452
3453 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3454 unsigned int partial_nelts = nelts / nvectors;
3455 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3456
3457 tree_vector_builder partial_elts;
3458 auto_vec<tree, 32> pieces (nvectors * 2);
3459 pieces.quick_grow (nvectors * 2);
3460 for (unsigned int i = 0; i < nvectors; ++i)
3461 {
3462 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3463 ELTS' has mode IM. */
3464 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3465 for (unsigned int j = 0; j < partial_nelts; ++j)
3466 partial_elts.quick_push (elts[i * partial_nelts + j]);
3467 tree t = gimple_build_vector (seq, &partial_elts);
3468 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3469 TREE_TYPE (new_vector_type), t);
3470
3471 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3472 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3473 }
3474
3475 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3476 correct byte contents.
3477
3478 We need to repeat the following operation log2(nvectors) times:
3479
3480 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3481 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3482
3483 However, if each input repeats every N elements and the VF is
3484 a multiple of N * 2, the HI result is the same as the LO. */
3485 unsigned int in_start = 0;
3486 unsigned int out_start = nvectors;
3487 unsigned int hi_start = nvectors / 2;
3488 /* A bound on the number of outputs needed to produce NRESULTS results
3489 in the final iteration. */
3490 unsigned int noutputs_bound = nvectors * nresults;
3491 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3492 {
3493 noutputs_bound /= 2;
3494 unsigned int limit = MIN (noutputs_bound, nvectors);
3495 for (unsigned int i = 0; i < limit; ++i)
3496 {
3497 if ((i & 1) != 0
3498 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3499 2 * in_repeat))
3500 {
3501 pieces[out_start + i] = pieces[out_start + i - 1];
3502 continue;
3503 }
3504
3505 tree output = make_ssa_name (new_vector_type);
3506 tree input1 = pieces[in_start + (i / 2)];
3507 tree input2 = pieces[in_start + (i / 2) + hi_start];
3508 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3509 input1, input2,
3510 permutes[i & 1]);
3511 gimple_seq_add_stmt (seq, stmt);
3512 pieces[out_start + i] = output;
3513 }
3514 std::swap (in_start, out_start);
3515 }
3516
3517 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3518 results.reserve (nresults);
3519 for (unsigned int i = 0; i < nresults; ++i)
3520 if (i < nvectors)
3521 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3522 pieces[in_start + i]));
3523 else
3524 results.quick_push (results[i - nvectors]);
3525}
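/* Editor's illustration (standalone C++ sketch, not part of tree-vect-slp.c):
   a plain-array simulation of step (4) above for four scalar elements
   s0..s3 (here 100..103), NVECTORS = 4 broadcast pieces and an eight-lane
   vector.  The noutputs bound and the "HI equals LO" shortcut are omitted;
   after log2(NVECTORS) interleave rounds the lanes cycle through
   s0,s1,s2,s3 as required.  */
#include <cstdio>
#include <utility>
#include <vector>

typedef std::vector<int> vec;

/* Lo (parity == 0) or hi (parity == 1) interleave of two equal-length
   vectors, standing in for the two VEC_PERM_EXPR masks used above.  */
static vec
interleave (const vec &a, const vec &b, unsigned parity)
{
  vec r (a.size ());
  unsigned half = a.size () / 2;
  for (unsigned i = 0; i < half; ++i)
    {
      r[2 * i] = a[parity * half + i];
      r[2 * i + 1] = b[parity * half + i];
    }
  return r;
}

int
main ()
{
  const unsigned lanes = 8, nvectors = 4;
  vec pieces[2 * nvectors];
  for (unsigned i = 0; i < nvectors; ++i)
    pieces[i] = vec (lanes, 100 + i);	/* broadcast of element s_i */

  unsigned in_start = 0, out_start = nvectors, hi_start = nvectors / 2;
  for (unsigned in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
    {
      for (unsigned i = 0; i < nvectors; ++i)
	pieces[out_start + i]
	  = interleave (pieces[in_start + i / 2],
			pieces[in_start + i / 2 + hi_start], i & 1);
      std::swap (in_start, out_start);
    }

  for (unsigned l = 0; l < lanes; ++l)
    printf ("%d ", pieces[in_start][l]);  /* 100 101 102 103 100 101 102 103 */
  printf ("\n");
  return 0;
}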
3526
e4af0bc4 3527
b8698a0f
L
3528/* For constant and loop invariant defs of SLP_NODE this function returns
3529 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
30c0d1e3
RB
3530 OP_NUM selects the child node of SLP_NODE that contains the scalar
3531 operands. */
ebfd146a
IR
3532
3533static void
308bc496
RB
3534vect_get_constant_vectors (vec_info *vinfo,
3535 slp_tree slp_node, unsigned op_num,
30c0d1e3 3536 vec<tree> *vec_oprnds)
ebfd146a 3537{
d005f61e 3538 slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
30c0d1e3 3539 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
018b2744 3540 unsigned HOST_WIDE_INT nunits;
ebfd146a 3541 tree vec_cst;
d2a12ae7 3542 unsigned j, number_of_places_left_in_vector;
ebfd146a 3543 tree vector_type;
9dc3f7de 3544 tree vop;
30c0d1e3 3545 int group_size = op_node->ops.length ();
ebfd146a 3546 unsigned int vec_num, i;
d2a12ae7 3547 unsigned number_of_copies = 1;
30c0d1e3 3548 bool constant_p;
b5aeb3bb 3549 tree neutral_op = NULL;
13396b6e 3550 gimple_seq ctor_seq = NULL;
018b2744 3551 auto_vec<tree, 16> permute_results;
b5aeb3bb 3552
30c0d1e3
RB
3553 /* ??? SLP analysis should compute the vector type for the
3554 constant / invariant and store it in the SLP node. */
3555 tree op = op_node->ops[0];
42fd8198 3556 /* Check if vector type is a boolean vector. */
30c0d1e3 3557 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2568d8a1 3558 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
308bc496 3559 && vect_mask_constant_operand_p (vinfo, stmt_vinfo, op_num))
e8738f4e 3560 vector_type = truth_type_for (stmt_vectype);
42fd8198 3561 else
9b75f56d 3562 vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
afbe6325 3563
318bd8c6
RB
3564 /* ??? For lane-reducing ops we should also have the required number
3565 of vector stmts initialized rather than second-guessing here. */
3566 unsigned int number_of_vectors;
3567 if (is_gimple_assign (stmt_vinfo->stmt)
3568 && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3569 || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3570 || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3571 number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3572 else
3573 number_of_vectors
3574 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3575 * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3576 vector_type);
30c0d1e3
RB
3577 vec_oprnds->create (number_of_vectors);
3578 auto_vec<tree> voprnds (number_of_vectors);
ebfd146a 3579
ebfd146a 3580 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3581 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3582
3583 For example, we have two scalar operands, s1 and s2 (e.g., group of
3584 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3585 of this type can be packed in a vector). The output vector will contain
3586 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3587 will be 2).
3588
b8698a0f 3589 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3590 containing the operands.
3591
3592 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3593 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3594 {s5, s6, s7, s8}. */
b8698a0f 3595
018b2744
RS
3596 /* When using duplicate_and_interleave, we just need one element for
3597 each scalar statement. */
3598 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3599 nunits = group_size;
3600
14a61437 3601 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3602
3603 number_of_places_left_in_vector = nunits;
62cf7335 3604 constant_p = true;
5ebaa477 3605 tree_vector_builder elts (vector_type, nunits, 1);
794e3180 3606 elts.quick_grow (nunits);
90dd6e3d 3607 bool place_after_defs = false;
ebfd146a
IR
3608 for (j = 0; j < number_of_copies; j++)
3609 {
30c0d1e3 3610 for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
ebfd146a 3611 {
ebfd146a 3612 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3613 number_of_places_left_in_vector--;
90dd6e3d 3614 tree orig_op = op;
13396b6e 3615 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3616 {
793d9a16 3617 if (CONSTANT_CLASS_P (op))
13396b6e 3618 {
42fd8198
IE
3619 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3620 {
3621 /* Can't use VIEW_CONVERT_EXPR for booleans because
3622 of possibly different sizes of scalar value and
3623 vector element. */
3624 if (integer_zerop (op))
3625 op = build_int_cst (TREE_TYPE (vector_type), 0);
3626 else if (integer_onep (op))
158beb4a 3627 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3628 else
3629 gcc_unreachable ();
3630 }
3631 else
3632 op = fold_unary (VIEW_CONVERT_EXPR,
3633 TREE_TYPE (vector_type), op);
13396b6e
JJ
3634 gcc_assert (op && CONSTANT_CLASS_P (op));
3635 }
3636 else
3637 {
b731b390 3638 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3639 gimple *init_stmt;
262a363f
JJ
3640 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3641 {
158beb4a
JJ
3642 tree true_val
3643 = build_all_ones_cst (TREE_TYPE (vector_type));
3644 tree false_val
3645 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3646 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3647 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3648 op, true_val,
3649 false_val);
262a363f 3650 }
262a363f
JJ
3651 else
3652 {
3653 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3654 op);
3655 init_stmt
3656 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3657 op);
3658 }
13396b6e
JJ
3659 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3660 op = new_temp;
3661 }
50eeef09 3662 }
d2a12ae7 3663 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3664 if (!CONSTANT_CLASS_P (op))
3665 constant_p = false;
90dd6e3d
RB
3666 if (TREE_CODE (orig_op) == SSA_NAME
3667 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
308bc496
RB
3668 && is_a <bb_vec_info> (vinfo)
3669 && (as_a <bb_vec_info> (vinfo)->bb
90dd6e3d
RB
3670 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3671 place_after_defs = true;
ebfd146a
IR
3672
3673 if (number_of_places_left_in_vector == 0)
3674 {
018b2744
RS
3675 if (constant_p
3676 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3677 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3678 vec_cst = gimple_build_vector (&ctor_seq, &elts);
ebfd146a 3679 else
d2a12ae7 3680 {
30c0d1e3 3681 if (permute_results.is_empty ())
cdbe6e9b
RS
3682 duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3683 elts, number_of_vectors,
018b2744
RS
3684 permute_results);
3685 vec_cst = permute_results[number_of_vectors - j - 1];
d2a12ae7 3686 }
90dd6e3d
RB
3687 tree init;
3688 gimple_stmt_iterator gsi;
3689 if (place_after_defs)
3690 {
95c68311
RS
3691 stmt_vec_info last_stmt_info
3692 = vect_find_last_scalar_stmt_in_slp (slp_node);
3693 gsi = gsi_for_stmt (last_stmt_info->stmt);
308bc496
RB
3694 init = vect_init_vector (vinfo, stmt_vinfo, vec_cst,
3695 vector_type, &gsi);
90dd6e3d
RB
3696 }
3697 else
308bc496
RB
3698 init = vect_init_vector (vinfo, stmt_vinfo, vec_cst,
3699 vector_type, NULL);
13396b6e
JJ
3700 if (ctor_seq != NULL)
3701 {
90dd6e3d 3702 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
018b2744 3703 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
13396b6e
JJ
3704 ctor_seq = NULL;
3705 }
90dd6e3d
RB
3706 voprnds.quick_push (init);
3707 place_after_defs = false;
62cf7335
RB
3708 number_of_places_left_in_vector = nunits;
3709 constant_p = true;
5ebaa477
RS
3710 elts.new_vector (vector_type, nunits, 1);
3711 elts.quick_grow (nunits);
ebfd146a
IR
3712 }
3713 }
3714 }
3715
b8698a0f 3716 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 3717 them. */
9771b263 3718 vec_num = voprnds.length ();
d2a12ae7 3719 for (j = vec_num; j != 0; j--)
ebfd146a 3720 {
9771b263
DN
3721 vop = voprnds[j - 1];
3722 vec_oprnds->quick_push (vop);
ebfd146a
IR
3723 }
3724
ebfd146a 3725 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3726 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3727 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3728 to replicate the vectors. */
9771b263 3729 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3730 {
b5aeb3bb
IR
3731 tree neutral_vec = NULL;
3732
3733 if (neutral_op)
3734 {
3735 if (!neutral_vec)
b9acc9f1 3736 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3737
9771b263 3738 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3739 }
3740 else
3741 {
9771b263
DN
3742 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3743 vec_oprnds->quick_push (vop);
b5aeb3bb 3744 }
ebfd146a
IR
3745 }
3746}
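/* Editor's illustration (standalone C++ sketch, not part of tree-vect-slp.c):
   the packing loop in vect_get_constant_vectors above fills each constant
   vector from its last lane to its first while walking the scalar operands
   backwards, so the vectors come out in reverse order and are inverted at
   the end.  Here the operands are plain ints, NUNITS is 4, GROUP_SIZE is 8
   and NUMBER_OF_VECTORS is assumed to be 2, giving NUMBER_OF_COPIES
   = 4 * 2 / 8 = 1 and the vectors {1,2,3,4} {5,6,7,8}.  */
#include <algorithm>
#include <cstdio>
#include <vector>

int
main ()
{
  const unsigned nunits = 4, number_of_vectors = 2;
  std::vector<int> ops = { 1, 2, 3, 4, 5, 6, 7, 8 };
  unsigned group_size = ops.size ();
  unsigned number_of_copies = nunits * number_of_vectors / group_size;

  std::vector<std::vector<int> > voprnds;
  std::vector<int> elts (nunits);
  unsigned places_left = nunits;
  for (unsigned j = 0; j < number_of_copies; j++)
    for (unsigned k = group_size; k-- > 0;)	/* walk the group backwards */
      {
	elts[--places_left] = ops[k];
	if (places_left == 0)
	  {
	    voprnds.push_back (elts);
	    places_left = nunits;
	  }
      }

  std::reverse (voprnds.begin (), voprnds.end ());	/* undo reverse order */
  for (size_t v = 0; v < voprnds.size (); v++)
    {
      for (size_t e = 0; e < voprnds[v].size (); e++)
	printf ("%d ", voprnds[v][e]);
      printf ("| ");	/* prints: 1 2 3 4 | 5 6 7 8 | */
    }
  printf ("\n");
  return 0;
}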
3747
3748
3749/* Get vectorized definitions from SLP_NODE that contains corresponding
3750 vectorized def-stmts. */
3751
3752static void
9771b263 3753vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a 3754{
16edaeb8 3755 stmt_vec_info vec_def_stmt_info;
ebfd146a
IR
3756 unsigned int i;
3757
9771b263 3758 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3759
16edaeb8 3760 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
30c0d1e3 3761 vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
ebfd146a
IR
3762}
3763
3764
30c0d1e3 3765/* Get N vectorized definitions for SLP_NODE.
b8698a0f 3766 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
3767 call vect_get_constant_vectors() to create vector stmts.
3768 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
d092494c
IR
3769 must be stored in the corresponding child of SLP_NODE, and we call
3770 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 3771
ebfd146a 3772void
308bc496
RB
3773vect_get_slp_defs (vec_info *vinfo,
3774 slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
ebfd146a 3775{
30c0d1e3
RB
3776 if (n == -1U)
3777 n = SLP_TREE_CHILDREN (slp_node).length ();
ebfd146a 3778
30c0d1e3 3779 for (unsigned i = 0; i < n; ++i)
ebfd146a 3780 {
30c0d1e3 3781 slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
b5aeb3bb 3782
30c0d1e3 3783 vec<tree> vec_defs = vNULL;
ebfd146a 3784
30c0d1e3
RB
3785 /* For each operand we check if it has vectorized definitions in a child
3786 node or we need to create them (for invariants and constants). */
3787 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3788 {
3789 vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3790 vect_get_slp_vect_defs (child, &vec_defs);
3791 }
d092494c 3792 else
308bc496 3793 vect_get_constant_vectors (vinfo, slp_node, i, &vec_defs);
ebfd146a 3794
37b5ec8f 3795 vec_oprnds->quick_push (vec_defs);
d092494c 3796 }
ebfd146a
IR
3797}
3798
ebfd146a
IR
3799/* Generate vector permute statements from a list of loads in DR_CHAIN.
3800 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
ab5934a8 3801 permute statements for the SLP node NODE. */
01d8bf07 3802
ebfd146a 3803bool
308bc496
RB
3804vect_transform_slp_perm_load (vec_info *vinfo,
3805 slp_tree node, vec<tree> dr_chain,
d9f21f6a 3806 gimple_stmt_iterator *gsi, poly_uint64 vf,
4e849a74 3807 bool analyze_only, unsigned *n_perms)
ebfd146a 3808{
b9787581 3809 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
928686b1 3810 int vec_index = 0;
2635892a 3811 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4e849a74 3812 unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
928686b1 3813 unsigned int mask_element;
ef4bddc2 3814 machine_mode mode;
ebfd146a 3815
91ff1504
RB
3816 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3817 return false;
3818
bffb8014 3819 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
91ff1504 3820
22e4dee7 3821 mode = TYPE_MODE (vectype);
ab7e60ce 3822 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 3823
61fdfd8c
RB
3824 /* Initialize the vect stmts of NODE to properly insert the generated
3825 stmts later. */
3826 if (! analyze_only)
3827 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3828 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3829 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a 3830
b8698a0f
L
3831 /* Generate permutation masks for every NODE. Number of masks for each NODE
3832 is equal to GROUP_SIZE.
3833 E.g., we have a group of three nodes with three loads from the same
3834 location in each node, and the vector size is 4. I.e., we have a
3835 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3836 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3837 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3838 ...
3839
2635892a 3840 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3841 The last mask is illegal as is, since a permute operation takes only two
ff802fa1
IR
 3842 vector operands, so mask elements must be smaller than twice NUNITS.
3843 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3844 For the first two permutations we need the first and the second input
ebfd146a 3845 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3846 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3847 {c2,a3,b3,c3}. */
3848
2ce27200 3849 int vect_stmts_counter = 0;
928686b1 3850 unsigned int index = 0;
2ce27200
RB
3851 int first_vec_index = -1;
3852 int second_vec_index = -1;
be377c80 3853 bool noop_p = true;
29afecdf 3854 *n_perms = 0;
ebfd146a 3855
ab7e60ce
RS
3856 vec_perm_builder mask;
3857 unsigned int nelts_to_build;
3858 unsigned int nvectors_per_build;
3859 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3860 && multiple_p (nunits, group_size));
3861 if (repeating_p)
2ce27200 3862 {
ab7e60ce
RS
3863 /* A single vector contains a whole number of copies of the node, so:
3864 (a) all permutes can use the same mask; and
3865 (b) the permutes only need a single vector input. */
3866 mask.new_vector (nunits, group_size, 3);
3867 nelts_to_build = mask.encoded_nelts ();
3868 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3869 }
3870 else
3871 {
3872 /* We need to construct a separate mask for each vector statement. */
3873 unsigned HOST_WIDE_INT const_nunits, const_vf;
3874 if (!nunits.is_constant (&const_nunits)
3875 || !vf.is_constant (&const_vf))
3876 return false;
3877 mask.new_vector (const_nunits, const_nunits, 1);
3878 nelts_to_build = const_vf * group_size;
3879 nvectors_per_build = 1;
3880 }
3881
3882 unsigned int count = mask.encoded_nelts ();
3883 mask.quick_grow (count);
3884 vec_perm_indices indices;
3885
3886 for (unsigned int j = 0; j < nelts_to_build; j++)
3887 {
3888 unsigned int iter_num = j / group_size;
3889 unsigned int stmt_num = j % group_size;
3890 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3891 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3892 if (repeating_p)
2ce27200 3893 {
ab7e60ce
RS
3894 first_vec_index = 0;
3895 mask_element = i;
3896 }
3897 else
3898 {
3899 /* Enforced before the loop when !repeating_p. */
3900 unsigned int const_nunits = nunits.to_constant ();
3901 vec_index = i / const_nunits;
3902 mask_element = i % const_nunits;
2ce27200
RB
3903 if (vec_index == first_vec_index
3904 || first_vec_index == -1)
3905 {
3906 first_vec_index = vec_index;
3907 }
3908 else if (vec_index == second_vec_index
3909 || second_vec_index == -1)
3910 {
3911 second_vec_index = vec_index;
ab7e60ce 3912 mask_element += const_nunits;
2ce27200
RB
3913 }
3914 else
3915 {
3916 if (dump_enabled_p ())
3c2a8ed0
DM
3917 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3918 "permutation requires at "
3919 "least three vectors %G",
3920 stmt_info->stmt);
31bee964 3921 gcc_assert (analyze_only);
2ce27200
RB
3922 return false;
3923 }
ebfd146a 3924
ab7e60ce
RS
3925 gcc_assert (mask_element < 2 * const_nunits);
3926 }
3927
3928 if (mask_element != index)
3929 noop_p = false;
3930 mask[index++] = mask_element;
2ce27200 3931
ab7e60ce
RS
3932 if (index == count && !noop_p)
3933 {
3934 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3935 if (!can_vec_perm_const_p (mode, indices))
2ce27200 3936 {
ab7e60ce 3937 if (dump_enabled_p ())
2ce27200 3938 {
ab7e60ce
RS
3939 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3940 vect_location,
3941 "unsupported vect permute { ");
3942 for (i = 0; i < count; ++i)
22e4dee7 3943 {
ab7e60ce
RS
3944 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3945 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
22e4dee7 3946 }
ab7e60ce 3947 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2ce27200 3948 }
ab7e60ce
RS
3949 gcc_assert (analyze_only);
3950 return false;
e3342de4 3951 }
29afecdf 3952
ab7e60ce
RS
3953 ++*n_perms;
3954 }
3955
3956 if (index == count)
3957 {
3958 if (!analyze_only)
e3342de4 3959 {
ab7e60ce 3960 tree mask_vec = NULL_TREE;
be377c80 3961
ab7e60ce
RS
3962 if (! noop_p)
3963 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
2ce27200 3964
ab7e60ce
RS
3965 if (second_vec_index == -1)
3966 second_vec_index = first_vec_index;
61fdfd8c 3967
ab7e60ce
RS
3968 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3969 {
61fdfd8c 3970 /* Generate the permute statement if necessary. */
ab7e60ce
RS
3971 tree first_vec = dr_chain[first_vec_index + ri];
3972 tree second_vec = dr_chain[second_vec_index + ri];
16edaeb8 3973 stmt_vec_info perm_stmt_info;
61fdfd8c
RB
3974 if (! noop_p)
3975 {
b9787581 3976 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
61fdfd8c
RB
3977 tree perm_dest
3978 = vect_create_destination_var (gimple_assign_lhs (stmt),
3979 vectype);
3980 perm_dest = make_ssa_name (perm_dest);
16edaeb8
RS
3981 gassign *perm_stmt
3982 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3983 first_vec, second_vec,
3984 mask_vec);
3985 perm_stmt_info
308bc496
RB
3986 = vect_finish_stmt_generation (vinfo,
3987 stmt_info, perm_stmt,
b9787581 3988 gsi);
61fdfd8c
RB
3989 }
3990 else
3991 /* If mask was NULL_TREE generate the requested
3992 identity transform. */
16edaeb8 3993 perm_stmt_info = vinfo->lookup_def (first_vec);
61fdfd8c
RB
3994
3995 /* Store the vector statement in NODE. */
16edaeb8
RS
3996 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3997 = perm_stmt_info;
2ce27200 3998 }
2ce27200 3999 }
ab7e60ce
RS
4000
4001 index = 0;
4002 first_vec_index = -1;
4003 second_vec_index = -1;
4004 noop_p = true;
2ce27200 4005 }
b8698a0f 4006 }
ebfd146a 4007
ebfd146a
IR
4008 return true;
4009}
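/* Editor's illustration (standalone C++ sketch, not part of tree-vect-slp.c):
   the !repeating_p index arithmetic of vect_transform_slp_perm_load above,
   run on the a0b0c0a1b1c1... example from its comment.  With
   DR_GROUP_SIZE = 3, an SLP node of three loads of 'a' (load permutation
   {0,0,0}), NUNITS = 4 and VF = 4 it prints the masks {0,0,0,3} {3,3,6,6}
   {2,5,5,5} described there.  The "needs three vectors" rejection is left
   out because this input never triggers it.  */
#include <cstdio>

int
main ()
{
  const unsigned nunits = 4, vf = 4, group_size = 3, dr_group_size = 3;
  const unsigned load_permutation[group_size] = { 0, 0, 0 };

  unsigned index = 0, mask[nunits];
  int first_vec_index = -1, second_vec_index = -1;
  for (unsigned j = 0; j < vf * group_size; j++)
    {
      unsigned i = (j / group_size) * dr_group_size
		   + load_permutation[j % group_size];
      int vec_index = i / nunits;
      unsigned mask_element = i % nunits;
      if (first_vec_index == -1 || vec_index == first_vec_index)
	first_vec_index = vec_index;
      else
	{
	  /* Lanes of the second input vector are shifted by NUNITS.  */
	  second_vec_index = vec_index;
	  mask_element += nunits;
	}
      mask[index++] = mask_element;
      if (index == nunits)
	{
	  printf ("{ ");
	  for (unsigned k = 0; k < nunits; k++)
	    printf ("%u ", mask[k]);
	  printf ("}\n");
	  index = 0;
	  first_vec_index = second_vec_index = -1;
	}
    }
  return 0;
}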
4010
ebfd146a
IR
4011/* Vectorize SLP instance tree in postorder. */
4012
8fe1bd30 4013static void
308bc496
RB
4014vect_schedule_slp_instance (vec_info *vinfo,
4015 slp_tree node, slp_instance instance)
ebfd146a 4016{
ebfd146a
IR
4017 gimple_stmt_iterator si;
4018 stmt_vec_info stmt_info;
8b7e9dba 4019 unsigned int group_size;
ebfd146a 4020 tree vectype;
603cca93 4021 int i, j;
d755c7ef 4022 slp_tree child;
ebfd146a 4023
603cca93 4024 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
8fe1bd30 4025 return;
ebfd146a 4026
a1f072e2
RB
4027 /* See if we have already vectorized the node in the graph of the
4028 SLP instance. */
4029 if (SLP_TREE_VEC_STMTS (node).exists ())
4030 return;
4031
9771b263 4032 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
308bc496 4033 vect_schedule_slp_instance (vinfo, child, instance);
b8698a0f 4034
603cca93
RB
4035 /* Push SLP node def-type to stmts. */
4036 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4037 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
4038 {
4039 stmt_vec_info child_stmt_info;
4040 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4041 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
4042 }
603cca93 4043
b9787581 4044 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
4045
4046 /* VECTYPE is the type of the destination. */
b690cc0f 4047 vectype = STMT_VINFO_VECTYPE (stmt_info);
dad55d70 4048 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ab5934a8 4049 group_size = SLP_TREE_SCALAR_STMTS (node).length ();
ebfd146a 4050
68435eb2 4051 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
a1f072e2 4052 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
ebfd146a 4053
73fbfcad 4054 if (dump_enabled_p ())
3c2a8ed0
DM
4055 dump_printf_loc (MSG_NOTE, vect_location,
4056 "------>vectorizing SLP node starting from: %G",
4057 stmt_info->stmt);
ebfd146a 4058
2e8ab70c
RB
4059 /* Vectorized stmts go before the last scalar stmt which is where
4060 all uses are ready. */
95c68311
RS
4061 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
4062 si = gsi_for_stmt (last_stmt_info->stmt);
e4a707c4 4063
6876e5bc
RB
4064 /* Handle two-operation SLP nodes by vectorizing the group with
4065 both operations and then performing a merge. */
0f6e9b29 4066 bool done_p = false;
6876e5bc
RB
4067 if (SLP_TREE_TWO_OPERATORS (node))
4068 {
b9787581 4069 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
6876e5bc 4070 enum tree_code code0 = gimple_assign_rhs_code (stmt);
567a3691 4071 enum tree_code ocode = ERROR_MARK;
b9787581 4072 stmt_vec_info ostmt_info;
e3342de4 4073 vec_perm_builder mask (group_size, group_size, 1);
b9787581
RS
4074 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
4075 {
4076 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
4077 if (gimple_assign_rhs_code (ostmt) != code0)
4078 {
4079 mask.quick_push (1);
4080 ocode = gimple_assign_rhs_code (ostmt);
4081 }
4082 else
4083 mask.quick_push (0);
4084 }
567a3691 4085 if (ocode != ERROR_MARK)
6876e5bc 4086 {
16edaeb8
RS
4087 vec<stmt_vec_info> v0;
4088 vec<stmt_vec_info> v1;
6876e5bc
RB
4089 unsigned j;
4090 tree tmask = NULL_TREE;
308bc496 4091 vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
6876e5bc
RB
4092 v0 = SLP_TREE_VEC_STMTS (node).copy ();
4093 SLP_TREE_VEC_STMTS (node).truncate (0);
4094 gimple_assign_set_rhs_code (stmt, ocode);
308bc496 4095 vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
6876e5bc
RB
4096 gimple_assign_set_rhs_code (stmt, code0);
4097 v1 = SLP_TREE_VEC_STMTS (node).copy ();
4098 SLP_TREE_VEC_STMTS (node).truncate (0);
4099 tree meltype = build_nonstandard_integer_type
b397965c 4100 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
6876e5bc
RB
4101 tree mvectype = get_same_sized_vectype (meltype, vectype);
4102 unsigned k = 0, l;
4103 for (j = 0; j < v0.length (); ++j)
4104 {
dad55d70
RS
4105 /* Enforced by vect_build_slp_tree, which rejects variable-length
4106 vectors for SLP_TREE_TWO_OPERATORS. */
4107 unsigned int const_nunits = nunits.to_constant ();
4108 tree_vector_builder melts (mvectype, const_nunits, 1);
4109 for (l = 0; l < const_nunits; ++l)
6876e5bc 4110 {
1ece8d4c 4111 if (k >= group_size)
6876e5bc 4112 k = 0;
dad55d70
RS
4113 tree t = build_int_cst (meltype,
4114 mask[k++] * const_nunits + l);
794e3180 4115 melts.quick_push (t);
6876e5bc 4116 }
5ebaa477 4117 tmask = melts.build ();
6876e5bc
RB
4118
4119 /* ??? Not all targets support a VEC_PERM_EXPR with a
4120 constant mask that would translate to a vec_merge RTX
4121 (with their vec_perm_const_ok). We can either not
4122 vectorize in that case or let veclower do its job.
4123 Unfortunately that isn't too great and at least for
4124 plus/minus we'd eventually like to match targets
4125 vector addsub instructions. */
355fe088 4126 gimple *vstmt;
6876e5bc
RB
4127 vstmt = gimple_build_assign (make_ssa_name (vectype),
4128 VEC_PERM_EXPR,
16edaeb8
RS
4129 gimple_assign_lhs (v0[j]->stmt),
4130 gimple_assign_lhs (v1[j]->stmt),
4131 tmask);
4132 SLP_TREE_VEC_STMTS (node).quick_push
308bc496 4133 (vect_finish_stmt_generation (vinfo, stmt_info, vstmt, &si));
6876e5bc
RB
4134 }
4135 v0.release ();
4136 v1.release ();
0f6e9b29 4137 done_p = true;
6876e5bc
RB
4138 }
4139 }
0f6e9b29 4140 if (!done_p)
308bc496 4141 vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
603cca93
RB
4142
4143 /* Restore stmt def-types. */
4144 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4145 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
4146 {
4147 stmt_vec_info child_stmt_info;
4148 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4149 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4150 }
ebfd146a
IR
4151}
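/* Editor's illustration (standalone C++ sketch, not part of tree-vect-slp.c):
   the SLP_TREE_TWO_OPERATORS merge above, simulated for an add/sub group
   a+b, a-b, a+b, a-b with GROUP_SIZE equal to NUNITS.  Both operations are
   applied to whole vectors and a selector of the form mask[k] * NUNITS +
   lane blends, per lane, the result of the operation the scalar statement
   actually used.  All values are made up.  */
#include <cstdio>

int
main ()
{
  const unsigned nunits = 4;
  int a[nunits] = { 10, 20, 30, 40 }, b[nunits] = { 1, 2, 3, 4 };
  int v0[nunits], v1[nunits], merged[nunits];
  unsigned two_op_mask[nunits] = { 0, 1, 0, 1 };  /* 0 = PLUS, 1 = MINUS */

  for (unsigned l = 0; l < nunits; l++)
    {
      v0[l] = a[l] + b[l];	/* group vectorized with code0 (PLUS)  */
      v1[l] = a[l] - b[l];	/* group re-vectorized with ocode (MINUS)  */
    }

  /* The VEC_PERM_EXPR selects from the concatenation {v0, v1}.  */
  for (unsigned l = 0; l < nunits; l++)
    {
      unsigned sel = two_op_mask[l] * nunits + l;
      merged[l] = sel < nunits ? v0[sel] : v1[sel - nunits];
    }

  for (unsigned l = 0; l < nunits; l++)
    printf ("%d ", merged[l]);	/* prints: 11 18 33 36 */
  printf ("\n");
  return 0;
}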
4152
dd34c087
JJ
4153/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4154 For loop vectorization this is done in vectorizable_call, but for SLP
4155 it needs to be deferred until end of vect_schedule_slp, because multiple
4156 SLP instances may refer to the same scalar stmt. */
4157
4158static void
308bc496
RB
4159vect_remove_slp_scalar_calls (vec_info *vinfo,
4160 slp_tree node, hash_set<slp_tree> &visited)
dd34c087 4161{
b9787581 4162 gimple *new_stmt;
dd34c087
JJ
4163 gimple_stmt_iterator gsi;
4164 int i;
d755c7ef 4165 slp_tree child;
dd34c087
JJ
4166 tree lhs;
4167 stmt_vec_info stmt_info;
4168
603cca93 4169 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
dd34c087
JJ
4170 return;
4171
4bfcf879
RB
4172 if (visited.add (node))
4173 return;
4174
9771b263 4175 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
308bc496 4176 vect_remove_slp_scalar_calls (vinfo, child, visited);
dd34c087 4177
b9787581 4178 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
dd34c087 4179 {
b9787581
RS
4180 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4181 if (!stmt || gimple_bb (stmt) == NULL)
dd34c087 4182 continue;
b9787581 4183 if (is_pattern_stmt_p (stmt_info)
dd34c087
JJ
4184 || !PURE_SLP_STMT (stmt_info))
4185 continue;
4186 lhs = gimple_call_lhs (stmt);
4187 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
dd34c087 4188 gsi = gsi_for_stmt (stmt);
308bc496 4189 vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
dd34c087
JJ
4190 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4191 }
4192}
ebfd146a 4193
4bfcf879 4194static void
308bc496 4195vect_remove_slp_scalar_calls (vec_info *vinfo, slp_tree node)
4bfcf879
RB
4196{
4197 hash_set<slp_tree> visited;
308bc496 4198 vect_remove_slp_scalar_calls (vinfo, node, visited);
4bfcf879
RB
4199}
4200
818b3293
JH
4201/* Vectorize the instance root. */
4202
4203void
4204vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4205{
0ec77a6c 4206 gassign *rstmt = NULL;
818b3293
JH
4207
4208 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4209 {
4210 stmt_vec_info child_stmt_info;
4211 int j;
4212
4213 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4214 {
4215 tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4216 tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
86c3a7d8
RS
4217 if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
4218 TREE_TYPE (vect_lhs)))
4219 vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
4220 vect_lhs);
818b3293
JH
4221 rstmt = gimple_build_assign (root_lhs, vect_lhs);
4222 break;
4223 }
4224 }
4225 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4226 {
4227 int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4228 stmt_vec_info child_stmt_info;
4229 int j;
4230 vec<constructor_elt, va_gc> *v;
4231 vec_alloc (v, nelts);
4232
4233 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4234 {
4235 CONSTRUCTOR_APPEND_ELT (v,
4236 NULL_TREE,
4237 gimple_get_lhs (child_stmt_info->stmt));
4238 }
4239 tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4240 tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4241 tree r_constructor = build_constructor (rtype, v);
4242 rstmt = gimple_build_assign (lhs, r_constructor);
4243 }
0ec77a6c
TC
4244
4245 gcc_assert (rstmt);
4246
818b3293
JH
4247 gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4248 gsi_replace (&rgsi, rstmt, true);
4249}
4250
ff802fa1
IR
4251/* Generate vector code for all SLP instances in the loop/basic block. */
4252
8fe1bd30 4253void
310213d4 4254vect_schedule_slp (vec_info *vinfo)
ebfd146a 4255{
9771b263 4256 vec<slp_instance> slp_instances;
ebfd146a 4257 slp_instance instance;
8b7e9dba 4258 unsigned int i;
78604de0 4259
310213d4 4260 slp_instances = vinfo->slp_instances;
9771b263 4261 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a 4262 {
818b3293 4263 slp_tree node = SLP_INSTANCE_TREE (instance);
ebfd146a 4264 /* Schedule the tree of INSTANCE. */
308bc496 4265 vect_schedule_slp_instance (vinfo, node, instance);
818b3293
JH
4266
4267 if (SLP_INSTANCE_ROOT_STMT (instance))
4268 vectorize_slp_instance_root_stmt (node, instance);
4269
73fbfcad 4270 if (dump_enabled_p ())
78c60e3d 4271 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4272 "vectorizing stmts using SLP.\n");
ebfd146a
IR
4273 }
4274
9771b263 4275 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
4276 {
4277 slp_tree root = SLP_INSTANCE_TREE (instance);
b9787581 4278 stmt_vec_info store_info;
b5aeb3bb 4279 unsigned int j;
b5aeb3bb 4280
c40eced0
RB
4281 /* Remove scalar call stmts. Do not do this for basic-block
4282 vectorization as not all uses may be vectorized.
4283 ??? Why should this be necessary? DCE should be able to
4284 remove the stmts itself.
4285 ??? For BB vectorization we can as well remove scalar
4286 stmts starting from the SLP tree root if they have no
4287 uses. */
310213d4 4288 if (is_a <loop_vec_info> (vinfo))
308bc496 4289 vect_remove_slp_scalar_calls (vinfo, root);
dd34c087 4290
4e849a74 4291 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info); j++)
b5aeb3bb 4292 {
b9787581
RS
4293 if (!STMT_VINFO_DATA_REF (store_info))
4294 break;
4295
818b3293
JH
4296 if (SLP_INSTANCE_ROOT_STMT (instance))
4297 continue;
4298
211cd1e2 4299 store_info = vect_orig_stmt (store_info);
b9787581 4300 /* Free the attached stmt_vec_info and remove the stmt. */
b5b56c2a 4301 vinfo->remove_stmt (store_info);
b5aeb3bb
IR
4302 }
4303 }
ebfd146a 4304}