]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
Avoid retrying with the same vector modes
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
a5544970 2 Copyright (C) 2007-2019 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
40e23961 35#include "fold-const.h"
d8a2d370 36#include "stor-layout.h"
5be5c238 37#include "gimple-iterator.h"
ebfd146a 38#include "cfgloop.h"
ebfd146a 39#include "tree-vectorizer.h"
2635892a 40#include "langhooks.h"
642fce57 41#include "gimple-walk.h"
428db0ba 42#include "dbgcnt.h"
5ebaa477 43#include "tree-vector-builder.h"
f151c9e1 44#include "vec-perm-indices.h"
018b2744
RS
45#include "gimple-fold.h"
46#include "internal-fn.h"
a70d6342
IR
47
48
6e2dd807
RS
49/* Recursively free the memory allocated for the SLP tree rooted at NODE.
50 FINAL_P is true if we have vectorized the instance or if we have
51 made a final decision not to vectorize the statements in any way. */
ebfd146a
IR
52
53static void
6e2dd807 54vect_free_slp_tree (slp_tree node, bool final_p)
ebfd146a 55{
d092494c 56 int i;
d755c7ef 57 slp_tree child;
d092494c 58
a1f072e2
RB
59 if (--node->refcnt != 0)
60 return;
61
9771b263 62 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6e2dd807 63 vect_free_slp_tree (child, final_p);
b8698a0f 64
6e2dd807
RS
65 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
66 Some statements might no longer exist, after having been
67 removed by vect_transform_stmt. Updating the remaining
68 statements would be redundant. */
69 if (!final_p)
70 {
b9787581
RS
71 stmt_vec_info stmt_info;
72 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6e2dd807 73 {
b9787581
RS
74 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
75 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
6e2dd807
RS
76 }
77 }
78810bd3 78
9771b263
DN
79 SLP_TREE_CHILDREN (node).release ();
80 SLP_TREE_SCALAR_STMTS (node).release ();
30c0d1e3 81 SLP_TREE_SCALAR_OPS (node).release ();
9771b263 82 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 83 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
84
85 free (node);
86}
87
6e2dd807
RS
88/* Free the memory allocated for the SLP instance. FINAL_P is true if we
89 have vectorized the instance or if we have made a final decision not
90 to vectorize the statements in any way. */
ebfd146a
IR
91
92void
6e2dd807 93vect_free_slp_instance (slp_instance instance, bool final_p)
ebfd146a 94{
6e2dd807 95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
9771b263 96 SLP_INSTANCE_LOADS (instance).release ();
c7e62a26 97 free (instance);
ebfd146a
IR
98}
99
100
d092494c
IR
101/* Create an SLP node for SCALAR_STMTS. */
102
103static slp_tree
b9787581 104vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
d092494c 105{
d3cfd39e 106 slp_tree node;
b9787581 107 stmt_vec_info stmt_info = scalar_stmts[0];
d092494c
IR
108 unsigned int nops;
109
b9787581 110 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
d092494c 111 nops = gimple_call_num_args (stmt);
b9787581 112 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
f7e531cf
IR
113 {
114 nops = gimple_num_ops (stmt) - 1;
115 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
116 nops++;
117 }
b9787581 118 else if (is_a <gphi *> (stmt_info->stmt))
e7baeb39 119 nops = 0;
d092494c
IR
120 else
121 return NULL;
122
d3cfd39e 123 node = XNEW (struct _slp_tree);
d092494c 124 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
30c0d1e3 125 SLP_TREE_SCALAR_OPS (node) = vNULL;
9771b263 126 SLP_TREE_VEC_STMTS (node).create (0);
68435eb2 127 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
9771b263 128 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 129 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 130 SLP_TREE_TWO_OPERATORS (node) = false;
603cca93 131 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
a1f072e2 132 node->refcnt = 1;
f48e4da3 133 node->max_nunits = 1;
d092494c 134
78810bd3 135 unsigned i;
b9787581
RS
136 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
137 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
78810bd3 138
d092494c
IR
139 return node;
140}
141
30c0d1e3
RB
142/* Create an SLP node for OPS. */
143
144static slp_tree
145vect_create_new_slp_node (vec<tree> ops)
146{
147 slp_tree node;
148
149 node = XNEW (struct _slp_tree);
150 SLP_TREE_SCALAR_STMTS (node) = vNULL;
151 SLP_TREE_SCALAR_OPS (node) = ops;
152 SLP_TREE_VEC_STMTS (node).create (0);
153 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
154 SLP_TREE_CHILDREN (node) = vNULL;
155 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
156 SLP_TREE_TWO_OPERATORS (node) = false;
157 SLP_TREE_DEF_TYPE (node) = vect_external_def;
158 node->refcnt = 1;
159 node->max_nunits = 1;
160
161 return node;
162}
163
d092494c 164
ddf56386
RB
165/* This structure is used in creation of an SLP tree. Each instance
166 corresponds to the same operand in a group of scalar stmts in an SLP
167 node. */
168typedef struct _slp_oprnd_info
169{
170 /* Def-stmts for the operands. */
b9787581 171 vec<stmt_vec_info> def_stmts;
30c0d1e3
RB
172 /* Operands. */
173 vec<tree> ops;
ddf56386
RB
174 /* Information about the first statement, its vector def-type, type, the
175 operand itself in case it's constant, and an indication if it's a pattern
176 stmt. */
ddf56386 177 tree first_op_type;
34e82342 178 enum vect_def_type first_dt;
7098ab48 179 bool any_pattern;
ddf56386
RB
180} *slp_oprnd_info;
181
182
d092494c
IR
183/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
184 operand. */
9771b263 185static vec<slp_oprnd_info>
d092494c
IR
186vect_create_oprnd_info (int nops, int group_size)
187{
188 int i;
189 slp_oprnd_info oprnd_info;
9771b263 190 vec<slp_oprnd_info> oprnds_info;
d092494c 191
9771b263 192 oprnds_info.create (nops);
d092494c
IR
193 for (i = 0; i < nops; i++)
194 {
195 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 196 oprnd_info->def_stmts.create (group_size);
30c0d1e3 197 oprnd_info->ops.create (group_size);
d092494c 198 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 199 oprnd_info->first_op_type = NULL_TREE;
7098ab48 200 oprnd_info->any_pattern = false;
9771b263 201 oprnds_info.quick_push (oprnd_info);
d092494c
IR
202 }
203
204 return oprnds_info;
205}
206
207
d3cfd39e
JJ
208/* Free operands info. */
209
d092494c 210static void
9771b263 211vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
212{
213 int i;
214 slp_oprnd_info oprnd_info;
215
9771b263 216 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 217 {
9771b263 218 oprnd_info->def_stmts.release ();
30c0d1e3 219 oprnd_info->ops.release ();
d3cfd39e
JJ
220 XDELETE (oprnd_info);
221 }
d092494c 222
9771b263 223 oprnds_info.release ();
d092494c
IR
224}
225
226
32e8e429
RS
227/* Find the place of the data-ref in STMT_INFO in the interleaving chain
228 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
229 of the chain. */
d755c7ef 230
b210f45f 231int
32e8e429
RS
232vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
233 stmt_vec_info first_stmt_info)
d755c7ef 234{
a1824cfd 235 stmt_vec_info next_stmt_info = first_stmt_info;
d755c7ef
RB
236 int result = 0;
237
a1824cfd 238 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
d755c7ef
RB
239 return -1;
240
241 do
242 {
a1824cfd 243 if (next_stmt_info == stmt_info)
d755c7ef 244 return result;
a1824cfd
RS
245 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
246 if (next_stmt_info)
247 result += DR_GROUP_GAP (next_stmt_info);
d755c7ef 248 }
a1824cfd 249 while (next_stmt_info);
d755c7ef
RB
250
251 return -1;
252}
253
018b2744
RS
254/* Check whether it is possible to load COUNT elements of type ELT_MODE
255 using the method implemented by duplicate_and_interleave. Return true
256 if so, returning the number of intermediate vectors in *NVECTORS_OUT
257 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
258 (if nonnull). */
259
f1739b48 260bool
ba7f76dd 261can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
43fdde57 262 machine_mode elt_mode,
f1739b48
RS
263 unsigned int *nvectors_out,
264 tree *vector_type_out,
265 tree *permutes)
018b2744
RS
266{
267 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
268 poly_int64 nelts;
269 unsigned int nvectors = 1;
270 for (;;)
271 {
272 scalar_int_mode int_mode;
273 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
1c84a2d2 274 if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts)
018b2744
RS
275 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
276 {
277 tree int_type = build_nonstandard_integer_type
278 (GET_MODE_BITSIZE (int_mode), 1);
279 tree vector_type = build_vector_type (int_type, nelts);
280 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
281 {
282 vec_perm_builder sel1 (nelts, 2, 3);
283 vec_perm_builder sel2 (nelts, 2, 3);
284 poly_int64 half_nelts = exact_div (nelts, 2);
285 for (unsigned int i = 0; i < 3; ++i)
286 {
287 sel1.quick_push (i);
288 sel1.quick_push (i + nelts);
289 sel2.quick_push (half_nelts + i);
290 sel2.quick_push (half_nelts + i + nelts);
291 }
292 vec_perm_indices indices1 (sel1, 2, nelts);
293 vec_perm_indices indices2 (sel2, 2, nelts);
294 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
295 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
296 {
297 if (nvectors_out)
298 *nvectors_out = nvectors;
299 if (vector_type_out)
300 *vector_type_out = vector_type;
301 if (permutes)
302 {
303 permutes[0] = vect_gen_perm_mask_checked (vector_type,
304 indices1);
305 permutes[1] = vect_gen_perm_mask_checked (vector_type,
306 indices2);
307 }
308 return true;
309 }
310 }
311 }
312 if (!multiple_p (elt_bytes, 2, &elt_bytes))
313 return false;
314 nvectors *= 2;
315 }
316}
d755c7ef 317
d092494c
IR
318/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
319 they are of a valid type and that they match the defs of the first stmt of
4cecd659 320 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
018b2744
RS
321 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
322 indicates swap is required for cond_expr stmts. Specifically, *SWAP
323 is 1 if STMT is cond and operands of comparison need to be swapped;
324 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
325 If there is any operand swap in this function, *SWAP is set to non-zero
326 value.
4cecd659
BC
327 If there was a fatal error return -1; if the error could be corrected by
328 swapping operands of father node of this one, return 1; if everything is
329 ok return 0. */
4cecd659
BC
330static int
331vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
b9787581 332 vec<stmt_vec_info> stmts, unsigned stmt_num,
4cecd659 333 vec<slp_oprnd_info> *oprnds_info)
ebfd146a 334{
b9787581 335 stmt_vec_info stmt_info = stmts[stmt_num];
ebfd146a
IR
336 tree oprnd;
337 unsigned int i, number_of_oprnds;
d092494c 338 enum vect_def_type dt = vect_uninitialized_def;
abf9bfbc 339 slp_oprnd_info oprnd_info;
b0b4483e 340 int first_op_idx = 1;
0246112a 341 unsigned int commutative_op = -1U;
b0b4483e 342 bool first_op_cond = false;
effb52da 343 bool first = stmt_num == 0;
b8698a0f 344
b9787581 345 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
190c2236
JJ
346 {
347 number_of_oprnds = gimple_call_num_args (stmt);
b0b4483e 348 first_op_idx = 3;
0246112a
RS
349 if (gimple_call_internal_p (stmt))
350 {
351 internal_fn ifn = gimple_call_internal_fn (stmt);
352 commutative_op = first_commutative_argument (ifn);
99763671
AM
353
354 /* Masked load, only look at mask. */
355 if (ifn == IFN_MASK_LOAD)
356 {
357 number_of_oprnds = 1;
358 /* Mask operand index. */
359 first_op_idx = 5;
360 }
0246112a 361 }
190c2236 362 }
b9787581 363 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
f7e531cf 364 {
b0b4483e 365 enum tree_code code = gimple_assign_rhs_code (stmt);
f7e531cf 366 number_of_oprnds = gimple_num_ops (stmt) - 1;
4cecd659
BC
367 /* Swap can only be done for cond_expr if asked to, otherwise we
368 could result in different comparison code to the first stmt. */
61021c35 369 if (code == COND_EXPR
a414c77f 370 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
b0b4483e
RB
371 {
372 first_op_cond = true;
b0b4483e
RB
373 number_of_oprnds++;
374 }
375 else
0246112a 376 commutative_op = commutative_tree_code (code) ? 0U : -1U;
f7e531cf 377 }
d092494c 378 else
b0b4483e 379 return -1;
ebfd146a 380
4cecd659
BC
381 bool swapped = (*swap != 0);
382 gcc_assert (!swapped || first_op_cond);
ebfd146a
IR
383 for (i = 0; i < number_of_oprnds; i++)
384 {
b0b4483e
RB
385again:
386 if (first_op_cond)
f7e531cf 387 {
4cecd659
BC
388 /* Map indicating how operands of cond_expr should be swapped. */
389 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
390 int *map = maps[*swap];
391
392 if (i < 2)
b9787581
RS
393 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
394 first_op_idx), map[i]);
b0b4483e 395 else
b9787581 396 oprnd = gimple_op (stmt_info->stmt, map[i]);
f7e531cf
IR
397 }
398 else
b9787581 399 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
61021c35
RB
400 if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
401 oprnd = TREE_OPERAND (oprnd, 0);
f7e531cf 402
9771b263 403 oprnd_info = (*oprnds_info)[i];
ebfd146a 404
fef96d8e
RS
405 stmt_vec_info def_stmt_info;
406 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
ebfd146a 407 {
73fbfcad 408 if (dump_enabled_p ())
3c2a8ed0
DM
409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
410 "Build SLP failed: can't analyze def for %T\n",
411 oprnd);
ebfd146a 412
b0b4483e 413 return -1;
ebfd146a
IR
414 }
415
7098ab48
RB
416 if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
417 oprnd_info->any_pattern = true;
effb52da 418
d092494c 419 if (first)
ebfd146a 420 {
b4673569
RB
421 /* For the swapping logic below force vect_reduction_def
422 for the reduction op in a SLP reduction group. */
423 if (!STMT_VINFO_DATA_REF (stmt_info)
424 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
425 && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
426 && def_stmt_info)
427 dt = vect_reduction_def;
d092494c 428 oprnd_info->first_dt = dt;
793d9a16 429 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 430 }
ebfd146a
IR
431 else
432 {
d092494c
IR
433 /* Not first stmt of the group, check that the def-stmt/s match
434 the def-stmt/s of the first stmt. Allow different definition
435 types for reduction chains: the first stmt must be a
436 vect_reduction_def (a phi node), and the rest
4352288a 437 end in the reduction chain. */
018b2744
RS
438 tree type = TREE_TYPE (oprnd);
439 if ((oprnd_info->first_dt != dt
440 && !(oprnd_info->first_dt == vect_reduction_def
4352288a
RB
441 && !STMT_VINFO_DATA_REF (stmt_info)
442 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
443 && def_stmt_info
444 && !STMT_VINFO_DATA_REF (def_stmt_info)
445 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
446 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
018b2744
RS
447 && !((oprnd_info->first_dt == vect_external_def
448 || oprnd_info->first_dt == vect_constant_def)
449 && (dt == vect_external_def
450 || dt == vect_constant_def)))
4352288a
RB
451 || !types_compatible_p (oprnd_info->first_op_type, type)
452 || (!STMT_VINFO_DATA_REF (stmt_info)
453 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
454 && ((!def_stmt_info
455 || STMT_VINFO_DATA_REF (def_stmt_info)
456 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
457 != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
458 != (oprnd_info->first_dt != vect_reduction_def))))
ebfd146a 459 {
b0b4483e 460 /* Try swapping operands if we got a mismatch. */
0246112a 461 if (i == commutative_op && !swapped)
b0b4483e 462 {
4352288a
RB
463 if (dump_enabled_p ())
464 dump_printf_loc (MSG_NOTE, vect_location,
465 "trying swapped operands\n");
b0b4483e
RB
466 swapped = true;
467 goto again;
468 }
469
abf9bfbc
RB
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 472 "Build SLP failed: different types\n");
d092494c 473
b0b4483e 474 return 1;
ebfd146a 475 }
018b2744
RS
476 if ((dt == vect_constant_def
477 || dt == vect_external_def)
1c84a2d2 478 && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
018b2744 479 && (TREE_CODE (type) == BOOLEAN_TYPE
43fdde57 480 || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
018b2744 481 TYPE_MODE (type))))
a23644f2
RS
482 {
483 if (dump_enabled_p ())
3c2a8ed0
DM
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
485 "Build SLP failed: invalid type of def "
486 "for variable-length SLP %T\n", oprnd);
a23644f2
RS
487 return -1;
488 }
018b2744
RS
489 }
490
491 /* Check the types of the definitions. */
492 switch (dt)
493 {
018b2744 494 case vect_external_def:
6c7b0df8
RB
495 /* Make sure to demote the overall operand to external. */
496 oprnd_info->first_dt = vect_external_def;
497 /* Fallthru. */
498 case vect_constant_def:
30c0d1e3
RB
499 oprnd_info->def_stmts.quick_push (NULL);
500 oprnd_info->ops.quick_push (oprnd);
ebfd146a 501 break;
b8698a0f 502
4352288a 503 case vect_internal_def:
c78e3652 504 case vect_reduction_def:
4352288a
RB
505 if (oprnd_info->first_dt == vect_reduction_def
506 && !STMT_VINFO_DATA_REF (stmt_info)
507 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
508 && !STMT_VINFO_DATA_REF (def_stmt_info)
509 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
510 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
511 {
512 /* For a SLP reduction chain we want to duplicate the
513 reduction to each of the chain members. That gets
514 us a sane SLP graph (still the stmts are not 100%
515 correct wrt the initial values). */
516 gcc_assert (!first);
517 oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
518 oprnd_info->ops.quick_push (oprnd_info->ops[0]);
519 break;
520 }
521 /* Fallthru. */
e7baeb39 522 case vect_induction_def:
fef96d8e 523 oprnd_info->def_stmts.quick_push (def_stmt_info);
30c0d1e3 524 oprnd_info->ops.quick_push (oprnd);
ebfd146a
IR
525 break;
526
527 default:
528 /* FORNOW: Not supported. */
73fbfcad 529 if (dump_enabled_p ())
3c2a8ed0
DM
530 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
531 "Build SLP failed: illegal type of def %T\n",
532 oprnd);
ebfd146a 533
b0b4483e 534 return -1;
ebfd146a
IR
535 }
536 }
537
b0b4483e
RB
538 /* Swap operands. */
539 if (swapped)
540 {
541 if (first_op_cond)
542 {
77100812
RB
543 /* If there are already uses of this stmt in a SLP instance then
544 we've committed to the operand order and can't swap it. */
545 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
546 {
547 if (dump_enabled_p ())
548 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
549 "Build SLP failed: cannot swap operands of "
550 "shared stmt %G", stmt_info->stmt);
551 return -1;
552 }
553
30c0d1e3
RB
554 /* To get rid of this swapping we have to move the stmt code
555 to the SLP tree as well (and gather it here per stmt). */
0246112a 556 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
b0b4483e 557 tree cond = gimple_assign_rhs1 (stmt);
4cecd659
BC
558 enum tree_code code = TREE_CODE (cond);
559
560 /* Swap. */
561 if (*swap == 1)
562 {
563 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
564 &TREE_OPERAND (cond, 1));
565 TREE_SET_CODE (cond, swap_tree_comparison (code));
566 }
567 /* Invert. */
568 else
569 {
570 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
571 gimple_assign_rhs3_ptr (stmt));
96d9d162
RB
572 if (STMT_VINFO_REDUC_IDX (stmt_info) == 1)
573 STMT_VINFO_REDUC_IDX (stmt_info) = 2;
574 else if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
575 STMT_VINFO_REDUC_IDX (stmt_info) = 1;
4cecd659
BC
576 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
577 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
578 gcc_assert (code != ERROR_MARK);
579 TREE_SET_CODE (cond, code);
580 }
b0b4483e
RB
581 }
582 else
0246112a 583 {
30c0d1e3
RB
584 /* Commutative ops need not reflect swapping, ops are in
585 the SLP tree. */
0246112a 586 }
78810bd3 587 if (dump_enabled_p ())
3c2a8ed0
DM
588 dump_printf_loc (MSG_NOTE, vect_location,
589 "swapped operands to match def types in %G",
590 stmt_info->stmt);
b0b4483e
RB
591 }
592
4cecd659 593 *swap = swapped;
b0b4483e 594 return 0;
ebfd146a
IR
595}
596
5249ee4d
RS
597/* Return true if call statements CALL1 and CALL2 are similar enough
598 to be combined into the same SLP group. */
599
600static bool
601compatible_calls_p (gcall *call1, gcall *call2)
602{
603 unsigned int nargs = gimple_call_num_args (call1);
604 if (nargs != gimple_call_num_args (call2))
605 return false;
606
607 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
608 return false;
609
610 if (gimple_call_internal_p (call1))
611 {
612 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
613 TREE_TYPE (gimple_call_lhs (call2))))
614 return false;
615 for (unsigned int i = 0; i < nargs; ++i)
616 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
617 TREE_TYPE (gimple_call_arg (call2, i))))
618 return false;
619 }
620 else
621 {
622 if (!operand_equal_p (gimple_call_fn (call1),
623 gimple_call_fn (call2), 0))
624 return false;
625
626 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
627 return false;
628 }
629 return true;
630}
631
b161f2c9 632/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
d7609678 633 caller's attempt to find the vector type in STMT_INFO with the narrowest
b161f2c9 634 element type. Return true if VECTYPE is nonnull and if it is valid
d7609678
RS
635 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
636 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
637 vect_build_slp_tree. */
b161f2c9
RS
638
639static bool
d7609678 640vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
4b6068ea 641 tree vectype, poly_uint64 *max_nunits)
b161f2c9
RS
642{
643 if (!vectype)
644 {
645 if (dump_enabled_p ())
3c2a8ed0
DM
646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
647 "Build SLP failed: unsupported data-type in %G\n",
648 stmt_info->stmt);
b161f2c9
RS
649 /* Fatal mismatch. */
650 return false;
651 }
652
653 /* If populating the vector type requires unrolling then fail
654 before adjusting *max_nunits for basic-block vectorization. */
4b6068ea
RS
655 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
656 unsigned HOST_WIDE_INT const_nunits;
d7609678 657 if (STMT_VINFO_BB_VINFO (stmt_info)
4b6068ea
RS
658 && (!nunits.is_constant (&const_nunits)
659 || const_nunits > group_size))
b161f2c9 660 {
bbeeac91
DM
661 if (dump_enabled_p ())
662 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
663 "Build SLP failed: unrolling required "
664 "in basic block SLP\n");
b161f2c9
RS
665 /* Fatal mismatch. */
666 return false;
667 }
668
669 /* In case of multiple types we need to detect the smallest type. */
4b6068ea 670 vect_update_max_nunits (max_nunits, vectype);
b161f2c9
RS
671 return true;
672}
ebfd146a 673
1f3cb663
RS
674/* STMTS is a group of GROUP_SIZE SLP statements in which some
675 statements do the same operation as the first statement and in which
676 the others do ALT_STMT_CODE. Return true if we can take one vector
677 of the first operation and one vector of the second and permute them
678 to get the required result. VECTYPE is the type of the vector that
679 would be permuted. */
680
681static bool
b9787581
RS
682vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
683 unsigned int group_size, tree vectype,
684 tree_code alt_stmt_code)
1f3cb663
RS
685{
686 unsigned HOST_WIDE_INT count;
687 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
688 return false;
689
690 vec_perm_builder sel (count, count, 1);
691 for (unsigned int i = 0; i < count; ++i)
692 {
693 unsigned int elt = i;
b9787581
RS
694 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
695 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
1f3cb663
RS
696 elt += count;
697 sel.quick_push (elt);
698 }
699 vec_perm_indices indices (sel, 2, count);
700 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
701}
702
6983e6b5
RB
703/* Verify if the scalar stmts STMTS are isomorphic, require data
704 permutation or are of unsupported types of operation. Return
705 true if they are, otherwise return false and indicate in *MATCHES
706 which stmts are not isomorphic to the first one. If MATCHES[0]
707 is false then this indicates the comparison could not be
4cecd659
BC
708 carried out or the stmts will never be vectorized by SLP.
709
99763671 710 Note COND_EXPR is possibly isomorphic to another one after swapping its
4cecd659
BC
711 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
712 the first stmt by swapping the two operands of comparison; set SWAP[i]
713 to 2 if stmt I is isormorphic to the first stmt by inverting the code
714 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
715 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
ebfd146a
IR
716
717static bool
d7609678 718vect_build_slp_tree_1 (unsigned char *swap,
b9787581 719 vec<stmt_vec_info> stmts, unsigned int group_size,
5249ee4d
RS
720 poly_uint64 *max_nunits, bool *matches,
721 bool *two_operators)
ebfd146a 722{
ebfd146a 723 unsigned int i;
b9787581 724 stmt_vec_info first_stmt_info = stmts[0];
6876e5bc
RB
725 enum tree_code first_stmt_code = ERROR_MARK;
726 enum tree_code alt_stmt_code = ERROR_MARK;
727 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 728 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 729 tree lhs;
6983e6b5 730 bool need_same_oprnds = false;
1f3cb663 731 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
ebfd146a
IR
732 optab optab;
733 int icode;
ef4bddc2
RS
734 machine_mode optab_op2_mode;
735 machine_mode vec_mode;
bffb8014 736 stmt_vec_info first_load = NULL, prev_first_load = NULL;
bcde3345 737 bool load_p = false;
d092494c 738
ebfd146a 739 /* For every stmt in NODE find its def stmt/s. */
b9787581
RS
740 stmt_vec_info stmt_info;
741 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
ebfd146a 742 {
b9787581 743 gimple *stmt = stmt_info->stmt;
4cecd659 744 swap[i] = 0;
6983e6b5
RB
745 matches[i] = false;
746
73fbfcad 747 if (dump_enabled_p ())
3c2a8ed0 748 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
ebfd146a 749
4b5caab7 750 /* Fail to vectorize statements marked as unvectorizable. */
b9787581 751 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
4b5caab7 752 {
73fbfcad 753 if (dump_enabled_p ())
3c2a8ed0
DM
754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
755 "Build SLP failed: unvectorizable statement %G",
756 stmt);
6983e6b5
RB
757 /* Fatal mismatch. */
758 matches[0] = false;
4b5caab7
IR
759 return false;
760 }
761
ebfd146a
IR
762 lhs = gimple_get_lhs (stmt);
763 if (lhs == NULL_TREE)
764 {
73fbfcad 765 if (dump_enabled_p ())
3c2a8ed0
DM
766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
767 "Build SLP failed: not GIMPLE_ASSIGN nor "
768 "GIMPLE_CALL %G", stmt);
6983e6b5
RB
769 /* Fatal mismatch. */
770 matches[0] = false;
ebfd146a
IR
771 return false;
772 }
773
1f3cb663
RS
774 tree nunits_vectype;
775 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
776 &nunits_vectype)
777 || (nunits_vectype
d7609678 778 && !vect_record_max_nunits (stmt_info, group_size,
1f3cb663 779 nunits_vectype, max_nunits)))
b161f2c9 780 {
6983e6b5
RB
781 /* Fatal mismatch. */
782 matches[0] = false;
1f3cb663
RS
783 return false;
784 }
785
786 gcc_assert (vectype);
b8698a0f 787
538dd0b7 788 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
789 {
790 rhs_code = CALL_EXPR;
bcde3345
AM
791
792 if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
793 load_p = true;
794 else if ((gimple_call_internal_p (call_stmt)
795 && (!vectorizable_internal_fn_p
796 (gimple_call_internal_fn (call_stmt))))
797 || gimple_call_tail_p (call_stmt)
798 || gimple_call_noreturn_p (call_stmt)
799 || !gimple_call_nothrow_p (call_stmt)
800 || gimple_call_chain (call_stmt))
190c2236 801 {
73fbfcad 802 if (dump_enabled_p ())
3c2a8ed0
DM
803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
804 "Build SLP failed: unsupported call type %G",
805 call_stmt);
6983e6b5
RB
806 /* Fatal mismatch. */
807 matches[0] = false;
190c2236
JJ
808 return false;
809 }
810 }
ebfd146a 811 else
bcde3345
AM
812 {
813 rhs_code = gimple_assign_rhs_code (stmt);
814 load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
815 }
ebfd146a
IR
816
817 /* Check the operation. */
818 if (i == 0)
819 {
820 first_stmt_code = rhs_code;
821
b8698a0f 822 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
823 vector shift with scalar shift operand. */
824 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
825 || rhs_code == LROTATE_EXPR
826 || rhs_code == RROTATE_EXPR)
827 {
1f3cb663
RS
828 if (vectype == boolean_type_node)
829 {
830 if (dump_enabled_p ())
831 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
832 "Build SLP failed: shift of a"
833 " boolean.\n");
834 /* Fatal mismatch. */
835 matches[0] = false;
836 return false;
837 }
838
ebfd146a
IR
839 vec_mode = TYPE_MODE (vectype);
840
841 /* First see if we have a vector/vector shift. */
842 optab = optab_for_tree_code (rhs_code, vectype,
843 optab_vector);
844
845 if (!optab
947131ba 846 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
847 {
848 /* No vector/vector shift, try for a vector/scalar shift. */
849 optab = optab_for_tree_code (rhs_code, vectype,
850 optab_scalar);
851
852 if (!optab)
853 {
73fbfcad 854 if (dump_enabled_p ())
78c60e3d 855 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 856 "Build SLP failed: no optab.\n");
6983e6b5
RB
857 /* Fatal mismatch. */
858 matches[0] = false;
ebfd146a
IR
859 return false;
860 }
947131ba 861 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
862 if (icode == CODE_FOR_nothing)
863 {
73fbfcad 864 if (dump_enabled_p ())
78c60e3d
SS
865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
866 "Build SLP failed: "
e645e942 867 "op not supported by target.\n");
6983e6b5
RB
868 /* Fatal mismatch. */
869 matches[0] = false;
ebfd146a
IR
870 return false;
871 }
872 optab_op2_mode = insn_data[icode].operand[2].mode;
873 if (!VECTOR_MODE_P (optab_op2_mode))
874 {
875 need_same_oprnds = true;
876 first_op1 = gimple_assign_rhs2 (stmt);
877 }
878 }
879 }
36ba4aae
IR
880 else if (rhs_code == WIDEN_LSHIFT_EXPR)
881 {
882 need_same_oprnds = true;
883 first_op1 = gimple_assign_rhs2 (stmt);
884 }
ebfd146a
IR
885 }
886 else
887 {
6876e5bc
RB
888 if (first_stmt_code != rhs_code
889 && alt_stmt_code == ERROR_MARK)
890 alt_stmt_code = rhs_code;
ebfd146a
IR
891 if (first_stmt_code != rhs_code
892 && (first_stmt_code != IMAGPART_EXPR
893 || rhs_code != REALPART_EXPR)
894 && (first_stmt_code != REALPART_EXPR
69f11a13 895 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
896 /* Handle mismatches in plus/minus by computing both
897 and merging the results. */
898 && !((first_stmt_code == PLUS_EXPR
899 || first_stmt_code == MINUS_EXPR)
900 && (alt_stmt_code == PLUS_EXPR
901 || alt_stmt_code == MINUS_EXPR)
902 && rhs_code == alt_stmt_code)
b9787581 903 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
69f11a13 904 && (first_stmt_code == ARRAY_REF
38000232 905 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
906 || first_stmt_code == INDIRECT_REF
907 || first_stmt_code == COMPONENT_REF
908 || first_stmt_code == MEM_REF)))
ebfd146a 909 {
73fbfcad 910 if (dump_enabled_p ())
ebfd146a 911 {
78c60e3d
SS
912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
913 "Build SLP failed: different operation "
3c2a8ed0 914 "in stmt %G", stmt);
6876e5bc 915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3c2a8ed0 916 "original stmt %G", first_stmt_info->stmt);
ebfd146a 917 }
6983e6b5
RB
918 /* Mismatch. */
919 continue;
ebfd146a 920 }
b8698a0f
L
921
922 if (need_same_oprnds
ebfd146a
IR
923 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
924 {
73fbfcad 925 if (dump_enabled_p ())
3c2a8ed0
DM
926 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
927 "Build SLP failed: different shift "
928 "arguments in %G", stmt);
6983e6b5
RB
929 /* Mismatch. */
930 continue;
ebfd146a 931 }
190c2236 932
d6350f82 933 if (!load_p && rhs_code == CALL_EXPR)
190c2236 934 {
b9787581 935 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
5249ee4d 936 as_a <gcall *> (stmt)))
190c2236 937 {
73fbfcad 938 if (dump_enabled_p ())
3c2a8ed0
DM
939 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
940 "Build SLP failed: different calls in %G",
941 stmt);
6983e6b5
RB
942 /* Mismatch. */
943 continue;
190c2236
JJ
944 }
945 }
ebfd146a
IR
946 }
947
0d0293ac 948 /* Grouped store or load. */
b9787581 949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a
IR
950 {
951 if (REFERENCE_CLASS_P (lhs))
952 {
953 /* Store. */
6983e6b5 954 ;
ebfd146a 955 }
b5aeb3bb
IR
956 else
957 {
958 /* Load. */
b9787581 959 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
b5aeb3bb
IR
960 if (prev_first_load)
961 {
962 /* Check that there are no loads from different interleaving
6983e6b5
RB
963 chains in the same node. */
964 if (prev_first_load != first_load)
78c60e3d 965 {
73fbfcad 966 if (dump_enabled_p ())
3c2a8ed0
DM
967 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
968 vect_location,
969 "Build SLP failed: different "
970 "interleaving chains in one node %G",
971 stmt);
6983e6b5
RB
972 /* Mismatch. */
973 continue;
b5aeb3bb
IR
974 }
975 }
976 else
977 prev_first_load = first_load;
ebfd146a 978 }
0d0293ac 979 } /* Grouped access. */
ebfd146a
IR
980 else
981 {
bcde3345 982 if (load_p)
ebfd146a 983 {
0d0293ac 984 /* Not grouped load. */
73fbfcad 985 if (dump_enabled_p ())
3c2a8ed0
DM
986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
987 "Build SLP failed: not grouped load %G", stmt);
ebfd146a 988
0d0293ac 989 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
990 /* Fatal mismatch. */
991 matches[0] = false;
ebfd146a
IR
992 return false;
993 }
994
995 /* Not memory operation. */
996 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 997 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 998 && TREE_CODE_CLASS (rhs_code) != tcc_expression
42fd8198 999 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
61021c35 1000 && rhs_code != VIEW_CONVERT_EXPR
190c2236 1001 && rhs_code != CALL_EXPR)
ebfd146a 1002 {
73fbfcad 1003 if (dump_enabled_p ())
3c2a8ed0
DM
1004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1005 "Build SLP failed: operation unsupported %G",
1006 stmt);
6983e6b5
RB
1007 /* Fatal mismatch. */
1008 matches[0] = false;
ebfd146a
IR
1009 return false;
1010 }
1011
4cecd659
BC
1012 if (rhs_code == COND_EXPR)
1013 {
1014 tree cond_expr = gimple_assign_rhs1 (stmt);
1015 enum tree_code cond_code = TREE_CODE (cond_expr);
1016 enum tree_code swap_code = ERROR_MARK;
1017 enum tree_code invert_code = ERROR_MARK;
f7e531cf
IR
1018
1019 if (i == 0)
1020 first_cond_code = TREE_CODE (cond_expr);
4cecd659
BC
1021 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1022 {
1023 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1024 swap_code = swap_tree_comparison (cond_code);
1025 invert_code = invert_tree_comparison (cond_code, honor_nans);
1026 }
1027
1028 if (first_cond_code == cond_code)
1029 ;
1030 /* Isomorphic can be achieved by swapping. */
1031 else if (first_cond_code == swap_code)
1032 swap[i] = 1;
1033 /* Isomorphic can be achieved by inverting. */
1034 else if (first_cond_code == invert_code)
1035 swap[i] = 2;
1036 else
1037 {
1038 if (dump_enabled_p ())
3c2a8ed0
DM
1039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1040 "Build SLP failed: different"
1041 " operation %G", stmt);
6983e6b5
RB
1042 /* Mismatch. */
1043 continue;
f7e531cf 1044 }
4cecd659 1045 }
ebfd146a 1046 }
6983e6b5
RB
1047
1048 matches[i] = true;
1049 }
1050
1051 for (i = 0; i < group_size; ++i)
1052 if (!matches[i])
1053 return false;
1054
6876e5bc
RB
1055 /* If we allowed a two-operation SLP node verify the target can cope
1056 with the permute we are going to use. */
1057 if (alt_stmt_code != ERROR_MARK
1058 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1059 {
1f3cb663
RS
1060 if (vectype == boolean_type_node
1061 || !vect_two_operations_perm_ok_p (stmts, group_size,
1062 vectype, alt_stmt_code))
6876e5bc
RB
1063 {
1064 for (i = 0; i < group_size; ++i)
b9787581 1065 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
6876e5bc
RB
1066 {
1067 matches[i] = false;
1068 if (dump_enabled_p ())
1069 {
1070 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1071 "Build SLP failed: different operation "
3c2a8ed0 1072 "in stmt %G", stmts[i]->stmt);
6876e5bc 1073 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3c2a8ed0 1074 "original stmt %G", first_stmt_info->stmt);
6876e5bc
RB
1075 }
1076 }
1077 return false;
1078 }
1079 *two_operators = true;
1080 }
1081
6983e6b5
RB
1082 return true;
1083}
1084
26d66f28
RB
1085/* Traits for the hash_set to record failed SLP builds for a stmt set.
1086 Note we never remove apart from at destruction time so we do not
1087 need a special value for deleted that differs from empty. */
1088struct bst_traits
1089{
b9787581
RS
1090 typedef vec <stmt_vec_info> value_type;
1091 typedef vec <stmt_vec_info> compare_type;
26d66f28
RB
1092 static inline hashval_t hash (value_type);
1093 static inline bool equal (value_type existing, value_type candidate);
1094 static inline bool is_empty (value_type x) { return !x.exists (); }
1095 static inline bool is_deleted (value_type x) { return !x.exists (); }
1096 static inline void mark_empty (value_type &x) { x.release (); }
1097 static inline void mark_deleted (value_type &x) { x.release (); }
1098 static inline void remove (value_type &x) { x.release (); }
1099};
1100inline hashval_t
1101bst_traits::hash (value_type x)
1102{
1103 inchash::hash h;
1104 for (unsigned i = 0; i < x.length (); ++i)
b9787581 1105 h.add_int (gimple_uid (x[i]->stmt));
26d66f28
RB
1106 return h.end ();
1107}
1108inline bool
1109bst_traits::equal (value_type existing, value_type candidate)
1110{
1111 if (existing.length () != candidate.length ())
1112 return false;
1113 for (unsigned i = 0; i < existing.length (); ++i)
1114 if (existing[i] != candidate[i])
1115 return false;
1116 return true;
1117}
1118
68435eb2
RB
1119typedef hash_map <vec <gimple *>, slp_tree,
1120 simple_hashmap_traits <bst_traits, slp_tree> >
1121 scalar_stmts_to_slp_tree_map_t;
1122
26d66f28
RB
1123static slp_tree
1124vect_build_slp_tree_2 (vec_info *vinfo,
b9787581 1125 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1126 poly_uint64 *max_nunits,
26d66f28 1127 bool *matches, unsigned *npermutes, unsigned *tree_size,
a1f072e2 1128 scalar_stmts_to_slp_tree_map_t *bst_map);
6983e6b5 1129
e403d17e 1130static slp_tree
310213d4 1131vect_build_slp_tree (vec_info *vinfo,
b9787581 1132 vec<stmt_vec_info> stmts, unsigned int group_size,
5d8c32cb 1133 poly_uint64 *max_nunits,
1428105c 1134 bool *matches, unsigned *npermutes, unsigned *tree_size,
a1f072e2 1135 scalar_stmts_to_slp_tree_map_t *bst_map)
26d66f28 1136{
a1f072e2 1137 if (slp_tree *leader = bst_map->get (stmts))
26d66f28 1138 {
a1f072e2
RB
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
1141 *leader ? "" : "failed ", *leader);
1142 if (*leader)
f48e4da3
RB
1143 {
1144 (*leader)->refcnt++;
1145 vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
1146 }
a1f072e2 1147 return *leader;
26d66f28 1148 }
f48e4da3 1149 poly_uint64 this_max_nunits = 1;
a1f072e2 1150 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
9f708a84 1151 matches, npermutes, tree_size, bst_map);
a1f072e2 1152 if (res)
f48e4da3
RB
1153 {
1154 res->max_nunits = this_max_nunits;
1155 vect_update_max_nunits (max_nunits, this_max_nunits);
1156 /* Keep a reference for the bst_map use. */
1157 res->refcnt++;
1158 }
a1f072e2 1159 bst_map->put (stmts.copy (), res);
26d66f28
RB
1160 return res;
1161}
1162
1163/* Recursively build an SLP tree starting from NODE.
1164 Fail (and return a value not equal to zero) if def-stmts are not
1165 isomorphic, require data permutation or are of unsupported types of
1166 operation. Otherwise, return 0.
1167 The value returned is the depth in the SLP tree where a mismatch
1168 was found. */
1169
1170static slp_tree
1171vect_build_slp_tree_2 (vec_info *vinfo,
b9787581 1172 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1173 poly_uint64 *max_nunits,
26d66f28 1174 bool *matches, unsigned *npermutes, unsigned *tree_size,
a1f072e2 1175 scalar_stmts_to_slp_tree_map_t *bst_map)
6983e6b5 1176{
4b6068ea
RS
1177 unsigned nops, i, this_tree_size = 0;
1178 poly_uint64 this_max_nunits = *max_nunits;
e403d17e 1179 slp_tree node;
6983e6b5 1180
6983e6b5
RB
1181 matches[0] = false;
1182
b9787581
RS
1183 stmt_vec_info stmt_info = stmts[0];
1184 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
6983e6b5 1185 nops = gimple_call_num_args (stmt);
b9787581 1186 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6983e6b5
RB
1187 {
1188 nops = gimple_num_ops (stmt) - 1;
1189 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1190 nops++;
ebfd146a 1191 }
b9787581 1192 else if (is_a <gphi *> (stmt_info->stmt))
e7baeb39 1193 nops = 0;
6983e6b5 1194 else
e403d17e 1195 return NULL;
6983e6b5 1196
c78e3652
RB
1197 /* If the SLP node is a PHI (induction or reduction), terminate
1198 the recursion. */
b9787581 1199 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
e7baeb39 1200 {
b161f2c9 1201 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
7ed54790 1202 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
d7609678 1203 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
b161f2c9
RS
1204 return NULL;
1205
b9787581 1206 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
c78e3652 1207 /* Induction from different IVs is not supported. */
719488f8
RB
1208 if (def_type == vect_induction_def)
1209 {
b9787581
RS
1210 stmt_vec_info other_info;
1211 FOR_EACH_VEC_ELT (stmts, i, other_info)
1212 if (stmt_info != other_info)
719488f8
RB
1213 return NULL;
1214 }
22e4f1fb
RB
1215 else if (def_type == vect_reduction_def
1216 || def_type == vect_double_reduction_def
1217 || def_type == vect_nested_cycle)
719488f8
RB
1218 {
1219 /* Else def types have to match. */
b9787581
RS
1220 stmt_vec_info other_info;
1221 FOR_EACH_VEC_ELT (stmts, i, other_info)
4352288a
RB
1222 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
1223 return NULL;
719488f8 1224 }
22e4f1fb
RB
1225 else
1226 return NULL;
9f708a84 1227 (*tree_size)++;
e7baeb39
RB
1228 node = vect_create_new_slp_node (stmts);
1229 return node;
1230 }
1231
1232
6876e5bc 1233 bool two_operators = false;
4cecd659 1234 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
d7609678 1235 if (!vect_build_slp_tree_1 (swap, stmts, group_size,
e403d17e
RB
1236 &this_max_nunits, matches, &two_operators))
1237 return NULL;
ebfd146a 1238
99763671 1239 /* If the SLP node is a load, terminate the recursion unless masked. */
b9787581
RS
1240 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1241 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
ebfd146a 1242 {
99763671
AM
1243 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1244 {
1245 /* Masked load. */
1246 gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
1247 nops = 1;
1248 }
1249 else
1250 {
1251 *max_nunits = this_max_nunits;
1252 (*tree_size)++;
1253 node = vect_create_new_slp_node (stmts);
1254 return node;
1255 }
ebfd146a
IR
1256 }
1257
6983e6b5
RB
1258 /* Get at the operands, verifying they are compatible. */
1259 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1260 slp_oprnd_info oprnd_info;
b9787581 1261 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
6983e6b5 1262 {
4cecd659 1263 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
018b2744 1264 stmts, i, &oprnds_info);
4cecd659
BC
1265 if (res != 0)
1266 matches[(res == -1) ? 0 : i] = false;
1267 if (!matches[0])
1268 break;
6983e6b5 1269 }
b0b4483e
RB
1270 for (i = 0; i < group_size; ++i)
1271 if (!matches[i])
1272 {
1273 vect_free_oprnd_info (oprnds_info);
e403d17e 1274 return NULL;
b0b4483e 1275 }
6983e6b5 1276
e403d17e 1277 auto_vec<slp_tree, 4> children;
e403d17e 1278
b9787581 1279 stmt_info = stmts[0];
6983e6b5 1280
b8698a0f 1281 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 1282 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 1283 {
d092494c 1284 slp_tree child;
e403d17e
RB
1285 unsigned old_tree_size = this_tree_size;
1286 unsigned int j;
b8698a0f 1287
30c0d1e3
RB
1288 if (oprnd_info->first_dt == vect_uninitialized_def)
1289 {
1290 /* COND_EXPR have one too many eventually if the condition
1291 is a SSA name. */
1292 gcc_assert (i == 3 && nops == 4);
1293 continue;
1294 }
1295
e7baeb39 1296 if (oprnd_info->first_dt != vect_internal_def
c78e3652 1297 && oprnd_info->first_dt != vect_reduction_def
e7baeb39 1298 && oprnd_info->first_dt != vect_induction_def)
30c0d1e3
RB
1299 {
1300 slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
1301 SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
1302 oprnd_info->ops = vNULL;
1303 children.safe_push (invnode);
1304 continue;
1305 }
ebfd146a 1306
e403d17e
RB
1307 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1308 group_size, &this_max_nunits,
5d8c32cb 1309 matches, npermutes,
9f708a84 1310 &this_tree_size, bst_map)) != NULL)
6983e6b5 1311 {
3fc356dc
RB
1312 /* If we have all children of child built up from scalars then just
1313 throw that away and build it up this node from scalars. */
30c0d1e3
RB
1314 if (is_a <bb_vec_info> (vinfo)
1315 && !SLP_TREE_CHILDREN (child).is_empty ()
995b6fe0
RB
1316 /* ??? Rejecting patterns this way doesn't work. We'd have to
1317 do extra work to cancel the pattern so the uses see the
1318 scalar version. */
7098ab48 1319 && !oprnd_info->any_pattern)
3fc356dc 1320 {
3fc356dc
RB
1321 slp_tree grandchild;
1322
1323 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
f99d6262 1324 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
3fc356dc
RB
1325 break;
1326 if (!grandchild)
1327 {
1328 /* Roll back. */
e403d17e 1329 this_tree_size = old_tree_size;
3fc356dc 1330 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1331 vect_free_slp_tree (grandchild, false);
3fc356dc
RB
1332 SLP_TREE_CHILDREN (child).truncate (0);
1333
bbeeac91
DM
1334 if (dump_enabled_p ())
1335 dump_printf_loc (MSG_NOTE, vect_location,
1336 "Building parent vector operands from "
1337 "scalars instead\n");
3fc356dc 1338 oprnd_info->def_stmts = vNULL;
603cca93 1339 SLP_TREE_DEF_TYPE (child) = vect_external_def;
30c0d1e3
RB
1340 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1341 oprnd_info->ops = vNULL;
9f708a84 1342 ++this_tree_size;
e403d17e 1343 children.safe_push (child);
3fc356dc
RB
1344 continue;
1345 }
1346 }
1347
6983e6b5 1348 oprnd_info->def_stmts = vNULL;
e403d17e 1349 children.safe_push (child);
6983e6b5
RB
1350 continue;
1351 }
1352
90dd6e3d
RB
1353 /* If the SLP build failed fatally and we analyze a basic-block
1354 simply treat nodes we fail to build as externally defined
1355 (and thus build vectors from the scalar defs).
1356 The cost model will reject outright expensive cases.
1357 ??? This doesn't treat cases where permutation ultimatively
1358 fails (or we don't try permutation below). Ideally we'd
1359 even compute a permutation that will end up with the maximum
1360 SLP tree size... */
310213d4 1361 if (is_a <bb_vec_info> (vinfo)
90dd6e3d
RB
1362 && !matches[0]
1363 /* ??? Rejecting patterns this way doesn't work. We'd have to
1364 do extra work to cancel the pattern so the uses see the
1365 scalar version. */
7098ab48
RB
1366 && !is_pattern_stmt_p (stmt_info)
1367 && !oprnd_info->any_pattern)
90dd6e3d 1368 {
bbeeac91
DM
1369 if (dump_enabled_p ())
1370 dump_printf_loc (MSG_NOTE, vect_location,
1371 "Building vector operands from scalars\n");
9f708a84 1372 this_tree_size++;
e403d17e 1373 child = vect_create_new_slp_node (oprnd_info->def_stmts);
603cca93 1374 SLP_TREE_DEF_TYPE (child) = vect_external_def;
30c0d1e3 1375 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
e403d17e 1376 children.safe_push (child);
30c0d1e3 1377 oprnd_info->ops = vNULL;
e403d17e 1378 oprnd_info->def_stmts = vNULL;
90dd6e3d
RB
1379 continue;
1380 }
1381
6983e6b5
RB
1382 /* If the SLP build for operand zero failed and operand zero
1383 and one can be commutated try that for the scalar stmts
1384 that failed the match. */
1385 if (i == 0
1386 /* A first scalar stmt mismatch signals a fatal mismatch. */
1387 && matches[0]
1388 /* ??? For COND_EXPRs we can swap the comparison operands
1389 as well as the arms under some constraints. */
1390 && nops == 2
1391 && oprnds_info[1]->first_dt == vect_internal_def
b9787581 1392 && is_gimple_assign (stmt_info->stmt)
2153fa7b
RB
1393 /* Swapping operands for reductions breaks assumptions later on. */
1394 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
1395 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
6983e6b5
RB
1396 /* Do so only if the number of not successful permutes was nor more
1397 than a cut-ff as re-trying the recursive match on
1398 possibly each level of the tree would expose exponential
1399 behavior. */
1400 && *npermutes < 4)
1401 {
85c5e2f5
RB
1402 /* See whether we can swap the matching or the non-matching
1403 stmt operands. */
1404 bool swap_not_matching = true;
1405 do
1406 {
1407 for (j = 0; j < group_size; ++j)
1408 {
1409 if (matches[j] != !swap_not_matching)
1410 continue;
b9787581 1411 stmt_vec_info stmt_info = stmts[j];
85c5e2f5 1412 /* Verify if we can swap operands of this stmt. */
b9787581
RS
1413 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1414 if (!stmt
85c5e2f5
RB
1415 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1416 {
1417 if (!swap_not_matching)
1418 goto fail;
1419 swap_not_matching = false;
1420 break;
1421 }
85c5e2f5
RB
1422 }
1423 }
1424 while (j != group_size);
78810bd3 1425
6983e6b5 1426 /* Swap mismatched definition stmts. */
bbeeac91
DM
1427 if (dump_enabled_p ())
1428 dump_printf_loc (MSG_NOTE, vect_location,
1429 "Re-trying with swapped operands of stmts ");
e72baed7 1430 for (j = 0; j < group_size; ++j)
85c5e2f5 1431 if (matches[j] == !swap_not_matching)
6983e6b5 1432 {
6b4db501
MM
1433 std::swap (oprnds_info[0]->def_stmts[j],
1434 oprnds_info[1]->def_stmts[j]);
30c0d1e3
RB
1435 std::swap (oprnds_info[0]->ops[j],
1436 oprnds_info[1]->ops[j]);
bbeeac91
DM
1437 if (dump_enabled_p ())
1438 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1439 }
bbeeac91
DM
1440 if (dump_enabled_p ())
1441 dump_printf (MSG_NOTE, "\n");
74574669
RB
1442 /* And try again with scratch 'matches' ... */
1443 bool *tem = XALLOCAVEC (bool, group_size);
e403d17e
RB
1444 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1445 group_size, &this_max_nunits,
5d8c32cb 1446 tem, npermutes,
9f708a84 1447 &this_tree_size, bst_map)) != NULL)
6983e6b5 1448 {
85c69b0b
RB
1449 /* If we have all children of child built up from scalars then
1450 just throw that away and build it up this node from scalars. */
30c0d1e3
RB
1451 if (is_a <bb_vec_info> (vinfo)
1452 && !SLP_TREE_CHILDREN (child).is_empty ()
995b6fe0
RB
1453 /* ??? Rejecting patterns this way doesn't work. We'd have
1454 to do extra work to cancel the pattern so the uses see the
1455 scalar version. */
7098ab48 1456 && !oprnd_info->any_pattern)
85c69b0b
RB
1457 {
1458 unsigned int j;
1459 slp_tree grandchild;
1460
1461 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
f99d6262 1462 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
85c69b0b
RB
1463 break;
1464 if (!grandchild)
1465 {
1466 /* Roll back. */
e403d17e 1467 this_tree_size = old_tree_size;
85c69b0b 1468 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1469 vect_free_slp_tree (grandchild, false);
85c69b0b
RB
1470 SLP_TREE_CHILDREN (child).truncate (0);
1471
bbeeac91
DM
1472 if (dump_enabled_p ())
1473 dump_printf_loc (MSG_NOTE, vect_location,
1474 "Building parent vector operands from "
1475 "scalars instead\n");
85c69b0b 1476 oprnd_info->def_stmts = vNULL;
603cca93 1477 SLP_TREE_DEF_TYPE (child) = vect_external_def;
30c0d1e3
RB
1478 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1479 oprnd_info->ops = vNULL;
9f708a84 1480 ++this_tree_size;
e403d17e 1481 children.safe_push (child);
85c69b0b
RB
1482 continue;
1483 }
1484 }
1485
6983e6b5 1486 oprnd_info->def_stmts = vNULL;
e403d17e 1487 children.safe_push (child);
6983e6b5
RB
1488 continue;
1489 }
1490
1491 ++*npermutes;
1492 }
1493
78810bd3 1494fail:
e403d17e
RB
1495 gcc_assert (child == NULL);
1496 FOR_EACH_VEC_ELT (children, j, child)
6e2dd807 1497 vect_free_slp_tree (child, false);
6983e6b5 1498 vect_free_oprnd_info (oprnds_info);
e403d17e 1499 return NULL;
ebfd146a
IR
1500 }
1501
e403d17e
RB
1502 vect_free_oprnd_info (oprnds_info);
1503
9f708a84 1504 *tree_size += this_tree_size + 1;
e403d17e 1505 *max_nunits = this_max_nunits;
1428105c 1506
e403d17e
RB
1507 node = vect_create_new_slp_node (stmts);
1508 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1509 SLP_TREE_CHILDREN (node).splice (children);
1510 return node;
ebfd146a
IR
1511}
1512
78c60e3d 1513/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1514
1515static void
4f5b9c80 1516vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
a1f072e2 1517 slp_tree node, hash_set<slp_tree> &visited)
ebfd146a 1518{
6c7b0df8 1519 unsigned i;
b9787581 1520 stmt_vec_info stmt_info;
d755c7ef 1521 slp_tree child;
6c7b0df8 1522 tree op;
ebfd146a 1523
a1f072e2
RB
1524 if (visited.add (node))
1525 return;
1526
3da39f52
DM
1527 dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
1528 dump_user_location_t user_loc = loc.get_user_location ();
f48e4da3 1529 dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
6c7b0df8
RB
1530 SLP_TREE_DEF_TYPE (node) == vect_external_def
1531 ? " (external)"
1532 : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
1533 ? " (constant)"
1534 : ""), node,
f48e4da3 1535 estimated_poly_value (node->max_nunits));
6c7b0df8
RB
1536 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1537 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1538 dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
1539 else
1540 {
1541 dump_printf_loc (metadata, user_loc, "\t{ ");
1542 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1543 dump_printf (metadata, "%T%s ", op,
1544 i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
1545 dump_printf (metadata, "}\n");
1546 }
a1f072e2
RB
1547 if (SLP_TREE_CHILDREN (node).is_empty ())
1548 return;
3da39f52 1549 dump_printf_loc (metadata, user_loc, "\tchildren");
9771b263 1550 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a1f072e2
RB
1551 dump_printf (dump_kind, " %p", (void *)child);
1552 dump_printf (dump_kind, "\n");
1553 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1554 vect_print_slp_tree (dump_kind, loc, child, visited);
ebfd146a
IR
1555}
1556
a1f072e2
RB
1557static void
1558vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1559 slp_tree node)
1560{
1561 hash_set<slp_tree> visited;
1562 vect_print_slp_tree (dump_kind, loc, node, visited);
1563}
ebfd146a 1564
6c7e3b1f 1565/* Mark the tree rooted at NODE with PURE_SLP. */
ebfd146a
IR
1566
1567static void
6c7e3b1f 1568vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
ebfd146a
IR
1569{
1570 int i;
b9787581 1571 stmt_vec_info stmt_info;
d755c7ef 1572 slp_tree child;
ebfd146a 1573
603cca93 1574 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
1575 return;
1576
4bfcf879
RB
1577 if (visited.add (node))
1578 return;
1579
b9787581 1580 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6c7e3b1f 1581 STMT_SLP_TYPE (stmt_info) = pure_slp;
ebfd146a 1582
9771b263 1583 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6c7e3b1f 1584 vect_mark_slp_stmts (child, visited);
ebfd146a
IR
1585}
1586
4bfcf879 1587static void
6c7e3b1f 1588vect_mark_slp_stmts (slp_tree node)
4bfcf879
RB
1589{
1590 hash_set<slp_tree> visited;
6c7e3b1f 1591 vect_mark_slp_stmts (node, visited);
4bfcf879 1592}
ebfd146a 1593
a70d6342
IR
1594/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1595
1596static void
4bfcf879 1597vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
a70d6342
IR
1598{
1599 int i;
a70d6342 1600 stmt_vec_info stmt_info;
d755c7ef 1601 slp_tree child;
a70d6342 1602
603cca93 1603 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
1604 return;
1605
4bfcf879
RB
1606 if (visited.add (node))
1607 return;
1608
b9787581 1609 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
a70d6342 1610 {
b8698a0f 1611 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1612 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1613 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1614 }
1615
9771b263 1616 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4bfcf879
RB
1617 vect_mark_slp_stmts_relevant (child, visited);
1618}
1619
1620static void
1621vect_mark_slp_stmts_relevant (slp_tree node)
1622{
1623 hash_set<slp_tree> visited;
1624 vect_mark_slp_stmts_relevant (node, visited);
a70d6342
IR
1625}
1626
1627
b5aeb3bb
IR
1628/* Rearrange the statements of NODE according to PERMUTATION. */
1629
1630static void
1631vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
a1f072e2
RB
1632 vec<unsigned> permutation,
1633 hash_set<slp_tree> &visited)
b5aeb3bb 1634{
d755c7ef
RB
1635 unsigned int i;
1636 slp_tree child;
b5aeb3bb 1637
a1f072e2
RB
1638 if (visited.add (node))
1639 return;
1640
9771b263 1641 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a1f072e2 1642 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
b5aeb3bb 1643
30c0d1e3
RB
1644 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1645 {
1646 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1647 vec<stmt_vec_info> tmp_stmts;
1648 tmp_stmts.create (group_size);
1649 tmp_stmts.quick_grow (group_size);
1650 stmt_vec_info stmt_info;
1651 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1652 tmp_stmts[permutation[i]] = stmt_info;
1653 SLP_TREE_SCALAR_STMTS (node).release ();
1654 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1655 }
1656 if (SLP_TREE_SCALAR_OPS (node).exists ())
1657 {
1658 gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1659 vec<tree> tmp_ops;
1660 tmp_ops.create (group_size);
1661 tmp_ops.quick_grow (group_size);
1662 tree op;
1663 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1664 tmp_ops[permutation[i]] = op;
1665 SLP_TREE_SCALAR_OPS (node).release ();
1666 SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1667 }
b5aeb3bb
IR
1668}
1669
1670
b266b968
RB
1671/* Attempt to reorder stmts in a reduction chain so that we don't
1672 require any load permutation. Return true if that was possible,
1673 otherwise return false. */
1674
1675static bool
1676vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1677{
1678 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1679 unsigned int i, j;
b266b968
RB
1680 unsigned int lidx;
1681 slp_tree node, load;
1682
1683 /* Compare all the permutation sequences to the first one. We know
1684 that at least one load is permuted. */
1685 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1686 if (!node->load_permutation.exists ())
1687 return false;
1688 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1689 {
1690 if (!load->load_permutation.exists ())
1691 return false;
1692 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1693 if (lidx != node->load_permutation[j])
1694 return false;
1695 }
1696
1697 /* Check that the loads in the first sequence are different and there
1698 are no gaps between them. */
7ba9e72d 1699 auto_sbitmap load_index (group_size);
b266b968
RB
1700 bitmap_clear (load_index);
1701 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1702 {
41eefe13 1703 if (lidx >= group_size)
7ba9e72d 1704 return false;
b266b968 1705 if (bitmap_bit_p (load_index, lidx))
7ba9e72d
TS
1706 return false;
1707
b266b968
RB
1708 bitmap_set_bit (load_index, lidx);
1709 }
1710 for (i = 0; i < group_size; i++)
1711 if (!bitmap_bit_p (load_index, i))
7ba9e72d 1712 return false;
b266b968
RB
1713
1714 /* This permutation is valid for reduction. Since the order of the
1715 statements in the nodes is not important unless they are memory
1716 accesses, we can rearrange the statements in all the nodes
1717 according to the order of the loads. */
a1f072e2 1718 hash_set<slp_tree> visited;
b266b968 1719 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
a1f072e2 1720 node->load_permutation, visited);
b266b968
RB
1721
1722 /* We are done, no actual permutations need to be generated. */
d9f21f6a 1723 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
b266b968 1724 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
c4e360f4 1725 {
b9787581 1726 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1727 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
c4e360f4
RB
1728 /* But we have to keep those permutations that are required because
1729 of handling of gaps. */
d9f21f6a 1730 if (known_eq (unrolling_factor, 1U)
b9787581
RS
1731 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1732 && DR_GROUP_GAP (first_stmt_info) == 0))
c4e360f4 1733 SLP_TREE_LOAD_PERMUTATION (node).release ();
cbd400b4
RB
1734 else
1735 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1736 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
c4e360f4
RB
1737 }
1738
b266b968
RB
1739 return true;
1740}
1741
5d8c32cb
RB
1742/* Gather loads in the SLP graph NODE and populate the INST loads array. */
1743
1744static void
1745vect_gather_slp_loads (slp_instance inst, slp_tree node,
1746 hash_set<slp_tree> &visited)
1747{
1748 if (visited.add (node))
1749 return;
1750
1751 if (SLP_TREE_CHILDREN (node).length () == 0)
1752 {
30c0d1e3
RB
1753 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1754 return;
5d8c32cb 1755 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
30c0d1e3 1756 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
5d8c32cb
RB
1757 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1758 SLP_INSTANCE_LOADS (inst).safe_push (node);
1759 }
1760 else
1761 {
1762 unsigned i;
1763 slp_tree child;
1764 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1765 vect_gather_slp_loads (inst, child, visited);
1766 }
1767}
1768
1769static void
1770vect_gather_slp_loads (slp_instance inst, slp_tree node)
1771{
1772 hash_set<slp_tree> visited;
1773 vect_gather_slp_loads (inst, node, visited);
1774}
1775
01d8bf07
RB
1776/* Check if the required load permutations in the SLP instance
1777 SLP_INSTN are supported. */
ebfd146a
IR
1778
1779static bool
01d8bf07 1780vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1781{
01d8bf07
RB
1782 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1783 unsigned int i, j, k, next;
6983e6b5 1784 slp_tree node;
ebfd146a 1785
73fbfcad 1786 if (dump_enabled_p ())
ebfd146a 1787 {
78c60e3d 1788 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1789 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1790 if (node->load_permutation.exists ())
1791 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1792 dump_printf (MSG_NOTE, "%d ", next);
1793 else
bddc974e
TJ
1794 for (k = 0; k < group_size; ++k)
1795 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1796 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1797 }
1798
b5aeb3bb
IR
1799 /* In case of reduction every load permutation is allowed, since the order
1800 of the reduction statements is not important (as opposed to the case of
0d0293ac 1801 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1802 load nodes are of the same size and have the same permutation (and then
1803 rearrange all the nodes of the SLP instance according to this
1804 permutation). */
1805
1806 /* Check that all the load nodes are of the same size. */
01d8bf07 1807 /* ??? Can't we assert this? */
9771b263 1808 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1809 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1810 return false;
2200fc49 1811
b5aeb3bb 1812 node = SLP_INSTANCE_TREE (slp_instn);
b9787581 1813 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1814
b010117a 1815 /* Reduction (there are no data-refs in the root).
b266b968 1816 In reduction chain the order of the loads is not important. */
b9787581
RS
1817 if (!STMT_VINFO_DATA_REF (stmt_info)
1818 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
c4e360f4 1819 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1820
6aa904c4
IR
1821 /* In basic block vectorization we allow any subchain of an interleaving
1822 chain.
1823 FORNOW: not supported in loop SLP because of realignment compications. */
b9787581 1824 if (STMT_VINFO_BB_VINFO (stmt_info))
6aa904c4 1825 {
240a94da
RB
1826 /* Check whether the loads in an instance form a subchain and thus
1827 no permutation is necessary. */
9771b263 1828 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1829 {
9626d143
RB
1830 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1831 continue;
240a94da 1832 bool subchain_p = true;
bffb8014 1833 stmt_vec_info next_load_info = NULL;
b9787581
RS
1834 stmt_vec_info load_info;
1835 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1836 {
1837 if (j != 0
bffb8014 1838 && (next_load_info != load_info
b9787581 1839 || DR_GROUP_GAP (load_info) != 1))
240a94da
RB
1840 {
1841 subchain_p = false;
1842 break;
1843 }
bffb8014 1844 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
b9787581 1845 }
240a94da
RB
1846 if (subchain_p)
1847 SLP_TREE_LOAD_PERMUTATION (node).release ();
1848 else
1849 {
b9787581 1850 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1851 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
928686b1 1852 unsigned HOST_WIDE_INT nunits;
fe73a332
RB
1853 unsigned k, maxk = 0;
1854 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1855 if (k > maxk)
1856 maxk = k;
1857 /* In BB vectorization we may not actually use a loaded vector
2c53b149 1858 accessing elements in excess of DR_GROUP_SIZE. */
928686b1
RS
1859 tree vectype = STMT_VINFO_VECTYPE (group_info);
1860 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2c53b149 1861 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
fe73a332 1862 {
bbeeac91
DM
1863 if (dump_enabled_p ())
1864 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1865 "BB vectorization with gaps at the end of "
1866 "a load is not supported\n");
fe73a332
RB
1867 return false;
1868 }
1869
240a94da
RB
1870 /* Verify the permutation can be generated. */
1871 vec<tree> tem;
29afecdf 1872 unsigned n_perms;
240a94da 1873 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1874 1, slp_instn, true, &n_perms))
240a94da 1875 {
bbeeac91
DM
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1878 vect_location,
1879 "unsupported load permutation\n");
240a94da
RB
1880 return false;
1881 }
1882 }
6aa904c4 1883 }
01d8bf07 1884 return true;
6aa904c4
IR
1885 }
1886
31bee964
RB
1887 /* For loop vectorization verify we can generate the permutation. Be
1888 conservative about the vectorization factor, there are permutations
1889 that will use three vector inputs only starting from a specific factor
1890 and the vectorization factor is not yet final.
1891 ??? The SLP instance unrolling factor might not be the maximum one. */
29afecdf 1892 unsigned n_perms;
d9f21f6a
RS
1893 poly_uint64 test_vf
1894 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
31bee964 1895 LOOP_VINFO_VECT_FACTOR
b9787581 1896 (STMT_VINFO_LOOP_VINFO (stmt_info)));
01d8bf07
RB
1897 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1898 if (node->load_permutation.exists ()
31bee964
RB
1899 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1900 slp_instn, true, &n_perms))
01d8bf07 1901 return false;
9b999e8c 1902
01d8bf07 1903 return true;
ebfd146a
IR
1904}
1905
1906
e4a707c4 1907/* Find the last store in SLP INSTANCE. */
ff802fa1 1908
95c68311 1909stmt_vec_info
2e8ab70c 1910vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1911{
95c68311 1912 stmt_vec_info last = NULL;
b9787581 1913 stmt_vec_info stmt_vinfo;
e4a707c4 1914
b9787581 1915 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2e8ab70c 1916 {
211cd1e2 1917 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
95c68311 1918 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2e8ab70c 1919 }
e4a707c4 1920
2e8ab70c 1921 return last;
e4a707c4
IR
1922}
1923
82570274
RS
1924/* Splits a group of stores, currently beginning at FIRST_VINFO, into
1925 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1926 (also containing the first GROUP1_SIZE stmts, since stores are
1927 consecutive), the second containing the remainder.
1ba91a49
AL
1928 Return the first stmt in the second group. */
1929
82570274
RS
1930static stmt_vec_info
1931vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1ba91a49 1932{
bffb8014 1933 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1ba91a49 1934 gcc_assert (group1_size > 0);
2c53b149 1935 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1ba91a49 1936 gcc_assert (group2_size > 0);
2c53b149 1937 DR_GROUP_SIZE (first_vinfo) = group1_size;
1ba91a49 1938
bffb8014 1939 stmt_vec_info stmt_info = first_vinfo;
1ba91a49
AL
1940 for (unsigned i = group1_size; i > 1; i--)
1941 {
bffb8014
RS
1942 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1943 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1944 }
1945 /* STMT is now the last element of the first group. */
bffb8014
RS
1946 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1947 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1ba91a49 1948
bffb8014
RS
1949 DR_GROUP_SIZE (group2) = group2_size;
1950 for (stmt_info = group2; stmt_info;
1951 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1ba91a49 1952 {
bffb8014
RS
1953 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1954 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1955 }
1956
2c53b149 1957 /* For the second group, the DR_GROUP_GAP is that before the original group,
1ba91a49 1958 plus skipping over the first vector. */
bffb8014 1959 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1ba91a49 1960
2c53b149
RB
1961 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1962 DR_GROUP_GAP (first_vinfo) += group2_size;
1ba91a49
AL
1963
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1966 group1_size, group2_size);
1967
1968 return group2;
1969}
1970
4b6068ea
RS
1971/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1972 statements and a vector of NUNITS elements. */
1973
1974static poly_uint64
1975calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1976{
1977 return exact_div (common_multiple (nunits, group_size), group_size);
1978}
1979
0d0293ac 1980/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1981 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1982 Return FALSE if it's impossible to SLP any stmt in the loop. */
1983
1984static bool
310213d4 1985vect_analyze_slp_instance (vec_info *vinfo,
32e8e429 1986 stmt_vec_info stmt_info, unsigned max_tree_size)
ebfd146a
IR
1987{
1988 slp_instance new_instance;
d092494c 1989 slp_tree node;
2c53b149 1990 unsigned int group_size;
b5aeb3bb 1991 tree vectype, scalar_type = NULL_TREE;
1ba91a49 1992 unsigned int i;
b9787581
RS
1993 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1994 vec<stmt_vec_info> scalar_stmts;
818b3293 1995 bool constructor = false;
b5aeb3bb 1996
b9787581 1997 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
b5aeb3bb 1998 {
2c53b149 1999 scalar_type = TREE_TYPE (DR_REF (dr));
7ed54790 2000 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
b9787581 2001 group_size = DR_GROUP_SIZE (stmt_info);
2c53b149 2002 }
b9787581 2003 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
2004 {
2005 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581
RS
2006 vectype = STMT_VINFO_VECTYPE (stmt_info);
2007 group_size = REDUC_GROUP_SIZE (stmt_info);
b5aeb3bb 2008 }
818b3293
JH
2009 else if (is_gimple_assign (stmt_info->stmt)
2010 && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2011 {
2012 vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2013 group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2014 constructor = true;
2015 }
b5aeb3bb
IR
2016 else
2017 {
310213d4 2018 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581 2019 vectype = STMT_VINFO_VECTYPE (stmt_info);
310213d4 2020 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 2021 }
b8698a0f 2022
ebfd146a
IR
2023 if (!vectype)
2024 {
73fbfcad 2025 if (dump_enabled_p ())
3c2a8ed0
DM
2026 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2027 "Build SLP failed: unsupported data-type %T\n",
2028 scalar_type);
b5aeb3bb 2029
ebfd146a
IR
2030 return false;
2031 }
4b6068ea 2032 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 2033
0d0293ac 2034 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 2035 scalar_stmts.create (group_size);
bffb8014 2036 stmt_vec_info next_info = stmt_info;
b9787581 2037 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 2038 {
b5aeb3bb 2039 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
bffb8014 2040 while (next_info)
b5aeb3bb 2041 {
6e6b18e5 2042 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 2043 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2c53b149
RB
2044 }
2045 }
b9787581 2046 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
2047 {
2048 /* Collect the reduction stmts and store them in
2049 SLP_TREE_SCALAR_STMTS. */
bffb8014 2050 while (next_info)
2c53b149 2051 {
6e6b18e5 2052 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 2053 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
b5aeb3bb 2054 }
14a61437
RB
2055 /* Mark the first element of the reduction chain as reduction to properly
2056 transform the node. In the reduction analysis phase only the last
2057 element of the chain is marked as reduction. */
b4673569
RB
2058 STMT_VINFO_DEF_TYPE (stmt_info)
2059 = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
ef6e6914
RB
2060 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2061 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
b5aeb3bb 2062 }
818b3293
JH
2063 else if (constructor)
2064 {
2065 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2066 tree val;
2067 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2068 {
2069 if (TREE_CODE (val) == SSA_NAME)
2070 {
2071 gimple* def = SSA_NAME_DEF_STMT (val);
2072 stmt_vec_info def_info = vinfo->lookup_stmt (def);
2073 /* Value is defined in another basic block. */
2074 if (!def_info)
2075 return false;
2076 scalar_stmts.safe_push (def_info);
2077 }
2078 else
2079 return false;
2080 }
2081 }
b5aeb3bb
IR
2082 else
2083 {
2084 /* Collect reduction statements. */
32c91dfc
RS
2085 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2086 for (i = 0; reductions.iterate (i, &next_info); i++)
2087 scalar_stmts.safe_push (next_info);
ebfd146a
IR
2088 }
2089
ebfd146a 2090 /* Build the tree for the SLP instance. */
89d390e5
RB
2091 bool *matches = XALLOCAVEC (bool, group_size);
2092 unsigned npermutes = 0;
a1f072e2
RB
2093 scalar_stmts_to_slp_tree_map_t *bst_map
2094 = new scalar_stmts_to_slp_tree_map_t ();
4b6068ea 2095 poly_uint64 max_nunits = nunits;
9f708a84 2096 unsigned tree_size = 0;
e569db5f 2097 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
5d8c32cb 2098 &max_nunits, matches, &npermutes,
9f708a84 2099 &tree_size, bst_map);
a1f072e2
RB
2100 /* The map keeps a reference on SLP nodes built, release that. */
2101 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2102 it != bst_map->end (); ++it)
2103 if ((*it).second)
2104 vect_free_slp_tree ((*it).second, false);
2105 delete bst_map;
e569db5f 2106 if (node != NULL)
ebfd146a 2107 {
b4673569
RB
2108 /* If this is a reduction chain with a conversion in front
2109 amend the SLP tree with a node for that. */
2110 if (!dr
2111 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2112 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2113 {
2114 /* Get at the conversion stmt - we know it's the single use
2115 of the last stmt of the reduction chain. */
2116 gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2117 use_operand_p use_p;
2118 gimple *use_stmt;
2119 bool r = single_imm_use (gimple_assign_lhs (tem), &use_p, &use_stmt);
2120 gcc_assert (r);
2121 next_info = vinfo->lookup_stmt (use_stmt);
2122 next_info = vect_stmt_to_vectorize (next_info);
2123 scalar_stmts = vNULL;
2124 scalar_stmts.create (group_size);
2125 for (unsigned i = 0; i < group_size; ++i)
2126 scalar_stmts.quick_push (next_info);
2127 slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2128 SLP_TREE_CHILDREN (conv).quick_push (node);
2129 node = conv;
2130 /* We also have to fake this conversion stmt as SLP reduction group
2131 so we don't have to mess with too much code elsewhere. */
2132 REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2133 REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2134 }
2135
4ef69dfc 2136 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 2137 poly_uint64 unrolling_factor
4b6068ea 2138 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 2139
d9f21f6a 2140 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
2141 && is_a <bb_vec_info> (vinfo))
2142 {
4b6068ea
RS
2143 unsigned HOST_WIDE_INT const_max_nunits;
2144 if (!max_nunits.is_constant (&const_max_nunits)
2145 || const_max_nunits > group_size)
2146 {
2147 if (dump_enabled_p ())
2148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2149 "Build SLP failed: store group "
2150 "size not a multiple of the vector size "
2151 "in basic block SLP\n");
6e2dd807 2152 vect_free_slp_tree (node, false);
4b6068ea
RS
2153 return false;
2154 }
e569db5f 2155 /* Fatal mismatch. */
4b6068ea 2156 matches[group_size / const_max_nunits * const_max_nunits] = false;
6e2dd807 2157 vect_free_slp_tree (node, false);
e569db5f
VK
2158 }
2159 else
2160 {
a1f072e2 2161 /* Create a new SLP instance. */
99b1c316 2162 new_instance = XNEW (class _slp_instance);
a1f072e2
RB
2163 SLP_INSTANCE_TREE (new_instance) = node;
2164 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2165 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
5d8c32cb 2166 SLP_INSTANCE_LOADS (new_instance) = vNULL;
818b3293
JH
2167 SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2168
5d8c32cb 2169 vect_gather_slp_loads (new_instance, node);
9f708a84
RB
2170 if (dump_enabled_p ())
2171 dump_printf_loc (MSG_NOTE, vect_location,
2172 "SLP size %u vs. limit %u.\n",
2173 tree_size, max_tree_size);
a1f072e2
RB
2174
2175 /* Compute the load permutation. */
2176 slp_tree load_node;
2177 bool loads_permuted = false;
5d8c32cb 2178 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
abf9bfbc 2179 {
a1f072e2
RB
2180 vec<unsigned> load_permutation;
2181 int j;
2182 stmt_vec_info load_info;
2183 bool this_load_permuted = false;
2184 load_permutation.create (group_size);
2185 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2186 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2187 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2188 {
2189 int load_place = vect_get_place_in_interleaving_chain
2190 (load_info, first_stmt_info);
2191 gcc_assert (load_place != -1);
2192 if (load_place != j)
2193 this_load_permuted = true;
2194 load_permutation.safe_push (load_place);
2195 }
2196 if (!this_load_permuted
2197 /* The load requires permutation when unrolling exposes
2198 a gap either because the group is larger than the SLP
2199 group-size or because there is a gap between the groups. */
2200 && (known_eq (unrolling_factor, 1U)
2201 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2202 && DR_GROUP_GAP (first_stmt_info) == 0)))
2203 {
2204 load_permutation.release ();
2205 continue;
2206 }
2207 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2208 loads_permuted = true;
abf9bfbc 2209 }
a1f072e2
RB
2210
2211 if (loads_permuted)
01d8bf07 2212 {
a1f072e2
RB
2213 if (!vect_supported_load_permutation_p (new_instance))
2214 {
2215 if (dump_enabled_p ())
2216 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2217 "Build SLP failed: unsupported load "
2218 "permutation %G", stmt_info->stmt);
2219 vect_free_slp_instance (new_instance, false);
2220 return false;
2221 }
01d8bf07 2222 }
ebfd146a 2223
e569db5f 2224 /* If the loads and stores can be handled with load/store-lan
a1f072e2
RB
2225 instructions do not generate this SLP instance. */
2226 if (is_a <loop_vec_info> (vinfo)
2227 && loads_permuted
2228 && dr && vect_store_lanes_supported (vectype, group_size, false))
bb0f5ca7 2229 {
a1f072e2 2230 slp_tree load_node;
5d8c32cb 2231 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
a1f072e2
RB
2232 {
2233 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2234 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2235 /* Use SLP for strided accesses (or if we can't load-lanes). */
2236 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2237 || ! vect_load_lanes_supported
2238 (STMT_VINFO_VECTYPE (stmt_vinfo),
2239 DR_GROUP_SIZE (stmt_vinfo), false))
2240 break;
2241 }
5d8c32cb 2242 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
a1f072e2
RB
2243 {
2244 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2246 "Built SLP cancelled: can use "
2247 "load/store-lanes\n");
2248 vect_free_slp_instance (new_instance, false);
2249 return false;
2250 }
bb0f5ca7 2251 }
a1f072e2
RB
2252
2253 vinfo->slp_instances.safe_push (new_instance);
2254
2255 if (dump_enabled_p ())
bb0f5ca7 2256 {
a1f072e2
RB
2257 dump_printf_loc (MSG_NOTE, vect_location,
2258 "Final SLP tree for instance:\n");
2259 vect_print_slp_tree (MSG_NOTE, vect_location, node);
bb0f5ca7 2260 }
bb0f5ca7 2261
a1f072e2 2262 return true;
c2a12ca0 2263 }
e569db5f
VK
2264 }
2265 else
2266 {
a1f072e2
RB
2267 /* Failed to SLP. */
2268 /* Free the allocated memory. */
2269 scalar_stmts.release ();
e569db5f 2270 }
b8698a0f 2271
1ba91a49 2272 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2273 vector size. */
4b6068ea 2274 unsigned HOST_WIDE_INT const_nunits;
1ba91a49 2275 if (is_a <bb_vec_info> (vinfo)
91987857
RS
2276 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2277 && DR_GROUP_FIRST_ELEMENT (stmt_info)
4b6068ea 2278 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2279 {
2280 /* We consider breaking the group only on VF boundaries from the existing
2281 start. */
2282 for (i = 0; i < group_size; i++)
2283 if (!matches[i]) break;
2284
4b6068ea 2285 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2286 {
2287 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2288 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2289 unsigned group1_size = i & ~(const_nunits - 1);
1ba91a49 2290
82570274
RS
2291 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2292 group1_size);
86a91c0a
RS
2293 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2294 max_tree_size);
1ba91a49
AL
2295 /* If the first non-match was in the middle of a vector,
2296 skip the rest of that vector. */
2297 if (group1_size < i)
2298 {
4b6068ea 2299 i = group1_size + const_nunits;
1ba91a49 2300 if (i < group_size)
4b6068ea 2301 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2302 }
2303 if (i < group_size)
2304 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2305 return res;
2306 }
2307 /* Even though the first vector did not all match, we might be able to SLP
2308 (some) of the remainder. FORNOW ignore this possibility. */
2309 }
2310
a70d6342 2311 return false;
ebfd146a
IR
2312}
2313
2314
ff802fa1 2315/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
2316 trees of packed scalar stmts if SLP is possible. */
2317
f4ebbd24 2318opt_result
310213d4 2319vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2320{
2321 unsigned int i;
f698fccf 2322 stmt_vec_info first_element;
ebfd146a 2323
adac3a68 2324 DUMP_VECT_SCOPE ("vect_analyze_slp");
ebfd146a 2325
0d0293ac 2326 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2327 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
0630a4ec 2328 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
ebfd146a 2329
310213d4 2330 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2331 {
310213d4
RB
2332 if (loop_vinfo->reduction_chains.length () > 0)
2333 {
2334 /* Find SLP sequences starting from reduction chains. */
2335 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
0630a4ec 2336 if (! vect_analyze_slp_instance (vinfo, first_element,
310213d4 2337 max_tree_size))
6b5e165b
RB
2338 {
2339 /* Dissolve reduction chain group. */
f698fccf 2340 stmt_vec_info vinfo = first_element;
0214d31a 2341 stmt_vec_info last = NULL;
f698fccf 2342 while (vinfo)
6b5e165b 2343 {
bffb8014 2344 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2c53b149
RB
2345 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2346 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
0214d31a 2347 last = vinfo;
f698fccf 2348 vinfo = next;
6b5e165b 2349 }
f698fccf 2350 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
0214d31a
RB
2351 /* It can be still vectorized as part of an SLP reduction. */
2352 loop_vinfo->reductions.safe_push (last);
6b5e165b 2353 }
310213d4 2354 }
b010117a 2355
310213d4 2356 /* Find SLP sequences starting from groups of reductions. */
0630a4ec
RB
2357 if (loop_vinfo->reductions.length () > 1)
2358 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2359 max_tree_size);
310213d4 2360 }
b5aeb3bb 2361
f4ebbd24 2362 return opt_result::success ();
ebfd146a
IR
2363}
2364
2365
2366/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2367 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2368 least one instance. */
ebfd146a 2369
437f4a00 2370bool
ebfd146a
IR
2371vect_make_slp_decision (loop_vec_info loop_vinfo)
2372{
d9f21f6a
RS
2373 unsigned int i;
2374 poly_uint64 unrolling_factor = 1;
9771b263 2375 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2376 slp_instance instance;
2377 int decided_to_slp = 0;
2378
adac3a68 2379 DUMP_VECT_SCOPE ("vect_make_slp_decision");
ebfd146a 2380
9771b263 2381 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2382 {
2383 /* FORNOW: SLP if you can. */
1c84a2d2
RS
2384 /* All unroll factors have the form:
2385
2386 GET_MODE_SIZE (vinfo->vector_mode) * X
2387
2388 for some rational X, so they must have a common multiple. */
d9f21f6a
RS
2389 unrolling_factor
2390 = force_common_multiple (unrolling_factor,
2391 SLP_INSTANCE_UNROLLING_FACTOR (instance));
ebfd146a 2392
ff802fa1 2393 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2394 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2395 loop-based vectorization. Such stmts will be marked as HYBRID. */
6c7e3b1f 2396 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
ebfd146a
IR
2397 decided_to_slp++;
2398 }
2399
2400 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2401
73fbfcad 2402 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2403 {
2404 dump_printf_loc (MSG_NOTE, vect_location,
2405 "Decided to SLP %d instances. Unrolling factor ",
2406 decided_to_slp);
2407 dump_dec (MSG_NOTE, unrolling_factor);
2408 dump_printf (MSG_NOTE, "\n");
2409 }
437f4a00
IR
2410
2411 return (decided_to_slp > 0);
ebfd146a
IR
2412}
2413
2414
2415/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2416 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2417
2418static void
4bfcf879 2419vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
6c7e3b1f 2420 hash_map<slp_tree, unsigned> &visited)
ebfd146a 2421{
b9787581 2422 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2423 imm_use_iterator imm_iter;
355fe088 2424 gimple *use_stmt;
b9787581 2425 stmt_vec_info use_vinfo;
d755c7ef 2426 slp_tree child;
f2c74cc4 2427 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2428 int j;
2429
6c7e3b1f
RB
2430 /* We need to union stype over the incoming graph edges but we still
2431 want to limit recursion to stay O(N+E). */
2432 bool only_edge = (++visited.get_or_insert (node) < node->refcnt);
4bfcf879 2433
642fce57
RB
2434 /* Propagate hybrid down the SLP tree. */
2435 if (stype == hybrid)
2436 ;
2437 else if (HYBRID_SLP_STMT (stmt_vinfo))
2438 stype = hybrid;
6c7e3b1f 2439 else if (!only_edge)
642fce57
RB
2440 {
2441 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2442 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2443 /* If we get a pattern stmt here we have to use the LHS of the
2444 original stmt for immediate uses. */
211cd1e2 2445 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
e7baeb39
RB
2446 tree def;
2447 if (gimple_code (stmt) == GIMPLE_PHI)
2448 def = gimple_phi_result (stmt);
2449 else
2450 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2451 if (def)
2452 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870 2453 {
6585ff8f
RS
2454 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2455 if (!use_vinfo)
29764870 2456 continue;
6e6b18e5 2457 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
29764870
RB
2458 if (!STMT_SLP_TYPE (use_vinfo)
2459 && (STMT_VINFO_RELEVANT (use_vinfo)
2460 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2461 && !(gimple_code (use_stmt) == GIMPLE_PHI
2462 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2463 {
2464 if (dump_enabled_p ())
3c2a8ed0
DM
2465 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2466 "def in non-SLP stmt: %G", use_stmt);
502f0263
RB
2467 stype = hybrid;
2468 }
29764870 2469 }
642fce57 2470 }
ebfd146a 2471
502f0263
RB
2472 if (stype == hybrid
2473 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2474 {
2475 if (dump_enabled_p ())
3c2a8ed0
DM
2476 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2477 stmt_vinfo->stmt);
b1af7da6
RB
2478 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2479 }
ebfd146a 2480
6c7e3b1f
RB
2481 if (!only_edge)
2482 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2483 if (SLP_TREE_DEF_TYPE (child) != vect_external_def
2484 && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
6c7e3b1f 2485 vect_detect_hybrid_slp_stmts (child, i, stype, visited);
4bfcf879
RB
2486}
2487
2488static void
2489vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2490{
6c7e3b1f 2491 hash_map<slp_tree, unsigned> visited;
4bfcf879 2492 vect_detect_hybrid_slp_stmts (node, i, stype, visited);
642fce57 2493}
f2c74cc4 2494
642fce57 2495/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2496
642fce57
RB
2497static tree
2498vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2499{
2500 walk_stmt_info *wi = (walk_stmt_info *)data;
6585ff8f 2501 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
642fce57
RB
2502
2503 if (wi->is_lhs)
2504 return NULL_TREE;
2505
c98d0595
RS
2506 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2507 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
642fce57 2508 {
6585ff8f 2509 if (dump_enabled_p ())
3c2a8ed0
DM
2510 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2511 def_stmt_info->stmt);
6585ff8f 2512 STMT_SLP_TYPE (def_stmt_info) = hybrid;
642fce57
RB
2513 }
2514
2515 return NULL_TREE;
ebfd146a
IR
2516}
2517
642fce57
RB
2518static tree
2519vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
6585ff8f 2520 walk_stmt_info *wi)
642fce57 2521{
6585ff8f
RS
2522 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2523 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
642fce57
RB
2524 /* If the stmt is in a SLP instance then this isn't a reason
2525 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2526 if (! STMT_SLP_TYPE (use_vinfo)
2527 && (STMT_VINFO_RELEVANT (use_vinfo)
2528 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2529 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2530 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2531 ;
2532 else
642fce57
RB
2533 *handled = true;
2534 return NULL_TREE;
2535}
ebfd146a
IR
2536
2537/* Find stmts that must be both vectorized and SLPed. */
2538
2539void
2540vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2541{
2542 unsigned int i;
9771b263 2543 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2544 slp_instance instance;
2545
adac3a68 2546 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
ebfd146a 2547
642fce57
RB
2548 /* First walk all pattern stmt in the loop and mark defs of uses as
2549 hybrid because immediate uses in them are not recorded. */
2550 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2551 {
2552 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2553 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2554 gsi_next (&gsi))
2555 {
355fe088 2556 gimple *stmt = gsi_stmt (gsi);
6585ff8f 2557 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
642fce57
RB
2558 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2559 {
2560 walk_stmt_info wi;
2561 memset (&wi, 0, sizeof (wi));
6585ff8f 2562 wi.info = loop_vinfo;
642fce57 2563 gimple_stmt_iterator gsi2
10681ce8 2564 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
642fce57
RB
2565 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2566 vect_detect_hybrid_slp_1, &wi);
2567 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2568 vect_detect_hybrid_slp_2,
2569 vect_detect_hybrid_slp_1, &wi);
2570 }
2571 }
2572 }
2573
2574 /* Then walk the SLP instance trees marking stmts with uses in
2575 non-SLP stmts as hybrid, also propagating hybrid down the
2576 SLP tree, collecting the above info on-the-fly. */
9771b263 2577 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2578 {
2579 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2580 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2581 i, pure_slp);
2582 }
ebfd146a
IR
2583}
2584
a70d6342 2585
2c515559
RS
2586/* Initialize a bb_vec_info struct for the statements between
2587 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2588
2589_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
ca823c85
RB
2590 gimple_stmt_iterator region_end_in,
2591 vec_info_shared *shared)
2592 : vec_info (vec_info::bb, init_cost (NULL), shared),
2c515559
RS
2593 bb (gsi_bb (region_begin_in)),
2594 region_begin (region_begin_in),
2595 region_end (region_end_in)
a70d6342 2596{
a70d6342
IR
2597 gimple_stmt_iterator gsi;
2598
61d371eb
RB
2599 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2600 gsi_next (&gsi))
a70d6342 2601 {
355fe088 2602 gimple *stmt = gsi_stmt (gsi);
a70d6342 2603 gimple_set_uid (stmt, 0);
4fbeb363 2604 add_stmt (stmt);
a70d6342
IR
2605 }
2606
2c515559 2607 bb->aux = this;
a70d6342
IR
2608}
2609
2610
2611/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2612 stmts in the basic block. */
2613
2c515559 2614_bb_vec_info::~_bb_vec_info ()
a70d6342 2615{
2c515559
RS
2616 for (gimple_stmt_iterator si = region_begin;
2617 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
458135c0
RS
2618 /* Reset region marker. */
2619 gimple_set_uid (gsi_stmt (si), -1);
a70d6342 2620
2c515559 2621 bb->aux = NULL;
a70d6342
IR
2622}
2623
15944069
RS
2624/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2625 given then that child nodes have already been processed, and that
2626 their def types currently match their SLP node's def type. */
a70d6342
IR
2627
2628static bool
15944069
RS
2629vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2630 slp_instance node_instance,
2631 stmt_vector_for_cost *cost_vec)
a70d6342 2632{
b9787581 2633 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bd2f172f
RB
2634 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2635
2636 /* For BB vectorization vector types are assigned here.
2637 Memory accesses already got their vector type assigned
2638 in vect_analyze_data_refs. */
2639 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2640 if (bb_vinfo
2641 && ! STMT_VINFO_DATA_REF (stmt_info))
a70d6342 2642 {
1f3cb663
RS
2643 tree vectype, nunits_vectype;
2644 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2645 &nunits_vectype))
2646 /* We checked this when building the node. */
2647 gcc_unreachable ();
2648 if (vectype == boolean_type_node)
bd2f172f 2649 {
1f3cb663
RS
2650 vectype = vect_get_mask_type_for_stmt (stmt_info);
2651 if (!vectype)
2652 /* vect_get_mask_type_for_stmt has already explained the
2653 failure. */
2654 return false;
bd2f172f
RB
2655 }
2656
b9787581 2657 stmt_vec_info sstmt_info;
15944069 2658 unsigned int i;
b9787581
RS
2659 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2660 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
a70d6342
IR
2661 }
2662
8b7e9dba
RS
2663 /* Calculate the number of vector statements to be created for the
2664 scalar stmts in this node. For SLP reductions it is equal to the
2665 number of vector statements in the children (which has already been
2666 calculated by the recursive call). Otherwise it is the number of
2c53b149 2667 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
8b7e9dba 2668 VF divided by the number of elements in a vector. */
2c53b149
RB
2669 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2670 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
30c0d1e3
RB
2671 {
2672 for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2673 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2674 {
2675 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2676 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2677 break;
2678 }
2679 }
8b7e9dba
RS
2680 else
2681 {
d9f21f6a 2682 poly_uint64 vf;
8b7e9dba
RS
2683 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2684 vf = loop_vinfo->vectorization_factor;
2685 else
2686 vf = 1;
2687 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2688 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2689 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2690 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2691 }
2692
15944069 2693 bool dummy;
86a91c0a 2694 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
15944069
RS
2695}
2696
2697/* Analyze statements contained in SLP tree NODE after recursively analyzing
2698 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2699
2700 Return true if the operations are supported. */
2701
2702static bool
2703vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2704 slp_instance node_instance,
2705 scalar_stmts_to_slp_tree_map_t *visited,
2706 scalar_stmts_to_slp_tree_map_t *lvisited,
2707 stmt_vector_for_cost *cost_vec)
2708{
2709 int i, j;
2710 slp_tree child;
2711
2712 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2713 return true;
2714
2715 /* If we already analyzed the exact same set of scalar stmts we're done.
2716 We share the generated vector stmts for those. */
2717 slp_tree *leader;
2718 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2719 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2720 {
2721 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2722 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2723 return true;
2724 }
2725
2726 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2727 doesn't result in any issue since we throw away the lvisited set
2728 when we fail. */
2729 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2730
2731 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2732 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2733 visited, lvisited, cost_vec))
2734 return false;
2735
f7b94dec
RB
2736 /* ??? We have to catch the case late where two first scalar stmts appear
2737 in multiple SLP children with different def type and fail. Remember
2738 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2739 match it when that is vect_internal_def. */
2740 auto_vec<vect_def_type, 4> dt;
2741 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2742 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2743 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2744 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
f7b94dec 2745
bd2f172f
RB
2746 /* Push SLP node def-type to stmt operands. */
2747 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2748 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2749 && SLP_TREE_SCALAR_STMTS (child).length () != 0)
b9787581 2750 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
bd2f172f 2751 = SLP_TREE_DEF_TYPE (child);
f7b94dec
RB
2752
2753 /* Check everything worked out. */
2754 bool res = true;
bd2f172f 2755 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2756 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2757 {
2758 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2759 {
2760 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2761 != SLP_TREE_DEF_TYPE (child))
2762 res = false;
2763 }
2764 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2765 != dt[j])
2766 res = false;
2767 }
f7b94dec
RB
2768 if (!res && dump_enabled_p ())
2769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2770 "not vectorized: same operand with different "
2771 "def type in stmt.\n");
bd2f172f 2772
f7b94dec
RB
2773 if (res)
2774 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2775 cost_vec);
2776
2777 /* Restore def-types. */
2778 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2779 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2780 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
f7b94dec
RB
2781
2782 return res;
a70d6342
IR
2783}
2784
2785
8b7e9dba 2786/* Analyze statements in SLP instances of VINFO. Return true if the
a70d6342
IR
2787 operations are supported. */
2788
a12e42fc 2789bool
8b7e9dba 2790vect_slp_analyze_operations (vec_info *vinfo)
a70d6342 2791{
a70d6342
IR
2792 slp_instance instance;
2793 int i;
2794
adac3a68 2795 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
a12e42fc 2796
68435eb2
RB
2797 scalar_stmts_to_slp_tree_map_t *visited
2798 = new scalar_stmts_to_slp_tree_map_t ();
8b7e9dba 2799 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
a70d6342 2800 {
68435eb2
RB
2801 scalar_stmts_to_slp_tree_map_t lvisited;
2802 stmt_vector_for_cost cost_vec;
2803 cost_vec.create (2);
8b7e9dba
RS
2804 if (!vect_slp_analyze_node_operations (vinfo,
2805 SLP_INSTANCE_TREE (instance),
68435eb2
RB
2806 instance, visited, &lvisited,
2807 &cost_vec))
a70d6342 2808 {
b9787581
RS
2809 slp_tree node = SLP_INSTANCE_TREE (instance);
2810 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bbeeac91
DM
2811 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_NOTE, vect_location,
2813 "removing SLP instance operations starting from: %G",
2814 stmt_info->stmt);
6e2dd807 2815 vect_free_slp_instance (instance, false);
8b7e9dba 2816 vinfo->slp_instances.ordered_remove (i);
68435eb2 2817 cost_vec.release ();
a70d6342
IR
2818 }
2819 else
68435eb2
RB
2820 {
2821 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2822 x != lvisited.end(); ++x)
2823 visited->put ((*x).first.copy (), (*x).second);
2824 i++;
78604de0 2825
68435eb2
RB
2826 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2827 cost_vec.release ();
2828 }
2829 }
78604de0
RB
2830 delete visited;
2831
8b7e9dba 2832 return !vinfo->slp_instances.is_empty ();
a70d6342
IR
2833}
2834
6eddf228
RB
2835
2836/* Compute the scalar cost of the SLP node NODE and its children
2837 and return it. Do not account defs that are marked in LIFE and
2838 update LIFE according to uses of NODE. */
2839
a296d6d3 2840static void
292cba13 2841vect_bb_slp_scalar_cost (basic_block bb,
a296d6d3 2842 slp_tree node, vec<bool, va_heap> *life,
4bfcf879
RB
2843 stmt_vector_for_cost *cost_vec,
2844 hash_set<slp_tree> &visited)
6eddf228 2845{
6eddf228 2846 unsigned i;
b9787581 2847 stmt_vec_info stmt_info;
6eddf228
RB
2848 slp_tree child;
2849
4bfcf879
RB
2850 if (visited.add (node))
2851 return;
2852
b9787581 2853 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6eddf228 2854 {
b9787581 2855 gimple *stmt = stmt_info->stmt;
91987857 2856 vec_info *vinfo = stmt_info->vinfo;
6eddf228
RB
2857 ssa_op_iter op_iter;
2858 def_operand_p def_p;
6eddf228 2859
ff4c81cc 2860 if ((*life)[i])
6eddf228
RB
2861 continue;
2862
2863 /* If there is a non-vectorized use of the defs then the scalar
2864 stmt is kept live in which case we do not account it or any
2865 required defs in the SLP children in the scalar cost. This
2866 way we make the vectorization more costly when compared to
2867 the scalar cost. */
2868 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2869 {
2870 imm_use_iterator use_iter;
355fe088 2871 gimple *use_stmt;
6eddf228 2872 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
91987857 2873 if (!is_gimple_debug (use_stmt))
6eddf228 2874 {
91987857
RS
2875 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2876 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2877 {
2878 (*life)[i] = true;
2879 BREAK_FROM_IMM_USE_STMT (use_iter);
2880 }
6eddf228
RB
2881 }
2882 }
ff4c81cc 2883 if ((*life)[i])
6eddf228
RB
2884 continue;
2885
b555a2e4
RB
2886 /* Count scalar stmts only once. */
2887 if (gimple_visited_p (stmt))
2888 continue;
2889 gimple_set_visited (stmt, true);
2890
a296d6d3 2891 vect_cost_for_stmt kind;
6eddf228
RB
2892 if (STMT_VINFO_DATA_REF (stmt_info))
2893 {
2894 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
a296d6d3 2895 kind = scalar_load;
6eddf228 2896 else
a296d6d3 2897 kind = scalar_store;
6eddf228 2898 }
e4020b28
RS
2899 else if (vect_nop_conversion_p (stmt_info))
2900 continue;
6eddf228 2901 else
a296d6d3
RB
2902 kind = scalar_stmt;
2903 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
6eddf228
RB
2904 }
2905
faa5399b 2906 auto_vec<bool, 20> subtree_life;
6eddf228 2907 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
2908 {
2909 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2910 {
2911 /* Do not directly pass LIFE to the recursive call, copy it to
2912 confine changes in the callee to the current child/subtree. */
2913 subtree_life.safe_splice (*life);
4bfcf879
RB
2914 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
2915 visited);
faa5399b
RB
2916 subtree_life.truncate (0);
2917 }
2918 }
6eddf228
RB
2919}
2920
4bfcf879
RB
2921static void
2922vect_bb_slp_scalar_cost (basic_block bb,
2923 slp_tree node, vec<bool, va_heap> *life,
2924 stmt_vector_for_cost *cost_vec)
2925{
2926 hash_set<slp_tree> visited;
2927 vect_bb_slp_scalar_cost (bb, node, life, cost_vec, visited);
2928}
2929
69f11a13
IR
2930/* Check if vectorization of the basic block is profitable. */
2931
2932static bool
2933vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2934{
9771b263 2935 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2936 slp_instance instance;
1a4b99c1 2937 int i;
c3e7ee41 2938 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2939 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2940
2941 /* Calculate scalar cost. */
a296d6d3
RB
2942 stmt_vector_for_cost scalar_costs;
2943 scalar_costs.create (0);
6eddf228 2944 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2945 {
00f96dc9 2946 auto_vec<bool, 20> life;
ff4c81cc 2947 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
a296d6d3
RB
2948 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2949 SLP_INSTANCE_TREE (instance),
2950 &life, &scalar_costs);
2951 }
2952 void *target_cost_data = init_cost (NULL);
68435eb2 2953 add_stmt_costs (target_cost_data, &scalar_costs);
a296d6d3
RB
2954 scalar_costs.release ();
2955 unsigned dummy;
2956 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2957 destroy_cost_data (target_cost_data);
69f11a13 2958
b555a2e4
RB
2959 /* Unset visited flag. */
2960 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2961 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2962 gimple_set_visited (gsi_stmt (gsi), false);
2963
c3e7ee41 2964 /* Complete the target-specific cost calculation. */
92345349
BS
2965 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2966 &vec_inside_cost, &vec_epilogue_cost);
2967
2968 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2969
73fbfcad 2970 if (dump_enabled_p ())
69f11a13 2971 {
78c60e3d
SS
2972 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2973 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2974 vec_inside_cost);
2975 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2976 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2977 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2978 }
2979
a6524bba
RB
2980 /* Vectorization is profitable if its cost is more than the cost of scalar
2981 version. Note that we err on the vector side for equal cost because
2982 the cost estimate is otherwise quite pessimistic (constant uses are
2983 free on the scalar side but cost a load on the vector side for
2984 example). */
2985 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2986 return false;
2987
2988 return true;
2989}
2990
818b3293
JH
2991/* Find any vectorizable constructors and add them to the grouped_store
2992 array. */
2993
2994static void
2995vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
2996{
2997 gimple_stmt_iterator gsi;
2998
2999 for (gsi = bb_vinfo->region_begin;
3000 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3001 {
3002 gimple *stmt = gsi_stmt (gsi);
3003
3004 if (is_gimple_assign (stmt)
3005 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
3006 && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
3007 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE)
3008 {
3009 tree rhs = gimple_assign_rhs1 (stmt);
3010
3011 if (CONSTRUCTOR_NELTS (rhs) == 0)
3012 continue;
3013
3014 poly_uint64 subparts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs));
3015
3016 if (maybe_ne (subparts, CONSTRUCTOR_NELTS (rhs)))
3017 continue;
3018
3019 if (dump_enabled_p ())
3020 dump_printf_loc (MSG_NOTE, vect_location,
3021 "Found vectorizable constructor: %G\n", stmt);
3022 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3023 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3024 }
3025 }
3026}
3027
1d778697
RS
3028/* Check if the region described by BB_VINFO can be vectorized, returning
3029 true if so. When returning false, set FATAL to true if the same failure
3030 would prevent vectorization at other vector sizes, false if it is still
3031 worth trying other sizes. N_STMTS is the number of statements in the
3032 region. */
3033
3034static bool
3035vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
a70d6342 3036{
bfb9d798
RB
3037 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3038
a70d6342 3039 slp_instance instance;
8e19f5a1 3040 int i;
d9f21f6a 3041 poly_uint64 min_vf = 2;
e4a707c4 3042
a5b50aa1
RB
3043 /* The first group of checks is independent of the vector size. */
3044 fatal = true;
3045
428db0ba
RB
3046 /* Analyze the data references. */
3047
a7b3509e 3048 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
a70d6342 3049 {
73fbfcad 3050 if (dump_enabled_p ())
78c60e3d
SS
3051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3052 "not vectorized: unhandled data-ref in basic "
3053 "block.\n");
1d778697 3054 return false;
a70d6342
IR
3055 }
3056
fcac74a1 3057 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 3058 {
73fbfcad 3059 if (dump_enabled_p ())
78c60e3d
SS
3060 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3061 "not vectorized: not enough data-refs in "
3062 "basic block.\n");
1d778697 3063 return false;
a70d6342
IR
3064 }
3065
310213d4 3066 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
3067 {
3068 if (dump_enabled_p ())
3069 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3070 "not vectorized: unhandled data access in "
3071 "basic block.\n");
1d778697 3072 return false;
5abe1e05
RB
3073 }
3074
818b3293
JH
3075 vect_slp_check_for_constructors (bb_vinfo);
3076
a5b50aa1
RB
3077 /* If there are no grouped stores in the region there is no need
3078 to continue with pattern recog as vect_analyze_slp will fail
3079 anyway. */
3080 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 3081 {
73fbfcad 3082 if (dump_enabled_p ())
a5b50aa1
RB
3083 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3084 "not vectorized: no grouped stores in "
3085 "basic block.\n");
1d778697 3086 return false;
a70d6342 3087 }
b8698a0f 3088
a5b50aa1
RB
3089 /* While the rest of the analysis below depends on it in some way. */
3090 fatal = false;
3091
3092 vect_pattern_recog (bb_vinfo);
3093
a70d6342
IR
3094 /* Check the SLP opportunities in the basic block, analyze and build SLP
3095 trees. */
310213d4 3096 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 3097 {
73fbfcad 3098 if (dump_enabled_p ())
effb52da
RB
3099 {
3100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3101 "Failed to SLP the basic block.\n");
3102 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3103 "not vectorized: failed to find SLP opportunities "
3104 "in basic block.\n");
3105 }
1d778697 3106 return false;
a70d6342 3107 }
b8698a0f 3108
62c8a2cf
RS
3109 vect_record_base_alignments (bb_vinfo);
3110
c2a12ca0
RB
3111 /* Analyze and verify the alignment of data references and the
3112 dependence in the SLP instances. */
a5b50aa1
RB
3113 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3114 {
c2a12ca0
RB
3115 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
3116 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1 3117 {
b9787581
RS
3118 slp_tree node = SLP_INSTANCE_TREE (instance);
3119 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bbeeac91
DM
3120 if (dump_enabled_p ())
3121 dump_printf_loc (MSG_NOTE, vect_location,
3122 "removing SLP instance operations starting from: %G",
3123 stmt_info->stmt);
6e2dd807 3124 vect_free_slp_instance (instance, false);
a5b50aa1
RB
3125 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3126 continue;
3127 }
c2a12ca0
RB
3128
3129 /* Mark all the statements that we want to vectorize as pure SLP and
3130 relevant. */
6c7e3b1f 3131 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
c2a12ca0 3132 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
818b3293
JH
3133 if (SLP_INSTANCE_ROOT_STMT (instance))
3134 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
c2a12ca0 3135
a5b50aa1
RB
3136 i++;
3137 }
a5b50aa1 3138 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
1d778697 3139 return false;
a5b50aa1 3140
8b7e9dba 3141 if (!vect_slp_analyze_operations (bb_vinfo))
a70d6342 3142 {
73fbfcad 3143 if (dump_enabled_p ())
e645e942 3144 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 3145 "not vectorized: bad operation in basic block.\n");
1d778697 3146 return false;
a70d6342
IR
3147 }
3148
69f11a13 3149 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 3150 if (!unlimited_cost_model (NULL)
69f11a13
IR
3151 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3152 {
73fbfcad 3153 if (dump_enabled_p ())
78c60e3d
SS
3154 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3155 "not vectorized: vectorization is not "
3156 "profitable.\n");
1d778697 3157 return false;
69f11a13
IR
3158 }
3159
73fbfcad 3160 if (dump_enabled_p ())
78c60e3d
SS
3161 dump_printf_loc (MSG_NOTE, vect_location,
3162 "Basic block will be vectorized using SLP\n");
1d778697 3163 return true;
a70d6342
IR
3164}
3165
fa0c8df7
RS
3166/* Subroutine of vect_slp_bb. Try to vectorize the statements between
3167 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3168 on success. The region has N_STMTS statements and has the datarefs
3169 given by DATAREFS. */
a70d6342 3170
fa0c8df7
RS
3171static bool
3172vect_slp_bb_region (gimple_stmt_iterator region_begin,
3173 gimple_stmt_iterator region_end,
3174 vec<data_reference_p> datarefs,
3175 unsigned int n_stmts)
8e19f5a1
IR
3176{
3177 bb_vec_info bb_vinfo;
e021fb86 3178 auto_vector_modes vector_modes;
8e19f5a1 3179
8e19f5a1 3180 /* Autodetect first vector size we try. */
e021fb86
RS
3181 machine_mode next_vector_mode = VOIDmode;
3182 targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3183 unsigned int mode_i = 0;
8e19f5a1 3184
fa0c8df7 3185 vec_info_shared shared;
61d371eb 3186
1c84a2d2 3187 machine_mode autodetected_vector_mode = VOIDmode;
8e19f5a1
IR
3188 while (1)
3189 {
61d371eb 3190 bool vectorized = false;
a5b50aa1 3191 bool fatal = false;
1d778697
RS
3192 bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3193
3194 bool first_time_p = shared.datarefs.is_empty ();
3195 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3196 if (first_time_p)
3197 bb_vinfo->shared->save_datarefs ();
3198 else
3199 bb_vinfo->shared->check_datarefs ();
1c84a2d2 3200 bb_vinfo->vector_mode = next_vector_mode;
1d778697
RS
3201
3202 if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
61d371eb
RB
3203 && dbg_cnt (vect_slp))
3204 {
428db0ba 3205 if (dump_enabled_p ())
df7c2283
RS
3206 {
3207 dump_printf_loc (MSG_NOTE, vect_location,
3208 "***** Analysis succeeded with vector mode"
3209 " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
3210 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3211 }
428db0ba 3212
ca823c85 3213 bb_vinfo->shared->check_datarefs ();
428db0ba
RB
3214 vect_schedule_slp (bb_vinfo);
3215
d1ac60d5 3216 unsigned HOST_WIDE_INT bytes;
bbeeac91
DM
3217 if (dump_enabled_p ())
3218 {
1c84a2d2 3219 if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
bbeeac91
DM
3220 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3221 "basic block part vectorized using %wu byte "
3222 "vectors\n", bytes);
3223 else
3224 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3225 "basic block part vectorized using variable "
3226 "length vectors\n");
3227 }
428db0ba 3228
61d371eb 3229 vectorized = true;
428db0ba 3230 }
df7c2283
RS
3231 else
3232 {
3233 if (dump_enabled_p ())
3234 dump_printf_loc (MSG_NOTE, vect_location,
3235 "***** Analysis failed with vector mode %s\n",
3236 GET_MODE_NAME (bb_vinfo->vector_mode));
3237 }
8e19f5a1 3238
e021fb86 3239 if (mode_i == 0)
1c84a2d2 3240 autodetected_vector_mode = bb_vinfo->vector_mode;
ba7f76dd 3241
a55d8232
RS
3242 if (!fatal)
3243 while (mode_i < vector_modes.length ()
3244 && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
3245 {
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_NOTE, vect_location,
3248 "***** The result for vector mode %s would"
3249 " be the same\n",
3250 GET_MODE_NAME (vector_modes[mode_i]));
3251 mode_i += 1;
3252 }
3253
ba7f76dd 3254 delete bb_vinfo;
86e36728 3255
e021fb86 3256 if (mode_i < vector_modes.length ()
df7c2283
RS
3257 && VECTOR_MODE_P (autodetected_vector_mode)
3258 && (related_vector_mode (vector_modes[mode_i],
3259 GET_MODE_INNER (autodetected_vector_mode))
3260 == autodetected_vector_mode)
3261 && (related_vector_mode (autodetected_vector_mode,
3262 GET_MODE_INNER (vector_modes[mode_i]))
3263 == vector_modes[mode_i]))
3264 {
3265 if (dump_enabled_p ())
3266 dump_printf_loc (MSG_NOTE, vect_location,
3267 "***** Skipping vector mode %s, which would"
3268 " repeat the analysis for %s\n",
3269 GET_MODE_NAME (vector_modes[mode_i]),
3270 GET_MODE_NAME (autodetected_vector_mode));
3271 mode_i += 1;
3272 }
86e36728 3273
61d371eb 3274 if (vectorized
e021fb86 3275 || mode_i == vector_modes.length ()
1c84a2d2 3276 || autodetected_vector_mode == VOIDmode
a5b50aa1
RB
3277 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3278 vector sizes will fail do not bother iterating. */
3279 || fatal)
fa0c8df7
RS
3280 return vectorized;
3281
3282 /* Try the next biggest vector size. */
e021fb86 3283 next_vector_mode = vector_modes[mode_i++];
fa0c8df7 3284 if (dump_enabled_p ())
e021fb86
RS
3285 dump_printf_loc (MSG_NOTE, vect_location,
3286 "***** Re-trying analysis with vector mode %s\n",
3287 GET_MODE_NAME (next_vector_mode));
fa0c8df7
RS
3288 }
3289}
8e19f5a1 3290
fa0c8df7
RS
3291/* Main entry for the BB vectorizer. Analyze and transform BB, returns
3292 true if anything in the basic-block was vectorized. */
61d371eb 3293
fa0c8df7
RS
3294bool
3295vect_slp_bb (basic_block bb)
3296{
3297 gimple_stmt_iterator gsi;
3298 bool any_vectorized = false;
3299
3300 gsi = gsi_start_bb (bb);
3301 while (!gsi_end_p (gsi))
3302 {
3303 gimple_stmt_iterator region_begin = gsi;
3304 vec<data_reference_p> datarefs = vNULL;
3305 int insns = 0;
3306
3307 for (; !gsi_end_p (gsi); gsi_next (&gsi))
61d371eb 3308 {
fa0c8df7
RS
3309 gimple *stmt = gsi_stmt (gsi);
3310 if (is_gimple_debug (stmt))
3311 continue;
3312 insns++;
3313
3314 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3315 vect_location = stmt;
3316
3317 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3318 break;
3319 }
61d371eb 3320
fa0c8df7
RS
3321 /* Skip leading unhandled stmts. */
3322 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3323 {
3324 gsi_next (&gsi);
3325 continue;
61d371eb 3326 }
fa0c8df7
RS
3327
3328 gimple_stmt_iterator region_end = gsi;
3329
028d4092 3330 if (insns > param_slp_max_insns_in_bb)
1d778697
RS
3331 {
3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3334 "not vectorized: too many instructions in "
3335 "basic block.\n");
3336 }
3337 else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
fa0c8df7
RS
3338 any_vectorized = true;
3339
3340 if (gsi_end_p (region_end))
3341 break;
3342
3343 /* Skip the unhandled stmt. */
3344 gsi_next (&gsi);
8e19f5a1 3345 }
61d371eb
RB
3346
3347 return any_vectorized;
8e19f5a1
IR
3348}
3349
3350
c73e7656 3351/* Return 1 if vector type STMT_VINFO is a boolean vector. */
e4af0bc4
IE
3352
3353static bool
c73e7656 3354vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo)
e4af0bc4 3355{
32e8e429 3356 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
e4af0bc4 3357 tree op, vectype;
e4af0bc4
IE
3358 enum vect_def_type dt;
3359
3360 /* For comparison and COND_EXPR type is chosen depending
c73e7656 3361 on the non-constant other comparison operand. */
e4af0bc4
IE
3362 if (TREE_CODE_CLASS (code) == tcc_comparison)
3363 {
32e8e429 3364 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
c73e7656 3365 op = gimple_assign_rhs1 (stmt);
e4af0bc4 3366
894dd753 3367 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3368 gcc_unreachable ();
3369
3370 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3371 }
3372
3373 if (code == COND_EXPR)
3374 {
32e8e429 3375 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
e4af0bc4
IE
3376 tree cond = gimple_assign_rhs1 (stmt);
3377
3378 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1 3379 op = cond;
e4af0bc4
IE
3380 else
3381 op = TREE_OPERAND (cond, 0);
3382
894dd753 3383 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3384 gcc_unreachable ();
3385
3386 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3387 }
3388
3389 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3390}
3391
018b2744
RS
3392/* Build a variable-length vector in which the elements in ELTS are repeated
3393 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3394 RESULTS and add any new instructions to SEQ.
3395
3396 The approach we use is:
3397
3398 (1) Find a vector mode VM with integer elements of mode IM.
3399
3400 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3401 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3402 from small vectors to IM.
3403
3404 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3405
3406 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3407 correct byte contents.
3408
3409 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3410
3411 We try to find the largest IM for which this sequence works, in order
3412 to cut down on the number of interleaves. */
3413
f1739b48 3414void
43fdde57 3415duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
cdbe6e9b
RS
3416 vec<tree> elts, unsigned int nresults,
3417 vec<tree> &results)
018b2744
RS
3418{
3419 unsigned int nelts = elts.length ();
3420 tree element_type = TREE_TYPE (vector_type);
3421
3422 /* (1) Find a vector mode VM with integer elements of mode IM. */
3423 unsigned int nvectors = 1;
3424 tree new_vector_type;
3425 tree permutes[2];
43fdde57 3426 if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
018b2744
RS
3427 &nvectors, &new_vector_type,
3428 permutes))
3429 gcc_unreachable ();
3430
3431 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3432 unsigned int partial_nelts = nelts / nvectors;
3433 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3434
3435 tree_vector_builder partial_elts;
3436 auto_vec<tree, 32> pieces (nvectors * 2);
3437 pieces.quick_grow (nvectors * 2);
3438 for (unsigned int i = 0; i < nvectors; ++i)
3439 {
3440 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3441 ELTS' has mode IM. */
3442 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3443 for (unsigned int j = 0; j < partial_nelts; ++j)
3444 partial_elts.quick_push (elts[i * partial_nelts + j]);
3445 tree t = gimple_build_vector (seq, &partial_elts);
3446 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3447 TREE_TYPE (new_vector_type), t);
3448
3449 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3450 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3451 }
3452
3453 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3454 correct byte contents.
3455
3456 We need to repeat the following operation log2(nvectors) times:
3457
3458 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3459 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3460
3461 However, if each input repeats every N elements and the VF is
3462 a multiple of N * 2, the HI result is the same as the LO. */
3463 unsigned int in_start = 0;
3464 unsigned int out_start = nvectors;
3465 unsigned int hi_start = nvectors / 2;
3466 /* A bound on the number of outputs needed to produce NRESULTS results
3467 in the final iteration. */
3468 unsigned int noutputs_bound = nvectors * nresults;
3469 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3470 {
3471 noutputs_bound /= 2;
3472 unsigned int limit = MIN (noutputs_bound, nvectors);
3473 for (unsigned int i = 0; i < limit; ++i)
3474 {
3475 if ((i & 1) != 0
3476 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3477 2 * in_repeat))
3478 {
3479 pieces[out_start + i] = pieces[out_start + i - 1];
3480 continue;
3481 }
3482
3483 tree output = make_ssa_name (new_vector_type);
3484 tree input1 = pieces[in_start + (i / 2)];
3485 tree input2 = pieces[in_start + (i / 2) + hi_start];
3486 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3487 input1, input2,
3488 permutes[i & 1]);
3489 gimple_seq_add_stmt (seq, stmt);
3490 pieces[out_start + i] = output;
3491 }
3492 std::swap (in_start, out_start);
3493 }
3494
3495 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3496 results.reserve (nresults);
3497 for (unsigned int i = 0; i < nresults; ++i)
3498 if (i < nvectors)
3499 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3500 pieces[in_start + i]));
3501 else
3502 results.quick_push (results[i - nvectors]);
3503}
3504
e4af0bc4 3505
b8698a0f
L
3506/* For constant and loop invariant defs of SLP_NODE this function returns
3507 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
30c0d1e3
RB
3508 OP_NODE determines the node for the operand containing the scalar
3509 operands. */
ebfd146a
IR
3510
3511static void
30c0d1e3
RB
3512vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node,
3513 vec<tree> *vec_oprnds)
ebfd146a 3514{
30c0d1e3 3515 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7ed54790 3516 vec_info *vinfo = stmt_vinfo->vinfo;
018b2744 3517 unsigned HOST_WIDE_INT nunits;
ebfd146a 3518 tree vec_cst;
d2a12ae7 3519 unsigned j, number_of_places_left_in_vector;
ebfd146a 3520 tree vector_type;
9dc3f7de 3521 tree vop;
30c0d1e3 3522 int group_size = op_node->ops.length ();
ebfd146a 3523 unsigned int vec_num, i;
d2a12ae7 3524 unsigned number_of_copies = 1;
30c0d1e3 3525 bool constant_p;
b5aeb3bb 3526 tree neutral_op = NULL;
13396b6e 3527 gimple_seq ctor_seq = NULL;
018b2744 3528 auto_vec<tree, 16> permute_results;
b5aeb3bb 3529
30c0d1e3
RB
3530 /* ??? SLP analysis should compute the vector type for the
3531 constant / invariant and store it in the SLP node. */
3532 tree op = op_node->ops[0];
42fd8198 3533 /* Check if vector type is a boolean vector. */
30c0d1e3 3534 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2568d8a1 3535 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c73e7656 3536 && vect_mask_constant_operand_p (stmt_vinfo))
e8738f4e 3537 vector_type = truth_type_for (stmt_vectype);
42fd8198 3538 else
7ed54790 3539 vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
afbe6325 3540
318bd8c6
RB
3541 /* ??? For lane-reducing ops we should also have the required number
3542 of vector stmts initialized rather than second-guessing here. */
3543 unsigned int number_of_vectors;
3544 if (is_gimple_assign (stmt_vinfo->stmt)
3545 && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3546 || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3547 || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3548 number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3549 else
3550 number_of_vectors
3551 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3552 * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3553 vector_type);
30c0d1e3
RB
3554 vec_oprnds->create (number_of_vectors);
3555 auto_vec<tree> voprnds (number_of_vectors);
ebfd146a 3556
ebfd146a 3557 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3558 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3559
3560 For example, we have two scalar operands, s1 and s2 (e.g., group of
3561 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3562 of this type can be packed in a vector). The output vector will contain
3563 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3564 will be 2).
3565
b8698a0f 3566 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3567 containing the operands.
3568
3569 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3570 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3571 {s5, s6, s7, s8}. */
b8698a0f 3572
018b2744
RS
3573 /* When using duplicate_and_interleave, we just need one element for
3574 each scalar statement. */
3575 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3576 nunits = group_size;
3577
14a61437 3578 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3579
3580 number_of_places_left_in_vector = nunits;
62cf7335 3581 constant_p = true;
5ebaa477 3582 tree_vector_builder elts (vector_type, nunits, 1);
794e3180 3583 elts.quick_grow (nunits);
90dd6e3d 3584 bool place_after_defs = false;
ebfd146a
IR
3585 for (j = 0; j < number_of_copies; j++)
3586 {
30c0d1e3 3587 for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
ebfd146a 3588 {
ebfd146a 3589 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3590 number_of_places_left_in_vector--;
90dd6e3d 3591 tree orig_op = op;
13396b6e 3592 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3593 {
793d9a16 3594 if (CONSTANT_CLASS_P (op))
13396b6e 3595 {
42fd8198
IE
3596 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3597 {
3598 /* Can't use VIEW_CONVERT_EXPR for booleans because
3599 of possibly different sizes of scalar value and
3600 vector element. */
3601 if (integer_zerop (op))
3602 op = build_int_cst (TREE_TYPE (vector_type), 0);
3603 else if (integer_onep (op))
158beb4a 3604 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3605 else
3606 gcc_unreachable ();
3607 }
3608 else
3609 op = fold_unary (VIEW_CONVERT_EXPR,
3610 TREE_TYPE (vector_type), op);
13396b6e
JJ
3611 gcc_assert (op && CONSTANT_CLASS_P (op));
3612 }
3613 else
3614 {
b731b390 3615 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3616 gimple *init_stmt;
262a363f
JJ
3617 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3618 {
158beb4a
JJ
3619 tree true_val
3620 = build_all_ones_cst (TREE_TYPE (vector_type));
3621 tree false_val
3622 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3623 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3624 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3625 op, true_val,
3626 false_val);
262a363f 3627 }
262a363f
JJ
3628 else
3629 {
3630 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3631 op);
3632 init_stmt
3633 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3634 op);
3635 }
13396b6e
JJ
3636 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3637 op = new_temp;
3638 }
50eeef09 3639 }
d2a12ae7 3640 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3641 if (!CONSTANT_CLASS_P (op))
3642 constant_p = false;
90dd6e3d
RB
3643 if (TREE_CODE (orig_op) == SSA_NAME
3644 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3645 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3646 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3647 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3648 place_after_defs = true;
ebfd146a
IR
3649
3650 if (number_of_places_left_in_vector == 0)
3651 {
018b2744
RS
3652 if (constant_p
3653 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3654 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3655 vec_cst = gimple_build_vector (&ctor_seq, &elts);
ebfd146a 3656 else
d2a12ae7 3657 {
30c0d1e3 3658 if (permute_results.is_empty ())
cdbe6e9b
RS
3659 duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3660 elts, number_of_vectors,
018b2744
RS
3661 permute_results);
3662 vec_cst = permute_results[number_of_vectors - j - 1];
d2a12ae7 3663 }
90dd6e3d
RB
3664 tree init;
3665 gimple_stmt_iterator gsi;
3666 if (place_after_defs)
3667 {
95c68311
RS
3668 stmt_vec_info last_stmt_info
3669 = vect_find_last_scalar_stmt_in_slp (slp_node);
3670 gsi = gsi_for_stmt (last_stmt_info->stmt);
b9787581
RS
3671 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3672 &gsi);
90dd6e3d
RB
3673 }
3674 else
b9787581
RS
3675 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3676 NULL);
13396b6e
JJ
3677 if (ctor_seq != NULL)
3678 {
90dd6e3d 3679 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
018b2744 3680 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
13396b6e
JJ
3681 ctor_seq = NULL;
3682 }
90dd6e3d
RB
3683 voprnds.quick_push (init);
3684 place_after_defs = false;
62cf7335
RB
3685 number_of_places_left_in_vector = nunits;
3686 constant_p = true;
5ebaa477
RS
3687 elts.new_vector (vector_type, nunits, 1);
3688 elts.quick_grow (nunits);
ebfd146a
IR
3689 }
3690 }
3691 }
3692
b8698a0f 3693 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 3694 them. */
9771b263 3695 vec_num = voprnds.length ();
d2a12ae7 3696 for (j = vec_num; j != 0; j--)
ebfd146a 3697 {
9771b263
DN
3698 vop = voprnds[j - 1];
3699 vec_oprnds->quick_push (vop);
ebfd146a
IR
3700 }
3701
ebfd146a 3702 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3703 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3704 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3705 to replicate the vectors. */
9771b263 3706 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3707 {
b5aeb3bb
IR
3708 tree neutral_vec = NULL;
3709
3710 if (neutral_op)
3711 {
3712 if (!neutral_vec)
b9acc9f1 3713 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3714
9771b263 3715 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3716 }
3717 else
3718 {
9771b263
DN
3719 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3720 vec_oprnds->quick_push (vop);
b5aeb3bb 3721 }
ebfd146a
IR
3722 }
3723}
3724
3725
3726/* Get vectorized definitions from SLP_NODE that contains corresponding
3727 vectorized def-stmts. */
3728
3729static void
9771b263 3730vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a 3731{
16edaeb8 3732 stmt_vec_info vec_def_stmt_info;
ebfd146a
IR
3733 unsigned int i;
3734
9771b263 3735 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3736
16edaeb8 3737 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
30c0d1e3 3738 vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
ebfd146a
IR
3739}
3740
3741
30c0d1e3 3742/* Get N vectorized definitions for SLP_NODE.
b8698a0f 3743 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
3744 call vect_get_constant_vectors() to create vector stmts.
3745 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
3746 must be stored in the corresponding child of SLP_NODE, and we call
3747 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 3748
ebfd146a 3749void
30c0d1e3 3750vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
ebfd146a 3751{
30c0d1e3
RB
3752 if (n == -1U)
3753 n = SLP_TREE_CHILDREN (slp_node).length ();
ebfd146a 3754
30c0d1e3 3755 for (unsigned i = 0; i < n; ++i)
ebfd146a 3756 {
30c0d1e3 3757 slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
b5aeb3bb 3758
30c0d1e3 3759 vec<tree> vec_defs = vNULL;
ebfd146a 3760
30c0d1e3
RB
3761 /* For each operand we check if it has vectorized definitions in a child
3762 node or we need to create them (for invariants and constants). */
3763 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3764 {
3765 vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3766 vect_get_slp_vect_defs (child, &vec_defs);
3767 }
d092494c 3768 else
30c0d1e3 3769 vect_get_constant_vectors (child, slp_node, &vec_defs);
ebfd146a 3770
37b5ec8f 3771 vec_oprnds->quick_push (vec_defs);
d092494c 3772 }
ebfd146a
IR
3773}
3774
ebfd146a
IR
3775/* Generate vector permute statements from a list of loads in DR_CHAIN.
3776 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3777 permute statements for the SLP node NODE of the SLP instance
3778 SLP_NODE_INSTANCE. */
3779
ebfd146a 3780bool
01d8bf07 3781vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
d9f21f6a
RS
3782 gimple_stmt_iterator *gsi, poly_uint64 vf,
3783 slp_instance slp_node_instance, bool analyze_only,
29afecdf 3784 unsigned *n_perms)
ebfd146a 3785{
b9787581 3786 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
16edaeb8 3787 vec_info *vinfo = stmt_info->vinfo;
928686b1 3788 int vec_index = 0;
2635892a 3789 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ab7e60ce 3790 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
928686b1 3791 unsigned int mask_element;
ef4bddc2 3792 machine_mode mode;
ebfd146a 3793
91ff1504
RB
3794 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3795 return false;
3796
bffb8014 3797 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
91ff1504 3798
22e4dee7 3799 mode = TYPE_MODE (vectype);
ab7e60ce 3800 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 3801
61fdfd8c
RB
3802 /* Initialize the vect stmts of NODE to properly insert the generated
3803 stmts later. */
3804 if (! analyze_only)
3805 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3806 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3807 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a 3808
b8698a0f
L
3809 /* Generate permutation masks for every NODE. Number of masks for each NODE
3810 is equal to GROUP_SIZE.
3811 E.g., we have a group of three nodes with three loads from the same
3812 location in each node, and the vector size is 4. I.e., we have a
3813 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3814 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3815 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3816 ...
3817
2635892a 3818 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3819 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
3820 operation, and the mask element values can't be outside that range.
3821 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3822 For the first two permutations we need the first and the second input
ebfd146a 3823 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3824 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3825 {c2,a3,b3,c3}. */
3826
2ce27200 3827 int vect_stmts_counter = 0;
928686b1 3828 unsigned int index = 0;
2ce27200
RB
3829 int first_vec_index = -1;
3830 int second_vec_index = -1;
be377c80 3831 bool noop_p = true;
29afecdf 3832 *n_perms = 0;
ebfd146a 3833
ab7e60ce
RS
3834 vec_perm_builder mask;
3835 unsigned int nelts_to_build;
3836 unsigned int nvectors_per_build;
3837 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3838 && multiple_p (nunits, group_size));
3839 if (repeating_p)
2ce27200 3840 {
ab7e60ce
RS
3841 /* A single vector contains a whole number of copies of the node, so:
3842 (a) all permutes can use the same mask; and
3843 (b) the permutes only need a single vector input. */
3844 mask.new_vector (nunits, group_size, 3);
3845 nelts_to_build = mask.encoded_nelts ();
3846 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3847 }
3848 else
3849 {
3850 /* We need to construct a separate mask for each vector statement. */
3851 unsigned HOST_WIDE_INT const_nunits, const_vf;
3852 if (!nunits.is_constant (&const_nunits)
3853 || !vf.is_constant (&const_vf))
3854 return false;
3855 mask.new_vector (const_nunits, const_nunits, 1);
3856 nelts_to_build = const_vf * group_size;
3857 nvectors_per_build = 1;
3858 }
3859
3860 unsigned int count = mask.encoded_nelts ();
3861 mask.quick_grow (count);
3862 vec_perm_indices indices;
3863
3864 for (unsigned int j = 0; j < nelts_to_build; j++)
3865 {
3866 unsigned int iter_num = j / group_size;
3867 unsigned int stmt_num = j % group_size;
3868 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3869 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3870 if (repeating_p)
2ce27200 3871 {
ab7e60ce
RS
3872 first_vec_index = 0;
3873 mask_element = i;
3874 }
3875 else
3876 {
3877 /* Enforced before the loop when !repeating_p. */
3878 unsigned int const_nunits = nunits.to_constant ();
3879 vec_index = i / const_nunits;
3880 mask_element = i % const_nunits;
2ce27200
RB
3881 if (vec_index == first_vec_index
3882 || first_vec_index == -1)
3883 {
3884 first_vec_index = vec_index;
3885 }
3886 else if (vec_index == second_vec_index
3887 || second_vec_index == -1)
3888 {
3889 second_vec_index = vec_index;
ab7e60ce 3890 mask_element += const_nunits;
2ce27200
RB
3891 }
3892 else
3893 {
3894 if (dump_enabled_p ())
3c2a8ed0
DM
3895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3896 "permutation requires at "
3897 "least three vectors %G",
3898 stmt_info->stmt);
31bee964 3899 gcc_assert (analyze_only);
2ce27200
RB
3900 return false;
3901 }
ebfd146a 3902
ab7e60ce
RS
3903 gcc_assert (mask_element < 2 * const_nunits);
3904 }
3905
3906 if (mask_element != index)
3907 noop_p = false;
3908 mask[index++] = mask_element;
2ce27200 3909
ab7e60ce
RS
3910 if (index == count && !noop_p)
3911 {
3912 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3913 if (!can_vec_perm_const_p (mode, indices))
2ce27200 3914 {
ab7e60ce 3915 if (dump_enabled_p ())
2ce27200 3916 {
ab7e60ce
RS
3917 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3918 vect_location,
3919 "unsupported vect permute { ");
3920 for (i = 0; i < count; ++i)
22e4dee7 3921 {
ab7e60ce
RS
3922 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3923 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
22e4dee7 3924 }
ab7e60ce 3925 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2ce27200 3926 }
ab7e60ce
RS
3927 gcc_assert (analyze_only);
3928 return false;
e3342de4 3929 }
29afecdf 3930
ab7e60ce
RS
3931 ++*n_perms;
3932 }
3933
3934 if (index == count)
3935 {
3936 if (!analyze_only)
e3342de4 3937 {
ab7e60ce 3938 tree mask_vec = NULL_TREE;
be377c80 3939
ab7e60ce
RS
3940 if (! noop_p)
3941 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
2ce27200 3942
ab7e60ce
RS
3943 if (second_vec_index == -1)
3944 second_vec_index = first_vec_index;
61fdfd8c 3945
ab7e60ce
RS
3946 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3947 {
61fdfd8c 3948 /* Generate the permute statement if necessary. */
ab7e60ce
RS
3949 tree first_vec = dr_chain[first_vec_index + ri];
3950 tree second_vec = dr_chain[second_vec_index + ri];
16edaeb8 3951 stmt_vec_info perm_stmt_info;
61fdfd8c
RB
3952 if (! noop_p)
3953 {
b9787581 3954 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
61fdfd8c
RB
3955 tree perm_dest
3956 = vect_create_destination_var (gimple_assign_lhs (stmt),
3957 vectype);
3958 perm_dest = make_ssa_name (perm_dest);
16edaeb8
RS
3959 gassign *perm_stmt
3960 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3961 first_vec, second_vec,
3962 mask_vec);
3963 perm_stmt_info
b9787581
RS
3964 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3965 gsi);
61fdfd8c
RB
3966 }
3967 else
3968 /* If mask was NULL_TREE generate the requested
3969 identity transform. */
16edaeb8 3970 perm_stmt_info = vinfo->lookup_def (first_vec);
61fdfd8c
RB
3971
3972 /* Store the vector statement in NODE. */
16edaeb8
RS
3973 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3974 = perm_stmt_info;
2ce27200 3975 }
2ce27200 3976 }
ab7e60ce
RS
3977
3978 index = 0;
3979 first_vec_index = -1;
3980 second_vec_index = -1;
3981 noop_p = true;
2ce27200 3982 }
b8698a0f 3983 }
ebfd146a 3984
ebfd146a
IR
3985 return true;
3986}
3987
ebfd146a
IR
3988/* Vectorize SLP instance tree in postorder. */
3989
8fe1bd30 3990static void
f7300fff
RB
3991vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3992 scalar_stmts_to_slp_tree_map_t *bst_map)
ebfd146a 3993{
ebfd146a
IR
3994 gimple_stmt_iterator si;
3995 stmt_vec_info stmt_info;
8b7e9dba 3996 unsigned int group_size;
ebfd146a 3997 tree vectype;
603cca93 3998 int i, j;
d755c7ef 3999 slp_tree child;
ebfd146a 4000
603cca93 4001 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
8fe1bd30 4002 return;
ebfd146a 4003
a1f072e2
RB
4004 /* See if we have already vectorized the node in the graph of the
4005 SLP instance. */
4006 if (SLP_TREE_VEC_STMTS (node).exists ())
4007 return;
4008
f7300fff 4009 /* See if we have already vectorized the same set of stmts and reuse their
a1f072e2 4010 vectorized stmts across instances. */
dd172744 4011 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
f7300fff 4012 {
dd172744 4013 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
8fe1bd30 4014 return;
f7300fff
RB
4015 }
4016
dd172744 4017 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
9771b263 4018 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
f7300fff 4019 vect_schedule_slp_instance (child, instance, bst_map);
b8698a0f 4020
603cca93
RB
4021 /* Push SLP node def-type to stmts. */
4022 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4023 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
4024 {
4025 stmt_vec_info child_stmt_info;
4026 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4027 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
4028 }
603cca93 4029
b9787581 4030 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
4031
4032 /* VECTYPE is the type of the destination. */
b690cc0f 4033 vectype = STMT_VINFO_VECTYPE (stmt_info);
dad55d70 4034 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
4035 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
4036
68435eb2 4037 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
a1f072e2 4038 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
ebfd146a 4039
73fbfcad 4040 if (dump_enabled_p ())
3c2a8ed0
DM
4041 dump_printf_loc (MSG_NOTE, vect_location,
4042 "------>vectorizing SLP node starting from: %G",
4043 stmt_info->stmt);
ebfd146a 4044
2e8ab70c
RB
4045 /* Vectorized stmts go before the last scalar stmt which is where
4046 all uses are ready. */
95c68311
RS
4047 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
4048 si = gsi_for_stmt (last_stmt_info->stmt);
e4a707c4 4049
6876e5bc
RB
4050 /* Handle two-operation SLP nodes by vectorizing the group with
4051 both operations and then performing a merge. */
4052 if (SLP_TREE_TWO_OPERATORS (node))
4053 {
b9787581 4054 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
6876e5bc 4055 enum tree_code code0 = gimple_assign_rhs_code (stmt);
567a3691 4056 enum tree_code ocode = ERROR_MARK;
b9787581 4057 stmt_vec_info ostmt_info;
e3342de4 4058 vec_perm_builder mask (group_size, group_size, 1);
b9787581
RS
4059 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
4060 {
4061 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
4062 if (gimple_assign_rhs_code (ostmt) != code0)
4063 {
4064 mask.quick_push (1);
4065 ocode = gimple_assign_rhs_code (ostmt);
4066 }
4067 else
4068 mask.quick_push (0);
4069 }
567a3691 4070 if (ocode != ERROR_MARK)
6876e5bc 4071 {
16edaeb8
RS
4072 vec<stmt_vec_info> v0;
4073 vec<stmt_vec_info> v1;
6876e5bc
RB
4074 unsigned j;
4075 tree tmask = NULL_TREE;
b0b45e58 4076 vect_transform_stmt (stmt_info, &si, node, instance);
6876e5bc
RB
4077 v0 = SLP_TREE_VEC_STMTS (node).copy ();
4078 SLP_TREE_VEC_STMTS (node).truncate (0);
4079 gimple_assign_set_rhs_code (stmt, ocode);
b0b45e58 4080 vect_transform_stmt (stmt_info, &si, node, instance);
6876e5bc
RB
4081 gimple_assign_set_rhs_code (stmt, code0);
4082 v1 = SLP_TREE_VEC_STMTS (node).copy ();
4083 SLP_TREE_VEC_STMTS (node).truncate (0);
4084 tree meltype = build_nonstandard_integer_type
b397965c 4085 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
6876e5bc
RB
4086 tree mvectype = get_same_sized_vectype (meltype, vectype);
4087 unsigned k = 0, l;
4088 for (j = 0; j < v0.length (); ++j)
4089 {
dad55d70
RS
4090 /* Enforced by vect_build_slp_tree, which rejects variable-length
4091 vectors for SLP_TREE_TWO_OPERATORS. */
4092 unsigned int const_nunits = nunits.to_constant ();
4093 tree_vector_builder melts (mvectype, const_nunits, 1);
4094 for (l = 0; l < const_nunits; ++l)
6876e5bc 4095 {
1ece8d4c 4096 if (k >= group_size)
6876e5bc 4097 k = 0;
dad55d70
RS
4098 tree t = build_int_cst (meltype,
4099 mask[k++] * const_nunits + l);
794e3180 4100 melts.quick_push (t);
6876e5bc 4101 }
5ebaa477 4102 tmask = melts.build ();
6876e5bc
RB
4103
4104 /* ??? Not all targets support a VEC_PERM_EXPR with a
4105 constant mask that would translate to a vec_merge RTX
4106 (with their vec_perm_const_ok). We can either not
4107 vectorize in that case or let veclower do its job.
4108 Unfortunately that isn't too great and at least for
4109 plus/minus we'd eventually like to match targets
4110 vector addsub instructions. */
355fe088 4111 gimple *vstmt;
6876e5bc
RB
4112 vstmt = gimple_build_assign (make_ssa_name (vectype),
4113 VEC_PERM_EXPR,
16edaeb8
RS
4114 gimple_assign_lhs (v0[j]->stmt),
4115 gimple_assign_lhs (v1[j]->stmt),
4116 tmask);
4117 SLP_TREE_VEC_STMTS (node).quick_push
b9787581 4118 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
6876e5bc
RB
4119 }
4120 v0.release ();
4121 v1.release ();
8fe1bd30 4122 return;
6876e5bc
RB
4123 }
4124 }
b0b45e58 4125 vect_transform_stmt (stmt_info, &si, node, instance);
603cca93
RB
4126
4127 /* Restore stmt def-types. */
4128 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4129 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
4130 {
4131 stmt_vec_info child_stmt_info;
4132 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4133 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4134 }
ebfd146a
IR
4135}
4136
dd34c087
JJ
4137/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4138 For loop vectorization this is done in vectorizable_call, but for SLP
4139 it needs to be deferred until end of vect_schedule_slp, because multiple
4140 SLP instances may refer to the same scalar stmt. */
4141
4142static void
4bfcf879 4143vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
dd34c087 4144{
b9787581 4145 gimple *new_stmt;
dd34c087
JJ
4146 gimple_stmt_iterator gsi;
4147 int i;
d755c7ef 4148 slp_tree child;
dd34c087
JJ
4149 tree lhs;
4150 stmt_vec_info stmt_info;
4151
603cca93 4152 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
dd34c087
JJ
4153 return;
4154
4bfcf879
RB
4155 if (visited.add (node))
4156 return;
4157
9771b263 4158 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4bfcf879 4159 vect_remove_slp_scalar_calls (child, visited);
dd34c087 4160
b9787581 4161 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
dd34c087 4162 {
b9787581
RS
4163 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4164 if (!stmt || gimple_bb (stmt) == NULL)
dd34c087 4165 continue;
b9787581 4166 if (is_pattern_stmt_p (stmt_info)
dd34c087
JJ
4167 || !PURE_SLP_STMT (stmt_info))
4168 continue;
4169 lhs = gimple_call_lhs (stmt);
4170 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
dd34c087 4171 gsi = gsi_for_stmt (stmt);
9d97912b 4172 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
dd34c087
JJ
4173 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4174 }
4175}
ebfd146a 4176
4bfcf879
RB
4177static void
4178vect_remove_slp_scalar_calls (slp_tree node)
4179{
4180 hash_set<slp_tree> visited;
4181 vect_remove_slp_scalar_calls (node, visited);
4182}
4183
818b3293
JH
4184/* Vectorize the instance root. */
4185
4186void
4187vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4188{
0ec77a6c 4189 gassign *rstmt = NULL;
818b3293
JH
4190
4191 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4192 {
4193 stmt_vec_info child_stmt_info;
4194 int j;
4195
4196 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4197 {
4198 tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4199 tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
4200 rstmt = gimple_build_assign (root_lhs, vect_lhs);
4201 break;
4202 }
4203 }
4204 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4205 {
4206 int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4207 stmt_vec_info child_stmt_info;
4208 int j;
4209 vec<constructor_elt, va_gc> *v;
4210 vec_alloc (v, nelts);
4211
4212 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4213 {
4214 CONSTRUCTOR_APPEND_ELT (v,
4215 NULL_TREE,
4216 gimple_get_lhs (child_stmt_info->stmt));
4217 }
4218 tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4219 tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4220 tree r_constructor = build_constructor (rtype, v);
4221 rstmt = gimple_build_assign (lhs, r_constructor);
4222 }
0ec77a6c
TC
4223
4224 gcc_assert (rstmt);
4225
818b3293
JH
4226 gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4227 gsi_replace (&rgsi, rstmt, true);
4228}
4229
ff802fa1
IR
4230/* Generate vector code for all SLP instances in the loop/basic block. */
4231
8fe1bd30 4232void
310213d4 4233vect_schedule_slp (vec_info *vinfo)
ebfd146a 4234{
9771b263 4235 vec<slp_instance> slp_instances;
ebfd146a 4236 slp_instance instance;
8b7e9dba 4237 unsigned int i;
78604de0
RB
4238
4239 scalar_stmts_to_slp_tree_map_t *bst_map
4240 = new scalar_stmts_to_slp_tree_map_t ();
310213d4 4241 slp_instances = vinfo->slp_instances;
9771b263 4242 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a 4243 {
818b3293 4244 slp_tree node = SLP_INSTANCE_TREE (instance);
ebfd146a 4245 /* Schedule the tree of INSTANCE. */
818b3293
JH
4246 vect_schedule_slp_instance (node, instance, bst_map);
4247
4248 if (SLP_INSTANCE_ROOT_STMT (instance))
4249 vectorize_slp_instance_root_stmt (node, instance);
4250
73fbfcad 4251 if (dump_enabled_p ())
78c60e3d 4252 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4253 "vectorizing stmts using SLP.\n");
ebfd146a 4254 }
78604de0 4255 delete bst_map;
ebfd146a 4256
9771b263 4257 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
4258 {
4259 slp_tree root = SLP_INSTANCE_TREE (instance);
b9787581 4260 stmt_vec_info store_info;
b5aeb3bb 4261 unsigned int j;
b5aeb3bb 4262
c40eced0
RB
4263 /* Remove scalar call stmts. Do not do this for basic-block
4264 vectorization as not all uses may be vectorized.
4265 ??? Why should this be necessary? DCE should be able to
4266 remove the stmts itself.
4267 ??? For BB vectorization we can as well remove scalar
4268 stmts starting from the SLP tree root if they have no
4269 uses. */
310213d4 4270 if (is_a <loop_vec_info> (vinfo))
c40eced0 4271 vect_remove_slp_scalar_calls (root);
dd34c087 4272
b9787581 4273 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
b5aeb3bb
IR
4274 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4275 {
b9787581
RS
4276 if (!STMT_VINFO_DATA_REF (store_info))
4277 break;
4278
818b3293
JH
4279 if (SLP_INSTANCE_ROOT_STMT (instance))
4280 continue;
4281
211cd1e2 4282 store_info = vect_orig_stmt (store_info);
b9787581 4283 /* Free the attached stmt_vec_info and remove the stmt. */
b5b56c2a 4284 vinfo->remove_stmt (store_info);
b5aeb3bb
IR
4285 }
4286 }
ebfd146a 4287}