]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
Apply mechanical replacement (generated patch).
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
a5544970 2 Copyright (C) 2007-2019 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
957060b5 35#include "params.h"
40e23961 36#include "fold-const.h"
d8a2d370 37#include "stor-layout.h"
5be5c238 38#include "gimple-iterator.h"
ebfd146a 39#include "cfgloop.h"
ebfd146a 40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
642fce57 42#include "gimple-walk.h"
428db0ba 43#include "dbgcnt.h"
5ebaa477 44#include "tree-vector-builder.h"
f151c9e1 45#include "vec-perm-indices.h"
018b2744
RS
46#include "gimple-fold.h"
47#include "internal-fn.h"
a70d6342
IR
48
49
6e2dd807
RS
50/* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
ebfd146a
IR
53
54static void
6e2dd807 55vect_free_slp_tree (slp_tree node, bool final_p)
ebfd146a 56{
d092494c 57 int i;
d755c7ef 58 slp_tree child;
d092494c 59
a1f072e2
RB
60 if (--node->refcnt != 0)
61 return;
62
9771b263 63 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6e2dd807 64 vect_free_slp_tree (child, final_p);
b8698a0f 65
6e2dd807
RS
66 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
67 Some statements might no longer exist, after having been
68 removed by vect_transform_stmt. Updating the remaining
69 statements would be redundant. */
70 if (!final_p)
71 {
b9787581
RS
72 stmt_vec_info stmt_info;
73 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6e2dd807 74 {
b9787581
RS
75 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
76 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
6e2dd807
RS
77 }
78 }
78810bd3 79
9771b263
DN
80 SLP_TREE_CHILDREN (node).release ();
81 SLP_TREE_SCALAR_STMTS (node).release ();
30c0d1e3 82 SLP_TREE_SCALAR_OPS (node).release ();
9771b263 83 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 84 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
85
86 free (node);
87}
88
6e2dd807
RS
89/* Free the memory allocated for the SLP instance. FINAL_P is true if we
90 have vectorized the instance or if we have made a final decision not
91 to vectorize the statements in any way. */
ebfd146a
IR
92
93void
6e2dd807 94vect_free_slp_instance (slp_instance instance, bool final_p)
ebfd146a 95{
6e2dd807 96 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
9771b263 97 SLP_INSTANCE_LOADS (instance).release ();
c7e62a26 98 free (instance);
ebfd146a
IR
99}
100
101
d092494c
IR
102/* Create an SLP node for SCALAR_STMTS. */
103
104static slp_tree
b9787581 105vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
d092494c 106{
d3cfd39e 107 slp_tree node;
b9787581 108 stmt_vec_info stmt_info = scalar_stmts[0];
d092494c
IR
109 unsigned int nops;
110
b9787581 111 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
d092494c 112 nops = gimple_call_num_args (stmt);
b9787581 113 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
f7e531cf
IR
114 {
115 nops = gimple_num_ops (stmt) - 1;
116 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
117 nops++;
118 }
b9787581 119 else if (is_a <gphi *> (stmt_info->stmt))
e7baeb39 120 nops = 0;
d092494c
IR
121 else
122 return NULL;
123
d3cfd39e 124 node = XNEW (struct _slp_tree);
d092494c 125 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
30c0d1e3 126 SLP_TREE_SCALAR_OPS (node) = vNULL;
9771b263 127 SLP_TREE_VEC_STMTS (node).create (0);
68435eb2 128 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
9771b263 129 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 130 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 131 SLP_TREE_TWO_OPERATORS (node) = false;
603cca93 132 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
a1f072e2 133 node->refcnt = 1;
f48e4da3 134 node->max_nunits = 1;
d092494c 135
78810bd3 136 unsigned i;
b9787581
RS
137 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
138 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
78810bd3 139
d092494c
IR
140 return node;
141}
142
30c0d1e3
RB
143/* Create an SLP node for OPS. */
144
145static slp_tree
146vect_create_new_slp_node (vec<tree> ops)
147{
148 slp_tree node;
149
150 node = XNEW (struct _slp_tree);
151 SLP_TREE_SCALAR_STMTS (node) = vNULL;
152 SLP_TREE_SCALAR_OPS (node) = ops;
153 SLP_TREE_VEC_STMTS (node).create (0);
154 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
155 SLP_TREE_CHILDREN (node) = vNULL;
156 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
157 SLP_TREE_TWO_OPERATORS (node) = false;
158 SLP_TREE_DEF_TYPE (node) = vect_external_def;
159 node->refcnt = 1;
160 node->max_nunits = 1;
161
162 return node;
163}
164
d092494c 165
ddf56386
RB
166/* This structure is used in creation of an SLP tree. Each instance
167 corresponds to the same operand in a group of scalar stmts in an SLP
168 node. */
169typedef struct _slp_oprnd_info
170{
171 /* Def-stmts for the operands. */
b9787581 172 vec<stmt_vec_info> def_stmts;
30c0d1e3
RB
173 /* Operands. */
174 vec<tree> ops;
ddf56386
RB
175 /* Information about the first statement, its vector def-type, type, the
176 operand itself in case it's constant, and an indication if it's a pattern
177 stmt. */
ddf56386 178 tree first_op_type;
34e82342 179 enum vect_def_type first_dt;
7098ab48 180 bool any_pattern;
ddf56386
RB
181} *slp_oprnd_info;
182
183
d092494c
IR
184/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
185 operand. */
9771b263 186static vec<slp_oprnd_info>
d092494c
IR
187vect_create_oprnd_info (int nops, int group_size)
188{
189 int i;
190 slp_oprnd_info oprnd_info;
9771b263 191 vec<slp_oprnd_info> oprnds_info;
d092494c 192
9771b263 193 oprnds_info.create (nops);
d092494c
IR
194 for (i = 0; i < nops; i++)
195 {
196 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 197 oprnd_info->def_stmts.create (group_size);
30c0d1e3 198 oprnd_info->ops.create (group_size);
d092494c 199 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 200 oprnd_info->first_op_type = NULL_TREE;
7098ab48 201 oprnd_info->any_pattern = false;
9771b263 202 oprnds_info.quick_push (oprnd_info);
d092494c
IR
203 }
204
205 return oprnds_info;
206}
207
208
d3cfd39e
JJ
209/* Free operands info. */
210
d092494c 211static void
9771b263 212vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
213{
214 int i;
215 slp_oprnd_info oprnd_info;
216
9771b263 217 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 218 {
9771b263 219 oprnd_info->def_stmts.release ();
30c0d1e3 220 oprnd_info->ops.release ();
d3cfd39e
JJ
221 XDELETE (oprnd_info);
222 }
d092494c 223
9771b263 224 oprnds_info.release ();
d092494c
IR
225}
226
227
32e8e429
RS
228/* Find the place of the data-ref in STMT_INFO in the interleaving chain
229 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
230 of the chain. */
d755c7ef 231
b210f45f 232int
32e8e429
RS
233vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
234 stmt_vec_info first_stmt_info)
d755c7ef 235{
a1824cfd 236 stmt_vec_info next_stmt_info = first_stmt_info;
d755c7ef
RB
237 int result = 0;
238
a1824cfd 239 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
d755c7ef
RB
240 return -1;
241
242 do
243 {
a1824cfd 244 if (next_stmt_info == stmt_info)
d755c7ef 245 return result;
a1824cfd
RS
246 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
247 if (next_stmt_info)
248 result += DR_GROUP_GAP (next_stmt_info);
d755c7ef 249 }
a1824cfd 250 while (next_stmt_info);
d755c7ef
RB
251
252 return -1;
253}
254
018b2744
RS
255/* Check whether it is possible to load COUNT elements of type ELT_MODE
256 using the method implemented by duplicate_and_interleave. Return true
257 if so, returning the number of intermediate vectors in *NVECTORS_OUT
258 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
259 (if nonnull). */
260
f1739b48 261bool
ba7f76dd 262can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
43fdde57 263 machine_mode elt_mode,
f1739b48
RS
264 unsigned int *nvectors_out,
265 tree *vector_type_out,
266 tree *permutes)
018b2744
RS
267{
268 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
269 poly_int64 nelts;
270 unsigned int nvectors = 1;
271 for (;;)
272 {
273 scalar_int_mode int_mode;
274 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
ba7f76dd 275 if (multiple_p (vinfo->vector_size, elt_bytes, &nelts)
018b2744
RS
276 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
277 {
278 tree int_type = build_nonstandard_integer_type
279 (GET_MODE_BITSIZE (int_mode), 1);
280 tree vector_type = build_vector_type (int_type, nelts);
281 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
282 {
283 vec_perm_builder sel1 (nelts, 2, 3);
284 vec_perm_builder sel2 (nelts, 2, 3);
285 poly_int64 half_nelts = exact_div (nelts, 2);
286 for (unsigned int i = 0; i < 3; ++i)
287 {
288 sel1.quick_push (i);
289 sel1.quick_push (i + nelts);
290 sel2.quick_push (half_nelts + i);
291 sel2.quick_push (half_nelts + i + nelts);
292 }
293 vec_perm_indices indices1 (sel1, 2, nelts);
294 vec_perm_indices indices2 (sel2, 2, nelts);
295 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
296 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
297 {
298 if (nvectors_out)
299 *nvectors_out = nvectors;
300 if (vector_type_out)
301 *vector_type_out = vector_type;
302 if (permutes)
303 {
304 permutes[0] = vect_gen_perm_mask_checked (vector_type,
305 indices1);
306 permutes[1] = vect_gen_perm_mask_checked (vector_type,
307 indices2);
308 }
309 return true;
310 }
311 }
312 }
313 if (!multiple_p (elt_bytes, 2, &elt_bytes))
314 return false;
315 nvectors *= 2;
316 }
317}
d755c7ef 318
d092494c
IR
319/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
320 they are of a valid type and that they match the defs of the first stmt of
4cecd659 321 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
018b2744
RS
322 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
323 indicates swap is required for cond_expr stmts. Specifically, *SWAP
324 is 1 if STMT is cond and operands of comparison need to be swapped;
325 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
326 If there is any operand swap in this function, *SWAP is set to non-zero
327 value.
4cecd659
BC
328 If there was a fatal error return -1; if the error could be corrected by
329 swapping operands of father node of this one, return 1; if everything is
330 ok return 0. */
4cecd659
BC
331static int
332vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
b9787581 333 vec<stmt_vec_info> stmts, unsigned stmt_num,
4cecd659 334 vec<slp_oprnd_info> *oprnds_info)
ebfd146a 335{
b9787581 336 stmt_vec_info stmt_info = stmts[stmt_num];
ebfd146a
IR
337 tree oprnd;
338 unsigned int i, number_of_oprnds;
d092494c 339 enum vect_def_type dt = vect_uninitialized_def;
abf9bfbc 340 slp_oprnd_info oprnd_info;
b0b4483e 341 int first_op_idx = 1;
0246112a 342 unsigned int commutative_op = -1U;
b0b4483e 343 bool first_op_cond = false;
effb52da 344 bool first = stmt_num == 0;
b8698a0f 345
b9787581 346 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
190c2236
JJ
347 {
348 number_of_oprnds = gimple_call_num_args (stmt);
b0b4483e 349 first_op_idx = 3;
0246112a
RS
350 if (gimple_call_internal_p (stmt))
351 {
352 internal_fn ifn = gimple_call_internal_fn (stmt);
353 commutative_op = first_commutative_argument (ifn);
99763671
AM
354
355 /* Masked load, only look at mask. */
356 if (ifn == IFN_MASK_LOAD)
357 {
358 number_of_oprnds = 1;
359 /* Mask operand index. */
360 first_op_idx = 5;
361 }
0246112a 362 }
190c2236 363 }
b9787581 364 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
f7e531cf 365 {
b0b4483e 366 enum tree_code code = gimple_assign_rhs_code (stmt);
f7e531cf 367 number_of_oprnds = gimple_num_ops (stmt) - 1;
4cecd659
BC
368 /* Swap can only be done for cond_expr if asked to, otherwise we
369 could result in different comparison code to the first stmt. */
61021c35 370 if (code == COND_EXPR
a414c77f 371 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
b0b4483e
RB
372 {
373 first_op_cond = true;
b0b4483e
RB
374 number_of_oprnds++;
375 }
376 else
0246112a 377 commutative_op = commutative_tree_code (code) ? 0U : -1U;
f7e531cf 378 }
d092494c 379 else
b0b4483e 380 return -1;
ebfd146a 381
4cecd659
BC
382 bool swapped = (*swap != 0);
383 gcc_assert (!swapped || first_op_cond);
ebfd146a
IR
384 for (i = 0; i < number_of_oprnds; i++)
385 {
b0b4483e
RB
386again:
387 if (first_op_cond)
f7e531cf 388 {
4cecd659
BC
389 /* Map indicating how operands of cond_expr should be swapped. */
390 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
391 int *map = maps[*swap];
392
393 if (i < 2)
b9787581
RS
394 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
395 first_op_idx), map[i]);
b0b4483e 396 else
b9787581 397 oprnd = gimple_op (stmt_info->stmt, map[i]);
f7e531cf
IR
398 }
399 else
b9787581 400 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
61021c35
RB
401 if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
402 oprnd = TREE_OPERAND (oprnd, 0);
f7e531cf 403
9771b263 404 oprnd_info = (*oprnds_info)[i];
ebfd146a 405
fef96d8e
RS
406 stmt_vec_info def_stmt_info;
407 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
ebfd146a 408 {
73fbfcad 409 if (dump_enabled_p ())
3c2a8ed0
DM
410 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
411 "Build SLP failed: can't analyze def for %T\n",
412 oprnd);
ebfd146a 413
b0b4483e 414 return -1;
ebfd146a
IR
415 }
416
7098ab48
RB
417 if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
418 oprnd_info->any_pattern = true;
effb52da 419
d092494c 420 if (first)
ebfd146a 421 {
b4673569
RB
422 /* For the swapping logic below force vect_reduction_def
423 for the reduction op in a SLP reduction group. */
424 if (!STMT_VINFO_DATA_REF (stmt_info)
425 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
426 && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
427 && def_stmt_info)
428 dt = vect_reduction_def;
d092494c 429 oprnd_info->first_dt = dt;
793d9a16 430 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 431 }
ebfd146a
IR
432 else
433 {
d092494c
IR
434 /* Not first stmt of the group, check that the def-stmt/s match
435 the def-stmt/s of the first stmt. Allow different definition
436 types for reduction chains: the first stmt must be a
437 vect_reduction_def (a phi node), and the rest
4352288a 438 end in the reduction chain. */
018b2744
RS
439 tree type = TREE_TYPE (oprnd);
440 if ((oprnd_info->first_dt != dt
441 && !(oprnd_info->first_dt == vect_reduction_def
4352288a
RB
442 && !STMT_VINFO_DATA_REF (stmt_info)
443 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
444 && def_stmt_info
445 && !STMT_VINFO_DATA_REF (def_stmt_info)
446 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
447 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
018b2744
RS
448 && !((oprnd_info->first_dt == vect_external_def
449 || oprnd_info->first_dt == vect_constant_def)
450 && (dt == vect_external_def
451 || dt == vect_constant_def)))
4352288a
RB
452 || !types_compatible_p (oprnd_info->first_op_type, type)
453 || (!STMT_VINFO_DATA_REF (stmt_info)
454 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
455 && ((!def_stmt_info
456 || STMT_VINFO_DATA_REF (def_stmt_info)
457 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
458 != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
459 != (oprnd_info->first_dt != vect_reduction_def))))
ebfd146a 460 {
b0b4483e 461 /* Try swapping operands if we got a mismatch. */
0246112a 462 if (i == commutative_op && !swapped)
b0b4483e 463 {
4352288a
RB
464 if (dump_enabled_p ())
465 dump_printf_loc (MSG_NOTE, vect_location,
466 "trying swapped operands\n");
b0b4483e
RB
467 swapped = true;
468 goto again;
469 }
470
abf9bfbc
RB
471 if (dump_enabled_p ())
472 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 473 "Build SLP failed: different types\n");
d092494c 474
b0b4483e 475 return 1;
ebfd146a 476 }
018b2744
RS
477 if ((dt == vect_constant_def
478 || dt == vect_external_def)
ba7f76dd 479 && !vinfo->vector_size.is_constant ()
018b2744 480 && (TREE_CODE (type) == BOOLEAN_TYPE
43fdde57 481 || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
018b2744 482 TYPE_MODE (type))))
a23644f2
RS
483 {
484 if (dump_enabled_p ())
3c2a8ed0
DM
485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
486 "Build SLP failed: invalid type of def "
487 "for variable-length SLP %T\n", oprnd);
a23644f2
RS
488 return -1;
489 }
018b2744
RS
490 }
491
492 /* Check the types of the definitions. */
493 switch (dt)
494 {
018b2744 495 case vect_external_def:
6c7b0df8
RB
496 /* Make sure to demote the overall operand to external. */
497 oprnd_info->first_dt = vect_external_def;
498 /* Fallthru. */
499 case vect_constant_def:
30c0d1e3
RB
500 oprnd_info->def_stmts.quick_push (NULL);
501 oprnd_info->ops.quick_push (oprnd);
ebfd146a 502 break;
b8698a0f 503
4352288a 504 case vect_internal_def:
c78e3652 505 case vect_reduction_def:
4352288a
RB
506 if (oprnd_info->first_dt == vect_reduction_def
507 && !STMT_VINFO_DATA_REF (stmt_info)
508 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
509 && !STMT_VINFO_DATA_REF (def_stmt_info)
510 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
511 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
512 {
513 /* For a SLP reduction chain we want to duplicate the
514 reduction to each of the chain members. That gets
515 us a sane SLP graph (still the stmts are not 100%
516 correct wrt the initial values). */
517 gcc_assert (!first);
518 oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
519 oprnd_info->ops.quick_push (oprnd_info->ops[0]);
520 break;
521 }
522 /* Fallthru. */
e7baeb39 523 case vect_induction_def:
fef96d8e 524 oprnd_info->def_stmts.quick_push (def_stmt_info);
30c0d1e3 525 oprnd_info->ops.quick_push (oprnd);
ebfd146a
IR
526 break;
527
528 default:
529 /* FORNOW: Not supported. */
73fbfcad 530 if (dump_enabled_p ())
3c2a8ed0
DM
531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
532 "Build SLP failed: illegal type of def %T\n",
533 oprnd);
ebfd146a 534
b0b4483e 535 return -1;
ebfd146a
IR
536 }
537 }
538
b0b4483e
RB
539 /* Swap operands. */
540 if (swapped)
541 {
542 if (first_op_cond)
543 {
77100812
RB
544 /* If there are already uses of this stmt in a SLP instance then
545 we've committed to the operand order and can't swap it. */
546 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
547 {
548 if (dump_enabled_p ())
549 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
550 "Build SLP failed: cannot swap operands of "
551 "shared stmt %G", stmt_info->stmt);
552 return -1;
553 }
554
30c0d1e3
RB
555 /* To get rid of this swapping we have to move the stmt code
556 to the SLP tree as well (and gather it here per stmt). */
0246112a 557 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
b0b4483e 558 tree cond = gimple_assign_rhs1 (stmt);
4cecd659
BC
559 enum tree_code code = TREE_CODE (cond);
560
561 /* Swap. */
562 if (*swap == 1)
563 {
564 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
565 &TREE_OPERAND (cond, 1));
566 TREE_SET_CODE (cond, swap_tree_comparison (code));
567 }
568 /* Invert. */
569 else
570 {
571 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
572 gimple_assign_rhs3_ptr (stmt));
96d9d162
RB
573 if (STMT_VINFO_REDUC_IDX (stmt_info) == 1)
574 STMT_VINFO_REDUC_IDX (stmt_info) = 2;
575 else if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
576 STMT_VINFO_REDUC_IDX (stmt_info) = 1;
4cecd659
BC
577 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
578 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
579 gcc_assert (code != ERROR_MARK);
580 TREE_SET_CODE (cond, code);
581 }
b0b4483e
RB
582 }
583 else
0246112a 584 {
30c0d1e3
RB
585 /* Commutative ops need not reflect swapping, ops are in
586 the SLP tree. */
0246112a 587 }
78810bd3 588 if (dump_enabled_p ())
3c2a8ed0
DM
589 dump_printf_loc (MSG_NOTE, vect_location,
590 "swapped operands to match def types in %G",
591 stmt_info->stmt);
b0b4483e
RB
592 }
593
4cecd659 594 *swap = swapped;
b0b4483e 595 return 0;
ebfd146a
IR
596}
597
5249ee4d
RS
598/* Return true if call statements CALL1 and CALL2 are similar enough
599 to be combined into the same SLP group. */
600
601static bool
602compatible_calls_p (gcall *call1, gcall *call2)
603{
604 unsigned int nargs = gimple_call_num_args (call1);
605 if (nargs != gimple_call_num_args (call2))
606 return false;
607
608 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
609 return false;
610
611 if (gimple_call_internal_p (call1))
612 {
613 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
614 TREE_TYPE (gimple_call_lhs (call2))))
615 return false;
616 for (unsigned int i = 0; i < nargs; ++i)
617 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
618 TREE_TYPE (gimple_call_arg (call2, i))))
619 return false;
620 }
621 else
622 {
623 if (!operand_equal_p (gimple_call_fn (call1),
624 gimple_call_fn (call2), 0))
625 return false;
626
627 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
628 return false;
629 }
630 return true;
631}
632
b161f2c9 633/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
d7609678 634 caller's attempt to find the vector type in STMT_INFO with the narrowest
b161f2c9 635 element type. Return true if VECTYPE is nonnull and if it is valid
d7609678
RS
636 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
637 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
638 vect_build_slp_tree. */
b161f2c9
RS
639
640static bool
d7609678 641vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
4b6068ea 642 tree vectype, poly_uint64 *max_nunits)
b161f2c9
RS
643{
644 if (!vectype)
645 {
646 if (dump_enabled_p ())
3c2a8ed0
DM
647 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
648 "Build SLP failed: unsupported data-type in %G\n",
649 stmt_info->stmt);
b161f2c9
RS
650 /* Fatal mismatch. */
651 return false;
652 }
653
654 /* If populating the vector type requires unrolling then fail
655 before adjusting *max_nunits for basic-block vectorization. */
4b6068ea
RS
656 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
657 unsigned HOST_WIDE_INT const_nunits;
d7609678 658 if (STMT_VINFO_BB_VINFO (stmt_info)
4b6068ea
RS
659 && (!nunits.is_constant (&const_nunits)
660 || const_nunits > group_size))
b161f2c9 661 {
bbeeac91
DM
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
664 "Build SLP failed: unrolling required "
665 "in basic block SLP\n");
b161f2c9
RS
666 /* Fatal mismatch. */
667 return false;
668 }
669
670 /* In case of multiple types we need to detect the smallest type. */
4b6068ea 671 vect_update_max_nunits (max_nunits, vectype);
b161f2c9
RS
672 return true;
673}
ebfd146a 674
1f3cb663
RS
675/* STMTS is a group of GROUP_SIZE SLP statements in which some
676 statements do the same operation as the first statement and in which
677 the others do ALT_STMT_CODE. Return true if we can take one vector
678 of the first operation and one vector of the second and permute them
679 to get the required result. VECTYPE is the type of the vector that
680 would be permuted. */
681
682static bool
b9787581
RS
683vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
684 unsigned int group_size, tree vectype,
685 tree_code alt_stmt_code)
1f3cb663
RS
686{
687 unsigned HOST_WIDE_INT count;
688 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
689 return false;
690
691 vec_perm_builder sel (count, count, 1);
692 for (unsigned int i = 0; i < count; ++i)
693 {
694 unsigned int elt = i;
b9787581
RS
695 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
696 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
1f3cb663
RS
697 elt += count;
698 sel.quick_push (elt);
699 }
700 vec_perm_indices indices (sel, 2, count);
701 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
702}
703
6983e6b5
RB
704/* Verify if the scalar stmts STMTS are isomorphic, require data
705 permutation or are of unsupported types of operation. Return
706 true if they are, otherwise return false and indicate in *MATCHES
707 which stmts are not isomorphic to the first one. If MATCHES[0]
708 is false then this indicates the comparison could not be
4cecd659
BC
709 carried out or the stmts will never be vectorized by SLP.
710
99763671 711 Note COND_EXPR is possibly isomorphic to another one after swapping its
4cecd659
BC
712 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
713 the first stmt by swapping the two operands of comparison; set SWAP[i]
714 to 2 if stmt I is isormorphic to the first stmt by inverting the code
715 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
716 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
ebfd146a
IR
717
718static bool
d7609678 719vect_build_slp_tree_1 (unsigned char *swap,
b9787581 720 vec<stmt_vec_info> stmts, unsigned int group_size,
5249ee4d
RS
721 poly_uint64 *max_nunits, bool *matches,
722 bool *two_operators)
ebfd146a 723{
ebfd146a 724 unsigned int i;
b9787581 725 stmt_vec_info first_stmt_info = stmts[0];
6876e5bc
RB
726 enum tree_code first_stmt_code = ERROR_MARK;
727 enum tree_code alt_stmt_code = ERROR_MARK;
728 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 729 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 730 tree lhs;
6983e6b5 731 bool need_same_oprnds = false;
1f3cb663 732 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
ebfd146a
IR
733 optab optab;
734 int icode;
ef4bddc2
RS
735 machine_mode optab_op2_mode;
736 machine_mode vec_mode;
bffb8014 737 stmt_vec_info first_load = NULL, prev_first_load = NULL;
bcde3345 738 bool load_p = false;
d092494c 739
ebfd146a 740 /* For every stmt in NODE find its def stmt/s. */
b9787581
RS
741 stmt_vec_info stmt_info;
742 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
ebfd146a 743 {
b9787581 744 gimple *stmt = stmt_info->stmt;
4cecd659 745 swap[i] = 0;
6983e6b5
RB
746 matches[i] = false;
747
73fbfcad 748 if (dump_enabled_p ())
3c2a8ed0 749 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
ebfd146a 750
4b5caab7 751 /* Fail to vectorize statements marked as unvectorizable. */
b9787581 752 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
4b5caab7 753 {
73fbfcad 754 if (dump_enabled_p ())
3c2a8ed0
DM
755 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
756 "Build SLP failed: unvectorizable statement %G",
757 stmt);
6983e6b5
RB
758 /* Fatal mismatch. */
759 matches[0] = false;
4b5caab7
IR
760 return false;
761 }
762
ebfd146a
IR
763 lhs = gimple_get_lhs (stmt);
764 if (lhs == NULL_TREE)
765 {
73fbfcad 766 if (dump_enabled_p ())
3c2a8ed0
DM
767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
768 "Build SLP failed: not GIMPLE_ASSIGN nor "
769 "GIMPLE_CALL %G", stmt);
6983e6b5
RB
770 /* Fatal mismatch. */
771 matches[0] = false;
ebfd146a
IR
772 return false;
773 }
774
1f3cb663
RS
775 tree nunits_vectype;
776 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
777 &nunits_vectype)
778 || (nunits_vectype
d7609678 779 && !vect_record_max_nunits (stmt_info, group_size,
1f3cb663 780 nunits_vectype, max_nunits)))
b161f2c9 781 {
6983e6b5
RB
782 /* Fatal mismatch. */
783 matches[0] = false;
1f3cb663
RS
784 return false;
785 }
786
787 gcc_assert (vectype);
b8698a0f 788
538dd0b7 789 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
790 {
791 rhs_code = CALL_EXPR;
bcde3345
AM
792
793 if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
794 load_p = true;
795 else if ((gimple_call_internal_p (call_stmt)
796 && (!vectorizable_internal_fn_p
797 (gimple_call_internal_fn (call_stmt))))
798 || gimple_call_tail_p (call_stmt)
799 || gimple_call_noreturn_p (call_stmt)
800 || !gimple_call_nothrow_p (call_stmt)
801 || gimple_call_chain (call_stmt))
190c2236 802 {
73fbfcad 803 if (dump_enabled_p ())
3c2a8ed0
DM
804 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
805 "Build SLP failed: unsupported call type %G",
806 call_stmt);
6983e6b5
RB
807 /* Fatal mismatch. */
808 matches[0] = false;
190c2236
JJ
809 return false;
810 }
811 }
ebfd146a 812 else
bcde3345
AM
813 {
814 rhs_code = gimple_assign_rhs_code (stmt);
815 load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
816 }
ebfd146a
IR
817
818 /* Check the operation. */
819 if (i == 0)
820 {
821 first_stmt_code = rhs_code;
822
b8698a0f 823 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
824 vector shift with scalar shift operand. */
825 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
826 || rhs_code == LROTATE_EXPR
827 || rhs_code == RROTATE_EXPR)
828 {
1f3cb663
RS
829 if (vectype == boolean_type_node)
830 {
831 if (dump_enabled_p ())
832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
833 "Build SLP failed: shift of a"
834 " boolean.\n");
835 /* Fatal mismatch. */
836 matches[0] = false;
837 return false;
838 }
839
ebfd146a
IR
840 vec_mode = TYPE_MODE (vectype);
841
842 /* First see if we have a vector/vector shift. */
843 optab = optab_for_tree_code (rhs_code, vectype,
844 optab_vector);
845
846 if (!optab
947131ba 847 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
848 {
849 /* No vector/vector shift, try for a vector/scalar shift. */
850 optab = optab_for_tree_code (rhs_code, vectype,
851 optab_scalar);
852
853 if (!optab)
854 {
73fbfcad 855 if (dump_enabled_p ())
78c60e3d 856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 857 "Build SLP failed: no optab.\n");
6983e6b5
RB
858 /* Fatal mismatch. */
859 matches[0] = false;
ebfd146a
IR
860 return false;
861 }
947131ba 862 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
863 if (icode == CODE_FOR_nothing)
864 {
73fbfcad 865 if (dump_enabled_p ())
78c60e3d
SS
866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
867 "Build SLP failed: "
e645e942 868 "op not supported by target.\n");
6983e6b5
RB
869 /* Fatal mismatch. */
870 matches[0] = false;
ebfd146a
IR
871 return false;
872 }
873 optab_op2_mode = insn_data[icode].operand[2].mode;
874 if (!VECTOR_MODE_P (optab_op2_mode))
875 {
876 need_same_oprnds = true;
877 first_op1 = gimple_assign_rhs2 (stmt);
878 }
879 }
880 }
36ba4aae
IR
881 else if (rhs_code == WIDEN_LSHIFT_EXPR)
882 {
883 need_same_oprnds = true;
884 first_op1 = gimple_assign_rhs2 (stmt);
885 }
ebfd146a
IR
886 }
887 else
888 {
6876e5bc
RB
889 if (first_stmt_code != rhs_code
890 && alt_stmt_code == ERROR_MARK)
891 alt_stmt_code = rhs_code;
ebfd146a
IR
892 if (first_stmt_code != rhs_code
893 && (first_stmt_code != IMAGPART_EXPR
894 || rhs_code != REALPART_EXPR)
895 && (first_stmt_code != REALPART_EXPR
69f11a13 896 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
897 /* Handle mismatches in plus/minus by computing both
898 and merging the results. */
899 && !((first_stmt_code == PLUS_EXPR
900 || first_stmt_code == MINUS_EXPR)
901 && (alt_stmt_code == PLUS_EXPR
902 || alt_stmt_code == MINUS_EXPR)
903 && rhs_code == alt_stmt_code)
b9787581 904 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
69f11a13 905 && (first_stmt_code == ARRAY_REF
38000232 906 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
907 || first_stmt_code == INDIRECT_REF
908 || first_stmt_code == COMPONENT_REF
909 || first_stmt_code == MEM_REF)))
ebfd146a 910 {
73fbfcad 911 if (dump_enabled_p ())
ebfd146a 912 {
78c60e3d
SS
913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
914 "Build SLP failed: different operation "
3c2a8ed0 915 "in stmt %G", stmt);
6876e5bc 916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3c2a8ed0 917 "original stmt %G", first_stmt_info->stmt);
ebfd146a 918 }
6983e6b5
RB
919 /* Mismatch. */
920 continue;
ebfd146a 921 }
b8698a0f
L
922
923 if (need_same_oprnds
ebfd146a
IR
924 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
925 {
73fbfcad 926 if (dump_enabled_p ())
3c2a8ed0
DM
927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
928 "Build SLP failed: different shift "
929 "arguments in %G", stmt);
6983e6b5
RB
930 /* Mismatch. */
931 continue;
ebfd146a 932 }
190c2236 933
d6350f82 934 if (!load_p && rhs_code == CALL_EXPR)
190c2236 935 {
b9787581 936 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
5249ee4d 937 as_a <gcall *> (stmt)))
190c2236 938 {
73fbfcad 939 if (dump_enabled_p ())
3c2a8ed0
DM
940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
941 "Build SLP failed: different calls in %G",
942 stmt);
6983e6b5
RB
943 /* Mismatch. */
944 continue;
190c2236
JJ
945 }
946 }
ebfd146a
IR
947 }
948
0d0293ac 949 /* Grouped store or load. */
b9787581 950 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a
IR
951 {
952 if (REFERENCE_CLASS_P (lhs))
953 {
954 /* Store. */
6983e6b5 955 ;
ebfd146a 956 }
b5aeb3bb
IR
957 else
958 {
959 /* Load. */
b9787581 960 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
b5aeb3bb
IR
961 if (prev_first_load)
962 {
963 /* Check that there are no loads from different interleaving
6983e6b5
RB
964 chains in the same node. */
965 if (prev_first_load != first_load)
78c60e3d 966 {
73fbfcad 967 if (dump_enabled_p ())
3c2a8ed0
DM
968 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
969 vect_location,
970 "Build SLP failed: different "
971 "interleaving chains in one node %G",
972 stmt);
6983e6b5
RB
973 /* Mismatch. */
974 continue;
b5aeb3bb
IR
975 }
976 }
977 else
978 prev_first_load = first_load;
ebfd146a 979 }
0d0293ac 980 } /* Grouped access. */
ebfd146a
IR
981 else
982 {
bcde3345 983 if (load_p)
ebfd146a 984 {
0d0293ac 985 /* Not grouped load. */
73fbfcad 986 if (dump_enabled_p ())
3c2a8ed0
DM
987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
988 "Build SLP failed: not grouped load %G", stmt);
ebfd146a 989
0d0293ac 990 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
991 /* Fatal mismatch. */
992 matches[0] = false;
ebfd146a
IR
993 return false;
994 }
995
996 /* Not memory operation. */
997 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 998 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 999 && TREE_CODE_CLASS (rhs_code) != tcc_expression
42fd8198 1000 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
61021c35 1001 && rhs_code != VIEW_CONVERT_EXPR
190c2236 1002 && rhs_code != CALL_EXPR)
ebfd146a 1003 {
73fbfcad 1004 if (dump_enabled_p ())
3c2a8ed0
DM
1005 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1006 "Build SLP failed: operation unsupported %G",
1007 stmt);
6983e6b5
RB
1008 /* Fatal mismatch. */
1009 matches[0] = false;
ebfd146a
IR
1010 return false;
1011 }
1012
4cecd659
BC
1013 if (rhs_code == COND_EXPR)
1014 {
1015 tree cond_expr = gimple_assign_rhs1 (stmt);
1016 enum tree_code cond_code = TREE_CODE (cond_expr);
1017 enum tree_code swap_code = ERROR_MARK;
1018 enum tree_code invert_code = ERROR_MARK;
f7e531cf
IR
1019
1020 if (i == 0)
1021 first_cond_code = TREE_CODE (cond_expr);
4cecd659
BC
1022 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1023 {
1024 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1025 swap_code = swap_tree_comparison (cond_code);
1026 invert_code = invert_tree_comparison (cond_code, honor_nans);
1027 }
1028
1029 if (first_cond_code == cond_code)
1030 ;
1031 /* Isomorphic can be achieved by swapping. */
1032 else if (first_cond_code == swap_code)
1033 swap[i] = 1;
1034 /* Isomorphic can be achieved by inverting. */
1035 else if (first_cond_code == invert_code)
1036 swap[i] = 2;
1037 else
1038 {
1039 if (dump_enabled_p ())
3c2a8ed0
DM
1040 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1041 "Build SLP failed: different"
1042 " operation %G", stmt);
6983e6b5
RB
1043 /* Mismatch. */
1044 continue;
f7e531cf 1045 }
4cecd659 1046 }
ebfd146a 1047 }
6983e6b5
RB
1048
1049 matches[i] = true;
1050 }
1051
1052 for (i = 0; i < group_size; ++i)
1053 if (!matches[i])
1054 return false;
1055
6876e5bc
RB
1056 /* If we allowed a two-operation SLP node verify the target can cope
1057 with the permute we are going to use. */
1058 if (alt_stmt_code != ERROR_MARK
1059 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1060 {
1f3cb663
RS
1061 if (vectype == boolean_type_node
1062 || !vect_two_operations_perm_ok_p (stmts, group_size,
1063 vectype, alt_stmt_code))
6876e5bc
RB
1064 {
1065 for (i = 0; i < group_size; ++i)
b9787581 1066 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
6876e5bc
RB
1067 {
1068 matches[i] = false;
1069 if (dump_enabled_p ())
1070 {
1071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1072 "Build SLP failed: different operation "
3c2a8ed0 1073 "in stmt %G", stmts[i]->stmt);
6876e5bc 1074 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3c2a8ed0 1075 "original stmt %G", first_stmt_info->stmt);
6876e5bc
RB
1076 }
1077 }
1078 return false;
1079 }
1080 *two_operators = true;
1081 }
1082
6983e6b5
RB
1083 return true;
1084}
1085
26d66f28
RB
1086/* Traits for the hash_set to record failed SLP builds for a stmt set.
1087 Note we never remove apart from at destruction time so we do not
1088 need a special value for deleted that differs from empty. */
1089struct bst_traits
1090{
b9787581
RS
1091 typedef vec <stmt_vec_info> value_type;
1092 typedef vec <stmt_vec_info> compare_type;
26d66f28
RB
1093 static inline hashval_t hash (value_type);
1094 static inline bool equal (value_type existing, value_type candidate);
1095 static inline bool is_empty (value_type x) { return !x.exists (); }
1096 static inline bool is_deleted (value_type x) { return !x.exists (); }
1097 static inline void mark_empty (value_type &x) { x.release (); }
1098 static inline void mark_deleted (value_type &x) { x.release (); }
1099 static inline void remove (value_type &x) { x.release (); }
1100};
1101inline hashval_t
1102bst_traits::hash (value_type x)
1103{
1104 inchash::hash h;
1105 for (unsigned i = 0; i < x.length (); ++i)
b9787581 1106 h.add_int (gimple_uid (x[i]->stmt));
26d66f28
RB
1107 return h.end ();
1108}
1109inline bool
1110bst_traits::equal (value_type existing, value_type candidate)
1111{
1112 if (existing.length () != candidate.length ())
1113 return false;
1114 for (unsigned i = 0; i < existing.length (); ++i)
1115 if (existing[i] != candidate[i])
1116 return false;
1117 return true;
1118}
1119
68435eb2
RB
1120typedef hash_map <vec <gimple *>, slp_tree,
1121 simple_hashmap_traits <bst_traits, slp_tree> >
1122 scalar_stmts_to_slp_tree_map_t;
1123
26d66f28
RB
1124static slp_tree
1125vect_build_slp_tree_2 (vec_info *vinfo,
b9787581 1126 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1127 poly_uint64 *max_nunits,
26d66f28 1128 bool *matches, unsigned *npermutes, unsigned *tree_size,
a1f072e2 1129 scalar_stmts_to_slp_tree_map_t *bst_map);
6983e6b5 1130
e403d17e 1131static slp_tree
310213d4 1132vect_build_slp_tree (vec_info *vinfo,
b9787581 1133 vec<stmt_vec_info> stmts, unsigned int group_size,
5d8c32cb 1134 poly_uint64 *max_nunits,
1428105c 1135 bool *matches, unsigned *npermutes, unsigned *tree_size,
a1f072e2 1136 scalar_stmts_to_slp_tree_map_t *bst_map)
26d66f28 1137{
a1f072e2 1138 if (slp_tree *leader = bst_map->get (stmts))
26d66f28 1139 {
a1f072e2
RB
1140 if (dump_enabled_p ())
1141 dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
1142 *leader ? "" : "failed ", *leader);
1143 if (*leader)
f48e4da3
RB
1144 {
1145 (*leader)->refcnt++;
1146 vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
1147 }
a1f072e2 1148 return *leader;
26d66f28 1149 }
f48e4da3 1150 poly_uint64 this_max_nunits = 1;
a1f072e2 1151 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
9f708a84 1152 matches, npermutes, tree_size, bst_map);
a1f072e2 1153 if (res)
f48e4da3
RB
1154 {
1155 res->max_nunits = this_max_nunits;
1156 vect_update_max_nunits (max_nunits, this_max_nunits);
1157 /* Keep a reference for the bst_map use. */
1158 res->refcnt++;
1159 }
a1f072e2 1160 bst_map->put (stmts.copy (), res);
26d66f28
RB
1161 return res;
1162}
1163
1164/* Recursively build an SLP tree starting from NODE.
1165 Fail (and return a value not equal to zero) if def-stmts are not
1166 isomorphic, require data permutation or are of unsupported types of
1167 operation. Otherwise, return 0.
1168 The value returned is the depth in the SLP tree where a mismatch
1169 was found. */
1170
1171static slp_tree
1172vect_build_slp_tree_2 (vec_info *vinfo,
b9787581 1173 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1174 poly_uint64 *max_nunits,
26d66f28 1175 bool *matches, unsigned *npermutes, unsigned *tree_size,
a1f072e2 1176 scalar_stmts_to_slp_tree_map_t *bst_map)
6983e6b5 1177{
4b6068ea
RS
1178 unsigned nops, i, this_tree_size = 0;
1179 poly_uint64 this_max_nunits = *max_nunits;
e403d17e 1180 slp_tree node;
6983e6b5 1181
6983e6b5
RB
1182 matches[0] = false;
1183
b9787581
RS
1184 stmt_vec_info stmt_info = stmts[0];
1185 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
6983e6b5 1186 nops = gimple_call_num_args (stmt);
b9787581 1187 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6983e6b5
RB
1188 {
1189 nops = gimple_num_ops (stmt) - 1;
1190 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1191 nops++;
ebfd146a 1192 }
b9787581 1193 else if (is_a <gphi *> (stmt_info->stmt))
e7baeb39 1194 nops = 0;
6983e6b5 1195 else
e403d17e 1196 return NULL;
6983e6b5 1197
c78e3652
RB
1198 /* If the SLP node is a PHI (induction or reduction), terminate
1199 the recursion. */
b9787581 1200 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
e7baeb39 1201 {
b161f2c9 1202 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
7ed54790 1203 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
d7609678 1204 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
b161f2c9
RS
1205 return NULL;
1206
b9787581 1207 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
c78e3652 1208 /* Induction from different IVs is not supported. */
719488f8
RB
1209 if (def_type == vect_induction_def)
1210 {
b9787581
RS
1211 stmt_vec_info other_info;
1212 FOR_EACH_VEC_ELT (stmts, i, other_info)
1213 if (stmt_info != other_info)
719488f8
RB
1214 return NULL;
1215 }
22e4f1fb
RB
1216 else if (def_type == vect_reduction_def
1217 || def_type == vect_double_reduction_def
1218 || def_type == vect_nested_cycle)
719488f8
RB
1219 {
1220 /* Else def types have to match. */
b9787581
RS
1221 stmt_vec_info other_info;
1222 FOR_EACH_VEC_ELT (stmts, i, other_info)
4352288a
RB
1223 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
1224 return NULL;
719488f8 1225 }
22e4f1fb
RB
1226 else
1227 return NULL;
9f708a84 1228 (*tree_size)++;
e7baeb39
RB
1229 node = vect_create_new_slp_node (stmts);
1230 return node;
1231 }
1232
1233
6876e5bc 1234 bool two_operators = false;
4cecd659 1235 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
d7609678 1236 if (!vect_build_slp_tree_1 (swap, stmts, group_size,
e403d17e
RB
1237 &this_max_nunits, matches, &two_operators))
1238 return NULL;
ebfd146a 1239
99763671 1240 /* If the SLP node is a load, terminate the recursion unless masked. */
b9787581
RS
1241 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1242 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
ebfd146a 1243 {
99763671
AM
1244 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1245 {
1246 /* Masked load. */
1247 gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
1248 nops = 1;
1249 }
1250 else
1251 {
1252 *max_nunits = this_max_nunits;
1253 (*tree_size)++;
1254 node = vect_create_new_slp_node (stmts);
1255 return node;
1256 }
ebfd146a
IR
1257 }
1258
6983e6b5
RB
1259 /* Get at the operands, verifying they are compatible. */
1260 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1261 slp_oprnd_info oprnd_info;
b9787581 1262 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
6983e6b5 1263 {
4cecd659 1264 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
018b2744 1265 stmts, i, &oprnds_info);
4cecd659
BC
1266 if (res != 0)
1267 matches[(res == -1) ? 0 : i] = false;
1268 if (!matches[0])
1269 break;
6983e6b5 1270 }
b0b4483e
RB
1271 for (i = 0; i < group_size; ++i)
1272 if (!matches[i])
1273 {
1274 vect_free_oprnd_info (oprnds_info);
e403d17e 1275 return NULL;
b0b4483e 1276 }
6983e6b5 1277
e403d17e 1278 auto_vec<slp_tree, 4> children;
e403d17e 1279
b9787581 1280 stmt_info = stmts[0];
6983e6b5 1281
b8698a0f 1282 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 1283 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 1284 {
d092494c 1285 slp_tree child;
e403d17e
RB
1286 unsigned old_tree_size = this_tree_size;
1287 unsigned int j;
b8698a0f 1288
30c0d1e3
RB
1289 if (oprnd_info->first_dt == vect_uninitialized_def)
1290 {
1291 /* COND_EXPR have one too many eventually if the condition
1292 is a SSA name. */
1293 gcc_assert (i == 3 && nops == 4);
1294 continue;
1295 }
1296
e7baeb39 1297 if (oprnd_info->first_dt != vect_internal_def
c78e3652 1298 && oprnd_info->first_dt != vect_reduction_def
e7baeb39 1299 && oprnd_info->first_dt != vect_induction_def)
30c0d1e3
RB
1300 {
1301 slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
1302 SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
1303 oprnd_info->ops = vNULL;
1304 children.safe_push (invnode);
1305 continue;
1306 }
ebfd146a 1307
e403d17e
RB
1308 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1309 group_size, &this_max_nunits,
5d8c32cb 1310 matches, npermutes,
9f708a84 1311 &this_tree_size, bst_map)) != NULL)
6983e6b5 1312 {
3fc356dc
RB
1313 /* If we have all children of child built up from scalars then just
1314 throw that away and build it up this node from scalars. */
30c0d1e3
RB
1315 if (is_a <bb_vec_info> (vinfo)
1316 && !SLP_TREE_CHILDREN (child).is_empty ()
995b6fe0
RB
1317 /* ??? Rejecting patterns this way doesn't work. We'd have to
1318 do extra work to cancel the pattern so the uses see the
1319 scalar version. */
7098ab48 1320 && !oprnd_info->any_pattern)
3fc356dc 1321 {
3fc356dc
RB
1322 slp_tree grandchild;
1323
1324 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
f99d6262 1325 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
3fc356dc
RB
1326 break;
1327 if (!grandchild)
1328 {
1329 /* Roll back. */
e403d17e 1330 this_tree_size = old_tree_size;
3fc356dc 1331 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1332 vect_free_slp_tree (grandchild, false);
3fc356dc
RB
1333 SLP_TREE_CHILDREN (child).truncate (0);
1334
bbeeac91
DM
1335 if (dump_enabled_p ())
1336 dump_printf_loc (MSG_NOTE, vect_location,
1337 "Building parent vector operands from "
1338 "scalars instead\n");
3fc356dc 1339 oprnd_info->def_stmts = vNULL;
603cca93 1340 SLP_TREE_DEF_TYPE (child) = vect_external_def;
30c0d1e3
RB
1341 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1342 oprnd_info->ops = vNULL;
9f708a84 1343 ++this_tree_size;
e403d17e 1344 children.safe_push (child);
3fc356dc
RB
1345 continue;
1346 }
1347 }
1348
6983e6b5 1349 oprnd_info->def_stmts = vNULL;
e403d17e 1350 children.safe_push (child);
6983e6b5
RB
1351 continue;
1352 }
1353
90dd6e3d
RB
1354 /* If the SLP build failed fatally and we analyze a basic-block
1355 simply treat nodes we fail to build as externally defined
1356 (and thus build vectors from the scalar defs).
1357 The cost model will reject outright expensive cases.
1358 ??? This doesn't treat cases where permutation ultimatively
1359 fails (or we don't try permutation below). Ideally we'd
1360 even compute a permutation that will end up with the maximum
1361 SLP tree size... */
310213d4 1362 if (is_a <bb_vec_info> (vinfo)
90dd6e3d
RB
1363 && !matches[0]
1364 /* ??? Rejecting patterns this way doesn't work. We'd have to
1365 do extra work to cancel the pattern so the uses see the
1366 scalar version. */
7098ab48
RB
1367 && !is_pattern_stmt_p (stmt_info)
1368 && !oprnd_info->any_pattern)
90dd6e3d 1369 {
bbeeac91
DM
1370 if (dump_enabled_p ())
1371 dump_printf_loc (MSG_NOTE, vect_location,
1372 "Building vector operands from scalars\n");
9f708a84 1373 this_tree_size++;
e403d17e 1374 child = vect_create_new_slp_node (oprnd_info->def_stmts);
603cca93 1375 SLP_TREE_DEF_TYPE (child) = vect_external_def;
30c0d1e3 1376 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
e403d17e 1377 children.safe_push (child);
30c0d1e3 1378 oprnd_info->ops = vNULL;
e403d17e 1379 oprnd_info->def_stmts = vNULL;
90dd6e3d
RB
1380 continue;
1381 }
1382
6983e6b5
RB
1383 /* If the SLP build for operand zero failed and operand zero
1384 and one can be commutated try that for the scalar stmts
1385 that failed the match. */
1386 if (i == 0
1387 /* A first scalar stmt mismatch signals a fatal mismatch. */
1388 && matches[0]
1389 /* ??? For COND_EXPRs we can swap the comparison operands
1390 as well as the arms under some constraints. */
1391 && nops == 2
1392 && oprnds_info[1]->first_dt == vect_internal_def
b9787581 1393 && is_gimple_assign (stmt_info->stmt)
2153fa7b
RB
1394 /* Swapping operands for reductions breaks assumptions later on. */
1395 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
1396 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
6983e6b5
RB
1397 /* Do so only if the number of not successful permutes was nor more
1398 than a cut-ff as re-trying the recursive match on
1399 possibly each level of the tree would expose exponential
1400 behavior. */
1401 && *npermutes < 4)
1402 {
85c5e2f5
RB
1403 /* See whether we can swap the matching or the non-matching
1404 stmt operands. */
1405 bool swap_not_matching = true;
1406 do
1407 {
1408 for (j = 0; j < group_size; ++j)
1409 {
1410 if (matches[j] != !swap_not_matching)
1411 continue;
b9787581 1412 stmt_vec_info stmt_info = stmts[j];
85c5e2f5 1413 /* Verify if we can swap operands of this stmt. */
b9787581
RS
1414 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1415 if (!stmt
85c5e2f5
RB
1416 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1417 {
1418 if (!swap_not_matching)
1419 goto fail;
1420 swap_not_matching = false;
1421 break;
1422 }
85c5e2f5
RB
1423 }
1424 }
1425 while (j != group_size);
78810bd3 1426
6983e6b5 1427 /* Swap mismatched definition stmts. */
bbeeac91
DM
1428 if (dump_enabled_p ())
1429 dump_printf_loc (MSG_NOTE, vect_location,
1430 "Re-trying with swapped operands of stmts ");
e72baed7 1431 for (j = 0; j < group_size; ++j)
85c5e2f5 1432 if (matches[j] == !swap_not_matching)
6983e6b5 1433 {
6b4db501
MM
1434 std::swap (oprnds_info[0]->def_stmts[j],
1435 oprnds_info[1]->def_stmts[j]);
30c0d1e3
RB
1436 std::swap (oprnds_info[0]->ops[j],
1437 oprnds_info[1]->ops[j]);
bbeeac91
DM
1438 if (dump_enabled_p ())
1439 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1440 }
bbeeac91
DM
1441 if (dump_enabled_p ())
1442 dump_printf (MSG_NOTE, "\n");
74574669
RB
1443 /* And try again with scratch 'matches' ... */
1444 bool *tem = XALLOCAVEC (bool, group_size);
e403d17e
RB
1445 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1446 group_size, &this_max_nunits,
5d8c32cb 1447 tem, npermutes,
9f708a84 1448 &this_tree_size, bst_map)) != NULL)
6983e6b5 1449 {
85c69b0b
RB
1450 /* If we have all children of child built up from scalars then
1451 just throw that away and build it up this node from scalars. */
30c0d1e3
RB
1452 if (is_a <bb_vec_info> (vinfo)
1453 && !SLP_TREE_CHILDREN (child).is_empty ()
995b6fe0
RB
1454 /* ??? Rejecting patterns this way doesn't work. We'd have
1455 to do extra work to cancel the pattern so the uses see the
1456 scalar version. */
7098ab48 1457 && !oprnd_info->any_pattern)
85c69b0b
RB
1458 {
1459 unsigned int j;
1460 slp_tree grandchild;
1461
1462 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
f99d6262 1463 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
85c69b0b
RB
1464 break;
1465 if (!grandchild)
1466 {
1467 /* Roll back. */
e403d17e 1468 this_tree_size = old_tree_size;
85c69b0b 1469 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1470 vect_free_slp_tree (grandchild, false);
85c69b0b
RB
1471 SLP_TREE_CHILDREN (child).truncate (0);
1472
bbeeac91
DM
1473 if (dump_enabled_p ())
1474 dump_printf_loc (MSG_NOTE, vect_location,
1475 "Building parent vector operands from "
1476 "scalars instead\n");
85c69b0b 1477 oprnd_info->def_stmts = vNULL;
603cca93 1478 SLP_TREE_DEF_TYPE (child) = vect_external_def;
30c0d1e3
RB
1479 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1480 oprnd_info->ops = vNULL;
9f708a84 1481 ++this_tree_size;
e403d17e 1482 children.safe_push (child);
85c69b0b
RB
1483 continue;
1484 }
1485 }
1486
6983e6b5 1487 oprnd_info->def_stmts = vNULL;
e403d17e 1488 children.safe_push (child);
6983e6b5
RB
1489 continue;
1490 }
1491
1492 ++*npermutes;
1493 }
1494
78810bd3 1495fail:
e403d17e
RB
1496 gcc_assert (child == NULL);
1497 FOR_EACH_VEC_ELT (children, j, child)
6e2dd807 1498 vect_free_slp_tree (child, false);
6983e6b5 1499 vect_free_oprnd_info (oprnds_info);
e403d17e 1500 return NULL;
ebfd146a
IR
1501 }
1502
e403d17e
RB
1503 vect_free_oprnd_info (oprnds_info);
1504
9f708a84 1505 *tree_size += this_tree_size + 1;
e403d17e 1506 *max_nunits = this_max_nunits;
1428105c 1507
e403d17e
RB
1508 node = vect_create_new_slp_node (stmts);
1509 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1510 SLP_TREE_CHILDREN (node).splice (children);
1511 return node;
ebfd146a
IR
1512}
1513
78c60e3d 1514/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1515
1516static void
4f5b9c80 1517vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
a1f072e2 1518 slp_tree node, hash_set<slp_tree> &visited)
ebfd146a 1519{
6c7b0df8 1520 unsigned i;
b9787581 1521 stmt_vec_info stmt_info;
d755c7ef 1522 slp_tree child;
6c7b0df8 1523 tree op;
ebfd146a 1524
a1f072e2
RB
1525 if (visited.add (node))
1526 return;
1527
3da39f52
DM
1528 dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
1529 dump_user_location_t user_loc = loc.get_user_location ();
f48e4da3 1530 dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
6c7b0df8
RB
1531 SLP_TREE_DEF_TYPE (node) == vect_external_def
1532 ? " (external)"
1533 : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
1534 ? " (constant)"
1535 : ""), node,
f48e4da3 1536 estimated_poly_value (node->max_nunits));
6c7b0df8
RB
1537 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1538 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1539 dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
1540 else
1541 {
1542 dump_printf_loc (metadata, user_loc, "\t{ ");
1543 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1544 dump_printf (metadata, "%T%s ", op,
1545 i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
1546 dump_printf (metadata, "}\n");
1547 }
a1f072e2
RB
1548 if (SLP_TREE_CHILDREN (node).is_empty ())
1549 return;
3da39f52 1550 dump_printf_loc (metadata, user_loc, "\tchildren");
9771b263 1551 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a1f072e2
RB
1552 dump_printf (dump_kind, " %p", (void *)child);
1553 dump_printf (dump_kind, "\n");
1554 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1555 vect_print_slp_tree (dump_kind, loc, child, visited);
ebfd146a
IR
1556}
1557
a1f072e2
RB
1558static void
1559vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1560 slp_tree node)
1561{
1562 hash_set<slp_tree> visited;
1563 vect_print_slp_tree (dump_kind, loc, node, visited);
1564}
ebfd146a 1565
6c7e3b1f 1566/* Mark the tree rooted at NODE with PURE_SLP. */
ebfd146a
IR
1567
1568static void
6c7e3b1f 1569vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
ebfd146a
IR
1570{
1571 int i;
b9787581 1572 stmt_vec_info stmt_info;
d755c7ef 1573 slp_tree child;
ebfd146a 1574
603cca93 1575 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
1576 return;
1577
4bfcf879
RB
1578 if (visited.add (node))
1579 return;
1580
b9787581 1581 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6c7e3b1f 1582 STMT_SLP_TYPE (stmt_info) = pure_slp;
ebfd146a 1583
9771b263 1584 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6c7e3b1f 1585 vect_mark_slp_stmts (child, visited);
ebfd146a
IR
1586}
1587
4bfcf879 1588static void
6c7e3b1f 1589vect_mark_slp_stmts (slp_tree node)
4bfcf879
RB
1590{
1591 hash_set<slp_tree> visited;
6c7e3b1f 1592 vect_mark_slp_stmts (node, visited);
4bfcf879 1593}
ebfd146a 1594
a70d6342
IR
1595/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1596
1597static void
4bfcf879 1598vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
a70d6342
IR
1599{
1600 int i;
a70d6342 1601 stmt_vec_info stmt_info;
d755c7ef 1602 slp_tree child;
a70d6342 1603
603cca93 1604 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
1605 return;
1606
4bfcf879
RB
1607 if (visited.add (node))
1608 return;
1609
b9787581 1610 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
a70d6342 1611 {
b8698a0f 1612 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1613 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1614 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1615 }
1616
9771b263 1617 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4bfcf879
RB
1618 vect_mark_slp_stmts_relevant (child, visited);
1619}
1620
1621static void
1622vect_mark_slp_stmts_relevant (slp_tree node)
1623{
1624 hash_set<slp_tree> visited;
1625 vect_mark_slp_stmts_relevant (node, visited);
a70d6342
IR
1626}
1627
1628
b5aeb3bb
IR
1629/* Rearrange the statements of NODE according to PERMUTATION. */
1630
1631static void
1632vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
a1f072e2
RB
1633 vec<unsigned> permutation,
1634 hash_set<slp_tree> &visited)
b5aeb3bb 1635{
d755c7ef
RB
1636 unsigned int i;
1637 slp_tree child;
b5aeb3bb 1638
a1f072e2
RB
1639 if (visited.add (node))
1640 return;
1641
9771b263 1642 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a1f072e2 1643 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
b5aeb3bb 1644
30c0d1e3
RB
1645 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1646 {
1647 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1648 vec<stmt_vec_info> tmp_stmts;
1649 tmp_stmts.create (group_size);
1650 tmp_stmts.quick_grow (group_size);
1651 stmt_vec_info stmt_info;
1652 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1653 tmp_stmts[permutation[i]] = stmt_info;
1654 SLP_TREE_SCALAR_STMTS (node).release ();
1655 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1656 }
1657 if (SLP_TREE_SCALAR_OPS (node).exists ())
1658 {
1659 gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1660 vec<tree> tmp_ops;
1661 tmp_ops.create (group_size);
1662 tmp_ops.quick_grow (group_size);
1663 tree op;
1664 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1665 tmp_ops[permutation[i]] = op;
1666 SLP_TREE_SCALAR_OPS (node).release ();
1667 SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1668 }
b5aeb3bb
IR
1669}
1670
1671
b266b968
RB
1672/* Attempt to reorder stmts in a reduction chain so that we don't
1673 require any load permutation. Return true if that was possible,
1674 otherwise return false. */
1675
1676static bool
1677vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1678{
1679 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1680 unsigned int i, j;
b266b968
RB
1681 unsigned int lidx;
1682 slp_tree node, load;
1683
1684 /* Compare all the permutation sequences to the first one. We know
1685 that at least one load is permuted. */
1686 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1687 if (!node->load_permutation.exists ())
1688 return false;
1689 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1690 {
1691 if (!load->load_permutation.exists ())
1692 return false;
1693 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1694 if (lidx != node->load_permutation[j])
1695 return false;
1696 }
1697
1698 /* Check that the loads in the first sequence are different and there
1699 are no gaps between them. */
7ba9e72d 1700 auto_sbitmap load_index (group_size);
b266b968
RB
1701 bitmap_clear (load_index);
1702 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1703 {
41eefe13 1704 if (lidx >= group_size)
7ba9e72d 1705 return false;
b266b968 1706 if (bitmap_bit_p (load_index, lidx))
7ba9e72d
TS
1707 return false;
1708
b266b968
RB
1709 bitmap_set_bit (load_index, lidx);
1710 }
1711 for (i = 0; i < group_size; i++)
1712 if (!bitmap_bit_p (load_index, i))
7ba9e72d 1713 return false;
b266b968
RB
1714
1715 /* This permutation is valid for reduction. Since the order of the
1716 statements in the nodes is not important unless they are memory
1717 accesses, we can rearrange the statements in all the nodes
1718 according to the order of the loads. */
a1f072e2 1719 hash_set<slp_tree> visited;
b266b968 1720 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
a1f072e2 1721 node->load_permutation, visited);
b266b968
RB
1722
1723 /* We are done, no actual permutations need to be generated. */
d9f21f6a 1724 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
b266b968 1725 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
c4e360f4 1726 {
b9787581 1727 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1728 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
c4e360f4
RB
1729 /* But we have to keep those permutations that are required because
1730 of handling of gaps. */
d9f21f6a 1731 if (known_eq (unrolling_factor, 1U)
b9787581
RS
1732 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1733 && DR_GROUP_GAP (first_stmt_info) == 0))
c4e360f4 1734 SLP_TREE_LOAD_PERMUTATION (node).release ();
cbd400b4
RB
1735 else
1736 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1737 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
c4e360f4
RB
1738 }
1739
b266b968
RB
1740 return true;
1741}
1742
5d8c32cb
RB
1743/* Gather loads in the SLP graph NODE and populate the INST loads array. */
1744
1745static void
1746vect_gather_slp_loads (slp_instance inst, slp_tree node,
1747 hash_set<slp_tree> &visited)
1748{
1749 if (visited.add (node))
1750 return;
1751
1752 if (SLP_TREE_CHILDREN (node).length () == 0)
1753 {
30c0d1e3
RB
1754 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1755 return;
5d8c32cb 1756 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
30c0d1e3 1757 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
5d8c32cb
RB
1758 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1759 SLP_INSTANCE_LOADS (inst).safe_push (node);
1760 }
1761 else
1762 {
1763 unsigned i;
1764 slp_tree child;
1765 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1766 vect_gather_slp_loads (inst, child, visited);
1767 }
1768}
1769
1770static void
1771vect_gather_slp_loads (slp_instance inst, slp_tree node)
1772{
1773 hash_set<slp_tree> visited;
1774 vect_gather_slp_loads (inst, node, visited);
1775}
1776
01d8bf07
RB
1777/* Check if the required load permutations in the SLP instance
1778 SLP_INSTN are supported. */
ebfd146a
IR
1779
1780static bool
01d8bf07 1781vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1782{
01d8bf07
RB
1783 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1784 unsigned int i, j, k, next;
6983e6b5 1785 slp_tree node;
ebfd146a 1786
73fbfcad 1787 if (dump_enabled_p ())
ebfd146a 1788 {
78c60e3d 1789 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1790 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1791 if (node->load_permutation.exists ())
1792 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1793 dump_printf (MSG_NOTE, "%d ", next);
1794 else
bddc974e
TJ
1795 for (k = 0; k < group_size; ++k)
1796 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1797 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1798 }
1799
b5aeb3bb
IR
1800 /* In case of reduction every load permutation is allowed, since the order
1801 of the reduction statements is not important (as opposed to the case of
0d0293ac 1802 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1803 load nodes are of the same size and have the same permutation (and then
1804 rearrange all the nodes of the SLP instance according to this
1805 permutation). */
1806
1807 /* Check that all the load nodes are of the same size. */
01d8bf07 1808 /* ??? Can't we assert this? */
9771b263 1809 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1810 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1811 return false;
2200fc49 1812
b5aeb3bb 1813 node = SLP_INSTANCE_TREE (slp_instn);
b9787581 1814 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1815
b010117a 1816 /* Reduction (there are no data-refs in the root).
b266b968 1817 In reduction chain the order of the loads is not important. */
b9787581
RS
1818 if (!STMT_VINFO_DATA_REF (stmt_info)
1819 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
c4e360f4 1820 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1821
6aa904c4
IR
1822 /* In basic block vectorization we allow any subchain of an interleaving
1823 chain.
1824 FORNOW: not supported in loop SLP because of realignment compications. */
b9787581 1825 if (STMT_VINFO_BB_VINFO (stmt_info))
6aa904c4 1826 {
240a94da
RB
1827 /* Check whether the loads in an instance form a subchain and thus
1828 no permutation is necessary. */
9771b263 1829 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1830 {
9626d143
RB
1831 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1832 continue;
240a94da 1833 bool subchain_p = true;
bffb8014 1834 stmt_vec_info next_load_info = NULL;
b9787581
RS
1835 stmt_vec_info load_info;
1836 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1837 {
1838 if (j != 0
bffb8014 1839 && (next_load_info != load_info
b9787581 1840 || DR_GROUP_GAP (load_info) != 1))
240a94da
RB
1841 {
1842 subchain_p = false;
1843 break;
1844 }
bffb8014 1845 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
b9787581 1846 }
240a94da
RB
1847 if (subchain_p)
1848 SLP_TREE_LOAD_PERMUTATION (node).release ();
1849 else
1850 {
b9787581 1851 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1852 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
928686b1 1853 unsigned HOST_WIDE_INT nunits;
fe73a332
RB
1854 unsigned k, maxk = 0;
1855 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1856 if (k > maxk)
1857 maxk = k;
1858 /* In BB vectorization we may not actually use a loaded vector
2c53b149 1859 accessing elements in excess of DR_GROUP_SIZE. */
928686b1
RS
1860 tree vectype = STMT_VINFO_VECTYPE (group_info);
1861 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2c53b149 1862 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
fe73a332 1863 {
bbeeac91
DM
1864 if (dump_enabled_p ())
1865 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1866 "BB vectorization with gaps at the end of "
1867 "a load is not supported\n");
fe73a332
RB
1868 return false;
1869 }
1870
240a94da
RB
1871 /* Verify the permutation can be generated. */
1872 vec<tree> tem;
29afecdf 1873 unsigned n_perms;
240a94da 1874 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1875 1, slp_instn, true, &n_perms))
240a94da 1876 {
bbeeac91
DM
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1879 vect_location,
1880 "unsupported load permutation\n");
240a94da
RB
1881 return false;
1882 }
1883 }
6aa904c4 1884 }
01d8bf07 1885 return true;
6aa904c4
IR
1886 }
1887
31bee964
RB
1888 /* For loop vectorization verify we can generate the permutation. Be
1889 conservative about the vectorization factor, there are permutations
1890 that will use three vector inputs only starting from a specific factor
1891 and the vectorization factor is not yet final.
1892 ??? The SLP instance unrolling factor might not be the maximum one. */
29afecdf 1893 unsigned n_perms;
d9f21f6a
RS
1894 poly_uint64 test_vf
1895 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
31bee964 1896 LOOP_VINFO_VECT_FACTOR
b9787581 1897 (STMT_VINFO_LOOP_VINFO (stmt_info)));
01d8bf07
RB
1898 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1899 if (node->load_permutation.exists ()
31bee964
RB
1900 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1901 slp_instn, true, &n_perms))
01d8bf07 1902 return false;
9b999e8c 1903
01d8bf07 1904 return true;
ebfd146a
IR
1905}
1906
1907
e4a707c4 1908/* Find the last store in SLP INSTANCE. */
ff802fa1 1909
95c68311 1910stmt_vec_info
2e8ab70c 1911vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1912{
95c68311 1913 stmt_vec_info last = NULL;
b9787581 1914 stmt_vec_info stmt_vinfo;
e4a707c4 1915
b9787581 1916 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2e8ab70c 1917 {
211cd1e2 1918 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
95c68311 1919 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2e8ab70c 1920 }
e4a707c4 1921
2e8ab70c 1922 return last;
e4a707c4
IR
1923}
1924
82570274
RS
1925/* Splits a group of stores, currently beginning at FIRST_VINFO, into
1926 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1927 (also containing the first GROUP1_SIZE stmts, since stores are
1928 consecutive), the second containing the remainder.
1ba91a49
AL
1929 Return the first stmt in the second group. */
1930
82570274
RS
1931static stmt_vec_info
1932vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1ba91a49 1933{
bffb8014 1934 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1ba91a49 1935 gcc_assert (group1_size > 0);
2c53b149 1936 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1ba91a49 1937 gcc_assert (group2_size > 0);
2c53b149 1938 DR_GROUP_SIZE (first_vinfo) = group1_size;
1ba91a49 1939
bffb8014 1940 stmt_vec_info stmt_info = first_vinfo;
1ba91a49
AL
1941 for (unsigned i = group1_size; i > 1; i--)
1942 {
bffb8014
RS
1943 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1944 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1945 }
1946 /* STMT is now the last element of the first group. */
bffb8014
RS
1947 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1948 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1ba91a49 1949
bffb8014
RS
1950 DR_GROUP_SIZE (group2) = group2_size;
1951 for (stmt_info = group2; stmt_info;
1952 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1ba91a49 1953 {
bffb8014
RS
1954 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1955 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1956 }
1957
2c53b149 1958 /* For the second group, the DR_GROUP_GAP is that before the original group,
1ba91a49 1959 plus skipping over the first vector. */
bffb8014 1960 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1ba91a49 1961
2c53b149
RB
1962 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1963 DR_GROUP_GAP (first_vinfo) += group2_size;
1ba91a49
AL
1964
1965 if (dump_enabled_p ())
1966 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1967 group1_size, group2_size);
1968
1969 return group2;
1970}
1971
4b6068ea
RS
1972/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1973 statements and a vector of NUNITS elements. */
1974
1975static poly_uint64
1976calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1977{
1978 return exact_div (common_multiple (nunits, group_size), group_size);
1979}
1980
0d0293ac 1981/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1982 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1983 Return FALSE if it's impossible to SLP any stmt in the loop. */
1984
1985static bool
310213d4 1986vect_analyze_slp_instance (vec_info *vinfo,
32e8e429 1987 stmt_vec_info stmt_info, unsigned max_tree_size)
ebfd146a
IR
1988{
1989 slp_instance new_instance;
d092494c 1990 slp_tree node;
2c53b149 1991 unsigned int group_size;
b5aeb3bb 1992 tree vectype, scalar_type = NULL_TREE;
1ba91a49 1993 unsigned int i;
b9787581
RS
1994 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1995 vec<stmt_vec_info> scalar_stmts;
818b3293 1996 bool constructor = false;
b5aeb3bb 1997
b9787581 1998 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
b5aeb3bb 1999 {
2c53b149 2000 scalar_type = TREE_TYPE (DR_REF (dr));
7ed54790 2001 vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
b9787581 2002 group_size = DR_GROUP_SIZE (stmt_info);
2c53b149 2003 }
b9787581 2004 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
2005 {
2006 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581
RS
2007 vectype = STMT_VINFO_VECTYPE (stmt_info);
2008 group_size = REDUC_GROUP_SIZE (stmt_info);
b5aeb3bb 2009 }
818b3293
JH
2010 else if (is_gimple_assign (stmt_info->stmt)
2011 && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2012 {
2013 vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2014 group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2015 constructor = true;
2016 }
b5aeb3bb
IR
2017 else
2018 {
310213d4 2019 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581 2020 vectype = STMT_VINFO_VECTYPE (stmt_info);
310213d4 2021 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 2022 }
b8698a0f 2023
ebfd146a
IR
2024 if (!vectype)
2025 {
73fbfcad 2026 if (dump_enabled_p ())
3c2a8ed0
DM
2027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2028 "Build SLP failed: unsupported data-type %T\n",
2029 scalar_type);
b5aeb3bb 2030
ebfd146a
IR
2031 return false;
2032 }
4b6068ea 2033 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 2034
0d0293ac 2035 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 2036 scalar_stmts.create (group_size);
bffb8014 2037 stmt_vec_info next_info = stmt_info;
b9787581 2038 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 2039 {
b5aeb3bb 2040 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
bffb8014 2041 while (next_info)
b5aeb3bb 2042 {
6e6b18e5 2043 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 2044 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2c53b149
RB
2045 }
2046 }
b9787581 2047 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
2048 {
2049 /* Collect the reduction stmts and store them in
2050 SLP_TREE_SCALAR_STMTS. */
bffb8014 2051 while (next_info)
2c53b149 2052 {
6e6b18e5 2053 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 2054 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
b5aeb3bb 2055 }
14a61437
RB
2056 /* Mark the first element of the reduction chain as reduction to properly
2057 transform the node. In the reduction analysis phase only the last
2058 element of the chain is marked as reduction. */
b4673569
RB
2059 STMT_VINFO_DEF_TYPE (stmt_info)
2060 = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
ef6e6914
RB
2061 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2062 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
b5aeb3bb 2063 }
818b3293
JH
2064 else if (constructor)
2065 {
2066 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2067 tree val;
2068 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2069 {
2070 if (TREE_CODE (val) == SSA_NAME)
2071 {
2072 gimple* def = SSA_NAME_DEF_STMT (val);
2073 stmt_vec_info def_info = vinfo->lookup_stmt (def);
2074 /* Value is defined in another basic block. */
2075 if (!def_info)
2076 return false;
2077 scalar_stmts.safe_push (def_info);
2078 }
2079 else
2080 return false;
2081 }
2082 }
b5aeb3bb
IR
2083 else
2084 {
2085 /* Collect reduction statements. */
32c91dfc
RS
2086 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2087 for (i = 0; reductions.iterate (i, &next_info); i++)
2088 scalar_stmts.safe_push (next_info);
ebfd146a
IR
2089 }
2090
ebfd146a 2091 /* Build the tree for the SLP instance. */
89d390e5
RB
2092 bool *matches = XALLOCAVEC (bool, group_size);
2093 unsigned npermutes = 0;
a1f072e2
RB
2094 scalar_stmts_to_slp_tree_map_t *bst_map
2095 = new scalar_stmts_to_slp_tree_map_t ();
4b6068ea 2096 poly_uint64 max_nunits = nunits;
9f708a84 2097 unsigned tree_size = 0;
e569db5f 2098 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
5d8c32cb 2099 &max_nunits, matches, &npermutes,
9f708a84 2100 &tree_size, bst_map);
a1f072e2
RB
2101 /* The map keeps a reference on SLP nodes built, release that. */
2102 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2103 it != bst_map->end (); ++it)
2104 if ((*it).second)
2105 vect_free_slp_tree ((*it).second, false);
2106 delete bst_map;
e569db5f 2107 if (node != NULL)
ebfd146a 2108 {
b4673569
RB
2109 /* If this is a reduction chain with a conversion in front
2110 amend the SLP tree with a node for that. */
2111 if (!dr
2112 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2113 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2114 {
2115 /* Get at the conversion stmt - we know it's the single use
2116 of the last stmt of the reduction chain. */
2117 gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2118 use_operand_p use_p;
2119 gimple *use_stmt;
2120 bool r = single_imm_use (gimple_assign_lhs (tem), &use_p, &use_stmt);
2121 gcc_assert (r);
2122 next_info = vinfo->lookup_stmt (use_stmt);
2123 next_info = vect_stmt_to_vectorize (next_info);
2124 scalar_stmts = vNULL;
2125 scalar_stmts.create (group_size);
2126 for (unsigned i = 0; i < group_size; ++i)
2127 scalar_stmts.quick_push (next_info);
2128 slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2129 SLP_TREE_CHILDREN (conv).quick_push (node);
2130 node = conv;
2131 /* We also have to fake this conversion stmt as SLP reduction group
2132 so we don't have to mess with too much code elsewhere. */
2133 REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2134 REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2135 }
2136
4ef69dfc 2137 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 2138 poly_uint64 unrolling_factor
4b6068ea 2139 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 2140
d9f21f6a 2141 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
2142 && is_a <bb_vec_info> (vinfo))
2143 {
4b6068ea
RS
2144 unsigned HOST_WIDE_INT const_max_nunits;
2145 if (!max_nunits.is_constant (&const_max_nunits)
2146 || const_max_nunits > group_size)
2147 {
2148 if (dump_enabled_p ())
2149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2150 "Build SLP failed: store group "
2151 "size not a multiple of the vector size "
2152 "in basic block SLP\n");
6e2dd807 2153 vect_free_slp_tree (node, false);
4b6068ea
RS
2154 return false;
2155 }
e569db5f 2156 /* Fatal mismatch. */
4b6068ea 2157 matches[group_size / const_max_nunits * const_max_nunits] = false;
6e2dd807 2158 vect_free_slp_tree (node, false);
e569db5f
VK
2159 }
2160 else
2161 {
a1f072e2 2162 /* Create a new SLP instance. */
99b1c316 2163 new_instance = XNEW (class _slp_instance);
a1f072e2
RB
2164 SLP_INSTANCE_TREE (new_instance) = node;
2165 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2166 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
5d8c32cb 2167 SLP_INSTANCE_LOADS (new_instance) = vNULL;
818b3293
JH
2168 SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2169
5d8c32cb 2170 vect_gather_slp_loads (new_instance, node);
9f708a84
RB
2171 if (dump_enabled_p ())
2172 dump_printf_loc (MSG_NOTE, vect_location,
2173 "SLP size %u vs. limit %u.\n",
2174 tree_size, max_tree_size);
a1f072e2
RB
2175
2176 /* Compute the load permutation. */
2177 slp_tree load_node;
2178 bool loads_permuted = false;
5d8c32cb 2179 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
abf9bfbc 2180 {
a1f072e2
RB
2181 vec<unsigned> load_permutation;
2182 int j;
2183 stmt_vec_info load_info;
2184 bool this_load_permuted = false;
2185 load_permutation.create (group_size);
2186 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2187 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2188 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2189 {
2190 int load_place = vect_get_place_in_interleaving_chain
2191 (load_info, first_stmt_info);
2192 gcc_assert (load_place != -1);
2193 if (load_place != j)
2194 this_load_permuted = true;
2195 load_permutation.safe_push (load_place);
2196 }
2197 if (!this_load_permuted
2198 /* The load requires permutation when unrolling exposes
2199 a gap either because the group is larger than the SLP
2200 group-size or because there is a gap between the groups. */
2201 && (known_eq (unrolling_factor, 1U)
2202 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2203 && DR_GROUP_GAP (first_stmt_info) == 0)))
2204 {
2205 load_permutation.release ();
2206 continue;
2207 }
2208 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2209 loads_permuted = true;
abf9bfbc 2210 }
a1f072e2
RB
2211
2212 if (loads_permuted)
01d8bf07 2213 {
a1f072e2
RB
2214 if (!vect_supported_load_permutation_p (new_instance))
2215 {
2216 if (dump_enabled_p ())
2217 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2218 "Build SLP failed: unsupported load "
2219 "permutation %G", stmt_info->stmt);
2220 vect_free_slp_instance (new_instance, false);
2221 return false;
2222 }
01d8bf07 2223 }
ebfd146a 2224
e569db5f 2225 /* If the loads and stores can be handled with load/store-lan
a1f072e2
RB
2226 instructions do not generate this SLP instance. */
2227 if (is_a <loop_vec_info> (vinfo)
2228 && loads_permuted
2229 && dr && vect_store_lanes_supported (vectype, group_size, false))
bb0f5ca7 2230 {
a1f072e2 2231 slp_tree load_node;
5d8c32cb 2232 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
a1f072e2
RB
2233 {
2234 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2235 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2236 /* Use SLP for strided accesses (or if we can't load-lanes). */
2237 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2238 || ! vect_load_lanes_supported
2239 (STMT_VINFO_VECTYPE (stmt_vinfo),
2240 DR_GROUP_SIZE (stmt_vinfo), false))
2241 break;
2242 }
5d8c32cb 2243 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
a1f072e2
RB
2244 {
2245 if (dump_enabled_p ())
2246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2247 "Built SLP cancelled: can use "
2248 "load/store-lanes\n");
2249 vect_free_slp_instance (new_instance, false);
2250 return false;
2251 }
bb0f5ca7 2252 }
a1f072e2
RB
2253
2254 vinfo->slp_instances.safe_push (new_instance);
2255
2256 if (dump_enabled_p ())
bb0f5ca7 2257 {
a1f072e2
RB
2258 dump_printf_loc (MSG_NOTE, vect_location,
2259 "Final SLP tree for instance:\n");
2260 vect_print_slp_tree (MSG_NOTE, vect_location, node);
bb0f5ca7 2261 }
bb0f5ca7 2262
a1f072e2 2263 return true;
c2a12ca0 2264 }
e569db5f
VK
2265 }
2266 else
2267 {
a1f072e2
RB
2268 /* Failed to SLP. */
2269 /* Free the allocated memory. */
2270 scalar_stmts.release ();
e569db5f 2271 }
b8698a0f 2272
1ba91a49 2273 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2274 vector size. */
4b6068ea 2275 unsigned HOST_WIDE_INT const_nunits;
1ba91a49 2276 if (is_a <bb_vec_info> (vinfo)
91987857
RS
2277 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2278 && DR_GROUP_FIRST_ELEMENT (stmt_info)
4b6068ea 2279 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2280 {
2281 /* We consider breaking the group only on VF boundaries from the existing
2282 start. */
2283 for (i = 0; i < group_size; i++)
2284 if (!matches[i]) break;
2285
4b6068ea 2286 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2287 {
2288 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2289 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2290 unsigned group1_size = i & ~(const_nunits - 1);
1ba91a49 2291
82570274
RS
2292 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2293 group1_size);
86a91c0a
RS
2294 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2295 max_tree_size);
1ba91a49
AL
2296 /* If the first non-match was in the middle of a vector,
2297 skip the rest of that vector. */
2298 if (group1_size < i)
2299 {
4b6068ea 2300 i = group1_size + const_nunits;
1ba91a49 2301 if (i < group_size)
4b6068ea 2302 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2303 }
2304 if (i < group_size)
2305 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2306 return res;
2307 }
2308 /* Even though the first vector did not all match, we might be able to SLP
2309 (some) of the remainder. FORNOW ignore this possibility. */
2310 }
2311
a70d6342 2312 return false;
ebfd146a
IR
2313}
2314
2315
ff802fa1 2316/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
2317 trees of packed scalar stmts if SLP is possible. */
2318
f4ebbd24 2319opt_result
310213d4 2320vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2321{
2322 unsigned int i;
f698fccf 2323 stmt_vec_info first_element;
ebfd146a 2324
adac3a68 2325 DUMP_VECT_SCOPE ("vect_analyze_slp");
ebfd146a 2326
0d0293ac 2327 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2328 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
0630a4ec 2329 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
ebfd146a 2330
310213d4 2331 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2332 {
310213d4
RB
2333 if (loop_vinfo->reduction_chains.length () > 0)
2334 {
2335 /* Find SLP sequences starting from reduction chains. */
2336 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
0630a4ec 2337 if (! vect_analyze_slp_instance (vinfo, first_element,
310213d4 2338 max_tree_size))
6b5e165b
RB
2339 {
2340 /* Dissolve reduction chain group. */
f698fccf 2341 stmt_vec_info vinfo = first_element;
0214d31a 2342 stmt_vec_info last = NULL;
f698fccf 2343 while (vinfo)
6b5e165b 2344 {
bffb8014 2345 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2c53b149
RB
2346 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2347 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
0214d31a 2348 last = vinfo;
f698fccf 2349 vinfo = next;
6b5e165b 2350 }
f698fccf 2351 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
0214d31a
RB
2352 /* It can be still vectorized as part of an SLP reduction. */
2353 loop_vinfo->reductions.safe_push (last);
6b5e165b 2354 }
310213d4 2355 }
b010117a 2356
310213d4 2357 /* Find SLP sequences starting from groups of reductions. */
0630a4ec
RB
2358 if (loop_vinfo->reductions.length () > 1)
2359 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2360 max_tree_size);
310213d4 2361 }
b5aeb3bb 2362
f4ebbd24 2363 return opt_result::success ();
ebfd146a
IR
2364}
2365
2366
2367/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2368 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2369 least one instance. */
ebfd146a 2370
437f4a00 2371bool
ebfd146a
IR
2372vect_make_slp_decision (loop_vec_info loop_vinfo)
2373{
d9f21f6a
RS
2374 unsigned int i;
2375 poly_uint64 unrolling_factor = 1;
9771b263 2376 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2377 slp_instance instance;
2378 int decided_to_slp = 0;
2379
adac3a68 2380 DUMP_VECT_SCOPE ("vect_make_slp_decision");
ebfd146a 2381
9771b263 2382 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2383 {
2384 /* FORNOW: SLP if you can. */
ba7f76dd 2385 /* All unroll factors have the form vinfo->vector_size * X for some
d9f21f6a
RS
2386 rational X, so they must have a common multiple. */
2387 unrolling_factor
2388 = force_common_multiple (unrolling_factor,
2389 SLP_INSTANCE_UNROLLING_FACTOR (instance));
ebfd146a 2390
ff802fa1 2391 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2392 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2393 loop-based vectorization. Such stmts will be marked as HYBRID. */
6c7e3b1f 2394 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
ebfd146a
IR
2395 decided_to_slp++;
2396 }
2397
2398 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2399
73fbfcad 2400 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2401 {
2402 dump_printf_loc (MSG_NOTE, vect_location,
2403 "Decided to SLP %d instances. Unrolling factor ",
2404 decided_to_slp);
2405 dump_dec (MSG_NOTE, unrolling_factor);
2406 dump_printf (MSG_NOTE, "\n");
2407 }
437f4a00
IR
2408
2409 return (decided_to_slp > 0);
ebfd146a
IR
2410}
2411
2412
2413/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2414 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2415
2416static void
4bfcf879 2417vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
6c7e3b1f 2418 hash_map<slp_tree, unsigned> &visited)
ebfd146a 2419{
b9787581 2420 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2421 imm_use_iterator imm_iter;
355fe088 2422 gimple *use_stmt;
b9787581 2423 stmt_vec_info use_vinfo;
d755c7ef 2424 slp_tree child;
f2c74cc4 2425 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2426 int j;
2427
6c7e3b1f
RB
2428 /* We need to union stype over the incoming graph edges but we still
2429 want to limit recursion to stay O(N+E). */
2430 bool only_edge = (++visited.get_or_insert (node) < node->refcnt);
4bfcf879 2431
642fce57
RB
2432 /* Propagate hybrid down the SLP tree. */
2433 if (stype == hybrid)
2434 ;
2435 else if (HYBRID_SLP_STMT (stmt_vinfo))
2436 stype = hybrid;
6c7e3b1f 2437 else if (!only_edge)
642fce57
RB
2438 {
2439 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2440 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2441 /* If we get a pattern stmt here we have to use the LHS of the
2442 original stmt for immediate uses. */
211cd1e2 2443 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
e7baeb39
RB
2444 tree def;
2445 if (gimple_code (stmt) == GIMPLE_PHI)
2446 def = gimple_phi_result (stmt);
2447 else
2448 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2449 if (def)
2450 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870 2451 {
6585ff8f
RS
2452 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2453 if (!use_vinfo)
29764870 2454 continue;
6e6b18e5 2455 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
29764870
RB
2456 if (!STMT_SLP_TYPE (use_vinfo)
2457 && (STMT_VINFO_RELEVANT (use_vinfo)
2458 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2459 && !(gimple_code (use_stmt) == GIMPLE_PHI
2460 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2461 {
2462 if (dump_enabled_p ())
3c2a8ed0
DM
2463 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2464 "def in non-SLP stmt: %G", use_stmt);
502f0263
RB
2465 stype = hybrid;
2466 }
29764870 2467 }
642fce57 2468 }
ebfd146a 2469
502f0263
RB
2470 if (stype == hybrid
2471 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2472 {
2473 if (dump_enabled_p ())
3c2a8ed0
DM
2474 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2475 stmt_vinfo->stmt);
b1af7da6
RB
2476 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2477 }
ebfd146a 2478
6c7e3b1f
RB
2479 if (!only_edge)
2480 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2481 if (SLP_TREE_DEF_TYPE (child) != vect_external_def
2482 && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
6c7e3b1f 2483 vect_detect_hybrid_slp_stmts (child, i, stype, visited);
4bfcf879
RB
2484}
2485
2486static void
2487vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2488{
6c7e3b1f 2489 hash_map<slp_tree, unsigned> visited;
4bfcf879 2490 vect_detect_hybrid_slp_stmts (node, i, stype, visited);
642fce57 2491}
f2c74cc4 2492
642fce57 2493/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2494
642fce57
RB
2495static tree
2496vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2497{
2498 walk_stmt_info *wi = (walk_stmt_info *)data;
6585ff8f 2499 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
642fce57
RB
2500
2501 if (wi->is_lhs)
2502 return NULL_TREE;
2503
c98d0595
RS
2504 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2505 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
642fce57 2506 {
6585ff8f 2507 if (dump_enabled_p ())
3c2a8ed0
DM
2508 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2509 def_stmt_info->stmt);
6585ff8f 2510 STMT_SLP_TYPE (def_stmt_info) = hybrid;
642fce57
RB
2511 }
2512
2513 return NULL_TREE;
ebfd146a
IR
2514}
2515
642fce57
RB
2516static tree
2517vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
6585ff8f 2518 walk_stmt_info *wi)
642fce57 2519{
6585ff8f
RS
2520 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2521 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
642fce57
RB
2522 /* If the stmt is in a SLP instance then this isn't a reason
2523 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2524 if (! STMT_SLP_TYPE (use_vinfo)
2525 && (STMT_VINFO_RELEVANT (use_vinfo)
2526 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2527 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2528 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2529 ;
2530 else
642fce57
RB
2531 *handled = true;
2532 return NULL_TREE;
2533}
ebfd146a
IR
2534
2535/* Find stmts that must be both vectorized and SLPed. */
2536
2537void
2538vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2539{
2540 unsigned int i;
9771b263 2541 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2542 slp_instance instance;
2543
adac3a68 2544 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
ebfd146a 2545
642fce57
RB
2546 /* First walk all pattern stmt in the loop and mark defs of uses as
2547 hybrid because immediate uses in them are not recorded. */
2548 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2549 {
2550 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2551 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2552 gsi_next (&gsi))
2553 {
355fe088 2554 gimple *stmt = gsi_stmt (gsi);
6585ff8f 2555 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
642fce57
RB
2556 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2557 {
2558 walk_stmt_info wi;
2559 memset (&wi, 0, sizeof (wi));
6585ff8f 2560 wi.info = loop_vinfo;
642fce57 2561 gimple_stmt_iterator gsi2
10681ce8 2562 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
642fce57
RB
2563 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2564 vect_detect_hybrid_slp_1, &wi);
2565 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2566 vect_detect_hybrid_slp_2,
2567 vect_detect_hybrid_slp_1, &wi);
2568 }
2569 }
2570 }
2571
2572 /* Then walk the SLP instance trees marking stmts with uses in
2573 non-SLP stmts as hybrid, also propagating hybrid down the
2574 SLP tree, collecting the above info on-the-fly. */
9771b263 2575 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2576 {
2577 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2578 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2579 i, pure_slp);
2580 }
ebfd146a
IR
2581}
2582
a70d6342 2583
2c515559
RS
2584/* Initialize a bb_vec_info struct for the statements between
2585 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2586
2587_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
ca823c85
RB
2588 gimple_stmt_iterator region_end_in,
2589 vec_info_shared *shared)
2590 : vec_info (vec_info::bb, init_cost (NULL), shared),
2c515559
RS
2591 bb (gsi_bb (region_begin_in)),
2592 region_begin (region_begin_in),
2593 region_end (region_end_in)
a70d6342 2594{
a70d6342
IR
2595 gimple_stmt_iterator gsi;
2596
61d371eb
RB
2597 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2598 gsi_next (&gsi))
a70d6342 2599 {
355fe088 2600 gimple *stmt = gsi_stmt (gsi);
a70d6342 2601 gimple_set_uid (stmt, 0);
4fbeb363 2602 add_stmt (stmt);
a70d6342
IR
2603 }
2604
2c515559 2605 bb->aux = this;
a70d6342
IR
2606}
2607
2608
2609/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2610 stmts in the basic block. */
2611
2c515559 2612_bb_vec_info::~_bb_vec_info ()
a70d6342 2613{
2c515559
RS
2614 for (gimple_stmt_iterator si = region_begin;
2615 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
458135c0
RS
2616 /* Reset region marker. */
2617 gimple_set_uid (gsi_stmt (si), -1);
a70d6342 2618
2c515559 2619 bb->aux = NULL;
a70d6342
IR
2620}
2621
15944069
RS
2622/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2623 given then that child nodes have already been processed, and that
2624 their def types currently match their SLP node's def type. */
a70d6342
IR
2625
2626static bool
15944069
RS
2627vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2628 slp_instance node_instance,
2629 stmt_vector_for_cost *cost_vec)
a70d6342 2630{
b9787581 2631 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bd2f172f
RB
2632 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2633
2634 /* For BB vectorization vector types are assigned here.
2635 Memory accesses already got their vector type assigned
2636 in vect_analyze_data_refs. */
2637 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2638 if (bb_vinfo
2639 && ! STMT_VINFO_DATA_REF (stmt_info))
a70d6342 2640 {
1f3cb663
RS
2641 tree vectype, nunits_vectype;
2642 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2643 &nunits_vectype))
2644 /* We checked this when building the node. */
2645 gcc_unreachable ();
2646 if (vectype == boolean_type_node)
bd2f172f 2647 {
1f3cb663
RS
2648 vectype = vect_get_mask_type_for_stmt (stmt_info);
2649 if (!vectype)
2650 /* vect_get_mask_type_for_stmt has already explained the
2651 failure. */
2652 return false;
bd2f172f
RB
2653 }
2654
b9787581 2655 stmt_vec_info sstmt_info;
15944069 2656 unsigned int i;
b9787581
RS
2657 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2658 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
a70d6342
IR
2659 }
2660
8b7e9dba
RS
2661 /* Calculate the number of vector statements to be created for the
2662 scalar stmts in this node. For SLP reductions it is equal to the
2663 number of vector statements in the children (which has already been
2664 calculated by the recursive call). Otherwise it is the number of
2c53b149 2665 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
8b7e9dba 2666 VF divided by the number of elements in a vector. */
2c53b149
RB
2667 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2668 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
30c0d1e3
RB
2669 {
2670 for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2671 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2672 {
2673 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2674 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2675 break;
2676 }
2677 }
8b7e9dba
RS
2678 else
2679 {
d9f21f6a 2680 poly_uint64 vf;
8b7e9dba
RS
2681 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2682 vf = loop_vinfo->vectorization_factor;
2683 else
2684 vf = 1;
2685 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2686 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2687 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2688 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2689 }
2690
15944069 2691 bool dummy;
86a91c0a 2692 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
15944069
RS
2693}
2694
2695/* Analyze statements contained in SLP tree NODE after recursively analyzing
2696 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2697
2698 Return true if the operations are supported. */
2699
2700static bool
2701vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2702 slp_instance node_instance,
2703 scalar_stmts_to_slp_tree_map_t *visited,
2704 scalar_stmts_to_slp_tree_map_t *lvisited,
2705 stmt_vector_for_cost *cost_vec)
2706{
2707 int i, j;
2708 slp_tree child;
2709
2710 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2711 return true;
2712
2713 /* If we already analyzed the exact same set of scalar stmts we're done.
2714 We share the generated vector stmts for those. */
2715 slp_tree *leader;
2716 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2717 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2718 {
2719 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2720 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2721 return true;
2722 }
2723
2724 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2725 doesn't result in any issue since we throw away the lvisited set
2726 when we fail. */
2727 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2728
2729 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2730 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2731 visited, lvisited, cost_vec))
2732 return false;
2733
f7b94dec
RB
2734 /* ??? We have to catch the case late where two first scalar stmts appear
2735 in multiple SLP children with different def type and fail. Remember
2736 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2737 match it when that is vect_internal_def. */
2738 auto_vec<vect_def_type, 4> dt;
2739 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2740 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2741 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2742 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
f7b94dec 2743
bd2f172f
RB
2744 /* Push SLP node def-type to stmt operands. */
2745 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2746 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2747 && SLP_TREE_SCALAR_STMTS (child).length () != 0)
b9787581 2748 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
bd2f172f 2749 = SLP_TREE_DEF_TYPE (child);
f7b94dec
RB
2750
2751 /* Check everything worked out. */
2752 bool res = true;
bd2f172f 2753 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2754 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2755 {
2756 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2757 {
2758 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2759 != SLP_TREE_DEF_TYPE (child))
2760 res = false;
2761 }
2762 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2763 != dt[j])
2764 res = false;
2765 }
f7b94dec
RB
2766 if (!res && dump_enabled_p ())
2767 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2768 "not vectorized: same operand with different "
2769 "def type in stmt.\n");
bd2f172f 2770
f7b94dec
RB
2771 if (res)
2772 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2773 cost_vec);
2774
2775 /* Restore def-types. */
2776 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
30c0d1e3
RB
2777 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2778 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
f7b94dec
RB
2779
2780 return res;
a70d6342
IR
2781}
2782
2783
8b7e9dba 2784/* Analyze statements in SLP instances of VINFO. Return true if the
a70d6342
IR
2785 operations are supported. */
2786
a12e42fc 2787bool
8b7e9dba 2788vect_slp_analyze_operations (vec_info *vinfo)
a70d6342 2789{
a70d6342
IR
2790 slp_instance instance;
2791 int i;
2792
adac3a68 2793 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
a12e42fc 2794
68435eb2
RB
2795 scalar_stmts_to_slp_tree_map_t *visited
2796 = new scalar_stmts_to_slp_tree_map_t ();
8b7e9dba 2797 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
a70d6342 2798 {
68435eb2
RB
2799 scalar_stmts_to_slp_tree_map_t lvisited;
2800 stmt_vector_for_cost cost_vec;
2801 cost_vec.create (2);
8b7e9dba
RS
2802 if (!vect_slp_analyze_node_operations (vinfo,
2803 SLP_INSTANCE_TREE (instance),
68435eb2
RB
2804 instance, visited, &lvisited,
2805 &cost_vec))
a70d6342 2806 {
b9787581
RS
2807 slp_tree node = SLP_INSTANCE_TREE (instance);
2808 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bbeeac91
DM
2809 if (dump_enabled_p ())
2810 dump_printf_loc (MSG_NOTE, vect_location,
2811 "removing SLP instance operations starting from: %G",
2812 stmt_info->stmt);
6e2dd807 2813 vect_free_slp_instance (instance, false);
8b7e9dba 2814 vinfo->slp_instances.ordered_remove (i);
68435eb2 2815 cost_vec.release ();
a70d6342
IR
2816 }
2817 else
68435eb2
RB
2818 {
2819 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2820 x != lvisited.end(); ++x)
2821 visited->put ((*x).first.copy (), (*x).second);
2822 i++;
78604de0 2823
68435eb2
RB
2824 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2825 cost_vec.release ();
2826 }
2827 }
78604de0
RB
2828 delete visited;
2829
8b7e9dba 2830 return !vinfo->slp_instances.is_empty ();
a70d6342
IR
2831}
2832
6eddf228
RB
2833
2834/* Compute the scalar cost of the SLP node NODE and its children
2835 and return it. Do not account defs that are marked in LIFE and
2836 update LIFE according to uses of NODE. */
2837
a296d6d3 2838static void
292cba13 2839vect_bb_slp_scalar_cost (basic_block bb,
a296d6d3 2840 slp_tree node, vec<bool, va_heap> *life,
4bfcf879
RB
2841 stmt_vector_for_cost *cost_vec,
2842 hash_set<slp_tree> &visited)
6eddf228 2843{
6eddf228 2844 unsigned i;
b9787581 2845 stmt_vec_info stmt_info;
6eddf228
RB
2846 slp_tree child;
2847
4bfcf879
RB
2848 if (visited.add (node))
2849 return;
2850
b9787581 2851 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6eddf228 2852 {
b9787581 2853 gimple *stmt = stmt_info->stmt;
91987857 2854 vec_info *vinfo = stmt_info->vinfo;
6eddf228
RB
2855 ssa_op_iter op_iter;
2856 def_operand_p def_p;
6eddf228 2857
ff4c81cc 2858 if ((*life)[i])
6eddf228
RB
2859 continue;
2860
2861 /* If there is a non-vectorized use of the defs then the scalar
2862 stmt is kept live in which case we do not account it or any
2863 required defs in the SLP children in the scalar cost. This
2864 way we make the vectorization more costly when compared to
2865 the scalar cost. */
2866 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2867 {
2868 imm_use_iterator use_iter;
355fe088 2869 gimple *use_stmt;
6eddf228 2870 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
91987857 2871 if (!is_gimple_debug (use_stmt))
6eddf228 2872 {
91987857
RS
2873 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2874 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2875 {
2876 (*life)[i] = true;
2877 BREAK_FROM_IMM_USE_STMT (use_iter);
2878 }
6eddf228
RB
2879 }
2880 }
ff4c81cc 2881 if ((*life)[i])
6eddf228
RB
2882 continue;
2883
b555a2e4
RB
2884 /* Count scalar stmts only once. */
2885 if (gimple_visited_p (stmt))
2886 continue;
2887 gimple_set_visited (stmt, true);
2888
a296d6d3 2889 vect_cost_for_stmt kind;
6eddf228
RB
2890 if (STMT_VINFO_DATA_REF (stmt_info))
2891 {
2892 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
a296d6d3 2893 kind = scalar_load;
6eddf228 2894 else
a296d6d3 2895 kind = scalar_store;
6eddf228
RB
2896 }
2897 else
a296d6d3
RB
2898 kind = scalar_stmt;
2899 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
6eddf228
RB
2900 }
2901
faa5399b 2902 auto_vec<bool, 20> subtree_life;
6eddf228 2903 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
2904 {
2905 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2906 {
2907 /* Do not directly pass LIFE to the recursive call, copy it to
2908 confine changes in the callee to the current child/subtree. */
2909 subtree_life.safe_splice (*life);
4bfcf879
RB
2910 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
2911 visited);
faa5399b
RB
2912 subtree_life.truncate (0);
2913 }
2914 }
6eddf228
RB
2915}
2916
4bfcf879
RB
2917static void
2918vect_bb_slp_scalar_cost (basic_block bb,
2919 slp_tree node, vec<bool, va_heap> *life,
2920 stmt_vector_for_cost *cost_vec)
2921{
2922 hash_set<slp_tree> visited;
2923 vect_bb_slp_scalar_cost (bb, node, life, cost_vec, visited);
2924}
2925
69f11a13
IR
2926/* Check if vectorization of the basic block is profitable. */
2927
2928static bool
2929vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2930{
9771b263 2931 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2932 slp_instance instance;
1a4b99c1 2933 int i;
c3e7ee41 2934 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2935 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2936
2937 /* Calculate scalar cost. */
a296d6d3
RB
2938 stmt_vector_for_cost scalar_costs;
2939 scalar_costs.create (0);
6eddf228 2940 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2941 {
00f96dc9 2942 auto_vec<bool, 20> life;
ff4c81cc 2943 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
a296d6d3
RB
2944 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2945 SLP_INSTANCE_TREE (instance),
2946 &life, &scalar_costs);
2947 }
2948 void *target_cost_data = init_cost (NULL);
68435eb2 2949 add_stmt_costs (target_cost_data, &scalar_costs);
a296d6d3
RB
2950 scalar_costs.release ();
2951 unsigned dummy;
2952 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2953 destroy_cost_data (target_cost_data);
69f11a13 2954
b555a2e4
RB
2955 /* Unset visited flag. */
2956 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2957 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2958 gimple_set_visited (gsi_stmt (gsi), false);
2959
c3e7ee41 2960 /* Complete the target-specific cost calculation. */
92345349
BS
2961 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2962 &vec_inside_cost, &vec_epilogue_cost);
2963
2964 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2965
73fbfcad 2966 if (dump_enabled_p ())
69f11a13 2967 {
78c60e3d
SS
2968 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2969 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2970 vec_inside_cost);
2971 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2972 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2973 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2974 }
2975
a6524bba
RB
2976 /* Vectorization is profitable if its cost is more than the cost of scalar
2977 version. Note that we err on the vector side for equal cost because
2978 the cost estimate is otherwise quite pessimistic (constant uses are
2979 free on the scalar side but cost a load on the vector side for
2980 example). */
2981 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2982 return false;
2983
2984 return true;
2985}
2986
818b3293
JH
2987/* Find any vectorizable constructors and add them to the grouped_store
2988 array. */
2989
2990static void
2991vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
2992{
2993 gimple_stmt_iterator gsi;
2994
2995 for (gsi = bb_vinfo->region_begin;
2996 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2997 {
2998 gimple *stmt = gsi_stmt (gsi);
2999
3000 if (is_gimple_assign (stmt)
3001 && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
3002 && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME
3003 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE)
3004 {
3005 tree rhs = gimple_assign_rhs1 (stmt);
3006
3007 if (CONSTRUCTOR_NELTS (rhs) == 0)
3008 continue;
3009
3010 poly_uint64 subparts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs));
3011
3012 if (maybe_ne (subparts, CONSTRUCTOR_NELTS (rhs)))
3013 continue;
3014
3015 if (dump_enabled_p ())
3016 dump_printf_loc (MSG_NOTE, vect_location,
3017 "Found vectorizable constructor: %G\n", stmt);
3018 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3019 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3020 }
3021 }
3022}
3023
1d778697
RS
3024/* Check if the region described by BB_VINFO can be vectorized, returning
3025 true if so. When returning false, set FATAL to true if the same failure
3026 would prevent vectorization at other vector sizes, false if it is still
3027 worth trying other sizes. N_STMTS is the number of statements in the
3028 region. */
3029
3030static bool
3031vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
a70d6342 3032{
bfb9d798
RB
3033 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3034
a70d6342 3035 slp_instance instance;
8e19f5a1 3036 int i;
d9f21f6a 3037 poly_uint64 min_vf = 2;
e4a707c4 3038
a5b50aa1
RB
3039 /* The first group of checks is independent of the vector size. */
3040 fatal = true;
3041
428db0ba
RB
3042 /* Analyze the data references. */
3043
a7b3509e 3044 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
a70d6342 3045 {
73fbfcad 3046 if (dump_enabled_p ())
78c60e3d
SS
3047 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3048 "not vectorized: unhandled data-ref in basic "
3049 "block.\n");
1d778697 3050 return false;
a70d6342
IR
3051 }
3052
fcac74a1 3053 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 3054 {
73fbfcad 3055 if (dump_enabled_p ())
78c60e3d
SS
3056 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3057 "not vectorized: not enough data-refs in "
3058 "basic block.\n");
1d778697 3059 return false;
a70d6342
IR
3060 }
3061
310213d4 3062 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
3063 {
3064 if (dump_enabled_p ())
3065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3066 "not vectorized: unhandled data access in "
3067 "basic block.\n");
1d778697 3068 return false;
5abe1e05
RB
3069 }
3070
818b3293
JH
3071 vect_slp_check_for_constructors (bb_vinfo);
3072
a5b50aa1
RB
3073 /* If there are no grouped stores in the region there is no need
3074 to continue with pattern recog as vect_analyze_slp will fail
3075 anyway. */
3076 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 3077 {
73fbfcad 3078 if (dump_enabled_p ())
a5b50aa1
RB
3079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3080 "not vectorized: no grouped stores in "
3081 "basic block.\n");
1d778697 3082 return false;
a70d6342 3083 }
b8698a0f 3084
a5b50aa1
RB
3085 /* While the rest of the analysis below depends on it in some way. */
3086 fatal = false;
3087
3088 vect_pattern_recog (bb_vinfo);
3089
a70d6342
IR
3090 /* Check the SLP opportunities in the basic block, analyze and build SLP
3091 trees. */
310213d4 3092 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 3093 {
73fbfcad 3094 if (dump_enabled_p ())
effb52da
RB
3095 {
3096 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3097 "Failed to SLP the basic block.\n");
3098 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3099 "not vectorized: failed to find SLP opportunities "
3100 "in basic block.\n");
3101 }
1d778697 3102 return false;
a70d6342 3103 }
b8698a0f 3104
62c8a2cf
RS
3105 vect_record_base_alignments (bb_vinfo);
3106
c2a12ca0
RB
3107 /* Analyze and verify the alignment of data references and the
3108 dependence in the SLP instances. */
a5b50aa1
RB
3109 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3110 {
c2a12ca0
RB
3111 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
3112 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1 3113 {
b9787581
RS
3114 slp_tree node = SLP_INSTANCE_TREE (instance);
3115 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bbeeac91
DM
3116 if (dump_enabled_p ())
3117 dump_printf_loc (MSG_NOTE, vect_location,
3118 "removing SLP instance operations starting from: %G",
3119 stmt_info->stmt);
6e2dd807 3120 vect_free_slp_instance (instance, false);
a5b50aa1
RB
3121 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3122 continue;
3123 }
c2a12ca0
RB
3124
3125 /* Mark all the statements that we want to vectorize as pure SLP and
3126 relevant. */
6c7e3b1f 3127 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
c2a12ca0 3128 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
818b3293
JH
3129 if (SLP_INSTANCE_ROOT_STMT (instance))
3130 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
c2a12ca0 3131
a5b50aa1
RB
3132 i++;
3133 }
a5b50aa1 3134 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
1d778697 3135 return false;
a5b50aa1 3136
8b7e9dba 3137 if (!vect_slp_analyze_operations (bb_vinfo))
a70d6342 3138 {
73fbfcad 3139 if (dump_enabled_p ())
e645e942 3140 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 3141 "not vectorized: bad operation in basic block.\n");
1d778697 3142 return false;
a70d6342
IR
3143 }
3144
69f11a13 3145 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 3146 if (!unlimited_cost_model (NULL)
69f11a13
IR
3147 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3148 {
73fbfcad 3149 if (dump_enabled_p ())
78c60e3d
SS
3150 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3151 "not vectorized: vectorization is not "
3152 "profitable.\n");
1d778697 3153 return false;
69f11a13
IR
3154 }
3155
73fbfcad 3156 if (dump_enabled_p ())
78c60e3d
SS
3157 dump_printf_loc (MSG_NOTE, vect_location,
3158 "Basic block will be vectorized using SLP\n");
1d778697 3159 return true;
a70d6342
IR
3160}
3161
fa0c8df7
RS
3162/* Subroutine of vect_slp_bb. Try to vectorize the statements between
3163 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3164 on success. The region has N_STMTS statements and has the datarefs
3165 given by DATAREFS. */
a70d6342 3166
fa0c8df7
RS
3167static bool
3168vect_slp_bb_region (gimple_stmt_iterator region_begin,
3169 gimple_stmt_iterator region_end,
3170 vec<data_reference_p> datarefs,
3171 unsigned int n_stmts)
8e19f5a1
IR
3172{
3173 bb_vec_info bb_vinfo;
86e36728 3174 auto_vector_sizes vector_sizes;
8e19f5a1 3175
8e19f5a1 3176 /* Autodetect first vector size we try. */
ba7f76dd 3177 poly_uint64 next_vector_size = 0;
f63445e5 3178 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes, false);
86e36728 3179 unsigned int next_size = 0;
8e19f5a1 3180
fa0c8df7 3181 vec_info_shared shared;
61d371eb 3182
86e36728 3183 poly_uint64 autodetected_vector_size = 0;
8e19f5a1
IR
3184 while (1)
3185 {
61d371eb 3186 bool vectorized = false;
a5b50aa1 3187 bool fatal = false;
1d778697
RS
3188 bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3189
3190 bool first_time_p = shared.datarefs.is_empty ();
3191 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3192 if (first_time_p)
3193 bb_vinfo->shared->save_datarefs ();
3194 else
3195 bb_vinfo->shared->check_datarefs ();
ba7f76dd 3196 bb_vinfo->vector_size = next_vector_size;
1d778697
RS
3197
3198 if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
61d371eb
RB
3199 && dbg_cnt (vect_slp))
3200 {
428db0ba 3201 if (dump_enabled_p ())
61d371eb 3202 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
428db0ba 3203
ca823c85 3204 bb_vinfo->shared->check_datarefs ();
428db0ba
RB
3205 vect_schedule_slp (bb_vinfo);
3206
d1ac60d5 3207 unsigned HOST_WIDE_INT bytes;
bbeeac91
DM
3208 if (dump_enabled_p ())
3209 {
ba7f76dd 3210 if (bb_vinfo->vector_size.is_constant (&bytes))
bbeeac91
DM
3211 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3212 "basic block part vectorized using %wu byte "
3213 "vectors\n", bytes);
3214 else
3215 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3216 "basic block part vectorized using variable "
3217 "length vectors\n");
3218 }
428db0ba 3219
61d371eb 3220 vectorized = true;
428db0ba 3221 }
8e19f5a1 3222
86e36728 3223 if (next_size == 0)
ba7f76dd
RS
3224 autodetected_vector_size = bb_vinfo->vector_size;
3225
3226 delete bb_vinfo;
86e36728
RS
3227
3228 if (next_size < vector_sizes.length ()
3229 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3230 next_size += 1;
3231
61d371eb 3232 if (vectorized
86e36728 3233 || next_size == vector_sizes.length ()
87121696 3234 || known_eq (autodetected_vector_size, 0U)
a5b50aa1
RB
3235 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3236 vector sizes will fail do not bother iterating. */
3237 || fatal)
fa0c8df7
RS
3238 return vectorized;
3239
3240 /* Try the next biggest vector size. */
ba7f76dd 3241 next_vector_size = vector_sizes[next_size++];
fa0c8df7 3242 if (dump_enabled_p ())
61d371eb 3243 {
fa0c8df7
RS
3244 dump_printf_loc (MSG_NOTE, vect_location,
3245 "***** Re-trying analysis with "
3246 "vector size ");
ba7f76dd 3247 dump_dec (MSG_NOTE, next_vector_size);
fa0c8df7
RS
3248 dump_printf (MSG_NOTE, "\n");
3249 }
3250 }
3251}
8e19f5a1 3252
fa0c8df7
RS
3253/* Main entry for the BB vectorizer. Analyze and transform BB, returns
3254 true if anything in the basic-block was vectorized. */
61d371eb 3255
fa0c8df7
RS
3256bool
3257vect_slp_bb (basic_block bb)
3258{
3259 gimple_stmt_iterator gsi;
3260 bool any_vectorized = false;
3261
3262 gsi = gsi_start_bb (bb);
3263 while (!gsi_end_p (gsi))
3264 {
3265 gimple_stmt_iterator region_begin = gsi;
3266 vec<data_reference_p> datarefs = vNULL;
3267 int insns = 0;
3268
3269 for (; !gsi_end_p (gsi); gsi_next (&gsi))
61d371eb 3270 {
fa0c8df7
RS
3271 gimple *stmt = gsi_stmt (gsi);
3272 if (is_gimple_debug (stmt))
3273 continue;
3274 insns++;
3275
3276 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3277 vect_location = stmt;
3278
3279 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3280 break;
3281 }
61d371eb 3282
fa0c8df7
RS
3283 /* Skip leading unhandled stmts. */
3284 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3285 {
3286 gsi_next (&gsi);
3287 continue;
61d371eb 3288 }
fa0c8df7
RS
3289
3290 gimple_stmt_iterator region_end = gsi;
3291
028d4092 3292 if (insns > param_slp_max_insns_in_bb)
1d778697
RS
3293 {
3294 if (dump_enabled_p ())
3295 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3296 "not vectorized: too many instructions in "
3297 "basic block.\n");
3298 }
3299 else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
fa0c8df7
RS
3300 any_vectorized = true;
3301
3302 if (gsi_end_p (region_end))
3303 break;
3304
3305 /* Skip the unhandled stmt. */
3306 gsi_next (&gsi);
8e19f5a1 3307 }
61d371eb
RB
3308
3309 return any_vectorized;
8e19f5a1
IR
3310}
3311
3312
c73e7656 3313/* Return 1 if vector type STMT_VINFO is a boolean vector. */
e4af0bc4
IE
3314
3315static bool
c73e7656 3316vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo)
e4af0bc4 3317{
32e8e429 3318 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
e4af0bc4 3319 tree op, vectype;
e4af0bc4
IE
3320 enum vect_def_type dt;
3321
3322 /* For comparison and COND_EXPR type is chosen depending
c73e7656 3323 on the non-constant other comparison operand. */
e4af0bc4
IE
3324 if (TREE_CODE_CLASS (code) == tcc_comparison)
3325 {
32e8e429 3326 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
c73e7656 3327 op = gimple_assign_rhs1 (stmt);
e4af0bc4 3328
894dd753 3329 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3330 gcc_unreachable ();
3331
3332 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3333 }
3334
3335 if (code == COND_EXPR)
3336 {
32e8e429 3337 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
e4af0bc4
IE
3338 tree cond = gimple_assign_rhs1 (stmt);
3339
3340 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1 3341 op = cond;
e4af0bc4
IE
3342 else
3343 op = TREE_OPERAND (cond, 0);
3344
894dd753 3345 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3346 gcc_unreachable ();
3347
3348 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3349 }
3350
3351 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3352}
3353
018b2744
RS
3354/* Build a variable-length vector in which the elements in ELTS are repeated
3355 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3356 RESULTS and add any new instructions to SEQ.
3357
3358 The approach we use is:
3359
3360 (1) Find a vector mode VM with integer elements of mode IM.
3361
3362 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3363 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3364 from small vectors to IM.
3365
3366 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3367
3368 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3369 correct byte contents.
3370
3371 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3372
3373 We try to find the largest IM for which this sequence works, in order
3374 to cut down on the number of interleaves. */
3375
f1739b48 3376void
43fdde57 3377duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
cdbe6e9b
RS
3378 vec<tree> elts, unsigned int nresults,
3379 vec<tree> &results)
018b2744
RS
3380{
3381 unsigned int nelts = elts.length ();
3382 tree element_type = TREE_TYPE (vector_type);
3383
3384 /* (1) Find a vector mode VM with integer elements of mode IM. */
3385 unsigned int nvectors = 1;
3386 tree new_vector_type;
3387 tree permutes[2];
43fdde57 3388 if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type),
018b2744
RS
3389 &nvectors, &new_vector_type,
3390 permutes))
3391 gcc_unreachable ();
3392
3393 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3394 unsigned int partial_nelts = nelts / nvectors;
3395 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3396
3397 tree_vector_builder partial_elts;
3398 auto_vec<tree, 32> pieces (nvectors * 2);
3399 pieces.quick_grow (nvectors * 2);
3400 for (unsigned int i = 0; i < nvectors; ++i)
3401 {
3402 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3403 ELTS' has mode IM. */
3404 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3405 for (unsigned int j = 0; j < partial_nelts; ++j)
3406 partial_elts.quick_push (elts[i * partial_nelts + j]);
3407 tree t = gimple_build_vector (seq, &partial_elts);
3408 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3409 TREE_TYPE (new_vector_type), t);
3410
3411 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3412 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3413 }
3414
3415 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3416 correct byte contents.
3417
3418 We need to repeat the following operation log2(nvectors) times:
3419
3420 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3421 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3422
3423 However, if each input repeats every N elements and the VF is
3424 a multiple of N * 2, the HI result is the same as the LO. */
3425 unsigned int in_start = 0;
3426 unsigned int out_start = nvectors;
3427 unsigned int hi_start = nvectors / 2;
3428 /* A bound on the number of outputs needed to produce NRESULTS results
3429 in the final iteration. */
3430 unsigned int noutputs_bound = nvectors * nresults;
3431 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3432 {
3433 noutputs_bound /= 2;
3434 unsigned int limit = MIN (noutputs_bound, nvectors);
3435 for (unsigned int i = 0; i < limit; ++i)
3436 {
3437 if ((i & 1) != 0
3438 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3439 2 * in_repeat))
3440 {
3441 pieces[out_start + i] = pieces[out_start + i - 1];
3442 continue;
3443 }
3444
3445 tree output = make_ssa_name (new_vector_type);
3446 tree input1 = pieces[in_start + (i / 2)];
3447 tree input2 = pieces[in_start + (i / 2) + hi_start];
3448 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3449 input1, input2,
3450 permutes[i & 1]);
3451 gimple_seq_add_stmt (seq, stmt);
3452 pieces[out_start + i] = output;
3453 }
3454 std::swap (in_start, out_start);
3455 }
3456
3457 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3458 results.reserve (nresults);
3459 for (unsigned int i = 0; i < nresults; ++i)
3460 if (i < nvectors)
3461 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3462 pieces[in_start + i]));
3463 else
3464 results.quick_push (results[i - nvectors]);
3465}
3466
e4af0bc4 3467
b8698a0f
L
3468/* For constant and loop invariant defs of SLP_NODE this function returns
3469 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
30c0d1e3
RB
3470 OP_NODE determines the node for the operand containing the scalar
3471 operands. */
ebfd146a
IR
3472
3473static void
30c0d1e3
RB
3474vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node,
3475 vec<tree> *vec_oprnds)
ebfd146a 3476{
30c0d1e3 3477 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
7ed54790 3478 vec_info *vinfo = stmt_vinfo->vinfo;
018b2744 3479 unsigned HOST_WIDE_INT nunits;
ebfd146a 3480 tree vec_cst;
d2a12ae7 3481 unsigned j, number_of_places_left_in_vector;
ebfd146a 3482 tree vector_type;
9dc3f7de 3483 tree vop;
30c0d1e3 3484 int group_size = op_node->ops.length ();
ebfd146a 3485 unsigned int vec_num, i;
d2a12ae7 3486 unsigned number_of_copies = 1;
30c0d1e3 3487 bool constant_p;
b5aeb3bb 3488 tree neutral_op = NULL;
13396b6e 3489 gimple_seq ctor_seq = NULL;
018b2744 3490 auto_vec<tree, 16> permute_results;
b5aeb3bb 3491
30c0d1e3
RB
3492 /* ??? SLP analysis should compute the vector type for the
3493 constant / invariant and store it in the SLP node. */
3494 tree op = op_node->ops[0];
42fd8198 3495 /* Check if vector type is a boolean vector. */
30c0d1e3 3496 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2568d8a1 3497 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
c73e7656 3498 && vect_mask_constant_operand_p (stmt_vinfo))
42fd8198 3499 vector_type
30c0d1e3 3500 = build_same_sized_truth_vector_type (stmt_vectype);
42fd8198 3501 else
7ed54790 3502 vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op));
afbe6325 3503
318bd8c6
RB
3504 /* ??? For lane-reducing ops we should also have the required number
3505 of vector stmts initialized rather than second-guessing here. */
3506 unsigned int number_of_vectors;
3507 if (is_gimple_assign (stmt_vinfo->stmt)
3508 && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3509 || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3510 || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3511 number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3512 else
3513 number_of_vectors
3514 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3515 * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3516 vector_type);
30c0d1e3
RB
3517 vec_oprnds->create (number_of_vectors);
3518 auto_vec<tree> voprnds (number_of_vectors);
ebfd146a 3519
ebfd146a 3520 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3521 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3522
3523 For example, we have two scalar operands, s1 and s2 (e.g., group of
3524 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3525 of this type can be packed in a vector). The output vector will contain
3526 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3527 will be 2).
3528
b8698a0f 3529 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3530 containing the operands.
3531
3532 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3533 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3534 {s5, s6, s7, s8}. */
b8698a0f 3535
018b2744
RS
3536 /* When using duplicate_and_interleave, we just need one element for
3537 each scalar statement. */
3538 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3539 nunits = group_size;
3540
14a61437 3541 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3542
3543 number_of_places_left_in_vector = nunits;
62cf7335 3544 constant_p = true;
5ebaa477 3545 tree_vector_builder elts (vector_type, nunits, 1);
794e3180 3546 elts.quick_grow (nunits);
90dd6e3d 3547 bool place_after_defs = false;
ebfd146a
IR
3548 for (j = 0; j < number_of_copies; j++)
3549 {
30c0d1e3 3550 for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
ebfd146a 3551 {
ebfd146a 3552 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3553 number_of_places_left_in_vector--;
90dd6e3d 3554 tree orig_op = op;
13396b6e 3555 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3556 {
793d9a16 3557 if (CONSTANT_CLASS_P (op))
13396b6e 3558 {
42fd8198
IE
3559 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3560 {
3561 /* Can't use VIEW_CONVERT_EXPR for booleans because
3562 of possibly different sizes of scalar value and
3563 vector element. */
3564 if (integer_zerop (op))
3565 op = build_int_cst (TREE_TYPE (vector_type), 0);
3566 else if (integer_onep (op))
158beb4a 3567 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3568 else
3569 gcc_unreachable ();
3570 }
3571 else
3572 op = fold_unary (VIEW_CONVERT_EXPR,
3573 TREE_TYPE (vector_type), op);
13396b6e
JJ
3574 gcc_assert (op && CONSTANT_CLASS_P (op));
3575 }
3576 else
3577 {
b731b390 3578 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3579 gimple *init_stmt;
262a363f
JJ
3580 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3581 {
158beb4a
JJ
3582 tree true_val
3583 = build_all_ones_cst (TREE_TYPE (vector_type));
3584 tree false_val
3585 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3586 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3587 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3588 op, true_val,
3589 false_val);
262a363f 3590 }
262a363f
JJ
3591 else
3592 {
3593 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3594 op);
3595 init_stmt
3596 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3597 op);
3598 }
13396b6e
JJ
3599 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3600 op = new_temp;
3601 }
50eeef09 3602 }
d2a12ae7 3603 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3604 if (!CONSTANT_CLASS_P (op))
3605 constant_p = false;
90dd6e3d
RB
3606 if (TREE_CODE (orig_op) == SSA_NAME
3607 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3608 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3609 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3610 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3611 place_after_defs = true;
ebfd146a
IR
3612
3613 if (number_of_places_left_in_vector == 0)
3614 {
018b2744
RS
3615 if (constant_p
3616 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3617 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3618 vec_cst = gimple_build_vector (&ctor_seq, &elts);
ebfd146a 3619 else
d2a12ae7 3620 {
30c0d1e3 3621 if (permute_results.is_empty ())
cdbe6e9b
RS
3622 duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3623 elts, number_of_vectors,
018b2744
RS
3624 permute_results);
3625 vec_cst = permute_results[number_of_vectors - j - 1];
d2a12ae7 3626 }
90dd6e3d
RB
3627 tree init;
3628 gimple_stmt_iterator gsi;
3629 if (place_after_defs)
3630 {
95c68311
RS
3631 stmt_vec_info last_stmt_info
3632 = vect_find_last_scalar_stmt_in_slp (slp_node);
3633 gsi = gsi_for_stmt (last_stmt_info->stmt);
b9787581
RS
3634 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3635 &gsi);
90dd6e3d
RB
3636 }
3637 else
b9787581
RS
3638 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3639 NULL);
13396b6e
JJ
3640 if (ctor_seq != NULL)
3641 {
90dd6e3d 3642 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
018b2744 3643 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
13396b6e
JJ
3644 ctor_seq = NULL;
3645 }
90dd6e3d
RB
3646 voprnds.quick_push (init);
3647 place_after_defs = false;
62cf7335
RB
3648 number_of_places_left_in_vector = nunits;
3649 constant_p = true;
5ebaa477
RS
3650 elts.new_vector (vector_type, nunits, 1);
3651 elts.quick_grow (nunits);
ebfd146a
IR
3652 }
3653 }
3654 }
3655
b8698a0f 3656 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 3657 them. */
9771b263 3658 vec_num = voprnds.length ();
d2a12ae7 3659 for (j = vec_num; j != 0; j--)
ebfd146a 3660 {
9771b263
DN
3661 vop = voprnds[j - 1];
3662 vec_oprnds->quick_push (vop);
ebfd146a
IR
3663 }
3664
ebfd146a 3665 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3666 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3667 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3668 to replicate the vectors. */
9771b263 3669 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3670 {
b5aeb3bb
IR
3671 tree neutral_vec = NULL;
3672
3673 if (neutral_op)
3674 {
3675 if (!neutral_vec)
b9acc9f1 3676 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3677
9771b263 3678 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3679 }
3680 else
3681 {
9771b263
DN
3682 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3683 vec_oprnds->quick_push (vop);
b5aeb3bb 3684 }
ebfd146a
IR
3685 }
3686}
3687
3688
3689/* Get vectorized definitions from SLP_NODE that contains corresponding
3690 vectorized def-stmts. */
3691
3692static void
9771b263 3693vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a 3694{
16edaeb8 3695 stmt_vec_info vec_def_stmt_info;
ebfd146a
IR
3696 unsigned int i;
3697
9771b263 3698 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3699
16edaeb8 3700 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
30c0d1e3 3701 vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
ebfd146a
IR
3702}
3703
3704
30c0d1e3 3705/* Get N vectorized definitions for SLP_NODE.
b8698a0f 3706 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
3707 call vect_get_constant_vectors() to create vector stmts.
3708 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
3709 must be stored in the corresponding child of SLP_NODE, and we call
3710 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 3711
ebfd146a 3712void
30c0d1e3 3713vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
ebfd146a 3714{
30c0d1e3
RB
3715 if (n == -1U)
3716 n = SLP_TREE_CHILDREN (slp_node).length ();
ebfd146a 3717
30c0d1e3 3718 for (unsigned i = 0; i < n; ++i)
ebfd146a 3719 {
30c0d1e3 3720 slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
b5aeb3bb 3721
30c0d1e3 3722 vec<tree> vec_defs = vNULL;
ebfd146a 3723
30c0d1e3
RB
3724 /* For each operand we check if it has vectorized definitions in a child
3725 node or we need to create them (for invariants and constants). */
3726 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3727 {
3728 vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3729 vect_get_slp_vect_defs (child, &vec_defs);
3730 }
d092494c 3731 else
30c0d1e3 3732 vect_get_constant_vectors (child, slp_node, &vec_defs);
ebfd146a 3733
37b5ec8f 3734 vec_oprnds->quick_push (vec_defs);
d092494c 3735 }
ebfd146a
IR
3736}
3737
ebfd146a
IR
3738/* Generate vector permute statements from a list of loads in DR_CHAIN.
3739 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3740 permute statements for the SLP node NODE of the SLP instance
3741 SLP_NODE_INSTANCE. */
3742
ebfd146a 3743bool
01d8bf07 3744vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
d9f21f6a
RS
3745 gimple_stmt_iterator *gsi, poly_uint64 vf,
3746 slp_instance slp_node_instance, bool analyze_only,
29afecdf 3747 unsigned *n_perms)
ebfd146a 3748{
b9787581 3749 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
16edaeb8 3750 vec_info *vinfo = stmt_info->vinfo;
928686b1 3751 int vec_index = 0;
2635892a 3752 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ab7e60ce 3753 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
928686b1 3754 unsigned int mask_element;
ef4bddc2 3755 machine_mode mode;
ebfd146a 3756
91ff1504
RB
3757 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3758 return false;
3759
bffb8014 3760 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
91ff1504 3761
22e4dee7 3762 mode = TYPE_MODE (vectype);
ab7e60ce 3763 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 3764
61fdfd8c
RB
3765 /* Initialize the vect stmts of NODE to properly insert the generated
3766 stmts later. */
3767 if (! analyze_only)
3768 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3769 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3770 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a 3771
b8698a0f
L
3772 /* Generate permutation masks for every NODE. Number of masks for each NODE
3773 is equal to GROUP_SIZE.
3774 E.g., we have a group of three nodes with three loads from the same
3775 location in each node, and the vector size is 4. I.e., we have a
3776 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3777 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3778 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3779 ...
3780
2635892a 3781 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3782 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
3783 operation, and the mask element values can't be outside that range.
3784 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3785 For the first two permutations we need the first and the second input
ebfd146a 3786 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3787 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3788 {c2,a3,b3,c3}. */
3789
2ce27200 3790 int vect_stmts_counter = 0;
928686b1 3791 unsigned int index = 0;
2ce27200
RB
3792 int first_vec_index = -1;
3793 int second_vec_index = -1;
be377c80 3794 bool noop_p = true;
29afecdf 3795 *n_perms = 0;
ebfd146a 3796
ab7e60ce
RS
3797 vec_perm_builder mask;
3798 unsigned int nelts_to_build;
3799 unsigned int nvectors_per_build;
3800 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3801 && multiple_p (nunits, group_size));
3802 if (repeating_p)
2ce27200 3803 {
ab7e60ce
RS
3804 /* A single vector contains a whole number of copies of the node, so:
3805 (a) all permutes can use the same mask; and
3806 (b) the permutes only need a single vector input. */
3807 mask.new_vector (nunits, group_size, 3);
3808 nelts_to_build = mask.encoded_nelts ();
3809 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3810 }
3811 else
3812 {
3813 /* We need to construct a separate mask for each vector statement. */
3814 unsigned HOST_WIDE_INT const_nunits, const_vf;
3815 if (!nunits.is_constant (&const_nunits)
3816 || !vf.is_constant (&const_vf))
3817 return false;
3818 mask.new_vector (const_nunits, const_nunits, 1);
3819 nelts_to_build = const_vf * group_size;
3820 nvectors_per_build = 1;
3821 }
3822
3823 unsigned int count = mask.encoded_nelts ();
3824 mask.quick_grow (count);
3825 vec_perm_indices indices;
3826
3827 for (unsigned int j = 0; j < nelts_to_build; j++)
3828 {
3829 unsigned int iter_num = j / group_size;
3830 unsigned int stmt_num = j % group_size;
3831 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3832 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3833 if (repeating_p)
2ce27200 3834 {
ab7e60ce
RS
3835 first_vec_index = 0;
3836 mask_element = i;
3837 }
3838 else
3839 {
3840 /* Enforced before the loop when !repeating_p. */
3841 unsigned int const_nunits = nunits.to_constant ();
3842 vec_index = i / const_nunits;
3843 mask_element = i % const_nunits;
2ce27200
RB
3844 if (vec_index == first_vec_index
3845 || first_vec_index == -1)
3846 {
3847 first_vec_index = vec_index;
3848 }
3849 else if (vec_index == second_vec_index
3850 || second_vec_index == -1)
3851 {
3852 second_vec_index = vec_index;
ab7e60ce 3853 mask_element += const_nunits;
2ce27200
RB
3854 }
3855 else
3856 {
3857 if (dump_enabled_p ())
3c2a8ed0
DM
3858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3859 "permutation requires at "
3860 "least three vectors %G",
3861 stmt_info->stmt);
31bee964 3862 gcc_assert (analyze_only);
2ce27200
RB
3863 return false;
3864 }
ebfd146a 3865
ab7e60ce
RS
3866 gcc_assert (mask_element < 2 * const_nunits);
3867 }
3868
3869 if (mask_element != index)
3870 noop_p = false;
3871 mask[index++] = mask_element;
2ce27200 3872
ab7e60ce
RS
3873 if (index == count && !noop_p)
3874 {
3875 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3876 if (!can_vec_perm_const_p (mode, indices))
2ce27200 3877 {
ab7e60ce 3878 if (dump_enabled_p ())
2ce27200 3879 {
ab7e60ce
RS
3880 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3881 vect_location,
3882 "unsupported vect permute { ");
3883 for (i = 0; i < count; ++i)
22e4dee7 3884 {
ab7e60ce
RS
3885 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3886 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
22e4dee7 3887 }
ab7e60ce 3888 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2ce27200 3889 }
ab7e60ce
RS
3890 gcc_assert (analyze_only);
3891 return false;
e3342de4 3892 }
29afecdf 3893
ab7e60ce
RS
3894 ++*n_perms;
3895 }
3896
3897 if (index == count)
3898 {
3899 if (!analyze_only)
e3342de4 3900 {
ab7e60ce 3901 tree mask_vec = NULL_TREE;
be377c80 3902
ab7e60ce
RS
3903 if (! noop_p)
3904 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
2ce27200 3905
ab7e60ce
RS
3906 if (second_vec_index == -1)
3907 second_vec_index = first_vec_index;
61fdfd8c 3908
ab7e60ce
RS
3909 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3910 {
61fdfd8c 3911 /* Generate the permute statement if necessary. */
ab7e60ce
RS
3912 tree first_vec = dr_chain[first_vec_index + ri];
3913 tree second_vec = dr_chain[second_vec_index + ri];
16edaeb8 3914 stmt_vec_info perm_stmt_info;
61fdfd8c
RB
3915 if (! noop_p)
3916 {
b9787581 3917 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
61fdfd8c
RB
3918 tree perm_dest
3919 = vect_create_destination_var (gimple_assign_lhs (stmt),
3920 vectype);
3921 perm_dest = make_ssa_name (perm_dest);
16edaeb8
RS
3922 gassign *perm_stmt
3923 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3924 first_vec, second_vec,
3925 mask_vec);
3926 perm_stmt_info
b9787581
RS
3927 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3928 gsi);
61fdfd8c
RB
3929 }
3930 else
3931 /* If mask was NULL_TREE generate the requested
3932 identity transform. */
16edaeb8 3933 perm_stmt_info = vinfo->lookup_def (first_vec);
61fdfd8c
RB
3934
3935 /* Store the vector statement in NODE. */
16edaeb8
RS
3936 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3937 = perm_stmt_info;
2ce27200 3938 }
2ce27200 3939 }
ab7e60ce
RS
3940
3941 index = 0;
3942 first_vec_index = -1;
3943 second_vec_index = -1;
3944 noop_p = true;
2ce27200 3945 }
b8698a0f 3946 }
ebfd146a 3947
ebfd146a
IR
3948 return true;
3949}
3950
ebfd146a
IR
3951/* Vectorize SLP instance tree in postorder. */
3952
8fe1bd30 3953static void
f7300fff
RB
3954vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3955 scalar_stmts_to_slp_tree_map_t *bst_map)
ebfd146a 3956{
ebfd146a
IR
3957 gimple_stmt_iterator si;
3958 stmt_vec_info stmt_info;
8b7e9dba 3959 unsigned int group_size;
ebfd146a 3960 tree vectype;
603cca93 3961 int i, j;
d755c7ef 3962 slp_tree child;
ebfd146a 3963
603cca93 3964 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
8fe1bd30 3965 return;
ebfd146a 3966
a1f072e2
RB
3967 /* See if we have already vectorized the node in the graph of the
3968 SLP instance. */
3969 if (SLP_TREE_VEC_STMTS (node).exists ())
3970 return;
3971
f7300fff 3972 /* See if we have already vectorized the same set of stmts and reuse their
a1f072e2 3973 vectorized stmts across instances. */
dd172744 3974 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
f7300fff 3975 {
dd172744 3976 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
8fe1bd30 3977 return;
f7300fff
RB
3978 }
3979
dd172744 3980 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
9771b263 3981 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
f7300fff 3982 vect_schedule_slp_instance (child, instance, bst_map);
b8698a0f 3983
603cca93
RB
3984 /* Push SLP node def-type to stmts. */
3985 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3986 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
3987 {
3988 stmt_vec_info child_stmt_info;
3989 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3990 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
3991 }
603cca93 3992
b9787581 3993 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3994
3995 /* VECTYPE is the type of the destination. */
b690cc0f 3996 vectype = STMT_VINFO_VECTYPE (stmt_info);
dad55d70 3997 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
3998 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3999
68435eb2 4000 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
a1f072e2 4001 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
ebfd146a 4002
73fbfcad 4003 if (dump_enabled_p ())
3c2a8ed0
DM
4004 dump_printf_loc (MSG_NOTE, vect_location,
4005 "------>vectorizing SLP node starting from: %G",
4006 stmt_info->stmt);
ebfd146a 4007
2e8ab70c
RB
4008 /* Vectorized stmts go before the last scalar stmt which is where
4009 all uses are ready. */
95c68311
RS
4010 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
4011 si = gsi_for_stmt (last_stmt_info->stmt);
e4a707c4 4012
6876e5bc
RB
4013 /* Handle two-operation SLP nodes by vectorizing the group with
4014 both operations and then performing a merge. */
4015 if (SLP_TREE_TWO_OPERATORS (node))
4016 {
b9787581 4017 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
6876e5bc 4018 enum tree_code code0 = gimple_assign_rhs_code (stmt);
567a3691 4019 enum tree_code ocode = ERROR_MARK;
b9787581 4020 stmt_vec_info ostmt_info;
e3342de4 4021 vec_perm_builder mask (group_size, group_size, 1);
b9787581
RS
4022 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
4023 {
4024 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
4025 if (gimple_assign_rhs_code (ostmt) != code0)
4026 {
4027 mask.quick_push (1);
4028 ocode = gimple_assign_rhs_code (ostmt);
4029 }
4030 else
4031 mask.quick_push (0);
4032 }
567a3691 4033 if (ocode != ERROR_MARK)
6876e5bc 4034 {
16edaeb8
RS
4035 vec<stmt_vec_info> v0;
4036 vec<stmt_vec_info> v1;
6876e5bc
RB
4037 unsigned j;
4038 tree tmask = NULL_TREE;
b0b45e58 4039 vect_transform_stmt (stmt_info, &si, node, instance);
6876e5bc
RB
4040 v0 = SLP_TREE_VEC_STMTS (node).copy ();
4041 SLP_TREE_VEC_STMTS (node).truncate (0);
4042 gimple_assign_set_rhs_code (stmt, ocode);
b0b45e58 4043 vect_transform_stmt (stmt_info, &si, node, instance);
6876e5bc
RB
4044 gimple_assign_set_rhs_code (stmt, code0);
4045 v1 = SLP_TREE_VEC_STMTS (node).copy ();
4046 SLP_TREE_VEC_STMTS (node).truncate (0);
4047 tree meltype = build_nonstandard_integer_type
b397965c 4048 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
6876e5bc
RB
4049 tree mvectype = get_same_sized_vectype (meltype, vectype);
4050 unsigned k = 0, l;
4051 for (j = 0; j < v0.length (); ++j)
4052 {
dad55d70
RS
4053 /* Enforced by vect_build_slp_tree, which rejects variable-length
4054 vectors for SLP_TREE_TWO_OPERATORS. */
4055 unsigned int const_nunits = nunits.to_constant ();
4056 tree_vector_builder melts (mvectype, const_nunits, 1);
4057 for (l = 0; l < const_nunits; ++l)
6876e5bc 4058 {
1ece8d4c 4059 if (k >= group_size)
6876e5bc 4060 k = 0;
dad55d70
RS
4061 tree t = build_int_cst (meltype,
4062 mask[k++] * const_nunits + l);
794e3180 4063 melts.quick_push (t);
6876e5bc 4064 }
5ebaa477 4065 tmask = melts.build ();
6876e5bc
RB
4066
4067 /* ??? Not all targets support a VEC_PERM_EXPR with a
4068 constant mask that would translate to a vec_merge RTX
4069 (with their vec_perm_const_ok). We can either not
4070 vectorize in that case or let veclower do its job.
4071 Unfortunately that isn't too great and at least for
4072 plus/minus we'd eventually like to match targets
4073 vector addsub instructions. */
355fe088 4074 gimple *vstmt;
6876e5bc
RB
4075 vstmt = gimple_build_assign (make_ssa_name (vectype),
4076 VEC_PERM_EXPR,
16edaeb8
RS
4077 gimple_assign_lhs (v0[j]->stmt),
4078 gimple_assign_lhs (v1[j]->stmt),
4079 tmask);
4080 SLP_TREE_VEC_STMTS (node).quick_push
b9787581 4081 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
6876e5bc
RB
4082 }
4083 v0.release ();
4084 v1.release ();
8fe1bd30 4085 return;
6876e5bc
RB
4086 }
4087 }
b0b45e58 4088 vect_transform_stmt (stmt_info, &si, node, instance);
603cca93
RB
4089
4090 /* Restore stmt def-types. */
4091 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4092 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
4093 {
4094 stmt_vec_info child_stmt_info;
4095 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4096 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4097 }
ebfd146a
IR
4098}
4099
dd34c087
JJ
4100/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4101 For loop vectorization this is done in vectorizable_call, but for SLP
4102 it needs to be deferred until end of vect_schedule_slp, because multiple
4103 SLP instances may refer to the same scalar stmt. */
4104
4105static void
4bfcf879 4106vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
dd34c087 4107{
b9787581 4108 gimple *new_stmt;
dd34c087
JJ
4109 gimple_stmt_iterator gsi;
4110 int i;
d755c7ef 4111 slp_tree child;
dd34c087
JJ
4112 tree lhs;
4113 stmt_vec_info stmt_info;
4114
603cca93 4115 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
dd34c087
JJ
4116 return;
4117
4bfcf879
RB
4118 if (visited.add (node))
4119 return;
4120
9771b263 4121 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4bfcf879 4122 vect_remove_slp_scalar_calls (child, visited);
dd34c087 4123
b9787581 4124 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
dd34c087 4125 {
b9787581
RS
4126 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4127 if (!stmt || gimple_bb (stmt) == NULL)
dd34c087 4128 continue;
b9787581 4129 if (is_pattern_stmt_p (stmt_info)
dd34c087
JJ
4130 || !PURE_SLP_STMT (stmt_info))
4131 continue;
4132 lhs = gimple_call_lhs (stmt);
4133 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
dd34c087 4134 gsi = gsi_for_stmt (stmt);
9d97912b 4135 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
dd34c087
JJ
4136 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4137 }
4138}
ebfd146a 4139
4bfcf879
RB
4140static void
4141vect_remove_slp_scalar_calls (slp_tree node)
4142{
4143 hash_set<slp_tree> visited;
4144 vect_remove_slp_scalar_calls (node, visited);
4145}
4146
818b3293
JH
4147/* Vectorize the instance root. */
4148
4149void
4150vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4151{
0ec77a6c 4152 gassign *rstmt = NULL;
818b3293
JH
4153
4154 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4155 {
4156 stmt_vec_info child_stmt_info;
4157 int j;
4158
4159 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4160 {
4161 tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4162 tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
4163 rstmt = gimple_build_assign (root_lhs, vect_lhs);
4164 break;
4165 }
4166 }
4167 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4168 {
4169 int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4170 stmt_vec_info child_stmt_info;
4171 int j;
4172 vec<constructor_elt, va_gc> *v;
4173 vec_alloc (v, nelts);
4174
4175 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4176 {
4177 CONSTRUCTOR_APPEND_ELT (v,
4178 NULL_TREE,
4179 gimple_get_lhs (child_stmt_info->stmt));
4180 }
4181 tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4182 tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4183 tree r_constructor = build_constructor (rtype, v);
4184 rstmt = gimple_build_assign (lhs, r_constructor);
4185 }
0ec77a6c
TC
4186
4187 gcc_assert (rstmt);
4188
818b3293
JH
4189 gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4190 gsi_replace (&rgsi, rstmt, true);
4191}
4192
ff802fa1
IR
4193/* Generate vector code for all SLP instances in the loop/basic block. */
4194
8fe1bd30 4195void
310213d4 4196vect_schedule_slp (vec_info *vinfo)
ebfd146a 4197{
9771b263 4198 vec<slp_instance> slp_instances;
ebfd146a 4199 slp_instance instance;
8b7e9dba 4200 unsigned int i;
78604de0
RB
4201
4202 scalar_stmts_to_slp_tree_map_t *bst_map
4203 = new scalar_stmts_to_slp_tree_map_t ();
310213d4 4204 slp_instances = vinfo->slp_instances;
9771b263 4205 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a 4206 {
818b3293 4207 slp_tree node = SLP_INSTANCE_TREE (instance);
ebfd146a 4208 /* Schedule the tree of INSTANCE. */
818b3293
JH
4209 vect_schedule_slp_instance (node, instance, bst_map);
4210
4211 if (SLP_INSTANCE_ROOT_STMT (instance))
4212 vectorize_slp_instance_root_stmt (node, instance);
4213
73fbfcad 4214 if (dump_enabled_p ())
78c60e3d 4215 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4216 "vectorizing stmts using SLP.\n");
ebfd146a 4217 }
78604de0 4218 delete bst_map;
ebfd146a 4219
9771b263 4220 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
4221 {
4222 slp_tree root = SLP_INSTANCE_TREE (instance);
b9787581 4223 stmt_vec_info store_info;
b5aeb3bb 4224 unsigned int j;
b5aeb3bb 4225
c40eced0
RB
4226 /* Remove scalar call stmts. Do not do this for basic-block
4227 vectorization as not all uses may be vectorized.
4228 ??? Why should this be necessary? DCE should be able to
4229 remove the stmts itself.
4230 ??? For BB vectorization we can as well remove scalar
4231 stmts starting from the SLP tree root if they have no
4232 uses. */
310213d4 4233 if (is_a <loop_vec_info> (vinfo))
c40eced0 4234 vect_remove_slp_scalar_calls (root);
dd34c087 4235
b9787581 4236 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
b5aeb3bb
IR
4237 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4238 {
b9787581
RS
4239 if (!STMT_VINFO_DATA_REF (store_info))
4240 break;
4241
818b3293
JH
4242 if (SLP_INSTANCE_ROOT_STMT (instance))
4243 continue;
4244
211cd1e2 4245 store_info = vect_orig_stmt (store_info);
b9787581 4246 /* Free the attached stmt_vec_info and remove the stmt. */
b5b56c2a 4247 vinfo->remove_stmt (store_info);
b5aeb3bb
IR
4248 }
4249 }
ebfd146a 4250}