]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
Disable string merging with alignment > 1 before Solaris 11.4/SPARC
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
85ec4feb 2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
957060b5 35#include "params.h"
40e23961 36#include "fold-const.h"
d8a2d370 37#include "stor-layout.h"
5be5c238 38#include "gimple-iterator.h"
ebfd146a 39#include "cfgloop.h"
ebfd146a 40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
642fce57 42#include "gimple-walk.h"
428db0ba 43#include "dbgcnt.h"
5ebaa477 44#include "tree-vector-builder.h"
f151c9e1 45#include "vec-perm-indices.h"
018b2744
RS
46#include "gimple-fold.h"
47#include "internal-fn.h"
a70d6342
IR
48
49
6e2dd807
RS
50/* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
ebfd146a
IR
53
54static void
6e2dd807 55vect_free_slp_tree (slp_tree node, bool final_p)
ebfd146a 56{
d092494c 57 int i;
d755c7ef 58 slp_tree child;
d092494c 59
9771b263 60 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6e2dd807 61 vect_free_slp_tree (child, final_p);
b8698a0f 62
6e2dd807
RS
63 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
64 Some statements might no longer exist, after having been
65 removed by vect_transform_stmt. Updating the remaining
66 statements would be redundant. */
67 if (!final_p)
68 {
b9787581
RS
69 stmt_vec_info stmt_info;
70 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6e2dd807 71 {
b9787581
RS
72 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
73 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
6e2dd807
RS
74 }
75 }
78810bd3 76
9771b263
DN
77 SLP_TREE_CHILDREN (node).release ();
78 SLP_TREE_SCALAR_STMTS (node).release ();
79 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 80 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
81
82 free (node);
83}
84
85
6e2dd807
RS
86/* Free the memory allocated for the SLP instance. FINAL_P is true if we
87 have vectorized the instance or if we have made a final decision not
88 to vectorize the statements in any way. */
ebfd146a
IR
89
90void
6e2dd807 91vect_free_slp_instance (slp_instance instance, bool final_p)
ebfd146a 92{
6e2dd807 93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
9771b263 94 SLP_INSTANCE_LOADS (instance).release ();
c7e62a26 95 free (instance);
ebfd146a
IR
96}
97
98
d092494c
IR
99/* Create an SLP node for SCALAR_STMTS. */
100
101static slp_tree
b9787581 102vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
d092494c 103{
d3cfd39e 104 slp_tree node;
b9787581 105 stmt_vec_info stmt_info = scalar_stmts[0];
d092494c
IR
106 unsigned int nops;
107
b9787581 108 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
d092494c 109 nops = gimple_call_num_args (stmt);
b9787581 110 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
f7e531cf
IR
111 {
112 nops = gimple_num_ops (stmt) - 1;
113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114 nops++;
115 }
b9787581 116 else if (is_a <gphi *> (stmt_info->stmt))
e7baeb39 117 nops = 0;
d092494c
IR
118 else
119 return NULL;
120
d3cfd39e 121 node = XNEW (struct _slp_tree);
d092494c 122 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263 123 SLP_TREE_VEC_STMTS (node).create (0);
68435eb2 124 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
9771b263 125 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 126 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 127 SLP_TREE_TWO_OPERATORS (node) = false;
603cca93 128 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
d092494c 129
78810bd3 130 unsigned i;
b9787581
RS
131 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
132 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
78810bd3 133
d092494c
IR
134 return node;
135}
136
137
ddf56386
RB
138/* This structure is used in creation of an SLP tree. Each instance
139 corresponds to the same operand in a group of scalar stmts in an SLP
140 node. */
141typedef struct _slp_oprnd_info
142{
143 /* Def-stmts for the operands. */
b9787581 144 vec<stmt_vec_info> def_stmts;
ddf56386
RB
145 /* Information about the first statement, its vector def-type, type, the
146 operand itself in case it's constant, and an indication if it's a pattern
147 stmt. */
ddf56386 148 tree first_op_type;
34e82342 149 enum vect_def_type first_dt;
ddf56386
RB
150 bool first_pattern;
151 bool second_pattern;
152} *slp_oprnd_info;
153
154
d092494c
IR
155/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
156 operand. */
9771b263 157static vec<slp_oprnd_info>
d092494c
IR
158vect_create_oprnd_info (int nops, int group_size)
159{
160 int i;
161 slp_oprnd_info oprnd_info;
9771b263 162 vec<slp_oprnd_info> oprnds_info;
d092494c 163
9771b263 164 oprnds_info.create (nops);
d092494c
IR
165 for (i = 0; i < nops; i++)
166 {
167 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 168 oprnd_info->def_stmts.create (group_size);
d092494c 169 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 170 oprnd_info->first_op_type = NULL_TREE;
d092494c 171 oprnd_info->first_pattern = false;
effb52da 172 oprnd_info->second_pattern = false;
9771b263 173 oprnds_info.quick_push (oprnd_info);
d092494c
IR
174 }
175
176 return oprnds_info;
177}
178
179
d3cfd39e
JJ
180/* Free operands info. */
181
d092494c 182static void
9771b263 183vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
184{
185 int i;
186 slp_oprnd_info oprnd_info;
187
9771b263 188 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 189 {
9771b263 190 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
191 XDELETE (oprnd_info);
192 }
d092494c 193
9771b263 194 oprnds_info.release ();
d092494c
IR
195}
196
197
32e8e429
RS
198/* Find the place of the data-ref in STMT_INFO in the interleaving chain
199 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
200 of the chain. */
d755c7ef 201
b210f45f 202int
32e8e429
RS
203vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
204 stmt_vec_info first_stmt_info)
d755c7ef 205{
a1824cfd 206 stmt_vec_info next_stmt_info = first_stmt_info;
d755c7ef
RB
207 int result = 0;
208
a1824cfd 209 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
d755c7ef
RB
210 return -1;
211
212 do
213 {
a1824cfd 214 if (next_stmt_info == stmt_info)
d755c7ef 215 return result;
a1824cfd
RS
216 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
217 if (next_stmt_info)
218 result += DR_GROUP_GAP (next_stmt_info);
d755c7ef 219 }
a1824cfd 220 while (next_stmt_info);
d755c7ef
RB
221
222 return -1;
223}
224
018b2744
RS
225/* Check whether it is possible to load COUNT elements of type ELT_MODE
226 using the method implemented by duplicate_and_interleave. Return true
227 if so, returning the number of intermediate vectors in *NVECTORS_OUT
228 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
229 (if nonnull). */
230
f1739b48 231bool
018b2744 232can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
f1739b48
RS
233 unsigned int *nvectors_out,
234 tree *vector_type_out,
235 tree *permutes)
018b2744
RS
236{
237 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
238 poly_int64 nelts;
239 unsigned int nvectors = 1;
240 for (;;)
241 {
242 scalar_int_mode int_mode;
243 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
244 if (multiple_p (current_vector_size, elt_bytes, &nelts)
245 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
246 {
247 tree int_type = build_nonstandard_integer_type
248 (GET_MODE_BITSIZE (int_mode), 1);
249 tree vector_type = build_vector_type (int_type, nelts);
250 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
251 {
252 vec_perm_builder sel1 (nelts, 2, 3);
253 vec_perm_builder sel2 (nelts, 2, 3);
254 poly_int64 half_nelts = exact_div (nelts, 2);
255 for (unsigned int i = 0; i < 3; ++i)
256 {
257 sel1.quick_push (i);
258 sel1.quick_push (i + nelts);
259 sel2.quick_push (half_nelts + i);
260 sel2.quick_push (half_nelts + i + nelts);
261 }
262 vec_perm_indices indices1 (sel1, 2, nelts);
263 vec_perm_indices indices2 (sel2, 2, nelts);
264 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
265 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
266 {
267 if (nvectors_out)
268 *nvectors_out = nvectors;
269 if (vector_type_out)
270 *vector_type_out = vector_type;
271 if (permutes)
272 {
273 permutes[0] = vect_gen_perm_mask_checked (vector_type,
274 indices1);
275 permutes[1] = vect_gen_perm_mask_checked (vector_type,
276 indices2);
277 }
278 return true;
279 }
280 }
281 }
282 if (!multiple_p (elt_bytes, 2, &elt_bytes))
283 return false;
284 nvectors *= 2;
285 }
286}
d755c7ef 287
d092494c
IR
288/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
289 they are of a valid type and that they match the defs of the first stmt of
4cecd659 290 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
018b2744
RS
291 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
292 indicates swap is required for cond_expr stmts. Specifically, *SWAP
293 is 1 if STMT is cond and operands of comparison need to be swapped;
294 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
295 If there is any operand swap in this function, *SWAP is set to non-zero
296 value.
4cecd659
BC
297 If there was a fatal error return -1; if the error could be corrected by
298 swapping operands of father node of this one, return 1; if everything is
299 ok return 0. */
4cecd659
BC
300static int
301vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
b9787581 302 vec<stmt_vec_info> stmts, unsigned stmt_num,
4cecd659 303 vec<slp_oprnd_info> *oprnds_info)
ebfd146a 304{
b9787581 305 stmt_vec_info stmt_info = stmts[stmt_num];
ebfd146a
IR
306 tree oprnd;
307 unsigned int i, number_of_oprnds;
d092494c 308 enum vect_def_type dt = vect_uninitialized_def;
d092494c 309 bool pattern = false;
abf9bfbc 310 slp_oprnd_info oprnd_info;
b0b4483e 311 int first_op_idx = 1;
0246112a 312 unsigned int commutative_op = -1U;
b0b4483e 313 bool first_op_cond = false;
effb52da
RB
314 bool first = stmt_num == 0;
315 bool second = stmt_num == 1;
b8698a0f 316
b9787581 317 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
190c2236
JJ
318 {
319 number_of_oprnds = gimple_call_num_args (stmt);
b0b4483e 320 first_op_idx = 3;
0246112a
RS
321 if (gimple_call_internal_p (stmt))
322 {
323 internal_fn ifn = gimple_call_internal_fn (stmt);
324 commutative_op = first_commutative_argument (ifn);
325 }
190c2236 326 }
b9787581 327 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
f7e531cf 328 {
b0b4483e 329 enum tree_code code = gimple_assign_rhs_code (stmt);
f7e531cf 330 number_of_oprnds = gimple_num_ops (stmt) - 1;
4cecd659
BC
331 /* Swap can only be done for cond_expr if asked to, otherwise we
332 could result in different comparison code to the first stmt. */
a414c77f
IE
333 if (gimple_assign_rhs_code (stmt) == COND_EXPR
334 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
b0b4483e
RB
335 {
336 first_op_cond = true;
b0b4483e
RB
337 number_of_oprnds++;
338 }
339 else
0246112a 340 commutative_op = commutative_tree_code (code) ? 0U : -1U;
f7e531cf 341 }
d092494c 342 else
b0b4483e 343 return -1;
ebfd146a 344
4cecd659
BC
345 bool swapped = (*swap != 0);
346 gcc_assert (!swapped || first_op_cond);
ebfd146a
IR
347 for (i = 0; i < number_of_oprnds; i++)
348 {
b0b4483e
RB
349again:
350 if (first_op_cond)
f7e531cf 351 {
4cecd659
BC
352 /* Map indicating how operands of cond_expr should be swapped. */
353 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
354 int *map = maps[*swap];
355
356 if (i < 2)
b9787581
RS
357 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
358 first_op_idx), map[i]);
b0b4483e 359 else
b9787581 360 oprnd = gimple_op (stmt_info->stmt, map[i]);
f7e531cf
IR
361 }
362 else
b9787581 363 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
f7e531cf 364
9771b263 365 oprnd_info = (*oprnds_info)[i];
ebfd146a 366
fef96d8e
RS
367 stmt_vec_info def_stmt_info;
368 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
ebfd146a 369 {
73fbfcad 370 if (dump_enabled_p ())
3c2a8ed0
DM
371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
372 "Build SLP failed: can't analyze def for %T\n",
373 oprnd);
ebfd146a 374
b0b4483e 375 return -1;
ebfd146a
IR
376 }
377
effb52da
RB
378 if (second)
379 oprnd_info->second_pattern = pattern;
380
d092494c 381 if (first)
ebfd146a 382 {
d092494c
IR
383 oprnd_info->first_dt = dt;
384 oprnd_info->first_pattern = pattern;
793d9a16 385 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 386 }
ebfd146a
IR
387 else
388 {
d092494c
IR
389 /* Not first stmt of the group, check that the def-stmt/s match
390 the def-stmt/s of the first stmt. Allow different definition
391 types for reduction chains: the first stmt must be a
392 vect_reduction_def (a phi node), and the rest
393 vect_internal_def. */
018b2744
RS
394 tree type = TREE_TYPE (oprnd);
395 if ((oprnd_info->first_dt != dt
396 && !(oprnd_info->first_dt == vect_reduction_def
397 && dt == vect_internal_def)
398 && !((oprnd_info->first_dt == vect_external_def
399 || oprnd_info->first_dt == vect_constant_def)
400 && (dt == vect_external_def
401 || dt == vect_constant_def)))
402 || !types_compatible_p (oprnd_info->first_op_type, type))
ebfd146a 403 {
b0b4483e 404 /* Try swapping operands if we got a mismatch. */
0246112a 405 if (i == commutative_op && !swapped)
b0b4483e
RB
406 {
407 swapped = true;
408 goto again;
409 }
410
abf9bfbc
RB
411 if (dump_enabled_p ())
412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 413 "Build SLP failed: different types\n");
d092494c 414
b0b4483e 415 return 1;
ebfd146a 416 }
018b2744
RS
417 if ((dt == vect_constant_def
418 || dt == vect_external_def)
419 && !current_vector_size.is_constant ()
420 && (TREE_CODE (type) == BOOLEAN_TYPE
421 || !can_duplicate_and_interleave_p (stmts.length (),
422 TYPE_MODE (type))))
a23644f2
RS
423 {
424 if (dump_enabled_p ())
3c2a8ed0
DM
425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
426 "Build SLP failed: invalid type of def "
427 "for variable-length SLP %T\n", oprnd);
a23644f2
RS
428 return -1;
429 }
018b2744
RS
430 }
431
432 /* Check the types of the definitions. */
433 switch (dt)
434 {
435 case vect_constant_def:
436 case vect_external_def:
ebfd146a 437 break;
b8698a0f 438
c78e3652 439 case vect_reduction_def:
e7baeb39 440 case vect_induction_def:
8644a673 441 case vect_internal_def:
fef96d8e 442 oprnd_info->def_stmts.quick_push (def_stmt_info);
ebfd146a
IR
443 break;
444
445 default:
446 /* FORNOW: Not supported. */
73fbfcad 447 if (dump_enabled_p ())
3c2a8ed0
DM
448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
449 "Build SLP failed: illegal type of def %T\n",
450 oprnd);
ebfd146a 451
b0b4483e 452 return -1;
ebfd146a
IR
453 }
454 }
455
b0b4483e
RB
456 /* Swap operands. */
457 if (swapped)
458 {
78810bd3
RB
459 /* If there are already uses of this stmt in a SLP instance then
460 we've committed to the operand order and can't swap it. */
b9787581 461 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
78810bd3
RB
462 {
463 if (dump_enabled_p ())
3c2a8ed0
DM
464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
465 "Build SLP failed: cannot swap operands of "
466 "shared stmt %G", stmt_info->stmt);
78810bd3
RB
467 return -1;
468 }
469
b0b4483e
RB
470 if (first_op_cond)
471 {
0246112a 472 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
b0b4483e 473 tree cond = gimple_assign_rhs1 (stmt);
4cecd659
BC
474 enum tree_code code = TREE_CODE (cond);
475
476 /* Swap. */
477 if (*swap == 1)
478 {
479 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
480 &TREE_OPERAND (cond, 1));
481 TREE_SET_CODE (cond, swap_tree_comparison (code));
482 }
483 /* Invert. */
484 else
485 {
486 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
487 gimple_assign_rhs3_ptr (stmt));
488 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
489 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
490 gcc_assert (code != ERROR_MARK);
491 TREE_SET_CODE (cond, code);
492 }
b0b4483e
RB
493 }
494 else
0246112a
RS
495 {
496 unsigned int op = commutative_op + first_op_idx;
497 swap_ssa_operands (stmt_info->stmt,
498 gimple_op_ptr (stmt_info->stmt, op),
499 gimple_op_ptr (stmt_info->stmt, op + 1));
500 }
78810bd3 501 if (dump_enabled_p ())
3c2a8ed0
DM
502 dump_printf_loc (MSG_NOTE, vect_location,
503 "swapped operands to match def types in %G",
504 stmt_info->stmt);
b0b4483e
RB
505 }
506
4cecd659 507 *swap = swapped;
b0b4483e 508 return 0;
ebfd146a
IR
509}
510
5249ee4d
RS
511/* Return true if call statements CALL1 and CALL2 are similar enough
512 to be combined into the same SLP group. */
513
514static bool
515compatible_calls_p (gcall *call1, gcall *call2)
516{
517 unsigned int nargs = gimple_call_num_args (call1);
518 if (nargs != gimple_call_num_args (call2))
519 return false;
520
521 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
522 return false;
523
524 if (gimple_call_internal_p (call1))
525 {
526 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
527 TREE_TYPE (gimple_call_lhs (call2))))
528 return false;
529 for (unsigned int i = 0; i < nargs; ++i)
530 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
531 TREE_TYPE (gimple_call_arg (call2, i))))
532 return false;
533 }
534 else
535 {
536 if (!operand_equal_p (gimple_call_fn (call1),
537 gimple_call_fn (call2), 0))
538 return false;
539
540 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
541 return false;
542 }
543 return true;
544}
545
b161f2c9 546/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
d7609678 547 caller's attempt to find the vector type in STMT_INFO with the narrowest
b161f2c9 548 element type. Return true if VECTYPE is nonnull and if it is valid
d7609678
RS
549 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
550 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
551 vect_build_slp_tree. */
b161f2c9
RS
552
553static bool
d7609678 554vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
4b6068ea 555 tree vectype, poly_uint64 *max_nunits)
b161f2c9
RS
556{
557 if (!vectype)
558 {
559 if (dump_enabled_p ())
3c2a8ed0
DM
560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
561 "Build SLP failed: unsupported data-type in %G\n",
562 stmt_info->stmt);
b161f2c9
RS
563 /* Fatal mismatch. */
564 return false;
565 }
566
567 /* If populating the vector type requires unrolling then fail
568 before adjusting *max_nunits for basic-block vectorization. */
4b6068ea
RS
569 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
570 unsigned HOST_WIDE_INT const_nunits;
d7609678 571 if (STMT_VINFO_BB_VINFO (stmt_info)
4b6068ea
RS
572 && (!nunits.is_constant (&const_nunits)
573 || const_nunits > group_size))
b161f2c9
RS
574 {
575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
576 "Build SLP failed: unrolling required "
577 "in basic block SLP\n");
578 /* Fatal mismatch. */
579 return false;
580 }
581
582 /* In case of multiple types we need to detect the smallest type. */
4b6068ea 583 vect_update_max_nunits (max_nunits, vectype);
b161f2c9
RS
584 return true;
585}
ebfd146a 586
1f3cb663
RS
587/* STMTS is a group of GROUP_SIZE SLP statements in which some
588 statements do the same operation as the first statement and in which
589 the others do ALT_STMT_CODE. Return true if we can take one vector
590 of the first operation and one vector of the second and permute them
591 to get the required result. VECTYPE is the type of the vector that
592 would be permuted. */
593
594static bool
b9787581
RS
595vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
596 unsigned int group_size, tree vectype,
597 tree_code alt_stmt_code)
1f3cb663
RS
598{
599 unsigned HOST_WIDE_INT count;
600 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
601 return false;
602
603 vec_perm_builder sel (count, count, 1);
604 for (unsigned int i = 0; i < count; ++i)
605 {
606 unsigned int elt = i;
b9787581
RS
607 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
608 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
1f3cb663
RS
609 elt += count;
610 sel.quick_push (elt);
611 }
612 vec_perm_indices indices (sel, 2, count);
613 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
614}
615
6983e6b5
RB
616/* Verify if the scalar stmts STMTS are isomorphic, require data
617 permutation or are of unsupported types of operation. Return
618 true if they are, otherwise return false and indicate in *MATCHES
619 which stmts are not isomorphic to the first one. If MATCHES[0]
620 is false then this indicates the comparison could not be
4cecd659
BC
621 carried out or the stmts will never be vectorized by SLP.
622
623 Note COND_EXPR is possibly ismorphic to another one after swapping its
624 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
625 the first stmt by swapping the two operands of comparison; set SWAP[i]
626 to 2 if stmt I is isormorphic to the first stmt by inverting the code
627 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
628 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
ebfd146a
IR
629
630static bool
d7609678 631vect_build_slp_tree_1 (unsigned char *swap,
b9787581 632 vec<stmt_vec_info> stmts, unsigned int group_size,
5249ee4d
RS
633 poly_uint64 *max_nunits, bool *matches,
634 bool *two_operators)
ebfd146a 635{
ebfd146a 636 unsigned int i;
b9787581 637 stmt_vec_info first_stmt_info = stmts[0];
6876e5bc
RB
638 enum tree_code first_stmt_code = ERROR_MARK;
639 enum tree_code alt_stmt_code = ERROR_MARK;
640 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 641 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 642 tree lhs;
6983e6b5 643 bool need_same_oprnds = false;
1f3cb663 644 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
ebfd146a
IR
645 optab optab;
646 int icode;
ef4bddc2
RS
647 machine_mode optab_op2_mode;
648 machine_mode vec_mode;
bffb8014 649 stmt_vec_info first_load = NULL, prev_first_load = NULL;
d092494c 650
ebfd146a 651 /* For every stmt in NODE find its def stmt/s. */
b9787581
RS
652 stmt_vec_info stmt_info;
653 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
ebfd146a 654 {
b9787581 655 gimple *stmt = stmt_info->stmt;
4cecd659 656 swap[i] = 0;
6983e6b5
RB
657 matches[i] = false;
658
73fbfcad 659 if (dump_enabled_p ())
3c2a8ed0 660 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
ebfd146a 661
4b5caab7 662 /* Fail to vectorize statements marked as unvectorizable. */
b9787581 663 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
4b5caab7 664 {
73fbfcad 665 if (dump_enabled_p ())
3c2a8ed0
DM
666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
667 "Build SLP failed: unvectorizable statement %G",
668 stmt);
6983e6b5
RB
669 /* Fatal mismatch. */
670 matches[0] = false;
4b5caab7
IR
671 return false;
672 }
673
ebfd146a
IR
674 lhs = gimple_get_lhs (stmt);
675 if (lhs == NULL_TREE)
676 {
73fbfcad 677 if (dump_enabled_p ())
3c2a8ed0
DM
678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
679 "Build SLP failed: not GIMPLE_ASSIGN nor "
680 "GIMPLE_CALL %G", stmt);
6983e6b5
RB
681 /* Fatal mismatch. */
682 matches[0] = false;
ebfd146a
IR
683 return false;
684 }
685
1f3cb663
RS
686 tree nunits_vectype;
687 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
688 &nunits_vectype)
689 || (nunits_vectype
d7609678 690 && !vect_record_max_nunits (stmt_info, group_size,
1f3cb663 691 nunits_vectype, max_nunits)))
b161f2c9 692 {
6983e6b5
RB
693 /* Fatal mismatch. */
694 matches[0] = false;
1f3cb663
RS
695 return false;
696 }
697
698 gcc_assert (vectype);
b8698a0f 699
538dd0b7 700 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
701 {
702 rhs_code = CALL_EXPR;
5249ee4d
RS
703 if ((gimple_call_internal_p (call_stmt)
704 && (!vectorizable_internal_fn_p
705 (gimple_call_internal_fn (call_stmt))))
538dd0b7
DM
706 || gimple_call_tail_p (call_stmt)
707 || gimple_call_noreturn_p (call_stmt)
708 || !gimple_call_nothrow_p (call_stmt)
709 || gimple_call_chain (call_stmt))
190c2236 710 {
73fbfcad 711 if (dump_enabled_p ())
3c2a8ed0
DM
712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
713 "Build SLP failed: unsupported call type %G",
714 call_stmt);
6983e6b5
RB
715 /* Fatal mismatch. */
716 matches[0] = false;
190c2236
JJ
717 return false;
718 }
719 }
ebfd146a
IR
720 else
721 rhs_code = gimple_assign_rhs_code (stmt);
722
723 /* Check the operation. */
724 if (i == 0)
725 {
726 first_stmt_code = rhs_code;
727
b8698a0f 728 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
729 vector shift with scalar shift operand. */
730 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
731 || rhs_code == LROTATE_EXPR
732 || rhs_code == RROTATE_EXPR)
733 {
1f3cb663
RS
734 if (vectype == boolean_type_node)
735 {
736 if (dump_enabled_p ())
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738 "Build SLP failed: shift of a"
739 " boolean.\n");
740 /* Fatal mismatch. */
741 matches[0] = false;
742 return false;
743 }
744
ebfd146a
IR
745 vec_mode = TYPE_MODE (vectype);
746
747 /* First see if we have a vector/vector shift. */
748 optab = optab_for_tree_code (rhs_code, vectype,
749 optab_vector);
750
751 if (!optab
947131ba 752 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
753 {
754 /* No vector/vector shift, try for a vector/scalar shift. */
755 optab = optab_for_tree_code (rhs_code, vectype,
756 optab_scalar);
757
758 if (!optab)
759 {
73fbfcad 760 if (dump_enabled_p ())
78c60e3d 761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 762 "Build SLP failed: no optab.\n");
6983e6b5
RB
763 /* Fatal mismatch. */
764 matches[0] = false;
ebfd146a
IR
765 return false;
766 }
947131ba 767 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
768 if (icode == CODE_FOR_nothing)
769 {
73fbfcad 770 if (dump_enabled_p ())
78c60e3d
SS
771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
772 "Build SLP failed: "
e645e942 773 "op not supported by target.\n");
6983e6b5
RB
774 /* Fatal mismatch. */
775 matches[0] = false;
ebfd146a
IR
776 return false;
777 }
778 optab_op2_mode = insn_data[icode].operand[2].mode;
779 if (!VECTOR_MODE_P (optab_op2_mode))
780 {
781 need_same_oprnds = true;
782 first_op1 = gimple_assign_rhs2 (stmt);
783 }
784 }
785 }
36ba4aae
IR
786 else if (rhs_code == WIDEN_LSHIFT_EXPR)
787 {
788 need_same_oprnds = true;
789 first_op1 = gimple_assign_rhs2 (stmt);
790 }
ebfd146a
IR
791 }
792 else
793 {
6876e5bc
RB
794 if (first_stmt_code != rhs_code
795 && alt_stmt_code == ERROR_MARK)
796 alt_stmt_code = rhs_code;
ebfd146a
IR
797 if (first_stmt_code != rhs_code
798 && (first_stmt_code != IMAGPART_EXPR
799 || rhs_code != REALPART_EXPR)
800 && (first_stmt_code != REALPART_EXPR
69f11a13 801 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
802 /* Handle mismatches in plus/minus by computing both
803 and merging the results. */
804 && !((first_stmt_code == PLUS_EXPR
805 || first_stmt_code == MINUS_EXPR)
806 && (alt_stmt_code == PLUS_EXPR
807 || alt_stmt_code == MINUS_EXPR)
808 && rhs_code == alt_stmt_code)
b9787581 809 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
69f11a13 810 && (first_stmt_code == ARRAY_REF
38000232 811 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
812 || first_stmt_code == INDIRECT_REF
813 || first_stmt_code == COMPONENT_REF
814 || first_stmt_code == MEM_REF)))
ebfd146a 815 {
73fbfcad 816 if (dump_enabled_p ())
ebfd146a 817 {
78c60e3d
SS
818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
819 "Build SLP failed: different operation "
3c2a8ed0 820 "in stmt %G", stmt);
6876e5bc 821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3c2a8ed0 822 "original stmt %G", first_stmt_info->stmt);
ebfd146a 823 }
6983e6b5
RB
824 /* Mismatch. */
825 continue;
ebfd146a 826 }
b8698a0f
L
827
828 if (need_same_oprnds
ebfd146a
IR
829 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
830 {
73fbfcad 831 if (dump_enabled_p ())
3c2a8ed0
DM
832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
833 "Build SLP failed: different shift "
834 "arguments in %G", stmt);
6983e6b5
RB
835 /* Mismatch. */
836 continue;
ebfd146a 837 }
190c2236
JJ
838
839 if (rhs_code == CALL_EXPR)
840 {
b9787581 841 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
5249ee4d 842 as_a <gcall *> (stmt)))
190c2236 843 {
73fbfcad 844 if (dump_enabled_p ())
3c2a8ed0
DM
845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
846 "Build SLP failed: different calls in %G",
847 stmt);
6983e6b5
RB
848 /* Mismatch. */
849 continue;
190c2236
JJ
850 }
851 }
ebfd146a
IR
852 }
853
0d0293ac 854 /* Grouped store or load. */
b9787581 855 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a
IR
856 {
857 if (REFERENCE_CLASS_P (lhs))
858 {
859 /* Store. */
6983e6b5 860 ;
ebfd146a 861 }
b5aeb3bb
IR
862 else
863 {
864 /* Load. */
b9787581 865 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
b5aeb3bb
IR
866 if (prev_first_load)
867 {
868 /* Check that there are no loads from different interleaving
6983e6b5
RB
869 chains in the same node. */
870 if (prev_first_load != first_load)
78c60e3d 871 {
73fbfcad 872 if (dump_enabled_p ())
3c2a8ed0
DM
873 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
874 vect_location,
875 "Build SLP failed: different "
876 "interleaving chains in one node %G",
877 stmt);
6983e6b5
RB
878 /* Mismatch. */
879 continue;
b5aeb3bb
IR
880 }
881 }
882 else
883 prev_first_load = first_load;
ebfd146a 884 }
0d0293ac 885 } /* Grouped access. */
ebfd146a
IR
886 else
887 {
888 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
889 {
0d0293ac 890 /* Not grouped load. */
73fbfcad 891 if (dump_enabled_p ())
3c2a8ed0
DM
892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
893 "Build SLP failed: not grouped load %G", stmt);
ebfd146a 894
0d0293ac 895 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
896 /* Fatal mismatch. */
897 matches[0] = false;
ebfd146a
IR
898 return false;
899 }
900
901 /* Not memory operation. */
902 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 903 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 904 && TREE_CODE_CLASS (rhs_code) != tcc_expression
42fd8198 905 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
190c2236 906 && rhs_code != CALL_EXPR)
ebfd146a 907 {
73fbfcad 908 if (dump_enabled_p ())
3c2a8ed0
DM
909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
910 "Build SLP failed: operation unsupported %G",
911 stmt);
6983e6b5
RB
912 /* Fatal mismatch. */
913 matches[0] = false;
ebfd146a
IR
914 return false;
915 }
916
4cecd659
BC
917 if (rhs_code == COND_EXPR)
918 {
919 tree cond_expr = gimple_assign_rhs1 (stmt);
920 enum tree_code cond_code = TREE_CODE (cond_expr);
921 enum tree_code swap_code = ERROR_MARK;
922 enum tree_code invert_code = ERROR_MARK;
f7e531cf
IR
923
924 if (i == 0)
925 first_cond_code = TREE_CODE (cond_expr);
4cecd659
BC
926 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
927 {
928 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
929 swap_code = swap_tree_comparison (cond_code);
930 invert_code = invert_tree_comparison (cond_code, honor_nans);
931 }
932
933 if (first_cond_code == cond_code)
934 ;
935 /* Isomorphic can be achieved by swapping. */
936 else if (first_cond_code == swap_code)
937 swap[i] = 1;
938 /* Isomorphic can be achieved by inverting. */
939 else if (first_cond_code == invert_code)
940 swap[i] = 2;
941 else
942 {
943 if (dump_enabled_p ())
3c2a8ed0
DM
944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
945 "Build SLP failed: different"
946 " operation %G", stmt);
6983e6b5
RB
947 /* Mismatch. */
948 continue;
f7e531cf 949 }
4cecd659 950 }
ebfd146a 951 }
6983e6b5
RB
952
953 matches[i] = true;
954 }
955
956 for (i = 0; i < group_size; ++i)
957 if (!matches[i])
958 return false;
959
6876e5bc
RB
960 /* If we allowed a two-operation SLP node verify the target can cope
961 with the permute we are going to use. */
962 if (alt_stmt_code != ERROR_MARK
963 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
964 {
1f3cb663
RS
965 if (vectype == boolean_type_node
966 || !vect_two_operations_perm_ok_p (stmts, group_size,
967 vectype, alt_stmt_code))
6876e5bc
RB
968 {
969 for (i = 0; i < group_size; ++i)
b9787581 970 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
6876e5bc
RB
971 {
972 matches[i] = false;
973 if (dump_enabled_p ())
974 {
975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
976 "Build SLP failed: different operation "
3c2a8ed0 977 "in stmt %G", stmts[i]->stmt);
6876e5bc 978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3c2a8ed0 979 "original stmt %G", first_stmt_info->stmt);
6876e5bc
RB
980 }
981 }
982 return false;
983 }
984 *two_operators = true;
985 }
986
6983e6b5
RB
987 return true;
988}
989
26d66f28
RB
990/* Traits for the hash_set to record failed SLP builds for a stmt set.
991 Note we never remove apart from at destruction time so we do not
992 need a special value for deleted that differs from empty. */
993struct bst_traits
994{
b9787581
RS
995 typedef vec <stmt_vec_info> value_type;
996 typedef vec <stmt_vec_info> compare_type;
26d66f28
RB
997 static inline hashval_t hash (value_type);
998 static inline bool equal (value_type existing, value_type candidate);
999 static inline bool is_empty (value_type x) { return !x.exists (); }
1000 static inline bool is_deleted (value_type x) { return !x.exists (); }
1001 static inline void mark_empty (value_type &x) { x.release (); }
1002 static inline void mark_deleted (value_type &x) { x.release (); }
1003 static inline void remove (value_type &x) { x.release (); }
1004};
1005inline hashval_t
1006bst_traits::hash (value_type x)
1007{
1008 inchash::hash h;
1009 for (unsigned i = 0; i < x.length (); ++i)
b9787581 1010 h.add_int (gimple_uid (x[i]->stmt));
26d66f28
RB
1011 return h.end ();
1012}
1013inline bool
1014bst_traits::equal (value_type existing, value_type candidate)
1015{
1016 if (existing.length () != candidate.length ())
1017 return false;
1018 for (unsigned i = 0; i < existing.length (); ++i)
1019 if (existing[i] != candidate[i])
1020 return false;
1021 return true;
1022}
1023
f7300fff
RB
1024typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
1025static scalar_stmts_set_t *bst_fail;
26d66f28 1026
68435eb2
RB
1027typedef hash_map <vec <gimple *>, slp_tree,
1028 simple_hashmap_traits <bst_traits, slp_tree> >
1029 scalar_stmts_to_slp_tree_map_t;
1030
26d66f28
RB
1031static slp_tree
1032vect_build_slp_tree_2 (vec_info *vinfo,
b9787581 1033 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1034 poly_uint64 *max_nunits,
26d66f28
RB
1035 vec<slp_tree> *loads,
1036 bool *matches, unsigned *npermutes, unsigned *tree_size,
1037 unsigned max_tree_size);
6983e6b5 1038
e403d17e 1039static slp_tree
310213d4 1040vect_build_slp_tree (vec_info *vinfo,
b9787581 1041 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1042 poly_uint64 *max_nunits, vec<slp_tree> *loads,
1428105c
RB
1043 bool *matches, unsigned *npermutes, unsigned *tree_size,
1044 unsigned max_tree_size)
26d66f28
RB
1045{
1046 if (bst_fail->contains (stmts))
1047 return NULL;
1048 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
1049 loads, matches, npermutes, tree_size,
1050 max_tree_size);
1051 /* When SLP build fails for stmts record this, otherwise SLP build
1052 can be exponential in time when we allow to construct parts from
1053 scalars, see PR81723. */
1054 if (! res)
1055 {
b9787581 1056 vec <stmt_vec_info> x;
26d66f28
RB
1057 x.create (stmts.length ());
1058 x.splice (stmts);
1059 bst_fail->add (x);
1060 }
1061 return res;
1062}
1063
1064/* Recursively build an SLP tree starting from NODE.
1065 Fail (and return a value not equal to zero) if def-stmts are not
1066 isomorphic, require data permutation or are of unsupported types of
1067 operation. Otherwise, return 0.
1068 The value returned is the depth in the SLP tree where a mismatch
1069 was found. */
1070
1071static slp_tree
1072vect_build_slp_tree_2 (vec_info *vinfo,
b9787581 1073 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1074 poly_uint64 *max_nunits,
26d66f28
RB
1075 vec<slp_tree> *loads,
1076 bool *matches, unsigned *npermutes, unsigned *tree_size,
1077 unsigned max_tree_size)
6983e6b5 1078{
4b6068ea
RS
1079 unsigned nops, i, this_tree_size = 0;
1080 poly_uint64 this_max_nunits = *max_nunits;
e403d17e 1081 slp_tree node;
6983e6b5 1082
6983e6b5
RB
1083 matches[0] = false;
1084
b9787581
RS
1085 stmt_vec_info stmt_info = stmts[0];
1086 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
6983e6b5 1087 nops = gimple_call_num_args (stmt);
b9787581 1088 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6983e6b5
RB
1089 {
1090 nops = gimple_num_ops (stmt) - 1;
1091 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1092 nops++;
ebfd146a 1093 }
b9787581 1094 else if (is_a <gphi *> (stmt_info->stmt))
e7baeb39 1095 nops = 0;
6983e6b5 1096 else
e403d17e 1097 return NULL;
6983e6b5 1098
c78e3652
RB
1099 /* If the SLP node is a PHI (induction or reduction), terminate
1100 the recursion. */
b9787581 1101 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
e7baeb39 1102 {
b161f2c9
RS
1103 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1104 tree vectype = get_vectype_for_scalar_type (scalar_type);
d7609678 1105 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
b161f2c9
RS
1106 return NULL;
1107
b9787581 1108 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
c78e3652 1109 /* Induction from different IVs is not supported. */
719488f8
RB
1110 if (def_type == vect_induction_def)
1111 {
b9787581
RS
1112 stmt_vec_info other_info;
1113 FOR_EACH_VEC_ELT (stmts, i, other_info)
1114 if (stmt_info != other_info)
719488f8
RB
1115 return NULL;
1116 }
1117 else
1118 {
1119 /* Else def types have to match. */
b9787581
RS
1120 stmt_vec_info other_info;
1121 FOR_EACH_VEC_ELT (stmts, i, other_info)
719488f8
RB
1122 {
1123 /* But for reduction chains only check on the first stmt. */
b9787581
RS
1124 if (REDUC_GROUP_FIRST_ELEMENT (other_info)
1125 && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
719488f8 1126 continue;
b9787581 1127 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
719488f8
RB
1128 return NULL;
1129 }
1130 }
e7baeb39
RB
1131 node = vect_create_new_slp_node (stmts);
1132 return node;
1133 }
1134
1135
6876e5bc 1136 bool two_operators = false;
4cecd659 1137 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
d7609678 1138 if (!vect_build_slp_tree_1 (swap, stmts, group_size,
e403d17e
RB
1139 &this_max_nunits, matches, &two_operators))
1140 return NULL;
ebfd146a 1141
6983e6b5 1142 /* If the SLP node is a load, terminate the recursion. */
b9787581
RS
1143 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1144 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
ebfd146a 1145 {
e403d17e
RB
1146 *max_nunits = this_max_nunits;
1147 node = vect_create_new_slp_node (stmts);
1148 loads->safe_push (node);
1149 return node;
ebfd146a
IR
1150 }
1151
6983e6b5
RB
1152 /* Get at the operands, verifying they are compatible. */
1153 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1154 slp_oprnd_info oprnd_info;
b9787581 1155 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
6983e6b5 1156 {
4cecd659 1157 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
018b2744 1158 stmts, i, &oprnds_info);
4cecd659
BC
1159 if (res != 0)
1160 matches[(res == -1) ? 0 : i] = false;
1161 if (!matches[0])
1162 break;
6983e6b5 1163 }
b0b4483e
RB
1164 for (i = 0; i < group_size; ++i)
1165 if (!matches[i])
1166 {
1167 vect_free_oprnd_info (oprnds_info);
e403d17e 1168 return NULL;
b0b4483e 1169 }
6983e6b5 1170
e403d17e
RB
1171 auto_vec<slp_tree, 4> children;
1172 auto_vec<slp_tree> this_loads;
1173
b9787581 1174 stmt_info = stmts[0];
6983e6b5 1175
26d66f28
RB
1176 if (tree_size)
1177 max_tree_size -= *tree_size;
1178
b8698a0f 1179 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 1180 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 1181 {
d092494c 1182 slp_tree child;
e403d17e
RB
1183 unsigned old_nloads = this_loads.length ();
1184 unsigned old_tree_size = this_tree_size;
1185 unsigned int j;
b8698a0f 1186
e7baeb39 1187 if (oprnd_info->first_dt != vect_internal_def
c78e3652 1188 && oprnd_info->first_dt != vect_reduction_def
e7baeb39 1189 && oprnd_info->first_dt != vect_induction_def)
d092494c 1190 continue;
ebfd146a 1191
1428105c
RB
1192 if (++this_tree_size > max_tree_size)
1193 {
e403d17e 1194 FOR_EACH_VEC_ELT (children, j, child)
6e2dd807 1195 vect_free_slp_tree (child, false);
1428105c 1196 vect_free_oprnd_info (oprnds_info);
e403d17e 1197 return NULL;
1428105c
RB
1198 }
1199
e403d17e
RB
1200 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1201 group_size, &this_max_nunits,
1202 &this_loads, matches, npermutes,
1203 &this_tree_size,
1204 max_tree_size)) != NULL)
6983e6b5 1205 {
3fc356dc
RB
1206 /* If we have all children of child built up from scalars then just
1207 throw that away and build it up this node from scalars. */
995b6fe0
RB
1208 if (!SLP_TREE_CHILDREN (child).is_empty ()
1209 /* ??? Rejecting patterns this way doesn't work. We'd have to
1210 do extra work to cancel the pattern so the uses see the
1211 scalar version. */
b9787581 1212 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
3fc356dc 1213 {
3fc356dc
RB
1214 slp_tree grandchild;
1215
1216 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1217 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
3fc356dc
RB
1218 break;
1219 if (!grandchild)
1220 {
1221 /* Roll back. */
e403d17e
RB
1222 this_loads.truncate (old_nloads);
1223 this_tree_size = old_tree_size;
3fc356dc 1224 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1225 vect_free_slp_tree (grandchild, false);
3fc356dc
RB
1226 SLP_TREE_CHILDREN (child).truncate (0);
1227
1228 dump_printf_loc (MSG_NOTE, vect_location,
1229 "Building parent vector operands from "
1230 "scalars instead\n");
1231 oprnd_info->def_stmts = vNULL;
603cca93 1232 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1233 children.safe_push (child);
3fc356dc
RB
1234 continue;
1235 }
1236 }
1237
6983e6b5 1238 oprnd_info->def_stmts = vNULL;
e403d17e 1239 children.safe_push (child);
6983e6b5
RB
1240 continue;
1241 }
1242
90dd6e3d
RB
1243 /* If the SLP build failed fatally and we analyze a basic-block
1244 simply treat nodes we fail to build as externally defined
1245 (and thus build vectors from the scalar defs).
1246 The cost model will reject outright expensive cases.
1247 ??? This doesn't treat cases where permutation ultimatively
1248 fails (or we don't try permutation below). Ideally we'd
1249 even compute a permutation that will end up with the maximum
1250 SLP tree size... */
310213d4 1251 if (is_a <bb_vec_info> (vinfo)
90dd6e3d
RB
1252 && !matches[0]
1253 /* ??? Rejecting patterns this way doesn't work. We'd have to
1254 do extra work to cancel the pattern so the uses see the
1255 scalar version. */
b9787581 1256 && !is_pattern_stmt_p (stmt_info))
90dd6e3d
RB
1257 {
1258 dump_printf_loc (MSG_NOTE, vect_location,
1259 "Building vector operands from scalars\n");
e403d17e 1260 child = vect_create_new_slp_node (oprnd_info->def_stmts);
603cca93 1261 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e
RB
1262 children.safe_push (child);
1263 oprnd_info->def_stmts = vNULL;
90dd6e3d
RB
1264 continue;
1265 }
1266
6983e6b5
RB
1267 /* If the SLP build for operand zero failed and operand zero
1268 and one can be commutated try that for the scalar stmts
1269 that failed the match. */
1270 if (i == 0
1271 /* A first scalar stmt mismatch signals a fatal mismatch. */
1272 && matches[0]
1273 /* ??? For COND_EXPRs we can swap the comparison operands
1274 as well as the arms under some constraints. */
1275 && nops == 2
1276 && oprnds_info[1]->first_dt == vect_internal_def
b9787581 1277 && is_gimple_assign (stmt_info->stmt)
6983e6b5
RB
1278 /* Do so only if the number of not successful permutes was nor more
1279 than a cut-ff as re-trying the recursive match on
1280 possibly each level of the tree would expose exponential
1281 behavior. */
1282 && *npermutes < 4)
1283 {
85c5e2f5
RB
1284 /* See whether we can swap the matching or the non-matching
1285 stmt operands. */
1286 bool swap_not_matching = true;
1287 do
1288 {
1289 for (j = 0; j < group_size; ++j)
1290 {
1291 if (matches[j] != !swap_not_matching)
1292 continue;
b9787581 1293 stmt_vec_info stmt_info = stmts[j];
85c5e2f5 1294 /* Verify if we can swap operands of this stmt. */
b9787581
RS
1295 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1296 if (!stmt
85c5e2f5
RB
1297 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1298 {
1299 if (!swap_not_matching)
1300 goto fail;
1301 swap_not_matching = false;
1302 break;
1303 }
1304 /* Verify if we can safely swap or if we committed to a
1305 specific operand order already.
1306 ??? Instead of modifying GIMPLE stmts here we could
1307 record whether we want to swap operands in the SLP
1308 node and temporarily do that when processing it
1309 (or wrap operand accessors in a helper). */
1310 else if (swap[j] != 0
b9787581 1311 || STMT_VINFO_NUM_SLP_USES (stmt_info))
85c5e2f5
RB
1312 {
1313 if (!swap_not_matching)
1314 {
1315 if (dump_enabled_p ())
3c2a8ed0
DM
1316 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1317 vect_location,
1318 "Build SLP failed: cannot swap "
1319 "operands of shared stmt %G",
1320 stmts[j]->stmt);
85c5e2f5
RB
1321 goto fail;
1322 }
1323 swap_not_matching = false;
1324 break;
1325 }
1326 }
1327 }
1328 while (j != group_size);
78810bd3 1329
6983e6b5 1330 /* Swap mismatched definition stmts. */
b0b4483e
RB
1331 dump_printf_loc (MSG_NOTE, vect_location,
1332 "Re-trying with swapped operands of stmts ");
e72baed7 1333 for (j = 0; j < group_size; ++j)
85c5e2f5 1334 if (matches[j] == !swap_not_matching)
6983e6b5 1335 {
6b4db501
MM
1336 std::swap (oprnds_info[0]->def_stmts[j],
1337 oprnds_info[1]->def_stmts[j]);
b0b4483e 1338 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1339 }
b0b4483e 1340 dump_printf (MSG_NOTE, "\n");
74574669
RB
1341 /* And try again with scratch 'matches' ... */
1342 bool *tem = XALLOCAVEC (bool, group_size);
e403d17e
RB
1343 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1344 group_size, &this_max_nunits,
1345 &this_loads, tem, npermutes,
1346 &this_tree_size,
1347 max_tree_size)) != NULL)
6983e6b5 1348 {
60f2b864
RB
1349 /* ... so if successful we can apply the operand swapping
1350 to the GIMPLE IL. This is necessary because for example
1351 vect_get_slp_defs uses operand indexes and thus expects
1352 canonical operand order. This is also necessary even
1353 if we end up building the operand from scalars as
1354 we'll continue to process swapped operand two. */
1355 for (j = 0; j < group_size; ++j)
b9787581 1356 gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
f47cda24 1357 for (j = 0; j < group_size; ++j)
b9787581
RS
1358 if (matches[j] == !swap_not_matching)
1359 {
1360 gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
1361 /* Avoid swapping operands twice. */
1362 if (gimple_plf (stmt, GF_PLF_1))
1363 continue;
1364 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1365 gimple_assign_rhs2_ptr (stmt));
1366 gimple_set_plf (stmt, GF_PLF_1, true);
1367 }
f47cda24
RB
1368 /* Verify we swap all duplicates or none. */
1369 if (flag_checking)
1370 for (j = 0; j < group_size; ++j)
b9787581
RS
1371 gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
1372 == (matches[j] == !swap_not_matching));
60f2b864 1373
85c69b0b
RB
1374 /* If we have all children of child built up from scalars then
1375 just throw that away and build it up this node from scalars. */
995b6fe0
RB
1376 if (!SLP_TREE_CHILDREN (child).is_empty ()
1377 /* ??? Rejecting patterns this way doesn't work. We'd have
1378 to do extra work to cancel the pattern so the uses see the
1379 scalar version. */
b9787581 1380 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
85c69b0b
RB
1381 {
1382 unsigned int j;
1383 slp_tree grandchild;
1384
1385 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1386 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
85c69b0b
RB
1387 break;
1388 if (!grandchild)
1389 {
1390 /* Roll back. */
e403d17e
RB
1391 this_loads.truncate (old_nloads);
1392 this_tree_size = old_tree_size;
85c69b0b 1393 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1394 vect_free_slp_tree (grandchild, false);
85c69b0b
RB
1395 SLP_TREE_CHILDREN (child).truncate (0);
1396
1397 dump_printf_loc (MSG_NOTE, vect_location,
1398 "Building parent vector operands from "
1399 "scalars instead\n");
1400 oprnd_info->def_stmts = vNULL;
603cca93 1401 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1402 children.safe_push (child);
85c69b0b
RB
1403 continue;
1404 }
1405 }
1406
6983e6b5 1407 oprnd_info->def_stmts = vNULL;
e403d17e 1408 children.safe_push (child);
6983e6b5
RB
1409 continue;
1410 }
1411
1412 ++*npermutes;
1413 }
1414
78810bd3 1415fail:
e403d17e
RB
1416 gcc_assert (child == NULL);
1417 FOR_EACH_VEC_ELT (children, j, child)
6e2dd807 1418 vect_free_slp_tree (child, false);
6983e6b5 1419 vect_free_oprnd_info (oprnds_info);
e403d17e 1420 return NULL;
ebfd146a
IR
1421 }
1422
e403d17e
RB
1423 vect_free_oprnd_info (oprnds_info);
1424
1428105c
RB
1425 if (tree_size)
1426 *tree_size += this_tree_size;
e403d17e
RB
1427 *max_nunits = this_max_nunits;
1428 loads->safe_splice (this_loads);
1428105c 1429
e403d17e
RB
1430 node = vect_create_new_slp_node (stmts);
1431 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1432 SLP_TREE_CHILDREN (node).splice (children);
1433 return node;
ebfd146a
IR
1434}
1435
78c60e3d 1436/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1437
1438static void
4f5b9c80
DM
1439vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1440 slp_tree node)
ebfd146a
IR
1441{
1442 int i;
b9787581 1443 stmt_vec_info stmt_info;
d755c7ef 1444 slp_tree child;
ebfd146a 1445
603cca93
RB
1446 dump_printf_loc (dump_kind, loc, "node%s\n",
1447 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1448 ? " (external)" : "");
b9787581 1449 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
3c2a8ed0 1450 dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt);
9771b263 1451 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
c2a12ca0 1452 vect_print_slp_tree (dump_kind, loc, child);
ebfd146a
IR
1453}
1454
1455
b8698a0f
L
1456/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1457 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1458 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1459 stmts in NODE are to be marked. */
1460
1461static void
1462vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1463{
1464 int i;
b9787581 1465 stmt_vec_info stmt_info;
d755c7ef 1466 slp_tree child;
ebfd146a 1467
603cca93 1468 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
1469 return;
1470
b9787581 1471 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
ebfd146a 1472 if (j < 0 || i == j)
b9787581 1473 STMT_SLP_TYPE (stmt_info) = mark;
ebfd146a 1474
9771b263 1475 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1476 vect_mark_slp_stmts (child, mark, j);
ebfd146a
IR
1477}
1478
1479
a70d6342
IR
1480/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1481
1482static void
1483vect_mark_slp_stmts_relevant (slp_tree node)
1484{
1485 int i;
a70d6342 1486 stmt_vec_info stmt_info;
d755c7ef 1487 slp_tree child;
a70d6342 1488
603cca93 1489 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
1490 return;
1491
b9787581 1492 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
a70d6342 1493 {
b8698a0f 1494 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1495 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1496 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1497 }
1498
9771b263 1499 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1500 vect_mark_slp_stmts_relevant (child);
a70d6342
IR
1501}
1502
1503
b5aeb3bb
IR
1504/* Rearrange the statements of NODE according to PERMUTATION. */
1505
1506static void
1507vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
01d8bf07 1508 vec<unsigned> permutation)
b5aeb3bb 1509{
b9787581
RS
1510 stmt_vec_info stmt_info;
1511 vec<stmt_vec_info> tmp_stmts;
d755c7ef
RB
1512 unsigned int i;
1513 slp_tree child;
b5aeb3bb 1514
9771b263 1515 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1516 vect_slp_rearrange_stmts (child, group_size, permutation);
b5aeb3bb 1517
9771b263
DN
1518 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1519 tmp_stmts.create (group_size);
d755c7ef 1520 tmp_stmts.quick_grow_cleared (group_size);
b5aeb3bb 1521
b9787581
RS
1522 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1523 tmp_stmts[permutation[i]] = stmt_info;
b5aeb3bb 1524
9771b263 1525 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1526 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1527}
1528
1529
b266b968
RB
1530/* Attempt to reorder stmts in a reduction chain so that we don't
1531 require any load permutation. Return true if that was possible,
1532 otherwise return false. */
1533
1534static bool
1535vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1536{
1537 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1538 unsigned int i, j;
b266b968
RB
1539 unsigned int lidx;
1540 slp_tree node, load;
1541
1542 /* Compare all the permutation sequences to the first one. We know
1543 that at least one load is permuted. */
1544 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1545 if (!node->load_permutation.exists ())
1546 return false;
1547 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1548 {
1549 if (!load->load_permutation.exists ())
1550 return false;
1551 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1552 if (lidx != node->load_permutation[j])
1553 return false;
1554 }
1555
1556 /* Check that the loads in the first sequence are different and there
1557 are no gaps between them. */
7ba9e72d 1558 auto_sbitmap load_index (group_size);
b266b968
RB
1559 bitmap_clear (load_index);
1560 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1561 {
41eefe13 1562 if (lidx >= group_size)
7ba9e72d 1563 return false;
b266b968 1564 if (bitmap_bit_p (load_index, lidx))
7ba9e72d
TS
1565 return false;
1566
b266b968
RB
1567 bitmap_set_bit (load_index, lidx);
1568 }
1569 for (i = 0; i < group_size; i++)
1570 if (!bitmap_bit_p (load_index, i))
7ba9e72d 1571 return false;
b266b968
RB
1572
1573 /* This permutation is valid for reduction. Since the order of the
1574 statements in the nodes is not important unless they are memory
1575 accesses, we can rearrange the statements in all the nodes
1576 according to the order of the loads. */
1577 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1578 node->load_permutation);
1579
1580 /* We are done, no actual permutations need to be generated. */
d9f21f6a 1581 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
b266b968 1582 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
c4e360f4 1583 {
b9787581 1584 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1585 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
c4e360f4
RB
1586 /* But we have to keep those permutations that are required because
1587 of handling of gaps. */
d9f21f6a 1588 if (known_eq (unrolling_factor, 1U)
b9787581
RS
1589 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1590 && DR_GROUP_GAP (first_stmt_info) == 0))
c4e360f4 1591 SLP_TREE_LOAD_PERMUTATION (node).release ();
cbd400b4
RB
1592 else
1593 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1594 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
c4e360f4
RB
1595 }
1596
b266b968
RB
1597 return true;
1598}
1599
01d8bf07
RB
1600/* Check if the required load permutations in the SLP instance
1601 SLP_INSTN are supported. */
ebfd146a
IR
1602
1603static bool
01d8bf07 1604vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1605{
01d8bf07
RB
1606 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1607 unsigned int i, j, k, next;
6983e6b5 1608 slp_tree node;
ebfd146a 1609
73fbfcad 1610 if (dump_enabled_p ())
ebfd146a 1611 {
78c60e3d 1612 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1613 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1614 if (node->load_permutation.exists ())
1615 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1616 dump_printf (MSG_NOTE, "%d ", next);
1617 else
bddc974e
TJ
1618 for (k = 0; k < group_size; ++k)
1619 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1620 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1621 }
1622
b5aeb3bb
IR
1623 /* In case of reduction every load permutation is allowed, since the order
1624 of the reduction statements is not important (as opposed to the case of
0d0293ac 1625 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1626 load nodes are of the same size and have the same permutation (and then
1627 rearrange all the nodes of the SLP instance according to this
1628 permutation). */
1629
1630 /* Check that all the load nodes are of the same size. */
01d8bf07 1631 /* ??? Can't we assert this? */
9771b263 1632 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1633 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1634 return false;
2200fc49 1635
b5aeb3bb 1636 node = SLP_INSTANCE_TREE (slp_instn);
b9787581 1637 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1638
b010117a 1639 /* Reduction (there are no data-refs in the root).
b266b968 1640 In reduction chain the order of the loads is not important. */
b9787581
RS
1641 if (!STMT_VINFO_DATA_REF (stmt_info)
1642 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
c4e360f4 1643 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1644
6aa904c4
IR
1645 /* In basic block vectorization we allow any subchain of an interleaving
1646 chain.
1647 FORNOW: not supported in loop SLP because of realignment compications. */
b9787581 1648 if (STMT_VINFO_BB_VINFO (stmt_info))
6aa904c4 1649 {
240a94da
RB
1650 /* Check whether the loads in an instance form a subchain and thus
1651 no permutation is necessary. */
9771b263 1652 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1653 {
9626d143
RB
1654 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1655 continue;
240a94da 1656 bool subchain_p = true;
bffb8014 1657 stmt_vec_info next_load_info = NULL;
b9787581
RS
1658 stmt_vec_info load_info;
1659 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1660 {
1661 if (j != 0
bffb8014 1662 && (next_load_info != load_info
b9787581 1663 || DR_GROUP_GAP (load_info) != 1))
240a94da
RB
1664 {
1665 subchain_p = false;
1666 break;
1667 }
bffb8014 1668 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
b9787581 1669 }
240a94da
RB
1670 if (subchain_p)
1671 SLP_TREE_LOAD_PERMUTATION (node).release ();
1672 else
1673 {
b9787581 1674 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1675 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
928686b1 1676 unsigned HOST_WIDE_INT nunits;
fe73a332
RB
1677 unsigned k, maxk = 0;
1678 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1679 if (k > maxk)
1680 maxk = k;
1681 /* In BB vectorization we may not actually use a loaded vector
2c53b149 1682 accessing elements in excess of DR_GROUP_SIZE. */
928686b1
RS
1683 tree vectype = STMT_VINFO_VECTYPE (group_info);
1684 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2c53b149 1685 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
fe73a332
RB
1686 {
1687 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1688 "BB vectorization with gaps at the end of "
1689 "a load is not supported\n");
1690 return false;
1691 }
1692
240a94da
RB
1693 /* Verify the permutation can be generated. */
1694 vec<tree> tem;
29afecdf 1695 unsigned n_perms;
240a94da 1696 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1697 1, slp_instn, true, &n_perms))
240a94da
RB
1698 {
1699 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1700 vect_location,
1701 "unsupported load permutation\n");
1702 return false;
1703 }
1704 }
6aa904c4 1705 }
01d8bf07 1706 return true;
6aa904c4
IR
1707 }
1708
31bee964
RB
1709 /* For loop vectorization verify we can generate the permutation. Be
1710 conservative about the vectorization factor, there are permutations
1711 that will use three vector inputs only starting from a specific factor
1712 and the vectorization factor is not yet final.
1713 ??? The SLP instance unrolling factor might not be the maximum one. */
29afecdf 1714 unsigned n_perms;
d9f21f6a
RS
1715 poly_uint64 test_vf
1716 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
31bee964 1717 LOOP_VINFO_VECT_FACTOR
b9787581 1718 (STMT_VINFO_LOOP_VINFO (stmt_info)));
01d8bf07
RB
1719 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1720 if (node->load_permutation.exists ()
31bee964
RB
1721 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1722 slp_instn, true, &n_perms))
01d8bf07 1723 return false;
9b999e8c 1724
01d8bf07 1725 return true;
ebfd146a
IR
1726}
1727
1728
e4a707c4 1729/* Find the last store in SLP INSTANCE. */
ff802fa1 1730
95c68311 1731stmt_vec_info
2e8ab70c 1732vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1733{
95c68311 1734 stmt_vec_info last = NULL;
b9787581 1735 stmt_vec_info stmt_vinfo;
e4a707c4 1736
b9787581 1737 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2e8ab70c 1738 {
211cd1e2 1739 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
95c68311 1740 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2e8ab70c 1741 }
e4a707c4 1742
2e8ab70c 1743 return last;
e4a707c4
IR
1744}
1745
82570274
RS
1746/* Splits a group of stores, currently beginning at FIRST_VINFO, into
1747 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1748 (also containing the first GROUP1_SIZE stmts, since stores are
1749 consecutive), the second containing the remainder.
1ba91a49
AL
1750 Return the first stmt in the second group. */
1751
82570274
RS
1752static stmt_vec_info
1753vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1ba91a49 1754{
bffb8014 1755 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1ba91a49 1756 gcc_assert (group1_size > 0);
2c53b149 1757 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1ba91a49 1758 gcc_assert (group2_size > 0);
2c53b149 1759 DR_GROUP_SIZE (first_vinfo) = group1_size;
1ba91a49 1760
bffb8014 1761 stmt_vec_info stmt_info = first_vinfo;
1ba91a49
AL
1762 for (unsigned i = group1_size; i > 1; i--)
1763 {
bffb8014
RS
1764 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1765 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1766 }
1767 /* STMT is now the last element of the first group. */
bffb8014
RS
1768 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1769 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1ba91a49 1770
bffb8014
RS
1771 DR_GROUP_SIZE (group2) = group2_size;
1772 for (stmt_info = group2; stmt_info;
1773 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1ba91a49 1774 {
bffb8014
RS
1775 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1776 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1777 }
1778
2c53b149 1779 /* For the second group, the DR_GROUP_GAP is that before the original group,
1ba91a49 1780 plus skipping over the first vector. */
bffb8014 1781 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1ba91a49 1782
2c53b149
RB
1783 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1784 DR_GROUP_GAP (first_vinfo) += group2_size;
1ba91a49
AL
1785
1786 if (dump_enabled_p ())
1787 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1788 group1_size, group2_size);
1789
1790 return group2;
1791}
1792
4b6068ea
RS
1793/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1794 statements and a vector of NUNITS elements. */
1795
1796static poly_uint64
1797calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1798{
1799 return exact_div (common_multiple (nunits, group_size), group_size);
1800}
1801
0d0293ac 1802/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1803 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1804 Return FALSE if it's impossible to SLP any stmt in the loop. */
1805
1806static bool
310213d4 1807vect_analyze_slp_instance (vec_info *vinfo,
32e8e429 1808 stmt_vec_info stmt_info, unsigned max_tree_size)
ebfd146a
IR
1809{
1810 slp_instance new_instance;
d092494c 1811 slp_tree node;
2c53b149 1812 unsigned int group_size;
b5aeb3bb 1813 tree vectype, scalar_type = NULL_TREE;
1ba91a49 1814 unsigned int i;
9771b263 1815 vec<slp_tree> loads;
b9787581
RS
1816 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1817 vec<stmt_vec_info> scalar_stmts;
b5aeb3bb 1818
b9787581 1819 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
b5aeb3bb 1820 {
2c53b149
RB
1821 scalar_type = TREE_TYPE (DR_REF (dr));
1822 vectype = get_vectype_for_scalar_type (scalar_type);
b9787581 1823 group_size = DR_GROUP_SIZE (stmt_info);
2c53b149 1824 }
b9787581 1825 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
1826 {
1827 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581
RS
1828 vectype = STMT_VINFO_VECTYPE (stmt_info);
1829 group_size = REDUC_GROUP_SIZE (stmt_info);
b5aeb3bb
IR
1830 }
1831 else
1832 {
310213d4 1833 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581 1834 vectype = STMT_VINFO_VECTYPE (stmt_info);
310213d4 1835 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 1836 }
b8698a0f 1837
ebfd146a
IR
1838 if (!vectype)
1839 {
73fbfcad 1840 if (dump_enabled_p ())
3c2a8ed0
DM
1841 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1842 "Build SLP failed: unsupported data-type %T\n",
1843 scalar_type);
b5aeb3bb 1844
ebfd146a
IR
1845 return false;
1846 }
4b6068ea 1847 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 1848
0d0293ac 1849 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1850 scalar_stmts.create (group_size);
bffb8014 1851 stmt_vec_info next_info = stmt_info;
b9787581 1852 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 1853 {
b5aeb3bb 1854 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
bffb8014 1855 while (next_info)
b5aeb3bb 1856 {
6e6b18e5 1857 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 1858 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2c53b149
RB
1859 }
1860 }
b9787581 1861 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
1862 {
1863 /* Collect the reduction stmts and store them in
1864 SLP_TREE_SCALAR_STMTS. */
bffb8014 1865 while (next_info)
2c53b149 1866 {
6e6b18e5 1867 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 1868 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
b5aeb3bb 1869 }
14a61437
RB
1870 /* Mark the first element of the reduction chain as reduction to properly
1871 transform the node. In the reduction analysis phase only the last
1872 element of the chain is marked as reduction. */
b9787581 1873 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
b5aeb3bb
IR
1874 }
1875 else
1876 {
1877 /* Collect reduction statements. */
32c91dfc
RS
1878 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1879 for (i = 0; reductions.iterate (i, &next_info); i++)
1880 scalar_stmts.safe_push (next_info);
ebfd146a
IR
1881 }
1882
9771b263 1883 loads.create (group_size);
ebfd146a
IR
1884
1885 /* Build the tree for the SLP instance. */
89d390e5
RB
1886 bool *matches = XALLOCAVEC (bool, group_size);
1887 unsigned npermutes = 0;
f7300fff 1888 bst_fail = new scalar_stmts_set_t ();
4b6068ea 1889 poly_uint64 max_nunits = nunits;
e569db5f 1890 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
4b6068ea 1891 &max_nunits, &loads, matches, &npermutes,
e569db5f 1892 NULL, max_tree_size);
26d66f28 1893 delete bst_fail;
e569db5f 1894 if (node != NULL)
ebfd146a 1895 {
4ef69dfc 1896 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 1897 poly_uint64 unrolling_factor
4b6068ea 1898 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 1899
d9f21f6a 1900 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
1901 && is_a <bb_vec_info> (vinfo))
1902 {
4b6068ea
RS
1903 unsigned HOST_WIDE_INT const_max_nunits;
1904 if (!max_nunits.is_constant (&const_max_nunits)
1905 || const_max_nunits > group_size)
1906 {
1907 if (dump_enabled_p ())
1908 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1909 "Build SLP failed: store group "
1910 "size not a multiple of the vector size "
1911 "in basic block SLP\n");
6e2dd807 1912 vect_free_slp_tree (node, false);
4b6068ea
RS
1913 loads.release ();
1914 return false;
1915 }
e569db5f 1916 /* Fatal mismatch. */
4b6068ea 1917 matches[group_size / const_max_nunits * const_max_nunits] = false;
6e2dd807 1918 vect_free_slp_tree (node, false);
e569db5f
VK
1919 loads.release ();
1920 }
1921 else
1922 {
4ef69dfc
IR
1923 /* Create a new SLP instance. */
1924 new_instance = XNEW (struct _slp_instance);
1925 SLP_INSTANCE_TREE (new_instance) = node;
1926 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 1927 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 1928 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
1929
1930 /* Compute the load permutation. */
1931 slp_tree load_node;
1932 bool loads_permuted = false;
abf9bfbc
RB
1933 FOR_EACH_VEC_ELT (loads, i, load_node)
1934 {
01d8bf07 1935 vec<unsigned> load_permutation;
abf9bfbc 1936 int j;
b9787581 1937 stmt_vec_info load_info;
01d8bf07
RB
1938 bool this_load_permuted = false;
1939 load_permutation.create (group_size);
bffb8014 1940 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
b9787581
RS
1941 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
1942 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
abf9bfbc 1943 {
b9787581 1944 int load_place = vect_get_place_in_interleaving_chain
bffb8014 1945 (load_info, first_stmt_info);
6983e6b5
RB
1946 gcc_assert (load_place != -1);
1947 if (load_place != j)
01d8bf07 1948 this_load_permuted = true;
abf9bfbc
RB
1949 load_permutation.safe_push (load_place);
1950 }
fe2bef71
RB
1951 if (!this_load_permuted
1952 /* The load requires permutation when unrolling exposes
1953 a gap either because the group is larger than the SLP
1954 group-size or because there is a gap between the groups. */
d9f21f6a 1955 && (known_eq (unrolling_factor, 1U)
bffb8014
RS
1956 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1957 && DR_GROUP_GAP (first_stmt_info) == 0)))
01d8bf07
RB
1958 {
1959 load_permutation.release ();
1960 continue;
1961 }
1962 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1963 loads_permuted = true;
abf9bfbc 1964 }
6aa904c4
IR
1965
1966 if (loads_permuted)
ebfd146a 1967 {
01d8bf07 1968 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 1969 {
73fbfcad 1970 if (dump_enabled_p ())
3c2a8ed0
DM
1971 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1972 "Build SLP failed: unsupported load "
1973 "permutation %G", stmt_info->stmt);
6e2dd807 1974 vect_free_slp_instance (new_instance, false);
ebfd146a
IR
1975 return false;
1976 }
ebfd146a 1977 }
ebfd146a 1978
e569db5f 1979 /* If the loads and stores can be handled with load/store-lan
bb0f5ca7
AL
1980 instructions do not generate this SLP instance. */
1981 if (is_a <loop_vec_info> (vinfo)
1982 && loads_permuted
7e11fc7f 1983 && dr && vect_store_lanes_supported (vectype, group_size, false))
bb0f5ca7
AL
1984 {
1985 slp_tree load_node;
1986 FOR_EACH_VEC_ELT (loads, i, load_node)
1987 {
bffb8014 1988 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
b9787581 1989 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
bffb8014 1990 /* Use SLP for strided accesses (or if we can't load-lanes). */
bb0f5ca7
AL
1991 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
1992 || ! vect_load_lanes_supported
1993 (STMT_VINFO_VECTYPE (stmt_vinfo),
2c53b149 1994 DR_GROUP_SIZE (stmt_vinfo), false))
bb0f5ca7
AL
1995 break;
1996 }
1997 if (i == loads.length ())
1998 {
1999 if (dump_enabled_p ())
2000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2001 "Built SLP cancelled: can use "
2002 "load/store-lanes\n");
6e2dd807 2003 vect_free_slp_instance (new_instance, false);
bb0f5ca7
AL
2004 return false;
2005 }
2006 }
2007
310213d4 2008 vinfo->slp_instances.safe_push (new_instance);
b8698a0f 2009
73fbfcad 2010 if (dump_enabled_p ())
c2a12ca0
RB
2011 {
2012 dump_printf_loc (MSG_NOTE, vect_location,
2013 "Final SLP tree for instance:\n");
2014 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2015 }
ebfd146a
IR
2016
2017 return true;
2018 }
e569db5f
VK
2019 }
2020 else
2021 {
ebfd146a
IR
2022 /* Failed to SLP. */
2023 /* Free the allocated memory. */
e403d17e 2024 scalar_stmts.release ();
9771b263 2025 loads.release ();
e569db5f 2026 }
b8698a0f 2027
1ba91a49 2028 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2029 vector size. */
4b6068ea 2030 unsigned HOST_WIDE_INT const_nunits;
1ba91a49 2031 if (is_a <bb_vec_info> (vinfo)
91987857
RS
2032 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2033 && DR_GROUP_FIRST_ELEMENT (stmt_info)
4b6068ea 2034 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2035 {
2036 /* We consider breaking the group only on VF boundaries from the existing
2037 start. */
2038 for (i = 0; i < group_size; i++)
2039 if (!matches[i]) break;
2040
4b6068ea 2041 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2042 {
2043 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2044 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2045 unsigned group1_size = i & ~(const_nunits - 1);
1ba91a49 2046
82570274
RS
2047 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2048 group1_size);
86a91c0a
RS
2049 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2050 max_tree_size);
1ba91a49
AL
2051 /* If the first non-match was in the middle of a vector,
2052 skip the rest of that vector. */
2053 if (group1_size < i)
2054 {
4b6068ea 2055 i = group1_size + const_nunits;
1ba91a49 2056 if (i < group_size)
4b6068ea 2057 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2058 }
2059 if (i < group_size)
2060 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2061 return res;
2062 }
2063 /* Even though the first vector did not all match, we might be able to SLP
2064 (some) of the remainder. FORNOW ignore this possibility. */
2065 }
2066
a70d6342 2067 return false;
ebfd146a
IR
2068}
2069
2070
ff802fa1 2071/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
2072 trees of packed scalar stmts if SLP is possible. */
2073
f4ebbd24 2074opt_result
310213d4 2075vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2076{
2077 unsigned int i;
f698fccf 2078 stmt_vec_info first_element;
ebfd146a 2079
adac3a68 2080 DUMP_VECT_SCOPE ("vect_analyze_slp");
ebfd146a 2081
0d0293ac 2082 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2083 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
0630a4ec 2084 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
ebfd146a 2085
310213d4 2086 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2087 {
310213d4
RB
2088 if (loop_vinfo->reduction_chains.length () > 0)
2089 {
2090 /* Find SLP sequences starting from reduction chains. */
2091 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
0630a4ec 2092 if (! vect_analyze_slp_instance (vinfo, first_element,
310213d4 2093 max_tree_size))
6b5e165b
RB
2094 {
2095 /* Dissolve reduction chain group. */
f698fccf
RS
2096 stmt_vec_info vinfo = first_element;
2097 while (vinfo)
6b5e165b 2098 {
bffb8014 2099 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2c53b149
RB
2100 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2101 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
f698fccf 2102 vinfo = next;
6b5e165b 2103 }
f698fccf 2104 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
6b5e165b 2105 }
310213d4 2106 }
b010117a 2107
310213d4 2108 /* Find SLP sequences starting from groups of reductions. */
0630a4ec
RB
2109 if (loop_vinfo->reductions.length () > 1)
2110 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2111 max_tree_size);
310213d4 2112 }
b5aeb3bb 2113
f4ebbd24 2114 return opt_result::success ();
ebfd146a
IR
2115}
2116
2117
2118/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2119 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2120 least one instance. */
ebfd146a 2121
437f4a00 2122bool
ebfd146a
IR
2123vect_make_slp_decision (loop_vec_info loop_vinfo)
2124{
d9f21f6a
RS
2125 unsigned int i;
2126 poly_uint64 unrolling_factor = 1;
9771b263 2127 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2128 slp_instance instance;
2129 int decided_to_slp = 0;
2130
adac3a68 2131 DUMP_VECT_SCOPE ("vect_make_slp_decision");
ebfd146a 2132
9771b263 2133 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2134 {
2135 /* FORNOW: SLP if you can. */
d9f21f6a
RS
2136 /* All unroll factors have the form current_vector_size * X for some
2137 rational X, so they must have a common multiple. */
2138 unrolling_factor
2139 = force_common_multiple (unrolling_factor,
2140 SLP_INSTANCE_UNROLLING_FACTOR (instance));
ebfd146a 2141
ff802fa1 2142 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2143 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2144 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
2145 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2146 decided_to_slp++;
2147 }
2148
2149 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2150
73fbfcad 2151 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2152 {
2153 dump_printf_loc (MSG_NOTE, vect_location,
2154 "Decided to SLP %d instances. Unrolling factor ",
2155 decided_to_slp);
2156 dump_dec (MSG_NOTE, unrolling_factor);
2157 dump_printf (MSG_NOTE, "\n");
2158 }
437f4a00
IR
2159
2160 return (decided_to_slp > 0);
ebfd146a
IR
2161}
2162
2163
2164/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2165 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2166
2167static void
642fce57 2168vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 2169{
b9787581 2170 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2171 imm_use_iterator imm_iter;
355fe088 2172 gimple *use_stmt;
b9787581 2173 stmt_vec_info use_vinfo;
d755c7ef 2174 slp_tree child;
f2c74cc4 2175 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2176 int j;
2177
2178 /* Propagate hybrid down the SLP tree. */
2179 if (stype == hybrid)
2180 ;
2181 else if (HYBRID_SLP_STMT (stmt_vinfo))
2182 stype = hybrid;
2183 else
2184 {
2185 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2186 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2187 /* If we get a pattern stmt here we have to use the LHS of the
2188 original stmt for immediate uses. */
211cd1e2 2189 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
e7baeb39
RB
2190 tree def;
2191 if (gimple_code (stmt) == GIMPLE_PHI)
2192 def = gimple_phi_result (stmt);
2193 else
2194 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2195 if (def)
2196 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870 2197 {
6585ff8f
RS
2198 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2199 if (!use_vinfo)
29764870 2200 continue;
6e6b18e5 2201 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
29764870
RB
2202 if (!STMT_SLP_TYPE (use_vinfo)
2203 && (STMT_VINFO_RELEVANT (use_vinfo)
2204 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2205 && !(gimple_code (use_stmt) == GIMPLE_PHI
2206 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2207 {
2208 if (dump_enabled_p ())
3c2a8ed0
DM
2209 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2210 "def in non-SLP stmt: %G", use_stmt);
502f0263
RB
2211 stype = hybrid;
2212 }
29764870 2213 }
642fce57 2214 }
ebfd146a 2215
502f0263
RB
2216 if (stype == hybrid
2217 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2218 {
2219 if (dump_enabled_p ())
3c2a8ed0
DM
2220 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2221 stmt_vinfo->stmt);
b1af7da6
RB
2222 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2223 }
ebfd146a 2224
642fce57 2225 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
603cca93 2226 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
90dd6e3d 2227 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2228}
f2c74cc4 2229
642fce57 2230/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2231
642fce57
RB
2232static tree
2233vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2234{
2235 walk_stmt_info *wi = (walk_stmt_info *)data;
6585ff8f 2236 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
642fce57
RB
2237
2238 if (wi->is_lhs)
2239 return NULL_TREE;
2240
c98d0595
RS
2241 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2242 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
642fce57 2243 {
6585ff8f 2244 if (dump_enabled_p ())
3c2a8ed0
DM
2245 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2246 def_stmt_info->stmt);
6585ff8f 2247 STMT_SLP_TYPE (def_stmt_info) = hybrid;
642fce57
RB
2248 }
2249
2250 return NULL_TREE;
ebfd146a
IR
2251}
2252
642fce57
RB
2253static tree
2254vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
6585ff8f 2255 walk_stmt_info *wi)
642fce57 2256{
6585ff8f
RS
2257 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2258 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
642fce57
RB
2259 /* If the stmt is in a SLP instance then this isn't a reason
2260 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2261 if (! STMT_SLP_TYPE (use_vinfo)
2262 && (STMT_VINFO_RELEVANT (use_vinfo)
2263 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2264 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2265 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2266 ;
2267 else
642fce57
RB
2268 *handled = true;
2269 return NULL_TREE;
2270}
ebfd146a
IR
2271
2272/* Find stmts that must be both vectorized and SLPed. */
2273
2274void
2275vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2276{
2277 unsigned int i;
9771b263 2278 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2279 slp_instance instance;
2280
adac3a68 2281 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
ebfd146a 2282
642fce57
RB
2283 /* First walk all pattern stmt in the loop and mark defs of uses as
2284 hybrid because immediate uses in them are not recorded. */
2285 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2286 {
2287 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2288 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2289 gsi_next (&gsi))
2290 {
355fe088 2291 gimple *stmt = gsi_stmt (gsi);
6585ff8f 2292 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
642fce57
RB
2293 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2294 {
2295 walk_stmt_info wi;
2296 memset (&wi, 0, sizeof (wi));
6585ff8f 2297 wi.info = loop_vinfo;
642fce57 2298 gimple_stmt_iterator gsi2
10681ce8 2299 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
642fce57
RB
2300 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2301 vect_detect_hybrid_slp_1, &wi);
2302 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2303 vect_detect_hybrid_slp_2,
2304 vect_detect_hybrid_slp_1, &wi);
2305 }
2306 }
2307 }
2308
2309 /* Then walk the SLP instance trees marking stmts with uses in
2310 non-SLP stmts as hybrid, also propagating hybrid down the
2311 SLP tree, collecting the above info on-the-fly. */
9771b263 2312 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2313 {
2314 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2315 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2316 i, pure_slp);
2317 }
ebfd146a
IR
2318}
2319
a70d6342 2320
2c515559
RS
2321/* Initialize a bb_vec_info struct for the statements between
2322 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2323
2324_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
ca823c85
RB
2325 gimple_stmt_iterator region_end_in,
2326 vec_info_shared *shared)
2327 : vec_info (vec_info::bb, init_cost (NULL), shared),
2c515559
RS
2328 bb (gsi_bb (region_begin_in)),
2329 region_begin (region_begin_in),
2330 region_end (region_end_in)
a70d6342 2331{
a70d6342
IR
2332 gimple_stmt_iterator gsi;
2333
61d371eb
RB
2334 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2335 gsi_next (&gsi))
a70d6342 2336 {
355fe088 2337 gimple *stmt = gsi_stmt (gsi);
a70d6342 2338 gimple_set_uid (stmt, 0);
4fbeb363 2339 add_stmt (stmt);
a70d6342
IR
2340 }
2341
2c515559 2342 bb->aux = this;
a70d6342
IR
2343}
2344
2345
2346/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2347 stmts in the basic block. */
2348
2c515559 2349_bb_vec_info::~_bb_vec_info ()
a70d6342 2350{
2c515559
RS
2351 for (gimple_stmt_iterator si = region_begin;
2352 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
458135c0
RS
2353 /* Reset region marker. */
2354 gimple_set_uid (gsi_stmt (si), -1);
a70d6342 2355
2c515559 2356 bb->aux = NULL;
a70d6342
IR
2357}
2358
15944069
RS
2359/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2360 given then that child nodes have already been processed, and that
2361 their def types currently match their SLP node's def type. */
a70d6342
IR
2362
2363static bool
15944069
RS
2364vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2365 slp_instance node_instance,
2366 stmt_vector_for_cost *cost_vec)
a70d6342 2367{
b9787581 2368 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bd2f172f
RB
2369 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2370
2371 /* For BB vectorization vector types are assigned here.
2372 Memory accesses already got their vector type assigned
2373 in vect_analyze_data_refs. */
2374 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2375 if (bb_vinfo
2376 && ! STMT_VINFO_DATA_REF (stmt_info))
a70d6342 2377 {
1f3cb663
RS
2378 tree vectype, nunits_vectype;
2379 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2380 &nunits_vectype))
2381 /* We checked this when building the node. */
2382 gcc_unreachable ();
2383 if (vectype == boolean_type_node)
bd2f172f 2384 {
1f3cb663
RS
2385 vectype = vect_get_mask_type_for_stmt (stmt_info);
2386 if (!vectype)
2387 /* vect_get_mask_type_for_stmt has already explained the
2388 failure. */
2389 return false;
bd2f172f
RB
2390 }
2391
b9787581 2392 stmt_vec_info sstmt_info;
15944069 2393 unsigned int i;
b9787581
RS
2394 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2395 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
a70d6342
IR
2396 }
2397
8b7e9dba
RS
2398 /* Calculate the number of vector statements to be created for the
2399 scalar stmts in this node. For SLP reductions it is equal to the
2400 number of vector statements in the children (which has already been
2401 calculated by the recursive call). Otherwise it is the number of
2c53b149 2402 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
8b7e9dba 2403 VF divided by the number of elements in a vector. */
2c53b149
RB
2404 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2405 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
8b7e9dba
RS
2406 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2407 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2408 else
2409 {
d9f21f6a 2410 poly_uint64 vf;
8b7e9dba
RS
2411 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2412 vf = loop_vinfo->vectorization_factor;
2413 else
2414 vf = 1;
2415 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2416 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2417 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2418 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2419 }
2420
15944069 2421 bool dummy;
86a91c0a 2422 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
15944069
RS
2423}
2424
2425/* Analyze statements contained in SLP tree NODE after recursively analyzing
2426 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2427
2428 Return true if the operations are supported. */
2429
2430static bool
2431vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2432 slp_instance node_instance,
2433 scalar_stmts_to_slp_tree_map_t *visited,
2434 scalar_stmts_to_slp_tree_map_t *lvisited,
2435 stmt_vector_for_cost *cost_vec)
2436{
2437 int i, j;
2438 slp_tree child;
2439
2440 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2441 return true;
2442
2443 /* If we already analyzed the exact same set of scalar stmts we're done.
2444 We share the generated vector stmts for those. */
2445 slp_tree *leader;
2446 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2447 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2448 {
2449 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2450 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2451 return true;
2452 }
2453
2454 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2455 doesn't result in any issue since we throw away the lvisited set
2456 when we fail. */
2457 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2458
2459 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2460 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2461 visited, lvisited, cost_vec))
2462 return false;
2463
bd2f172f
RB
2464 /* Push SLP node def-type to stmt operands. */
2465 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2466 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581 2467 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
bd2f172f 2468 = SLP_TREE_DEF_TYPE (child);
15944069
RS
2469 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2470 cost_vec);
bd2f172f
RB
2471 /* Restore def-types. */
2472 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2473 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581 2474 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
bd2f172f
RB
2475 = vect_internal_def;
2476 if (! res)
2477 return false;
2478
2479 return true;
a70d6342
IR
2480}
2481
2482
8b7e9dba 2483/* Analyze statements in SLP instances of VINFO. Return true if the
a70d6342
IR
2484 operations are supported. */
2485
a12e42fc 2486bool
8b7e9dba 2487vect_slp_analyze_operations (vec_info *vinfo)
a70d6342 2488{
a70d6342
IR
2489 slp_instance instance;
2490 int i;
2491
adac3a68 2492 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
a12e42fc 2493
68435eb2
RB
2494 scalar_stmts_to_slp_tree_map_t *visited
2495 = new scalar_stmts_to_slp_tree_map_t ();
8b7e9dba 2496 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
a70d6342 2497 {
68435eb2
RB
2498 scalar_stmts_to_slp_tree_map_t lvisited;
2499 stmt_vector_for_cost cost_vec;
2500 cost_vec.create (2);
8b7e9dba
RS
2501 if (!vect_slp_analyze_node_operations (vinfo,
2502 SLP_INSTANCE_TREE (instance),
68435eb2
RB
2503 instance, visited, &lvisited,
2504 &cost_vec))
a70d6342 2505 {
b9787581
RS
2506 slp_tree node = SLP_INSTANCE_TREE (instance);
2507 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
a12e42fc 2508 dump_printf_loc (MSG_NOTE, vect_location,
3c2a8ed0
DM
2509 "removing SLP instance operations starting from: %G",
2510 stmt_info->stmt);
6e2dd807 2511 vect_free_slp_instance (instance, false);
8b7e9dba 2512 vinfo->slp_instances.ordered_remove (i);
68435eb2 2513 cost_vec.release ();
a70d6342
IR
2514 }
2515 else
68435eb2
RB
2516 {
2517 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2518 x != lvisited.end(); ++x)
2519 visited->put ((*x).first.copy (), (*x).second);
2520 i++;
78604de0 2521
68435eb2
RB
2522 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2523 cost_vec.release ();
2524 }
2525 }
78604de0
RB
2526 delete visited;
2527
8b7e9dba 2528 return !vinfo->slp_instances.is_empty ();
a70d6342
IR
2529}
2530
6eddf228
RB
2531
2532/* Compute the scalar cost of the SLP node NODE and its children
2533 and return it. Do not account defs that are marked in LIFE and
2534 update LIFE according to uses of NODE. */
2535
a296d6d3 2536static void
292cba13 2537vect_bb_slp_scalar_cost (basic_block bb,
a296d6d3
RB
2538 slp_tree node, vec<bool, va_heap> *life,
2539 stmt_vector_for_cost *cost_vec)
6eddf228 2540{
6eddf228 2541 unsigned i;
b9787581 2542 stmt_vec_info stmt_info;
6eddf228
RB
2543 slp_tree child;
2544
b9787581 2545 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6eddf228 2546 {
b9787581 2547 gimple *stmt = stmt_info->stmt;
91987857 2548 vec_info *vinfo = stmt_info->vinfo;
6eddf228
RB
2549 ssa_op_iter op_iter;
2550 def_operand_p def_p;
6eddf228 2551
ff4c81cc 2552 if ((*life)[i])
6eddf228
RB
2553 continue;
2554
2555 /* If there is a non-vectorized use of the defs then the scalar
2556 stmt is kept live in which case we do not account it or any
2557 required defs in the SLP children in the scalar cost. This
2558 way we make the vectorization more costly when compared to
2559 the scalar cost. */
2560 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2561 {
2562 imm_use_iterator use_iter;
355fe088 2563 gimple *use_stmt;
6eddf228 2564 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
91987857 2565 if (!is_gimple_debug (use_stmt))
6eddf228 2566 {
91987857
RS
2567 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2568 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2569 {
2570 (*life)[i] = true;
2571 BREAK_FROM_IMM_USE_STMT (use_iter);
2572 }
6eddf228
RB
2573 }
2574 }
ff4c81cc 2575 if ((*life)[i])
6eddf228
RB
2576 continue;
2577
b555a2e4
RB
2578 /* Count scalar stmts only once. */
2579 if (gimple_visited_p (stmt))
2580 continue;
2581 gimple_set_visited (stmt, true);
2582
a296d6d3 2583 vect_cost_for_stmt kind;
6eddf228
RB
2584 if (STMT_VINFO_DATA_REF (stmt_info))
2585 {
2586 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
a296d6d3 2587 kind = scalar_load;
6eddf228 2588 else
a296d6d3 2589 kind = scalar_store;
6eddf228
RB
2590 }
2591 else
a296d6d3
RB
2592 kind = scalar_stmt;
2593 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
6eddf228
RB
2594 }
2595
faa5399b 2596 auto_vec<bool, 20> subtree_life;
6eddf228 2597 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
2598 {
2599 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2600 {
2601 /* Do not directly pass LIFE to the recursive call, copy it to
2602 confine changes in the callee to the current child/subtree. */
2603 subtree_life.safe_splice (*life);
a296d6d3 2604 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
faa5399b
RB
2605 subtree_life.truncate (0);
2606 }
2607 }
6eddf228
RB
2608}
2609
69f11a13
IR
2610/* Check if vectorization of the basic block is profitable. */
2611
2612static bool
2613vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2614{
9771b263 2615 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2616 slp_instance instance;
1a4b99c1 2617 int i;
c3e7ee41 2618 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2619 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2620
2621 /* Calculate scalar cost. */
a296d6d3
RB
2622 stmt_vector_for_cost scalar_costs;
2623 scalar_costs.create (0);
6eddf228 2624 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2625 {
00f96dc9 2626 auto_vec<bool, 20> life;
ff4c81cc 2627 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
a296d6d3
RB
2628 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2629 SLP_INSTANCE_TREE (instance),
2630 &life, &scalar_costs);
2631 }
2632 void *target_cost_data = init_cost (NULL);
68435eb2 2633 add_stmt_costs (target_cost_data, &scalar_costs);
a296d6d3
RB
2634 scalar_costs.release ();
2635 unsigned dummy;
2636 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2637 destroy_cost_data (target_cost_data);
69f11a13 2638
b555a2e4
RB
2639 /* Unset visited flag. */
2640 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2641 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2642 gimple_set_visited (gsi_stmt (gsi), false);
2643
c3e7ee41 2644 /* Complete the target-specific cost calculation. */
92345349
BS
2645 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2646 &vec_inside_cost, &vec_epilogue_cost);
2647
2648 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2649
73fbfcad 2650 if (dump_enabled_p ())
69f11a13 2651 {
78c60e3d
SS
2652 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2653 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2654 vec_inside_cost);
2655 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2656 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2657 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2658 }
2659
a6524bba
RB
2660 /* Vectorization is profitable if its cost is more than the cost of scalar
2661 version. Note that we err on the vector side for equal cost because
2662 the cost estimate is otherwise quite pessimistic (constant uses are
2663 free on the scalar side but cost a load on the vector side for
2664 example). */
2665 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2666 return false;
2667
2668 return true;
2669}
2670
a5b50aa1
RB
2671/* Check if the basic block can be vectorized. Returns a bb_vec_info
2672 if so and sets fatal to true if failure is independent of
2673 current_vector_size. */
a70d6342 2674
8e19f5a1 2675static bb_vec_info
61d371eb
RB
2676vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2677 gimple_stmt_iterator region_end,
a5b50aa1 2678 vec<data_reference_p> datarefs, int n_stmts,
ca823c85 2679 bool &fatal, vec_info_shared *shared)
a70d6342
IR
2680{
2681 bb_vec_info bb_vinfo;
a70d6342 2682 slp_instance instance;
8e19f5a1 2683 int i;
d9f21f6a 2684 poly_uint64 min_vf = 2;
e4a707c4 2685
a5b50aa1
RB
2686 /* The first group of checks is independent of the vector size. */
2687 fatal = true;
2688
61d371eb
RB
2689 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2690 {
2691 if (dump_enabled_p ())
2692 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2693 "not vectorized: too many instructions in "
2694 "basic block.\n");
2695 free_data_refs (datarefs);
2696 return NULL;
2697 }
2698
ca823c85 2699 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
a70d6342
IR
2700 if (!bb_vinfo)
2701 return NULL;
2702
61d371eb 2703 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
ca823c85 2704 bb_vinfo->shared->save_datarefs ();
428db0ba
RB
2705
2706 /* Analyze the data references. */
2707
2708 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
a70d6342 2709 {
73fbfcad 2710 if (dump_enabled_p ())
78c60e3d
SS
2711 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2712 "not vectorized: unhandled data-ref in basic "
2713 "block.\n");
b8698a0f 2714
2c515559 2715 delete bb_vinfo;
a70d6342
IR
2716 return NULL;
2717 }
2718
fcac74a1 2719 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2720 {
73fbfcad 2721 if (dump_enabled_p ())
78c60e3d
SS
2722 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2723 "not vectorized: not enough data-refs in "
2724 "basic block.\n");
a70d6342 2725
2c515559 2726 delete bb_vinfo;
a70d6342
IR
2727 return NULL;
2728 }
2729
310213d4 2730 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
2731 {
2732 if (dump_enabled_p ())
2733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2734 "not vectorized: unhandled data access in "
2735 "basic block.\n");
2736
2c515559 2737 delete bb_vinfo;
5abe1e05
RB
2738 return NULL;
2739 }
2740
a5b50aa1
RB
2741 /* If there are no grouped stores in the region there is no need
2742 to continue with pattern recog as vect_analyze_slp will fail
2743 anyway. */
2744 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 2745 {
73fbfcad 2746 if (dump_enabled_p ())
a5b50aa1
RB
2747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2748 "not vectorized: no grouped stores in "
2749 "basic block.\n");
b8698a0f 2750
2c515559 2751 delete bb_vinfo;
a70d6342
IR
2752 return NULL;
2753 }
b8698a0f 2754
a5b50aa1
RB
2755 /* While the rest of the analysis below depends on it in some way. */
2756 fatal = false;
2757
2758 vect_pattern_recog (bb_vinfo);
2759
a70d6342
IR
2760 /* Check the SLP opportunities in the basic block, analyze and build SLP
2761 trees. */
310213d4 2762 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 2763 {
73fbfcad 2764 if (dump_enabled_p ())
effb52da
RB
2765 {
2766 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2767 "Failed to SLP the basic block.\n");
2768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2769 "not vectorized: failed to find SLP opportunities "
2770 "in basic block.\n");
2771 }
a70d6342 2772
2c515559 2773 delete bb_vinfo;
a70d6342
IR
2774 return NULL;
2775 }
b8698a0f 2776
62c8a2cf
RS
2777 vect_record_base_alignments (bb_vinfo);
2778
c2a12ca0
RB
2779 /* Analyze and verify the alignment of data references and the
2780 dependence in the SLP instances. */
a5b50aa1
RB
2781 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2782 {
c2a12ca0
RB
2783 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2784 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1 2785 {
b9787581
RS
2786 slp_tree node = SLP_INSTANCE_TREE (instance);
2787 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
a5b50aa1 2788 dump_printf_loc (MSG_NOTE, vect_location,
3c2a8ed0
DM
2789 "removing SLP instance operations starting from: %G",
2790 stmt_info->stmt);
6e2dd807 2791 vect_free_slp_instance (instance, false);
a5b50aa1
RB
2792 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2793 continue;
2794 }
c2a12ca0
RB
2795
2796 /* Mark all the statements that we want to vectorize as pure SLP and
2797 relevant. */
2798 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2799 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2800
a5b50aa1
RB
2801 i++;
2802 }
a5b50aa1
RB
2803 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2804 {
2c515559 2805 delete bb_vinfo;
a5b50aa1
RB
2806 return NULL;
2807 }
2808
8b7e9dba 2809 if (!vect_slp_analyze_operations (bb_vinfo))
a70d6342 2810 {
73fbfcad 2811 if (dump_enabled_p ())
e645e942 2812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2813 "not vectorized: bad operation in basic block.\n");
a70d6342 2814
2c515559 2815 delete bb_vinfo;
a70d6342
IR
2816 return NULL;
2817 }
2818
69f11a13 2819 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 2820 if (!unlimited_cost_model (NULL)
69f11a13
IR
2821 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2822 {
73fbfcad 2823 if (dump_enabled_p ())
78c60e3d
SS
2824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2825 "not vectorized: vectorization is not "
2826 "profitable.\n");
69f11a13 2827
2c515559 2828 delete bb_vinfo;
69f11a13
IR
2829 return NULL;
2830 }
2831
73fbfcad 2832 if (dump_enabled_p ())
78c60e3d
SS
2833 dump_printf_loc (MSG_NOTE, vect_location,
2834 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2835
2836 return bb_vinfo;
2837}
2838
2839
428db0ba
RB
2840/* Main entry for the BB vectorizer. Analyze and transform BB, returns
2841 true if anything in the basic-block was vectorized. */
2842
2843bool
2844vect_slp_bb (basic_block bb)
8e19f5a1
IR
2845{
2846 bb_vec_info bb_vinfo;
8e19f5a1 2847 gimple_stmt_iterator gsi;
61d371eb 2848 bool any_vectorized = false;
86e36728 2849 auto_vector_sizes vector_sizes;
8e19f5a1 2850
adac3a68 2851 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
8e19f5a1 2852
8e19f5a1
IR
2853 /* Autodetect first vector size we try. */
2854 current_vector_size = 0;
86e36728
RS
2855 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2856 unsigned int next_size = 0;
8e19f5a1 2857
61d371eb
RB
2858 gsi = gsi_start_bb (bb);
2859
86e36728 2860 poly_uint64 autodetected_vector_size = 0;
8e19f5a1
IR
2861 while (1)
2862 {
61d371eb
RB
2863 if (gsi_end_p (gsi))
2864 break;
2865
2866 gimple_stmt_iterator region_begin = gsi;
2867 vec<data_reference_p> datarefs = vNULL;
2868 int insns = 0;
2869
2870 for (; !gsi_end_p (gsi); gsi_next (&gsi))
428db0ba 2871 {
61d371eb
RB
2872 gimple *stmt = gsi_stmt (gsi);
2873 if (is_gimple_debug (stmt))
2874 continue;
2875 insns++;
2876
2877 if (gimple_location (stmt) != UNKNOWN_LOCATION)
4f5b9c80 2878 vect_location = stmt;
61d371eb 2879
8e846c66 2880 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
61d371eb
RB
2881 break;
2882 }
2883
2884 /* Skip leading unhandled stmts. */
2885 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2886 {
2887 gsi_next (&gsi);
2888 continue;
2889 }
428db0ba 2890
61d371eb
RB
2891 gimple_stmt_iterator region_end = gsi;
2892
2893 bool vectorized = false;
a5b50aa1 2894 bool fatal = false;
ca823c85 2895 vec_info_shared shared;
61d371eb 2896 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
ca823c85 2897 datarefs, insns, fatal, &shared);
61d371eb
RB
2898 if (bb_vinfo
2899 && dbg_cnt (vect_slp))
2900 {
428db0ba 2901 if (dump_enabled_p ())
61d371eb 2902 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
428db0ba 2903
ca823c85 2904 bb_vinfo->shared->check_datarefs ();
428db0ba
RB
2905 vect_schedule_slp (bb_vinfo);
2906
d1ac60d5
RB
2907 unsigned HOST_WIDE_INT bytes;
2908 if (current_vector_size.is_constant (&bytes))
2909 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
6f795a92 2910 "basic block part vectorized using %wu byte "
d1ac60d5
RB
2911 "vectors\n", bytes);
2912 else
2913 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2914 "basic block part vectorized using variable "
2915 "length vectors\n");
428db0ba 2916
61d371eb 2917 vectorized = true;
428db0ba 2918 }
2c515559 2919 delete bb_vinfo;
8e19f5a1 2920
61d371eb 2921 any_vectorized |= vectorized;
8e19f5a1 2922
86e36728
RS
2923 if (next_size == 0)
2924 autodetected_vector_size = current_vector_size;
2925
2926 if (next_size < vector_sizes.length ()
2927 && known_eq (vector_sizes[next_size], autodetected_vector_size))
2928 next_size += 1;
2929
61d371eb 2930 if (vectorized
86e36728
RS
2931 || next_size == vector_sizes.length ()
2932 || known_eq (current_vector_size, 0U)
a5b50aa1
RB
2933 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2934 vector sizes will fail do not bother iterating. */
2935 || fatal)
61d371eb
RB
2936 {
2937 if (gsi_end_p (region_end))
2938 break;
8e19f5a1 2939
61d371eb
RB
2940 /* Skip the unhandled stmt. */
2941 gsi_next (&gsi);
2942
2943 /* And reset vector sizes. */
2944 current_vector_size = 0;
86e36728 2945 next_size = 0;
61d371eb
RB
2946 }
2947 else
2948 {
2949 /* Try the next biggest vector size. */
86e36728 2950 current_vector_size = vector_sizes[next_size++];
61d371eb 2951 if (dump_enabled_p ())
86e36728
RS
2952 {
2953 dump_printf_loc (MSG_NOTE, vect_location,
2954 "***** Re-trying analysis with "
2955 "vector size ");
2956 dump_dec (MSG_NOTE, current_vector_size);
2957 dump_printf (MSG_NOTE, "\n");
2958 }
61d371eb
RB
2959
2960 /* Start over. */
2961 gsi = region_begin;
2962 }
8e19f5a1 2963 }
61d371eb
RB
2964
2965 return any_vectorized;
8e19f5a1
IR
2966}
2967
2968
e4af0bc4 2969/* Return 1 if vector type of boolean constant which is OPNUM
32e8e429 2970 operand in statement STMT_VINFO is a boolean vector. */
e4af0bc4
IE
2971
2972static bool
32e8e429 2973vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, int opnum)
e4af0bc4 2974{
32e8e429 2975 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
e4af0bc4 2976 tree op, vectype;
e4af0bc4
IE
2977 enum vect_def_type dt;
2978
2979 /* For comparison and COND_EXPR type is chosen depending
2980 on the other comparison operand. */
2981 if (TREE_CODE_CLASS (code) == tcc_comparison)
2982 {
32e8e429 2983 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
e4af0bc4
IE
2984 if (opnum)
2985 op = gimple_assign_rhs1 (stmt);
2986 else
2987 op = gimple_assign_rhs2 (stmt);
2988
894dd753 2989 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
2990 gcc_unreachable ();
2991
2992 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2993 }
2994
2995 if (code == COND_EXPR)
2996 {
32e8e429 2997 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
e4af0bc4
IE
2998 tree cond = gimple_assign_rhs1 (stmt);
2999
3000 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1
RB
3001 op = cond;
3002 else if (opnum)
e4af0bc4
IE
3003 op = TREE_OPERAND (cond, 1);
3004 else
3005 op = TREE_OPERAND (cond, 0);
3006
894dd753 3007 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3008 gcc_unreachable ();
3009
3010 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3011 }
3012
3013 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3014}
3015
018b2744
RS
3016/* Build a variable-length vector in which the elements in ELTS are repeated
3017 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3018 RESULTS and add any new instructions to SEQ.
3019
3020 The approach we use is:
3021
3022 (1) Find a vector mode VM with integer elements of mode IM.
3023
3024 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3025 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3026 from small vectors to IM.
3027
3028 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3029
3030 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3031 correct byte contents.
3032
3033 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3034
3035 We try to find the largest IM for which this sequence works, in order
3036 to cut down on the number of interleaves. */
3037
f1739b48 3038void
018b2744
RS
3039duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3040 unsigned int nresults, vec<tree> &results)
3041{
3042 unsigned int nelts = elts.length ();
3043 tree element_type = TREE_TYPE (vector_type);
3044
3045 /* (1) Find a vector mode VM with integer elements of mode IM. */
3046 unsigned int nvectors = 1;
3047 tree new_vector_type;
3048 tree permutes[2];
3049 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3050 &nvectors, &new_vector_type,
3051 permutes))
3052 gcc_unreachable ();
3053
3054 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3055 unsigned int partial_nelts = nelts / nvectors;
3056 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3057
3058 tree_vector_builder partial_elts;
3059 auto_vec<tree, 32> pieces (nvectors * 2);
3060 pieces.quick_grow (nvectors * 2);
3061 for (unsigned int i = 0; i < nvectors; ++i)
3062 {
3063 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3064 ELTS' has mode IM. */
3065 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3066 for (unsigned int j = 0; j < partial_nelts; ++j)
3067 partial_elts.quick_push (elts[i * partial_nelts + j]);
3068 tree t = gimple_build_vector (seq, &partial_elts);
3069 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3070 TREE_TYPE (new_vector_type), t);
3071
3072 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3073 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3074 }
3075
3076 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3077 correct byte contents.
3078
3079 We need to repeat the following operation log2(nvectors) times:
3080
3081 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3082 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3083
3084 However, if each input repeats every N elements and the VF is
3085 a multiple of N * 2, the HI result is the same as the LO. */
3086 unsigned int in_start = 0;
3087 unsigned int out_start = nvectors;
3088 unsigned int hi_start = nvectors / 2;
3089 /* A bound on the number of outputs needed to produce NRESULTS results
3090 in the final iteration. */
3091 unsigned int noutputs_bound = nvectors * nresults;
3092 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3093 {
3094 noutputs_bound /= 2;
3095 unsigned int limit = MIN (noutputs_bound, nvectors);
3096 for (unsigned int i = 0; i < limit; ++i)
3097 {
3098 if ((i & 1) != 0
3099 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3100 2 * in_repeat))
3101 {
3102 pieces[out_start + i] = pieces[out_start + i - 1];
3103 continue;
3104 }
3105
3106 tree output = make_ssa_name (new_vector_type);
3107 tree input1 = pieces[in_start + (i / 2)];
3108 tree input2 = pieces[in_start + (i / 2) + hi_start];
3109 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3110 input1, input2,
3111 permutes[i & 1]);
3112 gimple_seq_add_stmt (seq, stmt);
3113 pieces[out_start + i] = output;
3114 }
3115 std::swap (in_start, out_start);
3116 }
3117
3118 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3119 results.reserve (nresults);
3120 for (unsigned int i = 0; i < nresults; ++i)
3121 if (i < nvectors)
3122 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3123 pieces[in_start + i]));
3124 else
3125 results.quick_push (results[i - nvectors]);
3126}
3127
e4af0bc4 3128
b8698a0f
L
3129/* For constant and loop invariant defs of SLP_NODE this function returns
3130 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
3131 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3132 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
3133 REDUC_INDEX is the index of the reduction operand in the statements, unless
3134 it is -1. */
ebfd146a
IR
3135
3136static void
9dc3f7de 3137vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 3138 vec<tree> *vec_oprnds,
306b0c92 3139 unsigned int op_num, unsigned int number_of_vectors)
ebfd146a 3140{
b9787581
RS
3141 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3142 stmt_vec_info stmt_vinfo = stmts[0];
3143 gimple *stmt = stmt_vinfo->stmt;
018b2744 3144 unsigned HOST_WIDE_INT nunits;
ebfd146a 3145 tree vec_cst;
d2a12ae7 3146 unsigned j, number_of_places_left_in_vector;
ebfd146a 3147 tree vector_type;
9dc3f7de 3148 tree vop;
9771b263 3149 int group_size = stmts.length ();
ebfd146a 3150 unsigned int vec_num, i;
d2a12ae7 3151 unsigned number_of_copies = 1;
9771b263
DN
3152 vec<tree> voprnds;
3153 voprnds.create (number_of_vectors);
ebfd146a 3154 bool constant_p, is_store;
b5aeb3bb 3155 tree neutral_op = NULL;
bac430c9 3156 enum tree_code code = gimple_expr_code (stmt);
13396b6e 3157 gimple_seq ctor_seq = NULL;
018b2744 3158 auto_vec<tree, 16> permute_results;
b5aeb3bb 3159
42fd8198 3160 /* Check if vector type is a boolean vector. */
2568d8a1 3161 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
b9787581 3162 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
42fd8198
IE
3163 vector_type
3164 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3165 else
3166 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
afbe6325 3167
ebfd146a
IR
3168 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3169 {
3170 is_store = true;
3171 op = gimple_assign_rhs1 (stmt);
3172 }
3173 else
9dc3f7de
IR
3174 is_store = false;
3175
3176 gcc_assert (op);
ebfd146a 3177
ebfd146a 3178 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3179 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3180
3181 For example, we have two scalar operands, s1 and s2 (e.g., group of
3182 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3183 of this type can be packed in a vector). The output vector will contain
3184 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3185 will be 2).
3186
b8698a0f 3187 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3188 containing the operands.
3189
3190 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3191 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3192 {s5, s6, s7, s8}. */
b8698a0f 3193
018b2744
RS
3194 /* When using duplicate_and_interleave, we just need one element for
3195 each scalar statement. */
3196 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3197 nunits = group_size;
3198
14a61437 3199 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3200
3201 number_of_places_left_in_vector = nunits;
62cf7335 3202 constant_p = true;
5ebaa477 3203 tree_vector_builder elts (vector_type, nunits, 1);
794e3180 3204 elts.quick_grow (nunits);
90dd6e3d 3205 bool place_after_defs = false;
ebfd146a
IR
3206 for (j = 0; j < number_of_copies; j++)
3207 {
b9787581 3208 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
ebfd146a 3209 {
b9787581 3210 stmt = stmt_vinfo->stmt;
ebfd146a
IR
3211 if (is_store)
3212 op = gimple_assign_rhs1 (stmt);
bac430c9 3213 else
f7e531cf 3214 {
bac430c9 3215 switch (code)
f7e531cf 3216 {
bac430c9 3217 case COND_EXPR:
a989bcc3
IE
3218 {
3219 tree cond = gimple_assign_rhs1 (stmt);
3220 if (TREE_CODE (cond) == SSA_NAME)
3221 op = gimple_op (stmt, op_num + 1);
3222 else if (op_num == 0 || op_num == 1)
bac430c9 3223 op = TREE_OPERAND (cond, op_num);
a989bcc3
IE
3224 else
3225 {
3226 if (op_num == 2)
3227 op = gimple_assign_rhs2 (stmt);
3228 else
3229 op = gimple_assign_rhs3 (stmt);
3230 }
3231 }
bac430c9
IR
3232 break;
3233
3234 case CALL_EXPR:
3235 op = gimple_call_arg (stmt, op_num);
3236 break;
3237
b84b294a
JJ
3238 case LSHIFT_EXPR:
3239 case RSHIFT_EXPR:
3240 case LROTATE_EXPR:
3241 case RROTATE_EXPR:
3242 op = gimple_op (stmt, op_num + 1);
3243 /* Unlike the other binary operators, shifts/rotates have
3244 the shift count being int, instead of the same type as
3245 the lhs, so make sure the scalar is the right type if
3246 we are dealing with vectors of
3247 long long/long/short/char. */
793d9a16 3248 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
3249 op = fold_convert (TREE_TYPE (vector_type), op);
3250 break;
3251
bac430c9
IR
3252 default:
3253 op = gimple_op (stmt, op_num + 1);
b84b294a 3254 break;
f7e531cf
IR
3255 }
3256 }
b8698a0f 3257
ebfd146a 3258 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3259 number_of_places_left_in_vector--;
90dd6e3d 3260 tree orig_op = op;
13396b6e 3261 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3262 {
793d9a16 3263 if (CONSTANT_CLASS_P (op))
13396b6e 3264 {
42fd8198
IE
3265 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3266 {
3267 /* Can't use VIEW_CONVERT_EXPR for booleans because
3268 of possibly different sizes of scalar value and
3269 vector element. */
3270 if (integer_zerop (op))
3271 op = build_int_cst (TREE_TYPE (vector_type), 0);
3272 else if (integer_onep (op))
158beb4a 3273 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3274 else
3275 gcc_unreachable ();
3276 }
3277 else
3278 op = fold_unary (VIEW_CONVERT_EXPR,
3279 TREE_TYPE (vector_type), op);
13396b6e
JJ
3280 gcc_assert (op && CONSTANT_CLASS_P (op));
3281 }
3282 else
3283 {
b731b390 3284 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3285 gimple *init_stmt;
262a363f
JJ
3286 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3287 {
158beb4a
JJ
3288 tree true_val
3289 = build_all_ones_cst (TREE_TYPE (vector_type));
3290 tree false_val
3291 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3292 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3293 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3294 op, true_val,
3295 false_val);
262a363f 3296 }
262a363f
JJ
3297 else
3298 {
3299 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3300 op);
3301 init_stmt
3302 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3303 op);
3304 }
13396b6e
JJ
3305 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3306 op = new_temp;
3307 }
50eeef09 3308 }
d2a12ae7 3309 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3310 if (!CONSTANT_CLASS_P (op))
3311 constant_p = false;
90dd6e3d
RB
3312 if (TREE_CODE (orig_op) == SSA_NAME
3313 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3314 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3315 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3316 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3317 place_after_defs = true;
ebfd146a
IR
3318
3319 if (number_of_places_left_in_vector == 0)
3320 {
018b2744
RS
3321 if (constant_p
3322 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3323 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3324 vec_cst = gimple_build_vector (&ctor_seq, &elts);
ebfd146a 3325 else
d2a12ae7 3326 {
018b2744
RS
3327 if (vec_oprnds->is_empty ())
3328 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3329 number_of_vectors,
3330 permute_results);
3331 vec_cst = permute_results[number_of_vectors - j - 1];
d2a12ae7 3332 }
90dd6e3d
RB
3333 tree init;
3334 gimple_stmt_iterator gsi;
3335 if (place_after_defs)
3336 {
95c68311
RS
3337 stmt_vec_info last_stmt_info
3338 = vect_find_last_scalar_stmt_in_slp (slp_node);
3339 gsi = gsi_for_stmt (last_stmt_info->stmt);
b9787581
RS
3340 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3341 &gsi);
90dd6e3d
RB
3342 }
3343 else
b9787581
RS
3344 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3345 NULL);
13396b6e
JJ
3346 if (ctor_seq != NULL)
3347 {
90dd6e3d 3348 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
018b2744 3349 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
13396b6e
JJ
3350 ctor_seq = NULL;
3351 }
90dd6e3d
RB
3352 voprnds.quick_push (init);
3353 place_after_defs = false;
62cf7335
RB
3354 number_of_places_left_in_vector = nunits;
3355 constant_p = true;
5ebaa477
RS
3356 elts.new_vector (vector_type, nunits, 1);
3357 elts.quick_grow (nunits);
ebfd146a
IR
3358 }
3359 }
3360 }
3361
b8698a0f 3362 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 3363 them. */
9771b263 3364 vec_num = voprnds.length ();
d2a12ae7 3365 for (j = vec_num; j != 0; j--)
ebfd146a 3366 {
9771b263
DN
3367 vop = voprnds[j - 1];
3368 vec_oprnds->quick_push (vop);
ebfd146a
IR
3369 }
3370
9771b263 3371 voprnds.release ();
ebfd146a
IR
3372
3373 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3374 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3375 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3376 to replicate the vectors. */
9771b263 3377 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3378 {
b5aeb3bb
IR
3379 tree neutral_vec = NULL;
3380
3381 if (neutral_op)
3382 {
3383 if (!neutral_vec)
b9acc9f1 3384 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3385
9771b263 3386 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3387 }
3388 else
3389 {
9771b263
DN
3390 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3391 vec_oprnds->quick_push (vop);
b5aeb3bb 3392 }
ebfd146a
IR
3393 }
3394}
3395
3396
3397/* Get vectorized definitions from SLP_NODE that contains corresponding
3398 vectorized def-stmts. */
3399
3400static void
9771b263 3401vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
3402{
3403 tree vec_oprnd;
16edaeb8 3404 stmt_vec_info vec_def_stmt_info;
ebfd146a
IR
3405 unsigned int i;
3406
9771b263 3407 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3408
16edaeb8 3409 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
ebfd146a 3410 {
16edaeb8
RS
3411 gcc_assert (vec_def_stmt_info);
3412 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3413 vec_oprnd = gimple_phi_result (vec_def_phi);
e7baeb39 3414 else
16edaeb8 3415 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
9771b263 3416 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
3417 }
3418}
3419
3420
b8698a0f
L
3421/* Get vectorized definitions for SLP_NODE.
3422 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
3423 call vect_get_constant_vectors() to create vector stmts.
3424 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
3425 must be stored in the corresponding child of SLP_NODE, and we call
3426 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 3427
ebfd146a 3428void
9771b263 3429vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
306b0c92 3430 vec<vec<tree> > *vec_oprnds)
ebfd146a 3431{
d092494c 3432 int number_of_vects = 0, i;
77eefb71 3433 unsigned int child_index = 0;
b8698a0f 3434 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
d092494c 3435 slp_tree child = NULL;
37b5ec8f 3436 vec<tree> vec_defs;
e44978dc 3437 tree oprnd;
77eefb71 3438 bool vectorized_defs;
ebfd146a 3439
a1824cfd 3440 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9771b263 3441 FOR_EACH_VEC_ELT (ops, i, oprnd)
ebfd146a 3442 {
d092494c
IR
3443 /* For each operand we check if it has vectorized definitions in a child
3444 node or we need to create them (for invariants and constants). We
3445 check if the LHS of the first stmt of the next child matches OPRND.
3446 If it does, we found the correct child. Otherwise, we call
77eefb71
RB
3447 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3448 to check this child node for the next operand. */
3449 vectorized_defs = false;
3450 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
ebfd146a 3451 {
01d8bf07 3452 child = SLP_TREE_CHILDREN (slp_node)[child_index];
d092494c 3453
e44978dc 3454 /* We have to check both pattern and original def, if available. */
603cca93 3455 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
e44978dc 3456 {
b9787581
RS
3457 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3458 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
e7baeb39 3459 tree first_def_op;
90dd6e3d 3460
b9787581 3461 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
e7baeb39
RB
3462 first_def_op = gimple_phi_result (first_def);
3463 else
b9787581 3464 first_def_op = gimple_get_lhs (first_def_info->stmt);
e7baeb39 3465 if (operand_equal_p (oprnd, first_def_op, 0)
90dd6e3d 3466 || (related
10681ce8
RS
3467 && operand_equal_p (oprnd,
3468 gimple_get_lhs (related->stmt), 0)))
90dd6e3d
RB
3469 {
3470 /* The number of vector defs is determined by the number of
3471 vector statements in the node from which we get those
3472 statements. */
3473 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3474 vectorized_defs = true;
77eefb71 3475 child_index++;
90dd6e3d 3476 }
e44978dc 3477 }
77eefb71
RB
3478 else
3479 child_index++;
d092494c 3480 }
ebfd146a 3481
77eefb71
RB
3482 if (!vectorized_defs)
3483 {
3484 if (i == 0)
3485 {
3486 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3487 /* Number of vector stmts was calculated according to LHS in
3488 vect_schedule_slp_instance (), fix it by replacing LHS with
3489 RHS, if necessary. See vect_get_smallest_scalar_type () for
3490 details. */
a1824cfd
RS
3491 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
3492 &rhs_size_unit);
77eefb71
RB
3493 if (rhs_size_unit != lhs_size_unit)
3494 {
3495 number_of_vects *= rhs_size_unit;
3496 number_of_vects /= lhs_size_unit;
3497 }
3498 }
d092494c 3499 }
b5aeb3bb 3500
d092494c 3501 /* Allocate memory for vectorized defs. */
37b5ec8f
JJ
3502 vec_defs = vNULL;
3503 vec_defs.create (number_of_vects);
ebfd146a 3504
d092494c
IR
3505 /* For reduction defs we call vect_get_constant_vectors (), since we are
3506 looking for initial loop invariant values. */
306b0c92 3507 if (vectorized_defs)
d092494c 3508 /* The defs are already vectorized. */
37b5ec8f 3509 vect_get_slp_vect_defs (child, &vec_defs);
d092494c 3510 else
e7baeb39 3511 /* Build vectors from scalar defs. */
37b5ec8f 3512 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
306b0c92 3513 number_of_vects);
ebfd146a 3514
37b5ec8f 3515 vec_oprnds->quick_push (vec_defs);
d092494c 3516 }
ebfd146a
IR
3517}
3518
ebfd146a
IR
3519/* Generate vector permute statements from a list of loads in DR_CHAIN.
3520 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3521 permute statements for the SLP node NODE of the SLP instance
3522 SLP_NODE_INSTANCE. */
3523
ebfd146a 3524bool
01d8bf07 3525vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
d9f21f6a
RS
3526 gimple_stmt_iterator *gsi, poly_uint64 vf,
3527 slp_instance slp_node_instance, bool analyze_only,
29afecdf 3528 unsigned *n_perms)
ebfd146a 3529{
b9787581 3530 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
16edaeb8 3531 vec_info *vinfo = stmt_info->vinfo;
928686b1 3532 int vec_index = 0;
2635892a 3533 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ab7e60ce 3534 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
928686b1 3535 unsigned int mask_element;
ef4bddc2 3536 machine_mode mode;
ebfd146a 3537
91ff1504
RB
3538 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3539 return false;
3540
bffb8014 3541 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
91ff1504 3542
22e4dee7 3543 mode = TYPE_MODE (vectype);
ab7e60ce 3544 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 3545
61fdfd8c
RB
3546 /* Initialize the vect stmts of NODE to properly insert the generated
3547 stmts later. */
3548 if (! analyze_only)
3549 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3550 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3551 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a 3552
b8698a0f
L
3553 /* Generate permutation masks for every NODE. Number of masks for each NODE
3554 is equal to GROUP_SIZE.
3555 E.g., we have a group of three nodes with three loads from the same
3556 location in each node, and the vector size is 4. I.e., we have a
3557 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3558 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3559 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3560 ...
3561
2635892a 3562 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3563 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
3564 operation, and the mask element values can't be outside that range.
3565 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3566 For the first two permutations we need the first and the second input
ebfd146a 3567 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3568 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3569 {c2,a3,b3,c3}. */
3570
2ce27200 3571 int vect_stmts_counter = 0;
928686b1 3572 unsigned int index = 0;
2ce27200
RB
3573 int first_vec_index = -1;
3574 int second_vec_index = -1;
be377c80 3575 bool noop_p = true;
29afecdf 3576 *n_perms = 0;
ebfd146a 3577
ab7e60ce
RS
3578 vec_perm_builder mask;
3579 unsigned int nelts_to_build;
3580 unsigned int nvectors_per_build;
3581 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3582 && multiple_p (nunits, group_size));
3583 if (repeating_p)
2ce27200 3584 {
ab7e60ce
RS
3585 /* A single vector contains a whole number of copies of the node, so:
3586 (a) all permutes can use the same mask; and
3587 (b) the permutes only need a single vector input. */
3588 mask.new_vector (nunits, group_size, 3);
3589 nelts_to_build = mask.encoded_nelts ();
3590 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3591 }
3592 else
3593 {
3594 /* We need to construct a separate mask for each vector statement. */
3595 unsigned HOST_WIDE_INT const_nunits, const_vf;
3596 if (!nunits.is_constant (&const_nunits)
3597 || !vf.is_constant (&const_vf))
3598 return false;
3599 mask.new_vector (const_nunits, const_nunits, 1);
3600 nelts_to_build = const_vf * group_size;
3601 nvectors_per_build = 1;
3602 }
3603
3604 unsigned int count = mask.encoded_nelts ();
3605 mask.quick_grow (count);
3606 vec_perm_indices indices;
3607
3608 for (unsigned int j = 0; j < nelts_to_build; j++)
3609 {
3610 unsigned int iter_num = j / group_size;
3611 unsigned int stmt_num = j % group_size;
3612 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3613 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3614 if (repeating_p)
2ce27200 3615 {
ab7e60ce
RS
3616 first_vec_index = 0;
3617 mask_element = i;
3618 }
3619 else
3620 {
3621 /* Enforced before the loop when !repeating_p. */
3622 unsigned int const_nunits = nunits.to_constant ();
3623 vec_index = i / const_nunits;
3624 mask_element = i % const_nunits;
2ce27200
RB
3625 if (vec_index == first_vec_index
3626 || first_vec_index == -1)
3627 {
3628 first_vec_index = vec_index;
3629 }
3630 else if (vec_index == second_vec_index
3631 || second_vec_index == -1)
3632 {
3633 second_vec_index = vec_index;
ab7e60ce 3634 mask_element += const_nunits;
2ce27200
RB
3635 }
3636 else
3637 {
3638 if (dump_enabled_p ())
3c2a8ed0
DM
3639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3640 "permutation requires at "
3641 "least three vectors %G",
3642 stmt_info->stmt);
31bee964 3643 gcc_assert (analyze_only);
2ce27200
RB
3644 return false;
3645 }
ebfd146a 3646
ab7e60ce
RS
3647 gcc_assert (mask_element < 2 * const_nunits);
3648 }
3649
3650 if (mask_element != index)
3651 noop_p = false;
3652 mask[index++] = mask_element;
2ce27200 3653
ab7e60ce
RS
3654 if (index == count && !noop_p)
3655 {
3656 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3657 if (!can_vec_perm_const_p (mode, indices))
2ce27200 3658 {
ab7e60ce 3659 if (dump_enabled_p ())
2ce27200 3660 {
ab7e60ce
RS
3661 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3662 vect_location,
3663 "unsupported vect permute { ");
3664 for (i = 0; i < count; ++i)
22e4dee7 3665 {
ab7e60ce
RS
3666 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3667 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
22e4dee7 3668 }
ab7e60ce 3669 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2ce27200 3670 }
ab7e60ce
RS
3671 gcc_assert (analyze_only);
3672 return false;
e3342de4 3673 }
29afecdf 3674
ab7e60ce
RS
3675 ++*n_perms;
3676 }
3677
3678 if (index == count)
3679 {
3680 if (!analyze_only)
e3342de4 3681 {
ab7e60ce 3682 tree mask_vec = NULL_TREE;
be377c80 3683
ab7e60ce
RS
3684 if (! noop_p)
3685 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
2ce27200 3686
ab7e60ce
RS
3687 if (second_vec_index == -1)
3688 second_vec_index = first_vec_index;
61fdfd8c 3689
ab7e60ce
RS
3690 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3691 {
61fdfd8c 3692 /* Generate the permute statement if necessary. */
ab7e60ce
RS
3693 tree first_vec = dr_chain[first_vec_index + ri];
3694 tree second_vec = dr_chain[second_vec_index + ri];
16edaeb8 3695 stmt_vec_info perm_stmt_info;
61fdfd8c
RB
3696 if (! noop_p)
3697 {
b9787581 3698 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
61fdfd8c
RB
3699 tree perm_dest
3700 = vect_create_destination_var (gimple_assign_lhs (stmt),
3701 vectype);
3702 perm_dest = make_ssa_name (perm_dest);
16edaeb8
RS
3703 gassign *perm_stmt
3704 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3705 first_vec, second_vec,
3706 mask_vec);
3707 perm_stmt_info
b9787581
RS
3708 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3709 gsi);
61fdfd8c
RB
3710 }
3711 else
3712 /* If mask was NULL_TREE generate the requested
3713 identity transform. */
16edaeb8 3714 perm_stmt_info = vinfo->lookup_def (first_vec);
61fdfd8c
RB
3715
3716 /* Store the vector statement in NODE. */
16edaeb8
RS
3717 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3718 = perm_stmt_info;
2ce27200 3719 }
2ce27200 3720 }
ab7e60ce
RS
3721
3722 index = 0;
3723 first_vec_index = -1;
3724 second_vec_index = -1;
3725 noop_p = true;
2ce27200 3726 }
b8698a0f 3727 }
ebfd146a 3728
ebfd146a
IR
3729 return true;
3730}
3731
ebfd146a
IR
3732/* Vectorize SLP instance tree in postorder. */
3733
8fe1bd30 3734static void
f7300fff
RB
3735vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3736 scalar_stmts_to_slp_tree_map_t *bst_map)
ebfd146a 3737{
ebfd146a
IR
3738 gimple_stmt_iterator si;
3739 stmt_vec_info stmt_info;
8b7e9dba 3740 unsigned int group_size;
ebfd146a 3741 tree vectype;
603cca93 3742 int i, j;
d755c7ef 3743 slp_tree child;
ebfd146a 3744
603cca93 3745 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
8fe1bd30 3746 return;
ebfd146a 3747
f7300fff
RB
3748 /* See if we have already vectorized the same set of stmts and reuse their
3749 vectorized stmts. */
dd172744 3750 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
f7300fff 3751 {
dd172744 3752 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
8fe1bd30 3753 return;
f7300fff
RB
3754 }
3755
dd172744 3756 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
9771b263 3757 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
f7300fff 3758 vect_schedule_slp_instance (child, instance, bst_map);
b8698a0f 3759
603cca93
RB
3760 /* Push SLP node def-type to stmts. */
3761 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3762 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
3763 {
3764 stmt_vec_info child_stmt_info;
3765 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3766 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
3767 }
603cca93 3768
b9787581 3769 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3770
3771 /* VECTYPE is the type of the destination. */
b690cc0f 3772 vectype = STMT_VINFO_VECTYPE (stmt_info);
dad55d70 3773 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
3774 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3775
68435eb2 3776 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
9771b263 3777 if (!SLP_TREE_VEC_STMTS (node).exists ())
8b7e9dba 3778 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
ebfd146a 3779
73fbfcad 3780 if (dump_enabled_p ())
3c2a8ed0
DM
3781 dump_printf_loc (MSG_NOTE, vect_location,
3782 "------>vectorizing SLP node starting from: %G",
3783 stmt_info->stmt);
ebfd146a 3784
2e8ab70c
RB
3785 /* Vectorized stmts go before the last scalar stmt which is where
3786 all uses are ready. */
95c68311
RS
3787 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
3788 si = gsi_for_stmt (last_stmt_info->stmt);
e4a707c4 3789
b010117a
IR
3790 /* Mark the first element of the reduction chain as reduction to properly
3791 transform the node. In the analysis phase only the last element of the
3792 chain is marked as reduction. */
2c53b149
RB
3793 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3794 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
b9787581 3795 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
b010117a
IR
3796 {
3797 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3798 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3799 }
3800
6876e5bc
RB
3801 /* Handle two-operation SLP nodes by vectorizing the group with
3802 both operations and then performing a merge. */
3803 if (SLP_TREE_TWO_OPERATORS (node))
3804 {
b9787581 3805 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
6876e5bc 3806 enum tree_code code0 = gimple_assign_rhs_code (stmt);
567a3691 3807 enum tree_code ocode = ERROR_MARK;
b9787581 3808 stmt_vec_info ostmt_info;
e3342de4 3809 vec_perm_builder mask (group_size, group_size, 1);
b9787581
RS
3810 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
3811 {
3812 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
3813 if (gimple_assign_rhs_code (ostmt) != code0)
3814 {
3815 mask.quick_push (1);
3816 ocode = gimple_assign_rhs_code (ostmt);
3817 }
3818 else
3819 mask.quick_push (0);
3820 }
567a3691 3821 if (ocode != ERROR_MARK)
6876e5bc 3822 {
16edaeb8
RS
3823 vec<stmt_vec_info> v0;
3824 vec<stmt_vec_info> v1;
6876e5bc
RB
3825 unsigned j;
3826 tree tmask = NULL_TREE;
b0b45e58 3827 vect_transform_stmt (stmt_info, &si, node, instance);
6876e5bc
RB
3828 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3829 SLP_TREE_VEC_STMTS (node).truncate (0);
3830 gimple_assign_set_rhs_code (stmt, ocode);
b0b45e58 3831 vect_transform_stmt (stmt_info, &si, node, instance);
6876e5bc
RB
3832 gimple_assign_set_rhs_code (stmt, code0);
3833 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3834 SLP_TREE_VEC_STMTS (node).truncate (0);
3835 tree meltype = build_nonstandard_integer_type
b397965c 3836 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
6876e5bc
RB
3837 tree mvectype = get_same_sized_vectype (meltype, vectype);
3838 unsigned k = 0, l;
3839 for (j = 0; j < v0.length (); ++j)
3840 {
dad55d70
RS
3841 /* Enforced by vect_build_slp_tree, which rejects variable-length
3842 vectors for SLP_TREE_TWO_OPERATORS. */
3843 unsigned int const_nunits = nunits.to_constant ();
3844 tree_vector_builder melts (mvectype, const_nunits, 1);
3845 for (l = 0; l < const_nunits; ++l)
6876e5bc 3846 {
1ece8d4c 3847 if (k >= group_size)
6876e5bc 3848 k = 0;
dad55d70
RS
3849 tree t = build_int_cst (meltype,
3850 mask[k++] * const_nunits + l);
794e3180 3851 melts.quick_push (t);
6876e5bc 3852 }
5ebaa477 3853 tmask = melts.build ();
6876e5bc
RB
3854
3855 /* ??? Not all targets support a VEC_PERM_EXPR with a
3856 constant mask that would translate to a vec_merge RTX
3857 (with their vec_perm_const_ok). We can either not
3858 vectorize in that case or let veclower do its job.
3859 Unfortunately that isn't too great and at least for
3860 plus/minus we'd eventually like to match targets
3861 vector addsub instructions. */
355fe088 3862 gimple *vstmt;
6876e5bc
RB
3863 vstmt = gimple_build_assign (make_ssa_name (vectype),
3864 VEC_PERM_EXPR,
16edaeb8
RS
3865 gimple_assign_lhs (v0[j]->stmt),
3866 gimple_assign_lhs (v1[j]->stmt),
3867 tmask);
3868 SLP_TREE_VEC_STMTS (node).quick_push
b9787581 3869 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
6876e5bc
RB
3870 }
3871 v0.release ();
3872 v1.release ();
8fe1bd30 3873 return;
6876e5bc
RB
3874 }
3875 }
b0b45e58 3876 vect_transform_stmt (stmt_info, &si, node, instance);
603cca93
RB
3877
3878 /* Restore stmt def-types. */
3879 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3880 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
3881 {
3882 stmt_vec_info child_stmt_info;
3883 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3884 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
3885 }
ebfd146a
IR
3886}
3887
dd34c087
JJ
3888/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3889 For loop vectorization this is done in vectorizable_call, but for SLP
3890 it needs to be deferred until end of vect_schedule_slp, because multiple
3891 SLP instances may refer to the same scalar stmt. */
3892
3893static void
3894vect_remove_slp_scalar_calls (slp_tree node)
3895{
b9787581 3896 gimple *new_stmt;
dd34c087
JJ
3897 gimple_stmt_iterator gsi;
3898 int i;
d755c7ef 3899 slp_tree child;
dd34c087
JJ
3900 tree lhs;
3901 stmt_vec_info stmt_info;
3902
603cca93 3903 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
dd34c087
JJ
3904 return;
3905
9771b263 3906 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3907 vect_remove_slp_scalar_calls (child);
dd34c087 3908
b9787581 3909 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
dd34c087 3910 {
b9787581
RS
3911 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3912 if (!stmt || gimple_bb (stmt) == NULL)
dd34c087 3913 continue;
b9787581 3914 if (is_pattern_stmt_p (stmt_info)
dd34c087
JJ
3915 || !PURE_SLP_STMT (stmt_info))
3916 continue;
3917 lhs = gimple_call_lhs (stmt);
3918 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
dd34c087 3919 gsi = gsi_for_stmt (stmt);
9d97912b 3920 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
dd34c087
JJ
3921 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3922 }
3923}
ebfd146a 3924
ff802fa1
IR
3925/* Generate vector code for all SLP instances in the loop/basic block. */
3926
8fe1bd30 3927void
310213d4 3928vect_schedule_slp (vec_info *vinfo)
ebfd146a 3929{
9771b263 3930 vec<slp_instance> slp_instances;
ebfd146a 3931 slp_instance instance;
8b7e9dba 3932 unsigned int i;
78604de0
RB
3933
3934 scalar_stmts_to_slp_tree_map_t *bst_map
3935 = new scalar_stmts_to_slp_tree_map_t ();
310213d4 3936 slp_instances = vinfo->slp_instances;
9771b263 3937 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
3938 {
3939 /* Schedule the tree of INSTANCE. */
8fe1bd30
RS
3940 vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3941 instance, bst_map);
73fbfcad 3942 if (dump_enabled_p ())
78c60e3d 3943 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3944 "vectorizing stmts using SLP.\n");
ebfd146a 3945 }
78604de0 3946 delete bst_map;
ebfd146a 3947
9771b263 3948 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
3949 {
3950 slp_tree root = SLP_INSTANCE_TREE (instance);
b9787581 3951 stmt_vec_info store_info;
b5aeb3bb 3952 unsigned int j;
b5aeb3bb 3953
c40eced0
RB
3954 /* Remove scalar call stmts. Do not do this for basic-block
3955 vectorization as not all uses may be vectorized.
3956 ??? Why should this be necessary? DCE should be able to
3957 remove the stmts itself.
3958 ??? For BB vectorization we can as well remove scalar
3959 stmts starting from the SLP tree root if they have no
3960 uses. */
310213d4 3961 if (is_a <loop_vec_info> (vinfo))
c40eced0 3962 vect_remove_slp_scalar_calls (root);
dd34c087 3963
b9787581 3964 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
b5aeb3bb
IR
3965 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3966 {
b9787581
RS
3967 if (!STMT_VINFO_DATA_REF (store_info))
3968 break;
3969
211cd1e2 3970 store_info = vect_orig_stmt (store_info);
b9787581 3971 /* Free the attached stmt_vec_info and remove the stmt. */
b5b56c2a 3972 vinfo->remove_stmt (store_info);
b5aeb3bb
IR
3973 }
3974 }
ebfd146a 3975}