]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
[31/46] Use stmt_vec_info in function interfaces (part 1)
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
85ec4feb 2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
957060b5 35#include "params.h"
40e23961 36#include "fold-const.h"
d8a2d370 37#include "stor-layout.h"
5be5c238 38#include "gimple-iterator.h"
ebfd146a 39#include "cfgloop.h"
ebfd146a 40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
642fce57 42#include "gimple-walk.h"
428db0ba 43#include "dbgcnt.h"
5ebaa477 44#include "tree-vector-builder.h"
f151c9e1 45#include "vec-perm-indices.h"
018b2744
RS
46#include "gimple-fold.h"
47#include "internal-fn.h"
a70d6342
IR
48
49
6e2dd807
RS
50/* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
ebfd146a
IR
53
54static void
6e2dd807 55vect_free_slp_tree (slp_tree node, bool final_p)
ebfd146a 56{
d092494c 57 int i;
d755c7ef 58 slp_tree child;
d092494c 59
9771b263 60 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6e2dd807 61 vect_free_slp_tree (child, final_p);
b8698a0f 62
6e2dd807
RS
63 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
64 Some statements might no longer exist, after having been
65 removed by vect_transform_stmt. Updating the remaining
66 statements would be redundant. */
67 if (!final_p)
68 {
b9787581
RS
69 stmt_vec_info stmt_info;
70 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6e2dd807 71 {
b9787581
RS
72 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
73 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
6e2dd807
RS
74 }
75 }
78810bd3 76
9771b263
DN
77 SLP_TREE_CHILDREN (node).release ();
78 SLP_TREE_SCALAR_STMTS (node).release ();
79 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 80 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
81
82 free (node);
83}
84
85
6e2dd807
RS
/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  /* Free the tree first: while !FINAL_P it still decrements the
     per-statement SLP use counts (see vect_free_slp_tree).  */
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
97
98
d092494c
IR
99/* Create an SLP node for SCALAR_STMTS. */
100
101static slp_tree
b9787581 102vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
d092494c 103{
d3cfd39e 104 slp_tree node;
b9787581 105 stmt_vec_info stmt_info = scalar_stmts[0];
d092494c
IR
106 unsigned int nops;
107
b9787581 108 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
d092494c 109 nops = gimple_call_num_args (stmt);
b9787581 110 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
f7e531cf
IR
111 {
112 nops = gimple_num_ops (stmt) - 1;
113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114 nops++;
115 }
b9787581 116 else if (is_a <gphi *> (stmt_info->stmt))
e7baeb39 117 nops = 0;
d092494c
IR
118 else
119 return NULL;
120
d3cfd39e 121 node = XNEW (struct _slp_tree);
d092494c 122 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263 123 SLP_TREE_VEC_STMTS (node).create (0);
68435eb2 124 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
9771b263 125 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 126 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 127 SLP_TREE_TWO_OPERATORS (node) = false;
603cca93 128 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
d092494c 129
78810bd3 130 unsigned i;
b9787581
RS
131 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
132 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
78810bd3 133
d092494c
IR
134 return node;
135}
136
137
ddf56386
RB
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  /* Type of this operand in the first statement of the group.  */
  tree first_op_type;
  /* Vector def-type of this operand in the first statement.  */
  enum vect_def_type first_dt;
  /* Whether the def stmt of this operand in the first (resp. second)
     statement of the group is a pattern statement; used to require a
     consistent pattern state across the whole group.  */
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;
153
154
d092494c
IR
155/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
156 operand. */
9771b263 157static vec<slp_oprnd_info>
d092494c
IR
158vect_create_oprnd_info (int nops, int group_size)
159{
160 int i;
161 slp_oprnd_info oprnd_info;
9771b263 162 vec<slp_oprnd_info> oprnds_info;
d092494c 163
9771b263 164 oprnds_info.create (nops);
d092494c
IR
165 for (i = 0; i < nops; i++)
166 {
167 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 168 oprnd_info->def_stmts.create (group_size);
d092494c 169 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 170 oprnd_info->first_op_type = NULL_TREE;
d092494c 171 oprnd_info->first_pattern = false;
effb52da 172 oprnd_info->second_pattern = false;
9771b263 173 oprnds_info.quick_push (oprnd_info);
d092494c
IR
174 }
175
176 return oprnds_info;
177}
178
179
d3cfd39e
JJ
180/* Free operands info. */
181
d092494c 182static void
9771b263 183vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
184{
185 int i;
186 slp_oprnd_info oprnd_info;
187
9771b263 188 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 189 {
9771b263 190 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
191 XDELETE (oprnd_info);
192 }
d092494c 193
9771b263 194 oprnds_info.release ();
d092494c
IR
195}
196
197
d755c7ef
RB
198/* Find the place of the data-ref in STMT in the interleaving chain that starts
199 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
200
b210f45f 201int
355fe088 202vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
d755c7ef 203{
a1824cfd
RS
204 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
205 stmt_vec_info first_stmt_info = vinfo_for_stmt (first_stmt);
206 stmt_vec_info next_stmt_info = first_stmt_info;
d755c7ef
RB
207 int result = 0;
208
a1824cfd 209 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
d755c7ef
RB
210 return -1;
211
212 do
213 {
a1824cfd 214 if (next_stmt_info == stmt_info)
d755c7ef 215 return result;
a1824cfd
RS
216 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
217 if (next_stmt_info)
218 result += DR_GROUP_GAP (next_stmt_info);
d755c7ef 219 }
a1824cfd 220 while (next_stmt_info);
d755c7ef
RB
221
222 return -1;
223}
224
018b2744
RS
/* Check whether it is possible to load COUNT elements of type ELT_MODE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull), the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull), and the two permute masks used to interleave them in
   PERMUTES[0]/PERMUTES[1] (if PERMUTES is nonnull).  */

bool
can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
				unsigned int *nvectors_out,
				tree *vector_type_out,
				tree *permutes)
{
  /* Total size of the COUNT elements; each failed iteration below halves
     the per-vector element span and doubles the vector count.  */
  poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
  poly_int64 nelts;
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      /* We need a whole number of "super-elements" of ELT_BYTES bytes per
	 vector, and an integer mode wide enough to hold one of them.  */
      if (multiple_p (current_vector_size, elt_bytes, &nelts)
	  && int_mode_for_size (elt_bits, 0).exists (&int_mode))
	{
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	  tree vector_type = build_vector_type (int_type, nelts);
	  if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
	    {
	      /* SEL1 interleaves the low halves of two vectors
		 ({0, nelts, 1, nelts+1, ...}) and SEL2 the high halves;
		 both are encoded with 3 patterns of 2 elements each.  */
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		{
		  sel1.quick_push (i);
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
		}
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		{
		  if (nvectors_out)
		    *nvectors_out = nvectors;
		  if (vector_type_out)
		    *vector_type_out = vector_type;
		  if (permutes)
		    {
		      permutes[0] = vect_gen_perm_mask_checked (vector_type,
								indices1);
		      permutes[1] = vect_gen_perm_mask_checked (vector_type,
								indices2);
		    }
		  return true;
		}
	    }
	}
      /* Fail once the element span can no longer be halved.  */
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
	return false;
      nvectors *= 2;
    }
}
d755c7ef 287
d092494c
IR
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<stmt_vec_info> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  /* Gimple operand index of the first rhs operand: 3 for calls,
     1 for assignments.  */
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  /* The embedded comparison counts as an extra operand.  */
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  /* A nonzero incoming *SWAP is only meaningful for cond_exprs.  */
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.
	     Row 0: no change; row 1: swap the two comparison operands;
	     row 2: swap the then/else arms (comparison code inverted
	     below, after the loop).  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  /* Operands 0 and 1 live inside the comparison tree; the
	     then/else arms are direct gimple operands.  */
	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
					     first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt_info->stmt, map[i]);
	}
      else
	/* For a swapped commutative operation fetch the two operands in
	   reverse order (SWAPPED is only set here when i < 2).  */
	oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT_INFO is a part of a pattern in LOOP and get
	 the def stmt from the pattern.  Check that all the stmts of the
	 node are in the pattern.  */
      if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      /* Before giving up, try once to swap the operands of a
		 commutative operation (only from the first operand).  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  dt = STMT_VINFO_DEF_TYPE (def_stmt_info);

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt_info->stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  /* First stmt of the group: record the reference state the
	     other stmts must match.  */
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  tree type = TREE_TYPE (oprnd);
	  if ((oprnd_info->first_dt != dt
	       && !(oprnd_info->first_dt == vect_reduction_def
		    && dt == vect_internal_def)
	       && !((oprnd_info->first_dt == vect_external_def
		     || oprnd_info->first_dt == vect_constant_def)
		    && (dt == vect_external_def
			|| dt == vect_constant_def)))
	      || !types_compatible_p (oprnd_info->first_op_type, type))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	  /* With variable-length vectors, constant/external defs must be
	     buildable via duplicate_and_interleave.  */
	  if ((dt == vect_constant_def
	       || dt == vect_external_def)
	      && !current_vector_size.is_constant ()
	      && (TREE_CODE (type) == BOOLEAN_TYPE
		  || !can_duplicate_and_interleave_p (stmts.length (),
						      TYPE_MODE (type))))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: invalid type of def "
				   "for variable-length SLP ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return -1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt_info);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				stmt_info->stmt, 0);
	    }
	  return -1;
	}

      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  /* Report back to the caller whether any swap was performed.  */
  *swap = swapped;
  return 0;
}
575
5249ee4d
RS
576/* Return true if call statements CALL1 and CALL2 are similar enough
577 to be combined into the same SLP group. */
578
579static bool
580compatible_calls_p (gcall *call1, gcall *call2)
581{
582 unsigned int nargs = gimple_call_num_args (call1);
583 if (nargs != gimple_call_num_args (call2))
584 return false;
585
586 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
587 return false;
588
589 if (gimple_call_internal_p (call1))
590 {
591 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
592 TREE_TYPE (gimple_call_lhs (call2))))
593 return false;
594 for (unsigned int i = 0; i < nargs; ++i)
595 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
596 TREE_TYPE (gimple_call_arg (call2, i))))
597 return false;
598 }
599 else
600 {
601 if (!operand_equal_p (gimple_call_fn (call1),
602 gimple_call_fn (call2), 0))
603 return false;
604
605 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
606 return false;
607 }
608 return true;
609}
610
b161f2c9
RS
611/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
612 caller's attempt to find the vector type in STMT with the narrowest
613 element type. Return true if VECTYPE is nonnull and if it is valid
614 for VINFO. When returning true, update MAX_NUNITS to reflect the
615 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
616 as for vect_build_slp_tree. */
617
618static bool
619vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
4b6068ea 620 tree vectype, poly_uint64 *max_nunits)
b161f2c9
RS
621{
622 if (!vectype)
623 {
624 if (dump_enabled_p ())
625 {
626 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
627 "Build SLP failed: unsupported data-type in ");
628 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
629 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
630 }
631 /* Fatal mismatch. */
632 return false;
633 }
634
635 /* If populating the vector type requires unrolling then fail
636 before adjusting *max_nunits for basic-block vectorization. */
4b6068ea
RS
637 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
638 unsigned HOST_WIDE_INT const_nunits;
b161f2c9 639 if (is_a <bb_vec_info> (vinfo)
4b6068ea
RS
640 && (!nunits.is_constant (&const_nunits)
641 || const_nunits > group_size))
b161f2c9
RS
642 {
643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
644 "Build SLP failed: unrolling required "
645 "in basic block SLP\n");
646 /* Fatal mismatch. */
647 return false;
648 }
649
650 /* In case of multiple types we need to detect the smallest type. */
4b6068ea 651 vect_update_max_nunits (max_nunits, vectype);
b161f2c9
RS
652 return true;
653}
ebfd146a 654
1f3cb663
RS
655/* STMTS is a group of GROUP_SIZE SLP statements in which some
656 statements do the same operation as the first statement and in which
657 the others do ALT_STMT_CODE. Return true if we can take one vector
658 of the first operation and one vector of the second and permute them
659 to get the required result. VECTYPE is the type of the vector that
660 would be permuted. */
661
662static bool
b9787581
RS
663vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
664 unsigned int group_size, tree vectype,
665 tree_code alt_stmt_code)
1f3cb663
RS
666{
667 unsigned HOST_WIDE_INT count;
668 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
669 return false;
670
671 vec_perm_builder sel (count, count, 1);
672 for (unsigned int i = 0; i < count; ++i)
673 {
674 unsigned int elt = i;
b9787581
RS
675 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
676 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
1f3cb663
RS
677 elt += count;
678 sel.quick_push (elt);
679 }
680 vec_perm_indices indices (sel, 2, count);
681 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
682}
683
6983e6b5
RB
684/* Verify if the scalar stmts STMTS are isomorphic, require data
685 permutation or are of unsupported types of operation. Return
686 true if they are, otherwise return false and indicate in *MATCHES
687 which stmts are not isomorphic to the first one. If MATCHES[0]
688 is false then this indicates the comparison could not be
4cecd659
BC
689 carried out or the stmts will never be vectorized by SLP.
690
691 Note COND_EXPR is possibly ismorphic to another one after swapping its
692 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
693 the first stmt by swapping the two operands of comparison; set SWAP[i]
694 to 2 if stmt I is isormorphic to the first stmt by inverting the code
695 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
696 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
ebfd146a
IR
697
698static bool
4cecd659 699vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
b9787581 700 vec<stmt_vec_info> stmts, unsigned int group_size,
5249ee4d
RS
701 poly_uint64 *max_nunits, bool *matches,
702 bool *two_operators)
ebfd146a 703{
ebfd146a 704 unsigned int i;
b9787581 705 stmt_vec_info first_stmt_info = stmts[0];
6876e5bc
RB
706 enum tree_code first_stmt_code = ERROR_MARK;
707 enum tree_code alt_stmt_code = ERROR_MARK;
708 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 709 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 710 tree lhs;
6983e6b5 711 bool need_same_oprnds = false;
1f3cb663 712 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
ebfd146a
IR
713 optab optab;
714 int icode;
ef4bddc2
RS
715 machine_mode optab_op2_mode;
716 machine_mode vec_mode;
bffb8014 717 stmt_vec_info first_load = NULL, prev_first_load = NULL;
d092494c 718
ebfd146a 719 /* For every stmt in NODE find its def stmt/s. */
b9787581
RS
720 stmt_vec_info stmt_info;
721 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
ebfd146a 722 {
b9787581 723 gimple *stmt = stmt_info->stmt;
4cecd659 724 swap[i] = 0;
6983e6b5
RB
725 matches[i] = false;
726
73fbfcad 727 if (dump_enabled_p ())
ebfd146a 728 {
78c60e3d
SS
729 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
730 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
731 }
732
4b5caab7 733 /* Fail to vectorize statements marked as unvectorizable. */
b9787581 734 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
4b5caab7 735 {
73fbfcad 736 if (dump_enabled_p ())
4b5caab7 737 {
78c60e3d
SS
738 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
739 "Build SLP failed: unvectorizable statement ");
740 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4b5caab7 741 }
6983e6b5
RB
742 /* Fatal mismatch. */
743 matches[0] = false;
4b5caab7
IR
744 return false;
745 }
746
ebfd146a
IR
747 lhs = gimple_get_lhs (stmt);
748 if (lhs == NULL_TREE)
749 {
73fbfcad 750 if (dump_enabled_p ())
ebfd146a 751 {
78c60e3d
SS
752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
753 "Build SLP failed: not GIMPLE_ASSIGN nor "
754 "GIMPLE_CALL ");
755 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 756 }
6983e6b5
RB
757 /* Fatal mismatch. */
758 matches[0] = false;
ebfd146a
IR
759 return false;
760 }
761
1f3cb663
RS
762 tree nunits_vectype;
763 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
764 &nunits_vectype)
765 || (nunits_vectype
b9787581 766 && !vect_record_max_nunits (vinfo, stmt_info, group_size,
1f3cb663 767 nunits_vectype, max_nunits)))
b161f2c9 768 {
6983e6b5
RB
769 /* Fatal mismatch. */
770 matches[0] = false;
1f3cb663
RS
771 return false;
772 }
773
774 gcc_assert (vectype);
b8698a0f 775
538dd0b7 776 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
777 {
778 rhs_code = CALL_EXPR;
5249ee4d
RS
779 if ((gimple_call_internal_p (call_stmt)
780 && (!vectorizable_internal_fn_p
781 (gimple_call_internal_fn (call_stmt))))
538dd0b7
DM
782 || gimple_call_tail_p (call_stmt)
783 || gimple_call_noreturn_p (call_stmt)
784 || !gimple_call_nothrow_p (call_stmt)
785 || gimple_call_chain (call_stmt))
190c2236 786 {
73fbfcad 787 if (dump_enabled_p ())
190c2236 788 {
78c60e3d
SS
789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
790 "Build SLP failed: unsupported call type ");
538dd0b7
DM
791 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
792 call_stmt, 0);
190c2236 793 }
6983e6b5
RB
794 /* Fatal mismatch. */
795 matches[0] = false;
190c2236
JJ
796 return false;
797 }
798 }
ebfd146a
IR
799 else
800 rhs_code = gimple_assign_rhs_code (stmt);
801
802 /* Check the operation. */
803 if (i == 0)
804 {
805 first_stmt_code = rhs_code;
806
b8698a0f 807 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
808 vector shift with scalar shift operand. */
809 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
810 || rhs_code == LROTATE_EXPR
811 || rhs_code == RROTATE_EXPR)
812 {
1f3cb663
RS
813 if (vectype == boolean_type_node)
814 {
815 if (dump_enabled_p ())
816 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
817 "Build SLP failed: shift of a"
818 " boolean.\n");
819 /* Fatal mismatch. */
820 matches[0] = false;
821 return false;
822 }
823
ebfd146a
IR
824 vec_mode = TYPE_MODE (vectype);
825
826 /* First see if we have a vector/vector shift. */
827 optab = optab_for_tree_code (rhs_code, vectype,
828 optab_vector);
829
830 if (!optab
947131ba 831 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
832 {
833 /* No vector/vector shift, try for a vector/scalar shift. */
834 optab = optab_for_tree_code (rhs_code, vectype,
835 optab_scalar);
836
837 if (!optab)
838 {
73fbfcad 839 if (dump_enabled_p ())
78c60e3d 840 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 841 "Build SLP failed: no optab.\n");
6983e6b5
RB
842 /* Fatal mismatch. */
843 matches[0] = false;
ebfd146a
IR
844 return false;
845 }
947131ba 846 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
847 if (icode == CODE_FOR_nothing)
848 {
73fbfcad 849 if (dump_enabled_p ())
78c60e3d
SS
850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
851 "Build SLP failed: "
e645e942 852 "op not supported by target.\n");
6983e6b5
RB
853 /* Fatal mismatch. */
854 matches[0] = false;
ebfd146a
IR
855 return false;
856 }
857 optab_op2_mode = insn_data[icode].operand[2].mode;
858 if (!VECTOR_MODE_P (optab_op2_mode))
859 {
860 need_same_oprnds = true;
861 first_op1 = gimple_assign_rhs2 (stmt);
862 }
863 }
864 }
36ba4aae
IR
865 else if (rhs_code == WIDEN_LSHIFT_EXPR)
866 {
867 need_same_oprnds = true;
868 first_op1 = gimple_assign_rhs2 (stmt);
869 }
ebfd146a
IR
870 }
871 else
872 {
6876e5bc
RB
873 if (first_stmt_code != rhs_code
874 && alt_stmt_code == ERROR_MARK)
875 alt_stmt_code = rhs_code;
ebfd146a
IR
876 if (first_stmt_code != rhs_code
877 && (first_stmt_code != IMAGPART_EXPR
878 || rhs_code != REALPART_EXPR)
879 && (first_stmt_code != REALPART_EXPR
69f11a13 880 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
881 /* Handle mismatches in plus/minus by computing both
882 and merging the results. */
883 && !((first_stmt_code == PLUS_EXPR
884 || first_stmt_code == MINUS_EXPR)
885 && (alt_stmt_code == PLUS_EXPR
886 || alt_stmt_code == MINUS_EXPR)
887 && rhs_code == alt_stmt_code)
b9787581 888 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
69f11a13 889 && (first_stmt_code == ARRAY_REF
38000232 890 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
891 || first_stmt_code == INDIRECT_REF
892 || first_stmt_code == COMPONENT_REF
893 || first_stmt_code == MEM_REF)))
ebfd146a 894 {
73fbfcad 895 if (dump_enabled_p ())
ebfd146a 896 {
78c60e3d
SS
897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
898 "Build SLP failed: different operation "
899 "in stmt ");
900 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
6876e5bc
RB
901 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
902 "original stmt ");
903 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
b9787581 904 first_stmt_info->stmt, 0);
ebfd146a 905 }
6983e6b5
RB
906 /* Mismatch. */
907 continue;
ebfd146a 908 }
b8698a0f
L
909
910 if (need_same_oprnds
ebfd146a
IR
911 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
912 {
73fbfcad 913 if (dump_enabled_p ())
ebfd146a 914 {
78c60e3d
SS
915 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
916 "Build SLP failed: different shift "
917 "arguments in ");
918 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 919 }
6983e6b5
RB
920 /* Mismatch. */
921 continue;
ebfd146a 922 }
190c2236
JJ
923
924 if (rhs_code == CALL_EXPR)
925 {
b9787581 926 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
5249ee4d 927 as_a <gcall *> (stmt)))
190c2236 928 {
73fbfcad 929 if (dump_enabled_p ())
190c2236 930 {
78c60e3d
SS
931 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
932 "Build SLP failed: different calls in ");
933 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
934 stmt, 0);
190c2236 935 }
6983e6b5
RB
936 /* Mismatch. */
937 continue;
190c2236
JJ
938 }
939 }
ebfd146a
IR
940 }
941
0d0293ac 942 /* Grouped store or load. */
b9787581 943 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a
IR
944 {
945 if (REFERENCE_CLASS_P (lhs))
946 {
947 /* Store. */
6983e6b5 948 ;
ebfd146a 949 }
b5aeb3bb
IR
950 else
951 {
952 /* Load. */
b9787581 953 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
b5aeb3bb
IR
954 if (prev_first_load)
955 {
956 /* Check that there are no loads from different interleaving
6983e6b5
RB
957 chains in the same node. */
958 if (prev_first_load != first_load)
78c60e3d 959 {
73fbfcad 960 if (dump_enabled_p ())
b5aeb3bb 961 {
78c60e3d
SS
962 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
963 vect_location,
964 "Build SLP failed: different "
965 "interleaving chains in one node ");
966 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
967 stmt, 0);
b5aeb3bb 968 }
6983e6b5
RB
969 /* Mismatch. */
970 continue;
b5aeb3bb
IR
971 }
972 }
973 else
974 prev_first_load = first_load;
ebfd146a 975 }
0d0293ac 976 } /* Grouped access. */
ebfd146a
IR
977 else
978 {
979 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
980 {
0d0293ac 981 /* Not grouped load. */
73fbfcad 982 if (dump_enabled_p ())
ebfd146a 983 {
78c60e3d
SS
984 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
985 "Build SLP failed: not grouped load ");
986 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a
IR
987 }
988
0d0293ac 989 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
990 /* Fatal mismatch. */
991 matches[0] = false;
ebfd146a
IR
992 return false;
993 }
994
995 /* Not memory operation. */
996 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 997 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 998 && TREE_CODE_CLASS (rhs_code) != tcc_expression
42fd8198 999 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
190c2236 1000 && rhs_code != CALL_EXPR)
ebfd146a 1001 {
73fbfcad 1002 if (dump_enabled_p ())
ebfd146a 1003 {
78c60e3d
SS
1004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1005 "Build SLP failed: operation");
1006 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
1007 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 1008 }
6983e6b5
RB
1009 /* Fatal mismatch. */
1010 matches[0] = false;
ebfd146a
IR
1011 return false;
1012 }
1013
4cecd659
BC
1014 if (rhs_code == COND_EXPR)
1015 {
1016 tree cond_expr = gimple_assign_rhs1 (stmt);
1017 enum tree_code cond_code = TREE_CODE (cond_expr);
1018 enum tree_code swap_code = ERROR_MARK;
1019 enum tree_code invert_code = ERROR_MARK;
f7e531cf
IR
1020
1021 if (i == 0)
1022 first_cond_code = TREE_CODE (cond_expr);
4cecd659
BC
1023 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1024 {
1025 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1026 swap_code = swap_tree_comparison (cond_code);
1027 invert_code = invert_tree_comparison (cond_code, honor_nans);
1028 }
1029
1030 if (first_cond_code == cond_code)
1031 ;
1032 /* Isomorphic can be achieved by swapping. */
1033 else if (first_cond_code == swap_code)
1034 swap[i] = 1;
1035 /* Isomorphic can be achieved by inverting. */
1036 else if (first_cond_code == invert_code)
1037 swap[i] = 2;
1038 else
1039 {
1040 if (dump_enabled_p ())
1041 {
1042 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
1043 "Build SLP failed: different"
1044 " operation");
4cecd659 1045 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
78c60e3d 1046 stmt, 0);
4cecd659 1047 }
6983e6b5
RB
1048 /* Mismatch. */
1049 continue;
f7e531cf 1050 }
4cecd659 1051 }
ebfd146a 1052 }
6983e6b5
RB
1053
1054 matches[i] = true;
1055 }
1056
1057 for (i = 0; i < group_size; ++i)
1058 if (!matches[i])
1059 return false;
1060
6876e5bc
RB
1061 /* If we allowed a two-operation SLP node verify the target can cope
1062 with the permute we are going to use. */
1063 if (alt_stmt_code != ERROR_MARK
1064 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1065 {
1f3cb663
RS
1066 if (vectype == boolean_type_node
1067 || !vect_two_operations_perm_ok_p (stmts, group_size,
1068 vectype, alt_stmt_code))
6876e5bc
RB
1069 {
1070 for (i = 0; i < group_size; ++i)
b9787581 1071 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
6876e5bc
RB
1072 {
1073 matches[i] = false;
1074 if (dump_enabled_p ())
1075 {
1076 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1077 "Build SLP failed: different operation "
1078 "in stmt ");
1079 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
b9787581 1080 stmts[i]->stmt, 0);
6876e5bc
RB
1081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1082 "original stmt ");
1083 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
b9787581 1084 first_stmt_info->stmt, 0);
6876e5bc
RB
1085 }
1086 }
1087 return false;
1088 }
1089 *two_operators = true;
1090 }
1091
6983e6b5
RB
1092 return true;
1093}
1094
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   The key is the vector of participating stmt_vec_infos; hashing and
   equality are defined out-of-line below.  Note we never remove apart
   from at destruction time so we do not need a special value for
   deleted that differs from empty (both are the non-existing vec).  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  /* A slot is empty/deleted iff its vec was never created or was
     released; releasing doubles as the removal operation.  */
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
1110inline hashval_t
1111bst_traits::hash (value_type x)
1112{
1113 inchash::hash h;
1114 for (unsigned i = 0; i < x.length (); ++i)
b9787581 1115 h.add_int (gimple_uid (x[i]->stmt));
26d66f28
RB
1116 return h.end ();
1117}
1118inline bool
1119bst_traits::equal (value_type existing, value_type candidate)
1120{
1121 if (existing.length () != candidate.length ())
1122 return false;
1123 for (unsigned i = 0; i < existing.length (); ++i)
1124 if (existing[i] != candidate[i])
1125 return false;
1126 return true;
1127}
1128
f7300fff
RB
1129typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
1130static scalar_stmts_set_t *bst_fail;
26d66f28 1131
68435eb2
RB
1132typedef hash_map <vec <gimple *>, slp_tree,
1133 simple_hashmap_traits <bst_traits, slp_tree> >
1134 scalar_stmts_to_slp_tree_map_t;
1135
/* Forward declaration: the recursive worker behind vect_build_slp_tree,
   which wraps it with a cache of stmt sets already known to fail.  */
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);
6983e6b5 1143
e403d17e 1144static slp_tree
310213d4 1145vect_build_slp_tree (vec_info *vinfo,
b9787581 1146 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1147 poly_uint64 *max_nunits, vec<slp_tree> *loads,
1428105c
RB
1148 bool *matches, unsigned *npermutes, unsigned *tree_size,
1149 unsigned max_tree_size)
26d66f28
RB
1150{
1151 if (bst_fail->contains (stmts))
1152 return NULL;
1153 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
1154 loads, matches, npermutes, tree_size,
1155 max_tree_size);
1156 /* When SLP build fails for stmts record this, otherwise SLP build
1157 can be exponential in time when we allow to construct parts from
1158 scalars, see PR81723. */
1159 if (! res)
1160 {
b9787581 1161 vec <stmt_vec_info> x;
26d66f28
RB
1162 x.create (stmts.length ());
1163 x.splice (stmts);
1164 bst_fail->add (x);
1165 }
1166 return res;
1167}
1168
/* Recursively build an SLP tree for the group of scalar stmts STMTS of
   size GROUP_SIZE.  On success return the root SLP node; on failure
   return NULL with MATCHES[i] indicating which stmts of the group
   matched the first stmt (MATCHES[0] == false signals a fatal,
   non-recoverable mismatch).  *MAX_NUNITS is updated with the maximum
   number of vector elements seen, LOADS collects load nodes of the
   built tree, *NPERMUTES counts failed operand-swap retries and
   *TREE_SIZE (if non-NULL) accumulates the node count, bounded by
   MAX_TREE_SIZE.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  /* Determine the number of operands from the representative stmt;
     any other stmt kind cannot be SLPed.  */
  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      /* A COND_EXPR RHS carries an extra embedded condition operand.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (stmt_info != other_info)
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (REDUC_GROUP_FIRST_ELEMENT (other_info)
		  && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
		continue;
	      if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  /* Verify the stmts form an isomorphic group; fills MATCHES and
     detects the mixed plus/minus ("two operators") case.  */
  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt_info = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      /* Only internal, reduction and induction defs need child nodes;
	 external/constant operands are materialized directly.  */
      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child, false);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild, false);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimatively
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (stmt_info))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt_info->stmt)
	  /* Do so only if the number of not successful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	  do
	    {
	      for (j = 0; j < group_size; ++j)
		{
		  if (matches[j] != !swap_not_matching)
		    continue;
		  stmt_vec_info stmt_info = stmts[j];
		  /* Verify if we can swap operands of this stmt.  */
		  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
		  if (!stmt
		      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
		    {
		      if (!swap_not_matching)
			goto fail;
		      swap_not_matching = false;
		      break;
		    }
		  /* Verify if we can safely swap or if we committed to a
		     specific operand order already.
		     ??? Instead of modifying GIMPLE stmts here we could
		     record whether we want to swap operands in the SLP
		     node and temporarily do that when processing it
		     (or wrap operand accessors in a helper).  */
		  else if (swap[j] != 0
			   || STMT_VINFO_NUM_SLP_USES (stmt_info))
		    {
		      if (!swap_not_matching)
			{
			  if (dump_enabled_p ())
			    {
			      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					       vect_location,
					       "Build SLP failed: cannot swap "
					       "operands of shared stmt ");
			      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
						TDF_SLIM, stmts[j]->stmt, 0);
			    }
			  goto fail;
			}
		      swap_not_matching = false;
		      break;
		    }
		}
	    }
	  while (j != group_size);

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
	      for (j = 0; j < group_size; ++j)
		if (matches[j] == !swap_not_matching)
		  {
		    gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
		    /* Avoid swapping operands twice.  */
		    if (gimple_plf (stmt, GF_PLF_1))
		      continue;
		    swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
				       gimple_assign_rhs2_ptr (stmt));
		    gimple_set_plf (stmt, GF_PLF_1, true);
		  }
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
			      == (matches[j] == !swap_not_matching));

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild, false);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
1544
78c60e3d 1545/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1546
1547static void
4f5b9c80
DM
1548vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1549 slp_tree node)
ebfd146a
IR
1550{
1551 int i;
b9787581 1552 stmt_vec_info stmt_info;
d755c7ef 1553 slp_tree child;
ebfd146a 1554
603cca93
RB
1555 dump_printf_loc (dump_kind, loc, "node%s\n",
1556 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1557 ? " (external)" : "");
b9787581 1558 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
ebfd146a 1559 {
c2a12ca0 1560 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
b9787581 1561 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt_info->stmt, 0);
ebfd146a 1562 }
9771b263 1563 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
c2a12ca0 1564 vect_print_slp_tree (dump_kind, loc, child);
ebfd146a
IR
1565}
1566
1567
b8698a0f
L
1568/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1569 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1570 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1571 stmts in NODE are to be marked. */
1572
1573static void
1574vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1575{
1576 int i;
b9787581 1577 stmt_vec_info stmt_info;
d755c7ef 1578 slp_tree child;
ebfd146a 1579
603cca93 1580 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
1581 return;
1582
b9787581 1583 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
ebfd146a 1584 if (j < 0 || i == j)
b9787581 1585 STMT_SLP_TYPE (stmt_info) = mark;
ebfd146a 1586
9771b263 1587 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1588 vect_mark_slp_stmts (child, mark, j);
ebfd146a
IR
1589}
1590
1591
a70d6342
IR
1592/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1593
1594static void
1595vect_mark_slp_stmts_relevant (slp_tree node)
1596{
1597 int i;
a70d6342 1598 stmt_vec_info stmt_info;
d755c7ef 1599 slp_tree child;
a70d6342 1600
603cca93 1601 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
1602 return;
1603
b9787581 1604 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
a70d6342 1605 {
b8698a0f 1606 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1607 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1608 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1609 }
1610
9771b263 1611 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1612 vect_mark_slp_stmts_relevant (child);
a70d6342
IR
1613}
1614
1615
b5aeb3bb
IR
1616/* Rearrange the statements of NODE according to PERMUTATION. */
1617
1618static void
1619vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
01d8bf07 1620 vec<unsigned> permutation)
b5aeb3bb 1621{
b9787581
RS
1622 stmt_vec_info stmt_info;
1623 vec<stmt_vec_info> tmp_stmts;
d755c7ef
RB
1624 unsigned int i;
1625 slp_tree child;
b5aeb3bb 1626
9771b263 1627 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1628 vect_slp_rearrange_stmts (child, group_size, permutation);
b5aeb3bb 1629
9771b263
DN
1630 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1631 tmp_stmts.create (group_size);
d755c7ef 1632 tmp_stmts.quick_grow_cleared (group_size);
b5aeb3bb 1633
b9787581
RS
1634 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1635 tmp_stmts[permutation[i]] = stmt_info;
b5aeb3bb 1636
9771b263 1637 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1638 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1639}
1640
1641
b266b968
RB
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  I.e. the permutation must be a bijection
     on [0, group_size).  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == DR_GROUP_SIZE (first_stmt_info)
	      && DR_GROUP_GAP (first_stmt_info) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	/* Keep a permutation but make it the identity, since the stmts
	   themselves were already reordered above.  */
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
1711
01d8bf07
RB
1712/* Check if the required load permutations in the SLP instance
1713 SLP_INSTN are supported. */
ebfd146a
IR
1714
1715static bool
01d8bf07 1716vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1717{
01d8bf07
RB
1718 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1719 unsigned int i, j, k, next;
6983e6b5 1720 slp_tree node;
ebfd146a 1721
73fbfcad 1722 if (dump_enabled_p ())
ebfd146a 1723 {
78c60e3d 1724 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1725 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1726 if (node->load_permutation.exists ())
1727 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1728 dump_printf (MSG_NOTE, "%d ", next);
1729 else
bddc974e
TJ
1730 for (k = 0; k < group_size; ++k)
1731 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1732 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1733 }
1734
b5aeb3bb
IR
1735 /* In case of reduction every load permutation is allowed, since the order
1736 of the reduction statements is not important (as opposed to the case of
0d0293ac 1737 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1738 load nodes are of the same size and have the same permutation (and then
1739 rearrange all the nodes of the SLP instance according to this
1740 permutation). */
1741
1742 /* Check that all the load nodes are of the same size. */
01d8bf07 1743 /* ??? Can't we assert this? */
9771b263 1744 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1745 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1746 return false;
2200fc49 1747
b5aeb3bb 1748 node = SLP_INSTANCE_TREE (slp_instn);
b9787581 1749 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1750
b010117a 1751 /* Reduction (there are no data-refs in the root).
b266b968 1752 In reduction chain the order of the loads is not important. */
b9787581
RS
1753 if (!STMT_VINFO_DATA_REF (stmt_info)
1754 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
c4e360f4 1755 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1756
6aa904c4
IR
1757 /* In basic block vectorization we allow any subchain of an interleaving
1758 chain.
1759 FORNOW: not supported in loop SLP because of realignment compications. */
b9787581 1760 if (STMT_VINFO_BB_VINFO (stmt_info))
6aa904c4 1761 {
240a94da
RB
1762 /* Check whether the loads in an instance form a subchain and thus
1763 no permutation is necessary. */
9771b263 1764 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1765 {
9626d143
RB
1766 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1767 continue;
240a94da 1768 bool subchain_p = true;
bffb8014 1769 stmt_vec_info next_load_info = NULL;
b9787581
RS
1770 stmt_vec_info load_info;
1771 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1772 {
1773 if (j != 0
bffb8014 1774 && (next_load_info != load_info
b9787581 1775 || DR_GROUP_GAP (load_info) != 1))
240a94da
RB
1776 {
1777 subchain_p = false;
1778 break;
1779 }
bffb8014 1780 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
b9787581 1781 }
240a94da
RB
1782 if (subchain_p)
1783 SLP_TREE_LOAD_PERMUTATION (node).release ();
1784 else
1785 {
b9787581 1786 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1787 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
928686b1 1788 unsigned HOST_WIDE_INT nunits;
fe73a332
RB
1789 unsigned k, maxk = 0;
1790 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1791 if (k > maxk)
1792 maxk = k;
1793 /* In BB vectorization we may not actually use a loaded vector
2c53b149 1794 accessing elements in excess of DR_GROUP_SIZE. */
928686b1
RS
1795 tree vectype = STMT_VINFO_VECTYPE (group_info);
1796 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2c53b149 1797 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
fe73a332
RB
1798 {
1799 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1800 "BB vectorization with gaps at the end of "
1801 "a load is not supported\n");
1802 return false;
1803 }
1804
240a94da
RB
1805 /* Verify the permutation can be generated. */
1806 vec<tree> tem;
29afecdf 1807 unsigned n_perms;
240a94da 1808 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1809 1, slp_instn, true, &n_perms))
240a94da
RB
1810 {
1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1812 vect_location,
1813 "unsupported load permutation\n");
1814 return false;
1815 }
1816 }
6aa904c4 1817 }
01d8bf07 1818 return true;
6aa904c4
IR
1819 }
1820
31bee964
RB
1821 /* For loop vectorization verify we can generate the permutation. Be
1822 conservative about the vectorization factor, there are permutations
1823 that will use three vector inputs only starting from a specific factor
1824 and the vectorization factor is not yet final.
1825 ??? The SLP instance unrolling factor might not be the maximum one. */
29afecdf 1826 unsigned n_perms;
d9f21f6a
RS
1827 poly_uint64 test_vf
1828 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
31bee964 1829 LOOP_VINFO_VECT_FACTOR
b9787581 1830 (STMT_VINFO_LOOP_VINFO (stmt_info)));
01d8bf07
RB
1831 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1832 if (node->load_permutation.exists ()
31bee964
RB
1833 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1834 slp_instn, true, &n_perms))
01d8bf07 1835 return false;
9b999e8c 1836
01d8bf07 1837 return true;
ebfd146a
IR
1838}
1839
1840
e4a707c4 1841/* Find the last store in SLP INSTANCE. */
ff802fa1 1842
95c68311 1843stmt_vec_info
2e8ab70c 1844vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1845{
95c68311 1846 stmt_vec_info last = NULL;
b9787581 1847 stmt_vec_info stmt_vinfo;
e4a707c4 1848
b9787581 1849 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2e8ab70c 1850 {
2e8ab70c 1851 if (is_pattern_stmt_p (stmt_vinfo))
95c68311
RS
1852 stmt_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo);
1853 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2e8ab70c 1854 }
e4a707c4 1855
2e8ab70c 1856 return last;
e4a707c4
IR
1857}
1858
82570274
RS
1859/* Splits a group of stores, currently beginning at FIRST_VINFO, into
1860 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1861 (also containing the first GROUP1_SIZE stmts, since stores are
1862 consecutive), the second containing the remainder.
1ba91a49
AL
1863 Return the first stmt in the second group. */
1864
82570274
RS
1865static stmt_vec_info
1866vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1ba91a49 1867{
bffb8014 1868 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1ba91a49 1869 gcc_assert (group1_size > 0);
2c53b149 1870 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1ba91a49 1871 gcc_assert (group2_size > 0);
2c53b149 1872 DR_GROUP_SIZE (first_vinfo) = group1_size;
1ba91a49 1873
bffb8014 1874 stmt_vec_info stmt_info = first_vinfo;
1ba91a49
AL
1875 for (unsigned i = group1_size; i > 1; i--)
1876 {
bffb8014
RS
1877 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1878 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1879 }
1880 /* STMT is now the last element of the first group. */
bffb8014
RS
1881 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1882 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1ba91a49 1883
bffb8014
RS
1884 DR_GROUP_SIZE (group2) = group2_size;
1885 for (stmt_info = group2; stmt_info;
1886 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1ba91a49 1887 {
bffb8014
RS
1888 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1889 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1890 }
1891
2c53b149 1892 /* For the second group, the DR_GROUP_GAP is that before the original group,
1ba91a49 1893 plus skipping over the first vector. */
bffb8014 1894 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1ba91a49 1895
2c53b149
RB
1896 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1897 DR_GROUP_GAP (first_vinfo) += group2_size;
1ba91a49
AL
1898
1899 if (dump_enabled_p ())
1900 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1901 group1_size, group2_size);
1902
1903 return group2;
1904}
1905
4b6068ea
RS
1906/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1907 statements and a vector of NUNITS elements. */
1908
1909static poly_uint64
1910calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1911{
1912 return exact_div (common_multiple (nunits, group_size), group_size);
1913}
1914
0d0293ac 1915/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1916 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1917 Return FALSE if it's impossible to SLP any stmt in the loop. */
1918
1919static bool
310213d4 1920vect_analyze_slp_instance (vec_info *vinfo,
355fe088 1921 gimple *stmt, unsigned max_tree_size)
ebfd146a 1922{
b9787581 1923 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
ebfd146a 1924 slp_instance new_instance;
d092494c 1925 slp_tree node;
2c53b149 1926 unsigned int group_size;
b5aeb3bb 1927 tree vectype, scalar_type = NULL_TREE;
1ba91a49 1928 unsigned int i;
9771b263 1929 vec<slp_tree> loads;
b9787581
RS
1930 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1931 vec<stmt_vec_info> scalar_stmts;
b5aeb3bb 1932
b9787581 1933 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
b5aeb3bb 1934 {
2c53b149
RB
1935 scalar_type = TREE_TYPE (DR_REF (dr));
1936 vectype = get_vectype_for_scalar_type (scalar_type);
b9787581 1937 group_size = DR_GROUP_SIZE (stmt_info);
2c53b149 1938 }
b9787581 1939 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
1940 {
1941 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581
RS
1942 vectype = STMT_VINFO_VECTYPE (stmt_info);
1943 group_size = REDUC_GROUP_SIZE (stmt_info);
b5aeb3bb
IR
1944 }
1945 else
1946 {
310213d4 1947 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581 1948 vectype = STMT_VINFO_VECTYPE (stmt_info);
310213d4 1949 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 1950 }
b8698a0f 1951
ebfd146a
IR
1952 if (!vectype)
1953 {
73fbfcad 1954 if (dump_enabled_p ())
ebfd146a 1955 {
78c60e3d
SS
1956 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1957 "Build SLP failed: unsupported data-type ");
1958 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 1959 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1960 }
b5aeb3bb 1961
ebfd146a
IR
1962 return false;
1963 }
4b6068ea 1964 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 1965
0d0293ac 1966 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1967 scalar_stmts.create (group_size);
bffb8014 1968 stmt_vec_info next_info = stmt_info;
b9787581 1969 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 1970 {
b5aeb3bb 1971 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
bffb8014 1972 while (next_info)
b5aeb3bb 1973 {
b9787581
RS
1974 if (STMT_VINFO_IN_PATTERN_P (next_info)
1975 && STMT_VINFO_RELATED_STMT (next_info))
1976 scalar_stmts.safe_push (STMT_VINFO_RELATED_STMT (next_info));
f7e531cf 1977 else
b9787581 1978 scalar_stmts.safe_push (next_info);
bffb8014 1979 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2c53b149
RB
1980 }
1981 }
b9787581 1982 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
1983 {
1984 /* Collect the reduction stmts and store them in
1985 SLP_TREE_SCALAR_STMTS. */
bffb8014 1986 while (next_info)
2c53b149 1987 {
b9787581
RS
1988 if (STMT_VINFO_IN_PATTERN_P (next_info)
1989 && STMT_VINFO_RELATED_STMT (next_info))
1990 scalar_stmts.safe_push (STMT_VINFO_RELATED_STMT (next_info));
2c53b149 1991 else
b9787581 1992 scalar_stmts.safe_push (next_info);
bffb8014 1993 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
b5aeb3bb 1994 }
14a61437
RB
1995 /* Mark the first element of the reduction chain as reduction to properly
1996 transform the node. In the reduction analysis phase only the last
1997 element of the chain is marked as reduction. */
b9787581 1998 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
b5aeb3bb
IR
1999 }
2000 else
2001 {
2002 /* Collect reduction statements. */
32c91dfc
RS
2003 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2004 for (i = 0; reductions.iterate (i, &next_info); i++)
2005 scalar_stmts.safe_push (next_info);
ebfd146a
IR
2006 }
2007
9771b263 2008 loads.create (group_size);
ebfd146a
IR
2009
2010 /* Build the tree for the SLP instance. */
89d390e5
RB
2011 bool *matches = XALLOCAVEC (bool, group_size);
2012 unsigned npermutes = 0;
f7300fff 2013 bst_fail = new scalar_stmts_set_t ();
4b6068ea 2014 poly_uint64 max_nunits = nunits;
e569db5f 2015 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
4b6068ea 2016 &max_nunits, &loads, matches, &npermutes,
e569db5f 2017 NULL, max_tree_size);
26d66f28 2018 delete bst_fail;
e569db5f 2019 if (node != NULL)
ebfd146a 2020 {
4ef69dfc 2021 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 2022 poly_uint64 unrolling_factor
4b6068ea 2023 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 2024
d9f21f6a 2025 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
2026 && is_a <bb_vec_info> (vinfo))
2027 {
4b6068ea
RS
2028 unsigned HOST_WIDE_INT const_max_nunits;
2029 if (!max_nunits.is_constant (&const_max_nunits)
2030 || const_max_nunits > group_size)
2031 {
2032 if (dump_enabled_p ())
2033 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2034 "Build SLP failed: store group "
2035 "size not a multiple of the vector size "
2036 "in basic block SLP\n");
6e2dd807 2037 vect_free_slp_tree (node, false);
4b6068ea
RS
2038 loads.release ();
2039 return false;
2040 }
e569db5f 2041 /* Fatal mismatch. */
4b6068ea 2042 matches[group_size / const_max_nunits * const_max_nunits] = false;
6e2dd807 2043 vect_free_slp_tree (node, false);
e569db5f
VK
2044 loads.release ();
2045 }
2046 else
2047 {
4ef69dfc
IR
2048 /* Create a new SLP instance. */
2049 new_instance = XNEW (struct _slp_instance);
2050 SLP_INSTANCE_TREE (new_instance) = node;
2051 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 2052 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 2053 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
2054
2055 /* Compute the load permutation. */
2056 slp_tree load_node;
2057 bool loads_permuted = false;
abf9bfbc
RB
2058 FOR_EACH_VEC_ELT (loads, i, load_node)
2059 {
01d8bf07 2060 vec<unsigned> load_permutation;
abf9bfbc 2061 int j;
b9787581 2062 stmt_vec_info load_info;
01d8bf07
RB
2063 bool this_load_permuted = false;
2064 load_permutation.create (group_size);
bffb8014 2065 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
b9787581
RS
2066 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2067 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
abf9bfbc 2068 {
b9787581 2069 int load_place = vect_get_place_in_interleaving_chain
bffb8014 2070 (load_info, first_stmt_info);
6983e6b5
RB
2071 gcc_assert (load_place != -1);
2072 if (load_place != j)
01d8bf07 2073 this_load_permuted = true;
abf9bfbc
RB
2074 load_permutation.safe_push (load_place);
2075 }
fe2bef71
RB
2076 if (!this_load_permuted
2077 /* The load requires permutation when unrolling exposes
2078 a gap either because the group is larger than the SLP
2079 group-size or because there is a gap between the groups. */
d9f21f6a 2080 && (known_eq (unrolling_factor, 1U)
bffb8014
RS
2081 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2082 && DR_GROUP_GAP (first_stmt_info) == 0)))
01d8bf07
RB
2083 {
2084 load_permutation.release ();
2085 continue;
2086 }
2087 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2088 loads_permuted = true;
abf9bfbc 2089 }
6aa904c4
IR
2090
2091 if (loads_permuted)
ebfd146a 2092 {
01d8bf07 2093 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 2094 {
73fbfcad 2095 if (dump_enabled_p ())
ebfd146a 2096 {
e645e942 2097 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
2098 "Build SLP failed: unsupported load "
2099 "permutation ");
86a91c0a
RS
2100 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2101 TDF_SLIM, stmt_info->stmt, 0);
ebfd146a 2102 }
6e2dd807 2103 vect_free_slp_instance (new_instance, false);
ebfd146a
IR
2104 return false;
2105 }
ebfd146a 2106 }
ebfd146a 2107
e569db5f 2108 /* If the loads and stores can be handled with load/store-lan
bb0f5ca7
AL
2109 instructions do not generate this SLP instance. */
2110 if (is_a <loop_vec_info> (vinfo)
2111 && loads_permuted
7e11fc7f 2112 && dr && vect_store_lanes_supported (vectype, group_size, false))
bb0f5ca7
AL
2113 {
2114 slp_tree load_node;
2115 FOR_EACH_VEC_ELT (loads, i, load_node)
2116 {
bffb8014 2117 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
b9787581 2118 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
bffb8014 2119 /* Use SLP for strided accesses (or if we can't load-lanes). */
bb0f5ca7
AL
2120 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2121 || ! vect_load_lanes_supported
2122 (STMT_VINFO_VECTYPE (stmt_vinfo),
2c53b149 2123 DR_GROUP_SIZE (stmt_vinfo), false))
bb0f5ca7
AL
2124 break;
2125 }
2126 if (i == loads.length ())
2127 {
2128 if (dump_enabled_p ())
2129 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2130 "Built SLP cancelled: can use "
2131 "load/store-lanes\n");
6e2dd807 2132 vect_free_slp_instance (new_instance, false);
bb0f5ca7
AL
2133 return false;
2134 }
2135 }
2136
310213d4 2137 vinfo->slp_instances.safe_push (new_instance);
b8698a0f 2138
73fbfcad 2139 if (dump_enabled_p ())
c2a12ca0
RB
2140 {
2141 dump_printf_loc (MSG_NOTE, vect_location,
2142 "Final SLP tree for instance:\n");
2143 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2144 }
ebfd146a
IR
2145
2146 return true;
2147 }
e569db5f
VK
2148 }
2149 else
2150 {
ebfd146a
IR
2151 /* Failed to SLP. */
2152 /* Free the allocated memory. */
e403d17e 2153 scalar_stmts.release ();
9771b263 2154 loads.release ();
e569db5f 2155 }
b8698a0f 2156
1ba91a49 2157 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2158 vector size. */
4b6068ea 2159 unsigned HOST_WIDE_INT const_nunits;
1ba91a49 2160 if (is_a <bb_vec_info> (vinfo)
91987857
RS
2161 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2162 && DR_GROUP_FIRST_ELEMENT (stmt_info)
4b6068ea 2163 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2164 {
2165 /* We consider breaking the group only on VF boundaries from the existing
2166 start. */
2167 for (i = 0; i < group_size; i++)
2168 if (!matches[i]) break;
2169
4b6068ea 2170 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2171 {
2172 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2173 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2174 unsigned group1_size = i & ~(const_nunits - 1);
1ba91a49 2175
82570274
RS
2176 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2177 group1_size);
86a91c0a
RS
2178 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2179 max_tree_size);
1ba91a49
AL
2180 /* If the first non-match was in the middle of a vector,
2181 skip the rest of that vector. */
2182 if (group1_size < i)
2183 {
4b6068ea 2184 i = group1_size + const_nunits;
1ba91a49 2185 if (i < group_size)
4b6068ea 2186 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2187 }
2188 if (i < group_size)
2189 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2190 return res;
2191 }
2192 /* Even though the first vector did not all match, we might be able to SLP
2193 (some) of the remainder. FORNOW ignore this possibility. */
2194 }
2195
a70d6342 2196 return false;
ebfd146a
IR
2197}
2198
2199
ff802fa1 2200/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
2201 trees of packed scalar stmts if SLP is possible. */
2202
2203bool
310213d4 2204vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2205{
2206 unsigned int i;
f698fccf 2207 stmt_vec_info first_element;
ebfd146a 2208
adac3a68 2209 DUMP_VECT_SCOPE ("vect_analyze_slp");
ebfd146a 2210
0d0293ac 2211 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2212 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
0630a4ec 2213 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
ebfd146a 2214
310213d4 2215 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2216 {
310213d4
RB
2217 if (loop_vinfo->reduction_chains.length () > 0)
2218 {
2219 /* Find SLP sequences starting from reduction chains. */
2220 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
0630a4ec 2221 if (! vect_analyze_slp_instance (vinfo, first_element,
310213d4 2222 max_tree_size))
6b5e165b
RB
2223 {
2224 /* Dissolve reduction chain group. */
f698fccf
RS
2225 stmt_vec_info vinfo = first_element;
2226 while (vinfo)
6b5e165b 2227 {
bffb8014 2228 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2c53b149
RB
2229 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2230 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
f698fccf 2231 vinfo = next;
6b5e165b 2232 }
f698fccf 2233 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
6b5e165b 2234 }
310213d4 2235 }
b010117a 2236
310213d4 2237 /* Find SLP sequences starting from groups of reductions. */
0630a4ec
RB
2238 if (loop_vinfo->reductions.length () > 1)
2239 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2240 max_tree_size);
310213d4 2241 }
b5aeb3bb 2242
ebfd146a
IR
2243 return true;
2244}
2245
2246
2247/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2248 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2249 least one instance. */
ebfd146a 2250
437f4a00 2251bool
ebfd146a
IR
2252vect_make_slp_decision (loop_vec_info loop_vinfo)
2253{
d9f21f6a
RS
2254 unsigned int i;
2255 poly_uint64 unrolling_factor = 1;
9771b263 2256 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2257 slp_instance instance;
2258 int decided_to_slp = 0;
2259
adac3a68 2260 DUMP_VECT_SCOPE ("vect_make_slp_decision");
ebfd146a 2261
9771b263 2262 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2263 {
2264 /* FORNOW: SLP if you can. */
d9f21f6a
RS
2265 /* All unroll factors have the form current_vector_size * X for some
2266 rational X, so they must have a common multiple. */
2267 unrolling_factor
2268 = force_common_multiple (unrolling_factor,
2269 SLP_INSTANCE_UNROLLING_FACTOR (instance));
ebfd146a 2270
ff802fa1 2271 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2272 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2273 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
2274 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2275 decided_to_slp++;
2276 }
2277
2278 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2279
73fbfcad 2280 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2281 {
2282 dump_printf_loc (MSG_NOTE, vect_location,
2283 "Decided to SLP %d instances. Unrolling factor ",
2284 decided_to_slp);
2285 dump_dec (MSG_NOTE, unrolling_factor);
2286 dump_printf (MSG_NOTE, "\n");
2287 }
437f4a00
IR
2288
2289 return (decided_to_slp > 0);
ebfd146a
IR
2290}
2291
2292
2293/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2294 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2295
2296static void
642fce57 2297vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 2298{
b9787581 2299 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2300 imm_use_iterator imm_iter;
355fe088 2301 gimple *use_stmt;
b9787581 2302 stmt_vec_info use_vinfo;
d755c7ef 2303 slp_tree child;
f2c74cc4 2304 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2305 int j;
2306
2307 /* Propagate hybrid down the SLP tree. */
2308 if (stype == hybrid)
2309 ;
2310 else if (HYBRID_SLP_STMT (stmt_vinfo))
2311 stype = hybrid;
2312 else
2313 {
2314 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2315 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2316 /* If we get a pattern stmt here we have to use the LHS of the
2317 original stmt for immediate uses. */
b9787581 2318 gimple *stmt = stmt_vinfo->stmt;
2935d994
RB
2319 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2320 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
10681ce8 2321 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo)->stmt;
e7baeb39
RB
2322 tree def;
2323 if (gimple_code (stmt) == GIMPLE_PHI)
2324 def = gimple_phi_result (stmt);
2325 else
2326 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2327 if (def)
2328 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870 2329 {
6585ff8f
RS
2330 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2331 if (!use_vinfo)
29764870 2332 continue;
29764870
RB
2333 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2334 && STMT_VINFO_RELATED_STMT (use_vinfo))
10681ce8 2335 use_vinfo = STMT_VINFO_RELATED_STMT (use_vinfo);
29764870
RB
2336 if (!STMT_SLP_TYPE (use_vinfo)
2337 && (STMT_VINFO_RELEVANT (use_vinfo)
2338 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2339 && !(gimple_code (use_stmt) == GIMPLE_PHI
2340 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2341 {
2342 if (dump_enabled_p ())
2343 {
2344 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2345 "def in non-SLP stmt: ");
2346 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2347 }
2348 stype = hybrid;
2349 }
29764870 2350 }
642fce57 2351 }
ebfd146a 2352
502f0263
RB
2353 if (stype == hybrid
2354 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2355 {
2356 if (dump_enabled_p ())
2357 {
2358 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
b9787581 2359 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_vinfo->stmt, 0);
b1af7da6
RB
2360 }
2361 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2362 }
ebfd146a 2363
642fce57 2364 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
603cca93 2365 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
90dd6e3d 2366 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2367}
f2c74cc4 2368
642fce57 2369/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2370
642fce57
RB
2371static tree
2372vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2373{
2374 walk_stmt_info *wi = (walk_stmt_info *)data;
6585ff8f 2375 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
642fce57
RB
2376
2377 if (wi->is_lhs)
2378 return NULL_TREE;
2379
c98d0595
RS
2380 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2381 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
642fce57 2382 {
6585ff8f 2383 if (dump_enabled_p ())
b1af7da6 2384 {
6585ff8f
RS
2385 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2386 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt_info->stmt, 0);
b1af7da6 2387 }
6585ff8f 2388 STMT_SLP_TYPE (def_stmt_info) = hybrid;
642fce57
RB
2389 }
2390
2391 return NULL_TREE;
ebfd146a
IR
2392}
2393
642fce57
RB
2394static tree
2395vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
6585ff8f 2396 walk_stmt_info *wi)
642fce57 2397{
6585ff8f
RS
2398 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2399 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
642fce57
RB
2400 /* If the stmt is in a SLP instance then this isn't a reason
2401 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2402 if (! STMT_SLP_TYPE (use_vinfo)
2403 && (STMT_VINFO_RELEVANT (use_vinfo)
2404 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2405 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2406 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2407 ;
2408 else
642fce57
RB
2409 *handled = true;
2410 return NULL_TREE;
2411}
ebfd146a
IR
2412
2413/* Find stmts that must be both vectorized and SLPed. */
2414
2415void
2416vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2417{
2418 unsigned int i;
9771b263 2419 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2420 slp_instance instance;
2421
adac3a68 2422 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
ebfd146a 2423
642fce57
RB
2424 /* First walk all pattern stmt in the loop and mark defs of uses as
2425 hybrid because immediate uses in them are not recorded. */
2426 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2427 {
2428 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2429 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2430 gsi_next (&gsi))
2431 {
355fe088 2432 gimple *stmt = gsi_stmt (gsi);
6585ff8f 2433 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
642fce57
RB
2434 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2435 {
2436 walk_stmt_info wi;
2437 memset (&wi, 0, sizeof (wi));
6585ff8f 2438 wi.info = loop_vinfo;
642fce57 2439 gimple_stmt_iterator gsi2
10681ce8 2440 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
642fce57
RB
2441 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2442 vect_detect_hybrid_slp_1, &wi);
2443 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2444 vect_detect_hybrid_slp_2,
2445 vect_detect_hybrid_slp_1, &wi);
2446 }
2447 }
2448 }
2449
2450 /* Then walk the SLP instance trees marking stmts with uses in
2451 non-SLP stmts as hybrid, also propagating hybrid down the
2452 SLP tree, collecting the above info on-the-fly. */
9771b263 2453 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2454 {
2455 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2456 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2457 i, pure_slp);
2458 }
ebfd146a
IR
2459}
2460
a70d6342 2461
2c515559
RS
2462/* Initialize a bb_vec_info struct for the statements between
2463 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2464
2465_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
ca823c85
RB
2466 gimple_stmt_iterator region_end_in,
2467 vec_info_shared *shared)
2468 : vec_info (vec_info::bb, init_cost (NULL), shared),
2c515559
RS
2469 bb (gsi_bb (region_begin_in)),
2470 region_begin (region_begin_in),
2471 region_end (region_end_in)
a70d6342 2472{
a70d6342
IR
2473 gimple_stmt_iterator gsi;
2474
61d371eb
RB
2475 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2476 gsi_next (&gsi))
a70d6342 2477 {
355fe088 2478 gimple *stmt = gsi_stmt (gsi);
a70d6342 2479 gimple_set_uid (stmt, 0);
4fbeb363 2480 add_stmt (stmt);
a70d6342
IR
2481 }
2482
2c515559 2483 bb->aux = this;
a70d6342
IR
2484}
2485
2486
2487/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2488 stmts in the basic block. */
2489
2c515559 2490_bb_vec_info::~_bb_vec_info ()
a70d6342 2491{
2c515559
RS
2492 for (gimple_stmt_iterator si = region_begin;
2493 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
a70d6342 2494 {
355fe088 2495 gimple *stmt = gsi_stmt (si);
a70d6342
IR
2496 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2497
2498 if (stmt_info)
2499 /* Free stmt_vec_info. */
2500 free_stmt_vec_info (stmt);
61d371eb
RB
2501
2502 /* Reset region marker. */
2503 gimple_set_uid (stmt, -1);
a70d6342
IR
2504 }
2505
2c515559 2506 bb->aux = NULL;
a70d6342
IR
2507}
2508
15944069
RS
2509/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2510 given then that child nodes have already been processed, and that
2511 their def types currently match their SLP node's def type. */
a70d6342
IR
2512
2513static bool
15944069
RS
2514vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2515 slp_instance node_instance,
2516 stmt_vector_for_cost *cost_vec)
a70d6342 2517{
b9787581 2518 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bd2f172f
RB
2519 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2520
2521 /* For BB vectorization vector types are assigned here.
2522 Memory accesses already got their vector type assigned
2523 in vect_analyze_data_refs. */
2524 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2525 if (bb_vinfo
2526 && ! STMT_VINFO_DATA_REF (stmt_info))
a70d6342 2527 {
1f3cb663
RS
2528 tree vectype, nunits_vectype;
2529 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2530 &nunits_vectype))
2531 /* We checked this when building the node. */
2532 gcc_unreachable ();
2533 if (vectype == boolean_type_node)
bd2f172f 2534 {
1f3cb663
RS
2535 vectype = vect_get_mask_type_for_stmt (stmt_info);
2536 if (!vectype)
2537 /* vect_get_mask_type_for_stmt has already explained the
2538 failure. */
2539 return false;
bd2f172f
RB
2540 }
2541
b9787581 2542 stmt_vec_info sstmt_info;
15944069 2543 unsigned int i;
b9787581
RS
2544 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2545 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
a70d6342
IR
2546 }
2547
8b7e9dba
RS
2548 /* Calculate the number of vector statements to be created for the
2549 scalar stmts in this node. For SLP reductions it is equal to the
2550 number of vector statements in the children (which has already been
2551 calculated by the recursive call). Otherwise it is the number of
2c53b149 2552 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
8b7e9dba 2553 VF divided by the number of elements in a vector. */
2c53b149
RB
2554 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2555 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
8b7e9dba
RS
2556 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2557 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2558 else
2559 {
d9f21f6a 2560 poly_uint64 vf;
8b7e9dba
RS
2561 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2562 vf = loop_vinfo->vectorization_factor;
2563 else
2564 vf = 1;
2565 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2566 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2567 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2568 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2569 }
2570
15944069 2571 bool dummy;
86a91c0a 2572 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
15944069
RS
2573}
2574
2575/* Analyze statements contained in SLP tree NODE after recursively analyzing
2576 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2577
2578 Return true if the operations are supported. */
2579
2580static bool
2581vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2582 slp_instance node_instance,
2583 scalar_stmts_to_slp_tree_map_t *visited,
2584 scalar_stmts_to_slp_tree_map_t *lvisited,
2585 stmt_vector_for_cost *cost_vec)
2586{
2587 int i, j;
2588 slp_tree child;
2589
2590 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2591 return true;
2592
2593 /* If we already analyzed the exact same set of scalar stmts we're done.
2594 We share the generated vector stmts for those. */
2595 slp_tree *leader;
2596 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2597 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2598 {
2599 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2600 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2601 return true;
2602 }
2603
2604 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2605 doesn't result in any issue since we throw away the lvisited set
2606 when we fail. */
2607 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2608
2609 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2610 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2611 visited, lvisited, cost_vec))
2612 return false;
2613
bd2f172f
RB
2614 /* Push SLP node def-type to stmt operands. */
2615 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2616 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581 2617 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
bd2f172f 2618 = SLP_TREE_DEF_TYPE (child);
15944069
RS
2619 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2620 cost_vec);
bd2f172f
RB
2621 /* Restore def-types. */
2622 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2623 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581 2624 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
bd2f172f
RB
2625 = vect_internal_def;
2626 if (! res)
2627 return false;
2628
2629 return true;
a70d6342
IR
2630}
2631
2632
8b7e9dba 2633/* Analyze statements in SLP instances of VINFO. Return true if the
a70d6342
IR
2634 operations are supported. */
2635
a12e42fc 2636bool
8b7e9dba 2637vect_slp_analyze_operations (vec_info *vinfo)
a70d6342 2638{
a70d6342
IR
2639 slp_instance instance;
2640 int i;
2641
adac3a68 2642 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
a12e42fc 2643
68435eb2
RB
2644 scalar_stmts_to_slp_tree_map_t *visited
2645 = new scalar_stmts_to_slp_tree_map_t ();
8b7e9dba 2646 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
a70d6342 2647 {
68435eb2
RB
2648 scalar_stmts_to_slp_tree_map_t lvisited;
2649 stmt_vector_for_cost cost_vec;
2650 cost_vec.create (2);
8b7e9dba
RS
2651 if (!vect_slp_analyze_node_operations (vinfo,
2652 SLP_INSTANCE_TREE (instance),
68435eb2
RB
2653 instance, visited, &lvisited,
2654 &cost_vec))
a70d6342 2655 {
b9787581
RS
2656 slp_tree node = SLP_INSTANCE_TREE (instance);
2657 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
a12e42fc
RB
2658 dump_printf_loc (MSG_NOTE, vect_location,
2659 "removing SLP instance operations starting from: ");
b9787581 2660 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
6e2dd807 2661 vect_free_slp_instance (instance, false);
8b7e9dba 2662 vinfo->slp_instances.ordered_remove (i);
68435eb2 2663 cost_vec.release ();
a70d6342
IR
2664 }
2665 else
68435eb2
RB
2666 {
2667 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2668 x != lvisited.end(); ++x)
2669 visited->put ((*x).first.copy (), (*x).second);
2670 i++;
78604de0 2671
68435eb2
RB
2672 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2673 cost_vec.release ();
2674 }
2675 }
78604de0
RB
2676 delete visited;
2677
8b7e9dba 2678 return !vinfo->slp_instances.is_empty ();
a70d6342
IR
2679}
2680
6eddf228
RB
2681
2682/* Compute the scalar cost of the SLP node NODE and its children
2683 and return it. Do not account defs that are marked in LIFE and
2684 update LIFE according to uses of NODE. */
2685
a296d6d3 2686static void
292cba13 2687vect_bb_slp_scalar_cost (basic_block bb,
a296d6d3
RB
2688 slp_tree node, vec<bool, va_heap> *life,
2689 stmt_vector_for_cost *cost_vec)
6eddf228 2690{
6eddf228 2691 unsigned i;
b9787581 2692 stmt_vec_info stmt_info;
6eddf228
RB
2693 slp_tree child;
2694
b9787581 2695 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6eddf228 2696 {
b9787581 2697 gimple *stmt = stmt_info->stmt;
91987857 2698 vec_info *vinfo = stmt_info->vinfo;
6eddf228
RB
2699 ssa_op_iter op_iter;
2700 def_operand_p def_p;
6eddf228 2701
ff4c81cc 2702 if ((*life)[i])
6eddf228
RB
2703 continue;
2704
2705 /* If there is a non-vectorized use of the defs then the scalar
2706 stmt is kept live in which case we do not account it or any
2707 required defs in the SLP children in the scalar cost. This
2708 way we make the vectorization more costly when compared to
2709 the scalar cost. */
2710 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2711 {
2712 imm_use_iterator use_iter;
355fe088 2713 gimple *use_stmt;
6eddf228 2714 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
91987857 2715 if (!is_gimple_debug (use_stmt))
6eddf228 2716 {
91987857
RS
2717 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2718 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2719 {
2720 (*life)[i] = true;
2721 BREAK_FROM_IMM_USE_STMT (use_iter);
2722 }
6eddf228
RB
2723 }
2724 }
ff4c81cc 2725 if ((*life)[i])
6eddf228
RB
2726 continue;
2727
b555a2e4
RB
2728 /* Count scalar stmts only once. */
2729 if (gimple_visited_p (stmt))
2730 continue;
2731 gimple_set_visited (stmt, true);
2732
a296d6d3 2733 vect_cost_for_stmt kind;
6eddf228
RB
2734 if (STMT_VINFO_DATA_REF (stmt_info))
2735 {
2736 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
a296d6d3 2737 kind = scalar_load;
6eddf228 2738 else
a296d6d3 2739 kind = scalar_store;
6eddf228
RB
2740 }
2741 else
a296d6d3
RB
2742 kind = scalar_stmt;
2743 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
6eddf228
RB
2744 }
2745
faa5399b 2746 auto_vec<bool, 20> subtree_life;
6eddf228 2747 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
2748 {
2749 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2750 {
2751 /* Do not directly pass LIFE to the recursive call, copy it to
2752 confine changes in the callee to the current child/subtree. */
2753 subtree_life.safe_splice (*life);
a296d6d3 2754 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
faa5399b
RB
2755 subtree_life.truncate (0);
2756 }
2757 }
6eddf228
RB
2758}
2759
69f11a13
IR
2760/* Check if vectorization of the basic block is profitable. */
2761
2762static bool
2763vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2764{
9771b263 2765 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2766 slp_instance instance;
1a4b99c1 2767 int i;
c3e7ee41 2768 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2769 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2770
2771 /* Calculate scalar cost. */
a296d6d3
RB
2772 stmt_vector_for_cost scalar_costs;
2773 scalar_costs.create (0);
6eddf228 2774 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2775 {
00f96dc9 2776 auto_vec<bool, 20> life;
ff4c81cc 2777 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
a296d6d3
RB
2778 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2779 SLP_INSTANCE_TREE (instance),
2780 &life, &scalar_costs);
2781 }
2782 void *target_cost_data = init_cost (NULL);
68435eb2 2783 add_stmt_costs (target_cost_data, &scalar_costs);
a296d6d3
RB
2784 scalar_costs.release ();
2785 unsigned dummy;
2786 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2787 destroy_cost_data (target_cost_data);
69f11a13 2788
b555a2e4
RB
2789 /* Unset visited flag. */
2790 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2791 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2792 gimple_set_visited (gsi_stmt (gsi), false);
2793
c3e7ee41 2794 /* Complete the target-specific cost calculation. */
92345349
BS
2795 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2796 &vec_inside_cost, &vec_epilogue_cost);
2797
2798 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2799
73fbfcad 2800 if (dump_enabled_p ())
69f11a13 2801 {
78c60e3d
SS
2802 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2803 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2804 vec_inside_cost);
2805 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2806 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2807 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2808 }
2809
a6524bba
RB
2810 /* Vectorization is profitable if its cost is more than the cost of scalar
2811 version. Note that we err on the vector side for equal cost because
2812 the cost estimate is otherwise quite pessimistic (constant uses are
2813 free on the scalar side but cost a load on the vector side for
2814 example). */
2815 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2816 return false;
2817
2818 return true;
2819}
2820
a5b50aa1
RB
2821/* Check if the basic block can be vectorized. Returns a bb_vec_info
2822 if so and sets fatal to true if failure is independent of
2823 current_vector_size. */
a70d6342 2824
8e19f5a1 2825static bb_vec_info
61d371eb
RB
2826vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2827 gimple_stmt_iterator region_end,
a5b50aa1 2828 vec<data_reference_p> datarefs, int n_stmts,
ca823c85 2829 bool &fatal, vec_info_shared *shared)
a70d6342
IR
2830{
2831 bb_vec_info bb_vinfo;
a70d6342 2832 slp_instance instance;
8e19f5a1 2833 int i;
d9f21f6a 2834 poly_uint64 min_vf = 2;
e4a707c4 2835
a5b50aa1
RB
2836 /* The first group of checks is independent of the vector size. */
2837 fatal = true;
2838
61d371eb
RB
2839 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2840 {
2841 if (dump_enabled_p ())
2842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2843 "not vectorized: too many instructions in "
2844 "basic block.\n");
2845 free_data_refs (datarefs);
2846 return NULL;
2847 }
2848
ca823c85 2849 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
a70d6342
IR
2850 if (!bb_vinfo)
2851 return NULL;
2852
61d371eb 2853 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
ca823c85 2854 bb_vinfo->shared->save_datarefs ();
428db0ba
RB
2855
2856 /* Analyze the data references. */
2857
2858 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
a70d6342 2859 {
73fbfcad 2860 if (dump_enabled_p ())
78c60e3d
SS
2861 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2862 "not vectorized: unhandled data-ref in basic "
2863 "block.\n");
b8698a0f 2864
2c515559 2865 delete bb_vinfo;
a70d6342
IR
2866 return NULL;
2867 }
2868
fcac74a1 2869 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2870 {
73fbfcad 2871 if (dump_enabled_p ())
78c60e3d
SS
2872 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2873 "not vectorized: not enough data-refs in "
2874 "basic block.\n");
a70d6342 2875
2c515559 2876 delete bb_vinfo;
a70d6342
IR
2877 return NULL;
2878 }
2879
310213d4 2880 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
2881 {
2882 if (dump_enabled_p ())
2883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2884 "not vectorized: unhandled data access in "
2885 "basic block.\n");
2886
2c515559 2887 delete bb_vinfo;
5abe1e05
RB
2888 return NULL;
2889 }
2890
a5b50aa1
RB
2891 /* If there are no grouped stores in the region there is no need
2892 to continue with pattern recog as vect_analyze_slp will fail
2893 anyway. */
2894 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 2895 {
73fbfcad 2896 if (dump_enabled_p ())
a5b50aa1
RB
2897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2898 "not vectorized: no grouped stores in "
2899 "basic block.\n");
b8698a0f 2900
2c515559 2901 delete bb_vinfo;
a70d6342
IR
2902 return NULL;
2903 }
b8698a0f 2904
a5b50aa1
RB
2905 /* While the rest of the analysis below depends on it in some way. */
2906 fatal = false;
2907
2908 vect_pattern_recog (bb_vinfo);
2909
a70d6342
IR
2910 /* Check the SLP opportunities in the basic block, analyze and build SLP
2911 trees. */
310213d4 2912 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 2913 {
73fbfcad 2914 if (dump_enabled_p ())
effb52da
RB
2915 {
2916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2917 "Failed to SLP the basic block.\n");
2918 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2919 "not vectorized: failed to find SLP opportunities "
2920 "in basic block.\n");
2921 }
a70d6342 2922
2c515559 2923 delete bb_vinfo;
a70d6342
IR
2924 return NULL;
2925 }
b8698a0f 2926
62c8a2cf
RS
2927 vect_record_base_alignments (bb_vinfo);
2928
c2a12ca0
RB
2929 /* Analyze and verify the alignment of data references and the
2930 dependence in the SLP instances. */
a5b50aa1
RB
2931 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2932 {
c2a12ca0
RB
2933 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2934 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1 2935 {
b9787581
RS
2936 slp_tree node = SLP_INSTANCE_TREE (instance);
2937 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
a5b50aa1
RB
2938 dump_printf_loc (MSG_NOTE, vect_location,
2939 "removing SLP instance operations starting from: ");
b9787581 2940 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
6e2dd807 2941 vect_free_slp_instance (instance, false);
a5b50aa1
RB
2942 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2943 continue;
2944 }
c2a12ca0
RB
2945
2946 /* Mark all the statements that we want to vectorize as pure SLP and
2947 relevant. */
2948 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2949 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2950
a5b50aa1
RB
2951 i++;
2952 }
a5b50aa1
RB
2953 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2954 {
2c515559 2955 delete bb_vinfo;
a5b50aa1
RB
2956 return NULL;
2957 }
2958
8b7e9dba 2959 if (!vect_slp_analyze_operations (bb_vinfo))
a70d6342 2960 {
73fbfcad 2961 if (dump_enabled_p ())
e645e942 2962 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2963 "not vectorized: bad operation in basic block.\n");
a70d6342 2964
2c515559 2965 delete bb_vinfo;
a70d6342
IR
2966 return NULL;
2967 }
2968
69f11a13 2969 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 2970 if (!unlimited_cost_model (NULL)
69f11a13
IR
2971 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2972 {
73fbfcad 2973 if (dump_enabled_p ())
78c60e3d
SS
2974 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2975 "not vectorized: vectorization is not "
2976 "profitable.\n");
69f11a13 2977
2c515559 2978 delete bb_vinfo;
69f11a13
IR
2979 return NULL;
2980 }
2981
73fbfcad 2982 if (dump_enabled_p ())
78c60e3d
SS
2983 dump_printf_loc (MSG_NOTE, vect_location,
2984 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2985
2986 return bb_vinfo;
2987}
2988
2989
428db0ba
RB
2990/* Main entry for the BB vectorizer. Analyze and transform BB, returns
2991 true if anything in the basic-block was vectorized. */
2992
2993bool
2994vect_slp_bb (basic_block bb)
8e19f5a1
IR
2995{
2996 bb_vec_info bb_vinfo;
8e19f5a1 2997 gimple_stmt_iterator gsi;
61d371eb 2998 bool any_vectorized = false;
86e36728 2999 auto_vector_sizes vector_sizes;
8e19f5a1 3000
adac3a68 3001 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
8e19f5a1 3002
8e19f5a1
IR
3003 /* Autodetect first vector size we try. */
3004 current_vector_size = 0;
86e36728
RS
3005 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
3006 unsigned int next_size = 0;
8e19f5a1 3007
61d371eb
RB
3008 gsi = gsi_start_bb (bb);
3009
86e36728 3010 poly_uint64 autodetected_vector_size = 0;
8e19f5a1
IR
3011 while (1)
3012 {
61d371eb
RB
3013 if (gsi_end_p (gsi))
3014 break;
3015
3016 gimple_stmt_iterator region_begin = gsi;
3017 vec<data_reference_p> datarefs = vNULL;
3018 int insns = 0;
3019
3020 for (; !gsi_end_p (gsi); gsi_next (&gsi))
428db0ba 3021 {
61d371eb
RB
3022 gimple *stmt = gsi_stmt (gsi);
3023 if (is_gimple_debug (stmt))
3024 continue;
3025 insns++;
3026
3027 if (gimple_location (stmt) != UNKNOWN_LOCATION)
4f5b9c80 3028 vect_location = stmt;
61d371eb 3029
8e846c66 3030 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
61d371eb
RB
3031 break;
3032 }
3033
3034 /* Skip leading unhandled stmts. */
3035 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3036 {
3037 gsi_next (&gsi);
3038 continue;
3039 }
428db0ba 3040
61d371eb
RB
3041 gimple_stmt_iterator region_end = gsi;
3042
3043 bool vectorized = false;
a5b50aa1 3044 bool fatal = false;
ca823c85 3045 vec_info_shared shared;
61d371eb 3046 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
ca823c85 3047 datarefs, insns, fatal, &shared);
61d371eb
RB
3048 if (bb_vinfo
3049 && dbg_cnt (vect_slp))
3050 {
428db0ba 3051 if (dump_enabled_p ())
61d371eb 3052 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
428db0ba 3053
ca823c85 3054 bb_vinfo->shared->check_datarefs ();
428db0ba
RB
3055 vect_schedule_slp (bb_vinfo);
3056
d1ac60d5
RB
3057 unsigned HOST_WIDE_INT bytes;
3058 if (current_vector_size.is_constant (&bytes))
3059 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3060 "basic block part vectorized using "
3061 HOST_WIDE_INT_PRINT_UNSIGNED " byte "
3062 "vectors\n", bytes);
3063 else
3064 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3065 "basic block part vectorized using variable "
3066 "length vectors\n");
428db0ba 3067
61d371eb 3068 vectorized = true;
428db0ba 3069 }
2c515559 3070 delete bb_vinfo;
8e19f5a1 3071
61d371eb 3072 any_vectorized |= vectorized;
8e19f5a1 3073
86e36728
RS
3074 if (next_size == 0)
3075 autodetected_vector_size = current_vector_size;
3076
3077 if (next_size < vector_sizes.length ()
3078 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3079 next_size += 1;
3080
61d371eb 3081 if (vectorized
86e36728
RS
3082 || next_size == vector_sizes.length ()
3083 || known_eq (current_vector_size, 0U)
a5b50aa1
RB
3084 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3085 vector sizes will fail do not bother iterating. */
3086 || fatal)
61d371eb
RB
3087 {
3088 if (gsi_end_p (region_end))
3089 break;
8e19f5a1 3090
61d371eb
RB
3091 /* Skip the unhandled stmt. */
3092 gsi_next (&gsi);
3093
3094 /* And reset vector sizes. */
3095 current_vector_size = 0;
86e36728 3096 next_size = 0;
61d371eb
RB
3097 }
3098 else
3099 {
3100 /* Try the next biggest vector size. */
86e36728 3101 current_vector_size = vector_sizes[next_size++];
61d371eb 3102 if (dump_enabled_p ())
86e36728
RS
3103 {
3104 dump_printf_loc (MSG_NOTE, vect_location,
3105 "***** Re-trying analysis with "
3106 "vector size ");
3107 dump_dec (MSG_NOTE, current_vector_size);
3108 dump_printf (MSG_NOTE, "\n");
3109 }
61d371eb
RB
3110
3111 /* Start over. */
3112 gsi = region_begin;
3113 }
8e19f5a1 3114 }
61d371eb
RB
3115
3116 return any_vectorized;
8e19f5a1
IR
3117}
3118
3119
e4af0bc4
IE
3120/* Return 1 if vector type of boolean constant which is OPNUM
3121 operand in statement STMT is a boolean vector. */
3122
3123static bool
3124vect_mask_constant_operand_p (gimple *stmt, int opnum)
3125{
3126 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3127 enum tree_code code = gimple_expr_code (stmt);
3128 tree op, vectype;
e4af0bc4
IE
3129 enum vect_def_type dt;
3130
3131 /* For comparison and COND_EXPR type is chosen depending
3132 on the other comparison operand. */
3133 if (TREE_CODE_CLASS (code) == tcc_comparison)
3134 {
3135 if (opnum)
3136 op = gimple_assign_rhs1 (stmt);
3137 else
3138 op = gimple_assign_rhs2 (stmt);
3139
894dd753 3140 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3141 gcc_unreachable ();
3142
3143 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3144 }
3145
3146 if (code == COND_EXPR)
3147 {
3148 tree cond = gimple_assign_rhs1 (stmt);
3149
3150 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1
RB
3151 op = cond;
3152 else if (opnum)
e4af0bc4
IE
3153 op = TREE_OPERAND (cond, 1);
3154 else
3155 op = TREE_OPERAND (cond, 0);
3156
894dd753 3157 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3158 gcc_unreachable ();
3159
3160 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3161 }
3162
3163 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3164}
3165
018b2744
RS
3166/* Build a variable-length vector in which the elements in ELTS are repeated
3167 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3168 RESULTS and add any new instructions to SEQ.
3169
3170 The approach we use is:
3171
3172 (1) Find a vector mode VM with integer elements of mode IM.
3173
3174 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3175 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3176 from small vectors to IM.
3177
3178 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3179
3180 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3181 correct byte contents.
3182
3183 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3184
3185 We try to find the largest IM for which this sequence works, in order
3186 to cut down on the number of interleaves. */
3187
f1739b48 3188void
018b2744
RS
3189duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3190 unsigned int nresults, vec<tree> &results)
3191{
3192 unsigned int nelts = elts.length ();
3193 tree element_type = TREE_TYPE (vector_type);
3194
3195 /* (1) Find a vector mode VM with integer elements of mode IM. */
3196 unsigned int nvectors = 1;
3197 tree new_vector_type;
3198 tree permutes[2];
3199 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3200 &nvectors, &new_vector_type,
3201 permutes))
3202 gcc_unreachable ();
3203
3204 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3205 unsigned int partial_nelts = nelts / nvectors;
3206 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3207
3208 tree_vector_builder partial_elts;
3209 auto_vec<tree, 32> pieces (nvectors * 2);
3210 pieces.quick_grow (nvectors * 2);
3211 for (unsigned int i = 0; i < nvectors; ++i)
3212 {
3213 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3214 ELTS' has mode IM. */
3215 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3216 for (unsigned int j = 0; j < partial_nelts; ++j)
3217 partial_elts.quick_push (elts[i * partial_nelts + j]);
3218 tree t = gimple_build_vector (seq, &partial_elts);
3219 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3220 TREE_TYPE (new_vector_type), t);
3221
3222 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3223 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3224 }
3225
3226 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3227 correct byte contents.
3228
3229 We need to repeat the following operation log2(nvectors) times:
3230
3231 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3232 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3233
3234 However, if each input repeats every N elements and the VF is
3235 a multiple of N * 2, the HI result is the same as the LO. */
3236 unsigned int in_start = 0;
3237 unsigned int out_start = nvectors;
3238 unsigned int hi_start = nvectors / 2;
3239 /* A bound on the number of outputs needed to produce NRESULTS results
3240 in the final iteration. */
3241 unsigned int noutputs_bound = nvectors * nresults;
3242 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3243 {
3244 noutputs_bound /= 2;
3245 unsigned int limit = MIN (noutputs_bound, nvectors);
3246 for (unsigned int i = 0; i < limit; ++i)
3247 {
3248 if ((i & 1) != 0
3249 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3250 2 * in_repeat))
3251 {
3252 pieces[out_start + i] = pieces[out_start + i - 1];
3253 continue;
3254 }
3255
3256 tree output = make_ssa_name (new_vector_type);
3257 tree input1 = pieces[in_start + (i / 2)];
3258 tree input2 = pieces[in_start + (i / 2) + hi_start];
3259 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3260 input1, input2,
3261 permutes[i & 1]);
3262 gimple_seq_add_stmt (seq, stmt);
3263 pieces[out_start + i] = output;
3264 }
3265 std::swap (in_start, out_start);
3266 }
3267
3268 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3269 results.reserve (nresults);
3270 for (unsigned int i = 0; i < nresults; ++i)
3271 if (i < nvectors)
3272 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3273 pieces[in_start + i]));
3274 else
3275 results.quick_push (results[i - nvectors]);
3276}
3277
e4af0bc4 3278
b8698a0f
L
3279/* For constant and loop invariant defs of SLP_NODE this function returns
3280 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
3281 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3282 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
3283 REDUC_INDEX is the index of the reduction operand in the statements, unless
3284 it is -1. */
ebfd146a
IR
3285
3286static void
9dc3f7de 3287vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 3288 vec<tree> *vec_oprnds,
306b0c92 3289 unsigned int op_num, unsigned int number_of_vectors)
ebfd146a 3290{
b9787581
RS
3291 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3292 stmt_vec_info stmt_vinfo = stmts[0];
3293 gimple *stmt = stmt_vinfo->stmt;
018b2744 3294 unsigned HOST_WIDE_INT nunits;
ebfd146a 3295 tree vec_cst;
d2a12ae7 3296 unsigned j, number_of_places_left_in_vector;
ebfd146a 3297 tree vector_type;
9dc3f7de 3298 tree vop;
9771b263 3299 int group_size = stmts.length ();
ebfd146a 3300 unsigned int vec_num, i;
d2a12ae7 3301 unsigned number_of_copies = 1;
9771b263
DN
3302 vec<tree> voprnds;
3303 voprnds.create (number_of_vectors);
ebfd146a 3304 bool constant_p, is_store;
b5aeb3bb 3305 tree neutral_op = NULL;
bac430c9 3306 enum tree_code code = gimple_expr_code (stmt);
13396b6e 3307 gimple_seq ctor_seq = NULL;
018b2744 3308 auto_vec<tree, 16> permute_results;
b5aeb3bb 3309
42fd8198 3310 /* Check if vector type is a boolean vector. */
2568d8a1 3311 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
b9787581 3312 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
42fd8198
IE
3313 vector_type
3314 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3315 else
3316 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
afbe6325 3317
ebfd146a
IR
3318 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3319 {
3320 is_store = true;
3321 op = gimple_assign_rhs1 (stmt);
3322 }
3323 else
9dc3f7de
IR
3324 is_store = false;
3325
3326 gcc_assert (op);
ebfd146a 3327
ebfd146a 3328 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3329 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3330
3331 For example, we have two scalar operands, s1 and s2 (e.g., group of
3332 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3333 of this type can be packed in a vector). The output vector will contain
3334 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3335 will be 2).
3336
b8698a0f 3337 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3338 containing the operands.
3339
3340 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3341 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3342 {s5, s6, s7, s8}. */
b8698a0f 3343
018b2744
RS
3344 /* When using duplicate_and_interleave, we just need one element for
3345 each scalar statement. */
3346 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3347 nunits = group_size;
3348
14a61437 3349 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3350
3351 number_of_places_left_in_vector = nunits;
62cf7335 3352 constant_p = true;
5ebaa477 3353 tree_vector_builder elts (vector_type, nunits, 1);
794e3180 3354 elts.quick_grow (nunits);
90dd6e3d 3355 bool place_after_defs = false;
ebfd146a
IR
3356 for (j = 0; j < number_of_copies; j++)
3357 {
b9787581 3358 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
ebfd146a 3359 {
b9787581 3360 stmt = stmt_vinfo->stmt;
ebfd146a
IR
3361 if (is_store)
3362 op = gimple_assign_rhs1 (stmt);
bac430c9 3363 else
f7e531cf 3364 {
bac430c9 3365 switch (code)
f7e531cf 3366 {
bac430c9 3367 case COND_EXPR:
a989bcc3
IE
3368 {
3369 tree cond = gimple_assign_rhs1 (stmt);
3370 if (TREE_CODE (cond) == SSA_NAME)
3371 op = gimple_op (stmt, op_num + 1);
3372 else if (op_num == 0 || op_num == 1)
bac430c9 3373 op = TREE_OPERAND (cond, op_num);
a989bcc3
IE
3374 else
3375 {
3376 if (op_num == 2)
3377 op = gimple_assign_rhs2 (stmt);
3378 else
3379 op = gimple_assign_rhs3 (stmt);
3380 }
3381 }
bac430c9
IR
3382 break;
3383
3384 case CALL_EXPR:
3385 op = gimple_call_arg (stmt, op_num);
3386 break;
3387
b84b294a
JJ
3388 case LSHIFT_EXPR:
3389 case RSHIFT_EXPR:
3390 case LROTATE_EXPR:
3391 case RROTATE_EXPR:
3392 op = gimple_op (stmt, op_num + 1);
3393 /* Unlike the other binary operators, shifts/rotates have
3394 the shift count being int, instead of the same type as
3395 the lhs, so make sure the scalar is the right type if
3396 we are dealing with vectors of
3397 long long/long/short/char. */
793d9a16 3398 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
3399 op = fold_convert (TREE_TYPE (vector_type), op);
3400 break;
3401
bac430c9
IR
3402 default:
3403 op = gimple_op (stmt, op_num + 1);
b84b294a 3404 break;
f7e531cf
IR
3405 }
3406 }
b8698a0f 3407
ebfd146a 3408 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3409 number_of_places_left_in_vector--;
90dd6e3d 3410 tree orig_op = op;
13396b6e 3411 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3412 {
793d9a16 3413 if (CONSTANT_CLASS_P (op))
13396b6e 3414 {
42fd8198
IE
3415 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3416 {
3417 /* Can't use VIEW_CONVERT_EXPR for booleans because
3418 of possibly different sizes of scalar value and
3419 vector element. */
3420 if (integer_zerop (op))
3421 op = build_int_cst (TREE_TYPE (vector_type), 0);
3422 else if (integer_onep (op))
158beb4a 3423 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3424 else
3425 gcc_unreachable ();
3426 }
3427 else
3428 op = fold_unary (VIEW_CONVERT_EXPR,
3429 TREE_TYPE (vector_type), op);
13396b6e
JJ
3430 gcc_assert (op && CONSTANT_CLASS_P (op));
3431 }
3432 else
3433 {
b731b390 3434 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3435 gimple *init_stmt;
262a363f
JJ
3436 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3437 {
158beb4a
JJ
3438 tree true_val
3439 = build_all_ones_cst (TREE_TYPE (vector_type));
3440 tree false_val
3441 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3442 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3443 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3444 op, true_val,
3445 false_val);
262a363f 3446 }
262a363f
JJ
3447 else
3448 {
3449 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3450 op);
3451 init_stmt
3452 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3453 op);
3454 }
13396b6e
JJ
3455 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3456 op = new_temp;
3457 }
50eeef09 3458 }
d2a12ae7 3459 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3460 if (!CONSTANT_CLASS_P (op))
3461 constant_p = false;
90dd6e3d
RB
3462 if (TREE_CODE (orig_op) == SSA_NAME
3463 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3464 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3465 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3466 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3467 place_after_defs = true;
ebfd146a
IR
3468
3469 if (number_of_places_left_in_vector == 0)
3470 {
018b2744
RS
3471 if (constant_p
3472 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3473 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3474 vec_cst = gimple_build_vector (&ctor_seq, &elts);
ebfd146a 3475 else
d2a12ae7 3476 {
018b2744
RS
3477 if (vec_oprnds->is_empty ())
3478 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3479 number_of_vectors,
3480 permute_results);
3481 vec_cst = permute_results[number_of_vectors - j - 1];
d2a12ae7 3482 }
90dd6e3d
RB
3483 tree init;
3484 gimple_stmt_iterator gsi;
3485 if (place_after_defs)
3486 {
95c68311
RS
3487 stmt_vec_info last_stmt_info
3488 = vect_find_last_scalar_stmt_in_slp (slp_node);
3489 gsi = gsi_for_stmt (last_stmt_info->stmt);
b9787581
RS
3490 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3491 &gsi);
90dd6e3d
RB
3492 }
3493 else
b9787581
RS
3494 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3495 NULL);
13396b6e
JJ
3496 if (ctor_seq != NULL)
3497 {
90dd6e3d 3498 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
018b2744 3499 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
13396b6e
JJ
3500 ctor_seq = NULL;
3501 }
90dd6e3d
RB
3502 voprnds.quick_push (init);
3503 place_after_defs = false;
62cf7335
RB
3504 number_of_places_left_in_vector = nunits;
3505 constant_p = true;
5ebaa477
RS
3506 elts.new_vector (vector_type, nunits, 1);
3507 elts.quick_grow (nunits);
ebfd146a
IR
3508 }
3509 }
3510 }
3511
b8698a0f 3512 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 3513 them. */
9771b263 3514 vec_num = voprnds.length ();
d2a12ae7 3515 for (j = vec_num; j != 0; j--)
ebfd146a 3516 {
9771b263
DN
3517 vop = voprnds[j - 1];
3518 vec_oprnds->quick_push (vop);
ebfd146a
IR
3519 }
3520
9771b263 3521 voprnds.release ();
ebfd146a
IR
3522
3523 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3524 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3525 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3526 to replicate the vectors. */
9771b263 3527 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3528 {
b5aeb3bb
IR
3529 tree neutral_vec = NULL;
3530
3531 if (neutral_op)
3532 {
3533 if (!neutral_vec)
b9acc9f1 3534 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3535
9771b263 3536 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3537 }
3538 else
3539 {
9771b263
DN
3540 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3541 vec_oprnds->quick_push (vop);
b5aeb3bb 3542 }
ebfd146a
IR
3543 }
3544}
3545
3546
3547/* Get vectorized definitions from SLP_NODE that contains corresponding
3548 vectorized def-stmts. */
3549
3550static void
9771b263 3551vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
3552{
3553 tree vec_oprnd;
16edaeb8 3554 stmt_vec_info vec_def_stmt_info;
ebfd146a
IR
3555 unsigned int i;
3556
9771b263 3557 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3558
16edaeb8 3559 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
ebfd146a 3560 {
16edaeb8
RS
3561 gcc_assert (vec_def_stmt_info);
3562 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3563 vec_oprnd = gimple_phi_result (vec_def_phi);
e7baeb39 3564 else
16edaeb8 3565 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
9771b263 3566 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
3567 }
3568}
3569
3570
b8698a0f
L
3571/* Get vectorized definitions for SLP_NODE.
3572 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
3573 call vect_get_constant_vectors() to create vector stmts.
3574 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
3575 must be stored in the corresponding child of SLP_NODE, and we call
3576 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 3577
ebfd146a 3578void
9771b263 3579vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
306b0c92 3580 vec<vec<tree> > *vec_oprnds)
ebfd146a 3581{
d092494c 3582 int number_of_vects = 0, i;
77eefb71 3583 unsigned int child_index = 0;
b8698a0f 3584 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
d092494c 3585 slp_tree child = NULL;
37b5ec8f 3586 vec<tree> vec_defs;
e44978dc 3587 tree oprnd;
77eefb71 3588 bool vectorized_defs;
ebfd146a 3589
a1824cfd 3590 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9771b263 3591 FOR_EACH_VEC_ELT (ops, i, oprnd)
ebfd146a 3592 {
d092494c
IR
3593 /* For each operand we check if it has vectorized definitions in a child
3594 node or we need to create them (for invariants and constants). We
3595 check if the LHS of the first stmt of the next child matches OPRND.
3596 If it does, we found the correct child. Otherwise, we call
77eefb71
RB
3597 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3598 to check this child node for the next operand. */
3599 vectorized_defs = false;
3600 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
ebfd146a 3601 {
01d8bf07 3602 child = SLP_TREE_CHILDREN (slp_node)[child_index];
d092494c 3603
e44978dc 3604 /* We have to check both pattern and original def, if available. */
603cca93 3605 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
e44978dc 3606 {
b9787581
RS
3607 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3608 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
e7baeb39 3609 tree first_def_op;
90dd6e3d 3610
b9787581 3611 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
e7baeb39
RB
3612 first_def_op = gimple_phi_result (first_def);
3613 else
b9787581 3614 first_def_op = gimple_get_lhs (first_def_info->stmt);
e7baeb39 3615 if (operand_equal_p (oprnd, first_def_op, 0)
90dd6e3d 3616 || (related
10681ce8
RS
3617 && operand_equal_p (oprnd,
3618 gimple_get_lhs (related->stmt), 0)))
90dd6e3d
RB
3619 {
3620 /* The number of vector defs is determined by the number of
3621 vector statements in the node from which we get those
3622 statements. */
3623 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3624 vectorized_defs = true;
77eefb71 3625 child_index++;
90dd6e3d 3626 }
e44978dc 3627 }
77eefb71
RB
3628 else
3629 child_index++;
d092494c 3630 }
ebfd146a 3631
77eefb71
RB
3632 if (!vectorized_defs)
3633 {
3634 if (i == 0)
3635 {
3636 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3637 /* Number of vector stmts was calculated according to LHS in
3638 vect_schedule_slp_instance (), fix it by replacing LHS with
3639 RHS, if necessary. See vect_get_smallest_scalar_type () for
3640 details. */
a1824cfd
RS
3641 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
3642 &rhs_size_unit);
77eefb71
RB
3643 if (rhs_size_unit != lhs_size_unit)
3644 {
3645 number_of_vects *= rhs_size_unit;
3646 number_of_vects /= lhs_size_unit;
3647 }
3648 }
d092494c 3649 }
b5aeb3bb 3650
d092494c 3651 /* Allocate memory for vectorized defs. */
37b5ec8f
JJ
3652 vec_defs = vNULL;
3653 vec_defs.create (number_of_vects);
ebfd146a 3654
d092494c
IR
3655 /* For reduction defs we call vect_get_constant_vectors (), since we are
3656 looking for initial loop invariant values. */
306b0c92 3657 if (vectorized_defs)
d092494c 3658 /* The defs are already vectorized. */
37b5ec8f 3659 vect_get_slp_vect_defs (child, &vec_defs);
d092494c 3660 else
e7baeb39 3661 /* Build vectors from scalar defs. */
37b5ec8f 3662 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
306b0c92 3663 number_of_vects);
ebfd146a 3664
37b5ec8f 3665 vec_oprnds->quick_push (vec_defs);
d092494c 3666 }
ebfd146a
IR
3667}
3668
ebfd146a
IR
3669/* Generate vector permute statements from a list of loads in DR_CHAIN.
3670 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3671 permute statements for the SLP node NODE of the SLP instance
3672 SLP_NODE_INSTANCE. */
3673
ebfd146a 3674bool
01d8bf07 3675vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
d9f21f6a
RS
3676 gimple_stmt_iterator *gsi, poly_uint64 vf,
3677 slp_instance slp_node_instance, bool analyze_only,
29afecdf 3678 unsigned *n_perms)
ebfd146a 3679{
b9787581 3680 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
16edaeb8 3681 vec_info *vinfo = stmt_info->vinfo;
ebfd146a 3682 tree mask_element_type = NULL_TREE, mask_type;
928686b1 3683 int vec_index = 0;
2635892a 3684 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a 3685 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
928686b1 3686 unsigned int mask_element;
ef4bddc2 3687 machine_mode mode;
928686b1 3688 unsigned HOST_WIDE_INT nunits, const_vf;
ebfd146a 3689
91ff1504
RB
3690 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3691 return false;
3692
bffb8014 3693 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
91ff1504 3694
22e4dee7
RH
3695 mode = TYPE_MODE (vectype);
3696
d9f21f6a 3697 /* At the moment, all permutations are represented using per-element
928686b1
RS
3698 indices, so we can't cope with variable vector lengths or
3699 vectorization factors. */
3700 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
3701 || !vf.is_constant (&const_vf))
d9f21f6a
RS
3702 return false;
3703
2635892a
RH
3704 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3705 same size as the vector element being permuted. */
96f9265a 3706 mask_element_type = lang_hooks.types.type_for_mode
304b9962 3707 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
ebfd146a 3708 mask_type = get_vectype_for_scalar_type (mask_element_type);
e3342de4 3709 vec_perm_builder mask (nunits, nunits, 1);
908a1a16 3710 mask.quick_grow (nunits);
e3342de4 3711 vec_perm_indices indices;
ebfd146a 3712
61fdfd8c
RB
3713 /* Initialize the vect stmts of NODE to properly insert the generated
3714 stmts later. */
3715 if (! analyze_only)
3716 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3717 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3718 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a 3719
b8698a0f
L
3720 /* Generate permutation masks for every NODE. Number of masks for each NODE
3721 is equal to GROUP_SIZE.
3722 E.g., we have a group of three nodes with three loads from the same
3723 location in each node, and the vector size is 4. I.e., we have a
3724 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3725 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3726 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3727 ...
3728
2635892a 3729 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3730 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
3731 operation, and the mask element values can't be outside that range.
3732 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3733 For the first two permutations we need the first and the second input
ebfd146a 3734 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3735 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3736 {c2,a3,b3,c3}. */
3737
2ce27200 3738 int vect_stmts_counter = 0;
928686b1 3739 unsigned int index = 0;
2ce27200
RB
3740 int first_vec_index = -1;
3741 int second_vec_index = -1;
be377c80 3742 bool noop_p = true;
29afecdf 3743 *n_perms = 0;
ebfd146a 3744
d9f21f6a 3745 for (unsigned int j = 0; j < const_vf; j++)
2ce27200
RB
3746 {
3747 for (int k = 0; k < group_size; k++)
3748 {
928686b1 3749 unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
2c53b149 3750 + j * DR_GROUP_SIZE (stmt_info));
2ce27200
RB
3751 vec_index = i / nunits;
3752 mask_element = i % nunits;
3753 if (vec_index == first_vec_index
3754 || first_vec_index == -1)
3755 {
3756 first_vec_index = vec_index;
3757 }
3758 else if (vec_index == second_vec_index
3759 || second_vec_index == -1)
3760 {
3761 second_vec_index = vec_index;
3762 mask_element += nunits;
3763 }
3764 else
3765 {
3766 if (dump_enabled_p ())
3767 {
3768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3769 "permutation requires at "
3770 "least three vectors ");
3771 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
b9787581 3772 stmt_info->stmt, 0);
2ce27200 3773 }
31bee964 3774 gcc_assert (analyze_only);
2ce27200
RB
3775 return false;
3776 }
ebfd146a 3777
928686b1 3778 gcc_assert (mask_element < 2 * nunits);
be377c80
RB
3779 if (mask_element != index)
3780 noop_p = false;
2ce27200
RB
3781 mask[index++] = mask_element;
3782
e3342de4 3783 if (index == nunits && !noop_p)
2ce27200 3784 {
e3342de4
RS
3785 indices.new_vector (mask, 2, nunits);
3786 if (!can_vec_perm_const_p (mode, indices))
2ce27200
RB
3787 {
3788 if (dump_enabled_p ())
22e4dee7 3789 {
2ce27200
RB
3790 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3791 vect_location,
3792 "unsupported vect permute { ");
3793 for (i = 0; i < nunits; ++i)
6b0630fb
RS
3794 {
3795 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3796 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3797 }
2ce27200 3798 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
22e4dee7 3799 }
31bee964 3800 gcc_assert (analyze_only);
2ce27200
RB
3801 return false;
3802 }
22e4dee7 3803
e3342de4
RS
3804 ++*n_perms;
3805 }
29afecdf 3806
e3342de4
RS
3807 if (index == nunits)
3808 {
2ce27200
RB
3809 if (!analyze_only)
3810 {
be377c80
RB
3811 tree mask_vec = NULL_TREE;
3812
3813 if (! noop_p)
736d0f28 3814 mask_vec = vec_perm_indices_to_tree (mask_type, indices);
2ce27200
RB
3815
3816 if (second_vec_index == -1)
3817 second_vec_index = first_vec_index;
61fdfd8c
RB
3818
3819 /* Generate the permute statement if necessary. */
3820 tree first_vec = dr_chain[first_vec_index];
3821 tree second_vec = dr_chain[second_vec_index];
16edaeb8 3822 stmt_vec_info perm_stmt_info;
61fdfd8c
RB
3823 if (! noop_p)
3824 {
b9787581 3825 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
61fdfd8c
RB
3826 tree perm_dest
3827 = vect_create_destination_var (gimple_assign_lhs (stmt),
3828 vectype);
3829 perm_dest = make_ssa_name (perm_dest);
16edaeb8
RS
3830 gassign *perm_stmt
3831 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3832 first_vec, second_vec,
3833 mask_vec);
3834 perm_stmt_info
b9787581
RS
3835 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3836 gsi);
61fdfd8c
RB
3837 }
3838 else
3839 /* If mask was NULL_TREE generate the requested
3840 identity transform. */
16edaeb8 3841 perm_stmt_info = vinfo->lookup_def (first_vec);
61fdfd8c
RB
3842
3843 /* Store the vector statement in NODE. */
16edaeb8
RS
3844 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3845 = perm_stmt_info;
2ce27200 3846 }
ebfd146a 3847
2ce27200
RB
3848 index = 0;
3849 first_vec_index = -1;
3850 second_vec_index = -1;
be377c80 3851 noop_p = true;
2ce27200
RB
3852 }
3853 }
b8698a0f 3854 }
ebfd146a 3855
ebfd146a
IR
3856 return true;
3857}
3858
ebfd146a
IR
3859/* Vectorize SLP instance tree in postorder. */
3860
3861static bool
f7300fff
RB
3862vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3863 scalar_stmts_to_slp_tree_map_t *bst_map)
ebfd146a 3864{
0d0293ac 3865 bool grouped_store, is_store;
ebfd146a
IR
3866 gimple_stmt_iterator si;
3867 stmt_vec_info stmt_info;
8b7e9dba 3868 unsigned int group_size;
ebfd146a 3869 tree vectype;
603cca93 3870 int i, j;
d755c7ef 3871 slp_tree child;
ebfd146a 3872
603cca93 3873 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
3874 return false;
3875
f7300fff
RB
3876 /* See if we have already vectorized the same set of stmts and reuse their
3877 vectorized stmts. */
dd172744 3878 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
f7300fff 3879 {
dd172744 3880 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
f7300fff
RB
3881 return false;
3882 }
3883
dd172744 3884 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
9771b263 3885 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
f7300fff 3886 vect_schedule_slp_instance (child, instance, bst_map);
b8698a0f 3887
603cca93
RB
3888 /* Push SLP node def-type to stmts. */
3889 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3890 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
3891 {
3892 stmt_vec_info child_stmt_info;
3893 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3894 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
3895 }
603cca93 3896
b9787581 3897 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3898
3899 /* VECTYPE is the type of the destination. */
b690cc0f 3900 vectype = STMT_VINFO_VECTYPE (stmt_info);
dad55d70 3901 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
3902 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3903
68435eb2 3904 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
9771b263 3905 if (!SLP_TREE_VEC_STMTS (node).exists ())
8b7e9dba 3906 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
ebfd146a 3907
73fbfcad 3908 if (dump_enabled_p ())
ebfd146a 3909 {
78c60e3d
SS
3910 dump_printf_loc (MSG_NOTE,vect_location,
3911 "------>vectorizing SLP node starting from: ");
b9787581 3912 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt_info->stmt, 0);
b8698a0f 3913 }
ebfd146a 3914
2e8ab70c
RB
3915 /* Vectorized stmts go before the last scalar stmt which is where
3916 all uses are ready. */
95c68311
RS
3917 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
3918 si = gsi_for_stmt (last_stmt_info->stmt);
e4a707c4 3919
b010117a
IR
3920 /* Mark the first element of the reduction chain as reduction to properly
3921 transform the node. In the analysis phase only the last element of the
3922 chain is marked as reduction. */
2c53b149
RB
3923 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3924 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
b9787581 3925 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
b010117a
IR
3926 {
3927 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3928 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3929 }
3930
6876e5bc
RB
3931 /* Handle two-operation SLP nodes by vectorizing the group with
3932 both operations and then performing a merge. */
3933 if (SLP_TREE_TWO_OPERATORS (node))
3934 {
b9787581 3935 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
6876e5bc 3936 enum tree_code code0 = gimple_assign_rhs_code (stmt);
567a3691 3937 enum tree_code ocode = ERROR_MARK;
b9787581 3938 stmt_vec_info ostmt_info;
e3342de4 3939 vec_perm_builder mask (group_size, group_size, 1);
b9787581
RS
3940 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
3941 {
3942 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
3943 if (gimple_assign_rhs_code (ostmt) != code0)
3944 {
3945 mask.quick_push (1);
3946 ocode = gimple_assign_rhs_code (ostmt);
3947 }
3948 else
3949 mask.quick_push (0);
3950 }
567a3691 3951 if (ocode != ERROR_MARK)
6876e5bc 3952 {
16edaeb8
RS
3953 vec<stmt_vec_info> v0;
3954 vec<stmt_vec_info> v1;
6876e5bc
RB
3955 unsigned j;
3956 tree tmask = NULL_TREE;
b9787581 3957 vect_transform_stmt (stmt_info, &si, &grouped_store, node, instance);
6876e5bc
RB
3958 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3959 SLP_TREE_VEC_STMTS (node).truncate (0);
3960 gimple_assign_set_rhs_code (stmt, ocode);
b9787581 3961 vect_transform_stmt (stmt_info, &si, &grouped_store, node, instance);
6876e5bc
RB
3962 gimple_assign_set_rhs_code (stmt, code0);
3963 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3964 SLP_TREE_VEC_STMTS (node).truncate (0);
3965 tree meltype = build_nonstandard_integer_type
b397965c 3966 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
6876e5bc
RB
3967 tree mvectype = get_same_sized_vectype (meltype, vectype);
3968 unsigned k = 0, l;
3969 for (j = 0; j < v0.length (); ++j)
3970 {
dad55d70
RS
3971 /* Enforced by vect_build_slp_tree, which rejects variable-length
3972 vectors for SLP_TREE_TWO_OPERATORS. */
3973 unsigned int const_nunits = nunits.to_constant ();
3974 tree_vector_builder melts (mvectype, const_nunits, 1);
3975 for (l = 0; l < const_nunits; ++l)
6876e5bc 3976 {
1ece8d4c 3977 if (k >= group_size)
6876e5bc 3978 k = 0;
dad55d70
RS
3979 tree t = build_int_cst (meltype,
3980 mask[k++] * const_nunits + l);
794e3180 3981 melts.quick_push (t);
6876e5bc 3982 }
5ebaa477 3983 tmask = melts.build ();
6876e5bc
RB
3984
3985 /* ??? Not all targets support a VEC_PERM_EXPR with a
3986 constant mask that would translate to a vec_merge RTX
3987 (with their vec_perm_const_ok). We can either not
3988 vectorize in that case or let veclower do its job.
3989 Unfortunately that isn't too great and at least for
3990 plus/minus we'd eventually like to match targets
3991 vector addsub instructions. */
355fe088 3992 gimple *vstmt;
6876e5bc
RB
3993 vstmt = gimple_build_assign (make_ssa_name (vectype),
3994 VEC_PERM_EXPR,
16edaeb8
RS
3995 gimple_assign_lhs (v0[j]->stmt),
3996 gimple_assign_lhs (v1[j]->stmt),
3997 tmask);
3998 SLP_TREE_VEC_STMTS (node).quick_push
b9787581 3999 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
6876e5bc
RB
4000 }
4001 v0.release ();
4002 v1.release ();
4003 return false;
4004 }
4005 }
b9787581
RS
4006 is_store = vect_transform_stmt (stmt_info, &si, &grouped_store, node,
4007 instance);
603cca93
RB
4008
4009 /* Restore stmt def-types. */
4010 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4011 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
4012 {
4013 stmt_vec_info child_stmt_info;
4014 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4015 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4016 }
603cca93 4017
b5aeb3bb 4018 return is_store;
ebfd146a
IR
4019}
4020
dd34c087
JJ
4021/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4022 For loop vectorization this is done in vectorizable_call, but for SLP
4023 it needs to be deferred until end of vect_schedule_slp, because multiple
4024 SLP instances may refer to the same scalar stmt. */
4025
4026static void
4027vect_remove_slp_scalar_calls (slp_tree node)
4028{
b9787581 4029 gimple *new_stmt;
dd34c087
JJ
4030 gimple_stmt_iterator gsi;
4031 int i;
d755c7ef 4032 slp_tree child;
dd34c087
JJ
4033 tree lhs;
4034 stmt_vec_info stmt_info;
4035
603cca93 4036 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
dd34c087
JJ
4037 return;
4038
9771b263 4039 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 4040 vect_remove_slp_scalar_calls (child);
dd34c087 4041
b9787581 4042 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
dd34c087 4043 {
b9787581
RS
4044 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4045 if (!stmt || gimple_bb (stmt) == NULL)
dd34c087 4046 continue;
b9787581 4047 if (is_pattern_stmt_p (stmt_info)
dd34c087
JJ
4048 || !PURE_SLP_STMT (stmt_info))
4049 continue;
4050 lhs = gimple_call_lhs (stmt);
4051 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4052 set_vinfo_for_stmt (new_stmt, stmt_info);
4053 set_vinfo_for_stmt (stmt, NULL);
4054 STMT_VINFO_STMT (stmt_info) = new_stmt;
4055 gsi = gsi_for_stmt (stmt);
4056 gsi_replace (&gsi, new_stmt, false);
4057 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4058 }
4059}
ebfd146a 4060
ff802fa1
IR
4061/* Generate vector code for all SLP instances in the loop/basic block. */
4062
ebfd146a 4063bool
310213d4 4064vect_schedule_slp (vec_info *vinfo)
ebfd146a 4065{
9771b263 4066 vec<slp_instance> slp_instances;
ebfd146a 4067 slp_instance instance;
8b7e9dba 4068 unsigned int i;
ebfd146a
IR
4069 bool is_store = false;
4070
78604de0
RB
4071
4072 scalar_stmts_to_slp_tree_map_t *bst_map
4073 = new scalar_stmts_to_slp_tree_map_t ();
310213d4 4074 slp_instances = vinfo->slp_instances;
9771b263 4075 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
4076 {
4077 /* Schedule the tree of INSTANCE. */
4078 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
f7300fff 4079 instance, bst_map);
73fbfcad 4080 if (dump_enabled_p ())
78c60e3d 4081 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4082 "vectorizing stmts using SLP.\n");
ebfd146a 4083 }
78604de0 4084 delete bst_map;
ebfd146a 4085
9771b263 4086 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
4087 {
4088 slp_tree root = SLP_INSTANCE_TREE (instance);
b9787581 4089 stmt_vec_info store_info;
b5aeb3bb
IR
4090 unsigned int j;
4091 gimple_stmt_iterator gsi;
4092
c40eced0
RB
4093 /* Remove scalar call stmts. Do not do this for basic-block
4094 vectorization as not all uses may be vectorized.
4095 ??? Why should this be necessary? DCE should be able to
4096 remove the stmts itself.
4097 ??? For BB vectorization we can as well remove scalar
4098 stmts starting from the SLP tree root if they have no
4099 uses. */
310213d4 4100 if (is_a <loop_vec_info> (vinfo))
c40eced0 4101 vect_remove_slp_scalar_calls (root);
dd34c087 4102
b9787581 4103 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
b5aeb3bb
IR
4104 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4105 {
b9787581
RS
4106 if (!STMT_VINFO_DATA_REF (store_info))
4107 break;
4108
4109 if (is_pattern_stmt_p (store_info))
4110 store_info = STMT_VINFO_RELATED_STMT (store_info);
4111 /* Free the attached stmt_vec_info and remove the stmt. */
4112 gsi = gsi_for_stmt (store_info);
4113 unlink_stmt_vdef (store_info);
4114 gsi_remove (&gsi, true);
4115 release_defs (store_info);
4116 free_stmt_vec_info (store_info);
b5aeb3bb
IR
4117 }
4118 }
4119
ebfd146a
IR
4120 return is_store;
4121}