]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
[07/46] Add vec_info::lookup_stmt
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
85ec4feb 2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
957060b5 35#include "params.h"
40e23961 36#include "fold-const.h"
d8a2d370 37#include "stor-layout.h"
5be5c238 38#include "gimple-iterator.h"
ebfd146a 39#include "cfgloop.h"
ebfd146a 40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
642fce57 42#include "gimple-walk.h"
428db0ba 43#include "dbgcnt.h"
5ebaa477 44#include "tree-vector-builder.h"
f151c9e1 45#include "vec-perm-indices.h"
018b2744
RS
46#include "gimple-fold.h"
47#include "internal-fn.h"
a70d6342
IR
48
49
6e2dd807
RS
50/* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
ebfd146a
IR
53
54static void
6e2dd807 55vect_free_slp_tree (slp_tree node, bool final_p)
ebfd146a 56{
d092494c 57 int i;
d755c7ef 58 slp_tree child;
d092494c 59
9771b263 60 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6e2dd807 61 vect_free_slp_tree (child, final_p);
b8698a0f 62
6e2dd807
RS
63 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
64 Some statements might no longer exist, after having been
65 removed by vect_transform_stmt. Updating the remaining
66 statements would be redundant. */
67 if (!final_p)
68 {
69 gimple *stmt;
70 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
71 {
72 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
73 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
74 }
75 }
78810bd3 76
9771b263
DN
77 SLP_TREE_CHILDREN (node).release ();
78 SLP_TREE_SCALAR_STMTS (node).release ();
79 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 80 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
81
82 free (node);
83}
84
85
6e2dd807
RS
86/* Free the memory allocated for the SLP instance. FINAL_P is true if we
87 have vectorized the instance or if we have made a final decision not
88 to vectorize the statements in any way. */
ebfd146a
IR
89
90void
6e2dd807 91vect_free_slp_instance (slp_instance instance, bool final_p)
ebfd146a 92{
6e2dd807 93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
9771b263 94 SLP_INSTANCE_LOADS (instance).release ();
c7e62a26 95 free (instance);
ebfd146a
IR
96}
97
98
d092494c
IR
99/* Create an SLP node for SCALAR_STMTS. */
100
101static slp_tree
355fe088 102vect_create_new_slp_node (vec<gimple *> scalar_stmts)
d092494c 103{
d3cfd39e 104 slp_tree node;
355fe088 105 gimple *stmt = scalar_stmts[0];
d092494c
IR
106 unsigned int nops;
107
108 if (is_gimple_call (stmt))
109 nops = gimple_call_num_args (stmt);
110 else if (is_gimple_assign (stmt))
f7e531cf
IR
111 {
112 nops = gimple_num_ops (stmt) - 1;
113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
114 nops++;
115 }
e7baeb39
RB
116 else if (gimple_code (stmt) == GIMPLE_PHI)
117 nops = 0;
d092494c
IR
118 else
119 return NULL;
120
d3cfd39e 121 node = XNEW (struct _slp_tree);
d092494c 122 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263 123 SLP_TREE_VEC_STMTS (node).create (0);
68435eb2 124 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
9771b263 125 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 126 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 127 SLP_TREE_TWO_OPERATORS (node) = false;
603cca93 128 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
d092494c 129
78810bd3
RB
130 unsigned i;
131 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
132 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
133
d092494c
IR
134 return node;
135}
136
137
ddf56386
RB
138/* This structure is used in creation of an SLP tree. Each instance
139 corresponds to the same operand in a group of scalar stmts in an SLP
140 node. */
141typedef struct _slp_oprnd_info
142{
143 /* Def-stmts for the operands. */
144 vec<gimple *> def_stmts;
145 /* Information about the first statement, its vector def-type, type, the
146 operand itself in case it's constant, and an indication if it's a pattern
147 stmt. */
ddf56386 148 tree first_op_type;
34e82342 149 enum vect_def_type first_dt;
ddf56386
RB
150 bool first_pattern;
151 bool second_pattern;
152} *slp_oprnd_info;
153
154
d092494c
IR
155/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
156 operand. */
9771b263 157static vec<slp_oprnd_info>
d092494c
IR
158vect_create_oprnd_info (int nops, int group_size)
159{
160 int i;
161 slp_oprnd_info oprnd_info;
9771b263 162 vec<slp_oprnd_info> oprnds_info;
d092494c 163
9771b263 164 oprnds_info.create (nops);
d092494c
IR
165 for (i = 0; i < nops; i++)
166 {
167 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 168 oprnd_info->def_stmts.create (group_size);
d092494c 169 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 170 oprnd_info->first_op_type = NULL_TREE;
d092494c 171 oprnd_info->first_pattern = false;
effb52da 172 oprnd_info->second_pattern = false;
9771b263 173 oprnds_info.quick_push (oprnd_info);
d092494c
IR
174 }
175
176 return oprnds_info;
177}
178
179
d3cfd39e
JJ
180/* Free operands info. */
181
d092494c 182static void
9771b263 183vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
184{
185 int i;
186 slp_oprnd_info oprnd_info;
187
9771b263 188 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 189 {
9771b263 190 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
191 XDELETE (oprnd_info);
192 }
d092494c 193
9771b263 194 oprnds_info.release ();
d092494c
IR
195}
196
197
d755c7ef
RB
198/* Find the place of the data-ref in STMT in the interleaving chain that starts
199 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
200
b210f45f 201int
355fe088 202vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
d755c7ef 203{
355fe088 204 gimple *next_stmt = first_stmt;
d755c7ef
RB
205 int result = 0;
206
2c53b149 207 if (first_stmt != DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
d755c7ef
RB
208 return -1;
209
210 do
211 {
212 if (next_stmt == stmt)
213 return result;
2c53b149 214 next_stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
c8047699 215 if (next_stmt)
2c53b149 216 result += DR_GROUP_GAP (vinfo_for_stmt (next_stmt));
d755c7ef
RB
217 }
218 while (next_stmt);
219
220 return -1;
221}
222
018b2744
RS
223/* Check whether it is possible to load COUNT elements of type ELT_MODE
224 using the method implemented by duplicate_and_interleave. Return true
225 if so, returning the number of intermediate vectors in *NVECTORS_OUT
226 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
227 (if nonnull). */
228
f1739b48 229bool
018b2744 230can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
f1739b48
RS
231 unsigned int *nvectors_out,
232 tree *vector_type_out,
233 tree *permutes)
018b2744
RS
234{
235 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
236 poly_int64 nelts;
237 unsigned int nvectors = 1;
238 for (;;)
239 {
240 scalar_int_mode int_mode;
241 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
242 if (multiple_p (current_vector_size, elt_bytes, &nelts)
243 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
244 {
245 tree int_type = build_nonstandard_integer_type
246 (GET_MODE_BITSIZE (int_mode), 1);
247 tree vector_type = build_vector_type (int_type, nelts);
248 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
249 {
250 vec_perm_builder sel1 (nelts, 2, 3);
251 vec_perm_builder sel2 (nelts, 2, 3);
252 poly_int64 half_nelts = exact_div (nelts, 2);
253 for (unsigned int i = 0; i < 3; ++i)
254 {
255 sel1.quick_push (i);
256 sel1.quick_push (i + nelts);
257 sel2.quick_push (half_nelts + i);
258 sel2.quick_push (half_nelts + i + nelts);
259 }
260 vec_perm_indices indices1 (sel1, 2, nelts);
261 vec_perm_indices indices2 (sel2, 2, nelts);
262 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
263 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
264 {
265 if (nvectors_out)
266 *nvectors_out = nvectors;
267 if (vector_type_out)
268 *vector_type_out = vector_type;
269 if (permutes)
270 {
271 permutes[0] = vect_gen_perm_mask_checked (vector_type,
272 indices1);
273 permutes[1] = vect_gen_perm_mask_checked (vector_type,
274 indices2);
275 }
276 return true;
277 }
278 }
279 }
280 if (!multiple_p (elt_bytes, 2, &elt_bytes))
281 return false;
282 nvectors *= 2;
283 }
284}
d755c7ef 285
d092494c
IR
286/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
287 they are of a valid type and that they match the defs of the first stmt of
4cecd659 288 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
018b2744
RS
289 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
290 indicates swap is required for cond_expr stmts. Specifically, *SWAP
291 is 1 if STMT is cond and operands of comparison need to be swapped;
292 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
293 If there is any operand swap in this function, *SWAP is set to non-zero
294 value.
4cecd659
BC
295 If there was a fatal error return -1; if the error could be corrected by
296 swapping operands of father node of this one, return 1; if everything is
297 ok return 0. */
4cecd659
BC
298static int
299vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
018b2744 300 vec<gimple *> stmts, unsigned stmt_num,
4cecd659 301 vec<slp_oprnd_info> *oprnds_info)
ebfd146a 302{
018b2744 303 gimple *stmt = stmts[stmt_num];
ebfd146a
IR
304 tree oprnd;
305 unsigned int i, number_of_oprnds;
355fe088 306 gimple *def_stmt;
d092494c 307 enum vect_def_type dt = vect_uninitialized_def;
d092494c 308 bool pattern = false;
abf9bfbc 309 slp_oprnd_info oprnd_info;
b0b4483e
RB
310 int first_op_idx = 1;
311 bool commutative = false;
312 bool first_op_cond = false;
effb52da
RB
313 bool first = stmt_num == 0;
314 bool second = stmt_num == 1;
b8698a0f 315
d092494c 316 if (is_gimple_call (stmt))
190c2236
JJ
317 {
318 number_of_oprnds = gimple_call_num_args (stmt);
b0b4483e 319 first_op_idx = 3;
190c2236 320 }
f7e531cf
IR
321 else if (is_gimple_assign (stmt))
322 {
b0b4483e 323 enum tree_code code = gimple_assign_rhs_code (stmt);
f7e531cf 324 number_of_oprnds = gimple_num_ops (stmt) - 1;
4cecd659
BC
325 /* Swap can only be done for cond_expr if asked to, otherwise we
326 could result in different comparison code to the first stmt. */
a414c77f
IE
327 if (gimple_assign_rhs_code (stmt) == COND_EXPR
328 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
b0b4483e
RB
329 {
330 first_op_cond = true;
b0b4483e
RB
331 number_of_oprnds++;
332 }
333 else
334 commutative = commutative_tree_code (code);
f7e531cf 335 }
d092494c 336 else
b0b4483e 337 return -1;
ebfd146a 338
4cecd659
BC
339 bool swapped = (*swap != 0);
340 gcc_assert (!swapped || first_op_cond);
ebfd146a
IR
341 for (i = 0; i < number_of_oprnds; i++)
342 {
b0b4483e
RB
343again:
344 if (first_op_cond)
f7e531cf 345 {
4cecd659
BC
346 /* Map indicating how operands of cond_expr should be swapped. */
347 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
348 int *map = maps[*swap];
349
350 if (i < 2)
351 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
b0b4483e 352 else
4cecd659 353 oprnd = gimple_op (stmt, map[i]);
f7e531cf
IR
354 }
355 else
4cecd659 356 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
f7e531cf 357
9771b263 358 oprnd_info = (*oprnds_info)[i];
ebfd146a 359
894dd753 360 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt))
ebfd146a 361 {
73fbfcad 362 if (dump_enabled_p ())
ebfd146a 363 {
78c60e3d 364 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3fc356dc 365 "Build SLP failed: can't analyze def for ");
78c60e3d 366 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 367 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
368 }
369
b0b4483e 370 return -1;
ebfd146a
IR
371 }
372
a70d6342 373 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
ff802fa1 374 from the pattern. Check that all the stmts of the node are in the
ebfd146a 375 pattern. */
f5709183 376 if (def_stmt && gimple_bb (def_stmt)
0f8c840c
RS
377 && vect_stmt_in_region_p (vinfo, def_stmt)
378 && vinfo_for_stmt (def_stmt)
379 && is_pattern_stmt_p (vinfo_for_stmt (def_stmt)))
ebfd146a 380 {
d092494c 381 pattern = true;
effb52da
RB
382 if (!first && !oprnd_info->first_pattern
383 /* Allow different pattern state for the defs of the
384 first stmt in reduction chains. */
385 && (oprnd_info->first_dt != vect_reduction_def
386 || (!second && !oprnd_info->second_pattern)))
d092494c 387 {
b0b4483e
RB
388 if (i == 0
389 && !swapped
390 && commutative)
391 {
392 swapped = true;
393 goto again;
394 }
395
73fbfcad 396 if (dump_enabled_p ())
d092494c 397 {
78c60e3d
SS
398 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
399 "Build SLP failed: some of the stmts"
400 " are in a pattern, and others are not ");
401 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 402 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
d092494c 403 }
ebfd146a 404
b0b4483e 405 return 1;
ebfd146a
IR
406 }
407
d092494c 408 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
ebfd146a 409
f7e531cf 410 if (dt == vect_unknown_def_type)
ebfd146a 411 {
73fbfcad 412 if (dump_enabled_p ())
78c60e3d 413 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 414 "Unsupported pattern.\n");
b0b4483e 415 return -1;
ebfd146a
IR
416 }
417
418 switch (gimple_code (def_stmt))
419 {
81c40241
RB
420 case GIMPLE_PHI:
421 case GIMPLE_ASSIGN:
422 break;
423
424 default:
425 if (dump_enabled_p ())
426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
427 "unsupported defining stmt:\n");
428 return -1;
ebfd146a
IR
429 }
430 }
431
effb52da
RB
432 if (second)
433 oprnd_info->second_pattern = pattern;
434
d092494c 435 if (first)
ebfd146a 436 {
d092494c
IR
437 oprnd_info->first_dt = dt;
438 oprnd_info->first_pattern = pattern;
793d9a16 439 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 440 }
ebfd146a
IR
441 else
442 {
d092494c
IR
443 /* Not first stmt of the group, check that the def-stmt/s match
444 the def-stmt/s of the first stmt. Allow different definition
445 types for reduction chains: the first stmt must be a
446 vect_reduction_def (a phi node), and the rest
447 vect_internal_def. */
018b2744
RS
448 tree type = TREE_TYPE (oprnd);
449 if ((oprnd_info->first_dt != dt
450 && !(oprnd_info->first_dt == vect_reduction_def
451 && dt == vect_internal_def)
452 && !((oprnd_info->first_dt == vect_external_def
453 || oprnd_info->first_dt == vect_constant_def)
454 && (dt == vect_external_def
455 || dt == vect_constant_def)))
456 || !types_compatible_p (oprnd_info->first_op_type, type))
ebfd146a 457 {
b0b4483e
RB
458 /* Try swapping operands if we got a mismatch. */
459 if (i == 0
460 && !swapped
461 && commutative)
462 {
463 swapped = true;
464 goto again;
465 }
466
abf9bfbc
RB
467 if (dump_enabled_p ())
468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 469 "Build SLP failed: different types\n");
d092494c 470
b0b4483e 471 return 1;
ebfd146a 472 }
018b2744
RS
473 if ((dt == vect_constant_def
474 || dt == vect_external_def)
475 && !current_vector_size.is_constant ()
476 && (TREE_CODE (type) == BOOLEAN_TYPE
477 || !can_duplicate_and_interleave_p (stmts.length (),
478 TYPE_MODE (type))))
a23644f2
RS
479 {
480 if (dump_enabled_p ())
481 {
482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
483 "Build SLP failed: invalid type of def "
484 "for variable-length SLP ");
485 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
486 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
487 }
488 return -1;
489 }
018b2744
RS
490 }
491
492 /* Check the types of the definitions. */
493 switch (dt)
494 {
495 case vect_constant_def:
496 case vect_external_def:
ebfd146a 497 break;
b8698a0f 498
c78e3652 499 case vect_reduction_def:
e7baeb39 500 case vect_induction_def:
8644a673 501 case vect_internal_def:
abf9bfbc 502 oprnd_info->def_stmts.quick_push (def_stmt);
ebfd146a
IR
503 break;
504
505 default:
506 /* FORNOW: Not supported. */
73fbfcad 507 if (dump_enabled_p ())
ebfd146a 508 {
78c60e3d
SS
509 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
510 "Build SLP failed: illegal type of def ");
81c40241 511 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 512 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
513 }
514
b0b4483e 515 return -1;
ebfd146a
IR
516 }
517 }
518
b0b4483e
RB
519 /* Swap operands. */
520 if (swapped)
521 {
78810bd3
RB
522 /* If there are already uses of this stmt in a SLP instance then
523 we've committed to the operand order and can't swap it. */
524 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
525 {
526 if (dump_enabled_p ())
527 {
528 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
529 "Build SLP failed: cannot swap operands of "
530 "shared stmt ");
531 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
532 }
533 return -1;
534 }
535
b0b4483e
RB
536 if (first_op_cond)
537 {
538 tree cond = gimple_assign_rhs1 (stmt);
4cecd659
BC
539 enum tree_code code = TREE_CODE (cond);
540
541 /* Swap. */
542 if (*swap == 1)
543 {
544 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
545 &TREE_OPERAND (cond, 1));
546 TREE_SET_CODE (cond, swap_tree_comparison (code));
547 }
548 /* Invert. */
549 else
550 {
551 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
552 gimple_assign_rhs3_ptr (stmt));
553 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
554 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
555 gcc_assert (code != ERROR_MARK);
556 TREE_SET_CODE (cond, code);
557 }
b0b4483e
RB
558 }
559 else
560 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
561 gimple_assign_rhs2_ptr (stmt));
78810bd3
RB
562 if (dump_enabled_p ())
563 {
564 dump_printf_loc (MSG_NOTE, vect_location,
565 "swapped operands to match def types in ");
566 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
567 }
b0b4483e
RB
568 }
569
4cecd659 570 *swap = swapped;
b0b4483e 571 return 0;
ebfd146a
IR
572}
573
5249ee4d
RS
574/* Return true if call statements CALL1 and CALL2 are similar enough
575 to be combined into the same SLP group. */
576
577static bool
578compatible_calls_p (gcall *call1, gcall *call2)
579{
580 unsigned int nargs = gimple_call_num_args (call1);
581 if (nargs != gimple_call_num_args (call2))
582 return false;
583
584 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
585 return false;
586
587 if (gimple_call_internal_p (call1))
588 {
589 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
590 TREE_TYPE (gimple_call_lhs (call2))))
591 return false;
592 for (unsigned int i = 0; i < nargs; ++i)
593 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
594 TREE_TYPE (gimple_call_arg (call2, i))))
595 return false;
596 }
597 else
598 {
599 if (!operand_equal_p (gimple_call_fn (call1),
600 gimple_call_fn (call2), 0))
601 return false;
602
603 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
604 return false;
605 }
606 return true;
607}
608
b161f2c9
RS
609/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
610 caller's attempt to find the vector type in STMT with the narrowest
611 element type. Return true if VECTYPE is nonnull and if it is valid
612 for VINFO. When returning true, update MAX_NUNITS to reflect the
613 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are
614 as for vect_build_slp_tree. */
615
616static bool
617vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
4b6068ea 618 tree vectype, poly_uint64 *max_nunits)
b161f2c9
RS
619{
620 if (!vectype)
621 {
622 if (dump_enabled_p ())
623 {
624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
625 "Build SLP failed: unsupported data-type in ");
626 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
627 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
628 }
629 /* Fatal mismatch. */
630 return false;
631 }
632
633 /* If populating the vector type requires unrolling then fail
634 before adjusting *max_nunits for basic-block vectorization. */
4b6068ea
RS
635 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
636 unsigned HOST_WIDE_INT const_nunits;
b161f2c9 637 if (is_a <bb_vec_info> (vinfo)
4b6068ea
RS
638 && (!nunits.is_constant (&const_nunits)
639 || const_nunits > group_size))
b161f2c9
RS
640 {
641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
642 "Build SLP failed: unrolling required "
643 "in basic block SLP\n");
644 /* Fatal mismatch. */
645 return false;
646 }
647
648 /* In case of multiple types we need to detect the smallest type. */
4b6068ea 649 vect_update_max_nunits (max_nunits, vectype);
b161f2c9
RS
650 return true;
651}
ebfd146a 652
1f3cb663
RS
653/* STMTS is a group of GROUP_SIZE SLP statements in which some
654 statements do the same operation as the first statement and in which
655 the others do ALT_STMT_CODE. Return true if we can take one vector
656 of the first operation and one vector of the second and permute them
657 to get the required result. VECTYPE is the type of the vector that
658 would be permuted. */
659
660static bool
661vect_two_operations_perm_ok_p (vec<gimple *> stmts, unsigned int group_size,
662 tree vectype, tree_code alt_stmt_code)
663{
664 unsigned HOST_WIDE_INT count;
665 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
666 return false;
667
668 vec_perm_builder sel (count, count, 1);
669 for (unsigned int i = 0; i < count; ++i)
670 {
671 unsigned int elt = i;
672 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
673 elt += count;
674 sel.quick_push (elt);
675 }
676 vec_perm_indices indices (sel, 2, count);
677 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
678}
679
6983e6b5
RB
680/* Verify if the scalar stmts STMTS are isomorphic, require data
681 permutation or are of unsupported types of operation. Return
682 true if they are, otherwise return false and indicate in *MATCHES
683 which stmts are not isomorphic to the first one. If MATCHES[0]
684 is false then this indicates the comparison could not be
4cecd659
BC
685 carried out or the stmts will never be vectorized by SLP.
686
687 Note COND_EXPR is possibly ismorphic to another one after swapping its
688 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
689 the first stmt by swapping the two operands of comparison; set SWAP[i]
690 to 2 if stmt I is isormorphic to the first stmt by inverting the code
691 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
692 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
ebfd146a
IR
693
694static bool
4cecd659 695vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
355fe088 696 vec<gimple *> stmts, unsigned int group_size,
5249ee4d
RS
697 poly_uint64 *max_nunits, bool *matches,
698 bool *two_operators)
ebfd146a 699{
ebfd146a 700 unsigned int i;
355fe088 701 gimple *first_stmt = stmts[0], *stmt = stmts[0];
6876e5bc
RB
702 enum tree_code first_stmt_code = ERROR_MARK;
703 enum tree_code alt_stmt_code = ERROR_MARK;
704 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 705 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 706 tree lhs;
6983e6b5 707 bool need_same_oprnds = false;
1f3cb663 708 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
ebfd146a
IR
709 optab optab;
710 int icode;
ef4bddc2
RS
711 machine_mode optab_op2_mode;
712 machine_mode vec_mode;
355fe088 713 gimple *first_load = NULL, *prev_first_load = NULL;
d092494c 714
ebfd146a 715 /* For every stmt in NODE find its def stmt/s. */
9771b263 716 FOR_EACH_VEC_ELT (stmts, i, stmt)
ebfd146a 717 {
1f3cb663 718 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4cecd659 719 swap[i] = 0;
6983e6b5
RB
720 matches[i] = false;
721
73fbfcad 722 if (dump_enabled_p ())
ebfd146a 723 {
78c60e3d
SS
724 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
725 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
726 }
727
4b5caab7
IR
728 /* Fail to vectorize statements marked as unvectorizable. */
729 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
730 {
73fbfcad 731 if (dump_enabled_p ())
4b5caab7 732 {
78c60e3d
SS
733 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
734 "Build SLP failed: unvectorizable statement ");
735 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4b5caab7 736 }
6983e6b5
RB
737 /* Fatal mismatch. */
738 matches[0] = false;
4b5caab7
IR
739 return false;
740 }
741
ebfd146a
IR
742 lhs = gimple_get_lhs (stmt);
743 if (lhs == NULL_TREE)
744 {
73fbfcad 745 if (dump_enabled_p ())
ebfd146a 746 {
78c60e3d
SS
747 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
748 "Build SLP failed: not GIMPLE_ASSIGN nor "
749 "GIMPLE_CALL ");
750 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 751 }
6983e6b5
RB
752 /* Fatal mismatch. */
753 matches[0] = false;
ebfd146a
IR
754 return false;
755 }
756
1f3cb663
RS
757 tree nunits_vectype;
758 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
759 &nunits_vectype)
760 || (nunits_vectype
761 && !vect_record_max_nunits (vinfo, stmt, group_size,
762 nunits_vectype, max_nunits)))
b161f2c9 763 {
6983e6b5
RB
764 /* Fatal mismatch. */
765 matches[0] = false;
1f3cb663
RS
766 return false;
767 }
768
769 gcc_assert (vectype);
b8698a0f 770
538dd0b7 771 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
772 {
773 rhs_code = CALL_EXPR;
5249ee4d
RS
774 if ((gimple_call_internal_p (call_stmt)
775 && (!vectorizable_internal_fn_p
776 (gimple_call_internal_fn (call_stmt))))
538dd0b7
DM
777 || gimple_call_tail_p (call_stmt)
778 || gimple_call_noreturn_p (call_stmt)
779 || !gimple_call_nothrow_p (call_stmt)
780 || gimple_call_chain (call_stmt))
190c2236 781 {
73fbfcad 782 if (dump_enabled_p ())
190c2236 783 {
78c60e3d
SS
784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
785 "Build SLP failed: unsupported call type ");
538dd0b7
DM
786 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
787 call_stmt, 0);
190c2236 788 }
6983e6b5
RB
789 /* Fatal mismatch. */
790 matches[0] = false;
190c2236
JJ
791 return false;
792 }
793 }
ebfd146a
IR
794 else
795 rhs_code = gimple_assign_rhs_code (stmt);
796
797 /* Check the operation. */
798 if (i == 0)
799 {
800 first_stmt_code = rhs_code;
801
b8698a0f 802 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
803 vector shift with scalar shift operand. */
804 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
805 || rhs_code == LROTATE_EXPR
806 || rhs_code == RROTATE_EXPR)
807 {
1f3cb663
RS
808 if (vectype == boolean_type_node)
809 {
810 if (dump_enabled_p ())
811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
812 "Build SLP failed: shift of a"
813 " boolean.\n");
814 /* Fatal mismatch. */
815 matches[0] = false;
816 return false;
817 }
818
ebfd146a
IR
819 vec_mode = TYPE_MODE (vectype);
820
821 /* First see if we have a vector/vector shift. */
822 optab = optab_for_tree_code (rhs_code, vectype,
823 optab_vector);
824
825 if (!optab
947131ba 826 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
827 {
828 /* No vector/vector shift, try for a vector/scalar shift. */
829 optab = optab_for_tree_code (rhs_code, vectype,
830 optab_scalar);
831
832 if (!optab)
833 {
73fbfcad 834 if (dump_enabled_p ())
78c60e3d 835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 836 "Build SLP failed: no optab.\n");
6983e6b5
RB
837 /* Fatal mismatch. */
838 matches[0] = false;
ebfd146a
IR
839 return false;
840 }
947131ba 841 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
842 if (icode == CODE_FOR_nothing)
843 {
73fbfcad 844 if (dump_enabled_p ())
78c60e3d
SS
845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
846 "Build SLP failed: "
e645e942 847 "op not supported by target.\n");
6983e6b5
RB
848 /* Fatal mismatch. */
849 matches[0] = false;
ebfd146a
IR
850 return false;
851 }
852 optab_op2_mode = insn_data[icode].operand[2].mode;
853 if (!VECTOR_MODE_P (optab_op2_mode))
854 {
855 need_same_oprnds = true;
856 first_op1 = gimple_assign_rhs2 (stmt);
857 }
858 }
859 }
36ba4aae
IR
860 else if (rhs_code == WIDEN_LSHIFT_EXPR)
861 {
862 need_same_oprnds = true;
863 first_op1 = gimple_assign_rhs2 (stmt);
864 }
ebfd146a
IR
865 }
866 else
867 {
6876e5bc
RB
868 if (first_stmt_code != rhs_code
869 && alt_stmt_code == ERROR_MARK)
870 alt_stmt_code = rhs_code;
ebfd146a
IR
871 if (first_stmt_code != rhs_code
872 && (first_stmt_code != IMAGPART_EXPR
873 || rhs_code != REALPART_EXPR)
874 && (first_stmt_code != REALPART_EXPR
69f11a13 875 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
876 /* Handle mismatches in plus/minus by computing both
877 and merging the results. */
878 && !((first_stmt_code == PLUS_EXPR
879 || first_stmt_code == MINUS_EXPR)
880 && (alt_stmt_code == PLUS_EXPR
881 || alt_stmt_code == MINUS_EXPR)
882 && rhs_code == alt_stmt_code)
0d0293ac 883 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
69f11a13 884 && (first_stmt_code == ARRAY_REF
38000232 885 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
886 || first_stmt_code == INDIRECT_REF
887 || first_stmt_code == COMPONENT_REF
888 || first_stmt_code == MEM_REF)))
ebfd146a 889 {
73fbfcad 890 if (dump_enabled_p ())
ebfd146a 891 {
78c60e3d
SS
892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
893 "Build SLP failed: different operation "
894 "in stmt ");
895 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
6876e5bc
RB
896 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
897 "original stmt ");
898 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
899 first_stmt, 0);
ebfd146a 900 }
6983e6b5
RB
901 /* Mismatch. */
902 continue;
ebfd146a 903 }
b8698a0f
L
904
905 if (need_same_oprnds
ebfd146a
IR
906 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
907 {
73fbfcad 908 if (dump_enabled_p ())
ebfd146a 909 {
78c60e3d
SS
910 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
911 "Build SLP failed: different shift "
912 "arguments in ");
913 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 914 }
6983e6b5
RB
915 /* Mismatch. */
916 continue;
ebfd146a 917 }
190c2236
JJ
918
919 if (rhs_code == CALL_EXPR)
920 {
355fe088 921 gimple *first_stmt = stmts[0];
5249ee4d
RS
922 if (!compatible_calls_p (as_a <gcall *> (first_stmt),
923 as_a <gcall *> (stmt)))
190c2236 924 {
73fbfcad 925 if (dump_enabled_p ())
190c2236 926 {
78c60e3d
SS
927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
928 "Build SLP failed: different calls in ");
929 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
930 stmt, 0);
190c2236 931 }
6983e6b5
RB
932 /* Mismatch. */
933 continue;
190c2236
JJ
934 }
935 }
ebfd146a
IR
936 }
937
0d0293ac
MM
938 /* Grouped store or load. */
939 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
ebfd146a
IR
940 {
941 if (REFERENCE_CLASS_P (lhs))
942 {
943 /* Store. */
6983e6b5 944 ;
ebfd146a 945 }
b5aeb3bb
IR
946 else
947 {
948 /* Load. */
2c53b149 949 first_load = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
b5aeb3bb
IR
950 if (prev_first_load)
951 {
952 /* Check that there are no loads from different interleaving
6983e6b5
RB
953 chains in the same node. */
954 if (prev_first_load != first_load)
78c60e3d 955 {
73fbfcad 956 if (dump_enabled_p ())
b5aeb3bb 957 {
78c60e3d
SS
958 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
959 vect_location,
960 "Build SLP failed: different "
961 "interleaving chains in one node ");
962 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
963 stmt, 0);
b5aeb3bb 964 }
6983e6b5
RB
965 /* Mismatch. */
966 continue;
b5aeb3bb
IR
967 }
968 }
969 else
970 prev_first_load = first_load;
ebfd146a 971 }
0d0293ac 972 } /* Grouped access. */
ebfd146a
IR
973 else
974 {
975 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
976 {
0d0293ac 977 /* Not grouped load. */
73fbfcad 978 if (dump_enabled_p ())
ebfd146a 979 {
78c60e3d
SS
980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
981 "Build SLP failed: not grouped load ");
982 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a
IR
983 }
984
0d0293ac 985 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
986 /* Fatal mismatch. */
987 matches[0] = false;
ebfd146a
IR
988 return false;
989 }
990
991 /* Not memory operation. */
992 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 993 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 994 && TREE_CODE_CLASS (rhs_code) != tcc_expression
42fd8198 995 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
190c2236 996 && rhs_code != CALL_EXPR)
ebfd146a 997 {
73fbfcad 998 if (dump_enabled_p ())
ebfd146a 999 {
78c60e3d
SS
1000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1001 "Build SLP failed: operation");
1002 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
1003 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 1004 }
6983e6b5
RB
1005 /* Fatal mismatch. */
1006 matches[0] = false;
ebfd146a
IR
1007 return false;
1008 }
1009
4cecd659
BC
1010 if (rhs_code == COND_EXPR)
1011 {
1012 tree cond_expr = gimple_assign_rhs1 (stmt);
1013 enum tree_code cond_code = TREE_CODE (cond_expr);
1014 enum tree_code swap_code = ERROR_MARK;
1015 enum tree_code invert_code = ERROR_MARK;
f7e531cf
IR
1016
1017 if (i == 0)
1018 first_cond_code = TREE_CODE (cond_expr);
4cecd659
BC
1019 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1020 {
1021 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1022 swap_code = swap_tree_comparison (cond_code);
1023 invert_code = invert_tree_comparison (cond_code, honor_nans);
1024 }
1025
1026 if (first_cond_code == cond_code)
1027 ;
1028 /* Isomorphic can be achieved by swapping. */
1029 else if (first_cond_code == swap_code)
1030 swap[i] = 1;
1031 /* Isomorphic can be achieved by inverting. */
1032 else if (first_cond_code == invert_code)
1033 swap[i] = 2;
1034 else
1035 {
1036 if (dump_enabled_p ())
1037 {
1038 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
1039 "Build SLP failed: different"
1040 " operation");
4cecd659 1041 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
78c60e3d 1042 stmt, 0);
4cecd659 1043 }
6983e6b5
RB
1044 /* Mismatch. */
1045 continue;
f7e531cf 1046 }
4cecd659 1047 }
ebfd146a 1048 }
6983e6b5
RB
1049
1050 matches[i] = true;
1051 }
1052
1053 for (i = 0; i < group_size; ++i)
1054 if (!matches[i])
1055 return false;
1056
6876e5bc
RB
1057 /* If we allowed a two-operation SLP node verify the target can cope
1058 with the permute we are going to use. */
1059 if (alt_stmt_code != ERROR_MARK
1060 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1061 {
1f3cb663
RS
1062 if (vectype == boolean_type_node
1063 || !vect_two_operations_perm_ok_p (stmts, group_size,
1064 vectype, alt_stmt_code))
6876e5bc
RB
1065 {
1066 for (i = 0; i < group_size; ++i)
1067 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
1068 {
1069 matches[i] = false;
1070 if (dump_enabled_p ())
1071 {
1072 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1073 "Build SLP failed: different operation "
1074 "in stmt ");
1075 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1076 stmts[i], 0);
1077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1078 "original stmt ");
1079 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1080 first_stmt, 0);
1081 }
1082 }
1083 return false;
1084 }
1085 *two_operators = true;
1086 }
1087
6983e6b5
RB
1088 return true;
1089}
1090
26d66f28
RB
1091/* Traits for the hash_set to record failed SLP builds for a stmt set.
1092 Note we never remove apart from at destruction time so we do not
1093 need a special value for deleted that differs from empty. */
1094struct bst_traits
1095{
1096 typedef vec <gimple *> value_type;
1097 typedef vec <gimple *> compare_type;
1098 static inline hashval_t hash (value_type);
1099 static inline bool equal (value_type existing, value_type candidate);
1100 static inline bool is_empty (value_type x) { return !x.exists (); }
1101 static inline bool is_deleted (value_type x) { return !x.exists (); }
1102 static inline void mark_empty (value_type &x) { x.release (); }
1103 static inline void mark_deleted (value_type &x) { x.release (); }
1104 static inline void remove (value_type &x) { x.release (); }
1105};
1106inline hashval_t
1107bst_traits::hash (value_type x)
1108{
1109 inchash::hash h;
1110 for (unsigned i = 0; i < x.length (); ++i)
1111 h.add_int (gimple_uid (x[i]));
1112 return h.end ();
1113}
1114inline bool
1115bst_traits::equal (value_type existing, value_type candidate)
1116{
1117 if (existing.length () != candidate.length ())
1118 return false;
1119 for (unsigned i = 0; i < existing.length (); ++i)
1120 if (existing[i] != candidate[i])
1121 return false;
1122 return true;
1123}
1124
f7300fff
RB
1125typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
1126static scalar_stmts_set_t *bst_fail;
26d66f28 1127
68435eb2
RB
1128typedef hash_map <vec <gimple *>, slp_tree,
1129 simple_hashmap_traits <bst_traits, slp_tree> >
1130 scalar_stmts_to_slp_tree_map_t;
1131
26d66f28
RB
1132static slp_tree
1133vect_build_slp_tree_2 (vec_info *vinfo,
1134 vec<gimple *> stmts, unsigned int group_size,
4b6068ea 1135 poly_uint64 *max_nunits,
26d66f28
RB
1136 vec<slp_tree> *loads,
1137 bool *matches, unsigned *npermutes, unsigned *tree_size,
1138 unsigned max_tree_size);
6983e6b5 1139
e403d17e 1140static slp_tree
310213d4 1141vect_build_slp_tree (vec_info *vinfo,
4b6068ea
RS
1142 vec<gimple *> stmts, unsigned int group_size,
1143 poly_uint64 *max_nunits, vec<slp_tree> *loads,
1428105c
RB
1144 bool *matches, unsigned *npermutes, unsigned *tree_size,
1145 unsigned max_tree_size)
26d66f28
RB
1146{
1147 if (bst_fail->contains (stmts))
1148 return NULL;
1149 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
1150 loads, matches, npermutes, tree_size,
1151 max_tree_size);
1152 /* When SLP build fails for stmts record this, otherwise SLP build
1153 can be exponential in time when we allow to construct parts from
1154 scalars, see PR81723. */
1155 if (! res)
1156 {
1157 vec <gimple *> x;
1158 x.create (stmts.length ());
1159 x.splice (stmts);
1160 bst_fail->add (x);
1161 }
1162 return res;
1163}
1164
1165/* Recursively build an SLP tree starting from NODE.
1166 Fail (and return a value not equal to zero) if def-stmts are not
1167 isomorphic, require data permutation or are of unsupported types of
1168 operation. Otherwise, return 0.
1169 The value returned is the depth in the SLP tree where a mismatch
1170 was found. */
1171
1172static slp_tree
1173vect_build_slp_tree_2 (vec_info *vinfo,
1174 vec<gimple *> stmts, unsigned int group_size,
4b6068ea 1175 poly_uint64 *max_nunits,
26d66f28
RB
1176 vec<slp_tree> *loads,
1177 bool *matches, unsigned *npermutes, unsigned *tree_size,
1178 unsigned max_tree_size)
6983e6b5 1179{
4b6068ea
RS
1180 unsigned nops, i, this_tree_size = 0;
1181 poly_uint64 this_max_nunits = *max_nunits;
355fe088 1182 gimple *stmt;
e403d17e 1183 slp_tree node;
6983e6b5 1184
6983e6b5
RB
1185 matches[0] = false;
1186
e403d17e 1187 stmt = stmts[0];
6983e6b5
RB
1188 if (is_gimple_call (stmt))
1189 nops = gimple_call_num_args (stmt);
1190 else if (is_gimple_assign (stmt))
1191 {
1192 nops = gimple_num_ops (stmt) - 1;
1193 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1194 nops++;
ebfd146a 1195 }
e7baeb39
RB
1196 else if (gimple_code (stmt) == GIMPLE_PHI)
1197 nops = 0;
6983e6b5 1198 else
e403d17e 1199 return NULL;
6983e6b5 1200
c78e3652
RB
1201 /* If the SLP node is a PHI (induction or reduction), terminate
1202 the recursion. */
e7baeb39
RB
1203 if (gimple_code (stmt) == GIMPLE_PHI)
1204 {
b161f2c9
RS
1205 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1206 tree vectype = get_vectype_for_scalar_type (scalar_type);
1207 if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
1208 max_nunits))
1209 return NULL;
1210
719488f8 1211 vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
c78e3652 1212 /* Induction from different IVs is not supported. */
719488f8
RB
1213 if (def_type == vect_induction_def)
1214 {
1215 FOR_EACH_VEC_ELT (stmts, i, stmt)
1216 if (stmt != stmts[0])
1217 return NULL;
1218 }
1219 else
1220 {
1221 /* Else def types have to match. */
1222 FOR_EACH_VEC_ELT (stmts, i, stmt)
1223 {
1224 /* But for reduction chains only check on the first stmt. */
2c53b149
RB
1225 if (REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1226 && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
719488f8
RB
1227 continue;
1228 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
1229 return NULL;
1230 }
1231 }
e7baeb39
RB
1232 node = vect_create_new_slp_node (stmts);
1233 return node;
1234 }
1235
1236
6876e5bc 1237 bool two_operators = false;
4cecd659 1238 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
5249ee4d 1239 if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
e403d17e
RB
1240 &this_max_nunits, matches, &two_operators))
1241 return NULL;
ebfd146a 1242
6983e6b5
RB
1243 /* If the SLP node is a load, terminate the recursion. */
1244 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
1245 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
ebfd146a 1246 {
e403d17e
RB
1247 *max_nunits = this_max_nunits;
1248 node = vect_create_new_slp_node (stmts);
1249 loads->safe_push (node);
1250 return node;
ebfd146a
IR
1251 }
1252
6983e6b5
RB
1253 /* Get at the operands, verifying they are compatible. */
1254 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1255 slp_oprnd_info oprnd_info;
e403d17e 1256 FOR_EACH_VEC_ELT (stmts, i, stmt)
6983e6b5 1257 {
4cecd659 1258 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
018b2744 1259 stmts, i, &oprnds_info);
4cecd659
BC
1260 if (res != 0)
1261 matches[(res == -1) ? 0 : i] = false;
1262 if (!matches[0])
1263 break;
6983e6b5 1264 }
b0b4483e
RB
1265 for (i = 0; i < group_size; ++i)
1266 if (!matches[i])
1267 {
1268 vect_free_oprnd_info (oprnds_info);
e403d17e 1269 return NULL;
b0b4483e 1270 }
6983e6b5 1271
e403d17e
RB
1272 auto_vec<slp_tree, 4> children;
1273 auto_vec<slp_tree> this_loads;
1274
1275 stmt = stmts[0];
6983e6b5 1276
26d66f28
RB
1277 if (tree_size)
1278 max_tree_size -= *tree_size;
1279
b8698a0f 1280 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 1281 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 1282 {
d092494c 1283 slp_tree child;
e403d17e
RB
1284 unsigned old_nloads = this_loads.length ();
1285 unsigned old_tree_size = this_tree_size;
1286 unsigned int j;
b8698a0f 1287
e7baeb39 1288 if (oprnd_info->first_dt != vect_internal_def
c78e3652 1289 && oprnd_info->first_dt != vect_reduction_def
e7baeb39 1290 && oprnd_info->first_dt != vect_induction_def)
d092494c 1291 continue;
ebfd146a 1292
1428105c
RB
1293 if (++this_tree_size > max_tree_size)
1294 {
e403d17e 1295 FOR_EACH_VEC_ELT (children, j, child)
6e2dd807 1296 vect_free_slp_tree (child, false);
1428105c 1297 vect_free_oprnd_info (oprnds_info);
e403d17e 1298 return NULL;
1428105c
RB
1299 }
1300
e403d17e
RB
1301 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1302 group_size, &this_max_nunits,
1303 &this_loads, matches, npermutes,
1304 &this_tree_size,
1305 max_tree_size)) != NULL)
6983e6b5 1306 {
3fc356dc
RB
1307 /* If we have all children of child built up from scalars then just
1308 throw that away and build it up this node from scalars. */
995b6fe0
RB
1309 if (!SLP_TREE_CHILDREN (child).is_empty ()
1310 /* ??? Rejecting patterns this way doesn't work. We'd have to
1311 do extra work to cancel the pattern so the uses see the
1312 scalar version. */
1313 && !is_pattern_stmt_p
1314 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
3fc356dc 1315 {
3fc356dc
RB
1316 slp_tree grandchild;
1317
1318 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1319 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
3fc356dc
RB
1320 break;
1321 if (!grandchild)
1322 {
1323 /* Roll back. */
e403d17e
RB
1324 this_loads.truncate (old_nloads);
1325 this_tree_size = old_tree_size;
3fc356dc 1326 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1327 vect_free_slp_tree (grandchild, false);
3fc356dc
RB
1328 SLP_TREE_CHILDREN (child).truncate (0);
1329
1330 dump_printf_loc (MSG_NOTE, vect_location,
1331 "Building parent vector operands from "
1332 "scalars instead\n");
1333 oprnd_info->def_stmts = vNULL;
603cca93 1334 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1335 children.safe_push (child);
3fc356dc
RB
1336 continue;
1337 }
1338 }
1339
6983e6b5 1340 oprnd_info->def_stmts = vNULL;
e403d17e 1341 children.safe_push (child);
6983e6b5
RB
1342 continue;
1343 }
1344
90dd6e3d
RB
1345 /* If the SLP build failed fatally and we analyze a basic-block
1346 simply treat nodes we fail to build as externally defined
1347 (and thus build vectors from the scalar defs).
1348 The cost model will reject outright expensive cases.
1349 ??? This doesn't treat cases where permutation ultimatively
1350 fails (or we don't try permutation below). Ideally we'd
1351 even compute a permutation that will end up with the maximum
1352 SLP tree size... */
310213d4 1353 if (is_a <bb_vec_info> (vinfo)
90dd6e3d
RB
1354 && !matches[0]
1355 /* ??? Rejecting patterns this way doesn't work. We'd have to
1356 do extra work to cancel the pattern so the uses see the
1357 scalar version. */
1358 && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1359 {
1360 dump_printf_loc (MSG_NOTE, vect_location,
1361 "Building vector operands from scalars\n");
e403d17e 1362 child = vect_create_new_slp_node (oprnd_info->def_stmts);
603cca93 1363 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e
RB
1364 children.safe_push (child);
1365 oprnd_info->def_stmts = vNULL;
90dd6e3d
RB
1366 continue;
1367 }
1368
6983e6b5
RB
1369 /* If the SLP build for operand zero failed and operand zero
1370 and one can be commutated try that for the scalar stmts
1371 that failed the match. */
1372 if (i == 0
1373 /* A first scalar stmt mismatch signals a fatal mismatch. */
1374 && matches[0]
1375 /* ??? For COND_EXPRs we can swap the comparison operands
1376 as well as the arms under some constraints. */
1377 && nops == 2
1378 && oprnds_info[1]->first_dt == vect_internal_def
1379 && is_gimple_assign (stmt)
6983e6b5
RB
1380 /* Do so only if the number of not successful permutes was nor more
1381 than a cut-ff as re-trying the recursive match on
1382 possibly each level of the tree would expose exponential
1383 behavior. */
1384 && *npermutes < 4)
1385 {
85c5e2f5
RB
1386 /* See whether we can swap the matching or the non-matching
1387 stmt operands. */
1388 bool swap_not_matching = true;
1389 do
1390 {
1391 for (j = 0; j < group_size; ++j)
1392 {
1393 if (matches[j] != !swap_not_matching)
1394 continue;
1395 gimple *stmt = stmts[j];
1396 /* Verify if we can swap operands of this stmt. */
1397 if (!is_gimple_assign (stmt)
1398 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1399 {
1400 if (!swap_not_matching)
1401 goto fail;
1402 swap_not_matching = false;
1403 break;
1404 }
1405 /* Verify if we can safely swap or if we committed to a
1406 specific operand order already.
1407 ??? Instead of modifying GIMPLE stmts here we could
1408 record whether we want to swap operands in the SLP
1409 node and temporarily do that when processing it
1410 (or wrap operand accessors in a helper). */
1411 else if (swap[j] != 0
1412 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)))
1413 {
1414 if (!swap_not_matching)
1415 {
1416 if (dump_enabled_p ())
1417 {
1418 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1419 vect_location,
1420 "Build SLP failed: cannot swap "
1421 "operands of shared stmt ");
1422 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
1423 TDF_SLIM, stmts[j], 0);
1424 }
1425 goto fail;
1426 }
1427 swap_not_matching = false;
1428 break;
1429 }
1430 }
1431 }
1432 while (j != group_size);
78810bd3 1433
6983e6b5 1434 /* Swap mismatched definition stmts. */
b0b4483e
RB
1435 dump_printf_loc (MSG_NOTE, vect_location,
1436 "Re-trying with swapped operands of stmts ");
e72baed7 1437 for (j = 0; j < group_size; ++j)
85c5e2f5 1438 if (matches[j] == !swap_not_matching)
6983e6b5 1439 {
6b4db501
MM
1440 std::swap (oprnds_info[0]->def_stmts[j],
1441 oprnds_info[1]->def_stmts[j]);
b0b4483e 1442 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1443 }
b0b4483e 1444 dump_printf (MSG_NOTE, "\n");
74574669
RB
1445 /* And try again with scratch 'matches' ... */
1446 bool *tem = XALLOCAVEC (bool, group_size);
e403d17e
RB
1447 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1448 group_size, &this_max_nunits,
1449 &this_loads, tem, npermutes,
1450 &this_tree_size,
1451 max_tree_size)) != NULL)
6983e6b5 1452 {
60f2b864
RB
1453 /* ... so if successful we can apply the operand swapping
1454 to the GIMPLE IL. This is necessary because for example
1455 vect_get_slp_defs uses operand indexes and thus expects
1456 canonical operand order. This is also necessary even
1457 if we end up building the operand from scalars as
1458 we'll continue to process swapped operand two. */
1459 for (j = 0; j < group_size; ++j)
f47cda24 1460 {
e403d17e 1461 gimple *stmt = stmts[j];
f47cda24
RB
1462 gimple_set_plf (stmt, GF_PLF_1, false);
1463 }
1464 for (j = 0; j < group_size; ++j)
1465 {
e403d17e 1466 gimple *stmt = stmts[j];
85c5e2f5 1467 if (matches[j] == !swap_not_matching)
f47cda24
RB
1468 {
1469 /* Avoid swapping operands twice. */
1470 if (gimple_plf (stmt, GF_PLF_1))
1471 continue;
1472 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1473 gimple_assign_rhs2_ptr (stmt));
1474 gimple_set_plf (stmt, GF_PLF_1, true);
1475 }
1476 }
1477 /* Verify we swap all duplicates or none. */
1478 if (flag_checking)
1479 for (j = 0; j < group_size; ++j)
60f2b864 1480 {
e403d17e 1481 gimple *stmt = stmts[j];
85c5e2f5
RB
1482 gcc_assert (gimple_plf (stmt, GF_PLF_1)
1483 == (matches[j] == !swap_not_matching));
60f2b864
RB
1484 }
1485
85c69b0b
RB
1486 /* If we have all children of child built up from scalars then
1487 just throw that away and build it up this node from scalars. */
995b6fe0
RB
1488 if (!SLP_TREE_CHILDREN (child).is_empty ()
1489 /* ??? Rejecting patterns this way doesn't work. We'd have
1490 to do extra work to cancel the pattern so the uses see the
1491 scalar version. */
1492 && !is_pattern_stmt_p
1493 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
85c69b0b
RB
1494 {
1495 unsigned int j;
1496 slp_tree grandchild;
1497
1498 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1499 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
85c69b0b
RB
1500 break;
1501 if (!grandchild)
1502 {
1503 /* Roll back. */
e403d17e
RB
1504 this_loads.truncate (old_nloads);
1505 this_tree_size = old_tree_size;
85c69b0b 1506 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1507 vect_free_slp_tree (grandchild, false);
85c69b0b
RB
1508 SLP_TREE_CHILDREN (child).truncate (0);
1509
1510 dump_printf_loc (MSG_NOTE, vect_location,
1511 "Building parent vector operands from "
1512 "scalars instead\n");
1513 oprnd_info->def_stmts = vNULL;
603cca93 1514 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1515 children.safe_push (child);
85c69b0b
RB
1516 continue;
1517 }
1518 }
1519
6983e6b5 1520 oprnd_info->def_stmts = vNULL;
e403d17e 1521 children.safe_push (child);
6983e6b5
RB
1522 continue;
1523 }
1524
1525 ++*npermutes;
1526 }
1527
78810bd3 1528fail:
e403d17e
RB
1529 gcc_assert (child == NULL);
1530 FOR_EACH_VEC_ELT (children, j, child)
6e2dd807 1531 vect_free_slp_tree (child, false);
6983e6b5 1532 vect_free_oprnd_info (oprnds_info);
e403d17e 1533 return NULL;
ebfd146a
IR
1534 }
1535
e403d17e
RB
1536 vect_free_oprnd_info (oprnds_info);
1537
1428105c
RB
1538 if (tree_size)
1539 *tree_size += this_tree_size;
e403d17e
RB
1540 *max_nunits = this_max_nunits;
1541 loads->safe_splice (this_loads);
1428105c 1542
e403d17e
RB
1543 node = vect_create_new_slp_node (stmts);
1544 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1545 SLP_TREE_CHILDREN (node).splice (children);
1546 return node;
ebfd146a
IR
1547}
1548
78c60e3d 1549/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1550
1551static void
4f5b9c80
DM
1552vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1553 slp_tree node)
ebfd146a
IR
1554{
1555 int i;
355fe088 1556 gimple *stmt;
d755c7ef 1557 slp_tree child;
ebfd146a 1558
603cca93
RB
1559 dump_printf_loc (dump_kind, loc, "node%s\n",
1560 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1561 ? " (external)" : "");
9771b263 1562 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a 1563 {
c2a12ca0 1564 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
78c60e3d 1565 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
ebfd146a 1566 }
9771b263 1567 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
c2a12ca0 1568 vect_print_slp_tree (dump_kind, loc, child);
ebfd146a
IR
1569}
1570
1571
b8698a0f
L
1572/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1573 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1574 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1575 stmts in NODE are to be marked. */
1576
1577static void
1578vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1579{
1580 int i;
355fe088 1581 gimple *stmt;
d755c7ef 1582 slp_tree child;
ebfd146a 1583
603cca93 1584 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
1585 return;
1586
9771b263 1587 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
1588 if (j < 0 || i == j)
1589 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1590
9771b263 1591 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1592 vect_mark_slp_stmts (child, mark, j);
ebfd146a
IR
1593}
1594
1595
a70d6342
IR
1596/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1597
1598static void
1599vect_mark_slp_stmts_relevant (slp_tree node)
1600{
1601 int i;
355fe088 1602 gimple *stmt;
a70d6342 1603 stmt_vec_info stmt_info;
d755c7ef 1604 slp_tree child;
a70d6342 1605
603cca93 1606 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
1607 return;
1608
9771b263 1609 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
1610 {
1611 stmt_info = vinfo_for_stmt (stmt);
b8698a0f 1612 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1613 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1614 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1615 }
1616
9771b263 1617 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1618 vect_mark_slp_stmts_relevant (child);
a70d6342
IR
1619}
1620
1621
b5aeb3bb
IR
1622/* Rearrange the statements of NODE according to PERMUTATION. */
1623
1624static void
1625vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
01d8bf07 1626 vec<unsigned> permutation)
b5aeb3bb 1627{
355fe088
TS
1628 gimple *stmt;
1629 vec<gimple *> tmp_stmts;
d755c7ef
RB
1630 unsigned int i;
1631 slp_tree child;
b5aeb3bb 1632
9771b263 1633 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1634 vect_slp_rearrange_stmts (child, group_size, permutation);
b5aeb3bb 1635
9771b263
DN
1636 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1637 tmp_stmts.create (group_size);
d755c7ef 1638 tmp_stmts.quick_grow_cleared (group_size);
b5aeb3bb 1639
9771b263 1640 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
d755c7ef 1641 tmp_stmts[permutation[i]] = stmt;
b5aeb3bb 1642
9771b263 1643 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1644 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1645}
1646
1647
b266b968
RB
1648/* Attempt to reorder stmts in a reduction chain so that we don't
1649 require any load permutation. Return true if that was possible,
1650 otherwise return false. */
1651
1652static bool
1653vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1654{
1655 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1656 unsigned int i, j;
b266b968
RB
1657 unsigned int lidx;
1658 slp_tree node, load;
1659
1660 /* Compare all the permutation sequences to the first one. We know
1661 that at least one load is permuted. */
1662 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1663 if (!node->load_permutation.exists ())
1664 return false;
1665 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1666 {
1667 if (!load->load_permutation.exists ())
1668 return false;
1669 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1670 if (lidx != node->load_permutation[j])
1671 return false;
1672 }
1673
1674 /* Check that the loads in the first sequence are different and there
1675 are no gaps between them. */
7ba9e72d 1676 auto_sbitmap load_index (group_size);
b266b968
RB
1677 bitmap_clear (load_index);
1678 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1679 {
41eefe13 1680 if (lidx >= group_size)
7ba9e72d 1681 return false;
b266b968 1682 if (bitmap_bit_p (load_index, lidx))
7ba9e72d
TS
1683 return false;
1684
b266b968
RB
1685 bitmap_set_bit (load_index, lidx);
1686 }
1687 for (i = 0; i < group_size; i++)
1688 if (!bitmap_bit_p (load_index, i))
7ba9e72d 1689 return false;
b266b968
RB
1690
1691 /* This permutation is valid for reduction. Since the order of the
1692 statements in the nodes is not important unless they are memory
1693 accesses, we can rearrange the statements in all the nodes
1694 according to the order of the loads. */
1695 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1696 node->load_permutation);
1697
1698 /* We are done, no actual permutations need to be generated. */
d9f21f6a 1699 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
b266b968 1700 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
c4e360f4
RB
1701 {
1702 gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2c53b149 1703 first_stmt = DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
c4e360f4
RB
1704 /* But we have to keep those permutations that are required because
1705 of handling of gaps. */
d9f21f6a 1706 if (known_eq (unrolling_factor, 1U)
2c53b149
RB
1707 || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
1708 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
c4e360f4 1709 SLP_TREE_LOAD_PERMUTATION (node).release ();
cbd400b4
RB
1710 else
1711 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1712 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
c4e360f4
RB
1713 }
1714
b266b968
RB
1715 return true;
1716}
1717
01d8bf07
RB
1718/* Check if the required load permutations in the SLP instance
1719 SLP_INSTN are supported. */
ebfd146a
IR
1720
1721static bool
01d8bf07 1722vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1723{
01d8bf07
RB
1724 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1725 unsigned int i, j, k, next;
6983e6b5 1726 slp_tree node;
a5b50aa1 1727 gimple *stmt, *load, *next_load;
ebfd146a 1728
73fbfcad 1729 if (dump_enabled_p ())
ebfd146a 1730 {
78c60e3d 1731 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1732 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1733 if (node->load_permutation.exists ())
1734 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1735 dump_printf (MSG_NOTE, "%d ", next);
1736 else
bddc974e
TJ
1737 for (k = 0; k < group_size; ++k)
1738 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1739 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1740 }
1741
b5aeb3bb
IR
1742 /* In case of reduction every load permutation is allowed, since the order
1743 of the reduction statements is not important (as opposed to the case of
0d0293ac 1744 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1745 load nodes are of the same size and have the same permutation (and then
1746 rearrange all the nodes of the SLP instance according to this
1747 permutation). */
1748
1749 /* Check that all the load nodes are of the same size. */
01d8bf07 1750 /* ??? Can't we assert this? */
9771b263 1751 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1752 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1753 return false;
2200fc49 1754
b5aeb3bb 1755 node = SLP_INSTANCE_TREE (slp_instn);
9771b263 1756 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1757
b010117a 1758 /* Reduction (there are no data-refs in the root).
b266b968 1759 In reduction chain the order of the loads is not important. */
b010117a 1760 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
2c53b149 1761 && !REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
c4e360f4 1762 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1763
6aa904c4
IR
1764 /* In basic block vectorization we allow any subchain of an interleaving
1765 chain.
1766 FORNOW: not supported in loop SLP because of realignment compications. */
01d8bf07 1767 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
6aa904c4 1768 {
240a94da
RB
1769 /* Check whether the loads in an instance form a subchain and thus
1770 no permutation is necessary. */
9771b263 1771 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1772 {
9626d143
RB
1773 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1774 continue;
240a94da 1775 bool subchain_p = true;
6aa904c4 1776 next_load = NULL;
9771b263 1777 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
6aa904c4 1778 {
5b5826c4
RB
1779 if (j != 0
1780 && (next_load != load
2c53b149 1781 || DR_GROUP_GAP (vinfo_for_stmt (load)) != 1))
240a94da
RB
1782 {
1783 subchain_p = false;
1784 break;
1785 }
2c53b149 1786 next_load = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
6aa904c4 1787 }
240a94da
RB
1788 if (subchain_p)
1789 SLP_TREE_LOAD_PERMUTATION (node).release ();
1790 else
1791 {
fe73a332
RB
1792 stmt_vec_info group_info
1793 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
2c53b149 1794 group_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (group_info));
928686b1 1795 unsigned HOST_WIDE_INT nunits;
fe73a332
RB
1796 unsigned k, maxk = 0;
1797 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1798 if (k > maxk)
1799 maxk = k;
1800 /* In BB vectorization we may not actually use a loaded vector
2c53b149 1801 accessing elements in excess of DR_GROUP_SIZE. */
928686b1
RS
1802 tree vectype = STMT_VINFO_VECTYPE (group_info);
1803 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2c53b149 1804 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
fe73a332
RB
1805 {
1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1807 "BB vectorization with gaps at the end of "
1808 "a load is not supported\n");
1809 return false;
1810 }
1811
240a94da
RB
1812 /* Verify the permutation can be generated. */
1813 vec<tree> tem;
29afecdf 1814 unsigned n_perms;
240a94da 1815 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1816 1, slp_instn, true, &n_perms))
240a94da
RB
1817 {
1818 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1819 vect_location,
1820 "unsupported load permutation\n");
1821 return false;
1822 }
1823 }
6aa904c4 1824 }
01d8bf07 1825 return true;
6aa904c4
IR
1826 }
1827
31bee964
RB
1828 /* For loop vectorization verify we can generate the permutation. Be
1829 conservative about the vectorization factor, there are permutations
1830 that will use three vector inputs only starting from a specific factor
1831 and the vectorization factor is not yet final.
1832 ??? The SLP instance unrolling factor might not be the maximum one. */
29afecdf 1833 unsigned n_perms;
d9f21f6a
RS
1834 poly_uint64 test_vf
1835 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
31bee964 1836 LOOP_VINFO_VECT_FACTOR
d9f21f6a 1837 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
01d8bf07
RB
1838 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1839 if (node->load_permutation.exists ()
31bee964
RB
1840 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1841 slp_instn, true, &n_perms))
01d8bf07 1842 return false;
9b999e8c 1843
01d8bf07 1844 return true;
ebfd146a
IR
1845}
1846
1847
e4a707c4 1848/* Find the last store in SLP INSTANCE. */
ff802fa1 1849
64900538 1850gimple *
2e8ab70c 1851vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1852{
355fe088 1853 gimple *last = NULL, *stmt;
e4a707c4 1854
2e8ab70c
RB
1855 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1856 {
1857 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1858 if (is_pattern_stmt_p (stmt_vinfo))
1859 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1860 else
1861 last = get_later_stmt (stmt, last);
1862 }
e4a707c4 1863
2e8ab70c 1864 return last;
e4a707c4
IR
1865}
1866
1ba91a49
AL
1867/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1868 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1869 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1870 containing the remainder.
1871 Return the first stmt in the second group. */
1872
1873static gimple *
1874vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1875{
1876 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
2c53b149 1877 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1ba91a49 1878 gcc_assert (group1_size > 0);
2c53b149 1879 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1ba91a49 1880 gcc_assert (group2_size > 0);
2c53b149 1881 DR_GROUP_SIZE (first_vinfo) = group1_size;
1ba91a49
AL
1882
1883 gimple *stmt = first_stmt;
1884 for (unsigned i = group1_size; i > 1; i--)
1885 {
2c53b149
RB
1886 stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1887 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1ba91a49
AL
1888 }
1889 /* STMT is now the last element of the first group. */
2c53b149
RB
1890 gimple *group2 = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1891 DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1ba91a49 1892
2c53b149
RB
1893 DR_GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1894 for (stmt = group2; stmt; stmt = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1ba91a49 1895 {
2c53b149
RB
1896 DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1897 gcc_assert (DR_GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1ba91a49
AL
1898 }
1899
2c53b149 1900 /* For the second group, the DR_GROUP_GAP is that before the original group,
1ba91a49 1901 plus skipping over the first vector. */
2c53b149
RB
1902 DR_GROUP_GAP (vinfo_for_stmt (group2))
1903 = DR_GROUP_GAP (first_vinfo) + group1_size;
1ba91a49 1904
2c53b149
RB
1905 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1906 DR_GROUP_GAP (first_vinfo) += group2_size;
1ba91a49
AL
1907
1908 if (dump_enabled_p ())
1909 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1910 group1_size, group2_size);
1911
1912 return group2;
1913}
1914
4b6068ea
RS
1915/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1916 statements and a vector of NUNITS elements. */
1917
1918static poly_uint64
1919calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1920{
1921 return exact_div (common_multiple (nunits, group_size), group_size);
1922}
1923
0d0293ac 1924/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1925 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1926 Return FALSE if it's impossible to SLP any stmt in the loop. */
1927
1928static bool
310213d4 1929vect_analyze_slp_instance (vec_info *vinfo,
355fe088 1930 gimple *stmt, unsigned max_tree_size)
ebfd146a
IR
1931{
1932 slp_instance new_instance;
d092494c 1933 slp_tree node;
2c53b149 1934 unsigned int group_size;
b5aeb3bb 1935 tree vectype, scalar_type = NULL_TREE;
355fe088 1936 gimple *next;
1ba91a49 1937 unsigned int i;
9771b263 1938 vec<slp_tree> loads;
b5aeb3bb 1939 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
355fe088 1940 vec<gimple *> scalar_stmts;
b5aeb3bb 1941
2c53b149 1942 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
b5aeb3bb 1943 {
2c53b149
RB
1944 scalar_type = TREE_TYPE (DR_REF (dr));
1945 vectype = get_vectype_for_scalar_type (scalar_type);
1946 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1947 }
1948 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1949 {
1950 gcc_assert (is_a <loop_vec_info> (vinfo));
1951 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1952 group_size = REDUC_GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
1953 }
1954 else
1955 {
310213d4 1956 gcc_assert (is_a <loop_vec_info> (vinfo));
b5aeb3bb 1957 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
310213d4 1958 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 1959 }
b8698a0f 1960
ebfd146a
IR
1961 if (!vectype)
1962 {
73fbfcad 1963 if (dump_enabled_p ())
ebfd146a 1964 {
78c60e3d
SS
1965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1966 "Build SLP failed: unsupported data-type ");
1967 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 1968 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1969 }
b5aeb3bb 1970
ebfd146a
IR
1971 return false;
1972 }
4b6068ea 1973 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 1974
0d0293ac 1975 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1976 scalar_stmts.create (group_size);
ebfd146a 1977 next = stmt;
2c53b149 1978 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
ebfd146a 1979 {
b5aeb3bb
IR
1980 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1981 while (next)
1982 {
f7e531cf
IR
1983 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1984 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
9771b263
DN
1985 scalar_stmts.safe_push (
1986 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f7e531cf 1987 else
9771b263 1988 scalar_stmts.safe_push (next);
2c53b149
RB
1989 next = DR_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
1990 }
1991 }
1992 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
1993 {
1994 /* Collect the reduction stmts and store them in
1995 SLP_TREE_SCALAR_STMTS. */
1996 while (next)
1997 {
1998 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1999 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
2000 scalar_stmts.safe_push (
2001 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
2002 else
2003 scalar_stmts.safe_push (next);
2004 next = REDUC_GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb 2005 }
14a61437
RB
2006 /* Mark the first element of the reduction chain as reduction to properly
2007 transform the node. In the reduction analysis phase only the last
2008 element of the chain is marked as reduction. */
2c53b149 2009 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
b5aeb3bb
IR
2010 }
2011 else
2012 {
2013 /* Collect reduction statements. */
310213d4 2014 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
9771b263
DN
2015 for (i = 0; reductions.iterate (i, &next); i++)
2016 scalar_stmts.safe_push (next);
ebfd146a
IR
2017 }
2018
9771b263 2019 loads.create (group_size);
ebfd146a
IR
2020
2021 /* Build the tree for the SLP instance. */
89d390e5
RB
2022 bool *matches = XALLOCAVEC (bool, group_size);
2023 unsigned npermutes = 0;
f7300fff 2024 bst_fail = new scalar_stmts_set_t ();
4b6068ea 2025 poly_uint64 max_nunits = nunits;
e569db5f 2026 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
4b6068ea 2027 &max_nunits, &loads, matches, &npermutes,
e569db5f 2028 NULL, max_tree_size);
26d66f28 2029 delete bst_fail;
e569db5f 2030 if (node != NULL)
ebfd146a 2031 {
4ef69dfc 2032 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 2033 poly_uint64 unrolling_factor
4b6068ea 2034 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 2035
d9f21f6a 2036 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
2037 && is_a <bb_vec_info> (vinfo))
2038 {
4b6068ea
RS
2039 unsigned HOST_WIDE_INT const_max_nunits;
2040 if (!max_nunits.is_constant (&const_max_nunits)
2041 || const_max_nunits > group_size)
2042 {
2043 if (dump_enabled_p ())
2044 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2045 "Build SLP failed: store group "
2046 "size not a multiple of the vector size "
2047 "in basic block SLP\n");
6e2dd807 2048 vect_free_slp_tree (node, false);
4b6068ea
RS
2049 loads.release ();
2050 return false;
2051 }
e569db5f 2052 /* Fatal mismatch. */
4b6068ea 2053 matches[group_size / const_max_nunits * const_max_nunits] = false;
6e2dd807 2054 vect_free_slp_tree (node, false);
e569db5f
VK
2055 loads.release ();
2056 }
2057 else
2058 {
4ef69dfc
IR
2059 /* Create a new SLP instance. */
2060 new_instance = XNEW (struct _slp_instance);
2061 SLP_INSTANCE_TREE (new_instance) = node;
2062 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 2063 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 2064 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
2065
2066 /* Compute the load permutation. */
2067 slp_tree load_node;
2068 bool loads_permuted = false;
abf9bfbc
RB
2069 FOR_EACH_VEC_ELT (loads, i, load_node)
2070 {
01d8bf07 2071 vec<unsigned> load_permutation;
abf9bfbc 2072 int j;
355fe088 2073 gimple *load, *first_stmt;
01d8bf07
RB
2074 bool this_load_permuted = false;
2075 load_permutation.create (group_size);
2c53b149 2076 first_stmt = DR_GROUP_FIRST_ELEMENT
6983e6b5 2077 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
abf9bfbc
RB
2078 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2079 {
e569db5f
VK
2080 int load_place = vect_get_place_in_interleaving_chain
2081 (load, first_stmt);
6983e6b5
RB
2082 gcc_assert (load_place != -1);
2083 if (load_place != j)
01d8bf07 2084 this_load_permuted = true;
abf9bfbc
RB
2085 load_permutation.safe_push (load_place);
2086 }
fe2bef71
RB
2087 if (!this_load_permuted
2088 /* The load requires permutation when unrolling exposes
2089 a gap either because the group is larger than the SLP
2090 group-size or because there is a gap between the groups. */
d9f21f6a 2091 && (known_eq (unrolling_factor, 1U)
2c53b149
RB
2092 || (group_size == DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
2093 && DR_GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
01d8bf07
RB
2094 {
2095 load_permutation.release ();
2096 continue;
2097 }
2098 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2099 loads_permuted = true;
abf9bfbc 2100 }
6aa904c4
IR
2101
2102 if (loads_permuted)
ebfd146a 2103 {
01d8bf07 2104 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 2105 {
73fbfcad 2106 if (dump_enabled_p ())
ebfd146a 2107 {
e645e942 2108 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
2109 "Build SLP failed: unsupported load "
2110 "permutation ");
e569db5f
VK
2111 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2112 TDF_SLIM, stmt, 0);
ebfd146a 2113 }
6e2dd807 2114 vect_free_slp_instance (new_instance, false);
ebfd146a
IR
2115 return false;
2116 }
ebfd146a 2117 }
ebfd146a 2118
e569db5f 2119 /* If the loads and stores can be handled with load/store-lan
bb0f5ca7
AL
2120 instructions do not generate this SLP instance. */
2121 if (is_a <loop_vec_info> (vinfo)
2122 && loads_permuted
7e11fc7f 2123 && dr && vect_store_lanes_supported (vectype, group_size, false))
bb0f5ca7
AL
2124 {
2125 slp_tree load_node;
2126 FOR_EACH_VEC_ELT (loads, i, load_node)
2127 {
2c53b149 2128 gimple *first_stmt = DR_GROUP_FIRST_ELEMENT
bb0f5ca7
AL
2129 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2130 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
e569db5f
VK
2131 /* Use SLP for strided accesses (or if we
2132 can't load-lanes). */
bb0f5ca7
AL
2133 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2134 || ! vect_load_lanes_supported
2135 (STMT_VINFO_VECTYPE (stmt_vinfo),
2c53b149 2136 DR_GROUP_SIZE (stmt_vinfo), false))
bb0f5ca7
AL
2137 break;
2138 }
2139 if (i == loads.length ())
2140 {
2141 if (dump_enabled_p ())
2142 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2143 "Built SLP cancelled: can use "
2144 "load/store-lanes\n");
6e2dd807 2145 vect_free_slp_instance (new_instance, false);
bb0f5ca7
AL
2146 return false;
2147 }
2148 }
2149
310213d4 2150 vinfo->slp_instances.safe_push (new_instance);
b8698a0f 2151
73fbfcad 2152 if (dump_enabled_p ())
c2a12ca0
RB
2153 {
2154 dump_printf_loc (MSG_NOTE, vect_location,
2155 "Final SLP tree for instance:\n");
2156 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2157 }
ebfd146a
IR
2158
2159 return true;
2160 }
e569db5f
VK
2161 }
2162 else
2163 {
ebfd146a
IR
2164 /* Failed to SLP. */
2165 /* Free the allocated memory. */
e403d17e 2166 scalar_stmts.release ();
9771b263 2167 loads.release ();
e569db5f 2168 }
b8698a0f 2169
1ba91a49 2170 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2171 vector size. */
4b6068ea 2172 unsigned HOST_WIDE_INT const_nunits;
1ba91a49 2173 if (is_a <bb_vec_info> (vinfo)
4b6068ea 2174 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
2c53b149 2175 && DR_GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
4b6068ea 2176 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2177 {
2178 /* We consider breaking the group only on VF boundaries from the existing
2179 start. */
2180 for (i = 0; i < group_size; i++)
2181 if (!matches[i]) break;
2182
4b6068ea 2183 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2184 {
2185 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2186 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2187 unsigned group1_size = i & ~(const_nunits - 1);
1ba91a49
AL
2188
2189 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2190 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2191 /* If the first non-match was in the middle of a vector,
2192 skip the rest of that vector. */
2193 if (group1_size < i)
2194 {
4b6068ea 2195 i = group1_size + const_nunits;
1ba91a49 2196 if (i < group_size)
4b6068ea 2197 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2198 }
2199 if (i < group_size)
2200 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2201 return res;
2202 }
2203 /* Even though the first vector did not all match, we might be able to SLP
2204 (some) of the remainder. FORNOW ignore this possibility. */
2205 }
2206
a70d6342 2207 return false;
ebfd146a
IR
2208}
2209
2210
ff802fa1 2211/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
2212 trees of packed scalar stmts if SLP is possible. */
2213
2214bool
310213d4 2215vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2216{
2217 unsigned int i;
355fe088 2218 gimple *first_element;
ebfd146a 2219
adac3a68 2220 DUMP_VECT_SCOPE ("vect_analyze_slp");
ebfd146a 2221
0d0293ac 2222 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2223 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
0630a4ec 2224 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
ebfd146a 2225
310213d4 2226 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2227 {
310213d4
RB
2228 if (loop_vinfo->reduction_chains.length () > 0)
2229 {
2230 /* Find SLP sequences starting from reduction chains. */
2231 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
0630a4ec 2232 if (! vect_analyze_slp_instance (vinfo, first_element,
310213d4 2233 max_tree_size))
6b5e165b
RB
2234 {
2235 /* Dissolve reduction chain group. */
2236 gimple *next, *stmt = first_element;
2237 while (stmt)
2238 {
2239 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2c53b149
RB
2240 next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2241 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2242 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
6b5e165b
RB
2243 stmt = next;
2244 }
2245 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2246 = vect_internal_def;
2247 }
310213d4 2248 }
b010117a 2249
310213d4 2250 /* Find SLP sequences starting from groups of reductions. */
0630a4ec
RB
2251 if (loop_vinfo->reductions.length () > 1)
2252 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2253 max_tree_size);
310213d4 2254 }
b5aeb3bb 2255
ebfd146a
IR
2256 return true;
2257}
2258
2259
2260/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2261 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2262 least one instance. */
ebfd146a 2263
437f4a00 2264bool
ebfd146a
IR
2265vect_make_slp_decision (loop_vec_info loop_vinfo)
2266{
d9f21f6a
RS
2267 unsigned int i;
2268 poly_uint64 unrolling_factor = 1;
9771b263 2269 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2270 slp_instance instance;
2271 int decided_to_slp = 0;
2272
adac3a68 2273 DUMP_VECT_SCOPE ("vect_make_slp_decision");
ebfd146a 2274
9771b263 2275 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2276 {
2277 /* FORNOW: SLP if you can. */
d9f21f6a
RS
2278 /* All unroll factors have the form current_vector_size * X for some
2279 rational X, so they must have a common multiple. */
2280 unrolling_factor
2281 = force_common_multiple (unrolling_factor,
2282 SLP_INSTANCE_UNROLLING_FACTOR (instance));
ebfd146a 2283
ff802fa1 2284 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2285 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2286 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
2287 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2288 decided_to_slp++;
2289 }
2290
2291 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2292
73fbfcad 2293 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2294 {
2295 dump_printf_loc (MSG_NOTE, vect_location,
2296 "Decided to SLP %d instances. Unrolling factor ",
2297 decided_to_slp);
2298 dump_dec (MSG_NOTE, unrolling_factor);
2299 dump_printf (MSG_NOTE, "\n");
2300 }
437f4a00
IR
2301
2302 return (decided_to_slp > 0);
ebfd146a
IR
2303}
2304
2305
2306/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2307 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2308
2309static void
642fce57 2310vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 2311{
355fe088 2312 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2313 imm_use_iterator imm_iter;
355fe088 2314 gimple *use_stmt;
642fce57 2315 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
d755c7ef 2316 slp_tree child;
f2c74cc4 2317 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2318 int j;
2319
2320 /* Propagate hybrid down the SLP tree. */
2321 if (stype == hybrid)
2322 ;
2323 else if (HYBRID_SLP_STMT (stmt_vinfo))
2324 stype = hybrid;
2325 else
2326 {
2327 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2328 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2329 /* If we get a pattern stmt here we have to use the LHS of the
2330 original stmt for immediate uses. */
2331 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2332 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
29764870 2333 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
e7baeb39
RB
2334 tree def;
2335 if (gimple_code (stmt) == GIMPLE_PHI)
2336 def = gimple_phi_result (stmt);
2337 else
2338 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2339 if (def)
2340 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870 2341 {
6585ff8f
RS
2342 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2343 if (!use_vinfo)
29764870 2344 continue;
29764870
RB
2345 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2346 && STMT_VINFO_RELATED_STMT (use_vinfo))
2347 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2348 if (!STMT_SLP_TYPE (use_vinfo)
2349 && (STMT_VINFO_RELEVANT (use_vinfo)
2350 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2351 && !(gimple_code (use_stmt) == GIMPLE_PHI
2352 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2353 {
2354 if (dump_enabled_p ())
2355 {
2356 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2357 "def in non-SLP stmt: ");
2358 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2359 }
2360 stype = hybrid;
2361 }
29764870 2362 }
642fce57 2363 }
ebfd146a 2364
502f0263
RB
2365 if (stype == hybrid
2366 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2367 {
2368 if (dump_enabled_p ())
2369 {
2370 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2371 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2372 }
2373 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2374 }
ebfd146a 2375
642fce57 2376 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
603cca93 2377 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
90dd6e3d 2378 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2379}
f2c74cc4 2380
642fce57 2381/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2382
642fce57
RB
2383static tree
2384vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2385{
2386 walk_stmt_info *wi = (walk_stmt_info *)data;
6585ff8f 2387 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
642fce57
RB
2388
2389 if (wi->is_lhs)
2390 return NULL_TREE;
2391
6585ff8f 2392 stmt_vec_info def_stmt_info;
642fce57 2393 if (TREE_CODE (*tp) == SSA_NAME
6585ff8f
RS
2394 && !SSA_NAME_IS_DEFAULT_DEF (*tp)
2395 && (def_stmt_info = loop_vinfo->lookup_stmt (SSA_NAME_DEF_STMT (*tp)))
2396 && PURE_SLP_STMT (def_stmt_info))
642fce57 2397 {
6585ff8f 2398 if (dump_enabled_p ())
b1af7da6 2399 {
6585ff8f
RS
2400 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2401 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt_info->stmt, 0);
b1af7da6 2402 }
6585ff8f 2403 STMT_SLP_TYPE (def_stmt_info) = hybrid;
642fce57
RB
2404 }
2405
2406 return NULL_TREE;
ebfd146a
IR
2407}
2408
642fce57
RB
2409static tree
2410vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
6585ff8f 2411 walk_stmt_info *wi)
642fce57 2412{
6585ff8f
RS
2413 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2414 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
642fce57
RB
2415 /* If the stmt is in a SLP instance then this isn't a reason
2416 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2417 if (! STMT_SLP_TYPE (use_vinfo)
2418 && (STMT_VINFO_RELEVANT (use_vinfo)
2419 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2420 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2421 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2422 ;
2423 else
642fce57
RB
2424 *handled = true;
2425 return NULL_TREE;
2426}
ebfd146a
IR
2427
2428/* Find stmts that must be both vectorized and SLPed. */
2429
2430void
2431vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2432{
2433 unsigned int i;
9771b263 2434 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2435 slp_instance instance;
2436
adac3a68 2437 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
ebfd146a 2438
642fce57
RB
2439 /* First walk all pattern stmt in the loop and mark defs of uses as
2440 hybrid because immediate uses in them are not recorded. */
2441 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2442 {
2443 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2444 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2445 gsi_next (&gsi))
2446 {
355fe088 2447 gimple *stmt = gsi_stmt (gsi);
6585ff8f 2448 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
642fce57
RB
2449 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2450 {
2451 walk_stmt_info wi;
2452 memset (&wi, 0, sizeof (wi));
6585ff8f 2453 wi.info = loop_vinfo;
642fce57
RB
2454 gimple_stmt_iterator gsi2
2455 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2456 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2457 vect_detect_hybrid_slp_1, &wi);
2458 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2459 vect_detect_hybrid_slp_2,
2460 vect_detect_hybrid_slp_1, &wi);
2461 }
2462 }
2463 }
2464
2465 /* Then walk the SLP instance trees marking stmts with uses in
2466 non-SLP stmts as hybrid, also propagating hybrid down the
2467 SLP tree, collecting the above info on-the-fly. */
9771b263 2468 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2469 {
2470 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2471 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2472 i, pure_slp);
2473 }
ebfd146a
IR
2474}
2475
a70d6342 2476
2c515559
RS
2477/* Initialize a bb_vec_info struct for the statements between
2478 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2479
2480_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
ca823c85
RB
2481 gimple_stmt_iterator region_end_in,
2482 vec_info_shared *shared)
2483 : vec_info (vec_info::bb, init_cost (NULL), shared),
2c515559
RS
2484 bb (gsi_bb (region_begin_in)),
2485 region_begin (region_begin_in),
2486 region_end (region_end_in)
a70d6342 2487{
a70d6342
IR
2488 gimple_stmt_iterator gsi;
2489
61d371eb
RB
2490 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2491 gsi_next (&gsi))
a70d6342 2492 {
355fe088 2493 gimple *stmt = gsi_stmt (gsi);
a70d6342 2494 gimple_set_uid (stmt, 0);
4fbeb363 2495 add_stmt (stmt);
a70d6342
IR
2496 }
2497
2c515559 2498 bb->aux = this;
a70d6342
IR
2499}
2500
2501
2502/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2503 stmts in the basic block. */
2504
2c515559 2505_bb_vec_info::~_bb_vec_info ()
a70d6342 2506{
2c515559
RS
2507 for (gimple_stmt_iterator si = region_begin;
2508 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
a70d6342 2509 {
355fe088 2510 gimple *stmt = gsi_stmt (si);
a70d6342
IR
2511 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2512
2513 if (stmt_info)
2514 /* Free stmt_vec_info. */
2515 free_stmt_vec_info (stmt);
61d371eb
RB
2516
2517 /* Reset region marker. */
2518 gimple_set_uid (stmt, -1);
a70d6342
IR
2519 }
2520
2c515559 2521 bb->aux = NULL;
a70d6342
IR
2522}
2523
15944069
RS
2524/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2525 given then that child nodes have already been processed, and that
2526 their def types currently match their SLP node's def type. */
a70d6342
IR
2527
2528static bool
15944069
RS
2529vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2530 slp_instance node_instance,
2531 stmt_vector_for_cost *cost_vec)
a70d6342 2532{
15944069 2533 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
bd2f172f
RB
2534 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2535 gcc_assert (stmt_info);
2536 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2537
2538 /* For BB vectorization vector types are assigned here.
2539 Memory accesses already got their vector type assigned
2540 in vect_analyze_data_refs. */
2541 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2542 if (bb_vinfo
2543 && ! STMT_VINFO_DATA_REF (stmt_info))
a70d6342 2544 {
1f3cb663
RS
2545 tree vectype, nunits_vectype;
2546 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2547 &nunits_vectype))
2548 /* We checked this when building the node. */
2549 gcc_unreachable ();
2550 if (vectype == boolean_type_node)
bd2f172f 2551 {
1f3cb663
RS
2552 vectype = vect_get_mask_type_for_stmt (stmt_info);
2553 if (!vectype)
2554 /* vect_get_mask_type_for_stmt has already explained the
2555 failure. */
2556 return false;
bd2f172f
RB
2557 }
2558
2559 gimple *sstmt;
15944069 2560 unsigned int i;
bd2f172f
RB
2561 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2562 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
a70d6342
IR
2563 }
2564
8b7e9dba
RS
2565 /* Calculate the number of vector statements to be created for the
2566 scalar stmts in this node. For SLP reductions it is equal to the
2567 number of vector statements in the children (which has already been
2568 calculated by the recursive call). Otherwise it is the number of
2c53b149 2569 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
8b7e9dba 2570 VF divided by the number of elements in a vector. */
2c53b149
RB
2571 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2572 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
8b7e9dba
RS
2573 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2574 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2575 else
2576 {
d9f21f6a 2577 poly_uint64 vf;
8b7e9dba
RS
2578 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2579 vf = loop_vinfo->vectorization_factor;
2580 else
2581 vf = 1;
2582 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2583 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2584 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2585 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2586 }
2587
15944069
RS
2588 bool dummy;
2589 return vect_analyze_stmt (stmt, &dummy, node, node_instance, cost_vec);
2590}
2591
2592/* Analyze statements contained in SLP tree NODE after recursively analyzing
2593 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2594
2595 Return true if the operations are supported. */
2596
2597static bool
2598vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2599 slp_instance node_instance,
2600 scalar_stmts_to_slp_tree_map_t *visited,
2601 scalar_stmts_to_slp_tree_map_t *lvisited,
2602 stmt_vector_for_cost *cost_vec)
2603{
2604 int i, j;
2605 slp_tree child;
2606
2607 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2608 return true;
2609
2610 /* If we already analyzed the exact same set of scalar stmts we're done.
2611 We share the generated vector stmts for those. */
2612 slp_tree *leader;
2613 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2614 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2615 {
2616 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2617 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2618 return true;
2619 }
2620
2621 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2622 doesn't result in any issue since we throw away the lvisited set
2623 when we fail. */
2624 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2625
2626 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2627 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2628 visited, lvisited, cost_vec))
2629 return false;
2630
bd2f172f
RB
2631 /* Push SLP node def-type to stmt operands. */
2632 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2633 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2634 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2635 = SLP_TREE_DEF_TYPE (child);
15944069
RS
2636 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2637 cost_vec);
bd2f172f
RB
2638 /* Restore def-types. */
2639 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2640 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2641 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2642 = vect_internal_def;
2643 if (! res)
2644 return false;
2645
2646 return true;
a70d6342
IR
2647}
2648
2649
8b7e9dba 2650/* Analyze statements in SLP instances of VINFO. Return true if the
a70d6342
IR
2651 operations are supported. */
2652
a12e42fc 2653bool
8b7e9dba 2654vect_slp_analyze_operations (vec_info *vinfo)
a70d6342 2655{
a70d6342
IR
2656 slp_instance instance;
2657 int i;
2658
adac3a68 2659 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
a12e42fc 2660
68435eb2
RB
2661 scalar_stmts_to_slp_tree_map_t *visited
2662 = new scalar_stmts_to_slp_tree_map_t ();
8b7e9dba 2663 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
a70d6342 2664 {
68435eb2
RB
2665 scalar_stmts_to_slp_tree_map_t lvisited;
2666 stmt_vector_for_cost cost_vec;
2667 cost_vec.create (2);
8b7e9dba
RS
2668 if (!vect_slp_analyze_node_operations (vinfo,
2669 SLP_INSTANCE_TREE (instance),
68435eb2
RB
2670 instance, visited, &lvisited,
2671 &cost_vec))
a70d6342 2672 {
a12e42fc
RB
2673 dump_printf_loc (MSG_NOTE, vect_location,
2674 "removing SLP instance operations starting from: ");
2675 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2676 SLP_TREE_SCALAR_STMTS
2677 (SLP_INSTANCE_TREE (instance))[0], 0);
6e2dd807 2678 vect_free_slp_instance (instance, false);
8b7e9dba 2679 vinfo->slp_instances.ordered_remove (i);
68435eb2 2680 cost_vec.release ();
a70d6342
IR
2681 }
2682 else
68435eb2
RB
2683 {
2684 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2685 x != lvisited.end(); ++x)
2686 visited->put ((*x).first.copy (), (*x).second);
2687 i++;
78604de0 2688
68435eb2
RB
2689 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2690 cost_vec.release ();
2691 }
2692 }
78604de0
RB
2693 delete visited;
2694
8b7e9dba 2695 return !vinfo->slp_instances.is_empty ();
a70d6342
IR
2696}
2697
6eddf228
RB
2698
2699/* Compute the scalar cost of the SLP node NODE and its children
2700 and return it. Do not account defs that are marked in LIFE and
2701 update LIFE according to uses of NODE. */
2702
a296d6d3 2703static void
292cba13 2704vect_bb_slp_scalar_cost (basic_block bb,
a296d6d3
RB
2705 slp_tree node, vec<bool, va_heap> *life,
2706 stmt_vector_for_cost *cost_vec)
6eddf228 2707{
6eddf228 2708 unsigned i;
355fe088 2709 gimple *stmt;
6eddf228
RB
2710 slp_tree child;
2711
2712 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2713 {
6eddf228
RB
2714 ssa_op_iter op_iter;
2715 def_operand_p def_p;
2716 stmt_vec_info stmt_info;
2717
ff4c81cc 2718 if ((*life)[i])
6eddf228
RB
2719 continue;
2720
2721 /* If there is a non-vectorized use of the defs then the scalar
2722 stmt is kept live in which case we do not account it or any
2723 required defs in the SLP children in the scalar cost. This
2724 way we make the vectorization more costly when compared to
2725 the scalar cost. */
2726 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2727 {
2728 imm_use_iterator use_iter;
355fe088 2729 gimple *use_stmt;
6eddf228 2730 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
f30a0ba5 2731 if (!is_gimple_debug (use_stmt)
61d371eb
RB
2732 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2733 use_stmt)
603cca93 2734 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
6eddf228 2735 {
ff4c81cc 2736 (*life)[i] = true;
6eddf228
RB
2737 BREAK_FROM_IMM_USE_STMT (use_iter);
2738 }
2739 }
ff4c81cc 2740 if ((*life)[i])
6eddf228
RB
2741 continue;
2742
b555a2e4
RB
2743 /* Count scalar stmts only once. */
2744 if (gimple_visited_p (stmt))
2745 continue;
2746 gimple_set_visited (stmt, true);
2747
6eddf228 2748 stmt_info = vinfo_for_stmt (stmt);
a296d6d3 2749 vect_cost_for_stmt kind;
6eddf228
RB
2750 if (STMT_VINFO_DATA_REF (stmt_info))
2751 {
2752 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
a296d6d3 2753 kind = scalar_load;
6eddf228 2754 else
a296d6d3 2755 kind = scalar_store;
6eddf228
RB
2756 }
2757 else
a296d6d3
RB
2758 kind = scalar_stmt;
2759 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
6eddf228
RB
2760 }
2761
faa5399b 2762 auto_vec<bool, 20> subtree_life;
6eddf228 2763 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
2764 {
2765 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2766 {
2767 /* Do not directly pass LIFE to the recursive call, copy it to
2768 confine changes in the callee to the current child/subtree. */
2769 subtree_life.safe_splice (*life);
a296d6d3 2770 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
faa5399b
RB
2771 subtree_life.truncate (0);
2772 }
2773 }
6eddf228
RB
2774}
2775
69f11a13
IR
2776/* Check if vectorization of the basic block is profitable. */
2777
2778static bool
2779vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2780{
9771b263 2781 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2782 slp_instance instance;
1a4b99c1 2783 int i;
c3e7ee41 2784 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2785 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2786
2787 /* Calculate scalar cost. */
a296d6d3
RB
2788 stmt_vector_for_cost scalar_costs;
2789 scalar_costs.create (0);
6eddf228 2790 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2791 {
00f96dc9 2792 auto_vec<bool, 20> life;
ff4c81cc 2793 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
a296d6d3
RB
2794 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2795 SLP_INSTANCE_TREE (instance),
2796 &life, &scalar_costs);
2797 }
2798 void *target_cost_data = init_cost (NULL);
68435eb2 2799 add_stmt_costs (target_cost_data, &scalar_costs);
a296d6d3
RB
2800 scalar_costs.release ();
2801 unsigned dummy;
2802 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2803 destroy_cost_data (target_cost_data);
69f11a13 2804
b555a2e4
RB
2805 /* Unset visited flag. */
2806 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2807 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2808 gimple_set_visited (gsi_stmt (gsi), false);
2809
c3e7ee41 2810 /* Complete the target-specific cost calculation. */
92345349
BS
2811 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2812 &vec_inside_cost, &vec_epilogue_cost);
2813
2814 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2815
73fbfcad 2816 if (dump_enabled_p ())
69f11a13 2817 {
78c60e3d
SS
2818 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2819 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2820 vec_inside_cost);
2821 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2822 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2823 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2824 }
2825
a6524bba
RB
2826 /* Vectorization is profitable if its cost is more than the cost of scalar
2827 version. Note that we err on the vector side for equal cost because
2828 the cost estimate is otherwise quite pessimistic (constant uses are
2829 free on the scalar side but cost a load on the vector side for
2830 example). */
2831 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2832 return false;
2833
2834 return true;
2835}
2836
a5b50aa1
RB
2837/* Check if the basic block can be vectorized. Returns a bb_vec_info
2838 if so and sets fatal to true if failure is independent of
2839 current_vector_size. */
a70d6342 2840
8e19f5a1 2841static bb_vec_info
61d371eb
RB
2842vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2843 gimple_stmt_iterator region_end,
a5b50aa1 2844 vec<data_reference_p> datarefs, int n_stmts,
ca823c85 2845 bool &fatal, vec_info_shared *shared)
a70d6342
IR
2846{
2847 bb_vec_info bb_vinfo;
a70d6342 2848 slp_instance instance;
8e19f5a1 2849 int i;
d9f21f6a 2850 poly_uint64 min_vf = 2;
e4a707c4 2851
a5b50aa1
RB
2852 /* The first group of checks is independent of the vector size. */
2853 fatal = true;
2854
61d371eb
RB
2855 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2856 {
2857 if (dump_enabled_p ())
2858 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2859 "not vectorized: too many instructions in "
2860 "basic block.\n");
2861 free_data_refs (datarefs);
2862 return NULL;
2863 }
2864
ca823c85 2865 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
a70d6342
IR
2866 if (!bb_vinfo)
2867 return NULL;
2868
61d371eb 2869 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
ca823c85 2870 bb_vinfo->shared->save_datarefs ();
428db0ba
RB
2871
2872 /* Analyze the data references. */
2873
2874 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
a70d6342 2875 {
73fbfcad 2876 if (dump_enabled_p ())
78c60e3d
SS
2877 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2878 "not vectorized: unhandled data-ref in basic "
2879 "block.\n");
b8698a0f 2880
2c515559 2881 delete bb_vinfo;
a70d6342
IR
2882 return NULL;
2883 }
2884
fcac74a1 2885 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2886 {
73fbfcad 2887 if (dump_enabled_p ())
78c60e3d
SS
2888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2889 "not vectorized: not enough data-refs in "
2890 "basic block.\n");
a70d6342 2891
2c515559 2892 delete bb_vinfo;
a70d6342
IR
2893 return NULL;
2894 }
2895
310213d4 2896 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
2897 {
2898 if (dump_enabled_p ())
2899 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2900 "not vectorized: unhandled data access in "
2901 "basic block.\n");
2902
2c515559 2903 delete bb_vinfo;
5abe1e05
RB
2904 return NULL;
2905 }
2906
a5b50aa1
RB
2907 /* If there are no grouped stores in the region there is no need
2908 to continue with pattern recog as vect_analyze_slp will fail
2909 anyway. */
2910 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 2911 {
73fbfcad 2912 if (dump_enabled_p ())
a5b50aa1
RB
2913 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2914 "not vectorized: no grouped stores in "
2915 "basic block.\n");
b8698a0f 2916
2c515559 2917 delete bb_vinfo;
a70d6342
IR
2918 return NULL;
2919 }
b8698a0f 2920
a5b50aa1
RB
2921 /* While the rest of the analysis below depends on it in some way. */
2922 fatal = false;
2923
2924 vect_pattern_recog (bb_vinfo);
2925
a70d6342
IR
2926 /* Check the SLP opportunities in the basic block, analyze and build SLP
2927 trees. */
310213d4 2928 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 2929 {
73fbfcad 2930 if (dump_enabled_p ())
effb52da
RB
2931 {
2932 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2933 "Failed to SLP the basic block.\n");
2934 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2935 "not vectorized: failed to find SLP opportunities "
2936 "in basic block.\n");
2937 }
a70d6342 2938
2c515559 2939 delete bb_vinfo;
a70d6342
IR
2940 return NULL;
2941 }
b8698a0f 2942
62c8a2cf
RS
2943 vect_record_base_alignments (bb_vinfo);
2944
c2a12ca0
RB
2945 /* Analyze and verify the alignment of data references and the
2946 dependence in the SLP instances. */
a5b50aa1
RB
2947 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2948 {
c2a12ca0
RB
2949 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2950 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1
RB
2951 {
2952 dump_printf_loc (MSG_NOTE, vect_location,
2953 "removing SLP instance operations starting from: ");
2954 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2955 SLP_TREE_SCALAR_STMTS
2956 (SLP_INSTANCE_TREE (instance))[0], 0);
6e2dd807 2957 vect_free_slp_instance (instance, false);
a5b50aa1
RB
2958 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2959 continue;
2960 }
c2a12ca0
RB
2961
2962 /* Mark all the statements that we want to vectorize as pure SLP and
2963 relevant. */
2964 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2965 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2966
a5b50aa1
RB
2967 i++;
2968 }
a5b50aa1
RB
2969 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2970 {
2c515559 2971 delete bb_vinfo;
a5b50aa1
RB
2972 return NULL;
2973 }
2974
8b7e9dba 2975 if (!vect_slp_analyze_operations (bb_vinfo))
a70d6342 2976 {
73fbfcad 2977 if (dump_enabled_p ())
e645e942 2978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2979 "not vectorized: bad operation in basic block.\n");
a70d6342 2980
2c515559 2981 delete bb_vinfo;
a70d6342
IR
2982 return NULL;
2983 }
2984
69f11a13 2985 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 2986 if (!unlimited_cost_model (NULL)
69f11a13
IR
2987 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2988 {
73fbfcad 2989 if (dump_enabled_p ())
78c60e3d
SS
2990 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2991 "not vectorized: vectorization is not "
2992 "profitable.\n");
69f11a13 2993
2c515559 2994 delete bb_vinfo;
69f11a13
IR
2995 return NULL;
2996 }
2997
73fbfcad 2998 if (dump_enabled_p ())
78c60e3d
SS
2999 dump_printf_loc (MSG_NOTE, vect_location,
3000 "Basic block will be vectorized using SLP\n");
a70d6342
IR
3001
3002 return bb_vinfo;
3003}
3004
3005
428db0ba
RB
3006/* Main entry for the BB vectorizer. Analyze and transform BB, returns
3007 true if anything in the basic-block was vectorized. */
3008
3009bool
3010vect_slp_bb (basic_block bb)
8e19f5a1
IR
3011{
3012 bb_vec_info bb_vinfo;
8e19f5a1 3013 gimple_stmt_iterator gsi;
61d371eb 3014 bool any_vectorized = false;
86e36728 3015 auto_vector_sizes vector_sizes;
8e19f5a1 3016
adac3a68 3017 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
8e19f5a1 3018
8e19f5a1
IR
3019 /* Autodetect first vector size we try. */
3020 current_vector_size = 0;
86e36728
RS
3021 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
3022 unsigned int next_size = 0;
8e19f5a1 3023
61d371eb
RB
3024 gsi = gsi_start_bb (bb);
3025
86e36728 3026 poly_uint64 autodetected_vector_size = 0;
8e19f5a1
IR
3027 while (1)
3028 {
61d371eb
RB
3029 if (gsi_end_p (gsi))
3030 break;
3031
3032 gimple_stmt_iterator region_begin = gsi;
3033 vec<data_reference_p> datarefs = vNULL;
3034 int insns = 0;
3035
3036 for (; !gsi_end_p (gsi); gsi_next (&gsi))
428db0ba 3037 {
61d371eb
RB
3038 gimple *stmt = gsi_stmt (gsi);
3039 if (is_gimple_debug (stmt))
3040 continue;
3041 insns++;
3042
3043 if (gimple_location (stmt) != UNKNOWN_LOCATION)
4f5b9c80 3044 vect_location = stmt;
61d371eb 3045
8e846c66 3046 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
61d371eb
RB
3047 break;
3048 }
3049
3050 /* Skip leading unhandled stmts. */
3051 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3052 {
3053 gsi_next (&gsi);
3054 continue;
3055 }
428db0ba 3056
61d371eb
RB
3057 gimple_stmt_iterator region_end = gsi;
3058
3059 bool vectorized = false;
a5b50aa1 3060 bool fatal = false;
ca823c85 3061 vec_info_shared shared;
61d371eb 3062 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
ca823c85 3063 datarefs, insns, fatal, &shared);
61d371eb
RB
3064 if (bb_vinfo
3065 && dbg_cnt (vect_slp))
3066 {
428db0ba 3067 if (dump_enabled_p ())
61d371eb 3068 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
428db0ba 3069
ca823c85 3070 bb_vinfo->shared->check_datarefs ();
428db0ba
RB
3071 vect_schedule_slp (bb_vinfo);
3072
d1ac60d5
RB
3073 unsigned HOST_WIDE_INT bytes;
3074 if (current_vector_size.is_constant (&bytes))
3075 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3076 "basic block part vectorized using "
3077 HOST_WIDE_INT_PRINT_UNSIGNED " byte "
3078 "vectors\n", bytes);
3079 else
3080 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3081 "basic block part vectorized using variable "
3082 "length vectors\n");
428db0ba 3083
61d371eb 3084 vectorized = true;
428db0ba 3085 }
2c515559 3086 delete bb_vinfo;
8e19f5a1 3087
61d371eb 3088 any_vectorized |= vectorized;
8e19f5a1 3089
86e36728
RS
3090 if (next_size == 0)
3091 autodetected_vector_size = current_vector_size;
3092
3093 if (next_size < vector_sizes.length ()
3094 && known_eq (vector_sizes[next_size], autodetected_vector_size))
3095 next_size += 1;
3096
61d371eb 3097 if (vectorized
86e36728
RS
3098 || next_size == vector_sizes.length ()
3099 || known_eq (current_vector_size, 0U)
a5b50aa1
RB
3100 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3101 vector sizes will fail do not bother iterating. */
3102 || fatal)
61d371eb
RB
3103 {
3104 if (gsi_end_p (region_end))
3105 break;
8e19f5a1 3106
61d371eb
RB
3107 /* Skip the unhandled stmt. */
3108 gsi_next (&gsi);
3109
3110 /* And reset vector sizes. */
3111 current_vector_size = 0;
86e36728 3112 next_size = 0;
61d371eb
RB
3113 }
3114 else
3115 {
3116 /* Try the next biggest vector size. */
86e36728 3117 current_vector_size = vector_sizes[next_size++];
61d371eb 3118 if (dump_enabled_p ())
86e36728
RS
3119 {
3120 dump_printf_loc (MSG_NOTE, vect_location,
3121 "***** Re-trying analysis with "
3122 "vector size ");
3123 dump_dec (MSG_NOTE, current_vector_size);
3124 dump_printf (MSG_NOTE, "\n");
3125 }
61d371eb
RB
3126
3127 /* Start over. */
3128 gsi = region_begin;
3129 }
8e19f5a1 3130 }
61d371eb
RB
3131
3132 return any_vectorized;
8e19f5a1
IR
3133}
3134
3135
e4af0bc4
IE
3136/* Return 1 if vector type of boolean constant which is OPNUM
3137 operand in statement STMT is a boolean vector. */
3138
3139static bool
3140vect_mask_constant_operand_p (gimple *stmt, int opnum)
3141{
3142 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3143 enum tree_code code = gimple_expr_code (stmt);
3144 tree op, vectype;
e4af0bc4
IE
3145 enum vect_def_type dt;
3146
3147 /* For comparison and COND_EXPR type is chosen depending
3148 on the other comparison operand. */
3149 if (TREE_CODE_CLASS (code) == tcc_comparison)
3150 {
3151 if (opnum)
3152 op = gimple_assign_rhs1 (stmt);
3153 else
3154 op = gimple_assign_rhs2 (stmt);
3155
894dd753 3156 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3157 gcc_unreachable ();
3158
3159 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3160 }
3161
3162 if (code == COND_EXPR)
3163 {
3164 tree cond = gimple_assign_rhs1 (stmt);
3165
3166 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1
RB
3167 op = cond;
3168 else if (opnum)
e4af0bc4
IE
3169 op = TREE_OPERAND (cond, 1);
3170 else
3171 op = TREE_OPERAND (cond, 0);
3172
894dd753 3173 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3174 gcc_unreachable ();
3175
3176 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3177 }
3178
3179 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3180}
3181
018b2744
RS
3182/* Build a variable-length vector in which the elements in ELTS are repeated
3183 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3184 RESULTS and add any new instructions to SEQ.
3185
3186 The approach we use is:
3187
3188 (1) Find a vector mode VM with integer elements of mode IM.
3189
3190 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3191 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3192 from small vectors to IM.
3193
3194 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3195
3196 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3197 correct byte contents.
3198
3199 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3200
3201 We try to find the largest IM for which this sequence works, in order
3202 to cut down on the number of interleaves. */
3203
f1739b48 3204void
018b2744
RS
3205duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3206 unsigned int nresults, vec<tree> &results)
3207{
3208 unsigned int nelts = elts.length ();
3209 tree element_type = TREE_TYPE (vector_type);
3210
3211 /* (1) Find a vector mode VM with integer elements of mode IM. */
3212 unsigned int nvectors = 1;
3213 tree new_vector_type;
3214 tree permutes[2];
3215 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3216 &nvectors, &new_vector_type,
3217 permutes))
3218 gcc_unreachable ();
3219
3220 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3221 unsigned int partial_nelts = nelts / nvectors;
3222 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3223
3224 tree_vector_builder partial_elts;
3225 auto_vec<tree, 32> pieces (nvectors * 2);
3226 pieces.quick_grow (nvectors * 2);
3227 for (unsigned int i = 0; i < nvectors; ++i)
3228 {
3229 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3230 ELTS' has mode IM. */
3231 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3232 for (unsigned int j = 0; j < partial_nelts; ++j)
3233 partial_elts.quick_push (elts[i * partial_nelts + j]);
3234 tree t = gimple_build_vector (seq, &partial_elts);
3235 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3236 TREE_TYPE (new_vector_type), t);
3237
3238 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3239 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3240 }
3241
3242 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3243 correct byte contents.
3244
3245 We need to repeat the following operation log2(nvectors) times:
3246
3247 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3248 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3249
3250 However, if each input repeats every N elements and the VF is
3251 a multiple of N * 2, the HI result is the same as the LO. */
3252 unsigned int in_start = 0;
3253 unsigned int out_start = nvectors;
3254 unsigned int hi_start = nvectors / 2;
3255 /* A bound on the number of outputs needed to produce NRESULTS results
3256 in the final iteration. */
3257 unsigned int noutputs_bound = nvectors * nresults;
3258 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3259 {
3260 noutputs_bound /= 2;
3261 unsigned int limit = MIN (noutputs_bound, nvectors);
3262 for (unsigned int i = 0; i < limit; ++i)
3263 {
3264 if ((i & 1) != 0
3265 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3266 2 * in_repeat))
3267 {
3268 pieces[out_start + i] = pieces[out_start + i - 1];
3269 continue;
3270 }
3271
3272 tree output = make_ssa_name (new_vector_type);
3273 tree input1 = pieces[in_start + (i / 2)];
3274 tree input2 = pieces[in_start + (i / 2) + hi_start];
3275 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3276 input1, input2,
3277 permutes[i & 1]);
3278 gimple_seq_add_stmt (seq, stmt);
3279 pieces[out_start + i] = output;
3280 }
3281 std::swap (in_start, out_start);
3282 }
3283
3284 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3285 results.reserve (nresults);
3286 for (unsigned int i = 0; i < nresults; ++i)
3287 if (i < nvectors)
3288 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3289 pieces[in_start + i]));
3290 else
3291 results.quick_push (results[i - nvectors]);
3292}
3293
e4af0bc4 3294
b8698a0f
L
3295/* For constant and loop invariant defs of SLP_NODE this function returns
3296 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
3297 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3298 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
3299 REDUC_INDEX is the index of the reduction operand in the statements, unless
3300 it is -1. */
ebfd146a
IR
3301
3302static void
9dc3f7de 3303vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 3304 vec<tree> *vec_oprnds,
306b0c92 3305 unsigned int op_num, unsigned int number_of_vectors)
ebfd146a 3306{
355fe088
TS
3307 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3308 gimple *stmt = stmts[0];
ebfd146a 3309 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
018b2744 3310 unsigned HOST_WIDE_INT nunits;
ebfd146a 3311 tree vec_cst;
d2a12ae7 3312 unsigned j, number_of_places_left_in_vector;
ebfd146a 3313 tree vector_type;
9dc3f7de 3314 tree vop;
9771b263 3315 int group_size = stmts.length ();
ebfd146a 3316 unsigned int vec_num, i;
d2a12ae7 3317 unsigned number_of_copies = 1;
9771b263
DN
3318 vec<tree> voprnds;
3319 voprnds.create (number_of_vectors);
ebfd146a 3320 bool constant_p, is_store;
b5aeb3bb 3321 tree neutral_op = NULL;
bac430c9 3322 enum tree_code code = gimple_expr_code (stmt);
13396b6e 3323 gimple_seq ctor_seq = NULL;
018b2744 3324 auto_vec<tree, 16> permute_results;
b5aeb3bb 3325
42fd8198 3326 /* Check if vector type is a boolean vector. */
2568d8a1 3327 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
e4af0bc4 3328 && vect_mask_constant_operand_p (stmt, op_num))
42fd8198
IE
3329 vector_type
3330 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3331 else
3332 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
afbe6325 3333
ebfd146a
IR
3334 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3335 {
3336 is_store = true;
3337 op = gimple_assign_rhs1 (stmt);
3338 }
3339 else
9dc3f7de
IR
3340 is_store = false;
3341
3342 gcc_assert (op);
ebfd146a 3343
ebfd146a 3344 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3345 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3346
3347 For example, we have two scalar operands, s1 and s2 (e.g., group of
3348 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3349 of this type can be packed in a vector). The output vector will contain
3350 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3351 will be 2).
3352
b8698a0f 3353 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3354 containing the operands.
3355
3356 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3357 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3358 {s5, s6, s7, s8}. */
b8698a0f 3359
018b2744
RS
3360 /* When using duplicate_and_interleave, we just need one element for
3361 each scalar statement. */
3362 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3363 nunits = group_size;
3364
14a61437 3365 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3366
3367 number_of_places_left_in_vector = nunits;
62cf7335 3368 constant_p = true;
5ebaa477 3369 tree_vector_builder elts (vector_type, nunits, 1);
794e3180 3370 elts.quick_grow (nunits);
90dd6e3d 3371 bool place_after_defs = false;
ebfd146a
IR
3372 for (j = 0; j < number_of_copies; j++)
3373 {
9771b263 3374 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
ebfd146a
IR
3375 {
3376 if (is_store)
3377 op = gimple_assign_rhs1 (stmt);
bac430c9 3378 else
f7e531cf 3379 {
bac430c9 3380 switch (code)
f7e531cf 3381 {
bac430c9 3382 case COND_EXPR:
a989bcc3
IE
3383 {
3384 tree cond = gimple_assign_rhs1 (stmt);
3385 if (TREE_CODE (cond) == SSA_NAME)
3386 op = gimple_op (stmt, op_num + 1);
3387 else if (op_num == 0 || op_num == 1)
bac430c9 3388 op = TREE_OPERAND (cond, op_num);
a989bcc3
IE
3389 else
3390 {
3391 if (op_num == 2)
3392 op = gimple_assign_rhs2 (stmt);
3393 else
3394 op = gimple_assign_rhs3 (stmt);
3395 }
3396 }
bac430c9
IR
3397 break;
3398
3399 case CALL_EXPR:
3400 op = gimple_call_arg (stmt, op_num);
3401 break;
3402
b84b294a
JJ
3403 case LSHIFT_EXPR:
3404 case RSHIFT_EXPR:
3405 case LROTATE_EXPR:
3406 case RROTATE_EXPR:
3407 op = gimple_op (stmt, op_num + 1);
3408 /* Unlike the other binary operators, shifts/rotates have
3409 the shift count being int, instead of the same type as
3410 the lhs, so make sure the scalar is the right type if
3411 we are dealing with vectors of
3412 long long/long/short/char. */
793d9a16 3413 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
3414 op = fold_convert (TREE_TYPE (vector_type), op);
3415 break;
3416
bac430c9
IR
3417 default:
3418 op = gimple_op (stmt, op_num + 1);
b84b294a 3419 break;
f7e531cf
IR
3420 }
3421 }
b8698a0f 3422
ebfd146a 3423 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3424 number_of_places_left_in_vector--;
90dd6e3d 3425 tree orig_op = op;
13396b6e 3426 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3427 {
793d9a16 3428 if (CONSTANT_CLASS_P (op))
13396b6e 3429 {
42fd8198
IE
3430 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3431 {
3432 /* Can't use VIEW_CONVERT_EXPR for booleans because
3433 of possibly different sizes of scalar value and
3434 vector element. */
3435 if (integer_zerop (op))
3436 op = build_int_cst (TREE_TYPE (vector_type), 0);
3437 else if (integer_onep (op))
158beb4a 3438 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3439 else
3440 gcc_unreachable ();
3441 }
3442 else
3443 op = fold_unary (VIEW_CONVERT_EXPR,
3444 TREE_TYPE (vector_type), op);
13396b6e
JJ
3445 gcc_assert (op && CONSTANT_CLASS_P (op));
3446 }
3447 else
3448 {
b731b390 3449 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3450 gimple *init_stmt;
262a363f
JJ
3451 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3452 {
158beb4a
JJ
3453 tree true_val
3454 = build_all_ones_cst (TREE_TYPE (vector_type));
3455 tree false_val
3456 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3457 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3458 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3459 op, true_val,
3460 false_val);
262a363f 3461 }
262a363f
JJ
3462 else
3463 {
3464 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3465 op);
3466 init_stmt
3467 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3468 op);
3469 }
13396b6e
JJ
3470 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3471 op = new_temp;
3472 }
50eeef09 3473 }
d2a12ae7 3474 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3475 if (!CONSTANT_CLASS_P (op))
3476 constant_p = false;
90dd6e3d
RB
3477 if (TREE_CODE (orig_op) == SSA_NAME
3478 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3479 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3480 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3481 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3482 place_after_defs = true;
ebfd146a
IR
3483
3484 if (number_of_places_left_in_vector == 0)
3485 {
018b2744
RS
3486 if (constant_p
3487 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3488 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3489 vec_cst = gimple_build_vector (&ctor_seq, &elts);
ebfd146a 3490 else
d2a12ae7 3491 {
018b2744
RS
3492 if (vec_oprnds->is_empty ())
3493 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3494 number_of_vectors,
3495 permute_results);
3496 vec_cst = permute_results[number_of_vectors - j - 1];
d2a12ae7 3497 }
90dd6e3d
RB
3498 tree init;
3499 gimple_stmt_iterator gsi;
3500 if (place_after_defs)
3501 {
3502 gsi = gsi_for_stmt
3503 (vect_find_last_scalar_stmt_in_slp (slp_node));
3504 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3505 }
3506 else
3507 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
13396b6e
JJ
3508 if (ctor_seq != NULL)
3509 {
90dd6e3d 3510 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
018b2744 3511 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
13396b6e
JJ
3512 ctor_seq = NULL;
3513 }
90dd6e3d
RB
3514 voprnds.quick_push (init);
3515 place_after_defs = false;
62cf7335
RB
3516 number_of_places_left_in_vector = nunits;
3517 constant_p = true;
5ebaa477
RS
3518 elts.new_vector (vector_type, nunits, 1);
3519 elts.quick_grow (nunits);
ebfd146a
IR
3520 }
3521 }
3522 }
3523
b8698a0f 3524 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 3525 them. */
9771b263 3526 vec_num = voprnds.length ();
d2a12ae7 3527 for (j = vec_num; j != 0; j--)
ebfd146a 3528 {
9771b263
DN
3529 vop = voprnds[j - 1];
3530 vec_oprnds->quick_push (vop);
ebfd146a
IR
3531 }
3532
9771b263 3533 voprnds.release ();
ebfd146a
IR
3534
3535 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3536 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3537 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3538 to replicate the vectors. */
9771b263 3539 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3540 {
b5aeb3bb
IR
3541 tree neutral_vec = NULL;
3542
3543 if (neutral_op)
3544 {
3545 if (!neutral_vec)
b9acc9f1 3546 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3547
9771b263 3548 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3549 }
3550 else
3551 {
9771b263
DN
3552 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3553 vec_oprnds->quick_push (vop);
b5aeb3bb 3554 }
ebfd146a
IR
3555 }
3556}
3557
3558
3559/* Get vectorized definitions from SLP_NODE that contains corresponding
3560 vectorized def-stmts. */
3561
3562static void
9771b263 3563vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
3564{
3565 tree vec_oprnd;
355fe088 3566 gimple *vec_def_stmt;
ebfd146a
IR
3567 unsigned int i;
3568
9771b263 3569 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3570
9771b263 3571 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
ebfd146a
IR
3572 {
3573 gcc_assert (vec_def_stmt);
e7baeb39
RB
3574 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3575 vec_oprnd = gimple_phi_result (vec_def_stmt);
3576 else
3577 vec_oprnd = gimple_get_lhs (vec_def_stmt);
9771b263 3578 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
3579 }
3580}
3581
3582
b8698a0f
L
3583/* Get vectorized definitions for SLP_NODE.
3584 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
3585 call vect_get_constant_vectors() to create vector stmts.
3586 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
3587 must be stored in the corresponding child of SLP_NODE, and we call
3588 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 3589
ebfd146a 3590void
9771b263 3591vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
306b0c92 3592 vec<vec<tree> > *vec_oprnds)
ebfd146a 3593{
355fe088 3594 gimple *first_stmt;
d092494c 3595 int number_of_vects = 0, i;
77eefb71 3596 unsigned int child_index = 0;
b8698a0f 3597 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
d092494c 3598 slp_tree child = NULL;
37b5ec8f 3599 vec<tree> vec_defs;
e44978dc 3600 tree oprnd;
77eefb71 3601 bool vectorized_defs;
ebfd146a 3602
9771b263
DN
3603 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3604 FOR_EACH_VEC_ELT (ops, i, oprnd)
ebfd146a 3605 {
d092494c
IR
3606 /* For each operand we check if it has vectorized definitions in a child
3607 node or we need to create them (for invariants and constants). We
3608 check if the LHS of the first stmt of the next child matches OPRND.
3609 If it does, we found the correct child. Otherwise, we call
77eefb71
RB
3610 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3611 to check this child node for the next operand. */
3612 vectorized_defs = false;
3613 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
ebfd146a 3614 {
01d8bf07 3615 child = SLP_TREE_CHILDREN (slp_node)[child_index];
d092494c 3616
e44978dc 3617 /* We have to check both pattern and original def, if available. */
603cca93 3618 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
e44978dc 3619 {
355fe088
TS
3620 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3621 gimple *related
90dd6e3d 3622 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
e7baeb39 3623 tree first_def_op;
90dd6e3d 3624
e7baeb39
RB
3625 if (gimple_code (first_def) == GIMPLE_PHI)
3626 first_def_op = gimple_phi_result (first_def);
3627 else
3628 first_def_op = gimple_get_lhs (first_def);
3629 if (operand_equal_p (oprnd, first_def_op, 0)
90dd6e3d
RB
3630 || (related
3631 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3632 {
3633 /* The number of vector defs is determined by the number of
3634 vector statements in the node from which we get those
3635 statements. */
3636 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3637 vectorized_defs = true;
77eefb71 3638 child_index++;
90dd6e3d 3639 }
e44978dc 3640 }
77eefb71
RB
3641 else
3642 child_index++;
d092494c 3643 }
ebfd146a 3644
77eefb71
RB
3645 if (!vectorized_defs)
3646 {
3647 if (i == 0)
3648 {
3649 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3650 /* Number of vector stmts was calculated according to LHS in
3651 vect_schedule_slp_instance (), fix it by replacing LHS with
3652 RHS, if necessary. See vect_get_smallest_scalar_type () for
3653 details. */
3654 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3655 &rhs_size_unit);
3656 if (rhs_size_unit != lhs_size_unit)
3657 {
3658 number_of_vects *= rhs_size_unit;
3659 number_of_vects /= lhs_size_unit;
3660 }
3661 }
d092494c 3662 }
b5aeb3bb 3663
d092494c 3664 /* Allocate memory for vectorized defs. */
37b5ec8f
JJ
3665 vec_defs = vNULL;
3666 vec_defs.create (number_of_vects);
ebfd146a 3667
d092494c
IR
3668 /* For reduction defs we call vect_get_constant_vectors (), since we are
3669 looking for initial loop invariant values. */
306b0c92 3670 if (vectorized_defs)
d092494c 3671 /* The defs are already vectorized. */
37b5ec8f 3672 vect_get_slp_vect_defs (child, &vec_defs);
d092494c 3673 else
e7baeb39 3674 /* Build vectors from scalar defs. */
37b5ec8f 3675 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
306b0c92 3676 number_of_vects);
ebfd146a 3677
37b5ec8f 3678 vec_oprnds->quick_push (vec_defs);
d092494c 3679 }
ebfd146a
IR
3680}
3681
ebfd146a
IR
3682/* Generate vector permute statements from a list of loads in DR_CHAIN.
3683 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3684 permute statements for the SLP node NODE of the SLP instance
3685 SLP_NODE_INSTANCE. */
3686
ebfd146a 3687bool
01d8bf07 3688vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
d9f21f6a
RS
3689 gimple_stmt_iterator *gsi, poly_uint64 vf,
3690 slp_instance slp_node_instance, bool analyze_only,
29afecdf 3691 unsigned *n_perms)
ebfd146a 3692{
355fe088 3693 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3694 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3695 tree mask_element_type = NULL_TREE, mask_type;
928686b1 3696 int vec_index = 0;
2635892a 3697 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a 3698 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
928686b1 3699 unsigned int mask_element;
ef4bddc2 3700 machine_mode mode;
928686b1 3701 unsigned HOST_WIDE_INT nunits, const_vf;
ebfd146a 3702
91ff1504
RB
3703 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3704 return false;
3705
2c53b149 3706 stmt_info = vinfo_for_stmt (DR_GROUP_FIRST_ELEMENT (stmt_info));
91ff1504 3707
22e4dee7
RH
3708 mode = TYPE_MODE (vectype);
3709
d9f21f6a 3710 /* At the moment, all permutations are represented using per-element
928686b1
RS
3711 indices, so we can't cope with variable vector lengths or
3712 vectorization factors. */
3713 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
3714 || !vf.is_constant (&const_vf))
d9f21f6a
RS
3715 return false;
3716
2635892a
RH
3717 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3718 same size as the vector element being permuted. */
96f9265a 3719 mask_element_type = lang_hooks.types.type_for_mode
304b9962 3720 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
ebfd146a 3721 mask_type = get_vectype_for_scalar_type (mask_element_type);
e3342de4 3722 vec_perm_builder mask (nunits, nunits, 1);
908a1a16 3723 mask.quick_grow (nunits);
e3342de4 3724 vec_perm_indices indices;
ebfd146a 3725
61fdfd8c
RB
3726 /* Initialize the vect stmts of NODE to properly insert the generated
3727 stmts later. */
3728 if (! analyze_only)
3729 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3730 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3731 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a 3732
b8698a0f
L
3733 /* Generate permutation masks for every NODE. Number of masks for each NODE
3734 is equal to GROUP_SIZE.
3735 E.g., we have a group of three nodes with three loads from the same
3736 location in each node, and the vector size is 4. I.e., we have a
3737 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3738 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3739 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3740 ...
3741
2635892a 3742 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3743 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
3744 operation, and the mask element values can't be outside that range.
3745 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3746 For the first two permutations we need the first and the second input
ebfd146a 3747 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3748 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3749 {c2,a3,b3,c3}. */
3750
2ce27200 3751 int vect_stmts_counter = 0;
928686b1 3752 unsigned int index = 0;
2ce27200
RB
3753 int first_vec_index = -1;
3754 int second_vec_index = -1;
be377c80 3755 bool noop_p = true;
29afecdf 3756 *n_perms = 0;
ebfd146a 3757
d9f21f6a 3758 for (unsigned int j = 0; j < const_vf; j++)
2ce27200
RB
3759 {
3760 for (int k = 0; k < group_size; k++)
3761 {
928686b1 3762 unsigned int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
2c53b149 3763 + j * DR_GROUP_SIZE (stmt_info));
2ce27200
RB
3764 vec_index = i / nunits;
3765 mask_element = i % nunits;
3766 if (vec_index == first_vec_index
3767 || first_vec_index == -1)
3768 {
3769 first_vec_index = vec_index;
3770 }
3771 else if (vec_index == second_vec_index
3772 || second_vec_index == -1)
3773 {
3774 second_vec_index = vec_index;
3775 mask_element += nunits;
3776 }
3777 else
3778 {
3779 if (dump_enabled_p ())
3780 {
3781 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3782 "permutation requires at "
3783 "least three vectors ");
3784 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3785 stmt, 0);
2ce27200 3786 }
31bee964 3787 gcc_assert (analyze_only);
2ce27200
RB
3788 return false;
3789 }
ebfd146a 3790
928686b1 3791 gcc_assert (mask_element < 2 * nunits);
be377c80
RB
3792 if (mask_element != index)
3793 noop_p = false;
2ce27200
RB
3794 mask[index++] = mask_element;
3795
e3342de4 3796 if (index == nunits && !noop_p)
2ce27200 3797 {
e3342de4
RS
3798 indices.new_vector (mask, 2, nunits);
3799 if (!can_vec_perm_const_p (mode, indices))
2ce27200
RB
3800 {
3801 if (dump_enabled_p ())
22e4dee7 3802 {
2ce27200
RB
3803 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3804 vect_location,
3805 "unsupported vect permute { ");
3806 for (i = 0; i < nunits; ++i)
6b0630fb
RS
3807 {
3808 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3809 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3810 }
2ce27200 3811 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
22e4dee7 3812 }
31bee964 3813 gcc_assert (analyze_only);
2ce27200
RB
3814 return false;
3815 }
22e4dee7 3816
e3342de4
RS
3817 ++*n_perms;
3818 }
29afecdf 3819
e3342de4
RS
3820 if (index == nunits)
3821 {
2ce27200
RB
3822 if (!analyze_only)
3823 {
be377c80
RB
3824 tree mask_vec = NULL_TREE;
3825
3826 if (! noop_p)
736d0f28 3827 mask_vec = vec_perm_indices_to_tree (mask_type, indices);
2ce27200
RB
3828
3829 if (second_vec_index == -1)
3830 second_vec_index = first_vec_index;
61fdfd8c
RB
3831
3832 /* Generate the permute statement if necessary. */
3833 tree first_vec = dr_chain[first_vec_index];
3834 tree second_vec = dr_chain[second_vec_index];
3835 gimple *perm_stmt;
3836 if (! noop_p)
3837 {
3838 tree perm_dest
3839 = vect_create_destination_var (gimple_assign_lhs (stmt),
3840 vectype);
3841 perm_dest = make_ssa_name (perm_dest);
3842 perm_stmt = gimple_build_assign (perm_dest,
3843 VEC_PERM_EXPR,
3844 first_vec, second_vec,
3845 mask_vec);
3846 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3847 }
3848 else
3849 /* If mask was NULL_TREE generate the requested
3850 identity transform. */
3851 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3852
3853 /* Store the vector statement in NODE. */
3854 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
2ce27200 3855 }
ebfd146a 3856
2ce27200
RB
3857 index = 0;
3858 first_vec_index = -1;
3859 second_vec_index = -1;
be377c80 3860 noop_p = true;
2ce27200
RB
3861 }
3862 }
b8698a0f 3863 }
ebfd146a 3864
ebfd146a
IR
3865 return true;
3866}
3867
ebfd146a
IR
3868/* Vectorize SLP instance tree in postorder. */
3869
3870static bool
f7300fff
RB
3871vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3872 scalar_stmts_to_slp_tree_map_t *bst_map)
ebfd146a 3873{
355fe088 3874 gimple *stmt;
0d0293ac 3875 bool grouped_store, is_store;
ebfd146a
IR
3876 gimple_stmt_iterator si;
3877 stmt_vec_info stmt_info;
8b7e9dba 3878 unsigned int group_size;
ebfd146a 3879 tree vectype;
603cca93 3880 int i, j;
d755c7ef 3881 slp_tree child;
ebfd146a 3882
603cca93 3883 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
3884 return false;
3885
f7300fff
RB
3886 /* See if we have already vectorized the same set of stmts and reuse their
3887 vectorized stmts. */
dd172744 3888 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
f7300fff 3889 {
dd172744 3890 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
f7300fff
RB
3891 return false;
3892 }
3893
dd172744 3894 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
9771b263 3895 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
f7300fff 3896 vect_schedule_slp_instance (child, instance, bst_map);
b8698a0f 3897
603cca93
RB
3898 /* Push SLP node def-type to stmts. */
3899 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3900 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3901 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3902 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3903
9771b263 3904 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3905 stmt_info = vinfo_for_stmt (stmt);
3906
3907 /* VECTYPE is the type of the destination. */
b690cc0f 3908 vectype = STMT_VINFO_VECTYPE (stmt_info);
dad55d70 3909 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
3910 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3911
68435eb2 3912 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
9771b263 3913 if (!SLP_TREE_VEC_STMTS (node).exists ())
8b7e9dba 3914 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
ebfd146a 3915
73fbfcad 3916 if (dump_enabled_p ())
ebfd146a 3917 {
78c60e3d
SS
3918 dump_printf_loc (MSG_NOTE,vect_location,
3919 "------>vectorizing SLP node starting from: ");
3920 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
b8698a0f 3921 }
ebfd146a 3922
2e8ab70c
RB
3923 /* Vectorized stmts go before the last scalar stmt which is where
3924 all uses are ready. */
3925 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
e4a707c4 3926
b010117a
IR
3927 /* Mark the first element of the reduction chain as reduction to properly
3928 transform the node. In the analysis phase only the last element of the
3929 chain is marked as reduction. */
2c53b149
RB
3930 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3931 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
3932 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt)
b010117a
IR
3933 {
3934 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3935 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3936 }
3937
6876e5bc
RB
3938 /* Handle two-operation SLP nodes by vectorizing the group with
3939 both operations and then performing a merge. */
3940 if (SLP_TREE_TWO_OPERATORS (node))
3941 {
3942 enum tree_code code0 = gimple_assign_rhs_code (stmt);
567a3691 3943 enum tree_code ocode = ERROR_MARK;
355fe088 3944 gimple *ostmt;
e3342de4 3945 vec_perm_builder mask (group_size, group_size, 1);
6876e5bc
RB
3946 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3947 if (gimple_assign_rhs_code (ostmt) != code0)
3948 {
908a1a16 3949 mask.quick_push (1);
6876e5bc
RB
3950 ocode = gimple_assign_rhs_code (ostmt);
3951 }
3952 else
908a1a16 3953 mask.quick_push (0);
567a3691 3954 if (ocode != ERROR_MARK)
6876e5bc 3955 {
355fe088
TS
3956 vec<gimple *> v0;
3957 vec<gimple *> v1;
6876e5bc
RB
3958 unsigned j;
3959 tree tmask = NULL_TREE;
3960 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3961 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3962 SLP_TREE_VEC_STMTS (node).truncate (0);
3963 gimple_assign_set_rhs_code (stmt, ocode);
3964 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3965 gimple_assign_set_rhs_code (stmt, code0);
3966 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3967 SLP_TREE_VEC_STMTS (node).truncate (0);
3968 tree meltype = build_nonstandard_integer_type
b397965c 3969 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
6876e5bc
RB
3970 tree mvectype = get_same_sized_vectype (meltype, vectype);
3971 unsigned k = 0, l;
3972 for (j = 0; j < v0.length (); ++j)
3973 {
dad55d70
RS
3974 /* Enforced by vect_build_slp_tree, which rejects variable-length
3975 vectors for SLP_TREE_TWO_OPERATORS. */
3976 unsigned int const_nunits = nunits.to_constant ();
3977 tree_vector_builder melts (mvectype, const_nunits, 1);
3978 for (l = 0; l < const_nunits; ++l)
6876e5bc 3979 {
1ece8d4c 3980 if (k >= group_size)
6876e5bc 3981 k = 0;
dad55d70
RS
3982 tree t = build_int_cst (meltype,
3983 mask[k++] * const_nunits + l);
794e3180 3984 melts.quick_push (t);
6876e5bc 3985 }
5ebaa477 3986 tmask = melts.build ();
6876e5bc
RB
3987
3988 /* ??? Not all targets support a VEC_PERM_EXPR with a
3989 constant mask that would translate to a vec_merge RTX
3990 (with their vec_perm_const_ok). We can either not
3991 vectorize in that case or let veclower do its job.
3992 Unfortunately that isn't too great and at least for
3993 plus/minus we'd eventually like to match targets
3994 vector addsub instructions. */
355fe088 3995 gimple *vstmt;
6876e5bc
RB
3996 vstmt = gimple_build_assign (make_ssa_name (vectype),
3997 VEC_PERM_EXPR,
3998 gimple_assign_lhs (v0[j]),
3999 gimple_assign_lhs (v1[j]), tmask);
4000 vect_finish_stmt_generation (stmt, vstmt, &si);
4001 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
4002 }
4003 v0.release ();
4004 v1.release ();
4005 return false;
4006 }
4007 }
0d0293ac 4008 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
603cca93
RB
4009
4010 /* Restore stmt def-types. */
4011 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4012 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4013 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
4014 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
4015
b5aeb3bb 4016 return is_store;
ebfd146a
IR
4017}
4018
dd34c087
JJ
4019/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4020 For loop vectorization this is done in vectorizable_call, but for SLP
4021 it needs to be deferred until end of vect_schedule_slp, because multiple
4022 SLP instances may refer to the same scalar stmt. */
4023
4024static void
4025vect_remove_slp_scalar_calls (slp_tree node)
4026{
355fe088 4027 gimple *stmt, *new_stmt;
dd34c087
JJ
4028 gimple_stmt_iterator gsi;
4029 int i;
d755c7ef 4030 slp_tree child;
dd34c087
JJ
4031 tree lhs;
4032 stmt_vec_info stmt_info;
4033
603cca93 4034 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
dd34c087
JJ
4035 return;
4036
9771b263 4037 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 4038 vect_remove_slp_scalar_calls (child);
dd34c087 4039
9771b263 4040 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
dd34c087
JJ
4041 {
4042 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
4043 continue;
4044 stmt_info = vinfo_for_stmt (stmt);
4045 if (stmt_info == NULL
4046 || is_pattern_stmt_p (stmt_info)
4047 || !PURE_SLP_STMT (stmt_info))
4048 continue;
4049 lhs = gimple_call_lhs (stmt);
4050 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4051 set_vinfo_for_stmt (new_stmt, stmt_info);
4052 set_vinfo_for_stmt (stmt, NULL);
4053 STMT_VINFO_STMT (stmt_info) = new_stmt;
4054 gsi = gsi_for_stmt (stmt);
4055 gsi_replace (&gsi, new_stmt, false);
4056 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4057 }
4058}
ebfd146a 4059
ff802fa1
IR
4060/* Generate vector code for all SLP instances in the loop/basic block. */
4061
ebfd146a 4062bool
310213d4 4063vect_schedule_slp (vec_info *vinfo)
ebfd146a 4064{
9771b263 4065 vec<slp_instance> slp_instances;
ebfd146a 4066 slp_instance instance;
8b7e9dba 4067 unsigned int i;
ebfd146a
IR
4068 bool is_store = false;
4069
78604de0
RB
4070
4071 scalar_stmts_to_slp_tree_map_t *bst_map
4072 = new scalar_stmts_to_slp_tree_map_t ();
310213d4 4073 slp_instances = vinfo->slp_instances;
9771b263 4074 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
4075 {
4076 /* Schedule the tree of INSTANCE. */
4077 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
f7300fff 4078 instance, bst_map);
73fbfcad 4079 if (dump_enabled_p ())
78c60e3d 4080 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 4081 "vectorizing stmts using SLP.\n");
ebfd146a 4082 }
78604de0 4083 delete bst_map;
ebfd146a 4084
9771b263 4085 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
4086 {
4087 slp_tree root = SLP_INSTANCE_TREE (instance);
355fe088 4088 gimple *store;
b5aeb3bb
IR
4089 unsigned int j;
4090 gimple_stmt_iterator gsi;
4091
c40eced0
RB
4092 /* Remove scalar call stmts. Do not do this for basic-block
4093 vectorization as not all uses may be vectorized.
4094 ??? Why should this be necessary? DCE should be able to
4095 remove the stmts itself.
4096 ??? For BB vectorization we can as well remove scalar
4097 stmts starting from the SLP tree root if they have no
4098 uses. */
310213d4 4099 if (is_a <loop_vec_info> (vinfo))
c40eced0 4100 vect_remove_slp_scalar_calls (root);
dd34c087 4101
9771b263 4102 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
b5aeb3bb
IR
4103 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4104 {
4105 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
4106 break;
4107
a024e70e
IR
4108 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
4109 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
b5aeb3bb
IR
4110 /* Free the attached stmt_vec_info and remove the stmt. */
4111 gsi = gsi_for_stmt (store);
3d3f2249 4112 unlink_stmt_vdef (store);
b5aeb3bb 4113 gsi_remove (&gsi, true);
3d3f2249 4114 release_defs (store);
b5aeb3bb
IR
4115 free_stmt_vec_info (store);
4116 }
4117 }
4118
ebfd146a
IR
4119 return is_store;
4120}