]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
re PR tree-optimization/87105 (Autovectorization [X86, SSE2, AVX2, DoublePrecision])
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
85ec4feb 2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
957060b5 35#include "params.h"
40e23961 36#include "fold-const.h"
d8a2d370 37#include "stor-layout.h"
5be5c238 38#include "gimple-iterator.h"
ebfd146a 39#include "cfgloop.h"
ebfd146a 40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
642fce57 42#include "gimple-walk.h"
428db0ba 43#include "dbgcnt.h"
5ebaa477 44#include "tree-vector-builder.h"
f151c9e1 45#include "vec-perm-indices.h"
018b2744
RS
46#include "gimple-fold.h"
47#include "internal-fn.h"
a70d6342
IR
48
49
6e2dd807
RS
50/* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
ebfd146a
IR
53
54static void
6e2dd807 55vect_free_slp_tree (slp_tree node, bool final_p)
ebfd146a 56{
d092494c 57 int i;
d755c7ef 58 slp_tree child;
d092494c 59
a1f072e2
RB
60 if (--node->refcnt != 0)
61 return;
62
9771b263 63 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
6e2dd807 64 vect_free_slp_tree (child, final_p);
b8698a0f 65
6e2dd807
RS
66 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
67 Some statements might no longer exist, after having been
68 removed by vect_transform_stmt. Updating the remaining
69 statements would be redundant. */
70 if (!final_p)
71 {
b9787581
RS
72 stmt_vec_info stmt_info;
73 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6e2dd807 74 {
b9787581
RS
75 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
76 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
6e2dd807
RS
77 }
78 }
78810bd3 79
9771b263
DN
80 SLP_TREE_CHILDREN (node).release ();
81 SLP_TREE_SCALAR_STMTS (node).release ();
82 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 83 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
84
85 free (node);
86}
87
6e2dd807
RS
88/* Free the memory allocated for the SLP instance. FINAL_P is true if we
89 have vectorized the instance or if we have made a final decision not
90 to vectorize the statements in any way. */
ebfd146a
IR
91
92void
6e2dd807 93vect_free_slp_instance (slp_instance instance, bool final_p)
ebfd146a 94{
6e2dd807 95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
9771b263 96 SLP_INSTANCE_LOADS (instance).release ();
c7e62a26 97 free (instance);
ebfd146a
IR
98}
99
100
d092494c
IR
101/* Create an SLP node for SCALAR_STMTS. */
102
103static slp_tree
b9787581 104vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
d092494c 105{
d3cfd39e 106 slp_tree node;
b9787581 107 stmt_vec_info stmt_info = scalar_stmts[0];
d092494c
IR
108 unsigned int nops;
109
b9787581 110 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
d092494c 111 nops = gimple_call_num_args (stmt);
b9787581 112 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
f7e531cf
IR
113 {
114 nops = gimple_num_ops (stmt) - 1;
115 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
116 nops++;
117 }
b9787581 118 else if (is_a <gphi *> (stmt_info->stmt))
e7baeb39 119 nops = 0;
d092494c
IR
120 else
121 return NULL;
122
d3cfd39e 123 node = XNEW (struct _slp_tree);
d092494c 124 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263 125 SLP_TREE_VEC_STMTS (node).create (0);
68435eb2 126 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
9771b263 127 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 128 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 129 SLP_TREE_TWO_OPERATORS (node) = false;
603cca93 130 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
a1f072e2 131 node->refcnt = 1;
d092494c 132
78810bd3 133 unsigned i;
b9787581
RS
134 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
135 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
78810bd3 136
d092494c
IR
137 return node;
138}
139
140
ddf56386
RB
141/* This structure is used in creation of an SLP tree. Each instance
142 corresponds to the same operand in a group of scalar stmts in an SLP
143 node. */
144typedef struct _slp_oprnd_info
145{
146 /* Def-stmts for the operands. */
b9787581 147 vec<stmt_vec_info> def_stmts;
ddf56386
RB
148 /* Information about the first statement, its vector def-type, type, the
149 operand itself in case it's constant, and an indication if it's a pattern
150 stmt. */
ddf56386 151 tree first_op_type;
34e82342 152 enum vect_def_type first_dt;
ddf56386
RB
153 bool first_pattern;
154 bool second_pattern;
155} *slp_oprnd_info;
156
157
d092494c
IR
158/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
159 operand. */
9771b263 160static vec<slp_oprnd_info>
d092494c
IR
161vect_create_oprnd_info (int nops, int group_size)
162{
163 int i;
164 slp_oprnd_info oprnd_info;
9771b263 165 vec<slp_oprnd_info> oprnds_info;
d092494c 166
9771b263 167 oprnds_info.create (nops);
d092494c
IR
168 for (i = 0; i < nops; i++)
169 {
170 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 171 oprnd_info->def_stmts.create (group_size);
d092494c 172 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 173 oprnd_info->first_op_type = NULL_TREE;
d092494c 174 oprnd_info->first_pattern = false;
effb52da 175 oprnd_info->second_pattern = false;
9771b263 176 oprnds_info.quick_push (oprnd_info);
d092494c
IR
177 }
178
179 return oprnds_info;
180}
181
182
d3cfd39e
JJ
183/* Free operands info. */
184
d092494c 185static void
9771b263 186vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
187{
188 int i;
189 slp_oprnd_info oprnd_info;
190
9771b263 191 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 192 {
9771b263 193 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
194 XDELETE (oprnd_info);
195 }
d092494c 196
9771b263 197 oprnds_info.release ();
d092494c
IR
198}
199
200
32e8e429
RS
201/* Find the place of the data-ref in STMT_INFO in the interleaving chain
202 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
203 of the chain. */
d755c7ef 204
b210f45f 205int
32e8e429
RS
206vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
207 stmt_vec_info first_stmt_info)
d755c7ef 208{
a1824cfd 209 stmt_vec_info next_stmt_info = first_stmt_info;
d755c7ef
RB
210 int result = 0;
211
a1824cfd 212 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
d755c7ef
RB
213 return -1;
214
215 do
216 {
a1824cfd 217 if (next_stmt_info == stmt_info)
d755c7ef 218 return result;
a1824cfd
RS
219 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
220 if (next_stmt_info)
221 result += DR_GROUP_GAP (next_stmt_info);
d755c7ef 222 }
a1824cfd 223 while (next_stmt_info);
d755c7ef
RB
224
225 return -1;
226}
227
018b2744
RS
228/* Check whether it is possible to load COUNT elements of type ELT_MODE
229 using the method implemented by duplicate_and_interleave. Return true
230 if so, returning the number of intermediate vectors in *NVECTORS_OUT
231 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
232 (if nonnull). */
233
f1739b48 234bool
018b2744 235can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
f1739b48
RS
236 unsigned int *nvectors_out,
237 tree *vector_type_out,
238 tree *permutes)
018b2744
RS
239{
240 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
241 poly_int64 nelts;
242 unsigned int nvectors = 1;
243 for (;;)
244 {
245 scalar_int_mode int_mode;
246 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
247 if (multiple_p (current_vector_size, elt_bytes, &nelts)
248 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
249 {
250 tree int_type = build_nonstandard_integer_type
251 (GET_MODE_BITSIZE (int_mode), 1);
252 tree vector_type = build_vector_type (int_type, nelts);
253 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
254 {
255 vec_perm_builder sel1 (nelts, 2, 3);
256 vec_perm_builder sel2 (nelts, 2, 3);
257 poly_int64 half_nelts = exact_div (nelts, 2);
258 for (unsigned int i = 0; i < 3; ++i)
259 {
260 sel1.quick_push (i);
261 sel1.quick_push (i + nelts);
262 sel2.quick_push (half_nelts + i);
263 sel2.quick_push (half_nelts + i + nelts);
264 }
265 vec_perm_indices indices1 (sel1, 2, nelts);
266 vec_perm_indices indices2 (sel2, 2, nelts);
267 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
268 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
269 {
270 if (nvectors_out)
271 *nvectors_out = nvectors;
272 if (vector_type_out)
273 *vector_type_out = vector_type;
274 if (permutes)
275 {
276 permutes[0] = vect_gen_perm_mask_checked (vector_type,
277 indices1);
278 permutes[1] = vect_gen_perm_mask_checked (vector_type,
279 indices2);
280 }
281 return true;
282 }
283 }
284 }
285 if (!multiple_p (elt_bytes, 2, &elt_bytes))
286 return false;
287 nvectors *= 2;
288 }
289}
d755c7ef 290
d092494c
IR
291/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
292 they are of a valid type and that they match the defs of the first stmt of
4cecd659 293 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
018b2744
RS
294 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
295 indicates swap is required for cond_expr stmts. Specifically, *SWAP
296 is 1 if STMT is cond and operands of comparison need to be swapped;
297 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
298 If there is any operand swap in this function, *SWAP is set to non-zero
299 value.
4cecd659
BC
300 If there was a fatal error return -1; if the error could be corrected by
301 swapping operands of father node of this one, return 1; if everything is
302 ok return 0. */
4cecd659
BC
303static int
304vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
b9787581 305 vec<stmt_vec_info> stmts, unsigned stmt_num,
4cecd659 306 vec<slp_oprnd_info> *oprnds_info)
ebfd146a 307{
b9787581 308 stmt_vec_info stmt_info = stmts[stmt_num];
ebfd146a
IR
309 tree oprnd;
310 unsigned int i, number_of_oprnds;
d092494c 311 enum vect_def_type dt = vect_uninitialized_def;
d092494c 312 bool pattern = false;
abf9bfbc 313 slp_oprnd_info oprnd_info;
b0b4483e 314 int first_op_idx = 1;
0246112a 315 unsigned int commutative_op = -1U;
b0b4483e 316 bool first_op_cond = false;
effb52da
RB
317 bool first = stmt_num == 0;
318 bool second = stmt_num == 1;
b8698a0f 319
b9787581 320 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
190c2236
JJ
321 {
322 number_of_oprnds = gimple_call_num_args (stmt);
b0b4483e 323 first_op_idx = 3;
0246112a
RS
324 if (gimple_call_internal_p (stmt))
325 {
326 internal_fn ifn = gimple_call_internal_fn (stmt);
327 commutative_op = first_commutative_argument (ifn);
328 }
190c2236 329 }
b9787581 330 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
f7e531cf 331 {
b0b4483e 332 enum tree_code code = gimple_assign_rhs_code (stmt);
f7e531cf 333 number_of_oprnds = gimple_num_ops (stmt) - 1;
4cecd659
BC
334 /* Swap can only be done for cond_expr if asked to, otherwise we
335 could result in different comparison code to the first stmt. */
a414c77f
IE
336 if (gimple_assign_rhs_code (stmt) == COND_EXPR
337 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
b0b4483e
RB
338 {
339 first_op_cond = true;
b0b4483e
RB
340 number_of_oprnds++;
341 }
342 else
0246112a 343 commutative_op = commutative_tree_code (code) ? 0U : -1U;
f7e531cf 344 }
d092494c 345 else
b0b4483e 346 return -1;
ebfd146a 347
4cecd659
BC
348 bool swapped = (*swap != 0);
349 gcc_assert (!swapped || first_op_cond);
ebfd146a
IR
350 for (i = 0; i < number_of_oprnds; i++)
351 {
b0b4483e
RB
352again:
353 if (first_op_cond)
f7e531cf 354 {
4cecd659
BC
355 /* Map indicating how operands of cond_expr should be swapped. */
356 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
357 int *map = maps[*swap];
358
359 if (i < 2)
b9787581
RS
360 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
361 first_op_idx), map[i]);
b0b4483e 362 else
b9787581 363 oprnd = gimple_op (stmt_info->stmt, map[i]);
f7e531cf
IR
364 }
365 else
b9787581 366 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
f7e531cf 367
9771b263 368 oprnd_info = (*oprnds_info)[i];
ebfd146a 369
fef96d8e
RS
370 stmt_vec_info def_stmt_info;
371 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
ebfd146a 372 {
73fbfcad 373 if (dump_enabled_p ())
3c2a8ed0
DM
374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
375 "Build SLP failed: can't analyze def for %T\n",
376 oprnd);
ebfd146a 377
b0b4483e 378 return -1;
ebfd146a
IR
379 }
380
effb52da
RB
381 if (second)
382 oprnd_info->second_pattern = pattern;
383
d092494c 384 if (first)
ebfd146a 385 {
d092494c
IR
386 oprnd_info->first_dt = dt;
387 oprnd_info->first_pattern = pattern;
793d9a16 388 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 389 }
ebfd146a
IR
390 else
391 {
d092494c
IR
392 /* Not first stmt of the group, check that the def-stmt/s match
393 the def-stmt/s of the first stmt. Allow different definition
394 types for reduction chains: the first stmt must be a
395 vect_reduction_def (a phi node), and the rest
396 vect_internal_def. */
018b2744
RS
397 tree type = TREE_TYPE (oprnd);
398 if ((oprnd_info->first_dt != dt
399 && !(oprnd_info->first_dt == vect_reduction_def
400 && dt == vect_internal_def)
401 && !((oprnd_info->first_dt == vect_external_def
402 || oprnd_info->first_dt == vect_constant_def)
403 && (dt == vect_external_def
404 || dt == vect_constant_def)))
405 || !types_compatible_p (oprnd_info->first_op_type, type))
ebfd146a 406 {
b0b4483e 407 /* Try swapping operands if we got a mismatch. */
0246112a 408 if (i == commutative_op && !swapped)
b0b4483e
RB
409 {
410 swapped = true;
411 goto again;
412 }
413
abf9bfbc
RB
414 if (dump_enabled_p ())
415 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 416 "Build SLP failed: different types\n");
d092494c 417
b0b4483e 418 return 1;
ebfd146a 419 }
018b2744
RS
420 if ((dt == vect_constant_def
421 || dt == vect_external_def)
422 && !current_vector_size.is_constant ()
423 && (TREE_CODE (type) == BOOLEAN_TYPE
424 || !can_duplicate_and_interleave_p (stmts.length (),
425 TYPE_MODE (type))))
a23644f2
RS
426 {
427 if (dump_enabled_p ())
3c2a8ed0
DM
428 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
429 "Build SLP failed: invalid type of def "
430 "for variable-length SLP %T\n", oprnd);
a23644f2
RS
431 return -1;
432 }
018b2744
RS
433 }
434
435 /* Check the types of the definitions. */
436 switch (dt)
437 {
438 case vect_constant_def:
439 case vect_external_def:
ebfd146a 440 break;
b8698a0f 441
c78e3652 442 case vect_reduction_def:
e7baeb39 443 case vect_induction_def:
8644a673 444 case vect_internal_def:
fef96d8e 445 oprnd_info->def_stmts.quick_push (def_stmt_info);
ebfd146a
IR
446 break;
447
448 default:
449 /* FORNOW: Not supported. */
73fbfcad 450 if (dump_enabled_p ())
3c2a8ed0
DM
451 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
452 "Build SLP failed: illegal type of def %T\n",
453 oprnd);
ebfd146a 454
b0b4483e 455 return -1;
ebfd146a
IR
456 }
457 }
458
b0b4483e
RB
459 /* Swap operands. */
460 if (swapped)
461 {
78810bd3
RB
462 /* If there are already uses of this stmt in a SLP instance then
463 we've committed to the operand order and can't swap it. */
b9787581 464 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
78810bd3
RB
465 {
466 if (dump_enabled_p ())
3c2a8ed0
DM
467 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
468 "Build SLP failed: cannot swap operands of "
469 "shared stmt %G", stmt_info->stmt);
78810bd3
RB
470 return -1;
471 }
472
b0b4483e
RB
473 if (first_op_cond)
474 {
0246112a 475 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
b0b4483e 476 tree cond = gimple_assign_rhs1 (stmt);
4cecd659
BC
477 enum tree_code code = TREE_CODE (cond);
478
479 /* Swap. */
480 if (*swap == 1)
481 {
482 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
483 &TREE_OPERAND (cond, 1));
484 TREE_SET_CODE (cond, swap_tree_comparison (code));
485 }
486 /* Invert. */
487 else
488 {
489 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
490 gimple_assign_rhs3_ptr (stmt));
491 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
492 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
493 gcc_assert (code != ERROR_MARK);
494 TREE_SET_CODE (cond, code);
495 }
b0b4483e
RB
496 }
497 else
0246112a
RS
498 {
499 unsigned int op = commutative_op + first_op_idx;
500 swap_ssa_operands (stmt_info->stmt,
501 gimple_op_ptr (stmt_info->stmt, op),
502 gimple_op_ptr (stmt_info->stmt, op + 1));
503 }
78810bd3 504 if (dump_enabled_p ())
3c2a8ed0
DM
505 dump_printf_loc (MSG_NOTE, vect_location,
506 "swapped operands to match def types in %G",
507 stmt_info->stmt);
b0b4483e
RB
508 }
509
4cecd659 510 *swap = swapped;
b0b4483e 511 return 0;
ebfd146a
IR
512}
513
5249ee4d
RS
514/* Return true if call statements CALL1 and CALL2 are similar enough
515 to be combined into the same SLP group. */
516
517static bool
518compatible_calls_p (gcall *call1, gcall *call2)
519{
520 unsigned int nargs = gimple_call_num_args (call1);
521 if (nargs != gimple_call_num_args (call2))
522 return false;
523
524 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
525 return false;
526
527 if (gimple_call_internal_p (call1))
528 {
529 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
530 TREE_TYPE (gimple_call_lhs (call2))))
531 return false;
532 for (unsigned int i = 0; i < nargs; ++i)
533 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
534 TREE_TYPE (gimple_call_arg (call2, i))))
535 return false;
536 }
537 else
538 {
539 if (!operand_equal_p (gimple_call_fn (call1),
540 gimple_call_fn (call2), 0))
541 return false;
542
543 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
544 return false;
545 }
546 return true;
547}
548
b161f2c9 549/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
d7609678 550 caller's attempt to find the vector type in STMT_INFO with the narrowest
b161f2c9 551 element type. Return true if VECTYPE is nonnull and if it is valid
d7609678
RS
552 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
553 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
554 vect_build_slp_tree. */
b161f2c9
RS
555
556static bool
d7609678 557vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
4b6068ea 558 tree vectype, poly_uint64 *max_nunits)
b161f2c9
RS
559{
560 if (!vectype)
561 {
562 if (dump_enabled_p ())
3c2a8ed0
DM
563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
564 "Build SLP failed: unsupported data-type in %G\n",
565 stmt_info->stmt);
b161f2c9
RS
566 /* Fatal mismatch. */
567 return false;
568 }
569
570 /* If populating the vector type requires unrolling then fail
571 before adjusting *max_nunits for basic-block vectorization. */
4b6068ea
RS
572 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
573 unsigned HOST_WIDE_INT const_nunits;
d7609678 574 if (STMT_VINFO_BB_VINFO (stmt_info)
4b6068ea
RS
575 && (!nunits.is_constant (&const_nunits)
576 || const_nunits > group_size))
b161f2c9
RS
577 {
578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
579 "Build SLP failed: unrolling required "
580 "in basic block SLP\n");
581 /* Fatal mismatch. */
582 return false;
583 }
584
585 /* In case of multiple types we need to detect the smallest type. */
4b6068ea 586 vect_update_max_nunits (max_nunits, vectype);
b161f2c9
RS
587 return true;
588}
ebfd146a 589
1f3cb663
RS
590/* STMTS is a group of GROUP_SIZE SLP statements in which some
591 statements do the same operation as the first statement and in which
592 the others do ALT_STMT_CODE. Return true if we can take one vector
593 of the first operation and one vector of the second and permute them
594 to get the required result. VECTYPE is the type of the vector that
595 would be permuted. */
596
597static bool
b9787581
RS
598vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
599 unsigned int group_size, tree vectype,
600 tree_code alt_stmt_code)
1f3cb663
RS
601{
602 unsigned HOST_WIDE_INT count;
603 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
604 return false;
605
606 vec_perm_builder sel (count, count, 1);
607 for (unsigned int i = 0; i < count; ++i)
608 {
609 unsigned int elt = i;
b9787581
RS
610 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
611 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
1f3cb663
RS
612 elt += count;
613 sel.quick_push (elt);
614 }
615 vec_perm_indices indices (sel, 2, count);
616 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
617}
618
6983e6b5
RB
619/* Verify if the scalar stmts STMTS are isomorphic, require data
620 permutation or are of unsupported types of operation. Return
621 true if they are, otherwise return false and indicate in *MATCHES
622 which stmts are not isomorphic to the first one. If MATCHES[0]
623 is false then this indicates the comparison could not be
4cecd659
BC
624 carried out or the stmts will never be vectorized by SLP.
625
626 Note COND_EXPR is possibly ismorphic to another one after swapping its
627 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
628 the first stmt by swapping the two operands of comparison; set SWAP[i]
629 to 2 if stmt I is isormorphic to the first stmt by inverting the code
630 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
631 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
ebfd146a
IR
632
633static bool
d7609678 634vect_build_slp_tree_1 (unsigned char *swap,
b9787581 635 vec<stmt_vec_info> stmts, unsigned int group_size,
5249ee4d
RS
636 poly_uint64 *max_nunits, bool *matches,
637 bool *two_operators)
ebfd146a 638{
ebfd146a 639 unsigned int i;
b9787581 640 stmt_vec_info first_stmt_info = stmts[0];
6876e5bc
RB
641 enum tree_code first_stmt_code = ERROR_MARK;
642 enum tree_code alt_stmt_code = ERROR_MARK;
643 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 644 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 645 tree lhs;
6983e6b5 646 bool need_same_oprnds = false;
1f3cb663 647 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
ebfd146a
IR
648 optab optab;
649 int icode;
ef4bddc2
RS
650 machine_mode optab_op2_mode;
651 machine_mode vec_mode;
bffb8014 652 stmt_vec_info first_load = NULL, prev_first_load = NULL;
d092494c 653
ebfd146a 654 /* For every stmt in NODE find its def stmt/s. */
b9787581
RS
655 stmt_vec_info stmt_info;
656 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
ebfd146a 657 {
b9787581 658 gimple *stmt = stmt_info->stmt;
4cecd659 659 swap[i] = 0;
6983e6b5
RB
660 matches[i] = false;
661
73fbfcad 662 if (dump_enabled_p ())
3c2a8ed0 663 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
ebfd146a 664
4b5caab7 665 /* Fail to vectorize statements marked as unvectorizable. */
b9787581 666 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
4b5caab7 667 {
73fbfcad 668 if (dump_enabled_p ())
3c2a8ed0
DM
669 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
670 "Build SLP failed: unvectorizable statement %G",
671 stmt);
6983e6b5
RB
672 /* Fatal mismatch. */
673 matches[0] = false;
4b5caab7
IR
674 return false;
675 }
676
ebfd146a
IR
677 lhs = gimple_get_lhs (stmt);
678 if (lhs == NULL_TREE)
679 {
73fbfcad 680 if (dump_enabled_p ())
3c2a8ed0
DM
681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
682 "Build SLP failed: not GIMPLE_ASSIGN nor "
683 "GIMPLE_CALL %G", stmt);
6983e6b5
RB
684 /* Fatal mismatch. */
685 matches[0] = false;
ebfd146a
IR
686 return false;
687 }
688
1f3cb663
RS
689 tree nunits_vectype;
690 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
691 &nunits_vectype)
692 || (nunits_vectype
d7609678 693 && !vect_record_max_nunits (stmt_info, group_size,
1f3cb663 694 nunits_vectype, max_nunits)))
b161f2c9 695 {
6983e6b5
RB
696 /* Fatal mismatch. */
697 matches[0] = false;
1f3cb663
RS
698 return false;
699 }
700
701 gcc_assert (vectype);
b8698a0f 702
538dd0b7 703 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
704 {
705 rhs_code = CALL_EXPR;
5249ee4d
RS
706 if ((gimple_call_internal_p (call_stmt)
707 && (!vectorizable_internal_fn_p
708 (gimple_call_internal_fn (call_stmt))))
538dd0b7
DM
709 || gimple_call_tail_p (call_stmt)
710 || gimple_call_noreturn_p (call_stmt)
711 || !gimple_call_nothrow_p (call_stmt)
712 || gimple_call_chain (call_stmt))
190c2236 713 {
73fbfcad 714 if (dump_enabled_p ())
3c2a8ed0
DM
715 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
716 "Build SLP failed: unsupported call type %G",
717 call_stmt);
6983e6b5
RB
718 /* Fatal mismatch. */
719 matches[0] = false;
190c2236
JJ
720 return false;
721 }
722 }
ebfd146a
IR
723 else
724 rhs_code = gimple_assign_rhs_code (stmt);
725
726 /* Check the operation. */
727 if (i == 0)
728 {
729 first_stmt_code = rhs_code;
730
b8698a0f 731 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
732 vector shift with scalar shift operand. */
733 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
734 || rhs_code == LROTATE_EXPR
735 || rhs_code == RROTATE_EXPR)
736 {
1f3cb663
RS
737 if (vectype == boolean_type_node)
738 {
739 if (dump_enabled_p ())
740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
741 "Build SLP failed: shift of a"
742 " boolean.\n");
743 /* Fatal mismatch. */
744 matches[0] = false;
745 return false;
746 }
747
ebfd146a
IR
748 vec_mode = TYPE_MODE (vectype);
749
750 /* First see if we have a vector/vector shift. */
751 optab = optab_for_tree_code (rhs_code, vectype,
752 optab_vector);
753
754 if (!optab
947131ba 755 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
756 {
757 /* No vector/vector shift, try for a vector/scalar shift. */
758 optab = optab_for_tree_code (rhs_code, vectype,
759 optab_scalar);
760
761 if (!optab)
762 {
73fbfcad 763 if (dump_enabled_p ())
78c60e3d 764 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 765 "Build SLP failed: no optab.\n");
6983e6b5
RB
766 /* Fatal mismatch. */
767 matches[0] = false;
ebfd146a
IR
768 return false;
769 }
947131ba 770 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
771 if (icode == CODE_FOR_nothing)
772 {
73fbfcad 773 if (dump_enabled_p ())
78c60e3d
SS
774 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
775 "Build SLP failed: "
e645e942 776 "op not supported by target.\n");
6983e6b5
RB
777 /* Fatal mismatch. */
778 matches[0] = false;
ebfd146a
IR
779 return false;
780 }
781 optab_op2_mode = insn_data[icode].operand[2].mode;
782 if (!VECTOR_MODE_P (optab_op2_mode))
783 {
784 need_same_oprnds = true;
785 first_op1 = gimple_assign_rhs2 (stmt);
786 }
787 }
788 }
36ba4aae
IR
789 else if (rhs_code == WIDEN_LSHIFT_EXPR)
790 {
791 need_same_oprnds = true;
792 first_op1 = gimple_assign_rhs2 (stmt);
793 }
ebfd146a
IR
794 }
795 else
796 {
6876e5bc
RB
797 if (first_stmt_code != rhs_code
798 && alt_stmt_code == ERROR_MARK)
799 alt_stmt_code = rhs_code;
ebfd146a
IR
800 if (first_stmt_code != rhs_code
801 && (first_stmt_code != IMAGPART_EXPR
802 || rhs_code != REALPART_EXPR)
803 && (first_stmt_code != REALPART_EXPR
69f11a13 804 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
805 /* Handle mismatches in plus/minus by computing both
806 and merging the results. */
807 && !((first_stmt_code == PLUS_EXPR
808 || first_stmt_code == MINUS_EXPR)
809 && (alt_stmt_code == PLUS_EXPR
810 || alt_stmt_code == MINUS_EXPR)
811 && rhs_code == alt_stmt_code)
b9787581 812 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
69f11a13 813 && (first_stmt_code == ARRAY_REF
38000232 814 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
815 || first_stmt_code == INDIRECT_REF
816 || first_stmt_code == COMPONENT_REF
817 || first_stmt_code == MEM_REF)))
ebfd146a 818 {
73fbfcad 819 if (dump_enabled_p ())
ebfd146a 820 {
78c60e3d
SS
821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
822 "Build SLP failed: different operation "
3c2a8ed0 823 "in stmt %G", stmt);
6876e5bc 824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3c2a8ed0 825 "original stmt %G", first_stmt_info->stmt);
ebfd146a 826 }
6983e6b5
RB
827 /* Mismatch. */
828 continue;
ebfd146a 829 }
b8698a0f
L
830
831 if (need_same_oprnds
ebfd146a
IR
832 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
833 {
73fbfcad 834 if (dump_enabled_p ())
3c2a8ed0
DM
835 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
836 "Build SLP failed: different shift "
837 "arguments in %G", stmt);
6983e6b5
RB
838 /* Mismatch. */
839 continue;
ebfd146a 840 }
190c2236
JJ
841
842 if (rhs_code == CALL_EXPR)
843 {
b9787581 844 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
5249ee4d 845 as_a <gcall *> (stmt)))
190c2236 846 {
73fbfcad 847 if (dump_enabled_p ())
3c2a8ed0
DM
848 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
849 "Build SLP failed: different calls in %G",
850 stmt);
6983e6b5
RB
851 /* Mismatch. */
852 continue;
190c2236
JJ
853 }
854 }
ebfd146a
IR
855 }
856
0d0293ac 857 /* Grouped store or load. */
b9787581 858 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a
IR
859 {
860 if (REFERENCE_CLASS_P (lhs))
861 {
862 /* Store. */
6983e6b5 863 ;
ebfd146a 864 }
b5aeb3bb
IR
865 else
866 {
867 /* Load. */
b9787581 868 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
b5aeb3bb
IR
869 if (prev_first_load)
870 {
871 /* Check that there are no loads from different interleaving
6983e6b5
RB
872 chains in the same node. */
873 if (prev_first_load != first_load)
78c60e3d 874 {
73fbfcad 875 if (dump_enabled_p ())
3c2a8ed0
DM
876 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
877 vect_location,
878 "Build SLP failed: different "
879 "interleaving chains in one node %G",
880 stmt);
6983e6b5
RB
881 /* Mismatch. */
882 continue;
b5aeb3bb
IR
883 }
884 }
885 else
886 prev_first_load = first_load;
ebfd146a 887 }
0d0293ac 888 } /* Grouped access. */
ebfd146a
IR
889 else
890 {
891 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
892 {
0d0293ac 893 /* Not grouped load. */
73fbfcad 894 if (dump_enabled_p ())
3c2a8ed0
DM
895 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
896 "Build SLP failed: not grouped load %G", stmt);
ebfd146a 897
0d0293ac 898 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
899 /* Fatal mismatch. */
900 matches[0] = false;
ebfd146a
IR
901 return false;
902 }
903
904 /* Not memory operation. */
905 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 906 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 907 && TREE_CODE_CLASS (rhs_code) != tcc_expression
42fd8198 908 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
190c2236 909 && rhs_code != CALL_EXPR)
ebfd146a 910 {
73fbfcad 911 if (dump_enabled_p ())
3c2a8ed0
DM
912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
913 "Build SLP failed: operation unsupported %G",
914 stmt);
6983e6b5
RB
915 /* Fatal mismatch. */
916 matches[0] = false;
ebfd146a
IR
917 return false;
918 }
919
4cecd659
BC
920 if (rhs_code == COND_EXPR)
921 {
922 tree cond_expr = gimple_assign_rhs1 (stmt);
923 enum tree_code cond_code = TREE_CODE (cond_expr);
924 enum tree_code swap_code = ERROR_MARK;
925 enum tree_code invert_code = ERROR_MARK;
f7e531cf
IR
926
927 if (i == 0)
928 first_cond_code = TREE_CODE (cond_expr);
4cecd659
BC
929 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
930 {
931 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
932 swap_code = swap_tree_comparison (cond_code);
933 invert_code = invert_tree_comparison (cond_code, honor_nans);
934 }
935
936 if (first_cond_code == cond_code)
937 ;
938 /* Isomorphic can be achieved by swapping. */
939 else if (first_cond_code == swap_code)
940 swap[i] = 1;
941 /* Isomorphic can be achieved by inverting. */
942 else if (first_cond_code == invert_code)
943 swap[i] = 2;
944 else
945 {
946 if (dump_enabled_p ())
3c2a8ed0
DM
947 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
948 "Build SLP failed: different"
949 " operation %G", stmt);
6983e6b5
RB
950 /* Mismatch. */
951 continue;
f7e531cf 952 }
4cecd659 953 }
ebfd146a 954 }
6983e6b5
RB
955
956 matches[i] = true;
957 }
958
959 for (i = 0; i < group_size; ++i)
960 if (!matches[i])
961 return false;
962
6876e5bc
RB
963 /* If we allowed a two-operation SLP node verify the target can cope
964 with the permute we are going to use. */
965 if (alt_stmt_code != ERROR_MARK
966 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
967 {
1f3cb663
RS
968 if (vectype == boolean_type_node
969 || !vect_two_operations_perm_ok_p (stmts, group_size,
970 vectype, alt_stmt_code))
6876e5bc
RB
971 {
972 for (i = 0; i < group_size; ++i)
b9787581 973 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
6876e5bc
RB
974 {
975 matches[i] = false;
976 if (dump_enabled_p ())
977 {
978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
979 "Build SLP failed: different operation "
3c2a8ed0 980 "in stmt %G", stmts[i]->stmt);
6876e5bc 981 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3c2a8ed0 982 "original stmt %G", first_stmt_info->stmt);
6876e5bc
RB
983 }
984 }
985 return false;
986 }
987 *two_operators = true;
988 }
989
6983e6b5
RB
990 return true;
991}
992
26d66f28
RB
993/* Traits for the hash_set to record failed SLP builds for a stmt set.
994 Note we never remove apart from at destruction time so we do not
995 need a special value for deleted that differs from empty. */
996struct bst_traits
997{
b9787581
RS
998 typedef vec <stmt_vec_info> value_type;
999 typedef vec <stmt_vec_info> compare_type;
26d66f28
RB
1000 static inline hashval_t hash (value_type);
1001 static inline bool equal (value_type existing, value_type candidate);
1002 static inline bool is_empty (value_type x) { return !x.exists (); }
1003 static inline bool is_deleted (value_type x) { return !x.exists (); }
1004 static inline void mark_empty (value_type &x) { x.release (); }
1005 static inline void mark_deleted (value_type &x) { x.release (); }
1006 static inline void remove (value_type &x) { x.release (); }
1007};
1008inline hashval_t
1009bst_traits::hash (value_type x)
1010{
1011 inchash::hash h;
1012 for (unsigned i = 0; i < x.length (); ++i)
b9787581 1013 h.add_int (gimple_uid (x[i]->stmt));
26d66f28
RB
1014 return h.end ();
1015}
1016inline bool
1017bst_traits::equal (value_type existing, value_type candidate)
1018{
1019 if (existing.length () != candidate.length ())
1020 return false;
1021 for (unsigned i = 0; i < existing.length (); ++i)
1022 if (existing[i] != candidate[i])
1023 return false;
1024 return true;
1025}
1026
68435eb2
RB
1027typedef hash_map <vec <gimple *>, slp_tree,
1028 simple_hashmap_traits <bst_traits, slp_tree> >
1029 scalar_stmts_to_slp_tree_map_t;
1030
26d66f28
RB
1031static slp_tree
1032vect_build_slp_tree_2 (vec_info *vinfo,
b9787581 1033 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1034 poly_uint64 *max_nunits,
26d66f28
RB
1035 vec<slp_tree> *loads,
1036 bool *matches, unsigned *npermutes, unsigned *tree_size,
a1f072e2
RB
1037 unsigned max_tree_size,
1038 scalar_stmts_to_slp_tree_map_t *bst_map);
6983e6b5 1039
e403d17e 1040static slp_tree
310213d4 1041vect_build_slp_tree (vec_info *vinfo,
b9787581 1042 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1043 poly_uint64 *max_nunits, vec<slp_tree> *loads,
1428105c 1044 bool *matches, unsigned *npermutes, unsigned *tree_size,
a1f072e2
RB
1045 unsigned max_tree_size,
1046 scalar_stmts_to_slp_tree_map_t *bst_map)
26d66f28 1047{
a1f072e2 1048 if (slp_tree *leader = bst_map->get (stmts))
26d66f28 1049 {
a1f072e2
RB
1050 if (dump_enabled_p ())
1051 dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
1052 *leader ? "" : "failed ", *leader);
1053 if (*leader)
1054 (*leader)->refcnt++;
1055 return *leader;
26d66f28 1056 }
a1f072e2
RB
1057 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
1058 loads, matches, npermutes, tree_size,
1059 max_tree_size, bst_map);
1060 /* Keep a reference for the bst_map use. */
1061 if (res)
1062 res->refcnt++;
1063 bst_map->put (stmts.copy (), res);
26d66f28
RB
1064 return res;
1065}
1066
1067/* Recursively build an SLP tree starting from NODE.
1068 Fail (and return a value not equal to zero) if def-stmts are not
1069 isomorphic, require data permutation or are of unsupported types of
1070 operation. Otherwise, return 0.
1071 The value returned is the depth in the SLP tree where a mismatch
1072 was found. */
1073
1074static slp_tree
1075vect_build_slp_tree_2 (vec_info *vinfo,
b9787581 1076 vec<stmt_vec_info> stmts, unsigned int group_size,
4b6068ea 1077 poly_uint64 *max_nunits,
26d66f28
RB
1078 vec<slp_tree> *loads,
1079 bool *matches, unsigned *npermutes, unsigned *tree_size,
a1f072e2
RB
1080 unsigned max_tree_size,
1081 scalar_stmts_to_slp_tree_map_t *bst_map)
6983e6b5 1082{
4b6068ea
RS
1083 unsigned nops, i, this_tree_size = 0;
1084 poly_uint64 this_max_nunits = *max_nunits;
e403d17e 1085 slp_tree node;
6983e6b5 1086
6983e6b5
RB
1087 matches[0] = false;
1088
b9787581
RS
1089 stmt_vec_info stmt_info = stmts[0];
1090 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
6983e6b5 1091 nops = gimple_call_num_args (stmt);
b9787581 1092 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
6983e6b5
RB
1093 {
1094 nops = gimple_num_ops (stmt) - 1;
1095 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1096 nops++;
ebfd146a 1097 }
b9787581 1098 else if (is_a <gphi *> (stmt_info->stmt))
e7baeb39 1099 nops = 0;
6983e6b5 1100 else
e403d17e 1101 return NULL;
6983e6b5 1102
c78e3652
RB
1103 /* If the SLP node is a PHI (induction or reduction), terminate
1104 the recursion. */
b9787581 1105 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
e7baeb39 1106 {
b161f2c9
RS
1107 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1108 tree vectype = get_vectype_for_scalar_type (scalar_type);
d7609678 1109 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
b161f2c9
RS
1110 return NULL;
1111
b9787581 1112 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
c78e3652 1113 /* Induction from different IVs is not supported. */
719488f8
RB
1114 if (def_type == vect_induction_def)
1115 {
b9787581
RS
1116 stmt_vec_info other_info;
1117 FOR_EACH_VEC_ELT (stmts, i, other_info)
1118 if (stmt_info != other_info)
719488f8
RB
1119 return NULL;
1120 }
1121 else
1122 {
1123 /* Else def types have to match. */
b9787581
RS
1124 stmt_vec_info other_info;
1125 FOR_EACH_VEC_ELT (stmts, i, other_info)
719488f8
RB
1126 {
1127 /* But for reduction chains only check on the first stmt. */
b9787581
RS
1128 if (REDUC_GROUP_FIRST_ELEMENT (other_info)
1129 && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
719488f8 1130 continue;
b9787581 1131 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
719488f8
RB
1132 return NULL;
1133 }
1134 }
e7baeb39
RB
1135 node = vect_create_new_slp_node (stmts);
1136 return node;
1137 }
1138
1139
6876e5bc 1140 bool two_operators = false;
4cecd659 1141 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
d7609678 1142 if (!vect_build_slp_tree_1 (swap, stmts, group_size,
e403d17e
RB
1143 &this_max_nunits, matches, &two_operators))
1144 return NULL;
ebfd146a 1145
6983e6b5 1146 /* If the SLP node is a load, terminate the recursion. */
b9787581
RS
1147 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1148 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
ebfd146a 1149 {
e403d17e
RB
1150 *max_nunits = this_max_nunits;
1151 node = vect_create_new_slp_node (stmts);
1152 loads->safe_push (node);
1153 return node;
ebfd146a
IR
1154 }
1155
6983e6b5
RB
1156 /* Get at the operands, verifying they are compatible. */
1157 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1158 slp_oprnd_info oprnd_info;
b9787581 1159 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
6983e6b5 1160 {
4cecd659 1161 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
018b2744 1162 stmts, i, &oprnds_info);
4cecd659
BC
1163 if (res != 0)
1164 matches[(res == -1) ? 0 : i] = false;
1165 if (!matches[0])
1166 break;
6983e6b5 1167 }
b0b4483e
RB
1168 for (i = 0; i < group_size; ++i)
1169 if (!matches[i])
1170 {
1171 vect_free_oprnd_info (oprnds_info);
e403d17e 1172 return NULL;
b0b4483e 1173 }
6983e6b5 1174
e403d17e
RB
1175 auto_vec<slp_tree, 4> children;
1176 auto_vec<slp_tree> this_loads;
1177
b9787581 1178 stmt_info = stmts[0];
6983e6b5 1179
26d66f28
RB
1180 if (tree_size)
1181 max_tree_size -= *tree_size;
1182
b8698a0f 1183 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 1184 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 1185 {
d092494c 1186 slp_tree child;
e403d17e
RB
1187 unsigned old_nloads = this_loads.length ();
1188 unsigned old_tree_size = this_tree_size;
1189 unsigned int j;
b8698a0f 1190
e7baeb39 1191 if (oprnd_info->first_dt != vect_internal_def
c78e3652 1192 && oprnd_info->first_dt != vect_reduction_def
e7baeb39 1193 && oprnd_info->first_dt != vect_induction_def)
d092494c 1194 continue;
ebfd146a 1195
1428105c
RB
1196 if (++this_tree_size > max_tree_size)
1197 {
be43a887
RB
1198 if (dump_enabled_p ())
1199 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1200 vect_location,
1201 "Build SLP failed: SLP tree too large\n");
e403d17e 1202 FOR_EACH_VEC_ELT (children, j, child)
6e2dd807 1203 vect_free_slp_tree (child, false);
1428105c 1204 vect_free_oprnd_info (oprnds_info);
e403d17e 1205 return NULL;
1428105c
RB
1206 }
1207
e403d17e
RB
1208 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1209 group_size, &this_max_nunits,
1210 &this_loads, matches, npermutes,
1211 &this_tree_size,
a1f072e2 1212 max_tree_size, bst_map)) != NULL)
6983e6b5 1213 {
3fc356dc
RB
1214 /* If we have all children of child built up from scalars then just
1215 throw that away and build it up this node from scalars. */
995b6fe0
RB
1216 if (!SLP_TREE_CHILDREN (child).is_empty ()
1217 /* ??? Rejecting patterns this way doesn't work. We'd have to
1218 do extra work to cancel the pattern so the uses see the
1219 scalar version. */
b9787581 1220 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
3fc356dc 1221 {
3fc356dc
RB
1222 slp_tree grandchild;
1223
1224 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1225 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
3fc356dc
RB
1226 break;
1227 if (!grandchild)
1228 {
1229 /* Roll back. */
e403d17e
RB
1230 this_loads.truncate (old_nloads);
1231 this_tree_size = old_tree_size;
3fc356dc 1232 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1233 vect_free_slp_tree (grandchild, false);
3fc356dc
RB
1234 SLP_TREE_CHILDREN (child).truncate (0);
1235
1236 dump_printf_loc (MSG_NOTE, vect_location,
1237 "Building parent vector operands from "
1238 "scalars instead\n");
1239 oprnd_info->def_stmts = vNULL;
603cca93 1240 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1241 children.safe_push (child);
3fc356dc
RB
1242 continue;
1243 }
1244 }
1245
6983e6b5 1246 oprnd_info->def_stmts = vNULL;
e403d17e 1247 children.safe_push (child);
6983e6b5
RB
1248 continue;
1249 }
1250
90dd6e3d
RB
1251 /* If the SLP build failed fatally and we analyze a basic-block
1252 simply treat nodes we fail to build as externally defined
1253 (and thus build vectors from the scalar defs).
1254 The cost model will reject outright expensive cases.
1255 ??? This doesn't treat cases where permutation ultimatively
1256 fails (or we don't try permutation below). Ideally we'd
1257 even compute a permutation that will end up with the maximum
1258 SLP tree size... */
310213d4 1259 if (is_a <bb_vec_info> (vinfo)
90dd6e3d
RB
1260 && !matches[0]
1261 /* ??? Rejecting patterns this way doesn't work. We'd have to
1262 do extra work to cancel the pattern so the uses see the
1263 scalar version. */
b9787581 1264 && !is_pattern_stmt_p (stmt_info))
90dd6e3d
RB
1265 {
1266 dump_printf_loc (MSG_NOTE, vect_location,
1267 "Building vector operands from scalars\n");
e403d17e 1268 child = vect_create_new_slp_node (oprnd_info->def_stmts);
603cca93 1269 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e
RB
1270 children.safe_push (child);
1271 oprnd_info->def_stmts = vNULL;
90dd6e3d
RB
1272 continue;
1273 }
1274
6983e6b5
RB
1275 /* If the SLP build for operand zero failed and operand zero
1276 and one can be commutated try that for the scalar stmts
1277 that failed the match. */
1278 if (i == 0
1279 /* A first scalar stmt mismatch signals a fatal mismatch. */
1280 && matches[0]
1281 /* ??? For COND_EXPRs we can swap the comparison operands
1282 as well as the arms under some constraints. */
1283 && nops == 2
1284 && oprnds_info[1]->first_dt == vect_internal_def
b9787581 1285 && is_gimple_assign (stmt_info->stmt)
6983e6b5
RB
1286 /* Do so only if the number of not successful permutes was nor more
1287 than a cut-ff as re-trying the recursive match on
1288 possibly each level of the tree would expose exponential
1289 behavior. */
1290 && *npermutes < 4)
1291 {
85c5e2f5
RB
1292 /* See whether we can swap the matching or the non-matching
1293 stmt operands. */
1294 bool swap_not_matching = true;
1295 do
1296 {
1297 for (j = 0; j < group_size; ++j)
1298 {
1299 if (matches[j] != !swap_not_matching)
1300 continue;
b9787581 1301 stmt_vec_info stmt_info = stmts[j];
85c5e2f5 1302 /* Verify if we can swap operands of this stmt. */
b9787581
RS
1303 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1304 if (!stmt
85c5e2f5
RB
1305 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1306 {
1307 if (!swap_not_matching)
1308 goto fail;
1309 swap_not_matching = false;
1310 break;
1311 }
1312 /* Verify if we can safely swap or if we committed to a
1313 specific operand order already.
1314 ??? Instead of modifying GIMPLE stmts here we could
1315 record whether we want to swap operands in the SLP
1316 node and temporarily do that when processing it
1317 (or wrap operand accessors in a helper). */
1318 else if (swap[j] != 0
b9787581 1319 || STMT_VINFO_NUM_SLP_USES (stmt_info))
85c5e2f5
RB
1320 {
1321 if (!swap_not_matching)
1322 {
1323 if (dump_enabled_p ())
3c2a8ed0
DM
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1325 vect_location,
1326 "Build SLP failed: cannot swap "
1327 "operands of shared stmt %G",
1328 stmts[j]->stmt);
85c5e2f5
RB
1329 goto fail;
1330 }
1331 swap_not_matching = false;
1332 break;
1333 }
1334 }
1335 }
1336 while (j != group_size);
78810bd3 1337
6983e6b5 1338 /* Swap mismatched definition stmts. */
b0b4483e
RB
1339 dump_printf_loc (MSG_NOTE, vect_location,
1340 "Re-trying with swapped operands of stmts ");
e72baed7 1341 for (j = 0; j < group_size; ++j)
85c5e2f5 1342 if (matches[j] == !swap_not_matching)
6983e6b5 1343 {
6b4db501
MM
1344 std::swap (oprnds_info[0]->def_stmts[j],
1345 oprnds_info[1]->def_stmts[j]);
b0b4483e 1346 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1347 }
b0b4483e 1348 dump_printf (MSG_NOTE, "\n");
74574669
RB
1349 /* And try again with scratch 'matches' ... */
1350 bool *tem = XALLOCAVEC (bool, group_size);
e403d17e
RB
1351 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1352 group_size, &this_max_nunits,
1353 &this_loads, tem, npermutes,
1354 &this_tree_size,
a1f072e2 1355 max_tree_size, bst_map)) != NULL)
6983e6b5 1356 {
60f2b864
RB
1357 /* ... so if successful we can apply the operand swapping
1358 to the GIMPLE IL. This is necessary because for example
1359 vect_get_slp_defs uses operand indexes and thus expects
1360 canonical operand order. This is also necessary even
1361 if we end up building the operand from scalars as
1362 we'll continue to process swapped operand two. */
1363 for (j = 0; j < group_size; ++j)
b9787581 1364 gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
f47cda24 1365 for (j = 0; j < group_size; ++j)
b9787581
RS
1366 if (matches[j] == !swap_not_matching)
1367 {
1368 gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
1369 /* Avoid swapping operands twice. */
1370 if (gimple_plf (stmt, GF_PLF_1))
1371 continue;
1372 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1373 gimple_assign_rhs2_ptr (stmt));
1374 gimple_set_plf (stmt, GF_PLF_1, true);
1375 }
f47cda24
RB
1376 /* Verify we swap all duplicates or none. */
1377 if (flag_checking)
1378 for (j = 0; j < group_size; ++j)
b9787581
RS
1379 gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
1380 == (matches[j] == !swap_not_matching));
60f2b864 1381
85c69b0b
RB
1382 /* If we have all children of child built up from scalars then
1383 just throw that away and build it up this node from scalars. */
995b6fe0
RB
1384 if (!SLP_TREE_CHILDREN (child).is_empty ()
1385 /* ??? Rejecting patterns this way doesn't work. We'd have
1386 to do extra work to cancel the pattern so the uses see the
1387 scalar version. */
b9787581 1388 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
85c69b0b
RB
1389 {
1390 unsigned int j;
1391 slp_tree grandchild;
1392
1393 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1394 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
85c69b0b
RB
1395 break;
1396 if (!grandchild)
1397 {
1398 /* Roll back. */
e403d17e
RB
1399 this_loads.truncate (old_nloads);
1400 this_tree_size = old_tree_size;
85c69b0b 1401 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
6e2dd807 1402 vect_free_slp_tree (grandchild, false);
85c69b0b
RB
1403 SLP_TREE_CHILDREN (child).truncate (0);
1404
1405 dump_printf_loc (MSG_NOTE, vect_location,
1406 "Building parent vector operands from "
1407 "scalars instead\n");
1408 oprnd_info->def_stmts = vNULL;
603cca93 1409 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1410 children.safe_push (child);
85c69b0b
RB
1411 continue;
1412 }
1413 }
1414
6983e6b5 1415 oprnd_info->def_stmts = vNULL;
e403d17e 1416 children.safe_push (child);
6983e6b5
RB
1417 continue;
1418 }
1419
1420 ++*npermutes;
1421 }
1422
78810bd3 1423fail:
e403d17e
RB
1424 gcc_assert (child == NULL);
1425 FOR_EACH_VEC_ELT (children, j, child)
6e2dd807 1426 vect_free_slp_tree (child, false);
6983e6b5 1427 vect_free_oprnd_info (oprnds_info);
e403d17e 1428 return NULL;
ebfd146a
IR
1429 }
1430
e403d17e
RB
1431 vect_free_oprnd_info (oprnds_info);
1432
1428105c
RB
1433 if (tree_size)
1434 *tree_size += this_tree_size;
e403d17e
RB
1435 *max_nunits = this_max_nunits;
1436 loads->safe_splice (this_loads);
1428105c 1437
e403d17e
RB
1438 node = vect_create_new_slp_node (stmts);
1439 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1440 SLP_TREE_CHILDREN (node).splice (children);
1441 return node;
ebfd146a
IR
1442}
1443
78c60e3d 1444/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1445
1446static void
4f5b9c80 1447vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
a1f072e2 1448 slp_tree node, hash_set<slp_tree> &visited)
ebfd146a
IR
1449{
1450 int i;
b9787581 1451 stmt_vec_info stmt_info;
d755c7ef 1452 slp_tree child;
ebfd146a 1453
a1f072e2
RB
1454 if (visited.add (node))
1455 return;
1456
1457 dump_printf_loc (dump_kind, loc, "node%s %p\n",
603cca93 1458 SLP_TREE_DEF_TYPE (node) != vect_internal_def
a1f072e2 1459 ? " (external)" : "", node);
b9787581 1460 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
3c2a8ed0 1461 dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt);
a1f072e2
RB
1462 if (SLP_TREE_CHILDREN (node).is_empty ())
1463 return;
1464 dump_printf_loc (dump_kind, loc, "\tchildren");
9771b263 1465 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a1f072e2
RB
1466 dump_printf (dump_kind, " %p", (void *)child);
1467 dump_printf (dump_kind, "\n");
1468 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1469 vect_print_slp_tree (dump_kind, loc, child, visited);
ebfd146a
IR
1470}
1471
a1f072e2
RB
1472static void
1473vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1474 slp_tree node)
1475{
1476 hash_set<slp_tree> visited;
1477 vect_print_slp_tree (dump_kind, loc, node, visited);
1478}
ebfd146a 1479
b8698a0f
L
1480/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1481 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1482 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1483 stmts in NODE are to be marked. */
1484
1485static void
1486vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1487{
1488 int i;
b9787581 1489 stmt_vec_info stmt_info;
d755c7ef 1490 slp_tree child;
ebfd146a 1491
603cca93 1492 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
1493 return;
1494
b9787581 1495 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
ebfd146a 1496 if (j < 0 || i == j)
b9787581 1497 STMT_SLP_TYPE (stmt_info) = mark;
ebfd146a 1498
9771b263 1499 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1500 vect_mark_slp_stmts (child, mark, j);
ebfd146a
IR
1501}
1502
1503
a70d6342
IR
1504/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1505
1506static void
1507vect_mark_slp_stmts_relevant (slp_tree node)
1508{
1509 int i;
a70d6342 1510 stmt_vec_info stmt_info;
d755c7ef 1511 slp_tree child;
a70d6342 1512
603cca93 1513 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
1514 return;
1515
b9787581 1516 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
a70d6342 1517 {
b8698a0f 1518 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1519 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1520 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1521 }
1522
9771b263 1523 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1524 vect_mark_slp_stmts_relevant (child);
a70d6342
IR
1525}
1526
1527
b5aeb3bb
IR
1528/* Rearrange the statements of NODE according to PERMUTATION. */
1529
1530static void
1531vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
a1f072e2
RB
1532 vec<unsigned> permutation,
1533 hash_set<slp_tree> &visited)
b5aeb3bb 1534{
b9787581
RS
1535 stmt_vec_info stmt_info;
1536 vec<stmt_vec_info> tmp_stmts;
d755c7ef
RB
1537 unsigned int i;
1538 slp_tree child;
b5aeb3bb 1539
a1f072e2
RB
1540 if (visited.add (node))
1541 return;
1542
9771b263 1543 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a1f072e2 1544 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
b5aeb3bb 1545
9771b263
DN
1546 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1547 tmp_stmts.create (group_size);
d755c7ef 1548 tmp_stmts.quick_grow_cleared (group_size);
b5aeb3bb 1549
b9787581
RS
1550 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1551 tmp_stmts[permutation[i]] = stmt_info;
b5aeb3bb 1552
9771b263 1553 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1554 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1555}
1556
1557
b266b968
RB
1558/* Attempt to reorder stmts in a reduction chain so that we don't
1559 require any load permutation. Return true if that was possible,
1560 otherwise return false. */
1561
1562static bool
1563vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1564{
1565 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1566 unsigned int i, j;
b266b968
RB
1567 unsigned int lidx;
1568 slp_tree node, load;
1569
1570 /* Compare all the permutation sequences to the first one. We know
1571 that at least one load is permuted. */
1572 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1573 if (!node->load_permutation.exists ())
1574 return false;
1575 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1576 {
1577 if (!load->load_permutation.exists ())
1578 return false;
1579 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1580 if (lidx != node->load_permutation[j])
1581 return false;
1582 }
1583
1584 /* Check that the loads in the first sequence are different and there
1585 are no gaps between them. */
7ba9e72d 1586 auto_sbitmap load_index (group_size);
b266b968
RB
1587 bitmap_clear (load_index);
1588 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1589 {
41eefe13 1590 if (lidx >= group_size)
7ba9e72d 1591 return false;
b266b968 1592 if (bitmap_bit_p (load_index, lidx))
7ba9e72d
TS
1593 return false;
1594
b266b968
RB
1595 bitmap_set_bit (load_index, lidx);
1596 }
1597 for (i = 0; i < group_size; i++)
1598 if (!bitmap_bit_p (load_index, i))
7ba9e72d 1599 return false;
b266b968
RB
1600
1601 /* This permutation is valid for reduction. Since the order of the
1602 statements in the nodes is not important unless they are memory
1603 accesses, we can rearrange the statements in all the nodes
1604 according to the order of the loads. */
a1f072e2 1605 hash_set<slp_tree> visited;
b266b968 1606 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
a1f072e2 1607 node->load_permutation, visited);
b266b968
RB
1608
1609 /* We are done, no actual permutations need to be generated. */
d9f21f6a 1610 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
b266b968 1611 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
c4e360f4 1612 {
b9787581 1613 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1614 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
c4e360f4
RB
1615 /* But we have to keep those permutations that are required because
1616 of handling of gaps. */
d9f21f6a 1617 if (known_eq (unrolling_factor, 1U)
b9787581
RS
1618 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1619 && DR_GROUP_GAP (first_stmt_info) == 0))
c4e360f4 1620 SLP_TREE_LOAD_PERMUTATION (node).release ();
cbd400b4
RB
1621 else
1622 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1623 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
c4e360f4
RB
1624 }
1625
b266b968
RB
1626 return true;
1627}
1628
01d8bf07
RB
1629/* Check if the required load permutations in the SLP instance
1630 SLP_INSTN are supported. */
ebfd146a
IR
1631
1632static bool
01d8bf07 1633vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1634{
01d8bf07
RB
1635 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1636 unsigned int i, j, k, next;
6983e6b5 1637 slp_tree node;
ebfd146a 1638
73fbfcad 1639 if (dump_enabled_p ())
ebfd146a 1640 {
78c60e3d 1641 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1642 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1643 if (node->load_permutation.exists ())
1644 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1645 dump_printf (MSG_NOTE, "%d ", next);
1646 else
bddc974e
TJ
1647 for (k = 0; k < group_size; ++k)
1648 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1649 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1650 }
1651
b5aeb3bb
IR
1652 /* In case of reduction every load permutation is allowed, since the order
1653 of the reduction statements is not important (as opposed to the case of
0d0293ac 1654 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1655 load nodes are of the same size and have the same permutation (and then
1656 rearrange all the nodes of the SLP instance according to this
1657 permutation). */
1658
1659 /* Check that all the load nodes are of the same size. */
01d8bf07 1660 /* ??? Can't we assert this? */
9771b263 1661 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1662 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1663 return false;
2200fc49 1664
b5aeb3bb 1665 node = SLP_INSTANCE_TREE (slp_instn);
b9787581 1666 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1667
b010117a 1668 /* Reduction (there are no data-refs in the root).
b266b968 1669 In reduction chain the order of the loads is not important. */
b9787581
RS
1670 if (!STMT_VINFO_DATA_REF (stmt_info)
1671 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
c4e360f4 1672 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1673
6aa904c4
IR
1674 /* In basic block vectorization we allow any subchain of an interleaving
1675 chain.
1676 FORNOW: not supported in loop SLP because of realignment compications. */
b9787581 1677 if (STMT_VINFO_BB_VINFO (stmt_info))
6aa904c4 1678 {
240a94da
RB
1679 /* Check whether the loads in an instance form a subchain and thus
1680 no permutation is necessary. */
9771b263 1681 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1682 {
9626d143
RB
1683 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1684 continue;
240a94da 1685 bool subchain_p = true;
bffb8014 1686 stmt_vec_info next_load_info = NULL;
b9787581
RS
1687 stmt_vec_info load_info;
1688 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1689 {
1690 if (j != 0
bffb8014 1691 && (next_load_info != load_info
b9787581 1692 || DR_GROUP_GAP (load_info) != 1))
240a94da
RB
1693 {
1694 subchain_p = false;
1695 break;
1696 }
bffb8014 1697 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
b9787581 1698 }
240a94da
RB
1699 if (subchain_p)
1700 SLP_TREE_LOAD_PERMUTATION (node).release ();
1701 else
1702 {
b9787581 1703 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
bffb8014 1704 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
928686b1 1705 unsigned HOST_WIDE_INT nunits;
fe73a332
RB
1706 unsigned k, maxk = 0;
1707 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1708 if (k > maxk)
1709 maxk = k;
1710 /* In BB vectorization we may not actually use a loaded vector
2c53b149 1711 accessing elements in excess of DR_GROUP_SIZE. */
928686b1
RS
1712 tree vectype = STMT_VINFO_VECTYPE (group_info);
1713 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
2c53b149 1714 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
fe73a332
RB
1715 {
1716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1717 "BB vectorization with gaps at the end of "
1718 "a load is not supported\n");
1719 return false;
1720 }
1721
240a94da
RB
1722 /* Verify the permutation can be generated. */
1723 vec<tree> tem;
29afecdf 1724 unsigned n_perms;
240a94da 1725 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1726 1, slp_instn, true, &n_perms))
240a94da
RB
1727 {
1728 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1729 vect_location,
1730 "unsupported load permutation\n");
1731 return false;
1732 }
1733 }
6aa904c4 1734 }
01d8bf07 1735 return true;
6aa904c4
IR
1736 }
1737
31bee964
RB
1738 /* For loop vectorization verify we can generate the permutation. Be
1739 conservative about the vectorization factor, there are permutations
1740 that will use three vector inputs only starting from a specific factor
1741 and the vectorization factor is not yet final.
1742 ??? The SLP instance unrolling factor might not be the maximum one. */
29afecdf 1743 unsigned n_perms;
d9f21f6a
RS
1744 poly_uint64 test_vf
1745 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
31bee964 1746 LOOP_VINFO_VECT_FACTOR
b9787581 1747 (STMT_VINFO_LOOP_VINFO (stmt_info)));
01d8bf07
RB
1748 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1749 if (node->load_permutation.exists ()
31bee964
RB
1750 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1751 slp_instn, true, &n_perms))
01d8bf07 1752 return false;
9b999e8c 1753
01d8bf07 1754 return true;
ebfd146a
IR
1755}
1756
1757
e4a707c4 1758/* Find the last store in SLP INSTANCE. */
ff802fa1 1759
95c68311 1760stmt_vec_info
2e8ab70c 1761vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1762{
95c68311 1763 stmt_vec_info last = NULL;
b9787581 1764 stmt_vec_info stmt_vinfo;
e4a707c4 1765
b9787581 1766 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2e8ab70c 1767 {
211cd1e2 1768 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
95c68311 1769 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2e8ab70c 1770 }
e4a707c4 1771
2e8ab70c 1772 return last;
e4a707c4
IR
1773}
1774
82570274
RS
1775/* Splits a group of stores, currently beginning at FIRST_VINFO, into
1776 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1777 (also containing the first GROUP1_SIZE stmts, since stores are
1778 consecutive), the second containing the remainder.
1ba91a49
AL
1779 Return the first stmt in the second group. */
1780
82570274
RS
1781static stmt_vec_info
1782vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1ba91a49 1783{
bffb8014 1784 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1ba91a49 1785 gcc_assert (group1_size > 0);
2c53b149 1786 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1ba91a49 1787 gcc_assert (group2_size > 0);
2c53b149 1788 DR_GROUP_SIZE (first_vinfo) = group1_size;
1ba91a49 1789
bffb8014 1790 stmt_vec_info stmt_info = first_vinfo;
1ba91a49
AL
1791 for (unsigned i = group1_size; i > 1; i--)
1792 {
bffb8014
RS
1793 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1794 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1795 }
1796 /* STMT is now the last element of the first group. */
bffb8014
RS
1797 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1798 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1ba91a49 1799
bffb8014
RS
1800 DR_GROUP_SIZE (group2) = group2_size;
1801 for (stmt_info = group2; stmt_info;
1802 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1ba91a49 1803 {
bffb8014
RS
1804 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1805 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1ba91a49
AL
1806 }
1807
2c53b149 1808 /* For the second group, the DR_GROUP_GAP is that before the original group,
1ba91a49 1809 plus skipping over the first vector. */
bffb8014 1810 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1ba91a49 1811
2c53b149
RB
1812 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1813 DR_GROUP_GAP (first_vinfo) += group2_size;
1ba91a49
AL
1814
1815 if (dump_enabled_p ())
1816 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1817 group1_size, group2_size);
1818
1819 return group2;
1820}
1821
4b6068ea
RS
1822/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1823 statements and a vector of NUNITS elements. */
1824
1825static poly_uint64
1826calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1827{
1828 return exact_div (common_multiple (nunits, group_size), group_size);
1829}
1830
0d0293ac 1831/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1832 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1833 Return FALSE if it's impossible to SLP any stmt in the loop. */
1834
1835static bool
310213d4 1836vect_analyze_slp_instance (vec_info *vinfo,
32e8e429 1837 stmt_vec_info stmt_info, unsigned max_tree_size)
ebfd146a
IR
1838{
1839 slp_instance new_instance;
d092494c 1840 slp_tree node;
2c53b149 1841 unsigned int group_size;
b5aeb3bb 1842 tree vectype, scalar_type = NULL_TREE;
1ba91a49 1843 unsigned int i;
9771b263 1844 vec<slp_tree> loads;
b9787581
RS
1845 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1846 vec<stmt_vec_info> scalar_stmts;
b5aeb3bb 1847
b9787581 1848 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
b5aeb3bb 1849 {
2c53b149
RB
1850 scalar_type = TREE_TYPE (DR_REF (dr));
1851 vectype = get_vectype_for_scalar_type (scalar_type);
b9787581 1852 group_size = DR_GROUP_SIZE (stmt_info);
2c53b149 1853 }
b9787581 1854 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
1855 {
1856 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581
RS
1857 vectype = STMT_VINFO_VECTYPE (stmt_info);
1858 group_size = REDUC_GROUP_SIZE (stmt_info);
b5aeb3bb
IR
1859 }
1860 else
1861 {
310213d4 1862 gcc_assert (is_a <loop_vec_info> (vinfo));
b9787581 1863 vectype = STMT_VINFO_VECTYPE (stmt_info);
310213d4 1864 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 1865 }
b8698a0f 1866
ebfd146a
IR
1867 if (!vectype)
1868 {
73fbfcad 1869 if (dump_enabled_p ())
3c2a8ed0
DM
1870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1871 "Build SLP failed: unsupported data-type %T\n",
1872 scalar_type);
b5aeb3bb 1873
ebfd146a
IR
1874 return false;
1875 }
4b6068ea 1876 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 1877
0d0293ac 1878 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1879 scalar_stmts.create (group_size);
bffb8014 1880 stmt_vec_info next_info = stmt_info;
b9787581 1881 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
ebfd146a 1882 {
b5aeb3bb 1883 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
bffb8014 1884 while (next_info)
b5aeb3bb 1885 {
6e6b18e5 1886 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 1887 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2c53b149
RB
1888 }
1889 }
b9787581 1890 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2c53b149
RB
1891 {
1892 /* Collect the reduction stmts and store them in
1893 SLP_TREE_SCALAR_STMTS. */
bffb8014 1894 while (next_info)
2c53b149 1895 {
6e6b18e5 1896 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
bffb8014 1897 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
b5aeb3bb 1898 }
14a61437
RB
1899 /* Mark the first element of the reduction chain as reduction to properly
1900 transform the node. In the reduction analysis phase only the last
1901 element of the chain is marked as reduction. */
b9787581 1902 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
b5aeb3bb
IR
1903 }
1904 else
1905 {
1906 /* Collect reduction statements. */
32c91dfc
RS
1907 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
1908 for (i = 0; reductions.iterate (i, &next_info); i++)
1909 scalar_stmts.safe_push (next_info);
ebfd146a
IR
1910 }
1911
9771b263 1912 loads.create (group_size);
ebfd146a
IR
1913
1914 /* Build the tree for the SLP instance. */
89d390e5
RB
1915 bool *matches = XALLOCAVEC (bool, group_size);
1916 unsigned npermutes = 0;
a1f072e2
RB
1917 scalar_stmts_to_slp_tree_map_t *bst_map
1918 = new scalar_stmts_to_slp_tree_map_t ();
4b6068ea 1919 poly_uint64 max_nunits = nunits;
e569db5f 1920 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
4b6068ea 1921 &max_nunits, &loads, matches, &npermutes,
a1f072e2
RB
1922 NULL, max_tree_size, bst_map);
1923 /* The map keeps a reference on SLP nodes built, release that. */
1924 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
1925 it != bst_map->end (); ++it)
1926 if ((*it).second)
1927 vect_free_slp_tree ((*it).second, false);
1928 delete bst_map;
e569db5f 1929 if (node != NULL)
ebfd146a 1930 {
4ef69dfc 1931 /* Calculate the unrolling factor based on the smallest type. */
d9f21f6a 1932 poly_uint64 unrolling_factor
4b6068ea 1933 = calculate_unrolling_factor (max_nunits, group_size);
b8698a0f 1934
d9f21f6a 1935 if (maybe_ne (unrolling_factor, 1U)
e569db5f
VK
1936 && is_a <bb_vec_info> (vinfo))
1937 {
4b6068ea
RS
1938 unsigned HOST_WIDE_INT const_max_nunits;
1939 if (!max_nunits.is_constant (&const_max_nunits)
1940 || const_max_nunits > group_size)
1941 {
1942 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1944 "Build SLP failed: store group "
1945 "size not a multiple of the vector size "
1946 "in basic block SLP\n");
6e2dd807 1947 vect_free_slp_tree (node, false);
4b6068ea
RS
1948 loads.release ();
1949 return false;
1950 }
e569db5f 1951 /* Fatal mismatch. */
4b6068ea 1952 matches[group_size / const_max_nunits * const_max_nunits] = false;
6e2dd807 1953 vect_free_slp_tree (node, false);
e569db5f
VK
1954 loads.release ();
1955 }
1956 else
1957 {
a1f072e2
RB
1958 /* Create a new SLP instance. */
1959 new_instance = XNEW (struct _slp_instance);
1960 SLP_INSTANCE_TREE (new_instance) = node;
1961 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1962 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1963 SLP_INSTANCE_LOADS (new_instance) = loads;
1964
1965 /* Compute the load permutation. */
1966 slp_tree load_node;
1967 bool loads_permuted = false;
1968 FOR_EACH_VEC_ELT (loads, i, load_node)
abf9bfbc 1969 {
a1f072e2
RB
1970 vec<unsigned> load_permutation;
1971 int j;
1972 stmt_vec_info load_info;
1973 bool this_load_permuted = false;
1974 load_permutation.create (group_size);
1975 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
1976 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
1977 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
1978 {
1979 int load_place = vect_get_place_in_interleaving_chain
1980 (load_info, first_stmt_info);
1981 gcc_assert (load_place != -1);
1982 if (load_place != j)
1983 this_load_permuted = true;
1984 load_permutation.safe_push (load_place);
1985 }
1986 if (!this_load_permuted
1987 /* The load requires permutation when unrolling exposes
1988 a gap either because the group is larger than the SLP
1989 group-size or because there is a gap between the groups. */
1990 && (known_eq (unrolling_factor, 1U)
1991 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1992 && DR_GROUP_GAP (first_stmt_info) == 0)))
1993 {
1994 load_permutation.release ();
1995 continue;
1996 }
1997 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1998 loads_permuted = true;
abf9bfbc 1999 }
a1f072e2
RB
2000
2001 if (loads_permuted)
01d8bf07 2002 {
a1f072e2
RB
2003 if (!vect_supported_load_permutation_p (new_instance))
2004 {
2005 if (dump_enabled_p ())
2006 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2007 "Build SLP failed: unsupported load "
2008 "permutation %G", stmt_info->stmt);
2009 vect_free_slp_instance (new_instance, false);
2010 return false;
2011 }
01d8bf07 2012 }
ebfd146a 2013
e569db5f 2014 /* If the loads and stores can be handled with load/store-lan
a1f072e2
RB
2015 instructions do not generate this SLP instance. */
2016 if (is_a <loop_vec_info> (vinfo)
2017 && loads_permuted
2018 && dr && vect_store_lanes_supported (vectype, group_size, false))
bb0f5ca7 2019 {
a1f072e2
RB
2020 slp_tree load_node;
2021 FOR_EACH_VEC_ELT (loads, i, load_node)
2022 {
2023 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2024 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2025 /* Use SLP for strided accesses (or if we can't load-lanes). */
2026 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2027 || ! vect_load_lanes_supported
2028 (STMT_VINFO_VECTYPE (stmt_vinfo),
2029 DR_GROUP_SIZE (stmt_vinfo), false))
2030 break;
2031 }
2032 if (i == loads.length ())
2033 {
2034 if (dump_enabled_p ())
2035 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2036 "Built SLP cancelled: can use "
2037 "load/store-lanes\n");
2038 vect_free_slp_instance (new_instance, false);
2039 return false;
2040 }
bb0f5ca7 2041 }
a1f072e2
RB
2042
2043 vinfo->slp_instances.safe_push (new_instance);
2044
2045 if (dump_enabled_p ())
bb0f5ca7 2046 {
a1f072e2
RB
2047 dump_printf_loc (MSG_NOTE, vect_location,
2048 "Final SLP tree for instance:\n");
2049 vect_print_slp_tree (MSG_NOTE, vect_location, node);
bb0f5ca7 2050 }
bb0f5ca7 2051
a1f072e2 2052 return true;
c2a12ca0 2053 }
e569db5f
VK
2054 }
2055 else
2056 {
a1f072e2
RB
2057 /* Failed to SLP. */
2058 /* Free the allocated memory. */
2059 scalar_stmts.release ();
2060 loads.release ();
e569db5f 2061 }
b8698a0f 2062
1ba91a49 2063 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2064 vector size. */
4b6068ea 2065 unsigned HOST_WIDE_INT const_nunits;
1ba91a49 2066 if (is_a <bb_vec_info> (vinfo)
91987857
RS
2067 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2068 && DR_GROUP_FIRST_ELEMENT (stmt_info)
4b6068ea 2069 && nunits.is_constant (&const_nunits))
1ba91a49
AL
2070 {
2071 /* We consider breaking the group only on VF boundaries from the existing
2072 start. */
2073 for (i = 0; i < group_size; i++)
2074 if (!matches[i]) break;
2075
4b6068ea 2076 if (i >= const_nunits && i < group_size)
1ba91a49
AL
2077 {
2078 /* Split into two groups at the first vector boundary before i. */
4b6068ea
RS
2079 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2080 unsigned group1_size = i & ~(const_nunits - 1);
1ba91a49 2081
82570274
RS
2082 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2083 group1_size);
86a91c0a
RS
2084 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2085 max_tree_size);
1ba91a49
AL
2086 /* If the first non-match was in the middle of a vector,
2087 skip the rest of that vector. */
2088 if (group1_size < i)
2089 {
4b6068ea 2090 i = group1_size + const_nunits;
1ba91a49 2091 if (i < group_size)
4b6068ea 2092 rest = vect_split_slp_store_group (rest, const_nunits);
1ba91a49
AL
2093 }
2094 if (i < group_size)
2095 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2096 return res;
2097 }
2098 /* Even though the first vector did not all match, we might be able to SLP
2099 (some) of the remainder. FORNOW ignore this possibility. */
2100 }
2101
a70d6342 2102 return false;
ebfd146a
IR
2103}
2104
2105
ff802fa1 2106/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
2107 trees of packed scalar stmts if SLP is possible. */
2108
f4ebbd24 2109opt_result
310213d4 2110vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2111{
2112 unsigned int i;
f698fccf 2113 stmt_vec_info first_element;
ebfd146a 2114
adac3a68 2115 DUMP_VECT_SCOPE ("vect_analyze_slp");
ebfd146a 2116
0d0293ac 2117 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2118 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
0630a4ec 2119 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
ebfd146a 2120
310213d4 2121 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2122 {
310213d4
RB
2123 if (loop_vinfo->reduction_chains.length () > 0)
2124 {
2125 /* Find SLP sequences starting from reduction chains. */
2126 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
0630a4ec 2127 if (! vect_analyze_slp_instance (vinfo, first_element,
310213d4 2128 max_tree_size))
6b5e165b
RB
2129 {
2130 /* Dissolve reduction chain group. */
f698fccf
RS
2131 stmt_vec_info vinfo = first_element;
2132 while (vinfo)
6b5e165b 2133 {
bffb8014 2134 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2c53b149
RB
2135 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2136 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
f698fccf 2137 vinfo = next;
6b5e165b 2138 }
f698fccf 2139 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
6b5e165b 2140 }
310213d4 2141 }
b010117a 2142
310213d4 2143 /* Find SLP sequences starting from groups of reductions. */
0630a4ec
RB
2144 if (loop_vinfo->reductions.length () > 1)
2145 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2146 max_tree_size);
310213d4 2147 }
b5aeb3bb 2148
f4ebbd24 2149 return opt_result::success ();
ebfd146a
IR
2150}
2151
2152
2153/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2154 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2155 least one instance. */
ebfd146a 2156
437f4a00 2157bool
ebfd146a
IR
2158vect_make_slp_decision (loop_vec_info loop_vinfo)
2159{
d9f21f6a
RS
2160 unsigned int i;
2161 poly_uint64 unrolling_factor = 1;
9771b263 2162 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2163 slp_instance instance;
2164 int decided_to_slp = 0;
2165
adac3a68 2166 DUMP_VECT_SCOPE ("vect_make_slp_decision");
ebfd146a 2167
9771b263 2168 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2169 {
2170 /* FORNOW: SLP if you can. */
d9f21f6a
RS
2171 /* All unroll factors have the form current_vector_size * X for some
2172 rational X, so they must have a common multiple. */
2173 unrolling_factor
2174 = force_common_multiple (unrolling_factor,
2175 SLP_INSTANCE_UNROLLING_FACTOR (instance));
ebfd146a 2176
ff802fa1 2177 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2178 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2179 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
2180 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2181 decided_to_slp++;
2182 }
2183
2184 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2185
73fbfcad 2186 if (decided_to_slp && dump_enabled_p ())
d9f21f6a
RS
2187 {
2188 dump_printf_loc (MSG_NOTE, vect_location,
2189 "Decided to SLP %d instances. Unrolling factor ",
2190 decided_to_slp);
2191 dump_dec (MSG_NOTE, unrolling_factor);
2192 dump_printf (MSG_NOTE, "\n");
2193 }
437f4a00
IR
2194
2195 return (decided_to_slp > 0);
ebfd146a
IR
2196}
2197
2198
2199/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2200 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2201
2202static void
642fce57 2203vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 2204{
b9787581 2205 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2206 imm_use_iterator imm_iter;
355fe088 2207 gimple *use_stmt;
b9787581 2208 stmt_vec_info use_vinfo;
d755c7ef 2209 slp_tree child;
f2c74cc4 2210 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2211 int j;
2212
2213 /* Propagate hybrid down the SLP tree. */
2214 if (stype == hybrid)
2215 ;
2216 else if (HYBRID_SLP_STMT (stmt_vinfo))
2217 stype = hybrid;
2218 else
2219 {
2220 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2221 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2222 /* If we get a pattern stmt here we have to use the LHS of the
2223 original stmt for immediate uses. */
211cd1e2 2224 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
e7baeb39
RB
2225 tree def;
2226 if (gimple_code (stmt) == GIMPLE_PHI)
2227 def = gimple_phi_result (stmt);
2228 else
2229 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2230 if (def)
2231 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870 2232 {
6585ff8f
RS
2233 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2234 if (!use_vinfo)
29764870 2235 continue;
6e6b18e5 2236 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
29764870
RB
2237 if (!STMT_SLP_TYPE (use_vinfo)
2238 && (STMT_VINFO_RELEVANT (use_vinfo)
2239 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2240 && !(gimple_code (use_stmt) == GIMPLE_PHI
2241 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2242 {
2243 if (dump_enabled_p ())
3c2a8ed0
DM
2244 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2245 "def in non-SLP stmt: %G", use_stmt);
502f0263
RB
2246 stype = hybrid;
2247 }
29764870 2248 }
642fce57 2249 }
ebfd146a 2250
502f0263
RB
2251 if (stype == hybrid
2252 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2253 {
2254 if (dump_enabled_p ())
3c2a8ed0
DM
2255 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2256 stmt_vinfo->stmt);
b1af7da6
RB
2257 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2258 }
ebfd146a 2259
642fce57 2260 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
603cca93 2261 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
90dd6e3d 2262 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2263}
f2c74cc4 2264
642fce57 2265/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2266
642fce57
RB
2267static tree
2268vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2269{
2270 walk_stmt_info *wi = (walk_stmt_info *)data;
6585ff8f 2271 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
642fce57
RB
2272
2273 if (wi->is_lhs)
2274 return NULL_TREE;
2275
c98d0595
RS
2276 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2277 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
642fce57 2278 {
6585ff8f 2279 if (dump_enabled_p ())
3c2a8ed0
DM
2280 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2281 def_stmt_info->stmt);
6585ff8f 2282 STMT_SLP_TYPE (def_stmt_info) = hybrid;
642fce57
RB
2283 }
2284
2285 return NULL_TREE;
ebfd146a
IR
2286}
2287
642fce57
RB
2288static tree
2289vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
6585ff8f 2290 walk_stmt_info *wi)
642fce57 2291{
6585ff8f
RS
2292 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2293 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
642fce57
RB
2294 /* If the stmt is in a SLP instance then this isn't a reason
2295 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2296 if (! STMT_SLP_TYPE (use_vinfo)
2297 && (STMT_VINFO_RELEVANT (use_vinfo)
2298 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2299 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2300 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2301 ;
2302 else
642fce57
RB
2303 *handled = true;
2304 return NULL_TREE;
2305}
ebfd146a
IR
2306
2307/* Find stmts that must be both vectorized and SLPed. */
2308
2309void
2310vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2311{
2312 unsigned int i;
9771b263 2313 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2314 slp_instance instance;
2315
adac3a68 2316 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
ebfd146a 2317
642fce57
RB
2318 /* First walk all pattern stmt in the loop and mark defs of uses as
2319 hybrid because immediate uses in them are not recorded. */
2320 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2321 {
2322 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2323 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2324 gsi_next (&gsi))
2325 {
355fe088 2326 gimple *stmt = gsi_stmt (gsi);
6585ff8f 2327 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
642fce57
RB
2328 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2329 {
2330 walk_stmt_info wi;
2331 memset (&wi, 0, sizeof (wi));
6585ff8f 2332 wi.info = loop_vinfo;
642fce57 2333 gimple_stmt_iterator gsi2
10681ce8 2334 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
642fce57
RB
2335 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2336 vect_detect_hybrid_slp_1, &wi);
2337 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2338 vect_detect_hybrid_slp_2,
2339 vect_detect_hybrid_slp_1, &wi);
2340 }
2341 }
2342 }
2343
2344 /* Then walk the SLP instance trees marking stmts with uses in
2345 non-SLP stmts as hybrid, also propagating hybrid down the
2346 SLP tree, collecting the above info on-the-fly. */
9771b263 2347 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2348 {
2349 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2350 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2351 i, pure_slp);
2352 }
ebfd146a
IR
2353}
2354
a70d6342 2355
2c515559
RS
2356/* Initialize a bb_vec_info struct for the statements between
2357 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2358
2359_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
ca823c85
RB
2360 gimple_stmt_iterator region_end_in,
2361 vec_info_shared *shared)
2362 : vec_info (vec_info::bb, init_cost (NULL), shared),
2c515559
RS
2363 bb (gsi_bb (region_begin_in)),
2364 region_begin (region_begin_in),
2365 region_end (region_end_in)
a70d6342 2366{
a70d6342
IR
2367 gimple_stmt_iterator gsi;
2368
61d371eb
RB
2369 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2370 gsi_next (&gsi))
a70d6342 2371 {
355fe088 2372 gimple *stmt = gsi_stmt (gsi);
a70d6342 2373 gimple_set_uid (stmt, 0);
4fbeb363 2374 add_stmt (stmt);
a70d6342
IR
2375 }
2376
2c515559 2377 bb->aux = this;
a70d6342
IR
2378}
2379
2380
2381/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2382 stmts in the basic block. */
2383
2c515559 2384_bb_vec_info::~_bb_vec_info ()
a70d6342 2385{
2c515559
RS
2386 for (gimple_stmt_iterator si = region_begin;
2387 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
458135c0
RS
2388 /* Reset region marker. */
2389 gimple_set_uid (gsi_stmt (si), -1);
a70d6342 2390
2c515559 2391 bb->aux = NULL;
a70d6342
IR
2392}
2393
15944069
RS
2394/* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2395 given then that child nodes have already been processed, and that
2396 their def types currently match their SLP node's def type. */
a70d6342
IR
2397
2398static bool
15944069
RS
2399vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2400 slp_instance node_instance,
2401 stmt_vector_for_cost *cost_vec)
a70d6342 2402{
b9787581 2403 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
bd2f172f
RB
2404 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2405
2406 /* For BB vectorization vector types are assigned here.
2407 Memory accesses already got their vector type assigned
2408 in vect_analyze_data_refs. */
2409 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2410 if (bb_vinfo
2411 && ! STMT_VINFO_DATA_REF (stmt_info))
a70d6342 2412 {
1f3cb663
RS
2413 tree vectype, nunits_vectype;
2414 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2415 &nunits_vectype))
2416 /* We checked this when building the node. */
2417 gcc_unreachable ();
2418 if (vectype == boolean_type_node)
bd2f172f 2419 {
1f3cb663
RS
2420 vectype = vect_get_mask_type_for_stmt (stmt_info);
2421 if (!vectype)
2422 /* vect_get_mask_type_for_stmt has already explained the
2423 failure. */
2424 return false;
bd2f172f
RB
2425 }
2426
b9787581 2427 stmt_vec_info sstmt_info;
15944069 2428 unsigned int i;
b9787581
RS
2429 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2430 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
a70d6342
IR
2431 }
2432
8b7e9dba
RS
2433 /* Calculate the number of vector statements to be created for the
2434 scalar stmts in this node. For SLP reductions it is equal to the
2435 number of vector statements in the children (which has already been
2436 calculated by the recursive call). Otherwise it is the number of
2c53b149 2437 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
8b7e9dba 2438 VF divided by the number of elements in a vector. */
2c53b149
RB
2439 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2440 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
8b7e9dba
RS
2441 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2442 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2443 else
2444 {
d9f21f6a 2445 poly_uint64 vf;
8b7e9dba
RS
2446 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2447 vf = loop_vinfo->vectorization_factor;
2448 else
2449 vf = 1;
2450 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2451 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2452 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
d9f21f6a 2453 = vect_get_num_vectors (vf * group_size, vectype);
8b7e9dba
RS
2454 }
2455
15944069 2456 bool dummy;
86a91c0a 2457 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
15944069
RS
2458}
2459
2460/* Analyze statements contained in SLP tree NODE after recursively analyzing
2461 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2462
2463 Return true if the operations are supported. */
2464
2465static bool
2466vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2467 slp_instance node_instance,
2468 scalar_stmts_to_slp_tree_map_t *visited,
2469 scalar_stmts_to_slp_tree_map_t *lvisited,
2470 stmt_vector_for_cost *cost_vec)
2471{
2472 int i, j;
2473 slp_tree child;
2474
2475 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2476 return true;
2477
2478 /* If we already analyzed the exact same set of scalar stmts we're done.
2479 We share the generated vector stmts for those. */
2480 slp_tree *leader;
2481 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2482 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2483 {
2484 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2485 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2486 return true;
2487 }
2488
2489 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2490 doesn't result in any issue since we throw away the lvisited set
2491 when we fail. */
2492 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2493
2494 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2495 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2496 visited, lvisited, cost_vec))
2497 return false;
2498
bd2f172f
RB
2499 /* Push SLP node def-type to stmt operands. */
2500 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2501 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581 2502 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
bd2f172f 2503 = SLP_TREE_DEF_TYPE (child);
15944069
RS
2504 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2505 cost_vec);
bd2f172f
RB
2506 /* Restore def-types. */
2507 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2508 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581 2509 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
bd2f172f
RB
2510 = vect_internal_def;
2511 if (! res)
2512 return false;
2513
2514 return true;
a70d6342
IR
2515}
2516
2517
8b7e9dba 2518/* Analyze statements in SLP instances of VINFO. Return true if the
a70d6342
IR
2519 operations are supported. */
2520
a12e42fc 2521bool
8b7e9dba 2522vect_slp_analyze_operations (vec_info *vinfo)
a70d6342 2523{
a70d6342
IR
2524 slp_instance instance;
2525 int i;
2526
adac3a68 2527 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
a12e42fc 2528
68435eb2
RB
2529 scalar_stmts_to_slp_tree_map_t *visited
2530 = new scalar_stmts_to_slp_tree_map_t ();
8b7e9dba 2531 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
a70d6342 2532 {
68435eb2
RB
2533 scalar_stmts_to_slp_tree_map_t lvisited;
2534 stmt_vector_for_cost cost_vec;
2535 cost_vec.create (2);
8b7e9dba
RS
2536 if (!vect_slp_analyze_node_operations (vinfo,
2537 SLP_INSTANCE_TREE (instance),
68435eb2
RB
2538 instance, visited, &lvisited,
2539 &cost_vec))
a70d6342 2540 {
b9787581
RS
2541 slp_tree node = SLP_INSTANCE_TREE (instance);
2542 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
a12e42fc 2543 dump_printf_loc (MSG_NOTE, vect_location,
3c2a8ed0
DM
2544 "removing SLP instance operations starting from: %G",
2545 stmt_info->stmt);
6e2dd807 2546 vect_free_slp_instance (instance, false);
8b7e9dba 2547 vinfo->slp_instances.ordered_remove (i);
68435eb2 2548 cost_vec.release ();
a70d6342
IR
2549 }
2550 else
68435eb2
RB
2551 {
2552 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2553 x != lvisited.end(); ++x)
2554 visited->put ((*x).first.copy (), (*x).second);
2555 i++;
78604de0 2556
68435eb2
RB
2557 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2558 cost_vec.release ();
2559 }
2560 }
78604de0
RB
2561 delete visited;
2562
8b7e9dba 2563 return !vinfo->slp_instances.is_empty ();
a70d6342
IR
2564}
2565
6eddf228
RB
2566
2567/* Compute the scalar cost of the SLP node NODE and its children
2568 and return it. Do not account defs that are marked in LIFE and
2569 update LIFE according to uses of NODE. */
2570
a296d6d3 2571static void
292cba13 2572vect_bb_slp_scalar_cost (basic_block bb,
a296d6d3
RB
2573 slp_tree node, vec<bool, va_heap> *life,
2574 stmt_vector_for_cost *cost_vec)
6eddf228 2575{
6eddf228 2576 unsigned i;
b9787581 2577 stmt_vec_info stmt_info;
6eddf228
RB
2578 slp_tree child;
2579
b9787581 2580 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
6eddf228 2581 {
b9787581 2582 gimple *stmt = stmt_info->stmt;
91987857 2583 vec_info *vinfo = stmt_info->vinfo;
6eddf228
RB
2584 ssa_op_iter op_iter;
2585 def_operand_p def_p;
6eddf228 2586
ff4c81cc 2587 if ((*life)[i])
6eddf228
RB
2588 continue;
2589
2590 /* If there is a non-vectorized use of the defs then the scalar
2591 stmt is kept live in which case we do not account it or any
2592 required defs in the SLP children in the scalar cost. This
2593 way we make the vectorization more costly when compared to
2594 the scalar cost. */
2595 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2596 {
2597 imm_use_iterator use_iter;
355fe088 2598 gimple *use_stmt;
6eddf228 2599 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
91987857 2600 if (!is_gimple_debug (use_stmt))
6eddf228 2601 {
91987857
RS
2602 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2603 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2604 {
2605 (*life)[i] = true;
2606 BREAK_FROM_IMM_USE_STMT (use_iter);
2607 }
6eddf228
RB
2608 }
2609 }
ff4c81cc 2610 if ((*life)[i])
6eddf228
RB
2611 continue;
2612
b555a2e4
RB
2613 /* Count scalar stmts only once. */
2614 if (gimple_visited_p (stmt))
2615 continue;
2616 gimple_set_visited (stmt, true);
2617
a296d6d3 2618 vect_cost_for_stmt kind;
6eddf228
RB
2619 if (STMT_VINFO_DATA_REF (stmt_info))
2620 {
2621 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
a296d6d3 2622 kind = scalar_load;
6eddf228 2623 else
a296d6d3 2624 kind = scalar_store;
6eddf228
RB
2625 }
2626 else
a296d6d3
RB
2627 kind = scalar_stmt;
2628 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
6eddf228
RB
2629 }
2630
faa5399b 2631 auto_vec<bool, 20> subtree_life;
6eddf228 2632 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
2633 {
2634 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2635 {
2636 /* Do not directly pass LIFE to the recursive call, copy it to
2637 confine changes in the callee to the current child/subtree. */
2638 subtree_life.safe_splice (*life);
a296d6d3 2639 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
faa5399b
RB
2640 subtree_life.truncate (0);
2641 }
2642 }
6eddf228
RB
2643}
2644
69f11a13
IR
2645/* Check if vectorization of the basic block is profitable. */
2646
2647static bool
2648vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2649{
9771b263 2650 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2651 slp_instance instance;
1a4b99c1 2652 int i;
c3e7ee41 2653 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2654 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2655
2656 /* Calculate scalar cost. */
a296d6d3
RB
2657 stmt_vector_for_cost scalar_costs;
2658 scalar_costs.create (0);
6eddf228 2659 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2660 {
00f96dc9 2661 auto_vec<bool, 20> life;
ff4c81cc 2662 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
a296d6d3
RB
2663 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2664 SLP_INSTANCE_TREE (instance),
2665 &life, &scalar_costs);
2666 }
2667 void *target_cost_data = init_cost (NULL);
68435eb2 2668 add_stmt_costs (target_cost_data, &scalar_costs);
a296d6d3
RB
2669 scalar_costs.release ();
2670 unsigned dummy;
2671 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2672 destroy_cost_data (target_cost_data);
69f11a13 2673
b555a2e4
RB
2674 /* Unset visited flag. */
2675 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2676 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2677 gimple_set_visited (gsi_stmt (gsi), false);
2678
c3e7ee41 2679 /* Complete the target-specific cost calculation. */
92345349
BS
2680 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2681 &vec_inside_cost, &vec_epilogue_cost);
2682
2683 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2684
73fbfcad 2685 if (dump_enabled_p ())
69f11a13 2686 {
78c60e3d
SS
2687 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2688 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2689 vec_inside_cost);
2690 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2691 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2692 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2693 }
2694
a6524bba
RB
2695 /* Vectorization is profitable if its cost is more than the cost of scalar
2696 version. Note that we err on the vector side for equal cost because
2697 the cost estimate is otherwise quite pessimistic (constant uses are
2698 free on the scalar side but cost a load on the vector side for
2699 example). */
2700 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2701 return false;
2702
2703 return true;
2704}
2705
a5b50aa1
RB
2706/* Check if the basic block can be vectorized. Returns a bb_vec_info
2707 if so and sets fatal to true if failure is independent of
2708 current_vector_size. */
a70d6342 2709
8e19f5a1 2710static bb_vec_info
61d371eb
RB
2711vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2712 gimple_stmt_iterator region_end,
a5b50aa1 2713 vec<data_reference_p> datarefs, int n_stmts,
ca823c85 2714 bool &fatal, vec_info_shared *shared)
a70d6342
IR
2715{
2716 bb_vec_info bb_vinfo;
a70d6342 2717 slp_instance instance;
8e19f5a1 2718 int i;
d9f21f6a 2719 poly_uint64 min_vf = 2;
e4a707c4 2720
a5b50aa1
RB
2721 /* The first group of checks is independent of the vector size. */
2722 fatal = true;
2723
61d371eb
RB
2724 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2725 {
2726 if (dump_enabled_p ())
2727 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2728 "not vectorized: too many instructions in "
2729 "basic block.\n");
2730 free_data_refs (datarefs);
2731 return NULL;
2732 }
2733
ca823c85 2734 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
a70d6342
IR
2735 if (!bb_vinfo)
2736 return NULL;
2737
61d371eb 2738 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
ca823c85 2739 bb_vinfo->shared->save_datarefs ();
428db0ba
RB
2740
2741 /* Analyze the data references. */
2742
2743 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
a70d6342 2744 {
73fbfcad 2745 if (dump_enabled_p ())
78c60e3d
SS
2746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2747 "not vectorized: unhandled data-ref in basic "
2748 "block.\n");
b8698a0f 2749
2c515559 2750 delete bb_vinfo;
a70d6342
IR
2751 return NULL;
2752 }
2753
fcac74a1 2754 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2755 {
73fbfcad 2756 if (dump_enabled_p ())
78c60e3d
SS
2757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2758 "not vectorized: not enough data-refs in "
2759 "basic block.\n");
a70d6342 2760
2c515559 2761 delete bb_vinfo;
a70d6342
IR
2762 return NULL;
2763 }
2764
310213d4 2765 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
2766 {
2767 if (dump_enabled_p ())
2768 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2769 "not vectorized: unhandled data access in "
2770 "basic block.\n");
2771
2c515559 2772 delete bb_vinfo;
5abe1e05
RB
2773 return NULL;
2774 }
2775
a5b50aa1
RB
2776 /* If there are no grouped stores in the region there is no need
2777 to continue with pattern recog as vect_analyze_slp will fail
2778 anyway. */
2779 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 2780 {
73fbfcad 2781 if (dump_enabled_p ())
a5b50aa1
RB
2782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2783 "not vectorized: no grouped stores in "
2784 "basic block.\n");
b8698a0f 2785
2c515559 2786 delete bb_vinfo;
a70d6342
IR
2787 return NULL;
2788 }
b8698a0f 2789
a5b50aa1
RB
2790 /* While the rest of the analysis below depends on it in some way. */
2791 fatal = false;
2792
2793 vect_pattern_recog (bb_vinfo);
2794
a70d6342
IR
2795 /* Check the SLP opportunities in the basic block, analyze and build SLP
2796 trees. */
310213d4 2797 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 2798 {
73fbfcad 2799 if (dump_enabled_p ())
effb52da
RB
2800 {
2801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2802 "Failed to SLP the basic block.\n");
2803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2804 "not vectorized: failed to find SLP opportunities "
2805 "in basic block.\n");
2806 }
a70d6342 2807
2c515559 2808 delete bb_vinfo;
a70d6342
IR
2809 return NULL;
2810 }
b8698a0f 2811
62c8a2cf
RS
2812 vect_record_base_alignments (bb_vinfo);
2813
c2a12ca0
RB
2814 /* Analyze and verify the alignment of data references and the
2815 dependence in the SLP instances. */
a5b50aa1
RB
2816 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2817 {
c2a12ca0
RB
2818 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2819 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1 2820 {
b9787581
RS
2821 slp_tree node = SLP_INSTANCE_TREE (instance);
2822 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
a5b50aa1 2823 dump_printf_loc (MSG_NOTE, vect_location,
3c2a8ed0
DM
2824 "removing SLP instance operations starting from: %G",
2825 stmt_info->stmt);
6e2dd807 2826 vect_free_slp_instance (instance, false);
a5b50aa1
RB
2827 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2828 continue;
2829 }
c2a12ca0
RB
2830
2831 /* Mark all the statements that we want to vectorize as pure SLP and
2832 relevant. */
2833 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2834 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2835
a5b50aa1
RB
2836 i++;
2837 }
a5b50aa1
RB
2838 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2839 {
2c515559 2840 delete bb_vinfo;
a5b50aa1
RB
2841 return NULL;
2842 }
2843
8b7e9dba 2844 if (!vect_slp_analyze_operations (bb_vinfo))
a70d6342 2845 {
73fbfcad 2846 if (dump_enabled_p ())
e645e942 2847 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2848 "not vectorized: bad operation in basic block.\n");
a70d6342 2849
2c515559 2850 delete bb_vinfo;
a70d6342
IR
2851 return NULL;
2852 }
2853
69f11a13 2854 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 2855 if (!unlimited_cost_model (NULL)
69f11a13
IR
2856 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2857 {
73fbfcad 2858 if (dump_enabled_p ())
78c60e3d
SS
2859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2860 "not vectorized: vectorization is not "
2861 "profitable.\n");
69f11a13 2862
2c515559 2863 delete bb_vinfo;
69f11a13
IR
2864 return NULL;
2865 }
2866
73fbfcad 2867 if (dump_enabled_p ())
78c60e3d
SS
2868 dump_printf_loc (MSG_NOTE, vect_location,
2869 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2870
2871 return bb_vinfo;
2872}
2873
2874
428db0ba
RB
2875/* Main entry for the BB vectorizer. Analyze and transform BB, returns
2876 true if anything in the basic-block was vectorized. */
2877
2878bool
2879vect_slp_bb (basic_block bb)
8e19f5a1
IR
2880{
2881 bb_vec_info bb_vinfo;
8e19f5a1 2882 gimple_stmt_iterator gsi;
61d371eb 2883 bool any_vectorized = false;
86e36728 2884 auto_vector_sizes vector_sizes;
8e19f5a1 2885
adac3a68 2886 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
8e19f5a1 2887
8e19f5a1
IR
2888 /* Autodetect first vector size we try. */
2889 current_vector_size = 0;
86e36728
RS
2890 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2891 unsigned int next_size = 0;
8e19f5a1 2892
61d371eb
RB
2893 gsi = gsi_start_bb (bb);
2894
86e36728 2895 poly_uint64 autodetected_vector_size = 0;
8e19f5a1
IR
2896 while (1)
2897 {
61d371eb
RB
2898 if (gsi_end_p (gsi))
2899 break;
2900
2901 gimple_stmt_iterator region_begin = gsi;
2902 vec<data_reference_p> datarefs = vNULL;
2903 int insns = 0;
2904
2905 for (; !gsi_end_p (gsi); gsi_next (&gsi))
428db0ba 2906 {
61d371eb
RB
2907 gimple *stmt = gsi_stmt (gsi);
2908 if (is_gimple_debug (stmt))
2909 continue;
2910 insns++;
2911
2912 if (gimple_location (stmt) != UNKNOWN_LOCATION)
4f5b9c80 2913 vect_location = stmt;
61d371eb 2914
8e846c66 2915 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
61d371eb
RB
2916 break;
2917 }
2918
2919 /* Skip leading unhandled stmts. */
2920 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2921 {
2922 gsi_next (&gsi);
2923 continue;
2924 }
428db0ba 2925
61d371eb
RB
2926 gimple_stmt_iterator region_end = gsi;
2927
2928 bool vectorized = false;
a5b50aa1 2929 bool fatal = false;
ca823c85 2930 vec_info_shared shared;
61d371eb 2931 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
ca823c85 2932 datarefs, insns, fatal, &shared);
61d371eb
RB
2933 if (bb_vinfo
2934 && dbg_cnt (vect_slp))
2935 {
428db0ba 2936 if (dump_enabled_p ())
61d371eb 2937 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
428db0ba 2938
ca823c85 2939 bb_vinfo->shared->check_datarefs ();
428db0ba
RB
2940 vect_schedule_slp (bb_vinfo);
2941
d1ac60d5
RB
2942 unsigned HOST_WIDE_INT bytes;
2943 if (current_vector_size.is_constant (&bytes))
2944 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
6f795a92 2945 "basic block part vectorized using %wu byte "
d1ac60d5
RB
2946 "vectors\n", bytes);
2947 else
2948 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2949 "basic block part vectorized using variable "
2950 "length vectors\n");
428db0ba 2951
61d371eb 2952 vectorized = true;
428db0ba 2953 }
2c515559 2954 delete bb_vinfo;
8e19f5a1 2955
61d371eb 2956 any_vectorized |= vectorized;
8e19f5a1 2957
86e36728
RS
2958 if (next_size == 0)
2959 autodetected_vector_size = current_vector_size;
2960
2961 if (next_size < vector_sizes.length ()
2962 && known_eq (vector_sizes[next_size], autodetected_vector_size))
2963 next_size += 1;
2964
61d371eb 2965 if (vectorized
86e36728
RS
2966 || next_size == vector_sizes.length ()
2967 || known_eq (current_vector_size, 0U)
a5b50aa1
RB
2968 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2969 vector sizes will fail do not bother iterating. */
2970 || fatal)
61d371eb
RB
2971 {
2972 if (gsi_end_p (region_end))
2973 break;
8e19f5a1 2974
61d371eb
RB
2975 /* Skip the unhandled stmt. */
2976 gsi_next (&gsi);
2977
2978 /* And reset vector sizes. */
2979 current_vector_size = 0;
86e36728 2980 next_size = 0;
61d371eb
RB
2981 }
2982 else
2983 {
2984 /* Try the next biggest vector size. */
86e36728 2985 current_vector_size = vector_sizes[next_size++];
61d371eb 2986 if (dump_enabled_p ())
86e36728
RS
2987 {
2988 dump_printf_loc (MSG_NOTE, vect_location,
2989 "***** Re-trying analysis with "
2990 "vector size ");
2991 dump_dec (MSG_NOTE, current_vector_size);
2992 dump_printf (MSG_NOTE, "\n");
2993 }
61d371eb
RB
2994
2995 /* Start over. */
2996 gsi = region_begin;
2997 }
8e19f5a1 2998 }
61d371eb
RB
2999
3000 return any_vectorized;
8e19f5a1
IR
3001}
3002
3003
e4af0bc4 3004/* Return 1 if vector type of boolean constant which is OPNUM
32e8e429 3005 operand in statement STMT_VINFO is a boolean vector. */
e4af0bc4
IE
3006
3007static bool
32e8e429 3008vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, int opnum)
e4af0bc4 3009{
32e8e429 3010 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
e4af0bc4 3011 tree op, vectype;
e4af0bc4
IE
3012 enum vect_def_type dt;
3013
3014 /* For comparison and COND_EXPR type is chosen depending
3015 on the other comparison operand. */
3016 if (TREE_CODE_CLASS (code) == tcc_comparison)
3017 {
32e8e429 3018 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
e4af0bc4
IE
3019 if (opnum)
3020 op = gimple_assign_rhs1 (stmt);
3021 else
3022 op = gimple_assign_rhs2 (stmt);
3023
894dd753 3024 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3025 gcc_unreachable ();
3026
3027 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3028 }
3029
3030 if (code == COND_EXPR)
3031 {
32e8e429 3032 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
e4af0bc4
IE
3033 tree cond = gimple_assign_rhs1 (stmt);
3034
3035 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1
RB
3036 op = cond;
3037 else if (opnum)
e4af0bc4
IE
3038 op = TREE_OPERAND (cond, 1);
3039 else
3040 op = TREE_OPERAND (cond, 0);
3041
894dd753 3042 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
e4af0bc4
IE
3043 gcc_unreachable ();
3044
3045 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3046 }
3047
3048 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3049}
3050
018b2744
RS
3051/* Build a variable-length vector in which the elements in ELTS are repeated
3052 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3053 RESULTS and add any new instructions to SEQ.
3054
3055 The approach we use is:
3056
3057 (1) Find a vector mode VM with integer elements of mode IM.
3058
3059 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3060 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3061 from small vectors to IM.
3062
3063 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3064
3065 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3066 correct byte contents.
3067
3068 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3069
3070 We try to find the largest IM for which this sequence works, in order
3071 to cut down on the number of interleaves. */
3072
f1739b48 3073void
018b2744
RS
3074duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3075 unsigned int nresults, vec<tree> &results)
3076{
3077 unsigned int nelts = elts.length ();
3078 tree element_type = TREE_TYPE (vector_type);
3079
3080 /* (1) Find a vector mode VM with integer elements of mode IM. */
3081 unsigned int nvectors = 1;
3082 tree new_vector_type;
3083 tree permutes[2];
3084 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3085 &nvectors, &new_vector_type,
3086 permutes))
3087 gcc_unreachable ();
3088
3089 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3090 unsigned int partial_nelts = nelts / nvectors;
3091 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3092
3093 tree_vector_builder partial_elts;
3094 auto_vec<tree, 32> pieces (nvectors * 2);
3095 pieces.quick_grow (nvectors * 2);
3096 for (unsigned int i = 0; i < nvectors; ++i)
3097 {
3098 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3099 ELTS' has mode IM. */
3100 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3101 for (unsigned int j = 0; j < partial_nelts; ++j)
3102 partial_elts.quick_push (elts[i * partial_nelts + j]);
3103 tree t = gimple_build_vector (seq, &partial_elts);
3104 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3105 TREE_TYPE (new_vector_type), t);
3106
3107 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3108 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3109 }
3110
3111 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3112 correct byte contents.
3113
3114 We need to repeat the following operation log2(nvectors) times:
3115
3116 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3117 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3118
3119 However, if each input repeats every N elements and the VF is
3120 a multiple of N * 2, the HI result is the same as the LO. */
3121 unsigned int in_start = 0;
3122 unsigned int out_start = nvectors;
3123 unsigned int hi_start = nvectors / 2;
3124 /* A bound on the number of outputs needed to produce NRESULTS results
3125 in the final iteration. */
3126 unsigned int noutputs_bound = nvectors * nresults;
3127 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3128 {
3129 noutputs_bound /= 2;
3130 unsigned int limit = MIN (noutputs_bound, nvectors);
3131 for (unsigned int i = 0; i < limit; ++i)
3132 {
3133 if ((i & 1) != 0
3134 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3135 2 * in_repeat))
3136 {
3137 pieces[out_start + i] = pieces[out_start + i - 1];
3138 continue;
3139 }
3140
3141 tree output = make_ssa_name (new_vector_type);
3142 tree input1 = pieces[in_start + (i / 2)];
3143 tree input2 = pieces[in_start + (i / 2) + hi_start];
3144 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3145 input1, input2,
3146 permutes[i & 1]);
3147 gimple_seq_add_stmt (seq, stmt);
3148 pieces[out_start + i] = output;
3149 }
3150 std::swap (in_start, out_start);
3151 }
3152
3153 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3154 results.reserve (nresults);
3155 for (unsigned int i = 0; i < nresults; ++i)
3156 if (i < nvectors)
3157 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3158 pieces[in_start + i]));
3159 else
3160 results.quick_push (results[i - nvectors]);
3161}
3162
e4af0bc4 3163
b8698a0f
L
3164/* For constant and loop invariant defs of SLP_NODE this function returns
3165 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
3166 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3167 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
3168 REDUC_INDEX is the index of the reduction operand in the statements, unless
3169 it is -1. */
ebfd146a
IR
3170
3171static void
9dc3f7de 3172vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 3173 vec<tree> *vec_oprnds,
306b0c92 3174 unsigned int op_num, unsigned int number_of_vectors)
ebfd146a 3175{
b9787581
RS
3176 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3177 stmt_vec_info stmt_vinfo = stmts[0];
3178 gimple *stmt = stmt_vinfo->stmt;
018b2744 3179 unsigned HOST_WIDE_INT nunits;
ebfd146a 3180 tree vec_cst;
d2a12ae7 3181 unsigned j, number_of_places_left_in_vector;
ebfd146a 3182 tree vector_type;
9dc3f7de 3183 tree vop;
9771b263 3184 int group_size = stmts.length ();
ebfd146a 3185 unsigned int vec_num, i;
d2a12ae7 3186 unsigned number_of_copies = 1;
9771b263
DN
3187 vec<tree> voprnds;
3188 voprnds.create (number_of_vectors);
ebfd146a 3189 bool constant_p, is_store;
b5aeb3bb 3190 tree neutral_op = NULL;
bac430c9 3191 enum tree_code code = gimple_expr_code (stmt);
13396b6e 3192 gimple_seq ctor_seq = NULL;
018b2744 3193 auto_vec<tree, 16> permute_results;
b5aeb3bb 3194
42fd8198 3195 /* Check if vector type is a boolean vector. */
2568d8a1 3196 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
b9787581 3197 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
42fd8198
IE
3198 vector_type
3199 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3200 else
3201 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
afbe6325 3202
ebfd146a
IR
3203 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3204 {
3205 is_store = true;
3206 op = gimple_assign_rhs1 (stmt);
3207 }
3208 else
9dc3f7de
IR
3209 is_store = false;
3210
3211 gcc_assert (op);
ebfd146a 3212
ebfd146a 3213 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3214 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3215
3216 For example, we have two scalar operands, s1 and s2 (e.g., group of
3217 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3218 of this type can be packed in a vector). The output vector will contain
3219 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3220 will be 2).
3221
b8698a0f 3222 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3223 containing the operands.
3224
3225 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3226 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3227 {s5, s6, s7, s8}. */
b8698a0f 3228
018b2744
RS
3229 /* When using duplicate_and_interleave, we just need one element for
3230 each scalar statement. */
3231 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3232 nunits = group_size;
3233
14a61437 3234 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3235
3236 number_of_places_left_in_vector = nunits;
62cf7335 3237 constant_p = true;
5ebaa477 3238 tree_vector_builder elts (vector_type, nunits, 1);
794e3180 3239 elts.quick_grow (nunits);
90dd6e3d 3240 bool place_after_defs = false;
ebfd146a
IR
3241 for (j = 0; j < number_of_copies; j++)
3242 {
b9787581 3243 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
ebfd146a 3244 {
b9787581 3245 stmt = stmt_vinfo->stmt;
ebfd146a
IR
3246 if (is_store)
3247 op = gimple_assign_rhs1 (stmt);
bac430c9 3248 else
f7e531cf 3249 {
bac430c9 3250 switch (code)
f7e531cf 3251 {
bac430c9 3252 case COND_EXPR:
a989bcc3
IE
3253 {
3254 tree cond = gimple_assign_rhs1 (stmt);
3255 if (TREE_CODE (cond) == SSA_NAME)
3256 op = gimple_op (stmt, op_num + 1);
3257 else if (op_num == 0 || op_num == 1)
bac430c9 3258 op = TREE_OPERAND (cond, op_num);
a989bcc3
IE
3259 else
3260 {
3261 if (op_num == 2)
3262 op = gimple_assign_rhs2 (stmt);
3263 else
3264 op = gimple_assign_rhs3 (stmt);
3265 }
3266 }
bac430c9
IR
3267 break;
3268
3269 case CALL_EXPR:
3270 op = gimple_call_arg (stmt, op_num);
3271 break;
3272
b84b294a
JJ
3273 case LSHIFT_EXPR:
3274 case RSHIFT_EXPR:
3275 case LROTATE_EXPR:
3276 case RROTATE_EXPR:
3277 op = gimple_op (stmt, op_num + 1);
3278 /* Unlike the other binary operators, shifts/rotates have
3279 the shift count being int, instead of the same type as
3280 the lhs, so make sure the scalar is the right type if
3281 we are dealing with vectors of
3282 long long/long/short/char. */
793d9a16 3283 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
3284 op = fold_convert (TREE_TYPE (vector_type), op);
3285 break;
3286
bac430c9
IR
3287 default:
3288 op = gimple_op (stmt, op_num + 1);
b84b294a 3289 break;
f7e531cf
IR
3290 }
3291 }
b8698a0f 3292
ebfd146a 3293 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3294 number_of_places_left_in_vector--;
90dd6e3d 3295 tree orig_op = op;
13396b6e 3296 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3297 {
793d9a16 3298 if (CONSTANT_CLASS_P (op))
13396b6e 3299 {
42fd8198
IE
3300 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3301 {
3302 /* Can't use VIEW_CONVERT_EXPR for booleans because
3303 of possibly different sizes of scalar value and
3304 vector element. */
3305 if (integer_zerop (op))
3306 op = build_int_cst (TREE_TYPE (vector_type), 0);
3307 else if (integer_onep (op))
158beb4a 3308 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3309 else
3310 gcc_unreachable ();
3311 }
3312 else
3313 op = fold_unary (VIEW_CONVERT_EXPR,
3314 TREE_TYPE (vector_type), op);
13396b6e
JJ
3315 gcc_assert (op && CONSTANT_CLASS_P (op));
3316 }
3317 else
3318 {
b731b390 3319 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3320 gimple *init_stmt;
262a363f
JJ
3321 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3322 {
158beb4a
JJ
3323 tree true_val
3324 = build_all_ones_cst (TREE_TYPE (vector_type));
3325 tree false_val
3326 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3327 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3328 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3329 op, true_val,
3330 false_val);
262a363f 3331 }
262a363f
JJ
3332 else
3333 {
3334 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3335 op);
3336 init_stmt
3337 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3338 op);
3339 }
13396b6e
JJ
3340 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3341 op = new_temp;
3342 }
50eeef09 3343 }
d2a12ae7 3344 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3345 if (!CONSTANT_CLASS_P (op))
3346 constant_p = false;
90dd6e3d
RB
3347 if (TREE_CODE (orig_op) == SSA_NAME
3348 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3349 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3350 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3351 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3352 place_after_defs = true;
ebfd146a
IR
3353
3354 if (number_of_places_left_in_vector == 0)
3355 {
018b2744
RS
3356 if (constant_p
3357 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3358 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3359 vec_cst = gimple_build_vector (&ctor_seq, &elts);
ebfd146a 3360 else
d2a12ae7 3361 {
018b2744
RS
3362 if (vec_oprnds->is_empty ())
3363 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3364 number_of_vectors,
3365 permute_results);
3366 vec_cst = permute_results[number_of_vectors - j - 1];
d2a12ae7 3367 }
90dd6e3d
RB
3368 tree init;
3369 gimple_stmt_iterator gsi;
3370 if (place_after_defs)
3371 {
95c68311
RS
3372 stmt_vec_info last_stmt_info
3373 = vect_find_last_scalar_stmt_in_slp (slp_node);
3374 gsi = gsi_for_stmt (last_stmt_info->stmt);
b9787581
RS
3375 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3376 &gsi);
90dd6e3d
RB
3377 }
3378 else
b9787581
RS
3379 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3380 NULL);
13396b6e
JJ
3381 if (ctor_seq != NULL)
3382 {
90dd6e3d 3383 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
018b2744 3384 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
13396b6e
JJ
3385 ctor_seq = NULL;
3386 }
90dd6e3d
RB
3387 voprnds.quick_push (init);
3388 place_after_defs = false;
62cf7335
RB
3389 number_of_places_left_in_vector = nunits;
3390 constant_p = true;
5ebaa477
RS
3391 elts.new_vector (vector_type, nunits, 1);
3392 elts.quick_grow (nunits);
ebfd146a
IR
3393 }
3394 }
3395 }
3396
b8698a0f 3397 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 3398 them. */
9771b263 3399 vec_num = voprnds.length ();
d2a12ae7 3400 for (j = vec_num; j != 0; j--)
ebfd146a 3401 {
9771b263
DN
3402 vop = voprnds[j - 1];
3403 vec_oprnds->quick_push (vop);
ebfd146a
IR
3404 }
3405
9771b263 3406 voprnds.release ();
ebfd146a
IR
3407
3408 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3409 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3410 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3411 to replicate the vectors. */
9771b263 3412 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3413 {
b5aeb3bb
IR
3414 tree neutral_vec = NULL;
3415
3416 if (neutral_op)
3417 {
3418 if (!neutral_vec)
b9acc9f1 3419 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3420
9771b263 3421 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3422 }
3423 else
3424 {
9771b263
DN
3425 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3426 vec_oprnds->quick_push (vop);
b5aeb3bb 3427 }
ebfd146a
IR
3428 }
3429}
3430
3431
3432/* Get vectorized definitions from SLP_NODE that contains corresponding
3433 vectorized def-stmts. */
3434
3435static void
9771b263 3436vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
3437{
3438 tree vec_oprnd;
16edaeb8 3439 stmt_vec_info vec_def_stmt_info;
ebfd146a
IR
3440 unsigned int i;
3441
9771b263 3442 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3443
16edaeb8 3444 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
ebfd146a 3445 {
16edaeb8
RS
3446 gcc_assert (vec_def_stmt_info);
3447 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3448 vec_oprnd = gimple_phi_result (vec_def_phi);
e7baeb39 3449 else
16edaeb8 3450 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
9771b263 3451 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
3452 }
3453}
3454
3455
b8698a0f
L
3456/* Get vectorized definitions for SLP_NODE.
3457 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
3458 call vect_get_constant_vectors() to create vector stmts.
3459 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
3460 must be stored in the corresponding child of SLP_NODE, and we call
3461 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 3462
ebfd146a 3463void
9771b263 3464vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
306b0c92 3465 vec<vec<tree> > *vec_oprnds)
ebfd146a 3466{
d092494c 3467 int number_of_vects = 0, i;
77eefb71 3468 unsigned int child_index = 0;
b8698a0f 3469 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
d092494c 3470 slp_tree child = NULL;
37b5ec8f 3471 vec<tree> vec_defs;
e44978dc 3472 tree oprnd;
77eefb71 3473 bool vectorized_defs;
ebfd146a 3474
a1824cfd 3475 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
9771b263 3476 FOR_EACH_VEC_ELT (ops, i, oprnd)
ebfd146a 3477 {
d092494c
IR
3478 /* For each operand we check if it has vectorized definitions in a child
3479 node or we need to create them (for invariants and constants). We
3480 check if the LHS of the first stmt of the next child matches OPRND.
3481 If it does, we found the correct child. Otherwise, we call
77eefb71
RB
3482 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3483 to check this child node for the next operand. */
3484 vectorized_defs = false;
3485 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
ebfd146a 3486 {
01d8bf07 3487 child = SLP_TREE_CHILDREN (slp_node)[child_index];
d092494c 3488
e44978dc 3489 /* We have to check both pattern and original def, if available. */
603cca93 3490 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
e44978dc 3491 {
b9787581
RS
3492 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3493 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
e7baeb39 3494 tree first_def_op;
90dd6e3d 3495
b9787581 3496 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
e7baeb39
RB
3497 first_def_op = gimple_phi_result (first_def);
3498 else
b9787581 3499 first_def_op = gimple_get_lhs (first_def_info->stmt);
e7baeb39 3500 if (operand_equal_p (oprnd, first_def_op, 0)
90dd6e3d 3501 || (related
10681ce8
RS
3502 && operand_equal_p (oprnd,
3503 gimple_get_lhs (related->stmt), 0)))
90dd6e3d
RB
3504 {
3505 /* The number of vector defs is determined by the number of
3506 vector statements in the node from which we get those
3507 statements. */
3508 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3509 vectorized_defs = true;
77eefb71 3510 child_index++;
90dd6e3d 3511 }
e44978dc 3512 }
77eefb71
RB
3513 else
3514 child_index++;
d092494c 3515 }
ebfd146a 3516
77eefb71
RB
3517 if (!vectorized_defs)
3518 {
3519 if (i == 0)
3520 {
3521 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3522 /* Number of vector stmts was calculated according to LHS in
3523 vect_schedule_slp_instance (), fix it by replacing LHS with
3524 RHS, if necessary. See vect_get_smallest_scalar_type () for
3525 details. */
a1824cfd
RS
3526 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
3527 &rhs_size_unit);
77eefb71
RB
3528 if (rhs_size_unit != lhs_size_unit)
3529 {
3530 number_of_vects *= rhs_size_unit;
3531 number_of_vects /= lhs_size_unit;
3532 }
3533 }
d092494c 3534 }
b5aeb3bb 3535
d092494c 3536 /* Allocate memory for vectorized defs. */
37b5ec8f
JJ
3537 vec_defs = vNULL;
3538 vec_defs.create (number_of_vects);
ebfd146a 3539
d092494c
IR
3540 /* For reduction defs we call vect_get_constant_vectors (), since we are
3541 looking for initial loop invariant values. */
306b0c92 3542 if (vectorized_defs)
d092494c 3543 /* The defs are already vectorized. */
37b5ec8f 3544 vect_get_slp_vect_defs (child, &vec_defs);
d092494c 3545 else
e7baeb39 3546 /* Build vectors from scalar defs. */
37b5ec8f 3547 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
306b0c92 3548 number_of_vects);
ebfd146a 3549
37b5ec8f 3550 vec_oprnds->quick_push (vec_defs);
d092494c 3551 }
ebfd146a
IR
3552}
3553
ebfd146a
IR
3554/* Generate vector permute statements from a list of loads in DR_CHAIN.
3555 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3556 permute statements for the SLP node NODE of the SLP instance
3557 SLP_NODE_INSTANCE. */
3558
ebfd146a 3559bool
01d8bf07 3560vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
d9f21f6a
RS
3561 gimple_stmt_iterator *gsi, poly_uint64 vf,
3562 slp_instance slp_node_instance, bool analyze_only,
29afecdf 3563 unsigned *n_perms)
ebfd146a 3564{
b9787581 3565 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
16edaeb8 3566 vec_info *vinfo = stmt_info->vinfo;
928686b1 3567 int vec_index = 0;
2635892a 3568 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ab7e60ce 3569 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
928686b1 3570 unsigned int mask_element;
ef4bddc2 3571 machine_mode mode;
ebfd146a 3572
91ff1504
RB
3573 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3574 return false;
3575
bffb8014 3576 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
91ff1504 3577
22e4dee7 3578 mode = TYPE_MODE (vectype);
ab7e60ce 3579 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a 3580
61fdfd8c
RB
3581 /* Initialize the vect stmts of NODE to properly insert the generated
3582 stmts later. */
3583 if (! analyze_only)
3584 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3585 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3586 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a 3587
b8698a0f
L
3588 /* Generate permutation masks for every NODE. Number of masks for each NODE
3589 is equal to GROUP_SIZE.
3590 E.g., we have a group of three nodes with three loads from the same
3591 location in each node, and the vector size is 4. I.e., we have a
3592 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3593 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3594 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3595 ...
3596
2635892a 3597 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3598 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
3599 operation, and the mask element values can't be outside that range.
3600 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3601 For the first two permutations we need the first and the second input
ebfd146a 3602 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3603 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3604 {c2,a3,b3,c3}. */
3605
2ce27200 3606 int vect_stmts_counter = 0;
928686b1 3607 unsigned int index = 0;
2ce27200
RB
3608 int first_vec_index = -1;
3609 int second_vec_index = -1;
be377c80 3610 bool noop_p = true;
29afecdf 3611 *n_perms = 0;
ebfd146a 3612
ab7e60ce
RS
3613 vec_perm_builder mask;
3614 unsigned int nelts_to_build;
3615 unsigned int nvectors_per_build;
3616 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3617 && multiple_p (nunits, group_size));
3618 if (repeating_p)
2ce27200 3619 {
ab7e60ce
RS
3620 /* A single vector contains a whole number of copies of the node, so:
3621 (a) all permutes can use the same mask; and
3622 (b) the permutes only need a single vector input. */
3623 mask.new_vector (nunits, group_size, 3);
3624 nelts_to_build = mask.encoded_nelts ();
3625 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3626 }
3627 else
3628 {
3629 /* We need to construct a separate mask for each vector statement. */
3630 unsigned HOST_WIDE_INT const_nunits, const_vf;
3631 if (!nunits.is_constant (&const_nunits)
3632 || !vf.is_constant (&const_vf))
3633 return false;
3634 mask.new_vector (const_nunits, const_nunits, 1);
3635 nelts_to_build = const_vf * group_size;
3636 nvectors_per_build = 1;
3637 }
3638
3639 unsigned int count = mask.encoded_nelts ();
3640 mask.quick_grow (count);
3641 vec_perm_indices indices;
3642
3643 for (unsigned int j = 0; j < nelts_to_build; j++)
3644 {
3645 unsigned int iter_num = j / group_size;
3646 unsigned int stmt_num = j % group_size;
3647 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3648 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3649 if (repeating_p)
2ce27200 3650 {
ab7e60ce
RS
3651 first_vec_index = 0;
3652 mask_element = i;
3653 }
3654 else
3655 {
3656 /* Enforced before the loop when !repeating_p. */
3657 unsigned int const_nunits = nunits.to_constant ();
3658 vec_index = i / const_nunits;
3659 mask_element = i % const_nunits;
2ce27200
RB
3660 if (vec_index == first_vec_index
3661 || first_vec_index == -1)
3662 {
3663 first_vec_index = vec_index;
3664 }
3665 else if (vec_index == second_vec_index
3666 || second_vec_index == -1)
3667 {
3668 second_vec_index = vec_index;
ab7e60ce 3669 mask_element += const_nunits;
2ce27200
RB
3670 }
3671 else
3672 {
3673 if (dump_enabled_p ())
3c2a8ed0
DM
3674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3675 "permutation requires at "
3676 "least three vectors %G",
3677 stmt_info->stmt);
31bee964 3678 gcc_assert (analyze_only);
2ce27200
RB
3679 return false;
3680 }
ebfd146a 3681
ab7e60ce
RS
3682 gcc_assert (mask_element < 2 * const_nunits);
3683 }
3684
3685 if (mask_element != index)
3686 noop_p = false;
3687 mask[index++] = mask_element;
2ce27200 3688
ab7e60ce
RS
3689 if (index == count && !noop_p)
3690 {
3691 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3692 if (!can_vec_perm_const_p (mode, indices))
2ce27200 3693 {
ab7e60ce 3694 if (dump_enabled_p ())
2ce27200 3695 {
ab7e60ce
RS
3696 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3697 vect_location,
3698 "unsupported vect permute { ");
3699 for (i = 0; i < count; ++i)
22e4dee7 3700 {
ab7e60ce
RS
3701 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3702 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
22e4dee7 3703 }
ab7e60ce 3704 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
2ce27200 3705 }
ab7e60ce
RS
3706 gcc_assert (analyze_only);
3707 return false;
e3342de4 3708 }
29afecdf 3709
ab7e60ce
RS
3710 ++*n_perms;
3711 }
3712
3713 if (index == count)
3714 {
3715 if (!analyze_only)
e3342de4 3716 {
ab7e60ce 3717 tree mask_vec = NULL_TREE;
be377c80 3718
ab7e60ce
RS
3719 if (! noop_p)
3720 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
2ce27200 3721
ab7e60ce
RS
3722 if (second_vec_index == -1)
3723 second_vec_index = first_vec_index;
61fdfd8c 3724
ab7e60ce
RS
3725 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3726 {
61fdfd8c 3727 /* Generate the permute statement if necessary. */
ab7e60ce
RS
3728 tree first_vec = dr_chain[first_vec_index + ri];
3729 tree second_vec = dr_chain[second_vec_index + ri];
16edaeb8 3730 stmt_vec_info perm_stmt_info;
61fdfd8c
RB
3731 if (! noop_p)
3732 {
b9787581 3733 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
61fdfd8c
RB
3734 tree perm_dest
3735 = vect_create_destination_var (gimple_assign_lhs (stmt),
3736 vectype);
3737 perm_dest = make_ssa_name (perm_dest);
16edaeb8
RS
3738 gassign *perm_stmt
3739 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3740 first_vec, second_vec,
3741 mask_vec);
3742 perm_stmt_info
b9787581
RS
3743 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3744 gsi);
61fdfd8c
RB
3745 }
3746 else
3747 /* If mask was NULL_TREE generate the requested
3748 identity transform. */
16edaeb8 3749 perm_stmt_info = vinfo->lookup_def (first_vec);
61fdfd8c
RB
3750
3751 /* Store the vector statement in NODE. */
16edaeb8
RS
3752 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3753 = perm_stmt_info;
2ce27200 3754 }
2ce27200 3755 }
ab7e60ce
RS
3756
3757 index = 0;
3758 first_vec_index = -1;
3759 second_vec_index = -1;
3760 noop_p = true;
2ce27200 3761 }
b8698a0f 3762 }
ebfd146a 3763
ebfd146a
IR
3764 return true;
3765}
3766
ebfd146a
IR
3767/* Vectorize SLP instance tree in postorder. */
3768
8fe1bd30 3769static void
f7300fff
RB
3770vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3771 scalar_stmts_to_slp_tree_map_t *bst_map)
ebfd146a 3772{
ebfd146a
IR
3773 gimple_stmt_iterator si;
3774 stmt_vec_info stmt_info;
8b7e9dba 3775 unsigned int group_size;
ebfd146a 3776 tree vectype;
603cca93 3777 int i, j;
d755c7ef 3778 slp_tree child;
ebfd146a 3779
603cca93 3780 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
8fe1bd30 3781 return;
ebfd146a 3782
a1f072e2
RB
3783 /* See if we have already vectorized the node in the graph of the
3784 SLP instance. */
3785 if (SLP_TREE_VEC_STMTS (node).exists ())
3786 return;
3787
f7300fff 3788 /* See if we have already vectorized the same set of stmts and reuse their
a1f072e2 3789 vectorized stmts across instances. */
dd172744 3790 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
f7300fff 3791 {
dd172744 3792 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
8fe1bd30 3793 return;
f7300fff
RB
3794 }
3795
dd172744 3796 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
9771b263 3797 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
f7300fff 3798 vect_schedule_slp_instance (child, instance, bst_map);
b8698a0f 3799
603cca93
RB
3800 /* Push SLP node def-type to stmts. */
3801 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3802 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
3803 {
3804 stmt_vec_info child_stmt_info;
3805 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3806 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
3807 }
603cca93 3808
b9787581 3809 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3810
3811 /* VECTYPE is the type of the destination. */
b690cc0f 3812 vectype = STMT_VINFO_VECTYPE (stmt_info);
dad55d70 3813 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
3814 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3815
68435eb2 3816 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
a1f072e2 3817 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
ebfd146a 3818
73fbfcad 3819 if (dump_enabled_p ())
3c2a8ed0
DM
3820 dump_printf_loc (MSG_NOTE, vect_location,
3821 "------>vectorizing SLP node starting from: %G",
3822 stmt_info->stmt);
ebfd146a 3823
2e8ab70c
RB
3824 /* Vectorized stmts go before the last scalar stmt which is where
3825 all uses are ready. */
95c68311
RS
3826 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
3827 si = gsi_for_stmt (last_stmt_info->stmt);
e4a707c4 3828
b010117a
IR
3829 /* Mark the first element of the reduction chain as reduction to properly
3830 transform the node. In the analysis phase only the last element of the
3831 chain is marked as reduction. */
2c53b149
RB
3832 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3833 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
b9787581 3834 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
b010117a
IR
3835 {
3836 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3837 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3838 }
3839
6876e5bc
RB
3840 /* Handle two-operation SLP nodes by vectorizing the group with
3841 both operations and then performing a merge. */
3842 if (SLP_TREE_TWO_OPERATORS (node))
3843 {
b9787581 3844 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
6876e5bc 3845 enum tree_code code0 = gimple_assign_rhs_code (stmt);
567a3691 3846 enum tree_code ocode = ERROR_MARK;
b9787581 3847 stmt_vec_info ostmt_info;
e3342de4 3848 vec_perm_builder mask (group_size, group_size, 1);
b9787581
RS
3849 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
3850 {
3851 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
3852 if (gimple_assign_rhs_code (ostmt) != code0)
3853 {
3854 mask.quick_push (1);
3855 ocode = gimple_assign_rhs_code (ostmt);
3856 }
3857 else
3858 mask.quick_push (0);
3859 }
567a3691 3860 if (ocode != ERROR_MARK)
6876e5bc 3861 {
16edaeb8
RS
3862 vec<stmt_vec_info> v0;
3863 vec<stmt_vec_info> v1;
6876e5bc
RB
3864 unsigned j;
3865 tree tmask = NULL_TREE;
b0b45e58 3866 vect_transform_stmt (stmt_info, &si, node, instance);
6876e5bc
RB
3867 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3868 SLP_TREE_VEC_STMTS (node).truncate (0);
3869 gimple_assign_set_rhs_code (stmt, ocode);
b0b45e58 3870 vect_transform_stmt (stmt_info, &si, node, instance);
6876e5bc
RB
3871 gimple_assign_set_rhs_code (stmt, code0);
3872 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3873 SLP_TREE_VEC_STMTS (node).truncate (0);
3874 tree meltype = build_nonstandard_integer_type
b397965c 3875 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
6876e5bc
RB
3876 tree mvectype = get_same_sized_vectype (meltype, vectype);
3877 unsigned k = 0, l;
3878 for (j = 0; j < v0.length (); ++j)
3879 {
dad55d70
RS
3880 /* Enforced by vect_build_slp_tree, which rejects variable-length
3881 vectors for SLP_TREE_TWO_OPERATORS. */
3882 unsigned int const_nunits = nunits.to_constant ();
3883 tree_vector_builder melts (mvectype, const_nunits, 1);
3884 for (l = 0; l < const_nunits; ++l)
6876e5bc 3885 {
1ece8d4c 3886 if (k >= group_size)
6876e5bc 3887 k = 0;
dad55d70
RS
3888 tree t = build_int_cst (meltype,
3889 mask[k++] * const_nunits + l);
794e3180 3890 melts.quick_push (t);
6876e5bc 3891 }
5ebaa477 3892 tmask = melts.build ();
6876e5bc
RB
3893
3894 /* ??? Not all targets support a VEC_PERM_EXPR with a
3895 constant mask that would translate to a vec_merge RTX
3896 (with their vec_perm_const_ok). We can either not
3897 vectorize in that case or let veclower do its job.
3898 Unfortunately that isn't too great and at least for
3899 plus/minus we'd eventually like to match targets
3900 vector addsub instructions. */
355fe088 3901 gimple *vstmt;
6876e5bc
RB
3902 vstmt = gimple_build_assign (make_ssa_name (vectype),
3903 VEC_PERM_EXPR,
16edaeb8
RS
3904 gimple_assign_lhs (v0[j]->stmt),
3905 gimple_assign_lhs (v1[j]->stmt),
3906 tmask);
3907 SLP_TREE_VEC_STMTS (node).quick_push
b9787581 3908 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
6876e5bc
RB
3909 }
3910 v0.release ();
3911 v1.release ();
8fe1bd30 3912 return;
6876e5bc
RB
3913 }
3914 }
b0b45e58 3915 vect_transform_stmt (stmt_info, &si, node, instance);
603cca93
RB
3916
3917 /* Restore stmt def-types. */
3918 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3919 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
b9787581
RS
3920 {
3921 stmt_vec_info child_stmt_info;
3922 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3923 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
3924 }
ebfd146a
IR
3925}
3926
dd34c087
JJ
3927/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3928 For loop vectorization this is done in vectorizable_call, but for SLP
3929 it needs to be deferred until end of vect_schedule_slp, because multiple
3930 SLP instances may refer to the same scalar stmt. */
3931
3932static void
3933vect_remove_slp_scalar_calls (slp_tree node)
3934{
b9787581 3935 gimple *new_stmt;
dd34c087
JJ
3936 gimple_stmt_iterator gsi;
3937 int i;
d755c7ef 3938 slp_tree child;
dd34c087
JJ
3939 tree lhs;
3940 stmt_vec_info stmt_info;
3941
603cca93 3942 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
dd34c087
JJ
3943 return;
3944
9771b263 3945 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3946 vect_remove_slp_scalar_calls (child);
dd34c087 3947
b9787581 3948 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
dd34c087 3949 {
b9787581
RS
3950 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3951 if (!stmt || gimple_bb (stmt) == NULL)
dd34c087 3952 continue;
b9787581 3953 if (is_pattern_stmt_p (stmt_info)
dd34c087
JJ
3954 || !PURE_SLP_STMT (stmt_info))
3955 continue;
3956 lhs = gimple_call_lhs (stmt);
3957 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
dd34c087 3958 gsi = gsi_for_stmt (stmt);
9d97912b 3959 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
dd34c087
JJ
3960 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3961 }
3962}
ebfd146a 3963
ff802fa1
IR
3964/* Generate vector code for all SLP instances in the loop/basic block. */
3965
8fe1bd30 3966void
310213d4 3967vect_schedule_slp (vec_info *vinfo)
ebfd146a 3968{
9771b263 3969 vec<slp_instance> slp_instances;
ebfd146a 3970 slp_instance instance;
8b7e9dba 3971 unsigned int i;
78604de0
RB
3972
3973 scalar_stmts_to_slp_tree_map_t *bst_map
3974 = new scalar_stmts_to_slp_tree_map_t ();
310213d4 3975 slp_instances = vinfo->slp_instances;
9771b263 3976 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
3977 {
3978 /* Schedule the tree of INSTANCE. */
8fe1bd30
RS
3979 vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3980 instance, bst_map);
73fbfcad 3981 if (dump_enabled_p ())
78c60e3d 3982 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3983 "vectorizing stmts using SLP.\n");
ebfd146a 3984 }
78604de0 3985 delete bst_map;
ebfd146a 3986
9771b263 3987 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
3988 {
3989 slp_tree root = SLP_INSTANCE_TREE (instance);
b9787581 3990 stmt_vec_info store_info;
b5aeb3bb 3991 unsigned int j;
b5aeb3bb 3992
c40eced0
RB
3993 /* Remove scalar call stmts. Do not do this for basic-block
3994 vectorization as not all uses may be vectorized.
3995 ??? Why should this be necessary? DCE should be able to
3996 remove the stmts itself.
3997 ??? For BB vectorization we can as well remove scalar
3998 stmts starting from the SLP tree root if they have no
3999 uses. */
310213d4 4000 if (is_a <loop_vec_info> (vinfo))
c40eced0 4001 vect_remove_slp_scalar_calls (root);
dd34c087 4002
b9787581 4003 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
b5aeb3bb
IR
4004 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4005 {
b9787581
RS
4006 if (!STMT_VINFO_DATA_REF (store_info))
4007 break;
4008
211cd1e2 4009 store_info = vect_orig_stmt (store_info);
b9787581 4010 /* Free the attached stmt_vec_info and remove the stmt. */
b5b56c2a 4011 vinfo->remove_stmt (store_info);
b5aeb3bb
IR
4012 }
4013 }
ebfd146a 4014}