gcc/tree-vect-slp.c

/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2016 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
957060b5 35#include "params.h"
40e23961 36#include "fold-const.h"
d8a2d370 37#include "stor-layout.h"
5be5c238 38#include "gimple-iterator.h"
ebfd146a 39#include "cfgloop.h"
ebfd146a 40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
642fce57 42#include "gimple-walk.h"
428db0ba 43#include "dbgcnt.h"
a70d6342
IR
44
45

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}

/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;
  tree first_op_type;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}
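
/* Illustrative example for the function above, assuming GROUP_GAP of a
   non-first chain element records its distance from the previous element
   (1 for consecutive accesses): for the chain a[0] -> a[1] -> a[3] the
   gaps are 1 and 2, so the place of the a[3] access is 0 + 1 + 2 = 3,
   i.e. skipped elements count towards the returned position.  */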

/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  If there was a fatal error
   return -1, if the error could be corrected by swapping operands of the
   operation return 1, if everything is ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          commutative = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = false;
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          if (i == 0 || i == 1)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
                                  swapped ? !i : i);
          else
            oprnd = gimple_op (stmt, first_op_idx + i - 1);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                             &TREE_OPERAND (cond, 1));
          TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  return 0;
}
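
/* Illustrative example for the swapping logic above: given the group

       a[0] = b[0] + c[0];
       a[1] = c[1] + b[1];

   with the b[i] defined by internal (vectorizable) stmts and c an
   external input, the defs of stmt 1 mismatch the def-types recorded
   from stmt 0 in this operand order.  Since PLUS_EXPR is commutative,
   the code retries with SWAPPED set and, on success, canonicalizes
   stmt 1 via swap_ssa_operands so that both stmts read b[i] + c[i].  */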

/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* If populating the vector type requires unrolling then fail
         before adjusting *max_nunits for basic-block vectorization.  */
      if (is_a <bb_vec_info> (vinfo)
          && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unrolling required "
                           "in basic block SLP\n");
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
        = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
        {
          sel[i] = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
        }
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}
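
/* Illustrative example for the two-operator support above: the group

       a[0] = b[0] + c[0];
       a[1] = b[1] - c[1];

   is accepted with first_stmt_code PLUS_EXPR and alt_stmt_code
   MINUS_EXPR.  Both a vector add and a vector subtract are generated
   later, and the lanes are combined with the permute mask built above;
   for group_size == nunits == 2 the mask is {0, 3}, selecting lane 0
   from the add result and lane 1 from the subtract result.  */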
/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return the built SLP tree.  */

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  bool two_operators = false;
  if (!vect_build_slp_tree_1 (vinfo,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      switch (vect_get_and_check_slp_defs (vinfo, stmt, i, &oprnds_info))
        {
        case 0:
          break;
        case -1:
          matches[0] = false;
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        case 1:
          matches[i] = false;
          break;
        }
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && ! two_operators
          /* Do so only if the number of not successful permutes was not
             more than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Verify if we can safely swap or if we committed to a specific
             operand order already.  */
          for (j = 0; j < group_size; ++j)
            if (!matches[j]
                && STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j])) != 0)
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: cannot swap operands "
                                     "of shared stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[j], 0);
                  }
                goto fail;
              }

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
                }
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  if (!matches[j])
                    {
                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))
                        continue;
                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);
                    }
                }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  {
                    gimple *stmt = stmts[j];
                    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
                  }

              /* If we have all children of child built up from scalars then
                 just throw that away and build it up this node from scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}
/* Dump an SLP tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (int dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  sbitmap load_index;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  load_index = sbitmap_alloc (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        {
          sbitmap_free (load_index);
          return false;
        }
      if (bitmap_bit_p (load_index, lidx))
        {
          sbitmap_free (load_index);
          return false;
        }
      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      {
        sbitmap_free (load_index);
        return false;
      }
  sbitmap_free (load_index);

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (unrolling_factor == 1
          || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}
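
/* Illustrative example for the function above: for a reduction whose
   load nodes all carry the permutation {1, 0} on a group of size 2,
   the bitmap check verifies the indices cover 0..group_size-1 without
   duplicates, and vect_slp_rearrange_stmts then reorders the scalar
   stmts of every node so the loads become unpermuted.  This is safe
   because the order of the lanes feeding a reduction is irrelevant.  */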
/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0
                  && (next_load != load
                      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
              (node, vNULL, NULL,
               SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
      return false;

  return true;
}

/* Find the last scalar stmt in the SLP group represented by NODE.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
        last = get_later_stmt (stmt, last);
    }

  return last;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
                         stmt_vector_for_cost *prologue_cost_vec,
                         stmt_vector_for_cost *body_cost_vec,
                         unsigned ncopies_for_cost)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
                               body_cost_vec, ncopies_for_cost);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
        = (STMT_VINFO_STRIDED_P (stmt_info)
           ? VMAT_STRIDED_SLP
           : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
        vect_model_store_cost (stmt_info, ncopies_for_cost,
                               memory_access_type, vect_uninitialized_def,
                               node, prologue_cost_vec, body_cost_vec);
      else
        {
          gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
          if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
            {
              /* If the load is permuted then the alignment is determined by
                 the first group element not by the first scalar stmt DR.  */
              stmt = GROUP_FIRST_ELEMENT (stmt_info);
              stmt_info = vinfo_for_stmt (stmt);
              /* Record the cost for the permutation.  */
              record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
                                stmt_info, 0, vect_body);
              /* And adjust the number of loads performed.  */
              unsigned nunits
                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
              ncopies_for_cost
                = (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
                   + nunits - 1) / nunits;
              ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
            }
          /* Record the cost for the vector loads.  */
          vect_model_load_cost (stmt_info, ncopies_for_cost,
                                memory_access_type, node, prologue_cost_vec,
                                body_cost_vec);
          return;
        }
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                        stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
        {
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
                            stmt_info, 0, vect_body);
          record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
                            stmt_info, 0, vect_body);
        }
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
        continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
        {
          /* Without looking at the actual initializer a vector of
             constants can be implemented as load from the constant pool.
             ??? We need to pass down stmt_info for a vector type
             even if it points to the wrong stmt.  */
          if (dt == vect_constant_def)
            record_stmt_cost (prologue_cost_vec, 1, vector_load,
                              stmt_info, 0, vect_prologue);
          else if (dt == vect_external_def)
            record_stmt_cost (prologue_cost_vec, 1, vec_construct,
                              stmt_info, 0, vect_prologue);
        }
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
        {
        case DOT_PROD_EXPR:
        case SAD_EXPR:
          nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
                     (TREE_TYPE (gimple_assign_rhs1 (stmt))));
          break;
        default:;
        }
    }
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
                           &prologue_cost_vec, &body_cost_vec,
                           ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
        = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
                            si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}
1ba91a49
AL
1652/* Split a group of stores, currently beginning at FIRST_STMT, into two groups:
1653 one (still beginning at FIRST_STMT) containing the first GROUP1_SIZE stmts,
1654 since the stores are consecutive, and a second group containing the
1655 remainder.
1656 Return the first stmt in the second group. */
1657
1658static gimple *
1659vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1660{
1661 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1662 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1663 gcc_assert (group1_size > 0);
1664 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1665 gcc_assert (group2_size > 0);
1666 GROUP_SIZE (first_vinfo) = group1_size;
1667
1668 gimple *stmt = first_stmt;
1669 for (unsigned i = group1_size; i > 1; i--)
1670 {
1671 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1672 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1673 }
1674 /* STMT is now the last element of the first group. */
1675 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1676 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1677
1678 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1679 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1680 {
1681 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1682 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1683 }
1684
1685 /* For the second group, the GROUP_GAP is that before the original group,
1686 plus skipping over the first vector. */
1687 GROUP_GAP (vinfo_for_stmt (group2)) =
1688 GROUP_GAP (first_vinfo) + group1_size;
1689
1690 /* GROUP_GAP of the first group now has to skip over the second group too. */
1691 GROUP_GAP (first_vinfo) += group2_size;
1692
1693 if (dump_enabled_p ())
1694 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1695 group1_size, group2_size);
1696
1697 return group2;
1698}
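/* A sketch of the bookkeeping above, with hypothetical numbers: splitting a
   group of 8 consecutive stores with GROUP1_SIZE == 4 and an original
   GROUP_GAP (FIRST_VINFO) == 0 leaves two groups of 4.  The second group's
   GROUP_GAP becomes 0 + 4 (it skips over the first group), and the first
   group's GROUP_GAP becomes 0 + 4 (it skips over the second group), so
   unrolled copies of either group step over the other's elements.  */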
1699
0d0293ac 1700/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1701 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1702 Return FALSE if it's impossible to SLP any stmt in the loop. */
1703
1704static bool
310213d4 1705vect_analyze_slp_instance (vec_info *vinfo,
355fe088 1706 gimple *stmt, unsigned max_tree_size)
ebfd146a
IR
1707{
1708 slp_instance new_instance;
d092494c 1709 slp_tree node;
e14c1050 1710 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
ebfd146a 1711 unsigned int unrolling_factor = 1, nunits;
b5aeb3bb 1712 tree vectype, scalar_type = NULL_TREE;
355fe088 1713 gimple *next;
1ba91a49 1714 unsigned int i;
ebfd146a 1715 unsigned int max_nunits = 0;
9771b263 1716 vec<slp_tree> loads;
b5aeb3bb 1717 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
355fe088 1718 vec<gimple *> scalar_stmts;
b5aeb3bb 1719
b010117a 1720 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1721 {
b010117a
IR
1722 if (dr)
1723 {
1724 scalar_type = TREE_TYPE (DR_REF (dr));
1725 vectype = get_vectype_for_scalar_type (scalar_type);
1726 }
1727 else
1728 {
310213d4 1729 gcc_assert (is_a <loop_vec_info> (vinfo));
b010117a
IR
1730 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1731 }
1732
e14c1050 1733 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
1734 }
1735 else
1736 {
310213d4 1737 gcc_assert (is_a <loop_vec_info> (vinfo));
b5aeb3bb 1738 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
310213d4 1739 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 1740 }
b8698a0f 1741
ebfd146a
IR
1742 if (!vectype)
1743 {
73fbfcad 1744 if (dump_enabled_p ())
ebfd146a 1745 {
78c60e3d
SS
1746 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1747 "Build SLP failed: unsupported data-type ");
1748 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 1749 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1750 }
b5aeb3bb 1751
ebfd146a
IR
1752 return false;
1753 }
ebfd146a 1754 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 1755
0d0293ac 1756 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1757 scalar_stmts.create (group_size);
ebfd146a 1758 next = stmt;
b010117a 1759 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
ebfd146a 1760 {
b5aeb3bb
IR
1761 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1762 while (next)
1763 {
f7e531cf
IR
1764 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1765 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
9771b263
DN
1766 scalar_stmts.safe_push (
1767 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f7e531cf 1768 else
9771b263 1769 scalar_stmts.safe_push (next);
e14c1050 1770 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb 1771 }
14a61437
RB
1772 /* Mark the first element of the reduction chain as reduction to properly
1773 transform the node. In the reduction analysis phase only the last
1774 element of the chain is marked as reduction. */
1775 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1776 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
b5aeb3bb
IR
1777 }
1778 else
1779 {
1780 /* Collect reduction statements. */
310213d4 1781 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
9771b263
DN
1782 for (i = 0; reductions.iterate (i, &next); i++)
1783 scalar_stmts.safe_push (next);
ebfd146a
IR
1784 }
1785
9771b263 1786 loads.create (group_size);
ebfd146a
IR
1787
1788 /* Build the tree for the SLP instance. */
89d390e5
RB
1789 bool *matches = XALLOCAVEC (bool, group_size);
1790 unsigned npermutes = 0;
e569db5f 1791 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
e403d17e 1792 &max_nunits, &loads, matches, &npermutes,
e569db5f
VK
1793 NULL, max_tree_size);
1794 if (node != NULL)
ebfd146a 1795 {
4ef69dfc 1796 /* Calculate the unrolling factor based on the smallest type. */
e569db5f
VK
1797 unrolling_factor
1798 = least_common_multiple (max_nunits, group_size) / group_size;
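      /* E.g., with hypothetical numbers: max_nunits == 4 and
	 group_size == 6 give least_common_multiple (4, 6) == 12 and an
	 unrolling factor of 12 / 6 == 2, i.e. two copies of the group
	 exactly fill three vectors.  */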
b8698a0f 1799
e569db5f
VK
1800 if (unrolling_factor != 1
1801 && is_a <bb_vec_info> (vinfo))
1802 {
1803
1804 if (max_nunits > group_size)
4ef69dfc 1805 {
e645e942 1806 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e569db5f
VK
1807 "Build SLP failed: store group "
1808 "size not a multiple of the vector size "
1809 "in basic block SLP\n");
c7e62a26 1810 vect_free_slp_tree (node);
9771b263 1811 loads.release ();
4ef69dfc
IR
1812 return false;
1813 }
e569db5f
VK
1814 /* Fatal mismatch. */
1815 matches[group_size/max_nunits * max_nunits] = false;
1816 vect_free_slp_tree (node);
1817 loads.release ();
1818 }
1819 else
1820 {
4ef69dfc
IR
1821 /* Create a new SLP instance. */
1822 new_instance = XNEW (struct _slp_instance);
1823 SLP_INSTANCE_TREE (new_instance) = node;
1824 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 1825 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 1826 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
1827
1828 /* Compute the load permutation. */
1829 slp_tree load_node;
1830 bool loads_permuted = false;
abf9bfbc
RB
1831 FOR_EACH_VEC_ELT (loads, i, load_node)
1832 {
01d8bf07 1833 vec<unsigned> load_permutation;
abf9bfbc 1834 int j;
355fe088 1835 gimple *load, *first_stmt;
01d8bf07
RB
1836 bool this_load_permuted = false;
1837 load_permutation.create (group_size);
6983e6b5
RB
1838 first_stmt = GROUP_FIRST_ELEMENT
1839 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
abf9bfbc
RB
1840 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1841 {
e569db5f
VK
1842 int load_place = vect_get_place_in_interleaving_chain
1843 (load, first_stmt);
6983e6b5
RB
1844 gcc_assert (load_place != -1);
1845 if (load_place != j)
01d8bf07 1846 this_load_permuted = true;
abf9bfbc
RB
1847 load_permutation.safe_push (load_place);
1848 }
fe2bef71
RB
1849 if (!this_load_permuted
1850 /* The load requires permutation when unrolling exposes
1851 a gap either because the group is larger than the SLP
1852 group-size or because there is a gap between the groups. */
1853 && (unrolling_factor == 1
1854 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1855 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
01d8bf07
RB
1856 {
1857 load_permutation.release ();
1858 continue;
1859 }
1860 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1861 loads_permuted = true;
abf9bfbc 1862 }
6aa904c4
IR
1863
1864 if (loads_permuted)
ebfd146a 1865 {
01d8bf07 1866 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 1867 {
73fbfcad 1868 if (dump_enabled_p ())
ebfd146a 1869 {
e645e942 1870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
1871 "Build SLP failed: unsupported load "
1872 "permutation ");
e569db5f
VK
1873 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
1874 TDF_SLIM, stmt, 0);
ebfd146a 1875 }
ebfd146a
IR
1876 vect_free_slp_instance (new_instance);
1877 return false;
1878 }
ebfd146a 1879 }
ebfd146a 1880
e569db5f 1881 /* If the loads and stores can be handled with load/store-lanes
bb0f5ca7
AL
1882 instructions, do not generate this SLP instance. */
1883 if (is_a <loop_vec_info> (vinfo)
1884 && loads_permuted
1885 && dr && vect_store_lanes_supported (vectype, group_size))
1886 {
1887 slp_tree load_node;
1888 FOR_EACH_VEC_ELT (loads, i, load_node)
1889 {
1890 gimple *first_stmt = GROUP_FIRST_ELEMENT
1891 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
1892 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
e569db5f
VK
1893 /* Use SLP for strided accesses (or if we
1894 can't load-lanes). */
bb0f5ca7
AL
1895 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
1896 || ! vect_load_lanes_supported
1897 (STMT_VINFO_VECTYPE (stmt_vinfo),
1898 GROUP_SIZE (stmt_vinfo)))
1899 break;
1900 }
1901 if (i == loads.length ())
1902 {
1903 if (dump_enabled_p ())
1904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1905 "Built SLP cancelled: can use "
1906 "load/store-lanes\n");
1907 vect_free_slp_instance (new_instance);
1908 return false;
1909 }
1910 }
1911
310213d4 1912 vinfo->slp_instances.safe_push (new_instance);
b8698a0f 1913
73fbfcad 1914 if (dump_enabled_p ())
c2a12ca0
RB
1915 {
1916 dump_printf_loc (MSG_NOTE, vect_location,
1917 "Final SLP tree for instance:\n");
1918 vect_print_slp_tree (MSG_NOTE, vect_location, node);
1919 }
ebfd146a
IR
1920
1921 return true;
1922 }
e569db5f
VK
1923 }
1924 else
1925 {
ebfd146a
IR
1926 /* Failed to SLP. */
1927 /* Free the allocated memory. */
e403d17e 1928 scalar_stmts.release ();
9771b263 1929 loads.release ();
e569db5f 1930 }
b8698a0f 1931
1ba91a49 1932 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 1933 vector size. */
1ba91a49
AL
1934 if (is_a <bb_vec_info> (vinfo)
1935 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1936 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1937 {
1938 /* We consider breaking the group only on VF boundaries from the existing
1939 start. */
1940 for (i = 0; i < group_size; i++)
1941 if (!matches[i]) break;
1942
97a1a642 1943 if (i >= nunits && i < group_size)
1ba91a49
AL
1944 {
1945 /* Split into two groups at the first vector boundary before i. */
97a1a642
RB
1946 gcc_assert ((nunits & (nunits - 1)) == 0);
1947 unsigned group1_size = i & ~(nunits - 1);
1ba91a49
AL
1948
1949 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
1950 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
1951 /* If the first non-match was in the middle of a vector,
1952 skip the rest of that vector. */
1953 if (group1_size < i)
1954 {
97a1a642 1955 i = group1_size + nunits;
1ba91a49 1956 if (i < group_size)
97a1a642 1957 rest = vect_split_slp_store_group (rest, nunits);
1ba91a49
AL
1958 }
1959 if (i < group_size)
1960 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
1961 return res;
1962 }
1963 /* Even though the first vector did not all match, we might be able to SLP
1964 (some) of the remainder. FORNOW ignore this possibility. */
1965 }
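/* A worked example of the splitting above, with hypothetical numbers: for
   nunits == 4, group_size == 10 and the first mismatch at i == 6,
   group1_size == 6 & ~3 == 4, so the group is split into 4 + 6 and the
   first four stmts are re-analyzed.  Since group1_size < i, the vector
   containing the mismatch (elements 4..7) is skipped: i becomes 8, the
   remainder is split again into 4 + 2, and analysis resumes at element 8.  */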
1966
a70d6342 1967 return false;
ebfd146a
IR
1968}
1969
1970
ff802fa1 1971/* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
ebfd146a
IR
1972 trees of packed scalar stmts if SLP is possible. */
1973
1974bool
310213d4 1975vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
1976{
1977 unsigned int i;
355fe088 1978 gimple *first_element;
a70d6342 1979 bool ok = false;
ebfd146a 1980
73fbfcad 1981 if (dump_enabled_p ())
e645e942 1982 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
ebfd146a 1983
0d0293ac 1984 /* Find SLP sequences starting from groups of grouped stores. */
310213d4
RB
1985 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
1986 if (vect_analyze_slp_instance (vinfo, first_element, max_tree_size))
a70d6342 1987 ok = true;
ebfd146a 1988
310213d4 1989 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 1990 {
310213d4
RB
1991 if (loop_vinfo->reduction_chains.length () > 0)
1992 {
1993 /* Find SLP sequences starting from reduction chains. */
1994 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
1995 if (vect_analyze_slp_instance (vinfo, first_element,
1996 max_tree_size))
1997 ok = true;
1998 else
1999 return false;
b010117a 2000
310213d4
RB
2001 /* Don't try to vectorize SLP reductions if reduction chain was
2002 detected. */
2003 return ok;
2004 }
b010117a 2005
310213d4
RB
2006 /* Find SLP sequences starting from groups of reductions. */
2007 if (loop_vinfo->reductions.length () > 1
2008 && vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2009 max_tree_size))
2010 ok = true;
2011 }
b5aeb3bb 2012
ebfd146a
IR
2013 return true;
2014}
2015
2016
2017/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2018 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2019 least one instance. */
ebfd146a 2020
437f4a00 2021bool
ebfd146a
IR
2022vect_make_slp_decision (loop_vec_info loop_vinfo)
2023{
2024 unsigned int i, unrolling_factor = 1;
9771b263 2025 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2026 slp_instance instance;
2027 int decided_to_slp = 0;
2028
73fbfcad 2029 if (dump_enabled_p ())
e645e942
TJ
2030 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2031 "\n");
ebfd146a 2032
9771b263 2033 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2034 {
2035 /* FORNOW: SLP if you can. */
2036 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2037 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2038
ff802fa1 2039 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2040 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2041 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
2042 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2043 decided_to_slp++;
2044 }
2045
2046 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2047
73fbfcad 2048 if (decided_to_slp && dump_enabled_p ())
ccb3ad87 2049 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2050 "Decided to SLP %d instances. Unrolling factor %d\n",
78c60e3d 2051 decided_to_slp, unrolling_factor);
437f4a00
IR
2052
2053 return (decided_to_slp > 0);
ebfd146a
IR
2054}
2055
2056
2057/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2058 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2059
2060static void
642fce57 2061vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 2062{
355fe088 2063 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2064 imm_use_iterator imm_iter;
355fe088 2065 gimple *use_stmt;
642fce57 2066 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
d755c7ef 2067 slp_tree child;
f2c74cc4 2068 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2069 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2070 int j;
2071
2072 /* Propagate hybrid down the SLP tree. */
2073 if (stype == hybrid)
2074 ;
2075 else if (HYBRID_SLP_STMT (stmt_vinfo))
2076 stype = hybrid;
2077 else
2078 {
2079 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2080 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2081 /* If we get a pattern stmt here we have to use the LHS of the
2082 original stmt for immediate uses. */
2083 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2084 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
29764870 2085 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
642fce57
RB
2086 if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
2087 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
29764870
RB
2088 {
2089 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2090 continue;
2091 use_vinfo = vinfo_for_stmt (use_stmt);
2092 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2093 && STMT_VINFO_RELATED_STMT (use_vinfo))
2094 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2095 if (!STMT_SLP_TYPE (use_vinfo)
2096 && (STMT_VINFO_RELEVANT (use_vinfo)
2097 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2098 && !(gimple_code (use_stmt) == GIMPLE_PHI
2099 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2100 {
2101 if (dump_enabled_p ())
2102 {
2103 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2104 "def in non-SLP stmt: ");
2105 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2106 }
2107 stype = hybrid;
2108 }
29764870 2109 }
642fce57 2110 }
ebfd146a 2111
502f0263
RB
2112 if (stype == hybrid
2113 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2114 {
2115 if (dump_enabled_p ())
2116 {
2117 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2118 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2119 }
2120 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2121 }
ebfd146a 2122
642fce57 2123 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
603cca93 2124 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
90dd6e3d 2125 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2126}
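/* For example (a hypothetical scenario): if a pure-SLP stmt computes
   x_1 = a + b and x_1 is also used by a relevant stmt that is not part of
   any SLP instance, x_1's definition is marked hybrid above: it
   participates in SLP but must also be vectorized by the loop-based
   vectorizer for the non-SLP use.  */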
f2c74cc4 2127
642fce57 2128/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2129
642fce57
RB
2130static tree
2131vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2132{
2133 walk_stmt_info *wi = (walk_stmt_info *)data;
2134 struct loop *loopp = (struct loop *)wi->info;
2135
2136 if (wi->is_lhs)
2137 return NULL_TREE;
2138
2139 if (TREE_CODE (*tp) == SSA_NAME
2140 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2141 {
355fe088 2142 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
642fce57
RB
2143 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2144 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
b1af7da6
RB
2145 {
2146 if (dump_enabled_p ())
2147 {
2148 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2149 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2150 }
2151 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2152 }
642fce57
RB
2153 }
2154
2155 return NULL_TREE;
ebfd146a
IR
2156}
2157
642fce57
RB
2158static tree
2159vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2160 walk_stmt_info *)
2161{
2162 /* If the stmt is in an SLP instance then this isn't a reason
2163 to mark use definitions in other SLP instances as hybrid. */
2164 if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2165 *handled = true;
2166 return NULL_TREE;
2167}
ebfd146a
IR
2168
2169/* Find stmts that must be both vectorized and SLPed. */
2170
2171void
2172vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2173{
2174 unsigned int i;
9771b263 2175 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2176 slp_instance instance;
2177
73fbfcad 2178 if (dump_enabled_p ())
e645e942
TJ
2179 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2180 "\n");
ebfd146a 2181
642fce57
RB
2182 /* First walk all pattern stmts in the loop and mark defs of uses as
2183 hybrid because immediate uses in them are not recorded. */
2184 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2185 {
2186 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2187 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2188 gsi_next (&gsi))
2189 {
355fe088 2190 gimple *stmt = gsi_stmt (gsi);
642fce57
RB
2191 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2192 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2193 {
2194 walk_stmt_info wi;
2195 memset (&wi, 0, sizeof (wi));
2196 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2197 gimple_stmt_iterator gsi2
2198 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2199 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2200 vect_detect_hybrid_slp_1, &wi);
2201 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2202 vect_detect_hybrid_slp_2,
2203 vect_detect_hybrid_slp_1, &wi);
2204 }
2205 }
2206 }
2207
2208 /* Then walk the SLP instance trees marking stmts with uses in
2209 non-SLP stmts as hybrid, also propagating hybrid down the
2210 SLP tree, collecting the above info on-the-fly. */
9771b263 2211 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2212 {
2213 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2214 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2215 i, pure_slp);
2216 }
ebfd146a
IR
2217}
2218
a70d6342
IR
2219
2220/* Create and initialize a new bb_vec_info struct for BB, as well as
2221 stmt_vec_info structs for all the stmts in it. */
b8698a0f 2222
a70d6342 2223static bb_vec_info
61d371eb
RB
2224new_bb_vec_info (gimple_stmt_iterator region_begin,
2225 gimple_stmt_iterator region_end)
a70d6342 2226{
61d371eb 2227 basic_block bb = gsi_bb (region_begin);
a70d6342
IR
2228 bb_vec_info res = NULL;
2229 gimple_stmt_iterator gsi;
2230
2231 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
310213d4 2232 res->kind = vec_info::bb;
a70d6342 2233 BB_VINFO_BB (res) = bb;
61d371eb
RB
2234 res->region_begin = region_begin;
2235 res->region_end = region_end;
a70d6342 2236
61d371eb
RB
2237 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2238 gsi_next (&gsi))
a70d6342 2239 {
355fe088 2240 gimple *stmt = gsi_stmt (gsi);
a70d6342 2241 gimple_set_uid (stmt, 0);
310213d4 2242 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
a70d6342
IR
2243 }
2244
9771b263
DN
2245 BB_VINFO_GROUPED_STORES (res).create (10);
2246 BB_VINFO_SLP_INSTANCES (res).create (2);
c3e7ee41 2247 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
a70d6342
IR
2248
2249 bb->aux = res;
2250 return res;
2251}
2252
2253
2254/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2255 stmts in the basic block. */
2256
2257static void
2258destroy_bb_vec_info (bb_vec_info bb_vinfo)
2259{
c7e62a26 2260 slp_instance instance;
c7e62a26 2261 unsigned i;
a70d6342
IR
2262
2263 if (!bb_vinfo)
2264 return;
2265
78810bd3
RB
2266 vect_destroy_datarefs (bb_vinfo);
2267 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
2268 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
2269 FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
2270 vect_free_slp_instance (instance);
2271 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
2272 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
a70d6342 2273
78810bd3 2274 for (gimple_stmt_iterator si = bb_vinfo->region_begin;
61d371eb 2275 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
a70d6342 2276 {
355fe088 2277 gimple *stmt = gsi_stmt (si);
a70d6342
IR
2278 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2279
2280 if (stmt_info)
2281 /* Free stmt_vec_info. */
2282 free_stmt_vec_info (stmt);
61d371eb
RB
2283
2284 /* Reset region marker. */
2285 gimple_set_uid (stmt, -1);
a70d6342
IR
2286 }
2287
78810bd3 2288 BB_VINFO_BB (bb_vinfo)->aux = NULL;
a70d6342 2289 free (bb_vinfo);
a70d6342
IR
2290}
2291
2292
2293/* Analyze statements contained in SLP tree node after recursively analyzing
2294 the subtree. Return TRUE if the operations are supported. */
2295
2296static bool
a12e42fc 2297vect_slp_analyze_node_operations (slp_tree node)
a70d6342
IR
2298{
2299 bool dummy;
603cca93 2300 int i, j;
355fe088 2301 gimple *stmt;
d755c7ef 2302 slp_tree child;
a70d6342 2303
603cca93 2304 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
2305 return true;
2306
9771b263 2307 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a12e42fc 2308 if (!vect_slp_analyze_node_operations (child))
d092494c 2309 return false;
a70d6342 2310
603cca93 2311 bool res = true;
9771b263 2312 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
2313 {
2314 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2315 gcc_assert (stmt_info);
a12e42fc 2316 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
a70d6342 2317
6379dfb5
RB
2318 /* Push SLP node def-type to stmt operands. */
2319 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2320 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2321 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[i]))
2322 = SLP_TREE_DEF_TYPE (child);
2323 res = vect_analyze_stmt (stmt, &dummy, node);
2324 /* Restore def-types. */
2325 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2326 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2327 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[i]))
2328 = vect_internal_def;
2329 if (! res)
2330 break;
a70d6342
IR
2331 }
2332
603cca93 2333 return res;
a70d6342
IR
2334}
2335
2336
ff802fa1 2337/* Analyze statements in SLP instances of the basic block. Return TRUE if the
a70d6342
IR
2338 operations are supported. */
2339
a12e42fc 2340bool
1a4b99c1 2341vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
a70d6342 2342{
a70d6342
IR
2343 slp_instance instance;
2344 int i;
2345
a12e42fc
RB
2346 if (dump_enabled_p ())
2347 dump_printf_loc (MSG_NOTE, vect_location,
2348 "=== vect_slp_analyze_operations ===\n");
2349
9771b263 2350 for (i = 0; slp_instances.iterate (i, &instance); )
a70d6342 2351 {
a12e42fc 2352 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
a70d6342 2353 {
a12e42fc
RB
2354 dump_printf_loc (MSG_NOTE, vect_location,
2355 "removing SLP instance operations starting from: ");
2356 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2357 SLP_TREE_SCALAR_STMTS
2358 (SLP_INSTANCE_TREE (instance))[0], 0);
2359 vect_free_slp_instance (instance);
9771b263 2360 slp_instances.ordered_remove (i);
a70d6342
IR
2361 }
2362 else
1a4b99c1
RB
2363 {
2364 /* Compute the costs of the SLP instance. */
2365 vect_analyze_slp_cost (instance, data);
2366 i++;
2367 }
b8698a0f
L
2368 }
2369
9771b263 2370 if (!slp_instances.length ())
a70d6342
IR
2371 return false;
2372
2373 return true;
2374}
2375
6eddf228
RB
2376
2377/* Compute the scalar cost of the SLP node NODE and its children
2378 and return it. Do not account defs that are marked in LIFE and
2379 update LIFE according to uses of NODE. */
2380
2381static unsigned
292cba13 2382vect_bb_slp_scalar_cost (basic_block bb,
ff4c81cc 2383 slp_tree node, vec<bool, va_heap> *life)
6eddf228
RB
2384{
2385 unsigned scalar_cost = 0;
2386 unsigned i;
355fe088 2387 gimple *stmt;
6eddf228
RB
2388 slp_tree child;
2389
2390 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2391 {
2392 unsigned stmt_cost;
2393 ssa_op_iter op_iter;
2394 def_operand_p def_p;
2395 stmt_vec_info stmt_info;
2396
ff4c81cc 2397 if ((*life)[i])
6eddf228
RB
2398 continue;
2399
2400 /* If there is a non-vectorized use of the defs then the scalar
2401 stmt is kept live, in which case we do not account it or any
2402 required defs in the SLP children in the scalar cost. This
2403 way we make the vectorization more costly when compared to
2404 the scalar cost. */
2405 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2406 {
2407 imm_use_iterator use_iter;
355fe088 2408 gimple *use_stmt;
6eddf228 2409 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
f30a0ba5 2410 if (!is_gimple_debug (use_stmt)
61d371eb
RB
2411 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2412 use_stmt)
603cca93 2413 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
6eddf228 2414 {
ff4c81cc 2415 (*life)[i] = true;
6eddf228
RB
2416 BREAK_FROM_IMM_USE_STMT (use_iter);
2417 }
2418 }
ff4c81cc 2419 if ((*life)[i])
6eddf228
RB
2420 continue;
2421
b555a2e4
RB
2422 /* Count scalar stmts only once. */
2423 if (gimple_visited_p (stmt))
2424 continue;
2425 gimple_set_visited (stmt, true);
2426
6eddf228
RB
2427 stmt_info = vinfo_for_stmt (stmt);
2428 if (STMT_VINFO_DATA_REF (stmt_info))
2429 {
2430 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2431 stmt_cost = vect_get_stmt_cost (scalar_load);
2432 else
2433 stmt_cost = vect_get_stmt_cost (scalar_store);
2434 }
2435 else
2436 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2437
2438 scalar_cost += stmt_cost;
2439 }
2440
2441 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
603cca93 2442 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
90dd6e3d 2443 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
6eddf228
RB
2444
2445 return scalar_cost;
2446}
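/* For example (a hypothetical scenario): if a stmt of the node computes
   t_1 = a + b and t_1 is also used by a stmt outside the vectorized
   region, t_1's scalar def stays live, so its cost is not counted as
   removable above and vectorization looks correspondingly less
   profitable.  */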
2447
69f11a13
IR
2448/* Check if vectorization of the basic block is profitable. */
2449
2450static bool
2451vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2452{
9771b263 2453 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2454 slp_instance instance;
1a4b99c1 2455 int i;
c3e7ee41 2456 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2457 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2458
2459 /* Calculate scalar cost. */
6eddf228 2460 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2461 {
00f96dc9 2462 auto_vec<bool, 20> life;
ff4c81cc 2463 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
292cba13
RB
2464 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2465 SLP_INSTANCE_TREE (instance),
ff4c81cc 2466 &life);
69f11a13
IR
2467 }
2468
b555a2e4
RB
2469 /* Unset visited flag. */
2470 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2471 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2472 gimple_set_visited (gsi_stmt (gsi), false);
2473
c3e7ee41 2474 /* Complete the target-specific cost calculation. */
92345349
BS
2475 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2476 &vec_inside_cost, &vec_epilogue_cost);
2477
2478 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2479
73fbfcad 2480 if (dump_enabled_p ())
69f11a13 2481 {
78c60e3d
SS
2482 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2483 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2484 vec_inside_cost);
2485 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2486 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2487 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2488 }
2489
a6524bba
RB
2490 /* Vectorization is profitable if its cost is less than the cost of the
2491 scalar version. Note that we err on the vector side for equal cost because
2492 the cost estimate is otherwise quite pessimistic (constant uses are
2493 free on the scalar side but cost a load on the vector side for
2494 example). */
2495 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2496 return false;
2497
2498 return true;
2499}
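/* A numeric sketch with hypothetical costs: vec_inside_cost == 4,
   vec_prologue_cost == 2 and vec_epilogue_cost == 1 give a vector cost of
   7; against scalar_cost == 8 the block is vectorized (7 <= 8), and at
   equal cost 8 == 8 it still is, per the err-on-the-vector-side note
   above.  */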
2500
a5b50aa1
RB
2501/* Check if the basic block can be vectorized. Returns a bb_vec_info
2502 if so and sets fatal to true if failure is independent of
2503 current_vector_size. */
a70d6342 2504
8e19f5a1 2505static bb_vec_info
61d371eb
RB
2506vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2507 gimple_stmt_iterator region_end,
a5b50aa1
RB
2508 vec<data_reference_p> datarefs, int n_stmts,
2509 bool &fatal)
a70d6342
IR
2510{
2511 bb_vec_info bb_vinfo;
a70d6342 2512 slp_instance instance;
8e19f5a1 2513 int i;
777e1f09 2514 int min_vf = 2;
e4a707c4 2515
a5b50aa1
RB
2516 /* The first group of checks is independent of the vector size. */
2517 fatal = true;
2518
61d371eb
RB
2519 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2520 {
2521 if (dump_enabled_p ())
2522 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2523 "not vectorized: too many instructions in "
2524 "basic block.\n");
2525 free_data_refs (datarefs);
2526 return NULL;
2527 }
2528
2529 bb_vinfo = new_bb_vec_info (region_begin, region_end);
a70d6342
IR
2530 if (!bb_vinfo)
2531 return NULL;
2532
61d371eb 2533 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
428db0ba
RB
2534
2535 /* Analyze the data references. */
2536
2537 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
a70d6342 2538 {
73fbfcad 2539 if (dump_enabled_p ())
78c60e3d
SS
2540 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2541 "not vectorized: unhandled data-ref in basic "
2542 "block.\n");
b8698a0f 2543
a70d6342
IR
2544 destroy_bb_vec_info (bb_vinfo);
2545 return NULL;
2546 }
2547
fcac74a1 2548 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2549 {
73fbfcad 2550 if (dump_enabled_p ())
78c60e3d
SS
2551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2552 "not vectorized: not enough data-refs in "
2553 "basic block.\n");
a70d6342
IR
2554
2555 destroy_bb_vec_info (bb_vinfo);
2556 return NULL;
2557 }
2558
310213d4 2559 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
2560 {
2561 if (dump_enabled_p ())
2562 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2563 "not vectorized: unhandled data access in "
2564 "basic block.\n");
2565
2566 destroy_bb_vec_info (bb_vinfo);
2567 return NULL;
2568 }
2569
a5b50aa1
RB
2570 /* If there are no grouped stores in the region there is no need
2571 to continue with pattern recog as vect_analyze_slp will fail
2572 anyway. */
2573 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 2574 {
73fbfcad 2575 if (dump_enabled_p ())
a5b50aa1
RB
2576 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2577 "not vectorized: no grouped stores in "
2578 "basic block.\n");
b8698a0f 2579
a70d6342
IR
2580 destroy_bb_vec_info (bb_vinfo);
2581 return NULL;
2582 }
b8698a0f 2583
a5b50aa1
RB
2584 /* The rest of the analysis below depends on the vector size in some way, so a failure past this point is not fatal. */
2585 fatal = false;
2586
2587 vect_pattern_recog (bb_vinfo);
2588
a70d6342
IR
2589 /* Check the SLP opportunities in the basic block, analyze and build SLP
2590 trees. */
310213d4 2591 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 2592 {
73fbfcad 2593 if (dump_enabled_p ())
effb52da
RB
2594 {
2595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2596 "Failed to SLP the basic block.\n");
2597 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2598 "not vectorized: failed to find SLP opportunities "
2599 "in basic block.\n");
2600 }
a70d6342
IR
2601
2602 destroy_bb_vec_info (bb_vinfo);
2603 return NULL;
2604 }
b8698a0f 2605
c2a12ca0
RB
2606 /* Analyze and verify the alignment of data references and the
2607 dependence in the SLP instances. */
a5b50aa1
RB
2608 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2609 {
c2a12ca0
RB
2610 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2611 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1
RB
2612 {
2613 dump_printf_loc (MSG_NOTE, vect_location,
2614 "removing SLP instance operations starting from: ");
2615 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2616 SLP_TREE_SCALAR_STMTS
2617 (SLP_INSTANCE_TREE (instance))[0], 0);
2618 vect_free_slp_instance (instance);
2619 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2620 continue;
2621 }
c2a12ca0
RB
2622
2623 /* Mark all the statements that we want to vectorize as pure SLP and
2624 relevant. */
2625 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2626 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2627
a5b50aa1
RB
2628 i++;
2629 }
a5b50aa1
RB
2630 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2631 {
2632 destroy_bb_vec_info (bb_vinfo);
2633 return NULL;
2634 }
2635
1a4b99c1
RB
2636 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2637 BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
a70d6342 2638 {
73fbfcad 2639 if (dump_enabled_p ())
e645e942 2640 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2641 "not vectorized: bad operation in basic block.\n");
a70d6342
IR
2642
2643 destroy_bb_vec_info (bb_vinfo);
2644 return NULL;
2645 }
2646
69f11a13 2647 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 2648 if (!unlimited_cost_model (NULL)
69f11a13
IR
2649 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2650 {
73fbfcad 2651 if (dump_enabled_p ())
78c60e3d
SS
2652 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2653 "not vectorized: vectorization is not "
2654 "profitable.\n");
69f11a13
IR
2655
2656 destroy_bb_vec_info (bb_vinfo);
2657 return NULL;
2658 }
2659
73fbfcad 2660 if (dump_enabled_p ())
78c60e3d
SS
2661 dump_printf_loc (MSG_NOTE, vect_location,
2662 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2663
2664 return bb_vinfo;
2665}
2666
2667
428db0ba
RB
2668/* Main entry for the BB vectorizer. Analyze and transform BB; return
2669 true if anything in the basic-block was vectorized. */
2670
2671bool
2672vect_slp_bb (basic_block bb)
8e19f5a1
IR
2673{
2674 bb_vec_info bb_vinfo;
8e19f5a1
IR
2675 gimple_stmt_iterator gsi;
2676 unsigned int vector_sizes;
61d371eb 2677 bool any_vectorized = false;
8e19f5a1 2678
73fbfcad 2679 if (dump_enabled_p ())
78c60e3d 2680 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
8e19f5a1 2681
8e19f5a1
IR
2682 /* Autodetect the first vector size we try. */
2683 current_vector_size = 0;
2684 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2685
61d371eb
RB
2686 gsi = gsi_start_bb (bb);
2687
8e19f5a1
IR
2688 while (1)
2689 {
61d371eb
RB
2690 if (gsi_end_p (gsi))
2691 break;
2692
2693 gimple_stmt_iterator region_begin = gsi;
2694 vec<data_reference_p> datarefs = vNULL;
2695 int insns = 0;
2696
2697 for (; !gsi_end_p (gsi); gsi_next (&gsi))
428db0ba 2698 {
61d371eb
RB
2699 gimple *stmt = gsi_stmt (gsi);
2700 if (is_gimple_debug (stmt))
2701 continue;
2702 insns++;
2703
2704 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2705 vect_location = gimple_location (stmt);
2706
2707 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2708 break;
2709 }
2710
2711 /* Skip leading unhandled stmts. */
2712 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2713 {
2714 gsi_next (&gsi);
2715 continue;
2716 }
428db0ba 2717
61d371eb
RB
2718 gimple_stmt_iterator region_end = gsi;
2719
2720 bool vectorized = false;
a5b50aa1 2721 bool fatal = false;
61d371eb 2722 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
a5b50aa1 2723 datarefs, insns, fatal);
61d371eb
RB
2724 if (bb_vinfo
2725 && dbg_cnt (vect_slp))
2726 {
428db0ba 2727 if (dump_enabled_p ())
61d371eb 2728 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
428db0ba
RB
2729
2730 vect_schedule_slp (bb_vinfo);
2731
2732 if (dump_enabled_p ())
2733 dump_printf_loc (MSG_NOTE, vect_location,
61d371eb 2734 "basic block part vectorized\n");
428db0ba
RB
2735
2736 destroy_bb_vec_info (bb_vinfo);
2737
61d371eb 2738 vectorized = true;
428db0ba 2739 }
61d371eb
RB
2740 else
2741 destroy_bb_vec_info (bb_vinfo);
8e19f5a1 2742
61d371eb 2743 any_vectorized |= vectorized;
8e19f5a1
IR
2744
2745 vector_sizes &= ~current_vector_size;
61d371eb
RB
2746 if (vectorized
2747 || vector_sizes == 0
a5b50aa1
RB
2748 || current_vector_size == 0
2749 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2750 vector sizes will fail do not bother iterating. */
2751 || fatal)
61d371eb
RB
2752 {
2753 if (gsi_end_p (region_end))
2754 break;
8e19f5a1 2755
61d371eb
RB
2756 /* Skip the unhandled stmt. */
2757 gsi_next (&gsi);
2758
2759 /* And reset vector sizes. */
2760 current_vector_size = 0;
2761 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2762 }
2763 else
2764 {
2765 /* Try the next biggest vector size. */
2766 current_vector_size = 1 << floor_log2 (vector_sizes);
2767 if (dump_enabled_p ())
2768 dump_printf_loc (MSG_NOTE, vect_location,
2769 "***** Re-trying analysis with "
2770 "vector size %d\n", current_vector_size);
2771
2772 /* Start over. */
2773 gsi = region_begin;
2774 }
8e19f5a1 2775 }
61d371eb
RB
2776
2777 return any_vectorized;
8e19f5a1
IR
2778}
2779
2780
e4af0bc4
IE
2781/* Return true if the vector type chosen for the boolean constant that is
2782 operand OPNUM of statement STMT is a boolean vector. */
2783
2784static bool
2785vect_mask_constant_operand_p (gimple *stmt, int opnum)
2786{
2787 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2788 enum tree_code code = gimple_expr_code (stmt);
2789 tree op, vectype;
2790 gimple *def_stmt;
2791 enum vect_def_type dt;
2792
2793 /* For comparisons and COND_EXPR the type is chosen depending
2794 on the other comparison operand. */
2795 if (TREE_CODE_CLASS (code) == tcc_comparison)
2796 {
2797 if (opnum)
2798 op = gimple_assign_rhs1 (stmt);
2799 else
2800 op = gimple_assign_rhs2 (stmt);
2801
2802 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2803 &dt, &vectype))
2804 gcc_unreachable ();
2805
2806 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2807 }
2808
2809 if (code == COND_EXPR)
2810 {
2811 tree cond = gimple_assign_rhs1 (stmt);
2812
2813 if (TREE_CODE (cond) == SSA_NAME)
2814 return false;
2815
2816 if (opnum)
2817 op = TREE_OPERAND (cond, 1);
2818 else
2819 op = TREE_OPERAND (cond, 0);
2820
2821 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
2822 &dt, &vectype))
2823 gcc_unreachable ();
2824
2825 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
2826 }
2827
2828 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
2829}
2830
2831
b8698a0f
L
2832/* For constant and loop invariant defs of SLP_NODE this function returns
2833 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
2834 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2835 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
2836 REDUC_INDEX is the index of the reduction operand in the statements, unless
2837 it is -1. */
ebfd146a
IR
2838
2839static void
9dc3f7de 2840vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 2841 vec<tree> *vec_oprnds,
b5aeb3bb
IR
2842 unsigned int op_num, unsigned int number_of_vectors,
2843 int reduc_index)
ebfd146a 2844{
355fe088
TS
2845 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2846 gimple *stmt = stmts[0];
ebfd146a 2847 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
d2a12ae7 2848 unsigned nunits;
ebfd146a 2849 tree vec_cst;
d2a12ae7
RG
2850 tree *elts;
2851 unsigned j, number_of_places_left_in_vector;
ebfd146a 2852 tree vector_type;
9dc3f7de 2853 tree vop;
9771b263 2854 int group_size = stmts.length ();
ebfd146a 2855 unsigned int vec_num, i;
d2a12ae7 2856 unsigned number_of_copies = 1;
9771b263
DN
2857 vec<tree> voprnds;
2858 voprnds.create (number_of_vectors);
ebfd146a 2859 bool constant_p, is_store;
b5aeb3bb 2860 tree neutral_op = NULL;
bac430c9 2861 enum tree_code code = gimple_expr_code (stmt);
355fe088 2862 gimple *def_stmt;
0e93a64e 2863 struct loop *loop;
13396b6e 2864 gimple_seq ctor_seq = NULL;
b5aeb3bb 2865
42fd8198
IE
2866 /* Check if vector type is a boolean vector. */
2867 if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
e4af0bc4 2868 && vect_mask_constant_operand_p (stmt, op_num))
42fd8198
IE
2869 vector_type
2870 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
2871 else
2872 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
afbe6325
RB
2873 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2874
29ed4920
IR
2875 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2876 && reduc_index != -1)
b5aeb3bb 2877 {
afbe6325
RB
2878 op_num = reduc_index;
2879 op = gimple_op (stmt, op_num + 1);
b5aeb3bb 2880 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
ff802fa1 2881 we need either neutral operands or the original operands. See
b5aeb3bb
IR
2882 get_initial_def_for_reduction() for details. */
2883 switch (code)
2884 {
2885 case WIDEN_SUM_EXPR:
2886 case DOT_PROD_EXPR:
afbe6325 2887 case SAD_EXPR:
b5aeb3bb
IR
2888 case PLUS_EXPR:
2889 case MINUS_EXPR:
2890 case BIT_IOR_EXPR:
2891 case BIT_XOR_EXPR:
2892 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2893 neutral_op = build_real (TREE_TYPE (op), dconst0);
2894 else
2895 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2896
2897 break;
2898
2899 case MULT_EXPR:
b5aeb3bb
IR
2900 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2901 neutral_op = build_real (TREE_TYPE (op), dconst1);
2902 else
2903 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2904
2905 break;
2906
c1e822d5
IR
2907 case BIT_AND_EXPR:
2908 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2909 break;
2910
f1485e5b
RB
2911 /* For MIN/MAX we don't have an easy neutral operand but
2912 the initial values can be used fine here. Only for
2913 a reduction chain do we have to force a neutral element. */
2914 case MAX_EXPR:
2915 case MIN_EXPR:
2916 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2917 neutral_op = NULL;
2918 else
2919 {
2920 def_stmt = SSA_NAME_DEF_STMT (op);
2921 loop = (gimple_bb (stmt))->loop_father;
2922 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2923 loop_preheader_edge (loop));
2924 }
2925 break;
0e93a64e 2926
b5aeb3bb 2927 default:
afbe6325 2928 gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
0e93a64e 2929 neutral_op = NULL;
b5aeb3bb
IR
2930 }
2931 }
ebfd146a
IR
2932
2933 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2934 {
2935 is_store = true;
2936 op = gimple_assign_rhs1 (stmt);
2937 }
2938 else
9dc3f7de
IR
2939 is_store = false;
2940
2941 gcc_assert (op);
ebfd146a
IR
2942
2943 if (CONSTANT_CLASS_P (op))
d59dc888 2944 constant_p = true;
ebfd146a 2945 else
d59dc888
IR
2946 constant_p = false;
2947
ebfd146a 2948 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 2949 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
2950
2951 For example, we have two scalar operands, s1 and s2 (e.g., group of
2952 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
2953 of this type can be packed in a vector). The output vector will contain
2954 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
2955 will be 2).
2956
b8698a0f 2957 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
2958 containing the operands.
2959
2960 For example, NUNITS is four as before, and the group size is 8
f7e531cf 2961 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 2962 {s5, s6, s7, s8}. */
b8698a0f 2963
14a61437 2964 number_of_copies = nunits * number_of_vectors / group_size;
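  /* E.g., for the cases in the comment above: {s1, s2} with NUNITS == 4
     and one output vector gives 4 * 1 / 2 == 2 copies; the group of eight
     with two output vectors gives 4 * 2 / 8 == 1 copy.  */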
ebfd146a
IR
2965
2966 number_of_places_left_in_vector = nunits;
d2a12ae7 2967 elts = XALLOCAVEC (tree, nunits);
90dd6e3d 2968 bool place_after_defs = false;
ebfd146a
IR
2969 for (j = 0; j < number_of_copies; j++)
2970 {
9771b263 2971 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
ebfd146a
IR
2972 {
2973 if (is_store)
2974 op = gimple_assign_rhs1 (stmt);
bac430c9 2975 else
f7e531cf 2976 {
bac430c9 2977 switch (code)
f7e531cf 2978 {
bac430c9 2979 case COND_EXPR:
a989bcc3
IE
2980 {
2981 tree cond = gimple_assign_rhs1 (stmt);
2982 if (TREE_CODE (cond) == SSA_NAME)
2983 op = gimple_op (stmt, op_num + 1);
2984 else if (op_num == 0 || op_num == 1)
bac430c9 2985 op = TREE_OPERAND (cond, op_num);
a989bcc3
IE
2986 else
2987 {
2988 if (op_num == 2)
2989 op = gimple_assign_rhs2 (stmt);
2990 else
2991 op = gimple_assign_rhs3 (stmt);
2992 }
2993 }
bac430c9
IR
2994 break;
2995
2996 case CALL_EXPR:
2997 op = gimple_call_arg (stmt, op_num);
2998 break;
2999
b84b294a
JJ
3000 case LSHIFT_EXPR:
3001 case RSHIFT_EXPR:
3002 case LROTATE_EXPR:
3003 case RROTATE_EXPR:
3004 op = gimple_op (stmt, op_num + 1);
3005 /* Unlike the other binary operators, shifts/rotates have
3006 the shift count being int, instead of the same type as
3007 the lhs, so make sure the scalar is the right type if
3008 we are dealing with vectors of
3009 long long/long/short/char. */
793d9a16 3010 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
3011 op = fold_convert (TREE_TYPE (vector_type), op);
3012 break;
3013
bac430c9
IR
3014 default:
3015 op = gimple_op (stmt, op_num + 1);
b84b294a 3016 break;
f7e531cf
IR
3017 }
3018 }
b8698a0f 3019
b5aeb3bb
IR
3020 if (reduc_index != -1)
3021 {
0e93a64e
IR
3022 loop = (gimple_bb (stmt))->loop_father;
3023 def_stmt = SSA_NAME_DEF_STMT (op);
b5aeb3bb
IR
3024
3025 gcc_assert (loop);
b010117a
IR
3026
3027 /* Get the def before the loop. In reduction chain we have only
3028 one initial value. */
3029 if ((j != (number_of_copies - 1)
3030 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
3031 && i != 0))
3032 && neutral_op)
b5aeb3bb 3033 op = neutral_op;
b010117a
IR
3034 else
3035 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
3036 loop_preheader_edge (loop));
b5aeb3bb
IR
3037 }
3038
ebfd146a 3039 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3040 number_of_places_left_in_vector--;
90dd6e3d 3041 tree orig_op = op;
13396b6e 3042 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3043 {
793d9a16 3044 if (CONSTANT_CLASS_P (op))
13396b6e 3045 {
42fd8198
IE
3046 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3047 {
3048 /* Can't use VIEW_CONVERT_EXPR for booleans because
3049 of possibly different sizes of scalar value and
3050 vector element. */
3051 if (integer_zerop (op))
3052 op = build_int_cst (TREE_TYPE (vector_type), 0);
3053 else if (integer_onep (op))
158beb4a 3054 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3055 else
3056 gcc_unreachable ();
3057 }
3058 else
3059 op = fold_unary (VIEW_CONVERT_EXPR,
3060 TREE_TYPE (vector_type), op);
13396b6e
JJ
3061 gcc_assert (op && CONSTANT_CLASS_P (op));
3062 }
3063 else
3064 {
b731b390 3065 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3066 gimple *init_stmt;
262a363f
JJ
3067 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3068 {
158beb4a
JJ
3069 tree true_val
3070 = build_all_ones_cst (TREE_TYPE (vector_type));
3071 tree false_val
3072 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3073 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3074 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3075 op, true_val,
3076 false_val);
262a363f 3077 }
262a363f
JJ
3078 else
3079 {
3080 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3081 op);
3082 init_stmt
3083 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3084 op);
3085 }
13396b6e
JJ
3086 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3087 op = new_temp;
3088 }
50eeef09 3089 }
d2a12ae7 3090 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3091 if (!CONSTANT_CLASS_P (op))
3092 constant_p = false;
90dd6e3d
RB
3093 if (TREE_CODE (orig_op) == SSA_NAME
3094 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3095 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3096 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3097 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3098 place_after_defs = true;
ebfd146a
IR
3099
3100 if (number_of_places_left_in_vector == 0)
3101 {
3102 number_of_places_left_in_vector = nunits;
3103
3104 if (constant_p)
d2a12ae7 3105 vec_cst = build_vector (vector_type, elts);
ebfd146a 3106 else
d2a12ae7 3107 {
9771b263 3108 vec<constructor_elt, va_gc> *v;
d2a12ae7 3109 unsigned k;
9771b263 3110 vec_alloc (v, nunits);
d2a12ae7
RG
3111 for (k = 0; k < nunits; ++k)
3112 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3113 vec_cst = build_constructor (vector_type, v);
3114 }
90dd6e3d
RB
3115 tree init;
3116 gimple_stmt_iterator gsi;
3117 if (place_after_defs)
3118 {
3119 gsi = gsi_for_stmt
3120 (vect_find_last_scalar_stmt_in_slp (slp_node));
3121 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3122 }
3123 else
3124 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
13396b6e
JJ
3125 if (ctor_seq != NULL)
3126 {
90dd6e3d 3127 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
13396b6e
JJ
3128 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3129 GSI_SAME_STMT);
3130 ctor_seq = NULL;
3131 }
90dd6e3d
RB
3132 voprnds.quick_push (init);
3133 place_after_defs = false;
ebfd146a
IR
3134 }
3135 }
3136 }
3137
b8698a0f 3138 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 3139 them. */
9771b263 3140 vec_num = voprnds.length ();
d2a12ae7 3141 for (j = vec_num; j != 0; j--)
ebfd146a 3142 {
9771b263
DN
3143 vop = voprnds[j - 1];
3144 vec_oprnds->quick_push (vop);
ebfd146a
IR
3145 }
3146
9771b263 3147 voprnds.release ();
ebfd146a
IR
3148
3149 /* If VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3150 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3151 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3152 to replicate the vectors. */
9771b263 3153 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3154 {
b5aeb3bb
IR
3155 tree neutral_vec = NULL;
3156
3157 if (neutral_op)
3158 {
3159 if (!neutral_vec)
b9acc9f1 3160 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3161
9771b263 3162 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3163 }
3164 else
3165 {
9771b263
DN
3166 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3167 vec_oprnds->quick_push (vop);
b5aeb3bb 3168 }
ebfd146a
IR
3169 }
3170}
3171
3172
3173/* Get vectorized definitions from SLP_NODE that contains corresponding
3174 vectorized def-stmts. */
3175
3176static void
9771b263 3177vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
3178{
3179 tree vec_oprnd;
355fe088 3180 gimple *vec_def_stmt;
ebfd146a
IR
3181 unsigned int i;
3182
9771b263 3183 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3184
9771b263 3185 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
ebfd146a
IR
3186 {
3187 gcc_assert (vec_def_stmt);
3188 vec_oprnd = gimple_get_lhs (vec_def_stmt);
9771b263 3189 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
3190 }
3191}


/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors () to create vector stmts.
   Otherwise, the def-stmts must already be vectorized and the vectorized
   stmts must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
                   vec<vec<tree> > *vec_oprnds, int reduc_index)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors () and do not advance CHILD_INDEX, so
         that this child node is checked again for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

          /* We have to check both pattern and original def, if available.  */
          if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
            {
              gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
              gimple *related
                = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));

              if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
                  || (related
                      && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
                {
                  /* The number of vector defs is determined by the number of
                     vector statements in the node from which we get those
                     statements.  */
                  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
                  vectorized_defs = true;
                  child_index++;
                }
            }
          else
            child_index++;
        }

      if (!vectorized_defs)
        {
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (); fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs && reduc_index == -1)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects, reduc_index);

      vec_oprnds->quick_push (vec_defs);

      /* For reductions, we only need initial values.  */
      if (reduc_index != -1)
        return;
    }
}
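
/* Hypothetical sketch (not part of GCC) of the operand-to-child matching
   walk above, with operands and child LHSs reduced to plain integer ids
   and the external/constant-child case elided.  A child is consumed only
   when its first-stmt LHS matches the current operand; otherwise the
   operand counts as an invariant/constant and the cursor stays put, so
   the same child is tried again for the next operand.  */

static int
match_ops_sketch (const int *ops, int n_ops,
                  const int *child_lhs, int n_children)
{
  int child_index = 0, n_vectorized = 0;
  for (int i = 0; i < n_ops; i++)
    if (child_index < n_children && ops[i] == child_lhs[child_index])
      {
        /* Defs for this operand come already vectorized from the child.  */
        child_index++;
        n_vectorized++;
      }
  return n_vectorized;
}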


/* Create NCOPIES permutation statements using the mask MASK (a vector of
   type MASK_TYPE built from it) and two input vectors placed in DR_CHAIN
   at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy, advancing by
   STRIDE_IN elements of DR_CHAIN for every copy (STRIDE_IN is the length
   of DR_CHAIN divided by the number of copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
   where the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple *stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, vec<tree> dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple *perm_stmt = NULL;
  int i, stride_in, stride_out;
  tree first_vec, second_vec, data_ref;

  stride_out = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
  stride_in = dr_chain.length () / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = SLP_TREE_VEC_STMTS (node).length ();
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = dr_chain[first_vec_indx];
      second_vec = dr_chain[second_vec_indx];

      /* Generate the permute statement if necessary.  */
      if (mask)
        {
          perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
                                           first_vec, second_vec, mask);
          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
        }
      else
        /* If MASK was NULL_TREE generate the requested identity transform.  */
        perm_stmt = SSA_NAME_DEF_STMT (first_vec);

      /* Store the vector statement in NODE.  */
      SLP_TREE_VEC_STMTS (node)[stride_out * i + vect_stmts_counter]
        = perm_stmt;

      first_vec_indx += stride_in;
      second_vec_indx += stride_in;
    }
}
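
/* Hypothetical sketch (not part of GCC) of the indexing scheme above: for
   copy I the permute reads DR_CHAIN slots FIRST_VEC_INDX + I * STRIDE_IN
   and SECOND_VEC_INDX + I * STRIDE_IN, and its result lands in vector-stmt
   slot STRIDE_OUT * I + VECT_STMTS_COUNTER, so the NCOPIES copies of one
   permutation are spread evenly over both arrays.  */

static void
perm_copy_slots_sketch (int ncopies, int stride_in, int stride_out,
                        int first_vec_indx, int vect_stmts_counter,
                        int *in_slots, int *out_slots)
{
  for (int i = 0; i < ncopies; i++)
    {
      in_slots[i] = first_vec_indx + i * stride_in;
      out_slots[i] = stride_out * i + vect_stmts_counter;
    }
}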


/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int nunits, vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int unroll_factor, mask_element, ncopies;
  unsigned char *mask;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));

  mode = TYPE_MODE (vectype);

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = XALLOCAVEC (unsigned char, nunits);
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of copies is determined by the final vectorization factor
     relative to the SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* Generate permutation masks for every NODE.  The number of masks for
     each NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for a permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;

  for (int j = 0; j < unroll_factor; j++)
    {
      for (int k = 0; k < group_size; k++)
        {
          int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
                   + j * STMT_VINFO_GROUP_SIZE (stmt_info));
          vec_index = i / nunits;
          mask_element = i % nunits;
          if (vec_index == first_vec_index
              || first_vec_index == -1)
            {
              first_vec_index = vec_index;
            }
          else if (vec_index == second_vec_index
                   || second_vec_index == -1)
            {
              second_vec_index = vec_index;
              mask_element += nunits;
            }
          else
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "permutation requires at "
                                   "least three vectors ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              return false;
            }

          gcc_assert (mask_element >= 0
                      && mask_element < 2 * nunits);
          if (mask_element != index)
            noop_p = false;
          mask[index++] = mask_element;

          if (index == nunits)
            {
              if (! noop_p
                  && ! can_vec_perm_p (mode, false, mask))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported vect permute { ");
                      for (i = 0; i < nunits; ++i)
                        dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
                  return false;
                }

              if (!analyze_only)
                {
                  tree mask_vec = NULL_TREE;

                  if (! noop_p)
                    {
                      tree *mask_elts = XALLOCAVEC (tree, nunits);
                      for (int l = 0; l < nunits; ++l)
                        mask_elts[l] = build_int_cst (mask_element_type,
                                                      mask[l]);
                      mask_vec = build_vector (mask_type, mask_elts);
                    }

                  if (second_vec_index == -1)
                    second_vec_index = first_vec_index;
                  vect_create_mask_and_perm (stmt, mask_vec, first_vec_index,
                                             second_vec_index,
                                             gsi, node, vectype, dr_chain,
                                             ncopies, vect_stmts_counter++);
                }

              index = 0;
              first_vec_index = -1;
              second_vec_index = -1;
              noop_p = true;
            }
        }
    }

  return true;
}
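
/* Hypothetical standalone sketch (not part of GCC) of the mask computation
   in the loop above, specialized to the comment's example: nunits = 4,
   three scalar stmts per node, interleaving group size 3, load permutation
   {0,0,0} and unroll factor 4.  Filling MASKS (3 rows of 4) yields
   {0,0,0,3} {3,3,6,6} {2,5,5,5}, i.e. the normalized form of the
   {0,0,0,3} {3,3,6,6} {6,9,9,9} sequence from the comment; the "at least
   three vectors" rejection is elided for brevity.  */

static void
perm_mask_example (int masks[3][4])
{
  const int nunits = 4, group_size = 3, iv_group_size = 3;
  const int unroll_factor = 4, perm[3] = { 0, 0, 0 };
  int index = 0, row = 0, first_vec = -1;

  for (int j = 0; j < unroll_factor; j++)
    for (int k = 0; k < group_size; k++)
      {
        int i = perm[k] + j * iv_group_size;
        int vec_index = i / nunits, elt = i % nunits;
        if (first_vec == -1 || vec_index == first_vec)
          first_vec = vec_index;  /* Lane of the first input vector.  */
        else
          elt += nunits;          /* Lane of the second input vector.  */
        masks[row][index++] = elt;
        if (index == nunits)
          {
            /* One full mask assembled; start the next one.  */
            index = 0;
            first_vec = -1;
            row++;
          }
      }
}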


/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, vectorization_factor);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate the number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  The number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
     size.
     Unless this is an SLP reduction, in which case the number of vector
     stmts is equal to the number of vector stmts of the children.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
  else
    vec_stmts_size = (vectorization_factor * group_size) / nunits;

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    {
      SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt, which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
        if (gimple_assign_rhs_code (ostmt) != code0)
          {
            mask[i] = 1;
            ocode = gimple_assign_rhs_code (ostmt);
          }
        else
          mask[i] = 0;
      if (ocode != ERROR_MARK)
        {
          vec<gimple *> v0;
          vec<gimple *> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
              for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  melts[l] = build_int_cst
                      (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
                }
              tmask = build_vector (mvectype, melts);

              /* ??? Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]),
                                           gimple_assign_lhs (v1[j]), tmask);
              vect_finish_stmt_generation (stmt, vstmt, &si);
              SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
            }
          v0.release ();
          v1.release ();
          return false;
        }
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}
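
/* Hypothetical sketch (not part of GCC) of the merge-mask construction for
   two-operator nodes above.  For a group {+,-,+,-} (mask = {0,1,0,1}) and
   nunits = 4 it produces {0,5,2,7}: lane L is taken from the first vector
   (the code0 results) when mask[L mod group_size] is 0, and from lane
   L + nunits of the second vector (the ocode results) when it is 1.  */

static void
two_op_merge_mask_sketch (const unsigned char *mask, unsigned group_size,
                          unsigned nunits, unsigned *melts)
{
  unsigned k = 0;
  for (unsigned l = 0; l < nunits; ++l)
    {
      /* Wrap around the per-stmt operation mask, as the loop above does.  */
      if (k >= group_size)
        k = 0;
      melts[l] = mask[k++] * nunits + l;
    }
}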

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until the end of vect_schedule_slp, because
   multiple SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}
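
/* Illustration (hypothetical GIMPLE, not from a real dump): a pure-SLP
   scalar call such as

     x_1 = sqrtf (a_2);

   is rewritten above into

     x_1 = 0.0;

   so the SSA name keeps a trivial definition until DCE removes it, while
   the vectorized call lives in the SLP node's vector stmts.  */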

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  if (is_a <loop_vec_info> (vinfo))
    vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
  else
    vf = 1;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ??? Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ??? For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}