]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
i386.md (prefetch): Allow TARGET_AVX512PF.
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
d1e082c2 2 Copyright (C) 2007-2013 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
78c60e3d 25#include "dumpfile.h"
ebfd146a
IR
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
cf835838 31#include "gimple-pretty-print.h"
442b4905
AM
32#include "gimple.h"
33#include "gimple-ssa.h"
34#include "tree-phinodes.h"
35#include "ssa-iterators.h"
36#include "tree-ssanames.h"
7ee2468b 37#include "tree-pass.h"
ebfd146a 38#include "cfgloop.h"
ebfd146a 39#include "expr.h"
7ee2468b 40#include "recog.h" /* FIXME: for insn_data */
ebfd146a
IR
41#include "optabs.h"
42#include "tree-vectorizer.h"
2635892a 43#include "langhooks.h"
ebfd146a 44
a70d6342
IR
45/* Extract the location of the basic block in the source code.
46 Return the basic block location if succeed and NULL if not. */
47
48LOC
49find_bb_location (basic_block bb)
50{
51 gimple stmt = NULL;
52 gimple_stmt_iterator si;
53
54 if (!bb)
55 return UNKNOWN_LOC;
56
57 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
58 {
59 stmt = gsi_stmt (si);
60 if (gimple_location (stmt) != UNKNOWN_LOC)
61 return gimple_location (stmt);
62 }
63
64 return UNKNOWN_LOC;
65}
66
67
ebfd146a
IR
68/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
69
70static void
71vect_free_slp_tree (slp_tree node)
72{
d092494c 73 int i;
d755c7ef 74 slp_tree child;
d092494c 75
ebfd146a
IR
76 if (!node)
77 return;
78
9771b263 79 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 80 vect_free_slp_tree (child);
b8698a0f 81
9771b263
DN
82 SLP_TREE_CHILDREN (node).release ();
83 SLP_TREE_SCALAR_STMTS (node).release ();
84 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 85 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
86
87 free (node);
88}
89
90
91/* Free the memory allocated for the SLP instance. */
92
93void
94vect_free_slp_instance (slp_instance instance)
95{
96 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
9771b263
DN
97 SLP_INSTANCE_LOADS (instance).release ();
98 SLP_INSTANCE_BODY_COST_VEC (instance).release ();
c7e62a26 99 free (instance);
ebfd146a
IR
100}
101
102
d092494c
IR
103/* Create an SLP node for SCALAR_STMTS. */
104
105static slp_tree
9771b263 106vect_create_new_slp_node (vec<gimple> scalar_stmts)
d092494c 107{
d3cfd39e 108 slp_tree node;
9771b263 109 gimple stmt = scalar_stmts[0];
d092494c
IR
110 unsigned int nops;
111
112 if (is_gimple_call (stmt))
113 nops = gimple_call_num_args (stmt);
114 else if (is_gimple_assign (stmt))
f7e531cf
IR
115 {
116 nops = gimple_num_ops (stmt) - 1;
117 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
118 nops++;
119 }
d092494c
IR
120 else
121 return NULL;
122
d3cfd39e 123 node = XNEW (struct _slp_tree);
d092494c 124 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263
DN
125 SLP_TREE_VEC_STMTS (node).create (0);
126 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 127 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
d092494c
IR
128
129 return node;
130}
131
132
133/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
134 operand. */
9771b263 135static vec<slp_oprnd_info>
d092494c
IR
136vect_create_oprnd_info (int nops, int group_size)
137{
138 int i;
139 slp_oprnd_info oprnd_info;
9771b263 140 vec<slp_oprnd_info> oprnds_info;
d092494c 141
9771b263 142 oprnds_info.create (nops);
d092494c
IR
143 for (i = 0; i < nops; i++)
144 {
145 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 146 oprnd_info->def_stmts.create (group_size);
d092494c 147 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 148 oprnd_info->first_op_type = NULL_TREE;
d092494c 149 oprnd_info->first_pattern = false;
9771b263 150 oprnds_info.quick_push (oprnd_info);
d092494c
IR
151 }
152
153 return oprnds_info;
154}
155
156
d3cfd39e
JJ
157/* Free operands info. */
158
d092494c 159static void
9771b263 160vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
161{
162 int i;
163 slp_oprnd_info oprnd_info;
164
9771b263 165 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 166 {
9771b263 167 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
168 XDELETE (oprnd_info);
169 }
d092494c 170
9771b263 171 oprnds_info.release ();
d092494c
IR
172}
173
174
d755c7ef
RB
175/* Find the place of the data-ref in STMT in the interleaving chain that starts
176 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
177
178static int
179vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
180{
181 gimple next_stmt = first_stmt;
182 int result = 0;
183
184 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
185 return -1;
186
187 do
188 {
189 if (next_stmt == stmt)
190 return result;
191 result++;
192 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
193 }
194 while (next_stmt);
195
196 return -1;
197}
198
199
d092494c
IR
200/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
201 they are of a valid type and that they match the defs of the first stmt of
202 the SLP group (stored in OPRNDS_INFO). */
ebfd146a
IR
203
204static bool
a70d6342 205vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
23847df4
RB
206 gimple stmt, bool first,
207 vec<slp_oprnd_info> *oprnds_info)
ebfd146a
IR
208{
209 tree oprnd;
210 unsigned int i, number_of_oprnds;
abf9bfbc 211 tree def;
ebfd146a 212 gimple def_stmt;
d092494c 213 enum vect_def_type dt = vect_uninitialized_def;
a70d6342 214 struct loop *loop = NULL;
d092494c 215 bool pattern = false;
abf9bfbc 216 slp_oprnd_info oprnd_info;
f7e531cf
IR
217 int op_idx = 1;
218 tree compare_rhs = NULL_TREE;
b8698a0f 219
a70d6342
IR
220 if (loop_vinfo)
221 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a 222
d092494c 223 if (is_gimple_call (stmt))
190c2236
JJ
224 {
225 number_of_oprnds = gimple_call_num_args (stmt);
226 op_idx = 3;
227 }
f7e531cf
IR
228 else if (is_gimple_assign (stmt))
229 {
230 number_of_oprnds = gimple_num_ops (stmt) - 1;
231 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
232 number_of_oprnds++;
233 }
d092494c 234 else
f7e531cf 235 return false;
ebfd146a
IR
236
237 for (i = 0; i < number_of_oprnds; i++)
238 {
f7e531cf
IR
239 if (compare_rhs)
240 {
241 oprnd = compare_rhs;
242 compare_rhs = NULL_TREE;
243 }
244 else
245 oprnd = gimple_op (stmt, op_idx++);
246
9771b263 247 oprnd_info = (*oprnds_info)[i];
ebfd146a 248
f7e531cf
IR
249 if (COMPARISON_CLASS_P (oprnd))
250 {
251 compare_rhs = TREE_OPERAND (oprnd, 1);
252 oprnd = TREE_OPERAND (oprnd, 0);
253 }
254
24ee1384
IR
255 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
256 &def, &dt)
d092494c 257 || (!def_stmt && dt != vect_constant_def))
ebfd146a 258 {
73fbfcad 259 if (dump_enabled_p ())
ebfd146a 260 {
78c60e3d
SS
261 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
262 "Build SLP failed: can't find def for ");
263 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 264 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
265 }
266
267 return false;
268 }
269
a70d6342 270 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
ff802fa1 271 from the pattern. Check that all the stmts of the node are in the
ebfd146a 272 pattern. */
f5709183
IR
273 if (def_stmt && gimple_bb (def_stmt)
274 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
275 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
276 && gimple_code (def_stmt) != GIMPLE_PHI))
ebfd146a 277 && vinfo_for_stmt (def_stmt)
83197f37 278 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
f5709183
IR
279 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
280 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
ebfd146a 281 {
d092494c
IR
282 pattern = true;
283 if (!first && !oprnd_info->first_pattern)
284 {
73fbfcad 285 if (dump_enabled_p ())
d092494c 286 {
78c60e3d
SS
287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
288 "Build SLP failed: some of the stmts"
289 " are in a pattern, and others are not ");
290 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 291 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
d092494c 292 }
ebfd146a 293
d092494c 294 return false;
ebfd146a
IR
295 }
296
297 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
d092494c 298 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
ebfd146a 299
f7e531cf 300 if (dt == vect_unknown_def_type)
ebfd146a 301 {
73fbfcad 302 if (dump_enabled_p ())
78c60e3d 303 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 304 "Unsupported pattern.\n");
ebfd146a
IR
305 return false;
306 }
307
308 switch (gimple_code (def_stmt))
309 {
310 case GIMPLE_PHI:
d092494c 311 def = gimple_phi_result (def_stmt);
ebfd146a
IR
312 break;
313
314 case GIMPLE_ASSIGN:
d092494c 315 def = gimple_assign_lhs (def_stmt);
ebfd146a
IR
316 break;
317
318 default:
73fbfcad 319 if (dump_enabled_p ())
78c60e3d 320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 321 "unsupported defining stmt:\n");
ebfd146a
IR
322 return false;
323 }
324 }
325
d092494c 326 if (first)
ebfd146a 327 {
d092494c
IR
328 oprnd_info->first_dt = dt;
329 oprnd_info->first_pattern = pattern;
793d9a16 330 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 331 }
ebfd146a
IR
332 else
333 {
d092494c
IR
334 /* Not first stmt of the group, check that the def-stmt/s match
335 the def-stmt/s of the first stmt. Allow different definition
336 types for reduction chains: the first stmt must be a
337 vect_reduction_def (a phi node), and the rest
338 vect_internal_def. */
339 if (((oprnd_info->first_dt != dt
340 && !(oprnd_info->first_dt == vect_reduction_def
793d9a16
RB
341 && dt == vect_internal_def)
342 && !((oprnd_info->first_dt == vect_external_def
343 || oprnd_info->first_dt == vect_constant_def)
344 && (dt == vect_external_def
345 || dt == vect_constant_def)))
346 || !types_compatible_p (oprnd_info->first_op_type,
347 TREE_TYPE (oprnd))))
ebfd146a 348 {
abf9bfbc
RB
349 if (dump_enabled_p ())
350 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 351 "Build SLP failed: different types\n");
d092494c 352
abf9bfbc 353 return false;
ebfd146a
IR
354 }
355 }
356
357 /* Check the types of the definitions. */
d092494c 358 switch (dt)
ebfd146a
IR
359 {
360 case vect_constant_def:
8644a673 361 case vect_external_def:
d092494c 362 case vect_reduction_def:
ebfd146a 363 break;
b8698a0f 364
8644a673 365 case vect_internal_def:
abf9bfbc 366 oprnd_info->def_stmts.quick_push (def_stmt);
ebfd146a
IR
367 break;
368
369 default:
370 /* FORNOW: Not supported. */
73fbfcad 371 if (dump_enabled_p ())
ebfd146a 372 {
78c60e3d
SS
373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
374 "Build SLP failed: illegal type of def ");
375 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
e645e942 376 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
377 }
378
379 return false;
380 }
381 }
382
383 return true;
384}
385
386
6983e6b5
RB
387/* Verify if the scalar stmts STMTS are isomorphic, require data
388 permutation or are of unsupported types of operation. Return
389 true if they are, otherwise return false and indicate in *MATCHES
390 which stmts are not isomorphic to the first one. If MATCHES[0]
391 is false then this indicates the comparison could not be
392 carried out or the stmts will never be vectorized by SLP. */
ebfd146a
IR
393
394static bool
6983e6b5
RB
395vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
396 vec<gimple> stmts, unsigned int group_size,
397 unsigned nops, unsigned int *max_nunits,
398 unsigned int vectorization_factor, bool *matches)
ebfd146a 399{
ebfd146a 400 unsigned int i;
9771b263 401 gimple stmt = stmts[0];
2200fc49 402 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
f7e531cf 403 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 404 tree lhs;
6983e6b5 405 bool need_same_oprnds = false;
ebfd146a 406 tree vectype, scalar_type, first_op1 = NULL_TREE;
ebfd146a
IR
407 optab optab;
408 int icode;
409 enum machine_mode optab_op2_mode;
410 enum machine_mode vec_mode;
ebfd146a 411 struct data_reference *first_dr;
ebfd146a 412 HOST_WIDE_INT dummy;
c3e7ee41 413 gimple first_load = NULL, prev_first_load = NULL, old_first_load = NULL;
f7e531cf 414 tree cond;
d092494c 415
ebfd146a 416 /* For every stmt in NODE find its def stmt/s. */
9771b263 417 FOR_EACH_VEC_ELT (stmts, i, stmt)
ebfd146a 418 {
6983e6b5
RB
419 matches[i] = false;
420
73fbfcad 421 if (dump_enabled_p ())
ebfd146a 422 {
78c60e3d
SS
423 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
424 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 425 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
426 }
427
4b5caab7
IR
428 /* Fail to vectorize statements marked as unvectorizable. */
429 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
430 {
73fbfcad 431 if (dump_enabled_p ())
4b5caab7 432 {
78c60e3d
SS
433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
434 "Build SLP failed: unvectorizable statement ");
435 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 436 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4b5caab7 437 }
6983e6b5
RB
438 /* Fatal mismatch. */
439 matches[0] = false;
4b5caab7
IR
440 return false;
441 }
442
ebfd146a
IR
443 lhs = gimple_get_lhs (stmt);
444 if (lhs == NULL_TREE)
445 {
73fbfcad 446 if (dump_enabled_p ())
ebfd146a 447 {
78c60e3d
SS
448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
449 "Build SLP failed: not GIMPLE_ASSIGN nor "
450 "GIMPLE_CALL ");
451 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 452 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 453 }
6983e6b5
RB
454 /* Fatal mismatch. */
455 matches[0] = false;
ebfd146a
IR
456 return false;
457 }
458
f7e531cf
IR
459 if (is_gimple_assign (stmt)
460 && gimple_assign_rhs_code (stmt) == COND_EXPR
461 && (cond = gimple_assign_rhs1 (stmt))
462 && !COMPARISON_CLASS_P (cond))
463 {
73fbfcad 464 if (dump_enabled_p ())
f7e531cf 465 {
78c60e3d
SS
466 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
467 "Build SLP failed: condition is not "
468 "comparison ");
469 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 470 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f7e531cf 471 }
6983e6b5
RB
472 /* Fatal mismatch. */
473 matches[0] = false;
f7e531cf
IR
474 return false;
475 }
476
b8698a0f 477 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
ebfd146a
IR
478 vectype = get_vectype_for_scalar_type (scalar_type);
479 if (!vectype)
480 {
73fbfcad 481 if (dump_enabled_p ())
ebfd146a 482 {
78c60e3d
SS
483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
484 "Build SLP failed: unsupported data-type ");
485 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
486 scalar_type);
e645e942 487 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 488 }
6983e6b5
RB
489 /* Fatal mismatch. */
490 matches[0] = false;
ebfd146a
IR
491 return false;
492 }
b8698a0f 493
4ef69dfc
IR
494 /* In case of multiple types we need to detect the smallest type. */
495 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
a70d6342 496 {
4ef69dfc
IR
497 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
498 if (bb_vinfo)
499 vectorization_factor = *max_nunits;
a70d6342 500 }
b8698a0f 501
ebfd146a 502 if (is_gimple_call (stmt))
190c2236
JJ
503 {
504 rhs_code = CALL_EXPR;
505 if (gimple_call_internal_p (stmt)
506 || gimple_call_tail_p (stmt)
507 || gimple_call_noreturn_p (stmt)
508 || !gimple_call_nothrow_p (stmt)
509 || gimple_call_chain (stmt))
510 {
73fbfcad 511 if (dump_enabled_p ())
190c2236 512 {
78c60e3d
SS
513 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
514 "Build SLP failed: unsupported call type ");
515 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 516 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
190c2236 517 }
6983e6b5
RB
518 /* Fatal mismatch. */
519 matches[0] = false;
190c2236
JJ
520 return false;
521 }
522 }
ebfd146a
IR
523 else
524 rhs_code = gimple_assign_rhs_code (stmt);
525
526 /* Check the operation. */
527 if (i == 0)
528 {
529 first_stmt_code = rhs_code;
530
b8698a0f 531 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
532 vector shift with scalar shift operand. */
533 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
534 || rhs_code == LROTATE_EXPR
535 || rhs_code == RROTATE_EXPR)
536 {
537 vec_mode = TYPE_MODE (vectype);
538
539 /* First see if we have a vector/vector shift. */
540 optab = optab_for_tree_code (rhs_code, vectype,
541 optab_vector);
542
543 if (!optab
947131ba 544 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
545 {
546 /* No vector/vector shift, try for a vector/scalar shift. */
547 optab = optab_for_tree_code (rhs_code, vectype,
548 optab_scalar);
549
550 if (!optab)
551 {
73fbfcad 552 if (dump_enabled_p ())
78c60e3d 553 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 554 "Build SLP failed: no optab.\n");
6983e6b5
RB
555 /* Fatal mismatch. */
556 matches[0] = false;
ebfd146a
IR
557 return false;
558 }
947131ba 559 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
560 if (icode == CODE_FOR_nothing)
561 {
73fbfcad 562 if (dump_enabled_p ())
78c60e3d
SS
563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
564 "Build SLP failed: "
e645e942 565 "op not supported by target.\n");
6983e6b5
RB
566 /* Fatal mismatch. */
567 matches[0] = false;
ebfd146a
IR
568 return false;
569 }
570 optab_op2_mode = insn_data[icode].operand[2].mode;
571 if (!VECTOR_MODE_P (optab_op2_mode))
572 {
573 need_same_oprnds = true;
574 first_op1 = gimple_assign_rhs2 (stmt);
575 }
576 }
577 }
36ba4aae
IR
578 else if (rhs_code == WIDEN_LSHIFT_EXPR)
579 {
580 need_same_oprnds = true;
581 first_op1 = gimple_assign_rhs2 (stmt);
582 }
ebfd146a
IR
583 }
584 else
585 {
586 if (first_stmt_code != rhs_code
587 && (first_stmt_code != IMAGPART_EXPR
588 || rhs_code != REALPART_EXPR)
589 && (first_stmt_code != REALPART_EXPR
69f11a13 590 || rhs_code != IMAGPART_EXPR)
0d0293ac 591 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
69f11a13 592 && (first_stmt_code == ARRAY_REF
38000232 593 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
594 || first_stmt_code == INDIRECT_REF
595 || first_stmt_code == COMPONENT_REF
596 || first_stmt_code == MEM_REF)))
ebfd146a 597 {
73fbfcad 598 if (dump_enabled_p ())
ebfd146a 599 {
78c60e3d
SS
600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
601 "Build SLP failed: different operation "
602 "in stmt ");
603 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 604 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 605 }
6983e6b5
RB
606 /* Mismatch. */
607 continue;
ebfd146a 608 }
b8698a0f
L
609
610 if (need_same_oprnds
ebfd146a
IR
611 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
612 {
73fbfcad 613 if (dump_enabled_p ())
ebfd146a 614 {
78c60e3d
SS
615 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
616 "Build SLP failed: different shift "
617 "arguments in ");
618 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 619 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 620 }
6983e6b5
RB
621 /* Mismatch. */
622 continue;
ebfd146a 623 }
190c2236
JJ
624
625 if (rhs_code == CALL_EXPR)
626 {
9771b263 627 gimple first_stmt = stmts[0];
190c2236
JJ
628 if (gimple_call_num_args (stmt) != nops
629 || !operand_equal_p (gimple_call_fn (first_stmt),
630 gimple_call_fn (stmt), 0)
631 || gimple_call_fntype (first_stmt)
632 != gimple_call_fntype (stmt))
633 {
73fbfcad 634 if (dump_enabled_p ())
190c2236 635 {
78c60e3d
SS
636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
637 "Build SLP failed: different calls in ");
638 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
639 stmt, 0);
e645e942 640 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
190c2236 641 }
6983e6b5
RB
642 /* Mismatch. */
643 continue;
190c2236
JJ
644 }
645 }
ebfd146a
IR
646 }
647
0d0293ac
MM
648 /* Grouped store or load. */
649 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
ebfd146a
IR
650 {
651 if (REFERENCE_CLASS_P (lhs))
652 {
653 /* Store. */
6983e6b5 654 ;
ebfd146a 655 }
b5aeb3bb
IR
656 else
657 {
658 /* Load. */
314f64eb
RB
659 unsigned unrolling_factor
660 = least_common_multiple
661 (*max_nunits, group_size) / group_size;
a64b9c26
RB
662 /* FORNOW: Check that there is no gap between the loads
663 and no gap between the groups when we need to load
664 multiple groups at once.
665 ??? We should enhance this to only disallow gaps
666 inside vectors. */
314f64eb 667 if ((unrolling_factor > 1
a64b9c26
RB
668 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
669 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
670 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
671 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
b5aeb3bb 672 {
73fbfcad 673 if (dump_enabled_p ())
b5aeb3bb 674 {
78c60e3d
SS
675 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
676 "Build SLP failed: grouped "
677 "loads have gaps ");
678 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
679 stmt, 0);
e645e942 680 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b5aeb3bb 681 }
6983e6b5
RB
682 /* Fatal mismatch. */
683 matches[0] = false;
b5aeb3bb
IR
684 return false;
685 }
2f0fa28e 686
b5aeb3bb
IR
687 /* Check that the size of interleaved loads group is not
688 greater than the SLP group size. */
314f64eb
RB
689 unsigned ncopies
690 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
6aa904c4 691 if (loop_vinfo
a64b9c26
RB
692 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
693 && ((GROUP_SIZE (vinfo_for_stmt (stmt))
694 - GROUP_GAP (vinfo_for_stmt (stmt)))
695 > ncopies * group_size))
b5aeb3bb 696 {
73fbfcad 697 if (dump_enabled_p ())
b5aeb3bb 698 {
78c60e3d
SS
699 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
700 "Build SLP failed: the number "
701 "of interleaved loads is greater than "
702 "the SLP group size ");
703 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
704 stmt, 0);
e645e942 705 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b5aeb3bb 706 }
6983e6b5
RB
707 /* Fatal mismatch. */
708 matches[0] = false;
b5aeb3bb
IR
709 return false;
710 }
711
c3e7ee41 712 old_first_load = first_load;
e14c1050 713 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
b5aeb3bb
IR
714 if (prev_first_load)
715 {
716 /* Check that there are no loads from different interleaving
6983e6b5
RB
717 chains in the same node. */
718 if (prev_first_load != first_load)
78c60e3d 719 {
73fbfcad 720 if (dump_enabled_p ())
b5aeb3bb 721 {
78c60e3d
SS
722 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
723 vect_location,
724 "Build SLP failed: different "
725 "interleaving chains in one node ");
726 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
727 stmt, 0);
e645e942 728 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b5aeb3bb 729 }
6983e6b5
RB
730 /* Mismatch. */
731 continue;
b5aeb3bb
IR
732 }
733 }
734 else
735 prev_first_load = first_load;
b8698a0f 736
c3e7ee41
BS
737 /* In some cases a group of loads is just the same load
738 repeated N times. Only analyze its cost once. */
739 if (first_load == stmt && old_first_load != first_load)
ebfd146a
IR
740 {
741 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
720f5239 742 if (vect_supportable_dr_alignment (first_dr, false)
ebfd146a
IR
743 == dr_unaligned_unsupported)
744 {
73fbfcad 745 if (dump_enabled_p ())
ebfd146a 746 {
78c60e3d
SS
747 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
748 vect_location,
749 "Build SLP failed: unsupported "
750 "unaligned load ");
751 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
752 stmt, 0);
e645e942 753 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 754 }
6983e6b5
RB
755 /* Fatal mismatch. */
756 matches[0] = false;
ebfd146a
IR
757 return false;
758 }
ebfd146a 759 }
ebfd146a 760 }
0d0293ac 761 } /* Grouped access. */
ebfd146a
IR
762 else
763 {
764 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
765 {
0d0293ac 766 /* Not grouped load. */
73fbfcad 767 if (dump_enabled_p ())
ebfd146a 768 {
78c60e3d
SS
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
770 "Build SLP failed: not grouped load ");
771 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 772 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
773 }
774
0d0293ac 775 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
776 /* Fatal mismatch. */
777 matches[0] = false;
ebfd146a
IR
778 return false;
779 }
780
781 /* Not memory operation. */
782 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 783 && TREE_CODE_CLASS (rhs_code) != tcc_unary
190c2236
JJ
784 && rhs_code != COND_EXPR
785 && rhs_code != CALL_EXPR)
ebfd146a 786 {
73fbfcad 787 if (dump_enabled_p ())
ebfd146a 788 {
78c60e3d
SS
789 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
790 "Build SLP failed: operation");
791 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
792 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 793 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 794 }
6983e6b5
RB
795 /* Fatal mismatch. */
796 matches[0] = false;
ebfd146a
IR
797 return false;
798 }
799
f7e531cf
IR
800 if (rhs_code == COND_EXPR)
801 {
802 tree cond_expr = gimple_assign_rhs1 (stmt);
803
804 if (i == 0)
805 first_cond_code = TREE_CODE (cond_expr);
806 else if (first_cond_code != TREE_CODE (cond_expr))
807 {
73fbfcad 808 if (dump_enabled_p ())
f7e531cf 809 {
78c60e3d
SS
810 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
811 "Build SLP failed: different"
812 " operation");
813 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
814 stmt, 0);
e645e942 815 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f7e531cf 816 }
6983e6b5
RB
817 /* Mismatch. */
818 continue;
f7e531cf
IR
819 }
820 }
ebfd146a 821 }
6983e6b5
RB
822
823 matches[i] = true;
824 }
825
826 for (i = 0; i < group_size; ++i)
827 if (!matches[i])
828 return false;
829
830 return true;
831}
832
833/* Recursively build an SLP tree starting from NODE.
834 Fail (and return a value not equal to zero) if def-stmts are not
835 isomorphic, require data permutation or are of unsupported types of
836 operation. Otherwise, return 0.
837 The value returned is the depth in the SLP tree where a mismatch
838 was found. */
839
840static bool
841vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
842 slp_tree *node, unsigned int group_size,
843 unsigned int *max_nunits,
844 vec<slp_tree> *loads,
845 unsigned int vectorization_factor,
846 bool *matches, unsigned *npermutes)
847{
848 unsigned nops, i, this_npermutes = 0;
849 gimple stmt;
850
851 if (!matches)
852 matches = XALLOCAVEC (bool, group_size);
853 if (!npermutes)
854 npermutes = &this_npermutes;
855
856 matches[0] = false;
857
858 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
859 if (is_gimple_call (stmt))
860 nops = gimple_call_num_args (stmt);
861 else if (is_gimple_assign (stmt))
862 {
863 nops = gimple_num_ops (stmt) - 1;
864 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
865 nops++;
ebfd146a 866 }
6983e6b5
RB
867 else
868 return false;
869
870 if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
871 SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
872 max_nunits, vectorization_factor, matches))
873 return false;
ebfd146a 874
6983e6b5
RB
875 /* If the SLP node is a load, terminate the recursion. */
876 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
877 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
ebfd146a 878 {
9771b263 879 loads->safe_push (*node);
ebfd146a
IR
880 return true;
881 }
882
6983e6b5
RB
883 /* Get at the operands, verifying they are compatible. */
884 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
885 slp_oprnd_info oprnd_info;
886 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
887 {
888 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
889 stmt, (i == 0), &oprnds_info))
890 {
891 vect_free_oprnd_info (oprnds_info);
892 return false;
893 }
894 }
895
896 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
897
b8698a0f 898 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 899 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 900 {
d092494c 901 slp_tree child;
6983e6b5
RB
902 unsigned old_nloads = loads->length ();
903 unsigned old_max_nunits = *max_nunits;
b8698a0f 904
d092494c
IR
905 if (oprnd_info->first_dt != vect_internal_def)
906 continue;
ebfd146a 907
d092494c 908 child = vect_create_new_slp_node (oprnd_info->def_stmts);
6983e6b5
RB
909 if (!child)
910 {
9771b263 911 vect_free_oprnd_info (oprnds_info);
6983e6b5 912 return false;
d092494c 913 }
b8698a0f 914
6983e6b5
RB
915 bool *matches = XALLOCAVEC (bool, group_size);
916 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
917 group_size, max_nunits, loads,
918 vectorization_factor, matches, npermutes))
919 {
920 oprnd_info->def_stmts = vNULL;
921 SLP_TREE_CHILDREN (*node).quick_push (child);
922 continue;
923 }
924
925 /* If the SLP build for operand zero failed and operand zero
926 and one can be commutated try that for the scalar stmts
927 that failed the match. */
928 if (i == 0
929 /* A first scalar stmt mismatch signals a fatal mismatch. */
930 && matches[0]
931 /* ??? For COND_EXPRs we can swap the comparison operands
932 as well as the arms under some constraints. */
933 && nops == 2
934 && oprnds_info[1]->first_dt == vect_internal_def
935 && is_gimple_assign (stmt)
936 && commutative_tree_code (gimple_assign_rhs_code (stmt))
937 /* Do so only if the number of not successful permutes was nor more
938 than a cut-ff as re-trying the recursive match on
939 possibly each level of the tree would expose exponential
940 behavior. */
941 && *npermutes < 4)
942 {
943 /* Roll back. */
944 *max_nunits = old_max_nunits;
945 loads->truncate (old_nloads);
946 /* Swap mismatched definition stmts. */
947 for (unsigned j = 0; j < group_size; ++j)
948 if (!matches[j])
949 {
950 gimple tem = oprnds_info[0]->def_stmts[j];
951 oprnds_info[0]->def_stmts[j] = oprnds_info[1]->def_stmts[j];
952 oprnds_info[1]->def_stmts[j] = tem;
953 }
954 /* And try again ... */
955 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
956 group_size, max_nunits, loads,
957 vectorization_factor,
958 matches, npermutes))
959 {
960 oprnd_info->def_stmts = vNULL;
961 SLP_TREE_CHILDREN (*node).quick_push (child);
962 continue;
963 }
964
965 ++*npermutes;
966 }
967
968 oprnd_info->def_stmts = vNULL;
969 vect_free_slp_tree (child);
970 vect_free_oprnd_info (oprnds_info);
971 return false;
ebfd146a
IR
972 }
973
9771b263 974 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
975 return true;
976}
977
78c60e3d 978/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
979
980static void
78c60e3d 981vect_print_slp_tree (int dump_kind, slp_tree node)
ebfd146a
IR
982{
983 int i;
984 gimple stmt;
d755c7ef 985 slp_tree child;
ebfd146a
IR
986
987 if (!node)
988 return;
989
78c60e3d 990 dump_printf (dump_kind, "node ");
9771b263 991 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a 992 {
78c60e3d
SS
993 dump_printf (dump_kind, "\n\tstmt %d ", i);
994 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
ebfd146a 995 }
78c60e3d 996 dump_printf (dump_kind, "\n");
ebfd146a 997
9771b263 998 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 999 vect_print_slp_tree (dump_kind, child);
ebfd146a
IR
1000}
1001
1002
b8698a0f
L
1003/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1004 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1005 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1006 stmts in NODE are to be marked. */
1007
1008static void
1009vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1010{
1011 int i;
1012 gimple stmt;
d755c7ef 1013 slp_tree child;
ebfd146a
IR
1014
1015 if (!node)
1016 return;
1017
9771b263 1018 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
1019 if (j < 0 || i == j)
1020 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1021
9771b263 1022 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1023 vect_mark_slp_stmts (child, mark, j);
ebfd146a
IR
1024}
1025
1026
a70d6342
IR
1027/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1028
1029static void
1030vect_mark_slp_stmts_relevant (slp_tree node)
1031{
1032 int i;
1033 gimple stmt;
1034 stmt_vec_info stmt_info;
d755c7ef 1035 slp_tree child;
a70d6342
IR
1036
1037 if (!node)
1038 return;
1039
9771b263 1040 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
1041 {
1042 stmt_info = vinfo_for_stmt (stmt);
b8698a0f 1043 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1044 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1045 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1046 }
1047
9771b263 1048 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1049 vect_mark_slp_stmts_relevant (child);
a70d6342
IR
1050}
1051
1052
b5aeb3bb
IR
1053/* Rearrange the statements of NODE according to PERMUTATION. */
1054
1055static void
1056vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
01d8bf07 1057 vec<unsigned> permutation)
b5aeb3bb
IR
1058{
1059 gimple stmt;
9771b263 1060 vec<gimple> tmp_stmts;
d755c7ef
RB
1061 unsigned int i;
1062 slp_tree child;
b5aeb3bb 1063
9771b263 1064 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1065 vect_slp_rearrange_stmts (child, group_size, permutation);
b5aeb3bb 1066
9771b263
DN
1067 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1068 tmp_stmts.create (group_size);
d755c7ef 1069 tmp_stmts.quick_grow_cleared (group_size);
b5aeb3bb 1070
9771b263 1071 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
d755c7ef 1072 tmp_stmts[permutation[i]] = stmt;
b5aeb3bb 1073
9771b263 1074 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1075 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1076}
1077
1078
01d8bf07
RB
1079/* Check if the required load permutations in the SLP instance
1080 SLP_INSTN are supported. */
ebfd146a
IR
1081
1082static bool
01d8bf07 1083vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1084{
01d8bf07
RB
1085 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1086 unsigned int i, j, k, next;
7417f6c0 1087 sbitmap load_index;
6983e6b5
RB
1088 slp_tree node;
1089 gimple stmt, load, next_load, first_load;
6aa904c4 1090 struct data_reference *dr;
ebfd146a 1091
73fbfcad 1092 if (dump_enabled_p ())
ebfd146a 1093 {
78c60e3d 1094 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1095 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1096 if (node->load_permutation.exists ())
1097 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1098 dump_printf (MSG_NOTE, "%d ", next);
1099 else
1100 for (i = 0; i < group_size; ++i)
1101 dump_printf (MSG_NOTE, "%d ", i);
e645e942 1102 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1103 }
1104
b5aeb3bb
IR
1105 /* In case of reduction every load permutation is allowed, since the order
1106 of the reduction statements is not important (as opposed to the case of
0d0293ac 1107 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1108 load nodes are of the same size and have the same permutation (and then
1109 rearrange all the nodes of the SLP instance according to this
1110 permutation). */
1111
1112 /* Check that all the load nodes are of the same size. */
01d8bf07 1113 /* ??? Can't we assert this? */
9771b263 1114 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1115 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1116 return false;
2200fc49 1117
b5aeb3bb 1118 node = SLP_INSTANCE_TREE (slp_instn);
9771b263 1119 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1120
b010117a
IR
1121 /* Reduction (there are no data-refs in the root).
1122 In reduction chain the order of the loads is important. */
1123 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1124 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1125 {
01d8bf07
RB
1126 slp_tree load;
1127 unsigned int lidx;
b5aeb3bb 1128
01d8bf07
RB
1129 /* Compare all the permutation sequences to the first one. We know
1130 that at least one load is permuted. */
1131 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1132 if (!node->load_permutation.exists ())
1133 return false;
1134 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1135 {
1136 if (!load->load_permutation.exists ())
1137 return false;
1138 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1139 if (lidx != node->load_permutation[j])
1140 return false;
1141 }
c9c1e775 1142
01d8bf07
RB
1143 /* Check that the loads in the first sequence are different and there
1144 are no gaps between them. */
1145 load_index = sbitmap_alloc (group_size);
1146 bitmap_clear (load_index);
1147 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1148 {
1149 if (bitmap_bit_p (load_index, lidx))
1150 {
1151 sbitmap_free (load_index);
1152 return false;
1153 }
1154 bitmap_set_bit (load_index, lidx);
1155 }
1156 for (i = 0; i < group_size; i++)
1157 if (!bitmap_bit_p (load_index, i))
1158 {
1159 sbitmap_free (load_index);
1160 return false;
1161 }
1162 sbitmap_free (load_index);
1163
1164 /* This permutation is valid for reduction. Since the order of the
1165 statements in the nodes is not important unless they are memory
1166 accesses, we can rearrange the statements in all the nodes
1167 according to the order of the loads. */
1168 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1169 node->load_permutation);
1170
1171 /* We are done, no actual permutations need to be generated. */
1172 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1173 SLP_TREE_LOAD_PERMUTATION (node).release ();
1174 return true;
b5aeb3bb
IR
1175 }
1176
6aa904c4
IR
1177 /* In basic block vectorization we allow any subchain of an interleaving
1178 chain.
1179 FORNOW: not supported in loop SLP because of realignment compications. */
01d8bf07 1180 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
6aa904c4 1181 {
01d8bf07
RB
1182 /* Check that for every node in the instance the loads
1183 form a subchain. */
9771b263 1184 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4
IR
1185 {
1186 next_load = NULL;
9771b263 1187 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
6aa904c4 1188 {
6aa904c4 1189 if (j != 0 && next_load != load)
01d8bf07 1190 return false;
6aa904c4
IR
1191 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1192 }
6aa904c4
IR
1193 }
1194
1195 /* Check that the alignment of the first load in every subchain, i.e.,
01d8bf07
RB
1196 the first statement in every load node, is supported.
1197 ??? This belongs in alignment checking. */
1198 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1199 {
1200 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1201 if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1202 {
1203 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1204 if (vect_supportable_dr_alignment (dr, false)
1205 == dr_unaligned_unsupported)
1206 {
1207 if (dump_enabled_p ())
1208 {
1209 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1210 vect_location,
1211 "unsupported unaligned load ");
1212 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1213 first_load, 0);
e645e942 1214 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
01d8bf07
RB
1215 }
1216 return false;
1217 }
1218 }
1219 }
6aa904c4 1220
01d8bf07
RB
1221 /* We are done, no actual permutations need to be generated. */
1222 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1223 SLP_TREE_LOAD_PERMUTATION (node).release ();
1224 return true;
6aa904c4
IR
1225 }
1226
b8698a0f
L
1227 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1228 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
b5aeb3bb 1229 well (unless it's reduction). */
01d8bf07 1230 if (SLP_INSTANCE_LOADS (slp_instn).length () != group_size)
ebfd146a 1231 return false;
01d8bf07
RB
1232 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1233 if (!node->load_permutation.exists ())
1234 return false;
ebfd146a 1235
7417f6c0 1236 load_index = sbitmap_alloc (group_size);
f61e445a 1237 bitmap_clear (load_index);
01d8bf07
RB
1238 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1239 {
1240 unsigned int lidx = node->load_permutation[0];
1241 if (bitmap_bit_p (load_index, lidx))
1242 {
1243 sbitmap_free (load_index);
1244 return false;
1245 }
1246 bitmap_set_bit (load_index, lidx);
1247 FOR_EACH_VEC_ELT (node->load_permutation, j, k)
1248 if (k != lidx)
1249 {
1250 sbitmap_free (load_index);
1251 return false;
1252 }
ebfd146a 1253 }
01d8bf07
RB
1254 for (i = 0; i < group_size; i++)
1255 if (!bitmap_bit_p (load_index, i))
b8d381a3
JJ
1256 {
1257 sbitmap_free (load_index);
1258 return false;
1259 }
7417f6c0 1260 sbitmap_free (load_index);
ebfd146a 1261
01d8bf07
RB
1262 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1263 if (node->load_permutation.exists ()
1264 && !vect_transform_slp_perm_load
1265 (node, vNULL, NULL,
1266 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1267 return false;
1268 return true;
ebfd146a
IR
1269}
1270
1271
b8698a0f 1272/* Find the first load in the loop that belongs to INSTANCE.
ebfd146a 1273 When loads are in several SLP nodes, there can be a case in which the first
b8698a0f 1274 load does not appear in the first SLP node to be transformed, causing
ff802fa1 1275 incorrect order of statements. Since we generate all the loads together,
ebfd146a
IR
1276 they must be inserted before the first load of the SLP instance and not
1277 before the first load of the first node of the instance. */
ff802fa1 1278
b8698a0f
L
1279static gimple
1280vect_find_first_load_in_slp_instance (slp_instance instance)
ebfd146a
IR
1281{
1282 int i, j;
1283 slp_tree load_node;
1284 gimple first_load = NULL, load;
1285
9771b263
DN
1286 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load_node)
1287 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
ebfd146a 1288 first_load = get_earlier_stmt (load, first_load);
b8698a0f 1289
ebfd146a
IR
1290 return first_load;
1291}
1292
1293
e4a707c4 1294/* Find the last store in SLP INSTANCE. */
ff802fa1 1295
e4a707c4
IR
1296static gimple
1297vect_find_last_store_in_slp_instance (slp_instance instance)
1298{
1299 int i;
1300 slp_tree node;
1301 gimple last_store = NULL, store;
1302
1303 node = SLP_INSTANCE_TREE (instance);
9771b263 1304 for (i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &store); i++)
e4a707c4
IR
1305 last_store = get_later_stmt (store, last_store);
1306
1307 return last_store;
1308}
1309
23847df4
RB
1310/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1311
1312static void
1313vect_analyze_slp_cost_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1314 slp_instance instance, slp_tree node,
1315 stmt_vector_for_cost *prologue_cost_vec,
1316 unsigned ncopies_for_cost)
1317{
1318 stmt_vector_for_cost *body_cost_vec = &SLP_INSTANCE_BODY_COST_VEC (instance);
1319
1320 unsigned i;
1321 slp_tree child;
1322 gimple stmt, s;
1323 stmt_vec_info stmt_info;
1324 tree lhs;
1325 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1326
1327 /* Recurse down the SLP tree. */
1328 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1329 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1330 instance, child, prologue_cost_vec,
1331 ncopies_for_cost);
1332
1333 /* Look at the first scalar stmt to determine the cost. */
1334 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1335 stmt_info = vinfo_for_stmt (stmt);
1336 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1337 {
1338 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1339 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1340 vect_uninitialized_def,
1341 node, prologue_cost_vec, body_cost_vec);
1342 else
1343 {
1344 int i;
1345 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1346 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1347 node, prologue_cost_vec, body_cost_vec);
1348 /* If the load is permuted record the cost for the permutation.
1349 ??? Loads from multiple chains are let through here only
1350 for a single special case involving complex numbers where
1351 in the end no permutation is necessary. */
1352 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1353 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1354 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1355 && vect_get_place_in_interleaving_chain
1356 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1357 {
1358 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1359 stmt_info, 0, vect_body);
1360 break;
1361 }
1362 }
1363 }
1364 else
1365 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1366 stmt_info, 0, vect_body);
1367
1368 /* Scan operands and account for prologue cost of constants/externals.
1369 ??? This over-estimates cost for multiple uses and should be
1370 re-engineered. */
1371 lhs = gimple_get_lhs (stmt);
1372 for (i = 0; i < gimple_num_ops (stmt); ++i)
1373 {
1374 tree def, op = gimple_op (stmt, i);
1375 gimple def_stmt;
1376 enum vect_def_type dt;
1377 if (!op || op == lhs)
1378 continue;
1379 if (vect_is_simple_use (op, NULL, loop_vinfo, bb_vinfo,
1380 &def_stmt, &def, &dt)
1381 && (dt == vect_constant_def || dt == vect_external_def))
1382 record_stmt_cost (prologue_cost_vec, 1, vector_stmt,
1383 stmt_info, 0, vect_prologue);
1384 }
1385}
1386
1387/* Compute the cost for the SLP instance INSTANCE. */
1388
1389static void
1390vect_analyze_slp_cost (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1391 slp_instance instance, unsigned nunits)
1392{
1393 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1394 unsigned ncopies_for_cost;
1395 stmt_info_for_cost *si;
1396 unsigned i;
1397
1398 /* Calculate the number of vector stmts to create based on the unrolling
1399 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1400 GROUP_SIZE / NUNITS otherwise. */
1401 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1402 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1403
1404 prologue_cost_vec.create (10);
1405 body_cost_vec.create (10);
1406 SLP_INSTANCE_BODY_COST_VEC (instance) = body_cost_vec;
1407 vect_analyze_slp_cost_1 (loop_vinfo, bb_vinfo,
1408 instance, SLP_INSTANCE_TREE (instance),
1409 &prologue_cost_vec, ncopies_for_cost);
1410
1411 /* Record the prologue costs, which were delayed until we were
1412 sure that SLP was successful. Unlike the body costs, we know
1413 the final values now regardless of the loop vectorization factor. */
1414 void *data = (loop_vinfo ? LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
1415 : BB_VINFO_TARGET_COST_DATA (bb_vinfo));
1416 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1417 {
1418 struct _stmt_vec_info *stmt_info
1419 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1420 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1421 si->misalign, vect_prologue);
1422 }
1423
1424 prologue_cost_vec.release ();
1425}
e4a707c4 1426
0d0293ac 1427/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1428 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1429 Return FALSE if it's impossible to SLP any stmt in the loop. */
1430
1431static bool
a70d6342
IR
1432vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1433 gimple stmt)
ebfd146a
IR
1434{
1435 slp_instance new_instance;
d092494c 1436 slp_tree node;
e14c1050 1437 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
ebfd146a 1438 unsigned int unrolling_factor = 1, nunits;
b5aeb3bb 1439 tree vectype, scalar_type = NULL_TREE;
ebfd146a 1440 gimple next;
0f900dfa 1441 unsigned int vectorization_factor = 0;
23847df4 1442 int i;
ebfd146a 1443 unsigned int max_nunits = 0;
9771b263 1444 vec<slp_tree> loads;
b5aeb3bb 1445 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
9771b263 1446 vec<gimple> scalar_stmts;
b5aeb3bb 1447
b010117a 1448 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1449 {
b010117a
IR
1450 if (dr)
1451 {
1452 scalar_type = TREE_TYPE (DR_REF (dr));
1453 vectype = get_vectype_for_scalar_type (scalar_type);
1454 }
1455 else
1456 {
1457 gcc_assert (loop_vinfo);
1458 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1459 }
1460
e14c1050 1461 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
1462 }
1463 else
1464 {
1465 gcc_assert (loop_vinfo);
1466 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
9771b263 1467 group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
b5aeb3bb 1468 }
b8698a0f 1469
ebfd146a
IR
1470 if (!vectype)
1471 {
73fbfcad 1472 if (dump_enabled_p ())
ebfd146a 1473 {
78c60e3d
SS
1474 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1475 "Build SLP failed: unsupported data-type ");
1476 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 1477 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1478 }
b5aeb3bb 1479
ebfd146a
IR
1480 return false;
1481 }
1482
1483 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342
IR
1484 if (loop_vinfo)
1485 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1486 else
a70d6342
IR
1487 vectorization_factor = nunits;
1488
a70d6342
IR
1489 /* Calculate the unrolling factor. */
1490 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1491 if (unrolling_factor != 1 && !loop_vinfo)
1492 {
73fbfcad 1493 if (dump_enabled_p ())
e645e942 1494 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 1495 "Build SLP failed: unrolling required in basic"
e645e942 1496 " block SLP\n");
b8698a0f 1497
a70d6342
IR
1498 return false;
1499 }
1500
0d0293ac 1501 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1502 scalar_stmts.create (group_size);
ebfd146a 1503 next = stmt;
b010117a 1504 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
ebfd146a 1505 {
b5aeb3bb
IR
1506 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1507 while (next)
1508 {
f7e531cf
IR
1509 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1510 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
9771b263
DN
1511 scalar_stmts.safe_push (
1512 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f7e531cf 1513 else
9771b263 1514 scalar_stmts.safe_push (next);
e14c1050 1515 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb
IR
1516 }
1517 }
1518 else
1519 {
1520 /* Collect reduction statements. */
9771b263
DN
1521 vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1522 for (i = 0; reductions.iterate (i, &next); i++)
1523 scalar_stmts.safe_push (next);
ebfd146a
IR
1524 }
1525
d092494c 1526 node = vect_create_new_slp_node (scalar_stmts);
ebfd146a 1527
9771b263 1528 loads.create (group_size);
ebfd146a
IR
1529
1530 /* Build the tree for the SLP instance. */
b8698a0f 1531 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
abf9bfbc 1532 &max_nunits, &loads,
6983e6b5 1533 vectorization_factor, NULL, NULL))
ebfd146a 1534 {
4ef69dfc 1535 /* Calculate the unrolling factor based on the smallest type. */
ebfd146a
IR
1536 if (max_nunits > nunits)
1537 unrolling_factor = least_common_multiple (max_nunits, group_size)
1538 / group_size;
b8698a0f 1539
4ef69dfc
IR
1540 if (unrolling_factor != 1 && !loop_vinfo)
1541 {
73fbfcad 1542 if (dump_enabled_p ())
e645e942 1543 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 1544 "Build SLP failed: unrolling required in basic"
e645e942 1545 " block SLP\n");
c7e62a26 1546 vect_free_slp_tree (node);
9771b263 1547 loads.release ();
4ef69dfc
IR
1548 return false;
1549 }
1550
1551 /* Create a new SLP instance. */
1552 new_instance = XNEW (struct _slp_instance);
1553 SLP_INSTANCE_TREE (new_instance) = node;
1554 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 1555 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
23847df4 1556 SLP_INSTANCE_BODY_COST_VEC (new_instance) = vNULL;
ebfd146a
IR
1557 SLP_INSTANCE_LOADS (new_instance) = loads;
1558 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
abf9bfbc
RB
1559
1560 /* Compute the load permutation. */
1561 slp_tree load_node;
1562 bool loads_permuted = false;
abf9bfbc
RB
1563 FOR_EACH_VEC_ELT (loads, i, load_node)
1564 {
01d8bf07 1565 vec<unsigned> load_permutation;
abf9bfbc 1566 int j;
6983e6b5 1567 gimple load, first_stmt;
01d8bf07
RB
1568 bool this_load_permuted = false;
1569 load_permutation.create (group_size);
6983e6b5
RB
1570 first_stmt = GROUP_FIRST_ELEMENT
1571 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
abf9bfbc
RB
1572 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1573 {
6983e6b5
RB
1574 int load_place
1575 = vect_get_place_in_interleaving_chain (load, first_stmt);
1576 gcc_assert (load_place != -1);
1577 if (load_place != j)
01d8bf07 1578 this_load_permuted = true;
abf9bfbc
RB
1579 load_permutation.safe_push (load_place);
1580 }
01d8bf07
RB
1581 if (!this_load_permuted)
1582 {
1583 load_permutation.release ();
1584 continue;
1585 }
1586 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1587 loads_permuted = true;
abf9bfbc 1588 }
6aa904c4
IR
1589
1590 if (loads_permuted)
ebfd146a 1591 {
01d8bf07 1592 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 1593 {
73fbfcad 1594 if (dump_enabled_p ())
ebfd146a 1595 {
e645e942 1596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
1597 "Build SLP failed: unsupported load "
1598 "permutation ");
1599 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 1600 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1601 }
ebfd146a
IR
1602 vect_free_slp_instance (new_instance);
1603 return false;
1604 }
1605
1606 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
01d8bf07 1607 = vect_find_first_load_in_slp_instance (new_instance);
ebfd146a 1608 }
ebfd146a 1609
23847df4
RB
1610 /* Compute the costs of this SLP instance. */
1611 vect_analyze_slp_cost (loop_vinfo, bb_vinfo,
1612 new_instance, TYPE_VECTOR_SUBPARTS (vectype));
92345349 1613
a70d6342 1614 if (loop_vinfo)
9771b263 1615 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
a70d6342 1616 else
9771b263 1617 BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
b8698a0f 1618
73fbfcad 1619 if (dump_enabled_p ())
78c60e3d 1620 vect_print_slp_tree (MSG_NOTE, node);
ebfd146a
IR
1621
1622 return true;
1623 }
1624
1625 /* Failed to SLP. */
1626 /* Free the allocated memory. */
1627 vect_free_slp_tree (node);
9771b263 1628 loads.release ();
b8698a0f 1629
a70d6342 1630 return false;
ebfd146a
IR
1631}
1632
1633
ff802fa1 1634/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
1635 trees of packed scalar stmts if SLP is possible. */
1636
1637bool
a70d6342 1638vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a
IR
1639{
1640 unsigned int i;
9771b263 1641 vec<gimple> grouped_stores;
6e1aa848
DN
1642 vec<gimple> reductions = vNULL;
1643 vec<gimple> reduc_chains = vNULL;
b010117a 1644 gimple first_element;
a70d6342 1645 bool ok = false;
ebfd146a 1646
73fbfcad 1647 if (dump_enabled_p ())
e645e942 1648 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
ebfd146a 1649
a70d6342 1650 if (loop_vinfo)
b5aeb3bb 1651 {
0d0293ac 1652 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
b010117a 1653 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
b5aeb3bb
IR
1654 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1655 }
a70d6342 1656 else
0d0293ac 1657 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
b8698a0f 1658
0d0293ac 1659 /* Find SLP sequences starting from groups of grouped stores. */
9771b263 1660 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
b010117a 1661 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
a70d6342 1662 ok = true;
ebfd146a 1663
b8698a0f 1664 if (bb_vinfo && !ok)
a70d6342 1665 {
73fbfcad 1666 if (dump_enabled_p ())
78c60e3d 1667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 1668 "Failed to SLP the basic block.\n");
a70d6342
IR
1669
1670 return false;
1671 }
ebfd146a 1672
b010117a 1673 if (loop_vinfo
9771b263 1674 && LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).length () > 0)
b010117a
IR
1675 {
1676 /* Find SLP sequences starting from reduction chains. */
9771b263 1677 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
b010117a
IR
1678 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1679 ok = true;
1680 else
1681 return false;
1682
1683 /* Don't try to vectorize SLP reductions if reduction chain was
1684 detected. */
1685 return ok;
1686 }
1687
b5aeb3bb 1688 /* Find SLP sequences starting from groups of reductions. */
9771b263
DN
1689 if (loop_vinfo && LOOP_VINFO_REDUCTIONS (loop_vinfo).length () > 1
1690 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0]))
b5aeb3bb
IR
1691 ok = true;
1692
ebfd146a
IR
1693 return true;
1694}
1695
1696
1697/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
1698 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1699 least one instance. */
ebfd146a 1700
437f4a00 1701bool
ebfd146a
IR
1702vect_make_slp_decision (loop_vec_info loop_vinfo)
1703{
1704 unsigned int i, unrolling_factor = 1;
9771b263 1705 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
1706 slp_instance instance;
1707 int decided_to_slp = 0;
1708
73fbfcad 1709 if (dump_enabled_p ())
e645e942
TJ
1710 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1711 "\n");
ebfd146a 1712
9771b263 1713 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
1714 {
1715 /* FORNOW: SLP if you can. */
1716 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1717 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1718
ff802fa1 1719 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 1720 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 1721 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
1722 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1723 decided_to_slp++;
1724 }
1725
1726 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1727
73fbfcad 1728 if (decided_to_slp && dump_enabled_p ())
ccb3ad87 1729 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1730 "Decided to SLP %d instances. Unrolling factor %d\n",
78c60e3d 1731 decided_to_slp, unrolling_factor);
437f4a00
IR
1732
1733 return (decided_to_slp > 0);
ebfd146a
IR
1734}
1735
1736
1737/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 1738 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
1739
1740static void
1741vect_detect_hybrid_slp_stmts (slp_tree node)
1742{
1743 int i;
9771b263
DN
1744 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (node);
1745 gimple stmt = stmts[0];
ebfd146a
IR
1746 imm_use_iterator imm_iter;
1747 gimple use_stmt;
f2c74cc4 1748 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
d755c7ef 1749 slp_tree child;
f2c74cc4
IR
1750 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1751 struct loop *loop = NULL;
1752 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1753 basic_block bb = NULL;
ebfd146a
IR
1754
1755 if (!node)
1756 return;
1757
f2c74cc4
IR
1758 if (loop_vinfo)
1759 loop = LOOP_VINFO_LOOP (loop_vinfo);
1760 else
1761 bb = BB_VINFO_BB (bb_vinfo);
1762
9771b263 1763 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
1764 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1765 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1766 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
f2c74cc4
IR
1767 if (gimple_bb (use_stmt)
1768 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1769 || bb == gimple_bb (use_stmt))
1770 && (stmt_vinfo = vinfo_for_stmt (use_stmt))
99f51320
IR
1771 && !STMT_SLP_TYPE (stmt_vinfo)
1772 && (STMT_VINFO_RELEVANT (stmt_vinfo)
b5aeb3bb 1773 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
f2c74cc4
IR
1774 && !(gimple_code (use_stmt) == GIMPLE_PHI
1775 && STMT_VINFO_DEF_TYPE (stmt_vinfo)
1776 == vect_reduction_def))
ebfd146a
IR
1777 vect_mark_slp_stmts (node, hybrid, i);
1778
9771b263 1779 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1780 vect_detect_hybrid_slp_stmts (child);
ebfd146a
IR
1781}
1782
1783
1784/* Find stmts that must be both vectorized and SLPed. */
1785
1786void
1787vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1788{
1789 unsigned int i;
9771b263 1790 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
1791 slp_instance instance;
1792
73fbfcad 1793 if (dump_enabled_p ())
e645e942
TJ
1794 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
1795 "\n");
ebfd146a 1796
9771b263 1797 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
1798 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1799}
1800
a70d6342
IR
1801
1802/* Create and initialize a new bb_vec_info struct for BB, as well as
1803 stmt_vec_info structs for all the stmts in it. */
b8698a0f 1804
a70d6342
IR
1805static bb_vec_info
1806new_bb_vec_info (basic_block bb)
1807{
1808 bb_vec_info res = NULL;
1809 gimple_stmt_iterator gsi;
1810
1811 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1812 BB_VINFO_BB (res) = bb;
1813
1814 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1815 {
1816 gimple stmt = gsi_stmt (gsi);
1817 gimple_set_uid (stmt, 0);
1818 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1819 }
1820
9771b263
DN
1821 BB_VINFO_GROUPED_STORES (res).create (10);
1822 BB_VINFO_SLP_INSTANCES (res).create (2);
c3e7ee41 1823 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
a70d6342
IR
1824
1825 bb->aux = res;
1826 return res;
1827}
1828
1829
1830/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1831 stmts in the basic block. */
1832
1833static void
1834destroy_bb_vec_info (bb_vec_info bb_vinfo)
1835{
9771b263 1836 vec<slp_instance> slp_instances;
c7e62a26 1837 slp_instance instance;
a70d6342
IR
1838 basic_block bb;
1839 gimple_stmt_iterator si;
c7e62a26 1840 unsigned i;
a70d6342
IR
1841
1842 if (!bb_vinfo)
1843 return;
1844
1845 bb = BB_VINFO_BB (bb_vinfo);
1846
1847 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1848 {
1849 gimple stmt = gsi_stmt (si);
1850 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1851
1852 if (stmt_info)
1853 /* Free stmt_vec_info. */
1854 free_stmt_vec_info (stmt);
1855 }
1856
c716e67f 1857 vect_destroy_datarefs (NULL, bb_vinfo);
01be8516 1858 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
9771b263 1859 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
c7e62a26 1860 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
9771b263 1861 FOR_EACH_VEC_ELT (slp_instances, i, instance)
c7e62a26 1862 vect_free_slp_instance (instance);
9771b263 1863 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
c3e7ee41 1864 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
a70d6342
IR
1865 free (bb_vinfo);
1866 bb->aux = NULL;
1867}
1868
1869
1870/* Analyze statements contained in SLP tree node after recursively analyzing
1871 the subtree. Return TRUE if the operations are supported. */
1872
1873static bool
1874vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1875{
1876 bool dummy;
1877 int i;
1878 gimple stmt;
d755c7ef 1879 slp_tree child;
a70d6342
IR
1880
1881 if (!node)
1882 return true;
1883
9771b263 1884 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1885 if (!vect_slp_analyze_node_operations (bb_vinfo, child))
d092494c 1886 return false;
a70d6342 1887
9771b263 1888 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
1889 {
1890 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1891 gcc_assert (stmt_info);
1892 gcc_assert (PURE_SLP_STMT (stmt_info));
1893
1894 if (!vect_analyze_stmt (stmt, &dummy, node))
1895 return false;
1896 }
1897
1898 return true;
1899}
1900
1901
ff802fa1 1902/* Analyze statements in SLP instances of the basic block. Return TRUE if the
a70d6342
IR
1903 operations are supported. */
1904
1905static bool
1906vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1907{
9771b263 1908 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
a70d6342
IR
1909 slp_instance instance;
1910 int i;
1911
9771b263 1912 for (i = 0; slp_instances.iterate (i, &instance); )
a70d6342 1913 {
b8698a0f 1914 if (!vect_slp_analyze_node_operations (bb_vinfo,
a70d6342
IR
1915 SLP_INSTANCE_TREE (instance)))
1916 {
1917 vect_free_slp_instance (instance);
9771b263 1918 slp_instances.ordered_remove (i);
a70d6342
IR
1919 }
1920 else
1921 i++;
b8698a0f
L
1922 }
1923
9771b263 1924 if (!slp_instances.length ())
a70d6342
IR
1925 return false;
1926
1927 return true;
1928}
1929
6eddf228
RB
1930
1931/* Compute the scalar cost of the SLP node NODE and its children
1932 and return it. Do not account defs that are marked in LIFE and
1933 update LIFE according to uses of NODE. */
1934
1935static unsigned
292cba13
RB
1936vect_bb_slp_scalar_cost (basic_block bb,
1937 slp_tree node, vec<bool, va_stack> life)
6eddf228
RB
1938{
1939 unsigned scalar_cost = 0;
1940 unsigned i;
1941 gimple stmt;
1942 slp_tree child;
1943
1944 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
1945 {
1946 unsigned stmt_cost;
1947 ssa_op_iter op_iter;
1948 def_operand_p def_p;
1949 stmt_vec_info stmt_info;
1950
1951 if (life[i])
1952 continue;
1953
1954 /* If there is a non-vectorized use of the defs then the scalar
1955 stmt is kept live in which case we do not account it or any
1956 required defs in the SLP children in the scalar cost. This
1957 way we make the vectorization more costly when compared to
1958 the scalar cost. */
1959 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
1960 {
1961 imm_use_iterator use_iter;
1962 gimple use_stmt;
1963 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
f800c166
RB
1964 if (gimple_code (use_stmt) == GIMPLE_PHI
1965 || gimple_bb (use_stmt) != bb
6eddf228
RB
1966 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt)))
1967 {
1968 life[i] = true;
1969 BREAK_FROM_IMM_USE_STMT (use_iter);
1970 }
1971 }
1972 if (life[i])
1973 continue;
1974
1975 stmt_info = vinfo_for_stmt (stmt);
1976 if (STMT_VINFO_DATA_REF (stmt_info))
1977 {
1978 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1979 stmt_cost = vect_get_stmt_cost (scalar_load);
1980 else
1981 stmt_cost = vect_get_stmt_cost (scalar_store);
1982 }
1983 else
1984 stmt_cost = vect_get_stmt_cost (scalar_stmt);
1985
1986 scalar_cost += stmt_cost;
1987 }
1988
1989 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
292cba13 1990 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
6eddf228
RB
1991
1992 return scalar_cost;
1993}
1994
69f11a13
IR
1995/* Check if vectorization of the basic block is profitable. */
1996
1997static bool
1998vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1999{
9771b263 2000 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2001 slp_instance instance;
c3e7ee41
BS
2002 int i, j;
2003 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2004 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
92345349 2005 void *target_cost_data = BB_VINFO_TARGET_COST_DATA (bb_vinfo);
69f11a13 2006 stmt_vec_info stmt_info = NULL;
92345349 2007 stmt_vector_for_cost body_cost_vec;
c3e7ee41 2008 stmt_info_for_cost *ci;
69f11a13
IR
2009
2010 /* Calculate vector costs. */
9771b263 2011 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2012 {
92345349 2013 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
c3e7ee41 2014
9771b263 2015 FOR_EACH_VEC_ELT (body_cost_vec, j, ci)
92345349
BS
2016 {
2017 stmt_info = ci->stmt ? vinfo_for_stmt (ci->stmt) : NULL;
2018 (void) add_stmt_cost (target_cost_data, ci->count, ci->kind,
2019 stmt_info, ci->misalign, vect_body);
2020 }
69f11a13
IR
2021 }
2022
2023 /* Calculate scalar cost. */
6eddf228 2024 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2025 {
6eddf228
RB
2026 vec<bool, va_stack> life;
2027 vec_stack_alloc (bool, life, SLP_INSTANCE_GROUP_SIZE (instance));
2028 life.quick_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
292cba13
RB
2029 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2030 SLP_INSTANCE_TREE (instance),
6eddf228
RB
2031 life);
2032 life.release ();
69f11a13
IR
2033 }
2034
c3e7ee41 2035 /* Complete the target-specific cost calculation. */
92345349
BS
2036 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2037 &vec_inside_cost, &vec_epilogue_cost);
2038
2039 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2040
73fbfcad 2041 if (dump_enabled_p ())
69f11a13 2042 {
78c60e3d
SS
2043 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2044 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2045 vec_inside_cost);
2046 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2047 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2048 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2049 }
2050
2051 /* Vectorization is profitable if its cost is less than the cost of scalar
2052 version. */
2053 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2054 return false;
2055
2056 return true;
2057}
2058
2059/* Check if the basic block can be vectorized. */
a70d6342 2060
8e19f5a1
IR
2061static bb_vec_info
2062vect_slp_analyze_bb_1 (basic_block bb)
a70d6342
IR
2063{
2064 bb_vec_info bb_vinfo;
9771b263 2065 vec<slp_instance> slp_instances;
a70d6342 2066 slp_instance instance;
8e19f5a1 2067 int i;
777e1f09 2068 int min_vf = 2;
e4a707c4 2069
a70d6342
IR
2070 bb_vinfo = new_bb_vec_info (bb);
2071 if (!bb_vinfo)
2072 return NULL;
2073
777e1f09 2074 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
a70d6342 2075 {
73fbfcad 2076 if (dump_enabled_p ())
78c60e3d
SS
2077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2078 "not vectorized: unhandled data-ref in basic "
2079 "block.\n");
b8698a0f 2080
a70d6342
IR
2081 destroy_bb_vec_info (bb_vinfo);
2082 return NULL;
2083 }
2084
fcac74a1 2085 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2086 {
73fbfcad 2087 if (dump_enabled_p ())
78c60e3d
SS
2088 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2089 "not vectorized: not enough data-refs in "
2090 "basic block.\n");
a70d6342
IR
2091
2092 destroy_bb_vec_info (bb_vinfo);
2093 return NULL;
2094 }
2095
5abe1e05
RB
2096 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2097 {
2098 if (dump_enabled_p ())
2099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2100 "not vectorized: unhandled data access in "
2101 "basic block.\n");
2102
2103 destroy_bb_vec_info (bb_vinfo);
2104 return NULL;
2105 }
2106
f5709183
IR
2107 vect_pattern_recog (NULL, bb_vinfo);
2108
5abe1e05 2109 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
777e1f09 2110 {
73fbfcad 2111 if (dump_enabled_p ())
78c60e3d
SS
2112 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2113 "not vectorized: unhandled data dependence "
2114 "in basic block.\n");
777e1f09
RG
2115
2116 destroy_bb_vec_info (bb_vinfo);
2117 return NULL;
2118 }
2119
a70d6342
IR
2120 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2121 {
73fbfcad 2122 if (dump_enabled_p ())
78c60e3d
SS
2123 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2124 "not vectorized: bad data alignment in basic "
2125 "block.\n");
b8698a0f 2126
a70d6342
IR
2127 destroy_bb_vec_info (bb_vinfo);
2128 return NULL;
2129 }
b8698a0f 2130
a70d6342
IR
2131 /* Check the SLP opportunities in the basic block, analyze and build SLP
2132 trees. */
2133 if (!vect_analyze_slp (NULL, bb_vinfo))
2134 {
73fbfcad 2135 if (dump_enabled_p ())
78c60e3d
SS
2136 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2137 "not vectorized: failed to find SLP opportunities "
2138 "in basic block.\n");
a70d6342
IR
2139
2140 destroy_bb_vec_info (bb_vinfo);
2141 return NULL;
2142 }
b8698a0f 2143
a70d6342
IR
2144 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2145
2146 /* Mark all the statements that we want to vectorize as pure SLP and
2147 relevant. */
9771b263 2148 FOR_EACH_VEC_ELT (slp_instances, i, instance)
a70d6342
IR
2149 {
2150 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2151 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
b8698a0f 2152 }
a70d6342 2153
c3e7ee41 2154 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
38eec4c6 2155 {
73fbfcad 2156 if (dump_enabled_p ())
78c60e3d
SS
2157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2158 "not vectorized: unsupported alignment in basic "
2159 "block.\n");
38eec4c6
UW
2160 destroy_bb_vec_info (bb_vinfo);
2161 return NULL;
2162 }
2163
a70d6342
IR
2164 if (!vect_slp_analyze_operations (bb_vinfo))
2165 {
73fbfcad 2166 if (dump_enabled_p ())
e645e942 2167 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2168 "not vectorized: bad operation in basic block.\n");
a70d6342
IR
2169
2170 destroy_bb_vec_info (bb_vinfo);
2171 return NULL;
2172 }
2173
69f11a13 2174 /* Cost model: check if the vectorization is worthwhile. */
d6d11272 2175 if (!unlimited_cost_model ()
69f11a13
IR
2176 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2177 {
73fbfcad 2178 if (dump_enabled_p ())
78c60e3d
SS
2179 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2180 "not vectorized: vectorization is not "
2181 "profitable.\n");
69f11a13
IR
2182
2183 destroy_bb_vec_info (bb_vinfo);
2184 return NULL;
2185 }
2186
73fbfcad 2187 if (dump_enabled_p ())
78c60e3d
SS
2188 dump_printf_loc (MSG_NOTE, vect_location,
2189 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2190
2191 return bb_vinfo;
2192}
2193
2194
8e19f5a1
IR
2195bb_vec_info
2196vect_slp_analyze_bb (basic_block bb)
2197{
2198 bb_vec_info bb_vinfo;
2199 int insns = 0;
2200 gimple_stmt_iterator gsi;
2201 unsigned int vector_sizes;
2202
73fbfcad 2203 if (dump_enabled_p ())
78c60e3d 2204 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
8e19f5a1
IR
2205
2206 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2207 {
2208 gimple stmt = gsi_stmt (gsi);
2209 if (!is_gimple_debug (stmt)
2210 && !gimple_nop_p (stmt)
2211 && gimple_code (stmt) != GIMPLE_LABEL)
2212 insns++;
2213 }
2214
2215 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2216 {
73fbfcad 2217 if (dump_enabled_p ())
78c60e3d
SS
2218 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2219 "not vectorized: too many instructions in "
2220 "basic block.\n");
8e19f5a1
IR
2221
2222 return NULL;
2223 }
2224
2225 /* Autodetect first vector size we try. */
2226 current_vector_size = 0;
2227 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2228
2229 while (1)
2230 {
2231 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2232 if (bb_vinfo)
2233 return bb_vinfo;
2234
2235 destroy_bb_vec_info (bb_vinfo);
2236
2237 vector_sizes &= ~current_vector_size;
2238 if (vector_sizes == 0
2239 || current_vector_size == 0)
2240 return NULL;
2241
2242 /* Try the next biggest vector size. */
2243 current_vector_size = 1 << floor_log2 (vector_sizes);
73fbfcad 2244 if (dump_enabled_p ())
78c60e3d
SS
2245 dump_printf_loc (MSG_NOTE, vect_location,
2246 "***** Re-trying analysis with "
2247 "vector size %d\n", current_vector_size);
8e19f5a1
IR
2248 }
2249}
2250
2251
b8698a0f 2252/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
ff802fa1
IR
2253 the number of created vector stmts depends on the unrolling factor).
2254 However, the actual number of vector stmts for every SLP node depends on
2255 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2256 should be updated. In this function we assume that the inside costs
2257 calculated in vect_model_xxx_cost are linear in ncopies. */
ebfd146a
IR
2258
2259void
2260vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2261{
c3e7ee41 2262 unsigned int i, j, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9771b263 2263 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a 2264 slp_instance instance;
92345349 2265 stmt_vector_for_cost body_cost_vec;
c3e7ee41 2266 stmt_info_for_cost *si;
92345349 2267 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
ebfd146a 2268
73fbfcad 2269 if (dump_enabled_p ())
78c60e3d 2270 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2271 "=== vect_update_slp_costs_according_to_vf ===\n");
ebfd146a 2272
9771b263 2273 FOR_EACH_VEC_ELT (slp_instances, i, instance)
c3e7ee41
BS
2274 {
2275 /* We assume that costs are linear in ncopies. */
2276 int ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2277
2278 /* Record the instance's instructions in the target cost model.
2279 This was delayed until here because the count of instructions
2280 isn't known beforehand. */
92345349 2281 body_cost_vec = SLP_INSTANCE_BODY_COST_VEC (instance);
c3e7ee41 2282
9771b263 2283 FOR_EACH_VEC_ELT (body_cost_vec, j, si)
92345349
BS
2284 (void) add_stmt_cost (data, si->count * ncopies, si->kind,
2285 vinfo_for_stmt (si->stmt), si->misalign,
2286 vect_body);
c3e7ee41 2287 }
ebfd146a
IR
2288}
2289
a70d6342 2290
b8698a0f
L
2291/* For constant and loop invariant defs of SLP_NODE this function returns
2292 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
2293 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2294 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
2295 REDUC_INDEX is the index of the reduction operand in the statements, unless
2296 it is -1. */
ebfd146a
IR
2297
2298static void
9dc3f7de 2299vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 2300 vec<tree> *vec_oprnds,
b5aeb3bb
IR
2301 unsigned int op_num, unsigned int number_of_vectors,
2302 int reduc_index)
ebfd146a 2303{
9771b263
DN
2304 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2305 gimple stmt = stmts[0];
ebfd146a 2306 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
d2a12ae7 2307 unsigned nunits;
ebfd146a 2308 tree vec_cst;
d2a12ae7
RG
2309 tree *elts;
2310 unsigned j, number_of_places_left_in_vector;
ebfd146a 2311 tree vector_type;
9dc3f7de 2312 tree vop;
9771b263 2313 int group_size = stmts.length ();
ebfd146a 2314 unsigned int vec_num, i;
d2a12ae7 2315 unsigned number_of_copies = 1;
9771b263
DN
2316 vec<tree> voprnds;
2317 voprnds.create (number_of_vectors);
ebfd146a 2318 bool constant_p, is_store;
b5aeb3bb 2319 tree neutral_op = NULL;
bac430c9 2320 enum tree_code code = gimple_expr_code (stmt);
0e93a64e
IR
2321 gimple def_stmt;
2322 struct loop *loop;
13396b6e 2323 gimple_seq ctor_seq = NULL;
b5aeb3bb 2324
29ed4920
IR
2325 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2326 && reduc_index != -1)
b5aeb3bb 2327 {
b5aeb3bb 2328 op_num = reduc_index - 1;
9dc3f7de 2329 op = gimple_op (stmt, reduc_index);
b5aeb3bb 2330 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
ff802fa1 2331 we need either neutral operands or the original operands. See
b5aeb3bb
IR
2332 get_initial_def_for_reduction() for details. */
2333 switch (code)
2334 {
2335 case WIDEN_SUM_EXPR:
2336 case DOT_PROD_EXPR:
2337 case PLUS_EXPR:
2338 case MINUS_EXPR:
2339 case BIT_IOR_EXPR:
2340 case BIT_XOR_EXPR:
2341 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2342 neutral_op = build_real (TREE_TYPE (op), dconst0);
2343 else
2344 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2345
2346 break;
2347
2348 case MULT_EXPR:
b5aeb3bb
IR
2349 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2350 neutral_op = build_real (TREE_TYPE (op), dconst1);
2351 else
2352 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2353
2354 break;
2355
c1e822d5
IR
2356 case BIT_AND_EXPR:
2357 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2358 break;
2359
0e93a64e
IR
2360 case MAX_EXPR:
2361 case MIN_EXPR:
2362 def_stmt = SSA_NAME_DEF_STMT (op);
2363 loop = (gimple_bb (stmt))->loop_father;
2364 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2365 loop_preheader_edge (loop));
2366 break;
2367
b5aeb3bb 2368 default:
0e93a64e 2369 neutral_op = NULL;
b5aeb3bb
IR
2370 }
2371 }
ebfd146a
IR
2372
2373 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2374 {
2375 is_store = true;
2376 op = gimple_assign_rhs1 (stmt);
2377 }
2378 else
9dc3f7de
IR
2379 is_store = false;
2380
2381 gcc_assert (op);
ebfd146a
IR
2382
2383 if (CONSTANT_CLASS_P (op))
d59dc888 2384 constant_p = true;
ebfd146a 2385 else
d59dc888
IR
2386 constant_p = false;
2387
9dc3f7de 2388 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
cd481d83 2389 gcc_assert (vector_type);
ebfd146a
IR
2390 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2391
2392 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 2393 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
2394
2395 For example, we have two scalar operands, s1 and s2 (e.g., group of
2396 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
2397 of this type can be packed in a vector). The output vector will contain
2398 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
2399 will be 2).
2400
b8698a0f 2401 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
2402 containing the operands.
2403
2404 For example, NUNITS is four as before, and the group size is 8
f7e531cf 2405 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 2406 {s5, s6, s7, s8}. */
b8698a0f 2407
ebfd146a
IR
2408 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2409
2410 number_of_places_left_in_vector = nunits;
d2a12ae7 2411 elts = XALLOCAVEC (tree, nunits);
ebfd146a
IR
2412 for (j = 0; j < number_of_copies; j++)
2413 {
9771b263 2414 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
ebfd146a
IR
2415 {
2416 if (is_store)
2417 op = gimple_assign_rhs1 (stmt);
bac430c9 2418 else
f7e531cf 2419 {
bac430c9 2420 switch (code)
f7e531cf 2421 {
bac430c9
IR
2422 case COND_EXPR:
2423 if (op_num == 0 || op_num == 1)
2424 {
2425 tree cond = gimple_assign_rhs1 (stmt);
2426 op = TREE_OPERAND (cond, op_num);
2427 }
2428 else
2429 {
2430 if (op_num == 2)
2431 op = gimple_assign_rhs2 (stmt);
2432 else
2433 op = gimple_assign_rhs3 (stmt);
2434 }
2435 break;
2436
2437 case CALL_EXPR:
2438 op = gimple_call_arg (stmt, op_num);
2439 break;
2440
b84b294a
JJ
2441 case LSHIFT_EXPR:
2442 case RSHIFT_EXPR:
2443 case LROTATE_EXPR:
2444 case RROTATE_EXPR:
2445 op = gimple_op (stmt, op_num + 1);
2446 /* Unlike the other binary operators, shifts/rotates have
2447 the shift count being int, instead of the same type as
2448 the lhs, so make sure the scalar is the right type if
2449 we are dealing with vectors of
2450 long long/long/short/char. */
793d9a16 2451 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
2452 op = fold_convert (TREE_TYPE (vector_type), op);
2453 break;
2454
bac430c9
IR
2455 default:
2456 op = gimple_op (stmt, op_num + 1);
b84b294a 2457 break;
f7e531cf
IR
2458 }
2459 }
b8698a0f 2460
b5aeb3bb
IR
2461 if (reduc_index != -1)
2462 {
0e93a64e
IR
2463 loop = (gimple_bb (stmt))->loop_father;
2464 def_stmt = SSA_NAME_DEF_STMT (op);
b5aeb3bb
IR
2465
2466 gcc_assert (loop);
b010117a
IR
2467
2468 /* Get the def before the loop. In reduction chain we have only
2469 one initial value. */
2470 if ((j != (number_of_copies - 1)
2471 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2472 && i != 0))
2473 && neutral_op)
b5aeb3bb 2474 op = neutral_op;
b010117a
IR
2475 else
2476 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2477 loop_preheader_edge (loop));
b5aeb3bb
IR
2478 }
2479
ebfd146a 2480 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 2481 number_of_places_left_in_vector--;
13396b6e 2482 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 2483 {
793d9a16 2484 if (CONSTANT_CLASS_P (op))
13396b6e
JJ
2485 {
2486 op = fold_unary (VIEW_CONVERT_EXPR,
2487 TREE_TYPE (vector_type), op);
2488 gcc_assert (op && CONSTANT_CLASS_P (op));
2489 }
2490 else
2491 {
2492 tree new_temp
2493 = make_ssa_name (TREE_TYPE (vector_type), NULL);
2494 gimple init_stmt;
2495 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
2496 op);
2497 init_stmt
2498 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
2499 new_temp, op, NULL_TREE);
2500 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2501 op = new_temp;
2502 }
50eeef09 2503 }
d2a12ae7 2504 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
2505 if (!CONSTANT_CLASS_P (op))
2506 constant_p = false;
ebfd146a
IR
2507
2508 if (number_of_places_left_in_vector == 0)
2509 {
2510 number_of_places_left_in_vector = nunits;
2511
2512 if (constant_p)
d2a12ae7 2513 vec_cst = build_vector (vector_type, elts);
ebfd146a 2514 else
d2a12ae7 2515 {
9771b263 2516 vec<constructor_elt, va_gc> *v;
d2a12ae7 2517 unsigned k;
9771b263 2518 vec_alloc (v, nunits);
d2a12ae7
RG
2519 for (k = 0; k < nunits; ++k)
2520 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2521 vec_cst = build_constructor (vector_type, v);
2522 }
9771b263
DN
2523 voprnds.quick_push (vect_init_vector (stmt, vec_cst,
2524 vector_type, NULL));
13396b6e
JJ
2525 if (ctor_seq != NULL)
2526 {
9771b263 2527 gimple init_stmt = SSA_NAME_DEF_STMT (voprnds.last ());
13396b6e
JJ
2528 gimple_stmt_iterator gsi = gsi_for_stmt (init_stmt);
2529 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2530 GSI_SAME_STMT);
2531 ctor_seq = NULL;
2532 }
ebfd146a
IR
2533 }
2534 }
2535 }
2536
b8698a0f 2537 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 2538 them. */
9771b263 2539 vec_num = voprnds.length ();
d2a12ae7 2540 for (j = vec_num; j != 0; j--)
ebfd146a 2541 {
9771b263
DN
2542 vop = voprnds[j - 1];
2543 vec_oprnds->quick_push (vop);
ebfd146a
IR
2544 }
2545
9771b263 2546 voprnds.release ();
ebfd146a
IR
2547
2548 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
2549 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2550 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 2551 to replicate the vectors. */
9771b263 2552 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 2553 {
b5aeb3bb
IR
2554 tree neutral_vec = NULL;
2555
2556 if (neutral_op)
2557 {
2558 if (!neutral_vec)
b9acc9f1 2559 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 2560
9771b263 2561 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
2562 }
2563 else
2564 {
9771b263
DN
2565 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2566 vec_oprnds->quick_push (vop);
b5aeb3bb 2567 }
ebfd146a
IR
2568 }
2569}
2570
2571
2572/* Get vectorized definitions from SLP_NODE that contains corresponding
2573 vectorized def-stmts. */
2574
2575static void
9771b263 2576vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
2577{
2578 tree vec_oprnd;
2579 gimple vec_def_stmt;
2580 unsigned int i;
2581
9771b263 2582 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 2583
9771b263 2584 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
ebfd146a
IR
2585 {
2586 gcc_assert (vec_def_stmt);
2587 vec_oprnd = gimple_get_lhs (vec_def_stmt);
9771b263 2588 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
2589 }
2590}
2591
2592
b8698a0f
L
2593/* Get vectorized definitions for SLP_NODE.
2594 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
2595 call vect_get_constant_vectors() to create vector stmts.
2596 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
2597 must be stored in the corresponding child of SLP_NODE, and we call
2598 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 2599
ebfd146a 2600void
9771b263 2601vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
37b5ec8f 2602 vec<vec<tree> > *vec_oprnds, int reduc_index)
ebfd146a 2603{
e44978dc 2604 gimple first_stmt;
d092494c
IR
2605 int number_of_vects = 0, i;
2606 unsigned int child_index = 0;
b8698a0f 2607 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
d092494c 2608 slp_tree child = NULL;
37b5ec8f 2609 vec<tree> vec_defs;
e44978dc 2610 tree oprnd;
d092494c 2611 bool vectorized_defs;
ebfd146a 2612
9771b263
DN
2613 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2614 FOR_EACH_VEC_ELT (ops, i, oprnd)
ebfd146a 2615 {
d092494c
IR
2616 /* For each operand we check if it has vectorized definitions in a child
2617 node or we need to create them (for invariants and constants). We
2618 check if the LHS of the first stmt of the next child matches OPRND.
2619 If it does, we found the correct child. Otherwise, we call
2620 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2621 to check this child node for the next operand. */
2622 vectorized_defs = false;
9771b263 2623 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
ebfd146a 2624 {
01d8bf07 2625 child = SLP_TREE_CHILDREN (slp_node)[child_index];
d092494c 2626
e44978dc
RB
2627 /* We have to check both pattern and original def, if available. */
2628 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2629 gimple related = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
ebfd146a 2630
e44978dc
RB
2631 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2632 || (related
2633 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2634 {
2635 /* The number of vector defs is determined by the number of
2636 vector statements in the node from which we get those
d092494c 2637 statements. */
e44978dc
RB
2638 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2639 vectorized_defs = true;
d092494c 2640 child_index++;
e44978dc 2641 }
d092494c 2642 }
ebfd146a 2643
d092494c
IR
2644 if (!vectorized_defs)
2645 {
2646 if (i == 0)
2647 {
2648 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2649 /* Number of vector stmts was calculated according to LHS in
2650 vect_schedule_slp_instance (), fix it by replacing LHS with
2651 RHS, if necessary. See vect_get_smallest_scalar_type () for
2652 details. */
2653 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2654 &rhs_size_unit);
2655 if (rhs_size_unit != lhs_size_unit)
2656 {
2657 number_of_vects *= rhs_size_unit;
2658 number_of_vects /= lhs_size_unit;
2659 }
2660 }
2661 }
b5aeb3bb 2662
d092494c 2663 /* Allocate memory for vectorized defs. */
37b5ec8f
JJ
2664 vec_defs = vNULL;
2665 vec_defs.create (number_of_vects);
ebfd146a 2666
d092494c
IR
2667 /* For reduction defs we call vect_get_constant_vectors (), since we are
2668 looking for initial loop invariant values. */
2669 if (vectorized_defs && reduc_index == -1)
2670 /* The defs are already vectorized. */
37b5ec8f 2671 vect_get_slp_vect_defs (child, &vec_defs);
d092494c
IR
2672 else
2673 /* Build vectors from scalar defs. */
37b5ec8f 2674 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
d092494c 2675 number_of_vects, reduc_index);
ebfd146a 2676
37b5ec8f 2677 vec_oprnds->quick_push (vec_defs);
ebfd146a 2678
d092494c
IR
2679 /* For reductions, we only need initial values. */
2680 if (reduc_index != -1)
2681 return;
2682 }
ebfd146a
IR
2683}
2684
a70d6342 2685
b8698a0f 2686/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
ebfd146a
IR
2687 building a vector of type MASK_TYPE from it) and two input vectors placed in
2688 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2689 shifting by STRIDE elements of DR_CHAIN for every copy.
2690 (STRIDE is the number of vectorized stmts for NODE divided by the number of
b8698a0f 2691 copies).
ebfd146a
IR
2692 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2693 the created stmts must be inserted. */
2694
2695static inline void
b8698a0f 2696vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
faf63e39 2697 tree mask, int first_vec_indx, int second_vec_indx,
b8698a0f 2698 gimple_stmt_iterator *gsi, slp_tree node,
9771b263 2699 tree vectype, vec<tree> dr_chain,
ebfd146a
IR
2700 int ncopies, int vect_stmts_counter)
2701{
faf63e39 2702 tree perm_dest;
ebfd146a
IR
2703 gimple perm_stmt = NULL;
2704 stmt_vec_info next_stmt_info;
0f900dfa 2705 int i, stride;
ebfd146a 2706 tree first_vec, second_vec, data_ref;
ebfd146a 2707
ebfd146a 2708 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
ebfd146a 2709
b8698a0f 2710 /* Initialize the vect stmts of NODE to properly insert the generated
ebfd146a 2711 stmts later. */
9771b263 2712 for (i = SLP_TREE_VEC_STMTS (node).length ();
ebfd146a 2713 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
9771b263 2714 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a
IR
2715
2716 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2717 for (i = 0; i < ncopies; i++)
2718 {
9771b263
DN
2719 first_vec = dr_chain[first_vec_indx];
2720 second_vec = dr_chain[second_vec_indx];
ebfd146a 2721
ebfd146a 2722 /* Generate the permute statement. */
73804b12
RG
2723 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, perm_dest,
2724 first_vec, second_vec, mask);
ebfd146a 2725 data_ref = make_ssa_name (perm_dest, perm_stmt);
2635892a 2726 gimple_set_lhs (perm_stmt, data_ref);
ebfd146a 2727 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
ebfd146a 2728
b8698a0f 2729 /* Store the vector statement in NODE. */
9771b263 2730 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
ebfd146a
IR
2731
2732 first_vec_indx += stride;
2733 second_vec_indx += stride;
2734 }
2735
2736 /* Mark the scalar stmt as vectorized. */
2737 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2738 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2739}
2740
2741
b8698a0f 2742/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
ebfd146a 2743 return in CURRENT_MASK_ELEMENT its equivalent in target specific
ff802fa1 2744 representation. Check that the mask is valid and return FALSE if not.
ebfd146a
IR
2745 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2746 the next vector, i.e., the current first vector is not needed. */
b8698a0f 2747
ebfd146a 2748static bool
b8698a0f 2749vect_get_mask_element (gimple stmt, int first_mask_element, int m,
ebfd146a 2750 int mask_nunits, bool only_one_vec, int index,
22e4dee7 2751 unsigned char *mask, int *current_mask_element,
694a4f61
IR
2752 bool *need_next_vector, int *number_of_mask_fixes,
2753 bool *mask_fixed, bool *needs_first_vector)
ebfd146a
IR
2754{
2755 int i;
ebfd146a
IR
2756
2757 /* Convert to target specific representation. */
2758 *current_mask_element = first_mask_element + m;
2759 /* Adjust the value in case it's a mask for second and third vectors. */
694a4f61 2760 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
ebfd146a
IR
2761
2762 if (*current_mask_element < mask_nunits)
694a4f61 2763 *needs_first_vector = true;
ebfd146a
IR
2764
2765 /* We have only one input vector to permute but the mask accesses values in
2766 the next vector as well. */
2767 if (only_one_vec && *current_mask_element >= mask_nunits)
2768 {
73fbfcad 2769 if (dump_enabled_p ())
ebfd146a 2770 {
e645e942 2771 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
2772 "permutation requires at least two vectors ");
2773 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 2774 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
2775 }
2776
2777 return false;
2778 }
2779
2780 /* The mask requires the next vector. */
2781 if (*current_mask_element >= mask_nunits * 2)
2782 {
694a4f61 2783 if (*needs_first_vector || *mask_fixed)
ebfd146a
IR
2784 {
2785 /* We either need the first vector too or have already moved to the
b8698a0f 2786 next vector. In both cases, this permutation needs three
ebfd146a 2787 vectors. */
73fbfcad 2788 if (dump_enabled_p ())
ebfd146a 2789 {
78c60e3d
SS
2790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2791 "permutation requires at "
2792 "least three vectors ");
2793 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 2794 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
2795 }
2796
2797 return false;
2798 }
2799
2800 /* We move to the next vector, dropping the first one and working with
2801 the second and the third - we need to adjust the values of the mask
2802 accordingly. */
694a4f61 2803 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
ebfd146a
IR
2804
2805 for (i = 0; i < index; i++)
694a4f61 2806 mask[i] -= mask_nunits * *number_of_mask_fixes;
ebfd146a 2807
694a4f61
IR
2808 (*number_of_mask_fixes)++;
2809 *mask_fixed = true;
ebfd146a
IR
2810 }
2811
694a4f61 2812 *need_next_vector = *mask_fixed;
ebfd146a
IR
2813
2814 /* This was the last element of this mask. Start a new one. */
2815 if (index == mask_nunits - 1)
2816 {
694a4f61
IR
2817 *number_of_mask_fixes = 1;
2818 *mask_fixed = false;
2819 *needs_first_vector = false;
ebfd146a
IR
2820 }
2821
2822 return true;
2823}
2824
2825
2826/* Generate vector permute statements from a list of loads in DR_CHAIN.
2827 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
2828 permute statements for the SLP node NODE of the SLP instance
2829 SLP_NODE_INSTANCE. */
2830
ebfd146a 2831bool
01d8bf07 2832vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
ebfd146a
IR
2833 gimple_stmt_iterator *gsi, int vf,
2834 slp_instance slp_node_instance, bool analyze_only)
2835{
01d8bf07 2836 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
2837 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2838 tree mask_element_type = NULL_TREE, mask_type;
2635892a 2839 int i, j, k, nunits, vec_index = 0, scalar_index;
2635892a 2840 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
2841 gimple next_scalar_stmt;
2842 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2843 int first_mask_element;
22e4dee7
RH
2844 int index, unroll_factor, current_mask_element, ncopies;
2845 unsigned char *mask;
ebfd146a
IR
2846 bool only_one_vec = false, need_next_vector = false;
2847 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
694a4f61
IR
2848 int number_of_mask_fixes = 1;
2849 bool mask_fixed = false;
2850 bool needs_first_vector = false;
22e4dee7 2851 enum machine_mode mode;
ebfd146a 2852
22e4dee7
RH
2853 mode = TYPE_MODE (vectype);
2854
2855 if (!can_vec_perm_p (mode, false, NULL))
ebfd146a 2856 {
73fbfcad 2857 if (dump_enabled_p ())
ebfd146a 2858 {
78c60e3d
SS
2859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2860 "no vect permute for ");
2861 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 2862 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 2863 }
2635892a 2864 return false;
ebfd146a
IR
2865 }
2866
2635892a
RH
2867 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2868 same size as the vector element being permuted. */
96f9265a
RG
2869 mask_element_type = lang_hooks.types.type_for_mode
2870 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
ebfd146a 2871 mask_type = get_vectype_for_scalar_type (mask_element_type);
ebfd146a 2872 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 2873 mask = XALLOCAVEC (unsigned char, nunits);
ebfd146a
IR
2874 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2875
2876 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2877 unrolling factor. */
b8698a0f 2878 orig_vec_stmts_num = group_size *
ebfd146a
IR
2879 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2880 if (orig_vec_stmts_num == 1)
2881 only_one_vec = true;
2882
b8698a0f 2883 /* Number of copies is determined by the final vectorization factor
ebfd146a 2884 relatively to SLP_NODE_INSTANCE unrolling factor. */
b8698a0f 2885 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
ebfd146a 2886
01d8bf07
RB
2887 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2888 return false;
2889
b8698a0f
L
2890 /* Generate permutation masks for every NODE. Number of masks for each NODE
2891 is equal to GROUP_SIZE.
2892 E.g., we have a group of three nodes with three loads from the same
2893 location in each node, and the vector size is 4. I.e., we have a
2894 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
2895 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2896 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2897 ...
2898
2635892a 2899 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 2900 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
2901 operation, and the mask element values can't be outside that range.
2902 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 2903 For the first two permutations we need the first and the second input
ebfd146a 2904 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 2905 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
2906 {c2,a3,b3,c3}. */
2907
ebfd146a
IR
2908 {
2909 scalar_index = 0;
2910 index = 0;
2911 vect_stmts_counter = 0;
2912 vec_index = 0;
2913 first_vec_index = vec_index++;
2914 if (only_one_vec)
2915 second_vec_index = first_vec_index;
2916 else
2917 second_vec_index = vec_index++;
2918
2919 for (j = 0; j < unroll_factor; j++)
2920 {
2921 for (k = 0; k < group_size; k++)
2922 {
01d8bf07 2923 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
2635892a
RH
2924 first_mask_element = i + j * group_size;
2925 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2926 nunits, only_one_vec, index,
2927 mask, &current_mask_element,
2928 &need_next_vector,
2929 &number_of_mask_fixes, &mask_fixed,
2930 &needs_first_vector))
2931 return false;
2932 mask[index++] = current_mask_element;
ebfd146a 2933
2635892a 2934 if (index == nunits)
ebfd146a 2935 {
01d8bf07 2936 index = 0;
22e4dee7
RH
2937 if (!can_vec_perm_p (mode, false, mask))
2938 {
73fbfcad 2939 if (dump_enabled_p ())
22e4dee7 2940 {
78c60e3d
SS
2941 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
2942 vect_location,
2943 "unsupported vect permute { ");
22e4dee7 2944 for (i = 0; i < nunits; ++i)
78c60e3d
SS
2945 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
2946 mask[i]);
2947 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
22e4dee7
RH
2948 }
2949 return false;
2950 }
2951
ebfd146a
IR
2952 if (!analyze_only)
2953 {
01d8bf07
RB
2954 int l;
2955 tree mask_vec, *mask_elts;
2956 mask_elts = XALLOCAVEC (tree, nunits);
2957 for (l = 0; l < nunits; ++l)
2958 mask_elts[l] = build_int_cst (mask_element_type,
2959 mask[l]);
2960 mask_vec = build_vector (mask_type, mask_elts);
2961
2962 if (need_next_vector)
ebfd146a
IR
2963 {
2964 first_vec_index = second_vec_index;
2965 second_vec_index = vec_index;
2966 }
2967
9771b263
DN
2968 next_scalar_stmt
2969 = SLP_TREE_SCALAR_STMTS (node)[scalar_index++];
ebfd146a
IR
2970
2971 vect_create_mask_and_perm (stmt, next_scalar_stmt,
faf63e39 2972 mask_vec, first_vec_index, second_vec_index,
2635892a 2973 gsi, node, vectype, dr_chain,
faf63e39 2974 ncopies, vect_stmts_counter++);
ebfd146a 2975 }
b8698a0f
L
2976 }
2977 }
2978 }
2979 }
ebfd146a 2980
ebfd146a
IR
2981 return true;
2982}
2983
2984
2985
2986/* Vectorize SLP instance tree in postorder. */
2987
2988static bool
2989vect_schedule_slp_instance (slp_tree node, slp_instance instance,
a70d6342 2990 unsigned int vectorization_factor)
ebfd146a
IR
2991{
2992 gimple stmt;
0d0293ac 2993 bool grouped_store, is_store;
ebfd146a
IR
2994 gimple_stmt_iterator si;
2995 stmt_vec_info stmt_info;
2996 unsigned int vec_stmts_size, nunits, group_size;
2997 tree vectype;
2998 int i;
d755c7ef 2999 slp_tree child;
ebfd146a
IR
3000
3001 if (!node)
3002 return false;
3003
9771b263 3004 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3005 vect_schedule_slp_instance (child, instance, vectorization_factor);
b8698a0f 3006
9771b263 3007 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3008 stmt_info = vinfo_for_stmt (stmt);
3009
3010 /* VECTYPE is the type of the destination. */
b690cc0f 3011 vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
3012 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3013 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3014
3015 /* For each SLP instance calculate number of vector stmts to be created
ff802fa1 3016 for the scalar stmts in each node of the SLP tree. Number of vector
ebfd146a
IR
3017 elements in one vector iteration is the number of scalar elements in
3018 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3019 size. */
3020 vec_stmts_size = (vectorization_factor * group_size) / nunits;
3021
9771b263 3022 if (!SLP_TREE_VEC_STMTS (node).exists ())
ebfd146a 3023 {
9771b263 3024 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
ebfd146a
IR
3025 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3026 }
3027
73fbfcad 3028 if (dump_enabled_p ())
ebfd146a 3029 {
78c60e3d
SS
3030 dump_printf_loc (MSG_NOTE,vect_location,
3031 "------>vectorizing SLP node starting from: ");
3032 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 3033 dump_printf (MSG_NOTE, "\n");
b8698a0f 3034 }
ebfd146a
IR
3035
3036 /* Loads should be inserted before the first load. */
3037 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
0d0293ac 3038 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
6aa904c4 3039 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
01d8bf07 3040 && SLP_TREE_LOAD_PERMUTATION (node).exists ())
ebfd146a 3041 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
9d5e7640 3042 else if (is_pattern_stmt_p (stmt_info))
6aa904c4 3043 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
ebfd146a
IR
3044 else
3045 si = gsi_for_stmt (stmt);
b8698a0f 3046
e4a707c4 3047 /* Stores should be inserted just before the last store. */
0d0293ac 3048 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
e4a707c4
IR
3049 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
3050 {
3051 gimple last_store = vect_find_last_store_in_slp_instance (instance);
a024e70e
IR
3052 if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
3053 last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
e4a707c4
IR
3054 si = gsi_for_stmt (last_store);
3055 }
3056
b010117a
IR
3057 /* Mark the first element of the reduction chain as reduction to properly
3058 transform the node. In the analysis phase only the last element of the
3059 chain is marked as reduction. */
0d0293ac 3060 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
b010117a
IR
3061 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3062 {
3063 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3064 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3065 }
3066
0d0293ac 3067 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
b5aeb3bb 3068 return is_store;
ebfd146a
IR
3069}
3070
dd34c087
JJ
3071/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3072 For loop vectorization this is done in vectorizable_call, but for SLP
3073 it needs to be deferred until end of vect_schedule_slp, because multiple
3074 SLP instances may refer to the same scalar stmt. */
3075
3076static void
3077vect_remove_slp_scalar_calls (slp_tree node)
3078{
3079 gimple stmt, new_stmt;
3080 gimple_stmt_iterator gsi;
3081 int i;
d755c7ef 3082 slp_tree child;
dd34c087
JJ
3083 tree lhs;
3084 stmt_vec_info stmt_info;
3085
3086 if (!node)
3087 return;
3088
9771b263 3089 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3090 vect_remove_slp_scalar_calls (child);
dd34c087 3091
9771b263 3092 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
dd34c087
JJ
3093 {
3094 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3095 continue;
3096 stmt_info = vinfo_for_stmt (stmt);
3097 if (stmt_info == NULL
3098 || is_pattern_stmt_p (stmt_info)
3099 || !PURE_SLP_STMT (stmt_info))
3100 continue;
3101 lhs = gimple_call_lhs (stmt);
3102 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3103 set_vinfo_for_stmt (new_stmt, stmt_info);
3104 set_vinfo_for_stmt (stmt, NULL);
3105 STMT_VINFO_STMT (stmt_info) = new_stmt;
3106 gsi = gsi_for_stmt (stmt);
3107 gsi_replace (&gsi, new_stmt, false);
3108 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3109 }
3110}
ebfd146a 3111
ff802fa1
IR
3112/* Generate vector code for all SLP instances in the loop/basic block. */
3113
ebfd146a 3114bool
a70d6342 3115vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a 3116{
9771b263 3117 vec<slp_instance> slp_instances;
ebfd146a 3118 slp_instance instance;
01d8bf07 3119 unsigned int i, vf;
ebfd146a
IR
3120 bool is_store = false;
3121
a70d6342
IR
3122 if (loop_vinfo)
3123 {
3124 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3125 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
b8698a0f 3126 }
a70d6342
IR
3127 else
3128 {
3129 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3130 vf = 1;
b8698a0f 3131 }
a70d6342 3132
9771b263 3133 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
3134 {
3135 /* Schedule the tree of INSTANCE. */
3136 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
a70d6342 3137 instance, vf);
73fbfcad 3138 if (dump_enabled_p ())
78c60e3d 3139 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3140 "vectorizing stmts using SLP.\n");
ebfd146a
IR
3141 }
3142
9771b263 3143 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
3144 {
3145 slp_tree root = SLP_INSTANCE_TREE (instance);
3146 gimple store;
3147 unsigned int j;
3148 gimple_stmt_iterator gsi;
3149
c40eced0
RB
3150 /* Remove scalar call stmts. Do not do this for basic-block
3151 vectorization as not all uses may be vectorized.
3152 ??? Why should this be necessary? DCE should be able to
3153 remove the stmts itself.
3154 ??? For BB vectorization we can as well remove scalar
3155 stmts starting from the SLP tree root if they have no
3156 uses. */
3157 if (loop_vinfo)
3158 vect_remove_slp_scalar_calls (root);
dd34c087 3159
9771b263 3160 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
b5aeb3bb
IR
3161 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3162 {
3163 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3164 break;
3165
a024e70e
IR
3166 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3167 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
b5aeb3bb
IR
3168 /* Free the attached stmt_vec_info and remove the stmt. */
3169 gsi = gsi_for_stmt (store);
3d3f2249 3170 unlink_stmt_vdef (store);
b5aeb3bb 3171 gsi_remove (&gsi, true);
3d3f2249 3172 release_defs (store);
b5aeb3bb
IR
3173 free_stmt_vec_info (store);
3174 }
3175 }
3176
ebfd146a
IR
3177 return is_store;
3178}
a70d6342
IR
3179
3180
3181/* Vectorize the basic block. */
3182
3183void
3184vect_slp_transform_bb (basic_block bb)
3185{
3186 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3187 gimple_stmt_iterator si;
3188
3189 gcc_assert (bb_vinfo);
3190
73fbfcad 3191 if (dump_enabled_p ())
78c60e3d 3192 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
a70d6342
IR
3193
3194 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3195 {
3196 gimple stmt = gsi_stmt (si);
3197 stmt_vec_info stmt_info;
3198
73fbfcad 3199 if (dump_enabled_p ())
a70d6342 3200 {
78c60e3d
SS
3201 dump_printf_loc (MSG_NOTE, vect_location,
3202 "------>SLPing statement: ");
3203 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 3204 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
3205 }
3206
3207 stmt_info = vinfo_for_stmt (stmt);
3208 gcc_assert (stmt_info);
3209
3210 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3211 if (STMT_SLP_TYPE (stmt_info))
3212 {
3213 vect_schedule_slp (NULL, bb_vinfo);
3214 break;
3215 }
3216 }
3217
73fbfcad 3218 if (dump_enabled_p ())
5d318fd4 3219 dump_printf_loc (MSG_NOTE, vect_location,
ccb3ad87 3220 "BASIC BLOCK VECTORIZED\n");
a70d6342 3221
12aaf609
IR
3222 destroy_bb_vec_info (bb_vinfo);
3223}