]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
Fix multilib build of libgcc on Linux/sparc.
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
2635892a 2 Copyright (C) 2007, 2008, 2009, 2010, 2011
c75c517d 3 Free Software Foundation, Inc.
b8698a0f 4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
5 and Ira Rosen <irar@il.ibm.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
cf835838
JM
31#include "tree-pretty-print.h"
32#include "gimple-pretty-print.h"
ebfd146a
IR
33#include "tree-flow.h"
34#include "tree-dump.h"
35#include "cfgloop.h"
36#include "cfglayout.h"
37#include "expr.h"
38#include "recog.h"
39#include "optabs.h"
40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
ebfd146a 42
a70d6342
IR
43/* Extract the location of the basic block in the source code.
44 Return the basic block location if succeed and NULL if not. */
45
46LOC
47find_bb_location (basic_block bb)
48{
49 gimple stmt = NULL;
50 gimple_stmt_iterator si;
51
52 if (!bb)
53 return UNKNOWN_LOC;
54
55 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
56 {
57 stmt = gsi_stmt (si);
58 if (gimple_location (stmt) != UNKNOWN_LOC)
59 return gimple_location (stmt);
60 }
61
62 return UNKNOWN_LOC;
63}
64
65
ebfd146a
IR
66/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
67
68static void
69vect_free_slp_tree (slp_tree node)
70{
71 if (!node)
72 return;
73
74 if (SLP_TREE_LEFT (node))
75 vect_free_slp_tree (SLP_TREE_LEFT (node));
b8698a0f 76
ebfd146a
IR
77 if (SLP_TREE_RIGHT (node))
78 vect_free_slp_tree (SLP_TREE_RIGHT (node));
b8698a0f 79
ebfd146a 80 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
b8698a0f 81
ebfd146a
IR
82 if (SLP_TREE_VEC_STMTS (node))
83 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
84
85 free (node);
86}
87
88
89/* Free the memory allocated for the SLP instance. */
90
91void
92vect_free_slp_instance (slp_instance instance)
93{
94 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
95 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
96 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
97}
98
99
100/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
101 they are of a legal type and that they match the defs of the first stmt of
102 the SLP group (stored in FIRST_STMT_...). */
103
104static bool
a70d6342 105vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
b8698a0f 106 slp_tree slp_node, gimple stmt,
a70d6342 107 VEC (gimple, heap) **def_stmts0,
ebfd146a
IR
108 VEC (gimple, heap) **def_stmts1,
109 enum vect_def_type *first_stmt_dt0,
110 enum vect_def_type *first_stmt_dt1,
b8698a0f 111 tree *first_stmt_def0_type,
ebfd146a
IR
112 tree *first_stmt_def1_type,
113 tree *first_stmt_const_oprnd,
114 int ncopies_for_cost,
115 bool *pattern0, bool *pattern1)
116{
117 tree oprnd;
118 unsigned int i, number_of_oprnds;
6aa904c4 119 tree def[2];
ebfd146a
IR
120 gimple def_stmt;
121 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
b8698a0f 122 stmt_vec_info stmt_info =
ebfd146a
IR
123 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
124 enum gimple_rhs_class rhs_class;
a70d6342 125 struct loop *loop = NULL;
6aa904c4
IR
126 enum tree_code rhs_code;
127 bool different_types = false;
b8698a0f 128
a70d6342
IR
129 if (loop_vinfo)
130 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
131
132 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
133 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
134
135 for (i = 0; i < number_of_oprnds; i++)
136 {
137 oprnd = gimple_op (stmt, i + 1);
138
6aa904c4 139 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def[i],
a70d6342 140 &dt[i])
ebfd146a
IR
141 || (!def_stmt && dt[i] != vect_constant_def))
142 {
b8698a0f 143 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
144 {
145 fprintf (vect_dump, "Build SLP failed: can't find def for ");
146 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
147 }
148
149 return false;
150 }
151
a70d6342 152 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
ff802fa1 153 from the pattern. Check that all the stmts of the node are in the
ebfd146a 154 pattern. */
a70d6342 155 if (loop && def_stmt && gimple_bb (def_stmt)
ebfd146a
IR
156 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
157 && vinfo_for_stmt (def_stmt)
83197f37
IR
158 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
159 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
160 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
ebfd146a
IR
161 {
162 if (!*first_stmt_dt0)
163 *pattern0 = true;
164 else
165 {
166 if (i == 1 && !*first_stmt_dt1)
167 *pattern1 = true;
168 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
169 {
170 if (vect_print_dump_info (REPORT_DETAILS))
171 {
172 fprintf (vect_dump, "Build SLP failed: some of the stmts"
173 " are in a pattern, and others are not ");
174 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
175 }
176
177 return false;
178 }
179 }
180
181 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
182 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
183
184 if (*dt == vect_unknown_def_type)
185 {
186 if (vect_print_dump_info (REPORT_DETAILS))
187 fprintf (vect_dump, "Unsupported pattern.");
188 return false;
189 }
190
191 switch (gimple_code (def_stmt))
192 {
193 case GIMPLE_PHI:
6aa904c4 194 def[i] = gimple_phi_result (def_stmt);
ebfd146a
IR
195 break;
196
197 case GIMPLE_ASSIGN:
6aa904c4 198 def[i] = gimple_assign_lhs (def_stmt);
ebfd146a
IR
199 break;
200
201 default:
202 if (vect_print_dump_info (REPORT_DETAILS))
203 fprintf (vect_dump, "unsupported defining stmt: ");
204 return false;
205 }
206 }
207
208 if (!*first_stmt_dt0)
209 {
210 /* op0 of the first stmt of the group - store its info. */
211 *first_stmt_dt0 = dt[i];
6aa904c4
IR
212 if (def[i])
213 *first_stmt_def0_type = TREE_TYPE (def[i]);
ebfd146a
IR
214 else
215 *first_stmt_const_oprnd = oprnd;
216
217 /* Analyze costs (for the first stmt of the group only). */
218 if (rhs_class != GIMPLE_SINGLE_RHS)
219 /* Not memory operation (we don't call this functions for loads). */
220 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
221 else
222 /* Store. */
272c6793
RS
223 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
224 dt[0], slp_node);
ebfd146a 225 }
b8698a0f 226
ebfd146a
IR
227 else
228 {
229 if (!*first_stmt_dt1 && i == 1)
230 {
231 /* op1 of the first stmt of the group - store its info. */
232 *first_stmt_dt1 = dt[i];
6aa904c4
IR
233 if (def[i])
234 *first_stmt_def1_type = TREE_TYPE (def[i]);
ebfd146a
IR
235 else
236 {
b8698a0f 237 /* We assume that the stmt contains only one constant
ebfd146a
IR
238 operand. We fail otherwise, to be on the safe side. */
239 if (*first_stmt_const_oprnd)
240 {
b8698a0f 241 if (vect_print_dump_info (REPORT_SLP))
ebfd146a 242 fprintf (vect_dump, "Build SLP failed: two constant "
b8698a0f 243 "oprnds in stmt");
ebfd146a
IR
244 return false;
245 }
246 *first_stmt_const_oprnd = oprnd;
247 }
248 }
249 else
250 {
b8698a0f 251 /* Not first stmt of the group, check that the def-stmt/s match
b010117a
IR
252 the def-stmt/s of the first stmt. Allow different definition
253 types for reduction chains: the first stmt must be a
254 vect_reduction_def (a phi node), and the rest
255 vect_internal_def. */
b8698a0f 256 if ((i == 0
b010117a
IR
257 && ((*first_stmt_dt0 != dt[i]
258 && !(*first_stmt_dt0 == vect_reduction_def
259 && dt[i] == vect_internal_def))
6aa904c4 260 || (*first_stmt_def0_type && def[0]
8533c9d8 261 && !types_compatible_p (*first_stmt_def0_type,
6aa904c4 262 TREE_TYPE (def[0])))))
b8698a0f 263 || (i == 1
b010117a
IR
264 && ((*first_stmt_dt1 != dt[i]
265 && !(*first_stmt_dt1 == vect_reduction_def
266 && dt[i] == vect_internal_def))
6aa904c4 267 || (*first_stmt_def1_type && def[1]
8533c9d8 268 && !types_compatible_p (*first_stmt_def1_type,
6aa904c4
IR
269 TREE_TYPE (def[1])))))
270 || (!def[i]
8533c9d8 271 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
6aa904c4
IR
272 TREE_TYPE (oprnd)))
273 || different_types)
b8698a0f 274 {
6aa904c4
IR
275 if (i != number_of_oprnds - 1)
276 different_types = true;
277 else
278 {
279 if (is_gimple_assign (stmt)
280 && (rhs_code = gimple_assign_rhs_code (stmt))
281 && TREE_CODE_CLASS (rhs_code) == tcc_binary
282 && commutative_tree_code (rhs_code)
283 && *first_stmt_dt0 == dt[1]
284 && *first_stmt_dt1 == dt[0]
285 && def[0] && def[1]
286 && !(*first_stmt_def0_type
287 && !types_compatible_p (*first_stmt_def0_type,
288 TREE_TYPE (def[1])))
289 && !(*first_stmt_def1_type
290 && !types_compatible_p (*first_stmt_def1_type,
291 TREE_TYPE (def[0]))))
292 {
293 if (vect_print_dump_info (REPORT_SLP))
294 {
295 fprintf (vect_dump, "Swapping operands of ");
296 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
297 }
298
299 swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
300 gimple_assign_rhs2_ptr (stmt));
301 }
302 else
303 {
304 if (vect_print_dump_info (REPORT_SLP))
305 fprintf (vect_dump, "Build SLP failed: different types ");
b8698a0f 306
6aa904c4
IR
307 return false;
308 }
309 }
ebfd146a
IR
310 }
311 }
312 }
313
314 /* Check the types of the definitions. */
315 switch (dt[i])
316 {
317 case vect_constant_def:
8644a673 318 case vect_external_def:
ebfd146a 319 break;
b8698a0f 320
8644a673 321 case vect_internal_def:
b5aeb3bb 322 case vect_reduction_def:
6aa904c4 323 if ((i == 0 && !different_types) || (i == 1 && different_types))
ebfd146a
IR
324 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
325 else
6aa904c4 326 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
ebfd146a
IR
327 break;
328
329 default:
330 /* FORNOW: Not supported. */
b8698a0f 331 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
332 {
333 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
6aa904c4 334 print_generic_expr (vect_dump, def[i], TDF_SLIM);
ebfd146a
IR
335 }
336
337 return false;
338 }
339 }
340
341 return true;
342}
343
344
345/* Recursively build an SLP tree starting from NODE.
b8698a0f 346 Fail (and return FALSE) if def-stmts are not isomorphic, require data
ff802fa1 347 permutation or are of unsupported types of operation. Otherwise, return
ebfd146a
IR
348 TRUE. */
349
350static bool
b8698a0f 351vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
a70d6342
IR
352 slp_tree *node, unsigned int group_size,
353 int *inside_cost, int *outside_cost,
354 int ncopies_for_cost, unsigned int *max_nunits,
ebfd146a 355 VEC (int, heap) **load_permutation,
a70d6342 356 VEC (slp_tree, heap) **loads,
6aa904c4 357 unsigned int vectorization_factor, bool *loads_permuted)
ebfd146a
IR
358{
359 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
360 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
361 unsigned int i;
362 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
363 gimple stmt = VEC_index (gimple, stmts, 0);
81f40b79
ILT
364 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
365 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
2200fc49 366 enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
ebfd146a
IR
367 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
368 tree lhs;
369 bool stop_recursion = false, need_same_oprnds = false;
370 tree vectype, scalar_type, first_op1 = NULL_TREE;
a70d6342 371 unsigned int ncopies;
ebfd146a
IR
372 optab optab;
373 int icode;
374 enum machine_mode optab_op2_mode;
375 enum machine_mode vec_mode;
376 tree first_stmt_const_oprnd = NULL_TREE;
377 struct data_reference *first_dr;
378 bool pattern0 = false, pattern1 = false;
379 HOST_WIDE_INT dummy;
380 bool permutation = false;
381 unsigned int load_place;
b5aeb3bb 382 gimple first_load, prev_first_load = NULL;
ebfd146a
IR
383
384 /* For every stmt in NODE find its def stmt/s. */
ac47786e 385 FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
ebfd146a 386 {
b8698a0f 387 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
388 {
389 fprintf (vect_dump, "Build SLP for ");
390 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
391 }
392
4b5caab7
IR
393 /* Fail to vectorize statements marked as unvectorizable. */
394 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
395 {
396 if (vect_print_dump_info (REPORT_SLP))
397 {
398 fprintf (vect_dump,
399 "Build SLP failed: unvectorizable statement ");
400 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
401 }
402
403 return false;
404 }
405
ebfd146a
IR
406 lhs = gimple_get_lhs (stmt);
407 if (lhs == NULL_TREE)
408 {
b8698a0f 409 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
410 {
411 fprintf (vect_dump,
412 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
413 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
414 }
b8698a0f 415
ebfd146a
IR
416 return false;
417 }
418
b8698a0f 419 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
ebfd146a
IR
420 vectype = get_vectype_for_scalar_type (scalar_type);
421 if (!vectype)
422 {
423 if (vect_print_dump_info (REPORT_SLP))
424 {
425 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
426 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
427 }
428 return false;
429 }
b8698a0f 430
4ef69dfc
IR
431 /* In case of multiple types we need to detect the smallest type. */
432 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
a70d6342 433 {
4ef69dfc
IR
434 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
435 if (bb_vinfo)
436 vectorization_factor = *max_nunits;
a70d6342 437 }
b8698a0f 438
4ef69dfc 439 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
b8698a0f 440
ebfd146a
IR
441 if (is_gimple_call (stmt))
442 rhs_code = CALL_EXPR;
443 else
444 rhs_code = gimple_assign_rhs_code (stmt);
445
446 /* Check the operation. */
447 if (i == 0)
448 {
449 first_stmt_code = rhs_code;
450
b8698a0f 451 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
452 vector shift with scalar shift operand. */
453 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
454 || rhs_code == LROTATE_EXPR
455 || rhs_code == RROTATE_EXPR)
456 {
457 vec_mode = TYPE_MODE (vectype);
458
459 /* First see if we have a vector/vector shift. */
460 optab = optab_for_tree_code (rhs_code, vectype,
461 optab_vector);
462
463 if (!optab
947131ba 464 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
465 {
466 /* No vector/vector shift, try for a vector/scalar shift. */
467 optab = optab_for_tree_code (rhs_code, vectype,
468 optab_scalar);
469
470 if (!optab)
471 {
472 if (vect_print_dump_info (REPORT_SLP))
473 fprintf (vect_dump, "Build SLP failed: no optab.");
474 return false;
475 }
947131ba 476 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
477 if (icode == CODE_FOR_nothing)
478 {
479 if (vect_print_dump_info (REPORT_SLP))
480 fprintf (vect_dump, "Build SLP failed: "
481 "op not supported by target.");
482 return false;
483 }
484 optab_op2_mode = insn_data[icode].operand[2].mode;
485 if (!VECTOR_MODE_P (optab_op2_mode))
486 {
487 need_same_oprnds = true;
488 first_op1 = gimple_assign_rhs2 (stmt);
489 }
490 }
491 }
36ba4aae
IR
492 else if (rhs_code == WIDEN_LSHIFT_EXPR)
493 {
494 need_same_oprnds = true;
495 first_op1 = gimple_assign_rhs2 (stmt);
496 }
ebfd146a
IR
497 }
498 else
499 {
500 if (first_stmt_code != rhs_code
501 && (first_stmt_code != IMAGPART_EXPR
502 || rhs_code != REALPART_EXPR)
503 && (first_stmt_code != REALPART_EXPR
69f11a13
IR
504 || rhs_code != IMAGPART_EXPR)
505 && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
506 && (first_stmt_code == ARRAY_REF
507 || first_stmt_code == INDIRECT_REF
508 || first_stmt_code == COMPONENT_REF
509 || first_stmt_code == MEM_REF)))
ebfd146a 510 {
b8698a0f 511 if (vect_print_dump_info (REPORT_SLP))
ebfd146a 512 {
b8698a0f 513 fprintf (vect_dump,
ebfd146a
IR
514 "Build SLP failed: different operation in stmt ");
515 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
516 }
b8698a0f 517
ebfd146a
IR
518 return false;
519 }
b8698a0f
L
520
521 if (need_same_oprnds
ebfd146a
IR
522 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
523 {
b8698a0f 524 if (vect_print_dump_info (REPORT_SLP))
ebfd146a 525 {
b8698a0f 526 fprintf (vect_dump,
ebfd146a
IR
527 "Build SLP failed: different shift arguments in ");
528 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
529 }
b8698a0f 530
ebfd146a
IR
531 return false;
532 }
533 }
534
535 /* Strided store or load. */
536 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
537 {
538 if (REFERENCE_CLASS_P (lhs))
539 {
540 /* Store. */
b8698a0f
L
541 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
542 stmt, &def_stmts0, &def_stmts1,
543 &first_stmt_dt0,
544 &first_stmt_dt1,
545 &first_stmt_def0_type,
ebfd146a
IR
546 &first_stmt_def1_type,
547 &first_stmt_const_oprnd,
548 ncopies_for_cost,
549 &pattern0, &pattern1))
550 return false;
551 }
b5aeb3bb
IR
552 else
553 {
554 /* Load. */
555 /* FORNOW: Check that there is no gap between the loads. */
e14c1050
IR
556 if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
557 && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
558 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
559 && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
b5aeb3bb
IR
560 {
561 if (vect_print_dump_info (REPORT_SLP))
562 {
563 fprintf (vect_dump, "Build SLP failed: strided "
564 "loads have gaps ");
565 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
566 }
b8698a0f 567
b5aeb3bb
IR
568 return false;
569 }
2f0fa28e 570
b5aeb3bb
IR
571 /* Check that the size of interleaved loads group is not
572 greater than the SLP group size. */
6aa904c4
IR
573 if (loop_vinfo
574 && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
b5aeb3bb
IR
575 {
576 if (vect_print_dump_info (REPORT_SLP))
577 {
578 fprintf (vect_dump, "Build SLP failed: the number of "
579 "interleaved loads is greater than"
580 " the SLP group size ");
581 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
582 }
b8698a0f 583
b5aeb3bb
IR
584 return false;
585 }
586
e14c1050 587 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
b5aeb3bb
IR
588 if (prev_first_load)
589 {
590 /* Check that there are no loads from different interleaving
ff802fa1 591 chains in the same node. The only exception is complex
b5aeb3bb
IR
592 numbers. */
593 if (prev_first_load != first_load
594 && rhs_code != REALPART_EXPR
595 && rhs_code != IMAGPART_EXPR)
596 {
597 if (vect_print_dump_info (REPORT_SLP))
598 {
599 fprintf (vect_dump, "Build SLP failed: different "
600 "interleaving chains in one node ");
601 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
602 }
603
604 return false;
605 }
606 }
607 else
608 prev_first_load = first_load;
b8698a0f 609
ebfd146a
IR
610 if (first_load == stmt)
611 {
612 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
720f5239 613 if (vect_supportable_dr_alignment (first_dr, false)
ebfd146a
IR
614 == dr_unaligned_unsupported)
615 {
616 if (vect_print_dump_info (REPORT_SLP))
617 {
618 fprintf (vect_dump, "Build SLP failed: unsupported "
619 "unaligned load ");
620 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
621 }
b8698a0f 622
ebfd146a
IR
623 return false;
624 }
b8698a0f 625
ebfd146a
IR
626 /* Analyze costs (for the first stmt in the group). */
627 vect_model_load_cost (vinfo_for_stmt (stmt),
272c6793 628 ncopies_for_cost, false, *node);
ebfd146a 629 }
b8698a0f 630
ff802fa1 631 /* Store the place of this load in the interleaving chain. In
ebfd146a
IR
632 case that permutation is needed we later decide if a specific
633 permutation is supported. */
634 load_place = vect_get_place_in_interleaving_chain (stmt,
635 first_load);
636 if (load_place != i)
637 permutation = true;
b8698a0f 638
ebfd146a 639 VEC_safe_push (int, heap, *load_permutation, load_place);
b8698a0f 640
ebfd146a
IR
641 /* We stop the tree when we reach a group of loads. */
642 stop_recursion = true;
643 continue;
644 }
645 } /* Strided access. */
646 else
647 {
648 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
649 {
650 /* Not strided load. */
b8698a0f 651 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
652 {
653 fprintf (vect_dump, "Build SLP failed: not strided load ");
654 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
655 }
656
657 /* FORNOW: Not strided loads are not supported. */
658 return false;
659 }
660
661 /* Not memory operation. */
662 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
663 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
664 {
b8698a0f 665 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
666 {
667 fprintf (vect_dump, "Build SLP failed: operation");
668 fprintf (vect_dump, " unsupported ");
669 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
670 }
671
672 return false;
673 }
674
b8698a0f 675 /* Find the def-stmts. */
a70d6342 676 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
ebfd146a 677 &def_stmts0, &def_stmts1,
b8698a0f
L
678 &first_stmt_dt0, &first_stmt_dt1,
679 &first_stmt_def0_type,
ebfd146a
IR
680 &first_stmt_def1_type,
681 &first_stmt_const_oprnd,
682 ncopies_for_cost,
683 &pattern0, &pattern1))
684 return false;
685 }
686 }
687
688 /* Add the costs of the node to the overall instance costs. */
b8698a0f 689 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
ebfd146a
IR
690 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
691
692 /* Strided loads were reached - stop the recursion. */
693 if (stop_recursion)
694 {
6aa904c4 695 VEC_safe_push (slp_tree, heap, *loads, *node);
ebfd146a
IR
696 if (permutation)
697 {
6aa904c4
IR
698
699 *loads_permuted = true;
35e1a5e7 700 *inside_cost
720f5239 701 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
35e1a5e7 702 * group_size;
ebfd146a 703 }
2200fc49 704 else
6aa904c4
IR
705 {
706 /* We don't check here complex numbers chains, so we set
707 LOADS_PERMUTED for further check in
708 vect_supported_load_permutation_p. */
2200fc49 709 if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
6aa904c4 710 *loads_permuted = true;
2200fc49 711 }
ebfd146a
IR
712
713 return true;
714 }
715
b8698a0f 716 /* Create SLP_TREE nodes for the definition node/s. */
8644a673 717 if (first_stmt_dt0 == vect_internal_def)
ebfd146a
IR
718 {
719 slp_tree left_node = XNEW (struct _slp_tree);
720 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
721 SLP_TREE_VEC_STMTS (left_node) = NULL;
722 SLP_TREE_LEFT (left_node) = NULL;
723 SLP_TREE_RIGHT (left_node) = NULL;
724 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
725 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
b8698a0f
L
726 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
727 inside_cost, outside_cost, ncopies_for_cost,
a70d6342 728 max_nunits, load_permutation, loads,
6aa904c4 729 vectorization_factor, loads_permuted))
ebfd146a 730 return false;
b8698a0f 731
ebfd146a
IR
732 SLP_TREE_LEFT (*node) = left_node;
733 }
734
8644a673 735 if (first_stmt_dt1 == vect_internal_def)
ebfd146a
IR
736 {
737 slp_tree right_node = XNEW (struct _slp_tree);
738 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
739 SLP_TREE_VEC_STMTS (right_node) = NULL;
740 SLP_TREE_LEFT (right_node) = NULL;
741 SLP_TREE_RIGHT (right_node) = NULL;
742 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
743 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
a70d6342 744 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
ebfd146a 745 inside_cost, outside_cost, ncopies_for_cost,
a70d6342 746 max_nunits, load_permutation, loads,
6aa904c4 747 vectorization_factor, loads_permuted))
ebfd146a 748 return false;
b8698a0f 749
ebfd146a
IR
750 SLP_TREE_RIGHT (*node) = right_node;
751 }
752
753 return true;
754}
755
756
757static void
758vect_print_slp_tree (slp_tree node)
759{
760 int i;
761 gimple stmt;
762
763 if (!node)
764 return;
765
766 fprintf (vect_dump, "node ");
ac47786e 767 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
768 {
769 fprintf (vect_dump, "\n\tstmt %d ", i);
b8698a0f 770 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
ebfd146a
IR
771 }
772 fprintf (vect_dump, "\n");
773
774 vect_print_slp_tree (SLP_TREE_LEFT (node));
775 vect_print_slp_tree (SLP_TREE_RIGHT (node));
776}
777
778
b8698a0f
L
779/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
780 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 781 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
782 stmts in NODE are to be marked. */
783
784static void
785vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
786{
787 int i;
788 gimple stmt;
789
790 if (!node)
791 return;
792
ac47786e 793 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
794 if (j < 0 || i == j)
795 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
796
797 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
798 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
799}
800
801
a70d6342
IR
802/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
803
804static void
805vect_mark_slp_stmts_relevant (slp_tree node)
806{
807 int i;
808 gimple stmt;
809 stmt_vec_info stmt_info;
810
811 if (!node)
812 return;
813
ac47786e 814 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
815 {
816 stmt_info = vinfo_for_stmt (stmt);
b8698a0f 817 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
818 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
819 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
820 }
821
822 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
823 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
824}
825
826
b8698a0f 827/* Check if the permutation required by the SLP INSTANCE is supported.
ebfd146a
IR
828 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
829
830static bool
831vect_supported_slp_permutation_p (slp_instance instance)
832{
833 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
834 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
e14c1050 835 gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
ebfd146a
IR
836 VEC (slp_tree, heap) *sorted_loads = NULL;
837 int index;
838 slp_tree *tmp_loads = NULL;
b8698a0f 839 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
ebfd146a 840 slp_tree load;
b8698a0f
L
841
842 /* FORNOW: The only supported loads permutation is loads from the same
ebfd146a 843 location in all the loads in the node, when the data-refs in
b8698a0f 844 nodes of LOADS constitute an interleaving chain.
ebfd146a
IR
845 Sort the nodes according to the order of accesses in the chain. */
846 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
b8698a0f
L
847 for (i = 0, j = 0;
848 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
849 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
ebfd146a
IR
850 i += group_size, j++)
851 {
852 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
853 /* Check that the loads are all in the same interleaving chain. */
e14c1050 854 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
ebfd146a
IR
855 {
856 if (vect_print_dump_info (REPORT_DETAILS))
857 {
858 fprintf (vect_dump, "Build SLP failed: unsupported data "
859 "permutation ");
860 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
861 }
b8698a0f 862
ebfd146a 863 free (tmp_loads);
b8698a0f 864 return false;
ebfd146a
IR
865 }
866
867 tmp_loads[index] = load;
868 }
b8698a0f 869
ebfd146a
IR
870 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
871 for (i = 0; i < group_size; i++)
872 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
873
874 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
875 SLP_INSTANCE_LOADS (instance) = sorted_loads;
876 free (tmp_loads);
877
878 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
879 SLP_INSTANCE_UNROLLING_FACTOR (instance),
880 instance, true))
881 return false;
882
883 return true;
884}
885
886
b5aeb3bb
IR
887/* Rearrange the statements of NODE according to PERMUTATION. */
888
889static void
890vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
891 VEC (int, heap) *permutation)
892{
893 gimple stmt;
894 VEC (gimple, heap) *tmp_stmts;
895 unsigned int index, i;
896
897 if (!node)
898 return;
899
900 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
901 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
902
903 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
904 tmp_stmts = VEC_alloc (gimple, heap, group_size);
905
906 for (i = 0; i < group_size; i++)
907 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
908
ac47786e 909 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
b5aeb3bb
IR
910 {
911 index = VEC_index (int, permutation, i);
912 VEC_replace (gimple, tmp_stmts, index, stmt);
913 }
914
915 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
916 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
917}
918
919
ebfd146a
IR
920/* Check if the required load permutation is supported.
921 LOAD_PERMUTATION contains a list of indices of the loads.
922 In SLP this permutation is relative to the order of strided stores that are
923 the base of the SLP instance. */
924
925static bool
926vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
927 VEC (int, heap) *load_permutation)
928{
b5aeb3bb
IR
929 int i = 0, j, prev = -1, next, k, number_of_groups;
930 bool supported, bad_permutation = false;
7417f6c0 931 sbitmap load_index;
2200fc49 932 slp_tree node, other_complex_node;
6aa904c4 933 gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
2200fc49 934 unsigned complex_numbers = 0;
6aa904c4
IR
935 struct data_reference *dr;
936 bb_vec_info bb_vinfo;
ebfd146a 937
a70d6342 938 /* FORNOW: permutations are only supported in SLP. */
ebfd146a
IR
939 if (!slp_instn)
940 return false;
941
942 if (vect_print_dump_info (REPORT_SLP))
943 {
944 fprintf (vect_dump, "Load permutation ");
ac47786e 945 FOR_EACH_VEC_ELT (int, load_permutation, i, next)
ebfd146a
IR
946 fprintf (vect_dump, "%d ", next);
947 }
948
b5aeb3bb
IR
949 /* In case of reduction every load permutation is allowed, since the order
950 of the reduction statements is not important (as opposed to the case of
ff802fa1 951 strided stores). The only condition we need to check is that all the
b5aeb3bb
IR
952 load nodes are of the same size and have the same permutation (and then
953 rearrange all the nodes of the SLP instance according to this
954 permutation). */
955
956 /* Check that all the load nodes are of the same size. */
ac47786e 957 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
2200fc49
IR
958 {
959 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
960 != (unsigned) group_size)
961 return false;
962
963 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
964 if (is_gimple_assign (stmt)
965 && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
966 || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
967 complex_numbers++;
968 }
969
970 /* Complex operands can be swapped as following:
971 real_c = real_b + real_a;
972 imag_c = imag_a + imag_b;
973 i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
ff802fa1 974 {real_a, imag_a} and {real_b, imag_b}. We check here that if interleaving
2200fc49
IR
975 chains are mixed, they match the above pattern. */
976 if (complex_numbers)
977 {
ac47786e 978 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
2200fc49 979 {
ac47786e 980 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
2200fc49
IR
981 {
982 if (j == 0)
983 first = stmt;
984 else
985 {
e14c1050 986 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
2200fc49
IR
987 {
988 if (complex_numbers != 2)
989 return false;
990
991 if (i == 0)
992 k = 1;
993 else
994 k = 0;
995
996 other_complex_node = VEC_index (slp_tree,
997 SLP_INSTANCE_LOADS (slp_instn), k);
998 other_node_first = VEC_index (gimple,
999 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
1000
e14c1050 1001 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2200fc49
IR
1002 != other_node_first)
1003 return false;
1004 }
1005 }
1006 }
1007 }
1008 }
1009
1010 /* We checked that this case ok, so there is no need to proceed with
1011 permutation tests. */
1012 if (complex_numbers == 2)
1013 {
1014 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
1015 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1016 return true;
1017 }
1018
b5aeb3bb
IR
1019 node = SLP_INSTANCE_TREE (slp_instn);
1020 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1021 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
1022 instance, not all the loads belong to the same node or interleaving
ff802fa1 1023 group. Hence, we need to divide them into groups according to
b5aeb3bb
IR
1024 GROUP_SIZE. */
1025 number_of_groups = VEC_length (int, load_permutation) / group_size;
1026
b010117a
IR
1027 /* Reduction (there are no data-refs in the root).
1028 In reduction chain the order of the loads is important. */
1029 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1030 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb
IR
1031 {
1032 int first_group_load_index;
1033
1034 /* Compare all the permutation sequences to the first one. */
1035 for (i = 1; i < number_of_groups; i++)
1036 {
1037 k = 0;
1038 for (j = i * group_size; j < i * group_size + group_size; j++)
1039 {
1040 next = VEC_index (int, load_permutation, j);
1041 first_group_load_index = VEC_index (int, load_permutation, k);
1042
1043 if (next != first_group_load_index)
1044 {
1045 bad_permutation = true;
1046 break;
1047 }
1048
1049 k++;
1050 }
1051
1052 if (bad_permutation)
1053 break;
1054 }
1055
1056 if (!bad_permutation)
1057 {
c9c1e775
IR
1058 /* Check that the loads in the first sequence are different and there
1059 are no gaps between them. */
1060 load_index = sbitmap_alloc (group_size);
1061 sbitmap_zero (load_index);
1062 for (k = 0; k < group_size; k++)
1063 {
1064 first_group_load_index = VEC_index (int, load_permutation, k);
1065 if (TEST_BIT (load_index, first_group_load_index))
1066 {
1067 bad_permutation = true;
1068 break;
1069 }
1070
1071 SET_BIT (load_index, first_group_load_index);
1072 }
1073
1074 if (!bad_permutation)
1075 for (k = 0; k < group_size; k++)
1076 if (!TEST_BIT (load_index, k))
1077 {
1078 bad_permutation = true;
1079 break;
1080 }
1081
1082 sbitmap_free (load_index);
1083 }
1084
1085 if (!bad_permutation)
1086 {
1087 /* This permutation is valid for reduction. Since the order of the
b5aeb3bb
IR
1088 statements in the nodes is not important unless they are memory
1089 accesses, we can rearrange the statements in all the nodes
1090 according to the order of the loads. */
1091 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1092 load_permutation);
1093 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1094 return true;
1095 }
1096 }
1097
6aa904c4
IR
1098 /* In basic block vectorization we allow any subchain of an interleaving
1099 chain.
1100 FORNOW: not supported in loop SLP because of realignment compications. */
1101 bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
1102 bad_permutation = false;
1103 /* Check that for every node in the instance teh loads form a subchain. */
1104 if (bb_vinfo)
1105 {
1106 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1107 {
1108 next_load = NULL;
1109 first_load = NULL;
1110 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
1111 {
1112 if (!first_load)
1113 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
1114 else if (first_load
1115 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
1116 {
1117 bad_permutation = true;
1118 break;
1119 }
1120
1121 if (j != 0 && next_load != load)
1122 {
1123 bad_permutation = true;
1124 break;
1125 }
1126
1127 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1128 }
1129
1130 if (bad_permutation)
1131 break;
1132 }
1133
1134 /* Check that the alignment of the first load in every subchain, i.e.,
1135 the first statement in every load node, is supported. */
1136 if (!bad_permutation)
1137 {
1138 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
1139 {
1140 first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
1141 if (first_load
1142 != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1143 {
1144 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1145 if (vect_supportable_dr_alignment (dr, false)
1146 == dr_unaligned_unsupported)
1147 {
1148 if (vect_print_dump_info (REPORT_SLP))
1149 {
1150 fprintf (vect_dump, "unsupported unaligned load ");
1151 print_gimple_stmt (vect_dump, first_load, 0,
1152 TDF_SLIM);
1153 }
1154 bad_permutation = true;
1155 break;
1156 }
1157 }
1158 }
1159
1160 if (!bad_permutation)
1161 {
1162 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
1163 return true;
1164 }
1165 }
1166 }
1167
b8698a0f
L
1168 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
1169 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
b5aeb3bb 1170 well (unless it's reduction). */
ebfd146a
IR
1171 if (VEC_length (int, load_permutation)
1172 != (unsigned int) (group_size * group_size))
1173 return false;
1174
1175 supported = true;
7417f6c0
IR
1176 load_index = sbitmap_alloc (group_size);
1177 sbitmap_zero (load_index);
ebfd146a
IR
1178 for (j = 0; j < group_size; j++)
1179 {
1180 for (i = j * group_size, k = 0;
1181 VEC_iterate (int, load_permutation, i, next) && k < group_size;
1182 i++, k++)
1183 {
1184 if (i != j * group_size && next != prev)
1185 {
1186 supported = false;
1187 break;
1188 }
1189
1190 prev = next;
b8698a0f 1191 }
7417f6c0
IR
1192
1193 if (TEST_BIT (load_index, prev))
1194 {
1195 supported = false;
1196 break;
1197 }
1198
1199 SET_BIT (load_index, prev);
ebfd146a 1200 }
59eefaa6
IR
1201
1202 for (j = 0; j < group_size; j++)
1203 if (!TEST_BIT (load_index, j))
1204 return false;
1205
7417f6c0 1206 sbitmap_free (load_index);
ebfd146a
IR
1207
1208 if (supported && i == group_size * group_size
1209 && vect_supported_slp_permutation_p (slp_instn))
1210 return true;
1211
b8698a0f 1212 return false;
ebfd146a
IR
1213}
1214
1215
b8698a0f 1216/* Find the first load in the loop that belongs to INSTANCE.
ebfd146a 1217 When loads are in several SLP nodes, there can be a case in which the first
b8698a0f 1218 load does not appear in the first SLP node to be transformed, causing
ff802fa1 1219 incorrect order of statements. Since we generate all the loads together,
ebfd146a
IR
1220 they must be inserted before the first load of the SLP instance and not
1221 before the first load of the first node of the instance. */
ff802fa1 1222
b8698a0f
L
1223static gimple
1224vect_find_first_load_in_slp_instance (slp_instance instance)
ebfd146a
IR
1225{
1226 int i, j;
1227 slp_tree load_node;
1228 gimple first_load = NULL, load;
1229
ac47786e
NF
1230 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
1231 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
ebfd146a 1232 first_load = get_earlier_stmt (load, first_load);
b8698a0f 1233
ebfd146a
IR
1234 return first_load;
1235}
1236
1237
e4a707c4 1238/* Find the last store in SLP INSTANCE. */
ff802fa1 1239
e4a707c4
IR
1240static gimple
1241vect_find_last_store_in_slp_instance (slp_instance instance)
1242{
1243 int i;
1244 slp_tree node;
1245 gimple last_store = NULL, store;
1246
1247 node = SLP_INSTANCE_TREE (instance);
1248 for (i = 0;
1249 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
1250 i++)
1251 last_store = get_later_stmt (store, last_store);
1252
1253 return last_store;
1254}
1255
1256
ff802fa1 1257/* Analyze an SLP instance starting from a group of strided stores. Call
b8698a0f 1258 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1259 Return FALSE if it's impossible to SLP any stmt in the loop. */
1260
1261static bool
a70d6342
IR
1262vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1263 gimple stmt)
ebfd146a
IR
1264{
1265 slp_instance new_instance;
1266 slp_tree node = XNEW (struct _slp_tree);
e14c1050 1267 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
ebfd146a 1268 unsigned int unrolling_factor = 1, nunits;
b5aeb3bb 1269 tree vectype, scalar_type = NULL_TREE;
ebfd146a 1270 gimple next;
0f900dfa 1271 unsigned int vectorization_factor = 0;
b5aeb3bb 1272 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
ebfd146a
IR
1273 unsigned int max_nunits = 0;
1274 VEC (int, heap) *load_permutation;
1275 VEC (slp_tree, heap) *loads;
b5aeb3bb 1276 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
6aa904c4 1277 bool loads_permuted = false;
b5aeb3bb 1278
b010117a 1279 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1280 {
b010117a
IR
1281 if (dr)
1282 {
1283 scalar_type = TREE_TYPE (DR_REF (dr));
1284 vectype = get_vectype_for_scalar_type (scalar_type);
1285 }
1286 else
1287 {
1288 gcc_assert (loop_vinfo);
1289 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1290 }
1291
e14c1050 1292 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
1293 }
1294 else
1295 {
1296 gcc_assert (loop_vinfo);
1297 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1298 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1299 }
b8698a0f 1300
ebfd146a
IR
1301 if (!vectype)
1302 {
1303 if (vect_print_dump_info (REPORT_SLP))
1304 {
1305 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1306 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1307 }
b5aeb3bb 1308
ebfd146a
IR
1309 return false;
1310 }
1311
1312 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342
IR
1313 if (loop_vinfo)
1314 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1315 else
a70d6342
IR
1316 vectorization_factor = nunits;
1317
a70d6342
IR
1318 /* Calculate the unrolling factor. */
1319 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1320 if (unrolling_factor != 1 && !loop_vinfo)
1321 {
1322 if (vect_print_dump_info (REPORT_SLP))
e9dbe7bb
IR
1323 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1324 " block SLP");
b8698a0f 1325
a70d6342
IR
1326 return false;
1327 }
1328
b8698a0f 1329 /* Create a node (a root of the SLP tree) for the packed strided stores. */
ebfd146a
IR
1330 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
1331 next = stmt;
b010117a 1332 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
ebfd146a 1333 {
b5aeb3bb
IR
1334 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1335 while (next)
1336 {
1337 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
e14c1050 1338 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb
IR
1339 }
1340 }
1341 else
1342 {
1343 /* Collect reduction statements. */
1344 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1345 next);
1346 i++)
b010117a 1347 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
ebfd146a
IR
1348 }
1349
1350 SLP_TREE_VEC_STMTS (node) = NULL;
1351 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1352 SLP_TREE_LEFT (node) = NULL;
1353 SLP_TREE_RIGHT (node) = NULL;
1354 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1355 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1356
ebfd146a
IR
1357 /* Calculate the number of vector stmts to create based on the unrolling
1358 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1359 GROUP_SIZE / NUNITS otherwise. */
1360 ncopies_for_cost = unrolling_factor * group_size / nunits;
b8698a0f
L
1361
1362 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1363 loads = VEC_alloc (slp_tree, heap, group_size);
ebfd146a
IR
1364
1365 /* Build the tree for the SLP instance. */
b8698a0f
L
1366 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1367 &inside_cost, &outside_cost, ncopies_for_cost,
1368 &max_nunits, &load_permutation, &loads,
6aa904c4 1369 vectorization_factor, &loads_permuted))
ebfd146a 1370 {
4ef69dfc 1371 /* Calculate the unrolling factor based on the smallest type. */
ebfd146a
IR
1372 if (max_nunits > nunits)
1373 unrolling_factor = least_common_multiple (max_nunits, group_size)
1374 / group_size;
b8698a0f 1375
4ef69dfc
IR
1376 if (unrolling_factor != 1 && !loop_vinfo)
1377 {
1378 if (vect_print_dump_info (REPORT_SLP))
1379 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1380 " block SLP");
1381 return false;
1382 }
1383
1384 /* Create a new SLP instance. */
1385 new_instance = XNEW (struct _slp_instance);
1386 SLP_INSTANCE_TREE (new_instance) = node;
1387 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a
IR
1388 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1389 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1390 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1391 SLP_INSTANCE_LOADS (new_instance) = loads;
1392 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1393 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
6aa904c4
IR
1394
1395 if (loads_permuted)
ebfd146a
IR
1396 {
1397 if (!vect_supported_load_permutation_p (new_instance, group_size,
b8698a0f 1398 load_permutation))
ebfd146a
IR
1399 {
1400 if (vect_print_dump_info (REPORT_SLP))
1401 {
1402 fprintf (vect_dump, "Build SLP failed: unsupported load "
1403 "permutation ");
1404 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1405 }
1406
1407 vect_free_slp_instance (new_instance);
1408 return false;
1409 }
1410
1411 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1412 = vect_find_first_load_in_slp_instance (new_instance);
1413 }
1414 else
1415 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1416
a70d6342 1417 if (loop_vinfo)
b8698a0f
L
1418 VEC_safe_push (slp_instance, heap,
1419 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
a70d6342
IR
1420 new_instance);
1421 else
1422 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1423 new_instance);
b8698a0f 1424
ebfd146a
IR
1425 if (vect_print_dump_info (REPORT_SLP))
1426 vect_print_slp_tree (node);
1427
1428 return true;
1429 }
1430
1431 /* Failed to SLP. */
1432 /* Free the allocated memory. */
1433 vect_free_slp_tree (node);
1434 VEC_free (int, heap, load_permutation);
1435 VEC_free (slp_tree, heap, loads);
b8698a0f 1436
a70d6342 1437 return false;
ebfd146a
IR
1438}
1439
1440
ff802fa1 1441/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
1442 trees of packed scalar stmts if SLP is possible. */
1443
1444bool
a70d6342 1445vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a
IR
1446{
1447 unsigned int i;
b010117a
IR
1448 VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
1449 gimple first_element;
a70d6342 1450 bool ok = false;
ebfd146a
IR
1451
1452 if (vect_print_dump_info (REPORT_SLP))
1453 fprintf (vect_dump, "=== vect_analyze_slp ===");
1454
a70d6342 1455 if (loop_vinfo)
b5aeb3bb
IR
1456 {
1457 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
b010117a 1458 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
b5aeb3bb
IR
1459 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1460 }
a70d6342
IR
1461 else
1462 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
b8698a0f 1463
b5aeb3bb 1464 /* Find SLP sequences starting from groups of strided stores. */
b010117a
IR
1465 FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
1466 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
a70d6342 1467 ok = true;
ebfd146a 1468
b8698a0f 1469 if (bb_vinfo && !ok)
a70d6342
IR
1470 {
1471 if (vect_print_dump_info (REPORT_SLP))
1472 fprintf (vect_dump, "Failed to SLP the basic block.");
1473
1474 return false;
1475 }
ebfd146a 1476
b010117a
IR
1477 if (loop_vinfo
1478 && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1479 {
1480 /* Find SLP sequences starting from reduction chains. */
1481 FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1482 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1483 ok = true;
1484 else
1485 return false;
1486
1487 /* Don't try to vectorize SLP reductions if reduction chain was
1488 detected. */
1489 return ok;
1490 }
1491
b5aeb3bb 1492 /* Find SLP sequences starting from groups of reductions. */
74500b3e 1493 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
b5aeb3bb
IR
1494 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1495 VEC_index (gimple, reductions, 0)))
1496 ok = true;
1497
ebfd146a
IR
1498 return true;
1499}
1500
1501
1502/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
1503 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1504 least one instance. */
ebfd146a 1505
437f4a00 1506bool
ebfd146a
IR
1507vect_make_slp_decision (loop_vec_info loop_vinfo)
1508{
1509 unsigned int i, unrolling_factor = 1;
1510 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1511 slp_instance instance;
1512 int decided_to_slp = 0;
1513
1514 if (vect_print_dump_info (REPORT_SLP))
1515 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1516
ac47786e 1517 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
ebfd146a
IR
1518 {
1519 /* FORNOW: SLP if you can. */
1520 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1521 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1522
ff802fa1 1523 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 1524 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 1525 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
1526 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1527 decided_to_slp++;
1528 }
1529
1530 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1531
b8698a0f
L
1532 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1533 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
ebfd146a 1534 decided_to_slp, unrolling_factor);
437f4a00
IR
1535
1536 return (decided_to_slp > 0);
ebfd146a
IR
1537}
1538
1539
1540/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 1541 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
1542
1543static void
1544vect_detect_hybrid_slp_stmts (slp_tree node)
1545{
1546 int i;
1547 gimple stmt;
1548 imm_use_iterator imm_iter;
1549 gimple use_stmt;
99f51320 1550 stmt_vec_info stmt_vinfo;
ebfd146a
IR
1551
1552 if (!node)
1553 return;
1554
ac47786e 1555 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
1556 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1557 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1558 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
99f51320
IR
1559 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1560 && !STMT_SLP_TYPE (stmt_vinfo)
1561 && (STMT_VINFO_RELEVANT (stmt_vinfo)
b5aeb3bb
IR
1562 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1563 && !(gimple_code (use_stmt) == GIMPLE_PHI
1564 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1565 == vect_reduction_def))
ebfd146a
IR
1566 vect_mark_slp_stmts (node, hybrid, i);
1567
1568 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1569 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1570}
1571
1572
1573/* Find stmts that must be both vectorized and SLPed. */
1574
1575void
1576vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1577{
1578 unsigned int i;
1579 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1580 slp_instance instance;
1581
1582 if (vect_print_dump_info (REPORT_SLP))
1583 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1584
ac47786e 1585 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
ebfd146a
IR
1586 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1587}
1588
a70d6342
IR
1589
1590/* Create and initialize a new bb_vec_info struct for BB, as well as
1591 stmt_vec_info structs for all the stmts in it. */
b8698a0f 1592
a70d6342
IR
1593static bb_vec_info
1594new_bb_vec_info (basic_block bb)
1595{
1596 bb_vec_info res = NULL;
1597 gimple_stmt_iterator gsi;
1598
1599 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1600 BB_VINFO_BB (res) = bb;
1601
1602 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1603 {
1604 gimple stmt = gsi_stmt (gsi);
1605 gimple_set_uid (stmt, 0);
1606 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1607 }
1608
1609 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1610 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1611
1612 bb->aux = res;
1613 return res;
1614}
1615
1616
1617/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1618 stmts in the basic block. */
1619
1620static void
1621destroy_bb_vec_info (bb_vec_info bb_vinfo)
1622{
1623 basic_block bb;
1624 gimple_stmt_iterator si;
1625
1626 if (!bb_vinfo)
1627 return;
1628
1629 bb = BB_VINFO_BB (bb_vinfo);
1630
1631 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1632 {
1633 gimple stmt = gsi_stmt (si);
1634 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1635
1636 if (stmt_info)
1637 /* Free stmt_vec_info. */
1638 free_stmt_vec_info (stmt);
1639 }
1640
01be8516
SP
1641 free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
1642 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
a70d6342
IR
1643 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1644 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1645 free (bb_vinfo);
1646 bb->aux = NULL;
1647}
1648
1649
1650/* Analyze statements contained in SLP tree node after recursively analyzing
1651 the subtree. Return TRUE if the operations are supported. */
1652
1653static bool
1654vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1655{
1656 bool dummy;
1657 int i;
1658 gimple stmt;
1659
1660 if (!node)
1661 return true;
1662
1663 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1664 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1665 return false;
1666
ac47786e 1667 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
1668 {
1669 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1670 gcc_assert (stmt_info);
1671 gcc_assert (PURE_SLP_STMT (stmt_info));
1672
1673 if (!vect_analyze_stmt (stmt, &dummy, node))
1674 return false;
1675 }
1676
1677 return true;
1678}
1679
1680
ff802fa1 1681/* Analyze statements in SLP instances of the basic block. Return TRUE if the
a70d6342
IR
1682 operations are supported. */
1683
1684static bool
1685vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1686{
1687 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1688 slp_instance instance;
1689 int i;
1690
1691 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1692 {
b8698a0f 1693 if (!vect_slp_analyze_node_operations (bb_vinfo,
a70d6342
IR
1694 SLP_INSTANCE_TREE (instance)))
1695 {
1696 vect_free_slp_instance (instance);
1697 VEC_ordered_remove (slp_instance, slp_instances, i);
1698 }
1699 else
1700 i++;
b8698a0f
L
1701 }
1702
a70d6342
IR
1703 if (!VEC_length (slp_instance, slp_instances))
1704 return false;
1705
1706 return true;
1707}
1708
69f11a13
IR
1709/* Check if vectorization of the basic block is profitable. */
1710
1711static bool
1712vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1713{
1714 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1715 slp_instance instance;
1716 int i;
1717 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1718 unsigned int stmt_cost;
1719 gimple stmt;
1720 gimple_stmt_iterator si;
1721 basic_block bb = BB_VINFO_BB (bb_vinfo);
1722 stmt_vec_info stmt_info = NULL;
1723 tree dummy_type = NULL;
1724 int dummy = 0;
1725
1726 /* Calculate vector costs. */
ac47786e 1727 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
69f11a13
IR
1728 {
1729 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1730 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1731 }
1732
1733 /* Calculate scalar cost. */
1734 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1735 {
1736 stmt = gsi_stmt (si);
1737 stmt_info = vinfo_for_stmt (stmt);
1738
1739 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1740 || !PURE_SLP_STMT (stmt_info))
1741 continue;
1742
1743 if (STMT_VINFO_DATA_REF (stmt_info))
1744 {
1745 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1746 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1747 (scalar_load, dummy_type, dummy);
1748 else
1749 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1750 (scalar_store, dummy_type, dummy);
1751 }
1752 else
1753 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1754 (scalar_stmt, dummy_type, dummy);
1755
1756 scalar_cost += stmt_cost;
1757 }
1758
1759 if (vect_print_dump_info (REPORT_COST))
1760 {
1761 fprintf (vect_dump, "Cost model analysis: \n");
1762 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1763 vec_inside_cost);
1764 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1765 vec_outside_cost);
1766 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1767 }
1768
1769 /* Vectorization is profitable if its cost is less than the cost of scalar
1770 version. */
1771 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1772 return false;
1773
1774 return true;
1775}
1776
1777/* Check if the basic block can be vectorized. */
a70d6342 1778
8e19f5a1
IR
1779static bb_vec_info
1780vect_slp_analyze_bb_1 (basic_block bb)
a70d6342
IR
1781{
1782 bb_vec_info bb_vinfo;
1783 VEC (ddr_p, heap) *ddrs;
1784 VEC (slp_instance, heap) *slp_instances;
1785 slp_instance instance;
8e19f5a1 1786 int i;
777e1f09
RG
1787 int min_vf = 2;
1788 int max_vf = MAX_VECTORIZATION_FACTOR;
e4a707c4 1789
a70d6342
IR
1790 bb_vinfo = new_bb_vec_info (bb);
1791 if (!bb_vinfo)
1792 return NULL;
1793
777e1f09 1794 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
a70d6342
IR
1795 {
1796 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1797 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1798 "block.\n");
b8698a0f 1799
a70d6342
IR
1800 destroy_bb_vec_info (bb_vinfo);
1801 return NULL;
1802 }
1803
1804 ddrs = BB_VINFO_DDRS (bb_vinfo);
b8698a0f 1805 if (!VEC_length (ddr_p, ddrs))
a70d6342
IR
1806 {
1807 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1808 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1809 "block.\n");
1810
1811 destroy_bb_vec_info (bb_vinfo);
1812 return NULL;
1813 }
1814
5bfdb7d8
IR
1815 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1816 || min_vf > max_vf)
777e1f09
RG
1817 {
1818 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1819 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1820 "in basic block.\n");
1821
1822 destroy_bb_vec_info (bb_vinfo);
1823 return NULL;
1824 }
1825
a70d6342
IR
1826 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1827 {
1828 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1829 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1830 "block.\n");
b8698a0f 1831
a70d6342
IR
1832 destroy_bb_vec_info (bb_vinfo);
1833 return NULL;
1834 }
b8698a0f 1835
a70d6342
IR
1836 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1837 {
1838 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1839 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1840 "block.\n");
b8698a0f 1841
a70d6342
IR
1842 destroy_bb_vec_info (bb_vinfo);
1843 return NULL;
1844 }
1845
1846 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1847 {
1848 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1849 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1850 "block.\n");
1851
1852 destroy_bb_vec_info (bb_vinfo);
1853 return NULL;
1854 }
1855
1856 /* Check the SLP opportunities in the basic block, analyze and build SLP
1857 trees. */
1858 if (!vect_analyze_slp (NULL, bb_vinfo))
1859 {
1860 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1861 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1862 "in basic block.\n");
1863
1864 destroy_bb_vec_info (bb_vinfo);
1865 return NULL;
1866 }
b8698a0f 1867
a70d6342
IR
1868 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1869
1870 /* Mark all the statements that we want to vectorize as pure SLP and
1871 relevant. */
ac47786e 1872 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
a70d6342
IR
1873 {
1874 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1875 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
b8698a0f 1876 }
a70d6342
IR
1877
1878 if (!vect_slp_analyze_operations (bb_vinfo))
1879 {
1880 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1881 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1882
1883 destroy_bb_vec_info (bb_vinfo);
1884 return NULL;
1885 }
1886
69f11a13
IR
1887 /* Cost model: check if the vectorization is worthwhile. */
1888 if (flag_vect_cost_model
1889 && !vect_bb_vectorization_profitable_p (bb_vinfo))
1890 {
1891 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1892 fprintf (vect_dump, "not vectorized: vectorization is not "
1893 "profitable.\n");
1894
1895 destroy_bb_vec_info (bb_vinfo);
1896 return NULL;
1897 }
1898
a70d6342 1899 if (vect_print_dump_info (REPORT_DETAILS))
e9dbe7bb 1900 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
a70d6342
IR
1901
1902 return bb_vinfo;
1903}
1904
1905
8e19f5a1
IR
1906bb_vec_info
1907vect_slp_analyze_bb (basic_block bb)
1908{
1909 bb_vec_info bb_vinfo;
1910 int insns = 0;
1911 gimple_stmt_iterator gsi;
1912 unsigned int vector_sizes;
1913
1914 if (vect_print_dump_info (REPORT_DETAILS))
1915 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1916
1917 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1918 {
1919 gimple stmt = gsi_stmt (gsi);
1920 if (!is_gimple_debug (stmt)
1921 && !gimple_nop_p (stmt)
1922 && gimple_code (stmt) != GIMPLE_LABEL)
1923 insns++;
1924 }
1925
1926 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1927 {
1928 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1929 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1930 "block.\n");
1931
1932 return NULL;
1933 }
1934
1935 /* Autodetect first vector size we try. */
1936 current_vector_size = 0;
1937 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1938
1939 while (1)
1940 {
1941 bb_vinfo = vect_slp_analyze_bb_1 (bb);
1942 if (bb_vinfo)
1943 return bb_vinfo;
1944
1945 destroy_bb_vec_info (bb_vinfo);
1946
1947 vector_sizes &= ~current_vector_size;
1948 if (vector_sizes == 0
1949 || current_vector_size == 0)
1950 return NULL;
1951
1952 /* Try the next biggest vector size. */
1953 current_vector_size = 1 << floor_log2 (vector_sizes);
1954 if (vect_print_dump_info (REPORT_DETAILS))
1955 fprintf (vect_dump, "***** Re-trying analysis with "
1956 "vector size %d\n", current_vector_size);
1957 }
1958}
1959
1960
b8698a0f 1961/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
ff802fa1
IR
1962 the number of created vector stmts depends on the unrolling factor).
1963 However, the actual number of vector stmts for every SLP node depends on
1964 VF which is set later in vect_analyze_operations (). Hence, SLP costs
1965 should be updated. In this function we assume that the inside costs
1966 calculated in vect_model_xxx_cost are linear in ncopies. */
ebfd146a
IR
1967
1968void
1969vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1970{
1971 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1972 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1973 slp_instance instance;
1974
1975 if (vect_print_dump_info (REPORT_SLP))
1976 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1977
ac47786e 1978 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
ebfd146a 1979 /* We assume that costs are linear in ncopies. */
b8698a0f
L
1980 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1981 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
ebfd146a
IR
1982}
1983
a70d6342 1984
b8698a0f
L
1985/* For constant and loop invariant defs of SLP_NODE this function returns
1986 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
1987 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
1988 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
1989 REDUC_INDEX is the index of the reduction operand in the statements, unless
1990 it is -1. */
ebfd146a
IR
1991
1992static void
9dc3f7de
IR
1993vect_get_constant_vectors (tree op, slp_tree slp_node,
1994 VEC (tree, heap) **vec_oprnds,
b5aeb3bb
IR
1995 unsigned int op_num, unsigned int number_of_vectors,
1996 int reduc_index)
ebfd146a
IR
1997{
1998 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1999 gimple stmt = VEC_index (gimple, stmts, 0);
2000 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
ebfd146a
IR
2001 int nunits;
2002 tree vec_cst;
2003 tree t = NULL_TREE;
2004 int j, number_of_places_left_in_vector;
2005 tree vector_type;
9dc3f7de 2006 tree vop;
ebfd146a
IR
2007 int group_size = VEC_length (gimple, stmts);
2008 unsigned int vec_num, i;
2009 int number_of_copies = 1;
2010 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2011 bool constant_p, is_store;
b5aeb3bb 2012 tree neutral_op = NULL;
d59dc888 2013 enum tree_code code = gimple_assign_rhs_code (stmt);
0e93a64e
IR
2014 gimple def_stmt;
2015 struct loop *loop;
b5aeb3bb 2016
29ed4920
IR
2017 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2018 && reduc_index != -1)
b5aeb3bb 2019 {
b5aeb3bb 2020 op_num = reduc_index - 1;
9dc3f7de 2021 op = gimple_op (stmt, reduc_index);
b5aeb3bb 2022 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
ff802fa1 2023 we need either neutral operands or the original operands. See
b5aeb3bb
IR
2024 get_initial_def_for_reduction() for details. */
2025 switch (code)
2026 {
2027 case WIDEN_SUM_EXPR:
2028 case DOT_PROD_EXPR:
2029 case PLUS_EXPR:
2030 case MINUS_EXPR:
2031 case BIT_IOR_EXPR:
2032 case BIT_XOR_EXPR:
2033 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2034 neutral_op = build_real (TREE_TYPE (op), dconst0);
2035 else
2036 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2037
2038 break;
2039
2040 case MULT_EXPR:
b5aeb3bb
IR
2041 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2042 neutral_op = build_real (TREE_TYPE (op), dconst1);
2043 else
2044 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2045
2046 break;
2047
c1e822d5
IR
2048 case BIT_AND_EXPR:
2049 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2050 break;
2051
0e93a64e
IR
2052 case MAX_EXPR:
2053 case MIN_EXPR:
2054 def_stmt = SSA_NAME_DEF_STMT (op);
2055 loop = (gimple_bb (stmt))->loop_father;
2056 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2057 loop_preheader_edge (loop));
2058 break;
2059
b5aeb3bb 2060 default:
0e93a64e 2061 neutral_op = NULL;
b5aeb3bb
IR
2062 }
2063 }
ebfd146a
IR
2064
2065 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2066 {
2067 is_store = true;
2068 op = gimple_assign_rhs1 (stmt);
2069 }
2070 else
9dc3f7de
IR
2071 is_store = false;
2072
2073 gcc_assert (op);
ebfd146a
IR
2074
2075 if (CONSTANT_CLASS_P (op))
d59dc888 2076 constant_p = true;
ebfd146a 2077 else
d59dc888
IR
2078 constant_p = false;
2079
9dc3f7de 2080 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
cd481d83 2081 gcc_assert (vector_type);
ebfd146a
IR
2082 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2083
2084 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 2085 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
2086
2087 For example, we have two scalar operands, s1 and s2 (e.g., group of
2088 strided accesses of size two), while NUNITS is four (i.e., four scalars
2089 of this type can be packed in a vector). The output vector will contain
2090 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2091 will be 2).
2092
b8698a0f 2093 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
2094 containing the operands.
2095
2096 For example, NUNITS is four as before, and the group size is 8
2097 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2098 {s5, s6, s7, s8}. */
b8698a0f 2099
ebfd146a
IR
2100 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2101
2102 number_of_places_left_in_vector = nunits;
2103 for (j = 0; j < number_of_copies; j++)
2104 {
2105 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2106 {
2107 if (is_store)
2108 op = gimple_assign_rhs1 (stmt);
2109 else
2110 op = gimple_op (stmt, op_num + 1);
b8698a0f 2111
b5aeb3bb
IR
2112 if (reduc_index != -1)
2113 {
0e93a64e
IR
2114 loop = (gimple_bb (stmt))->loop_father;
2115 def_stmt = SSA_NAME_DEF_STMT (op);
b5aeb3bb
IR
2116
2117 gcc_assert (loop);
b010117a
IR
2118
2119 /* Get the def before the loop. In reduction chain we have only
2120 one initial value. */
2121 if ((j != (number_of_copies - 1)
2122 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2123 && i != 0))
2124 && neutral_op)
b5aeb3bb 2125 op = neutral_op;
b010117a
IR
2126 else
2127 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2128 loop_preheader_edge (loop));
b5aeb3bb
IR
2129 }
2130
ebfd146a
IR
2131 /* Create 'vect_ = {op0,op1,...,opn}'. */
2132 t = tree_cons (NULL_TREE, op, t);
2133
2134 number_of_places_left_in_vector--;
2135
2136 if (number_of_places_left_in_vector == 0)
2137 {
2138 number_of_places_left_in_vector = nunits;
2139
2140 if (constant_p)
2141 vec_cst = build_vector (vector_type, t);
2142 else
2143 vec_cst = build_constructor_from_list (vector_type, t);
2144 VEC_quick_push (tree, voprnds,
2145 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2146 t = NULL_TREE;
2147 }
2148 }
2149 }
2150
b8698a0f 2151 /* Since the vectors are created in the reverse order, we should invert
ebfd146a
IR
2152 them. */
2153 vec_num = VEC_length (tree, voprnds);
2154 for (j = vec_num - 1; j >= 0; j--)
2155 {
2156 vop = VEC_index (tree, voprnds, j);
2157 VEC_quick_push (tree, *vec_oprnds, vop);
2158 }
2159
2160 VEC_free (tree, heap, voprnds);
2161
2162 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
2163 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2164 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a
IR
2165 to replicate the vectors. */
2166 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2167 {
b5aeb3bb
IR
2168 tree neutral_vec = NULL;
2169
2170 if (neutral_op)
2171 {
2172 if (!neutral_vec)
b9acc9f1 2173 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb
IR
2174
2175 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2176 }
2177 else
2178 {
2179 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2180 VEC_quick_push (tree, *vec_oprnds, vop);
2181 }
ebfd146a
IR
2182 }
2183}
2184
2185
2186/* Get vectorized definitions from SLP_NODE that contains corresponding
2187 vectorized def-stmts. */
2188
2189static void
2190vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2191{
2192 tree vec_oprnd;
2193 gimple vec_def_stmt;
2194 unsigned int i;
2195
2196 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2197
ac47786e 2198 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
ebfd146a
IR
2199 {
2200 gcc_assert (vec_def_stmt);
2201 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2202 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2203 }
2204}
2205
2206
b8698a0f
L
2207/* Get vectorized definitions for SLP_NODE.
2208 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
2209 call vect_get_constant_vectors() to create vector stmts.
2210 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2211 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
b8698a0f 2212 vect_get_slp_vect_defs() to retrieve them.
ebfd146a 2213 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
b8698a0f
L
2214 the right node. This is used when the second operand must remain scalar. */
2215
ebfd146a 2216void
9dc3f7de
IR
2217vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
2218 VEC (tree,heap) **vec_oprnds0,
b5aeb3bb 2219 VEC (tree,heap) **vec_oprnds1, int reduc_index)
ebfd146a
IR
2220{
2221 gimple first_stmt;
2222 enum tree_code code;
2223 int number_of_vects;
b8698a0f 2224 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
ebfd146a
IR
2225
2226 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
2227 /* The number of vector defs is determined by the number of vector statements
2228 in the node from which we get those statements. */
b8698a0f 2229 if (SLP_TREE_LEFT (slp_node))
ebfd146a
IR
2230 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
2231 else
2232 {
2233 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2234 /* Number of vector stmts was calculated according to LHS in
2235 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
ff802fa1 2236 necessary. See vect_get_smallest_scalar_type () for details. */
ebfd146a
IR
2237 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2238 &rhs_size_unit);
2239 if (rhs_size_unit != lhs_size_unit)
2240 {
2241 number_of_vects *= rhs_size_unit;
2242 number_of_vects /= lhs_size_unit;
2243 }
2244 }
2245
2246 /* Allocate memory for vectorized defs. */
2247 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
2248
2249 /* SLP_NODE corresponds either to a group of stores or to a group of
ff802fa1 2250 unary/binary operations. We don't call this function for loads.
b5aeb3bb
IR
2251 For reduction defs we call vect_get_constant_vectors(), since we are
2252 looking for initial loop invariant values. */
2253 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
ebfd146a
IR
2254 /* The defs are already vectorized. */
2255 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
2256 else
2257 /* Build vectors from scalar defs. */
9dc3f7de 2258 vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
b5aeb3bb 2259 reduc_index);
ebfd146a
IR
2260
2261 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
2262 /* Since we don't call this function with loads, this is a group of
2263 stores. */
2264 return;
2265
b5aeb3bb
IR
2266 /* For reductions, we only need initial values. */
2267 if (reduc_index != -1)
2268 return;
2269
ebfd146a 2270 code = gimple_assign_rhs_code (first_stmt);
29ed4920 2271 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1 || !op1)
ebfd146a
IR
2272 return;
2273
2274 /* The number of vector defs is determined by the number of vector statements
2275 in the node from which we get those statements. */
2276 if (SLP_TREE_RIGHT (slp_node))
2277 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
2278 else
2279 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2280
2281 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
2282
2283 if (SLP_TREE_RIGHT (slp_node))
2284 /* The defs are already vectorized. */
2285 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
2286 else
2287 /* Build vectors from scalar defs. */
9dc3f7de
IR
2288 vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
2289 -1);
ebfd146a
IR
2290}
2291
a70d6342 2292
b8698a0f 2293/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
ebfd146a
IR
2294 building a vector of type MASK_TYPE from it) and two input vectors placed in
2295 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2296 shifting by STRIDE elements of DR_CHAIN for every copy.
2297 (STRIDE is the number of vectorized stmts for NODE divided by the number of
b8698a0f 2298 copies).
ebfd146a
IR
2299 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2300 the created stmts must be inserted. */
2301
2302static inline void
b8698a0f 2303vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
faf63e39 2304 tree mask, int first_vec_indx, int second_vec_indx,
b8698a0f 2305 gimple_stmt_iterator *gsi, slp_tree node,
2635892a 2306 tree vectype, VEC(tree,heap) *dr_chain,
ebfd146a
IR
2307 int ncopies, int vect_stmts_counter)
2308{
faf63e39 2309 tree perm_dest;
ebfd146a
IR
2310 gimple perm_stmt = NULL;
2311 stmt_vec_info next_stmt_info;
0f900dfa 2312 int i, stride;
ebfd146a 2313 tree first_vec, second_vec, data_ref;
ebfd146a 2314
ebfd146a 2315 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
ebfd146a 2316
b8698a0f 2317 /* Initialize the vect stmts of NODE to properly insert the generated
ebfd146a 2318 stmts later. */
b8698a0f 2319 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
ebfd146a
IR
2320 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2321 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2322
2323 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2324 for (i = 0; i < ncopies; i++)
2325 {
2326 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2327 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2328
ebfd146a 2329 /* Generate the permute statement. */
2635892a
RH
2330 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
2331 first_vec, second_vec, mask);
ebfd146a 2332 data_ref = make_ssa_name (perm_dest, perm_stmt);
2635892a 2333 gimple_set_lhs (perm_stmt, data_ref);
ebfd146a 2334 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
ebfd146a 2335
b8698a0f
L
2336 /* Store the vector statement in NODE. */
2337 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
ebfd146a
IR
2338 stride * i + vect_stmts_counter, perm_stmt);
2339
2340 first_vec_indx += stride;
2341 second_vec_indx += stride;
2342 }
2343
2344 /* Mark the scalar stmt as vectorized. */
2345 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2346 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2347}
2348
2349
b8698a0f 2350/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
ebfd146a 2351 return in CURRENT_MASK_ELEMENT its equivalent in target specific
ff802fa1 2352 representation. Check that the mask is valid and return FALSE if not.
ebfd146a
IR
2353 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
2354 the next vector, i.e., the current first vector is not needed. */
b8698a0f 2355
ebfd146a 2356static bool
b8698a0f 2357vect_get_mask_element (gimple stmt, int first_mask_element, int m,
ebfd146a 2358 int mask_nunits, bool only_one_vec, int index,
22e4dee7 2359 unsigned char *mask, int *current_mask_element,
694a4f61
IR
2360 bool *need_next_vector, int *number_of_mask_fixes,
2361 bool *mask_fixed, bool *needs_first_vector)
ebfd146a
IR
2362{
2363 int i;
ebfd146a
IR
2364
2365 /* Convert to target specific representation. */
2366 *current_mask_element = first_mask_element + m;
2367 /* Adjust the value in case it's a mask for second and third vectors. */
694a4f61 2368 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
ebfd146a
IR
2369
2370 if (*current_mask_element < mask_nunits)
694a4f61 2371 *needs_first_vector = true;
ebfd146a
IR
2372
2373 /* We have only one input vector to permute but the mask accesses values in
2374 the next vector as well. */
2375 if (only_one_vec && *current_mask_element >= mask_nunits)
2376 {
2377 if (vect_print_dump_info (REPORT_DETAILS))
2378 {
2379 fprintf (vect_dump, "permutation requires at least two vectors ");
2380 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2381 }
2382
2383 return false;
2384 }
2385
2386 /* The mask requires the next vector. */
2387 if (*current_mask_element >= mask_nunits * 2)
2388 {
694a4f61 2389 if (*needs_first_vector || *mask_fixed)
ebfd146a
IR
2390 {
2391 /* We either need the first vector too or have already moved to the
b8698a0f 2392 next vector. In both cases, this permutation needs three
ebfd146a
IR
2393 vectors. */
2394 if (vect_print_dump_info (REPORT_DETAILS))
2395 {
2396 fprintf (vect_dump, "permutation requires at "
2397 "least three vectors ");
2398 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2399 }
2400
2401 return false;
2402 }
2403
2404 /* We move to the next vector, dropping the first one and working with
2405 the second and the third - we need to adjust the values of the mask
2406 accordingly. */
694a4f61 2407 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
ebfd146a
IR
2408
2409 for (i = 0; i < index; i++)
694a4f61 2410 mask[i] -= mask_nunits * *number_of_mask_fixes;
ebfd146a 2411
694a4f61
IR
2412 (*number_of_mask_fixes)++;
2413 *mask_fixed = true;
ebfd146a
IR
2414 }
2415
694a4f61 2416 *need_next_vector = *mask_fixed;
ebfd146a
IR
2417
2418 /* This was the last element of this mask. Start a new one. */
2419 if (index == mask_nunits - 1)
2420 {
694a4f61
IR
2421 *number_of_mask_fixes = 1;
2422 *mask_fixed = false;
2423 *needs_first_vector = false;
ebfd146a
IR
2424 }
2425
2426 return true;
2427}
2428
2429
2430/* Generate vector permute statements from a list of loads in DR_CHAIN.
2431 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2432 permute statements for SLP_NODE_INSTANCE. */
2433bool
2434vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2435 gimple_stmt_iterator *gsi, int vf,
2436 slp_instance slp_node_instance, bool analyze_only)
2437{
2438 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2439 tree mask_element_type = NULL_TREE, mask_type;
2635892a 2440 int i, j, k, nunits, vec_index = 0, scalar_index;
ebfd146a 2441 slp_tree node;
2635892a 2442 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
2443 gimple next_scalar_stmt;
2444 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2445 int first_mask_element;
22e4dee7
RH
2446 int index, unroll_factor, current_mask_element, ncopies;
2447 unsigned char *mask;
ebfd146a
IR
2448 bool only_one_vec = false, need_next_vector = false;
2449 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
694a4f61
IR
2450 int number_of_mask_fixes = 1;
2451 bool mask_fixed = false;
2452 bool needs_first_vector = false;
22e4dee7 2453 enum machine_mode mode;
ebfd146a 2454
22e4dee7
RH
2455 mode = TYPE_MODE (vectype);
2456
2457 if (!can_vec_perm_p (mode, false, NULL))
ebfd146a
IR
2458 {
2459 if (vect_print_dump_info (REPORT_DETAILS))
2460 {
2635892a 2461 fprintf (vect_dump, "no vect permute for ");
ebfd146a
IR
2462 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2463 }
2635892a 2464 return false;
ebfd146a
IR
2465 }
2466
2635892a
RH
2467 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2468 same size as the vector element being permuted. */
2469 mask_element_type
2470 = lang_hooks.types.type_for_size
2471 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
ebfd146a 2472 mask_type = get_vectype_for_scalar_type (mask_element_type);
ebfd146a 2473 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 2474 mask = XALLOCAVEC (unsigned char, nunits);
ebfd146a
IR
2475 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2476
2477 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2478 unrolling factor. */
b8698a0f 2479 orig_vec_stmts_num = group_size *
ebfd146a
IR
2480 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2481 if (orig_vec_stmts_num == 1)
2482 only_one_vec = true;
2483
b8698a0f 2484 /* Number of copies is determined by the final vectorization factor
ebfd146a 2485 relatively to SLP_NODE_INSTANCE unrolling factor. */
b8698a0f 2486 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
ebfd146a 2487
b8698a0f
L
2488 /* Generate permutation masks for every NODE. Number of masks for each NODE
2489 is equal to GROUP_SIZE.
2490 E.g., we have a group of three nodes with three loads from the same
2491 location in each node, and the vector size is 4. I.e., we have a
2492 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
2493 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2494 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2495 ...
2496
2635892a 2497 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 2498 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
2499 operation, and the mask element values can't be outside that range.
2500 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 2501 For the first two permutations we need the first and the second input
ebfd146a 2502 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 2503 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
2504 {c2,a3,b3,c3}. */
2505
ac47786e 2506 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
ebfd146a
IR
2507 {
2508 scalar_index = 0;
2509 index = 0;
2510 vect_stmts_counter = 0;
2511 vec_index = 0;
2512 first_vec_index = vec_index++;
2513 if (only_one_vec)
2514 second_vec_index = first_vec_index;
2515 else
2516 second_vec_index = vec_index++;
2517
2518 for (j = 0; j < unroll_factor; j++)
2519 {
2520 for (k = 0; k < group_size; k++)
2521 {
2635892a
RH
2522 first_mask_element = i + j * group_size;
2523 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2524 nunits, only_one_vec, index,
2525 mask, &current_mask_element,
2526 &need_next_vector,
2527 &number_of_mask_fixes, &mask_fixed,
2528 &needs_first_vector))
2529 return false;
2530 mask[index++] = current_mask_element;
ebfd146a 2531
2635892a 2532 if (index == nunits)
ebfd146a 2533 {
faf63e39
RH
2534 tree mask_vec = NULL;
2535
22e4dee7
RH
2536 if (!can_vec_perm_p (mode, false, mask))
2537 {
2538 if (vect_print_dump_info (REPORT_DETAILS))
2539 {
2540 fprintf (vect_dump, "unsupported vect permute { ");
2541 for (i = 0; i < nunits; ++i)
2542 fprintf (vect_dump, "%d ", mask[i]);
2543 fprintf (vect_dump, "}\n");
2544 }
2545 return false;
2546 }
2547
faf63e39
RH
2548 while (--index >= 0)
2549 {
2550 tree t = build_int_cst (mask_element_type, mask[index]);
2551 mask_vec = tree_cons (NULL, t, mask_vec);
2552 }
2553 mask_vec = build_vector (mask_type, mask_vec);
2554 index = 0;
2555
ebfd146a
IR
2556 if (!analyze_only)
2557 {
2558 if (need_next_vector)
2559 {
2560 first_vec_index = second_vec_index;
2561 second_vec_index = vec_index;
2562 }
2563
2564 next_scalar_stmt = VEC_index (gimple,
2565 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2566
2567 vect_create_mask_and_perm (stmt, next_scalar_stmt,
faf63e39 2568 mask_vec, first_vec_index, second_vec_index,
2635892a 2569 gsi, node, vectype, dr_chain,
faf63e39 2570 ncopies, vect_stmts_counter++);
ebfd146a 2571 }
b8698a0f
L
2572 }
2573 }
2574 }
2575 }
ebfd146a 2576
ebfd146a
IR
2577 return true;
2578}
2579
2580
2581
2582/* Vectorize SLP instance tree in postorder. */
2583
2584static bool
2585vect_schedule_slp_instance (slp_tree node, slp_instance instance,
a70d6342 2586 unsigned int vectorization_factor)
ebfd146a
IR
2587{
2588 gimple stmt;
2589 bool strided_store, is_store;
2590 gimple_stmt_iterator si;
2591 stmt_vec_info stmt_info;
2592 unsigned int vec_stmts_size, nunits, group_size;
2593 tree vectype;
2594 int i;
2595 slp_tree loads_node;
2596
2597 if (!node)
2598 return false;
2599
2600 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2601 vectorization_factor);
2602 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2603 vectorization_factor);
b8698a0f 2604
ebfd146a
IR
2605 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2606 stmt_info = vinfo_for_stmt (stmt);
2607
2608 /* VECTYPE is the type of the destination. */
b690cc0f 2609 vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
2610 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2611 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2612
2613 /* For each SLP instance calculate number of vector stmts to be created
ff802fa1 2614 for the scalar stmts in each node of the SLP tree. Number of vector
ebfd146a
IR
2615 elements in one vector iteration is the number of scalar elements in
2616 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2617 size. */
2618 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2619
2620 /* In case of load permutation we have to allocate vectorized statements for
2621 all the nodes that participate in that permutation. */
2622 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2623 {
ac47786e 2624 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
ebfd146a
IR
2625 {
2626 if (!SLP_TREE_VEC_STMTS (loads_node))
2627 {
2628 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2629 vec_stmts_size);
2630 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2631 }
2632 }
2633 }
2634
2635 if (!SLP_TREE_VEC_STMTS (node))
2636 {
2637 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2638 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2639 }
2640
2641 if (vect_print_dump_info (REPORT_DETAILS))
2642 {
2643 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2644 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
b8698a0f 2645 }
ebfd146a
IR
2646
2647 /* Loads should be inserted before the first load. */
2648 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2649 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
6aa904c4
IR
2650 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
2651 && SLP_INSTANCE_LOAD_PERMUTATION (instance))
ebfd146a 2652 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
9d5e7640 2653 else if (is_pattern_stmt_p (stmt_info))
6aa904c4 2654 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
ebfd146a
IR
2655 else
2656 si = gsi_for_stmt (stmt);
b8698a0f 2657
e4a707c4
IR
2658 /* Stores should be inserted just before the last store. */
2659 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2660 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2661 {
2662 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2663 si = gsi_for_stmt (last_store);
2664 }
2665
b010117a
IR
2666 /* Mark the first element of the reduction chain as reduction to properly
2667 transform the node. In the analysis phase only the last element of the
2668 chain is marked as reduction. */
2669 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2670 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2671 {
2672 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2673 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2674 }
2675
ebfd146a 2676 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
b5aeb3bb 2677 return is_store;
ebfd146a
IR
2678}
2679
2680
ff802fa1
IR
2681/* Generate vector code for all SLP instances in the loop/basic block. */
2682
ebfd146a 2683bool
a70d6342 2684vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a 2685{
a70d6342 2686 VEC (slp_instance, heap) *slp_instances;
ebfd146a 2687 slp_instance instance;
a70d6342 2688 unsigned int i, vf;
ebfd146a
IR
2689 bool is_store = false;
2690
a70d6342
IR
2691 if (loop_vinfo)
2692 {
2693 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2694 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
b8698a0f 2695 }
a70d6342
IR
2696 else
2697 {
2698 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2699 vf = 1;
b8698a0f 2700 }
a70d6342 2701
ac47786e 2702 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
ebfd146a
IR
2703 {
2704 /* Schedule the tree of INSTANCE. */
2705 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
a70d6342 2706 instance, vf);
8644a673
IR
2707 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2708 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
ebfd146a
IR
2709 fprintf (vect_dump, "vectorizing stmts using SLP.");
2710 }
2711
ac47786e 2712 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
b5aeb3bb
IR
2713 {
2714 slp_tree root = SLP_INSTANCE_TREE (instance);
2715 gimple store;
2716 unsigned int j;
2717 gimple_stmt_iterator gsi;
2718
2719 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2720 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2721 {
2722 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2723 break;
2724
2725 /* Free the attached stmt_vec_info and remove the stmt. */
2726 gsi = gsi_for_stmt (store);
2727 gsi_remove (&gsi, true);
2728 free_stmt_vec_info (store);
2729 }
2730 }
2731
ebfd146a
IR
2732 return is_store;
2733}
a70d6342
IR
2734
2735
2736/* Vectorize the basic block. */
2737
2738void
2739vect_slp_transform_bb (basic_block bb)
2740{
2741 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2742 gimple_stmt_iterator si;
2743
2744 gcc_assert (bb_vinfo);
2745
2746 if (vect_print_dump_info (REPORT_DETAILS))
2747 fprintf (vect_dump, "SLPing BB\n");
2748
2749 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2750 {
2751 gimple stmt = gsi_stmt (si);
2752 stmt_vec_info stmt_info;
2753
2754 if (vect_print_dump_info (REPORT_DETAILS))
2755 {
2756 fprintf (vect_dump, "------>SLPing statement: ");
2757 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2758 }
2759
2760 stmt_info = vinfo_for_stmt (stmt);
2761 gcc_assert (stmt_info);
2762
2763 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2764 if (STMT_SLP_TYPE (stmt_info))
2765 {
2766 vect_schedule_slp (NULL, bb_vinfo);
2767 break;
2768 }
2769 }
2770
2771 mark_sym_for_renaming (gimple_vop (cfun));
2772 /* The memory tags and pointers in vectorized statements need to
2773 have their SSA forms updated. FIXME, why can't this be delayed
2774 until all the loops have been transformed? */
2775 update_ssa (TODO_update_ssa);
2776
2777 if (vect_print_dump_info (REPORT_DETAILS))
e9dbe7bb 2778 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
a70d6342 2779
12aaf609
IR
2780 destroy_bb_vec_info (bb_vinfo);
2781}
a70d6342 2782