/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location on success and UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree ((slp_tree) child);

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}


/* Create an SLP node for SCALAR_STMTS.  */
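/* For instance (an illustrative sketch, not from real compiler input), for
   a group of two isomorphic additions

       t0 = b0 + c0;
       t1 = b1 + c1;

   the new node records both scalar stmts and reserves two child slots, one
   per operand.  */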

static slp_tree
vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
{
  slp_tree node = XNEW (struct _slp_tree);
  gimple stmt = VEC_index (gimple, scalar_stmts, 0);
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    nops = gimple_num_ops (stmt) - 1;
  else
    {
      /* Neither a call nor an assignment: the stmt cannot be packed.
         Release the node to avoid leaking it.  */
      free (node);
      return NULL;
    }

  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  return node;
}


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */

static VEC (slp_oprnd_info, heap) *
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  VEC (slp_oprnd_info, heap) *oprnds_info;

  oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_def_type = NULL_TREE;
      oprnd_info->first_const_oprnd = NULL_TREE;
      oprnd_info->first_pattern = false;
      VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  Free the def-stmts too if FREE_DEF_STMTS is true.
   (FREE_DEF_STMTS is true when the SLP analysis fails, and false when it
   succeeds.  In the latter case we don't need the operands info that we used
   to check isomorphism of the stmts, but we still need the def-stmts - they
   are used as scalar stmts in SLP nodes.)  */

static void
vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info,
                      bool free_def_stmts)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
    {
      if (free_def_stmts)
        VEC_free (gimple, heap, oprnd_info->def_stmts);

      /* The info structs themselves are not needed in either case.  */
      free (oprnd_info);
    }

  VEC_free (slp_oprnd_info, heap, *oprnds_info);
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */
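/* For example (an illustrative sketch, not from real compiler input), for
   the group

       a[0] = b[0] + c[0];
       a[1] = b[1] + c[1];

   operand 0 collects the def-stmts of {b[0], b[1]} and operand 1 those of
   {c[0], c[1]}; each stmt after the first must match the def types recorded
   for the first stmt.  */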

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             int ncopies_for_cost, bool first,
                             VEC (slp_oprnd_info, heap) **oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def, def_op0 = NULL_TREE;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  enum vect_def_type dt_op0 = vect_uninitialized_def;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree lhs = gimple_get_lhs (stmt);
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;
  bool pattern = false;
  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    number_of_oprnds = gimple_call_num_args (stmt);
  else
    number_of_oprnds = gimple_num_ops (stmt) - 1;

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);
      oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "Build SLP failed: some of the stmts"
                           " are in a pattern, and others are not ");
                  print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type
              || STMT_VINFO_PATTERN_DEF_STMT (vinfo_for_stmt (def_stmt)))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          if (def)
            {
              oprnd_info->first_def_type = TREE_TYPE (def);
              oprnd_info->first_const_oprnd = NULL_TREE;
            }
          else
            {
              oprnd_info->first_def_type = NULL_TREE;
              oprnd_info->first_const_oprnd = oprnd;
            }

          if (i == 0)
            {
              def_op0 = def;
              dt_op0 = dt;
              /* Analyze costs (for the first stmt of the group only).  */
              if (REFERENCE_CLASS_P (lhs))
                /* Store.  */
                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                       dt, slp_node);
              else
                /* Not memory operation (we don't call this function for
                   loads).  */
                vect_model_simple_cost (stmt_info, ncopies_for_cost, &dt,
                                        slp_node);
            }
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def))
               || (oprnd_info->first_def_type != NULL_TREE
                   && def
                   && !types_compatible_p (oprnd_info->first_def_type,
                                           TREE_TYPE (def))))
              || (!def
                  && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
                                          TREE_TYPE (oprnd)))
              || different_types)
            {
              if (number_of_oprnds != 2)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }

              /* Try to swap operands in case of binary operation.  */
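              /* E.g. (an illustrative sketch, not from real input): in a
                 group like

                     a[0] = b[0] + c[0];
                     a[1] = c[1] + b[1];

                 swapping the operands of the second stmt makes the group
                 isomorphic, which is valid because PLUS_EXPR is
                 commutative.  */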
              if (i == 0)
                different_types = true;
              else
                {
                  oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
                  if (is_gimple_assign (stmt)
                      && (rhs_code = gimple_assign_rhs_code (stmt))
                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
                      && commutative_tree_code (rhs_code)
                      && oprnd0_info->first_dt == dt
                      && oprnd_info->first_dt == dt_op0
                      && def_op0 && def
                      && !(oprnd0_info->first_def_type
                           && !types_compatible_p (oprnd0_info->first_def_type,
                                                   TREE_TYPE (def)))
                      && !(oprnd_info->first_def_type
                           && !types_compatible_p (oprnd_info->first_def_type,
                                                   TREE_TYPE (def_op0))))
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Swapping operands of ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                          gimple_assign_rhs2_ptr (stmt));
                    }
                  else
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump,
                                 "Build SLP failed: different types ");

                      return false;
                    }
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          if (different_types)
            {
              oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
              oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 1);
              if (i == 0)
                VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
              else
                VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
            }
          else
            VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);

          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}


/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */
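/* For instance (an illustrative sketch, not from real compiler input), for

       t0 = b[0] + c[0];  a[0] = t0;
       t1 = b[1] + c[1];  a[1] = t1;

   the root node holds the two stores, its child holds the two additions,
   and the recursion stops at the groups of loads {b[0], b[1]} and
   {c[0], c[1]}.  */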

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor, bool *loads_permuted)
{
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;
  VEC (slp_oprnd_info, heap) *oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else
    nops = gimple_num_ops (stmt) - 1;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info, true);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info, true);
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info, true);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      vect_free_oprnd_info (&oprnds_info, true);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      vect_free_oprnd_info (&oprnds_info, true);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info, true);
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info, true);
              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, ncopies_for_cost,
                                                (i == 0), &oprnds_info))
                {
                  vect_free_oprnd_info (&oprnds_info, true);
                  return false;
                }
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info, true);
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (loop_vinfo
                  && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info, true);
                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      vect_free_oprnd_info (&oprnds_info, true);
                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      vect_free_oprnd_info (&oprnds_info, true);
                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Non-strided loads are not supported.  */
              vect_free_oprnd_info (&oprnds_info, true);
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info, true);
              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            ncopies_for_cost, (i == 0),
                                            &oprnds_info))
            {
              vect_free_oprnd_info (&oprnds_info, true);
              return false;
            }
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      VEC_safe_push (slp_tree, heap, *loads, *node);
      if (permutation)
        {
          *loads_permuted = true;
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check here complex numbers chains, so we set
             LOADS_PERMUTED for further check in
             vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            *loads_permuted = true;
        }

      /* The def-stmts are not used for loads, so release everything.  */
      vect_free_oprnd_info (&oprnds_info, true);
      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
                                   inside_cost, outside_cost, ncopies_for_cost,
                                   max_nunits, load_permutation, loads,
                                   vectorization_factor, loads_permuted))
        {
          free (child);
          vect_free_oprnd_info (&oprnds_info, true);
          return false;
        }

      VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
    }

  vect_free_oprnd_info (&oprnds_info, false);
  return true;
}


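/* Dump the SLP tree rooted at NODE, and recursively its children, to the
   vectorizer dump file.  */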
static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree ((slp_tree) child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts ((slp_tree) child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant ((slp_tree) child);
}


/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}


/* Rearrange the statements of NODE according to PERMUTATION.  */
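/* E.g. (illustrative): with PERMUTATION {1, 0}, the stmt in slot 0 of every
   node moves to slot 1 and vice versa.  */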

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */
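/* For instance (an illustrative sketch): with GROUP_SIZE 2, a permutation
   {0, 0, 1, 1} of length GROUP_SIZE * GROUP_SIZE says that both stmts of
   the first load node read element 0 of the interleaving chain and both
   stmts of the second node read element 1 - the 0..01..1.. form accepted
   below.  */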

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node
                        = VEC_index (slp_tree,
                                     SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first
                        = VEC_index (gimple,
                                     SLP_TREE_SCALAR_STMTS (other_complex_node),
                                     0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              SET_BIT (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                       != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
              if (first_load
                  != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "unsupported unaligned load ");
                          print_gimple_stmt (vect_dump, first_load, 0,
                                             TDF_SLIM);
                        }
                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      {
        /* Free the bitmap before bailing out to avoid leaking it.  */
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}


/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;
  VEC (gimple, heap) *scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
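  /* E.g. (illustrative numbers only): with NUNITS 4 and GROUP_SIZE 6,
     least_common_multiple (4, 6) is 12, so the group must be unrolled
     12 / 6 = 2 times to fill whole vectors.  */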
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                 " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  scalar_stmts = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          VEC_safe_push (gimple, heap, scalar_stmts, next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
        VEC_safe_push (gimple, heap, scalar_stmts, next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
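  /* E.g. (illustrative numbers only): GROUP_SIZE 8 with NUNITS 4 and
     UNROLLING_FACTOR 1 gives ncopies_for_cost = 1 * 8 / 4 = 2.  */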
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor, &loads_permuted))
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      if (unrolling_factor != 1 && !loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                     " block SLP");
          return false;
        }

      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;

      if (loads_permuted)
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                           "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}


/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
  gimple first_element;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  if (loop_vinfo
      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
    {
      /* Find SLP sequences starting from reduction chains.  */
      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
          ok = true;
        else
          return false;

      /* Don't try to vectorize SLP reductions if reduction chain was
         detected.  */
      return ok;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later
         we call vect_detect_hybrid_slp () to find stmts that need hybrid SLP
         and loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
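/* E.g. (an illustrative sketch): if the result of a PURE_SLP stmt is also
   used by a relevant stmt that is not part of any SLP instance, the SLP
   stmt must additionally be vectorized by loop-based vectorization, so it
   is marked HYBRID.  */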

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_detect_hybrid_slp_stmts ((slp_tree) child);
}


/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}


/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}


1713/* Analyze statements contained in SLP tree node after recursively analyzing
1714 the subtree. Return TRUE if the operations are supported. */
1715
1716static bool
1717vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1718{
1719 bool dummy;
1720 int i;
1721 gimple stmt;
d092494c 1722 slp_void_p child;
1723
1724 if (!node)
1725 return true;
1726
1727 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
1728 if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
1729 return false;
a70d6342 1730
ac47786e 1731 FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
1732 {
1733 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1734 gcc_assert (stmt_info);
1735 gcc_assert (PURE_SLP_STMT (stmt_info));
1736
1737 if (!vect_analyze_stmt (stmt, &dummy, node))
1738 return false;
1739 }
1740
1741 return true;
1742}
1743
1744
ff802fa1 1745/* Analyze statements in SLP instances of the basic block. Return TRUE if the
1746 operations are supported. */
1747
1748static bool
1749vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1750{
1751 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1752 slp_instance instance;
1753 int i;
1754
1755 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1756 {
b8698a0f 1757 if (!vect_slp_analyze_node_operations (bb_vinfo,
1758 SLP_INSTANCE_TREE (instance)))
1759 {
1760 vect_free_slp_instance (instance);
1761 VEC_ordered_remove (slp_instance, slp_instances, i);
1762 }
1763 else
1764 i++;
1765 }
1766
1767 if (!VEC_length (slp_instance, slp_instances))
1768 return false;
1769
1770 return true;
1771}
1772
1773/* Check if vectorization of the basic block is profitable. */
1774
1775static bool
1776vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
1777{
1778 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1779 slp_instance instance;
1780 int i;
1781 unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
1782 unsigned int stmt_cost;
1783 gimple stmt;
1784 gimple_stmt_iterator si;
1785 basic_block bb = BB_VINFO_BB (bb_vinfo);
1786 stmt_vec_info stmt_info = NULL;
1787 tree dummy_type = NULL;
1788 int dummy = 0;
1789
1790 /* Calculate vector costs. */
ac47786e 1791 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1792 {
1793 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
1794 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
1795 }
1796
1797 /* Calculate scalar cost. */
1798 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1799 {
1800 stmt = gsi_stmt (si);
1801 stmt_info = vinfo_for_stmt (stmt);
1802
1803 if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
1804 || !PURE_SLP_STMT (stmt_info))
1805 continue;
1806
1807 if (STMT_VINFO_DATA_REF (stmt_info))
1808 {
1809 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1810 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1811 (scalar_load, dummy_type, dummy);
1812 else
1813 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1814 (scalar_store, dummy_type, dummy);
1815 }
1816 else
1817 stmt_cost = targetm.vectorize.builtin_vectorization_cost
1818 (scalar_stmt, dummy_type, dummy);
1819
1820 scalar_cost += stmt_cost;
1821 }
1822
1823 if (vect_print_dump_info (REPORT_COST))
1824 {
1825 fprintf (vect_dump, "Cost model analysis: \n");
1826 fprintf (vect_dump, " Vector inside of basic block cost: %d\n",
1827 vec_inside_cost);
1828 fprintf (vect_dump, " Vector outside of basic block cost: %d\n",
1829 vec_outside_cost);
1830 fprintf (vect_dump, " Scalar cost of basic block: %d", scalar_cost);
1831 }
1832
 1833 /* Vectorization is profitable if its cost is less than the cost of the
 1834 scalar version. */
1835 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
1836 return false;
1837
1838 return true;
1839}
1840
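/* An illustrative, self-contained sketch (added for exposition, not part
   of the original file) of the decision rule implemented by
   vect_bb_vectorization_profitable_p () above: the vectorized version is
   kept only when the summed vector costs are strictly smaller than the
   scalar cost.  */

static bool
slp_bb_profitable_sketch (unsigned int vec_outside_cost,
                          unsigned int vec_inside_cost,
                          unsigned int scalar_cost)
{
  /* Mirrors the final comparison of the function above.  */
  return vec_outside_cost + vec_inside_cost < scalar_cost;
}
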
1841/* Check if the basic block can be vectorized. */
a70d6342 1842
1843static bb_vec_info
1844vect_slp_analyze_bb_1 (basic_block bb)
1845{
1846 bb_vec_info bb_vinfo;
1847 VEC (ddr_p, heap) *ddrs;
1848 VEC (slp_instance, heap) *slp_instances;
1849 slp_instance instance;
8e19f5a1 1850 int i;
1851 int min_vf = 2;
1852 int max_vf = MAX_VECTORIZATION_FACTOR;
e4a707c4 1853
1854 bb_vinfo = new_bb_vec_info (bb);
1855 if (!bb_vinfo)
1856 return NULL;
1857
777e1f09 1858 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
1859 {
1860 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1861 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1862 "block.\n");
b8698a0f 1863
1864 destroy_bb_vec_info (bb_vinfo);
1865 return NULL;
1866 }
1867
1868 ddrs = BB_VINFO_DDRS (bb_vinfo);
b8698a0f 1869 if (!VEC_length (ddr_p, ddrs))
1870 {
1871 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1872 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1873 "block.\n");
1874
1875 destroy_bb_vec_info (bb_vinfo);
1876 return NULL;
1877 }
1878
1879 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1880 || min_vf > max_vf)
1881 {
1882 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1883 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1884 "in basic block.\n");
1885
1886 destroy_bb_vec_info (bb_vinfo);
1887 return NULL;
1888 }
1889
1890 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1891 {
1892 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1893 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1894 "block.\n");
b8698a0f 1895
1896 destroy_bb_vec_info (bb_vinfo);
1897 return NULL;
1898 }
b8698a0f 1899
1900 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1901 {
1902 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1903 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1904 "block.\n");
b8698a0f 1905
1906 destroy_bb_vec_info (bb_vinfo);
1907 return NULL;
1908 }
1909
1910 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1911 {
1912 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1913 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1914 "block.\n");
1915
1916 destroy_bb_vec_info (bb_vinfo);
1917 return NULL;
1918 }
1919
1920 /* Check the SLP opportunities in the basic block, analyze and build SLP
1921 trees. */
1922 if (!vect_analyze_slp (NULL, bb_vinfo))
1923 {
1924 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1925 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1926 "in basic block.\n");
1927
1928 destroy_bb_vec_info (bb_vinfo);
1929 return NULL;
1930 }
b8698a0f 1931
1932 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1933
1934 /* Mark all the statements that we want to vectorize as pure SLP and
1935 relevant. */
ac47786e 1936 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
1937 {
1938 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1939 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
b8698a0f 1940 }
1941
1942 if (!vect_slp_analyze_operations (bb_vinfo))
1943 {
1944 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1945 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1946
1947 destroy_bb_vec_info (bb_vinfo);
1948 return NULL;
1949 }
1950
1951 /* Cost model: check if the vectorization is worthwhile. */
1952 if (flag_vect_cost_model
1953 && !vect_bb_vectorization_profitable_p (bb_vinfo))
1954 {
1955 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1956 fprintf (vect_dump, "not vectorized: vectorization is not "
1957 "profitable.\n");
1958
1959 destroy_bb_vec_info (bb_vinfo);
1960 return NULL;
1961 }
1962
a70d6342 1963 if (vect_print_dump_info (REPORT_DETAILS))
e9dbe7bb 1964 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
1965
1966 return bb_vinfo;
1967}
1968
1969
1970bb_vec_info
1971vect_slp_analyze_bb (basic_block bb)
1972{
1973 bb_vec_info bb_vinfo;
1974 int insns = 0;
1975 gimple_stmt_iterator gsi;
1976 unsigned int vector_sizes;
1977
1978 if (vect_print_dump_info (REPORT_DETAILS))
1979 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1980
1981 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1982 {
1983 gimple stmt = gsi_stmt (gsi);
1984 if (!is_gimple_debug (stmt)
1985 && !gimple_nop_p (stmt)
1986 && gimple_code (stmt) != GIMPLE_LABEL)
1987 insns++;
1988 }
1989
1990 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1991 {
1992 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1993 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1994 "block.\n");
1995
1996 return NULL;
1997 }
1998
1999 /* Autodetect first vector size we try. */
2000 current_vector_size = 0;
2001 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2002
2003 while (1)
2004 {
2005 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2006 if (bb_vinfo)
2007 return bb_vinfo;
2008
2009 destroy_bb_vec_info (bb_vinfo);
2010
2011 vector_sizes &= ~current_vector_size;
2012 if (vector_sizes == 0
2013 || current_vector_size == 0)
2014 return NULL;
2015
2016 /* Try the next biggest vector size. */
2017 current_vector_size = 1 << floor_log2 (vector_sizes);
2018 if (vect_print_dump_info (REPORT_DETAILS))
2019 fprintf (vect_dump, "***** Re-trying analysis with "
2020 "vector size %d\n", current_vector_size);
2021 }
2022}
2023
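/* An illustrative sketch (added for exposition; assumes VECTOR_SIZES is a
   bitmask of the vector sizes supported by the target, as returned by
   targetm.vectorize.autovectorize_vector_sizes ()) of the retry logic
   above: after a failed attempt the size just tried is cleared from the
   mask and the largest remaining size, 1 << floor_log2 (vector_sizes),
   is tried next.  */

static unsigned int
next_vector_size_sketch (unsigned int vector_sizes,
                         unsigned int current_vector_size)
{
  unsigned int size;

  /* Drop the size that was just tried.  */
  vector_sizes &= ~current_vector_size;
  if (vector_sizes == 0)
    return 0;  /* Nothing left to try.  */

  /* Largest remaining power of two, computed by hand so the sketch
     stays self-contained.  */
  for (size = 1; (size << 1) != 0 && (size << 1) <= vector_sizes; )
    size <<= 1;
  return size;
}
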
2024
b8698a0f 2025/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
2026 the number of created vector stmts depends on the unrolling factor).
2027 However, the actual number of vector stmts for every SLP node depends on
2028 VF which is set later in vect_analyze_operations (). Hence, SLP costs
2029 should be updated. In this function we assume that the inside costs
2030 calculated in vect_model_xxx_cost are linear in ncopies. */
2031
2032void
2033vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
2034{
2035 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2036 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2037 slp_instance instance;
2038
2039 if (vect_print_dump_info (REPORT_SLP))
2040 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
2041
ac47786e 2042 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
ebfd146a 2043 /* We assume that costs are linear in ncopies. */
2044 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
2045 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
2046}
2047
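/* Worked example for the update above (illustrative numbers): with
   VF = 8 and an SLP unrolling factor of 2, four times as many vector
   stmts are generated as assumed at analysis time, so the inside cost
   of each such instance is multiplied by 8 / 2 = 4.  */
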
a70d6342 2048
2049/* For constant and loop invariant defs of SLP_NODE this function returns
2050 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2051 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2052 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2053 REDUC_INDEX is the index of the reduction operand in the statements, unless
2054 it is -1. */
2055
2056static void
2057vect_get_constant_vectors (tree op, slp_tree slp_node,
2058 VEC (tree, heap) **vec_oprnds,
2059 unsigned int op_num, unsigned int number_of_vectors,
2060 int reduc_index)
2061{
2062 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2063 gimple stmt = VEC_index (gimple, stmts, 0);
2064 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2065 int nunits;
2066 tree vec_cst;
2067 tree t = NULL_TREE;
2068 int j, number_of_places_left_in_vector;
2069 tree vector_type;
9dc3f7de 2070 tree vop;
2071 int group_size = VEC_length (gimple, stmts);
2072 unsigned int vec_num, i;
2073 int number_of_copies = 1;
2074 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
2075 bool constant_p, is_store;
b5aeb3bb 2076 tree neutral_op = NULL;
d59dc888 2077 enum tree_code code = gimple_assign_rhs_code (stmt);
2078 gimple def_stmt;
2079 struct loop *loop;
b5aeb3bb 2080
2081 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2082 && reduc_index != -1)
b5aeb3bb 2083 {
b5aeb3bb 2084 op_num = reduc_index - 1;
9dc3f7de 2085 op = gimple_op (stmt, reduc_index);
b5aeb3bb 2086 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
ff802fa1 2087 we need either neutral operands or the original operands. See
2088 get_initial_def_for_reduction() for details. */
2089 switch (code)
2090 {
2091 case WIDEN_SUM_EXPR:
2092 case DOT_PROD_EXPR:
2093 case PLUS_EXPR:
2094 case MINUS_EXPR:
2095 case BIT_IOR_EXPR:
2096 case BIT_XOR_EXPR:
2097 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2098 neutral_op = build_real (TREE_TYPE (op), dconst0);
2099 else
2100 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2101
2102 break;
2103
2104 case MULT_EXPR:
2105 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2106 neutral_op = build_real (TREE_TYPE (op), dconst1);
2107 else
2108 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2109
2110 break;
2111
2112 case BIT_AND_EXPR:
2113 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2114 break;
2115
2116 case MAX_EXPR:
2117 case MIN_EXPR:
2118 def_stmt = SSA_NAME_DEF_STMT (op);
2119 loop = (gimple_bb (stmt))->loop_father;
2120 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2121 loop_preheader_edge (loop));
2122 break;
2123
b5aeb3bb 2124 default:
0e93a64e 2125 neutral_op = NULL;
2126 }
2127 }
2128
2129 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2130 {
2131 is_store = true;
2132 op = gimple_assign_rhs1 (stmt);
2133 }
2134 else
2135 is_store = false;
2136
2137 gcc_assert (op);
2138
2139 if (CONSTANT_CLASS_P (op))
d59dc888 2140 constant_p = true;
ebfd146a 2141 else
2142 constant_p = false;
2143
9dc3f7de 2144 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
cd481d83 2145 gcc_assert (vector_type);
2146 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2147
2148 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 2149 created vectors. It is greater than 1 if unrolling is performed.
2150
2151 For example, we have two scalar operands, s1 and s2 (e.g., group of
2152 strided accesses of size two), while NUNITS is four (i.e., four scalars
2153 of this type can be packed in a vector). The output vector will contain
2154 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2155 will be 2).
2156
b8698a0f 2157 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2158 containing the operands.
2159
2160 For example, NUNITS is four as before, and the group size is 8
2161 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2162 {s5, s6, s7, s8}. */
b8698a0f 2163
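  /* Worked instances of the formula below (an illustrative addition):
     for the first example above, least_common_multiple (4, 2) / 2 = 2
     copies; for the second, least_common_multiple (4, 8) / 8 = 1 copy,
     spread over 8 / 4 = 2 vectors.  */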
2164 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
2165
2166 number_of_places_left_in_vector = nunits;
2167 for (j = 0; j < number_of_copies; j++)
2168 {
2169 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
2170 {
2171 if (is_store)
2172 op = gimple_assign_rhs1 (stmt);
2173 else
2174 op = gimple_op (stmt, op_num + 1);
b8698a0f 2175
2176 if (reduc_index != -1)
2177 {
2178 loop = (gimple_bb (stmt))->loop_father;
2179 def_stmt = SSA_NAME_DEF_STMT (op);
2180
2181 gcc_assert (loop);
2182
 2183 /* Get the def before the loop. In a reduction chain we have only
 2184 one initial value. */
2185 if ((j != (number_of_copies - 1)
2186 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2187 && i != 0))
2188 && neutral_op)
b5aeb3bb 2189 op = neutral_op;
2190 else
2191 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2192 loop_preheader_edge (loop));
2193 }
2194
2195 /* Create 'vect_ = {op0,op1,...,opn}'. */
2196 t = tree_cons (NULL_TREE, op, t);
2197
2198 number_of_places_left_in_vector--;
2199
2200 if (number_of_places_left_in_vector == 0)
2201 {
2202 number_of_places_left_in_vector = nunits;
2203
2204 if (constant_p)
2205 vec_cst = build_vector (vector_type, t);
2206 else
2207 vec_cst = build_constructor_from_list (vector_type, t);
2208 VEC_quick_push (tree, voprnds,
2209 vect_init_vector (stmt, vec_cst, vector_type, NULL));
2210 t = NULL_TREE;
2211 }
2212 }
2213 }
2214
b8698a0f 2215 /* Since the vectors are created in reverse order, we should invert
 2216 them. */
2217 vec_num = VEC_length (tree, voprnds);
2218 for (j = vec_num - 1; j >= 0; j--)
2219 {
2220 vop = VEC_index (tree, voprnds, j);
2221 VEC_quick_push (tree, *vec_oprnds, vop);
2222 }
2223
2224 VEC_free (tree, heap, voprnds);
2225
 2226 /* In case VF is greater than the unrolling factor needed for the SLP
 2227 group of stmts, NUMBER_OF_VECTORS to be created is greater than
 2228 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
 2229 to replicate the vectors. */
2230 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
2231 {
2232 tree neutral_vec = NULL;
2233
2234 if (neutral_op)
2235 {
2236 if (!neutral_vec)
b9acc9f1 2237 neutral_vec = build_vector_from_val (vector_type, neutral_op);
2238
2239 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
2240 }
2241 else
2242 {
2243 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
2244 VEC_quick_push (tree, *vec_oprnds, vop);
2245 }
2246 }
2247}
2248
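/* A self-contained sketch (added for exposition; a simplification of
   vect_get_constant_vectors () above, with EMIT_VECTOR a hypothetical
   callback standing in for vect_init_vector ()): pack GROUP_SIZE scalar
   values into vectors of NUNITS slots, repeating the group
   NUMBER_OF_COPIES times.  Unlike the real code, which fills each vector
   back to front and then inverts the result, this version fills front
   to back.  */

static void
pack_group_sketch (const int *group, int group_size, int nunits,
                   void (*emit_vector) (const int *elts, int n))
{
  int buf[32];  /* The sketch assumes NUNITS <= 32.  */
  int gcd = nunits, rem = group_size, t;
  int copies, pos = 0, i, j;

  /* copies = least_common_multiple (nunits, group_size) / group_size,
     using lcm (a, b) / b == a / gcd (a, b).  */
  while (rem)
    {
      t = gcd % rem;
      gcd = rem;
      rem = t;
    }
  copies = nunits / gcd;

  for (j = 0; j < copies; j++)
    for (i = 0; i < group_size; i++)
      {
        buf[pos++] = group[i];
        if (pos == nunits)
          {
            emit_vector (buf, nunits);  /* One vector is full.  */
            pos = 0;
          }
      }
}
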
2249
2250/* Get vectorized definitions from SLP_NODE that contains corresponding
2251 vectorized def-stmts. */
2252
2253static void
2254vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
2255{
2256 tree vec_oprnd;
2257 gimple vec_def_stmt;
2258 unsigned int i;
2259
2260 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
2261
ac47786e 2262 FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
2263 {
2264 gcc_assert (vec_def_stmt);
2265 vec_oprnd = gimple_get_lhs (vec_def_stmt);
2266 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
2267 }
2268}
2269
2270
2271/* Get vectorized definitions for SLP_NODE.
2272 If the scalar definitions are loop invariants or constants, collect them and
2273 call vect_get_constant_vectors() to create vector stmts.
2274 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2275 must be stored in the corresponding child of SLP_NODE, and we call
2276 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 2277
ebfd146a 2278void
2279vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
2280 VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
ebfd146a 2281{
2282 gimple first_stmt, first_def;
2283 int number_of_vects = 0, i;
2284 unsigned int child_index = 0;
b8698a0f 2285 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
2286 slp_tree child = NULL;
2287 VEC (tree, heap) *vec_defs;
2288 tree oprnd, def_lhs;
2289 bool vectorized_defs;
2290
2291 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
d092494c 2292 FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
ebfd146a 2293 {
2294 /* For each operand we check if it has vectorized definitions in a child
2295 node or we need to create them (for invariants and constants). We
2296 check if the LHS of the first stmt of the next child matches OPRND.
2297 If it does, we found the correct child. Otherwise, we call
 2298 vect_get_constant_vectors (), and do not advance CHILD_INDEX, in order
 2299 to check this same child node for the next operand. */
2300 vectorized_defs = false;
2301 if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
ebfd146a 2302 {
2303 child = (slp_tree) VEC_index (slp_void_p,
2304 SLP_TREE_CHILDREN (slp_node),
2305 child_index);
2306 first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);
2307
 2308 /* At the end of a pattern sequence we have a use of the original stmt,
 2309 so we need to compare OPRND with the original def. */
2310 if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
2311 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
2312 && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
2313 first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2314
2315 if (is_gimple_call (first_def))
2316 def_lhs = gimple_call_lhs (first_def);
2317 else
2318 def_lhs = gimple_assign_lhs (first_def);
ebfd146a 2319
2320 if (operand_equal_p (oprnd, def_lhs, 0))
2321 {
2322 /* The number of vector defs is determined by the number of
2323 vector statements in the node from which we get those
2324 statements. */
2325 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2326 vectorized_defs = true;
2327 child_index++;
2328 }
2329 }
ebfd146a 2330
2331 if (!vectorized_defs)
2332 {
2333 if (i == 0)
2334 {
2335 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2336 /* Number of vector stmts was calculated according to LHS in
2337 vect_schedule_slp_instance (), fix it by replacing LHS with
2338 RHS, if necessary. See vect_get_smallest_scalar_type () for
2339 details. */
2340 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2341 &rhs_size_unit);
2342 if (rhs_size_unit != lhs_size_unit)
2343 {
2344 number_of_vects *= rhs_size_unit;
2345 number_of_vects /= lhs_size_unit;
2346 }
2347 }
2348 }
b5aeb3bb 2349
2350 /* Allocate memory for vectorized defs. */
2351 vec_defs = VEC_alloc (tree, heap, number_of_vects);
ebfd146a 2352
2353 /* For reduction defs we call vect_get_constant_vectors (), since we are
2354 looking for initial loop invariant values. */
2355 if (vectorized_defs && reduc_index == -1)
2356 /* The defs are already vectorized. */
2357 vect_get_slp_vect_defs (child, &vec_defs);
2358 else
2359 /* Build vectors from scalar defs. */
2360 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
2361 number_of_vects, reduc_index);
ebfd146a 2362
d092494c 2363 VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);
ebfd146a 2364
2365 /* For reductions, we only need initial values. */
2366 if (reduc_index != -1)
2367 return;
2368 }
2369}
2370
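/* Illustrative walk-through of the matching logic above (added for
   exposition): for a group of stmts of the form a = b + c, where the
   b's are computed by an SLP child node and the c's are loop invariant,
   operand 0 matches the LHS of the child's first stmt, so its defs are
   fetched with vect_get_slp_vect_defs (); operand 1 matches no child,
   so its vectors are built by vect_get_constant_vectors ().  */
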
a70d6342 2371
b8698a0f 2372/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2373 building a vector of type MASK_TYPE from it) and two input vectors placed in
2374 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2375 shifting by STRIDE elements of DR_CHAIN for every copy.
2376 (STRIDE is the number of vectorized stmts for NODE divided by the number of
b8698a0f 2377 copies).
2378 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2379 the created stmts must be inserted. */
2380
2381static inline void
b8698a0f 2382vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
faf63e39 2383 tree mask, int first_vec_indx, int second_vec_indx,
b8698a0f 2384 gimple_stmt_iterator *gsi, slp_tree node,
2635892a 2385 tree vectype, VEC(tree,heap) *dr_chain,
2386 int ncopies, int vect_stmts_counter)
2387{
faf63e39 2388 tree perm_dest;
2389 gimple perm_stmt = NULL;
2390 stmt_vec_info next_stmt_info;
0f900dfa 2391 int i, stride;
ebfd146a 2392 tree first_vec, second_vec, data_ref;
ebfd146a 2393
ebfd146a 2394 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
ebfd146a 2395
b8698a0f 2396 /* Initialize the vect stmts of NODE to properly insert the generated
ebfd146a 2397 stmts later. */
b8698a0f 2398 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
2399 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
2400 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
2401
2402 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
2403 for (i = 0; i < ncopies; i++)
2404 {
2405 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
2406 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
2407
ebfd146a 2408 /* Generate the permute statement. */
2409 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
2410 first_vec, second_vec, mask);
ebfd146a 2411 data_ref = make_ssa_name (perm_dest, perm_stmt);
2635892a 2412 gimple_set_lhs (perm_stmt, data_ref);
ebfd146a 2413 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
ebfd146a 2414
2415 /* Store the vector statement in NODE. */
2416 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
2417 stride * i + vect_stmts_counter, perm_stmt);
2418
2419 first_vec_indx += stride;
2420 second_vec_indx += stride;
2421 }
2422
2423 /* Mark the scalar stmt as vectorized. */
2424 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
2425 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
2426}
2427
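/* Worked example for the function above (illustrative numbers): with
   SLP_TREE_NUMBER_OF_VEC_STMTS = 4 and NCOPIES = 2, STRIDE = 2; copy 0
   permutes the DR_CHAIN elements at FIRST_VEC_INDX and SECOND_VEC_INDX,
   and copy 1 the elements two positions further on.  */
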
2428
b8698a0f 2429/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
ebfd146a 2430 return in CURRENT_MASK_ELEMENT its equivalent in target specific
ff802fa1 2431 representation. Check that the mask is valid and return FALSE if not.
 2432 Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
 2433 the next vector, i.e., the current first vector is not needed. */
b8698a0f 2434
ebfd146a 2435static bool
b8698a0f 2436vect_get_mask_element (gimple stmt, int first_mask_element, int m,
ebfd146a 2437 int mask_nunits, bool only_one_vec, int index,
22e4dee7 2438 unsigned char *mask, int *current_mask_element,
2439 bool *need_next_vector, int *number_of_mask_fixes,
2440 bool *mask_fixed, bool *needs_first_vector)
2441{
2442 int i;
2443
2444 /* Convert to target specific representation. */
2445 *current_mask_element = first_mask_element + m;
 2446 /* Adjust the value in case it's a mask for the second and third vectors. */
694a4f61 2447 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
2448
2449 if (*current_mask_element < mask_nunits)
694a4f61 2450 *needs_first_vector = true;
2451
2452 /* We have only one input vector to permute but the mask accesses values in
2453 the next vector as well. */
2454 if (only_one_vec && *current_mask_element >= mask_nunits)
2455 {
2456 if (vect_print_dump_info (REPORT_DETAILS))
2457 {
2458 fprintf (vect_dump, "permutation requires at least two vectors ");
2459 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2460 }
2461
2462 return false;
2463 }
2464
2465 /* The mask requires the next vector. */
2466 if (*current_mask_element >= mask_nunits * 2)
2467 {
694a4f61 2468 if (*needs_first_vector || *mask_fixed)
2469 {
2470 /* We either need the first vector too or have already moved to the
b8698a0f 2471 next vector. In both cases, this permutation needs three
2472 vectors. */
2473 if (vect_print_dump_info (REPORT_DETAILS))
2474 {
2475 fprintf (vect_dump, "permutation requires at "
2476 "least three vectors ");
2477 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2478 }
2479
2480 return false;
2481 }
2482
2483 /* We move to the next vector, dropping the first one and working with
2484 the second and the third - we need to adjust the values of the mask
2485 accordingly. */
694a4f61 2486 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
2487
2488 for (i = 0; i < index; i++)
694a4f61 2489 mask[i] -= mask_nunits * *number_of_mask_fixes;
ebfd146a 2490
2491 (*number_of_mask_fixes)++;
2492 *mask_fixed = true;
2493 }
2494
694a4f61 2495 *need_next_vector = *mask_fixed;
2496
2497 /* This was the last element of this mask. Start a new one. */
2498 if (index == mask_nunits - 1)
2499 {
2500 *number_of_mask_fixes = 1;
2501 *mask_fixed = false;
2502 *needs_first_vector = false;
2503 }
2504
2505 return true;
2506}
2507
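/* Worked example for the adjustment above (illustrative): with
   MASK_NUNITS = 4, a mask value of 9 is >= 2 * 4, so the first input
   vector is dropped and MASK_NUNITS * NUMBER_OF_MASK_FIXES = 4 * 1 is
   subtracted; the value becomes 5, selecting element 1 of the new
   second input vector.  This is the {6,9,9,9} -> {2,5,5,5} conversion
   described in the comment below.  */
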
2508
2509/* Generate vector permute statements from a list of loads in DR_CHAIN.
2510 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2511 permute statements for SLP_NODE_INSTANCE. */
2512bool
2513vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2514 gimple_stmt_iterator *gsi, int vf,
2515 slp_instance slp_node_instance, bool analyze_only)
2516{
2517 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2518 tree mask_element_type = NULL_TREE, mask_type;
2635892a 2519 int i, j, k, nunits, vec_index = 0, scalar_index;
ebfd146a 2520 slp_tree node;
2635892a 2521 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2522 gimple next_scalar_stmt;
2523 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2524 int first_mask_element;
2525 int index, unroll_factor, current_mask_element, ncopies;
2526 unsigned char *mask;
2527 bool only_one_vec = false, need_next_vector = false;
2528 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2529 int number_of_mask_fixes = 1;
2530 bool mask_fixed = false;
2531 bool needs_first_vector = false;
22e4dee7 2532 enum machine_mode mode;
ebfd146a 2533
2534 mode = TYPE_MODE (vectype);
2535
2536 if (!can_vec_perm_p (mode, false, NULL))
2537 {
2538 if (vect_print_dump_info (REPORT_DETAILS))
2539 {
2635892a 2540 fprintf (vect_dump, "no vect permute for ");
2541 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2542 }
2635892a 2543 return false;
2544 }
2545
2546 /* The generic VEC_PERM_EXPR code always uses an integral type of the
2547 same size as the vector element being permuted. */
2548 mask_element_type
2549 = lang_hooks.types.type_for_size
2550 (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (vectype))), 1);
ebfd146a 2551 mask_type = get_vectype_for_scalar_type (mask_element_type);
ebfd146a 2552 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 2553 mask = XALLOCAVEC (unsigned char, nunits);
2554 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2555
 2556 /* The number of vector stmts to generate, based only on the
 2557 SLP_NODE_INSTANCE unrolling factor. */
b8698a0f 2558 orig_vec_stmts_num = group_size *
2559 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2560 if (orig_vec_stmts_num == 1)
2561 only_one_vec = true;
2562
b8698a0f 2563 /* Number of copies is determined by the final vectorization factor
ebfd146a 2564 relative to the SLP_NODE_INSTANCE unrolling factor. */
b8698a0f 2565 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
ebfd146a 2566
 2567 /* Generate permutation masks for every NODE. The number of masks for
 2568 each NODE is equal to GROUP_SIZE.
 2569 E.g., we have a group of three nodes with three loads from the same
 2570 location in each node, and the vector size is 4. I.e., we have an
 2571 a0b0c0a1b1c1... sequence and we need to create the following vectors:
2572 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2573 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2574 ...
2575
2635892a 2576 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 2577 The last mask is illegal since we assume two operands for the permute
 2578 operation, and the mask element values can't be outside that range.
 2579 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 2580 For the first two permutations we need the first and the second input
ebfd146a 2581 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 2582 we need the second and the third vectors: {b1,c1,a2,b2} and
2583 {c2,a3,b3,c3}. */
2584
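  /* Worked mask arithmetic for the example above (an illustrative
     addition): for the a's (node i = 0, GROUP_SIZE = 3),
     FIRST_MASK_ELEMENT takes the values 0, 3, 6, 9 for j = 0..3, each
     repeated GROUP_SIZE times, giving 0,0,0,3,3,3,6,6,6,9,9,9; chunked
     into masks of NUNITS = 4 this yields {0,0,0,3} {3,3,6,6} {6,9,9,9},
     and the last mask is then rewritten to {2,5,5,5} as explained
     above.  */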
ac47786e 2585 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
2586 {
2587 scalar_index = 0;
2588 index = 0;
2589 vect_stmts_counter = 0;
2590 vec_index = 0;
2591 first_vec_index = vec_index++;
2592 if (only_one_vec)
2593 second_vec_index = first_vec_index;
2594 else
2595 second_vec_index = vec_index++;
2596
2597 for (j = 0; j < unroll_factor; j++)
2598 {
2599 for (k = 0; k < group_size; k++)
2600 {
2601 first_mask_element = i + j * group_size;
2602 if (!vect_get_mask_element (stmt, first_mask_element, 0,
2603 nunits, only_one_vec, index,
2604 mask, &current_mask_element,
2605 &need_next_vector,
2606 &number_of_mask_fixes, &mask_fixed,
2607 &needs_first_vector))
2608 return false;
2609 mask[index++] = current_mask_element;
ebfd146a 2610
2635892a 2611 if (index == nunits)
ebfd146a 2612 {
2613 tree mask_vec = NULL;
2614
2615 if (!can_vec_perm_p (mode, false, mask))
2616 {
2617 if (vect_print_dump_info (REPORT_DETAILS))
2618 {
2619 fprintf (vect_dump, "unsupported vect permute { ");
2620 for (i = 0; i < nunits; ++i)
2621 fprintf (vect_dump, "%d ", mask[i]);
2622 fprintf (vect_dump, "}\n");
2623 }
2624 return false;
2625 }
2626
faf63e39
RH
2627 while (--index >= 0)
2628 {
2629 tree t = build_int_cst (mask_element_type, mask[index]);
2630 mask_vec = tree_cons (NULL, t, mask_vec);
2631 }
2632 mask_vec = build_vector (mask_type, mask_vec);
2633 index = 0;
2634
ebfd146a
IR
2635 if (!analyze_only)
2636 {
2637 if (need_next_vector)
2638 {
2639 first_vec_index = second_vec_index;
2640 second_vec_index = vec_index;
2641 }
2642
2643 next_scalar_stmt = VEC_index (gimple,
2644 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2645
2646 vect_create_mask_and_perm (stmt, next_scalar_stmt,
faf63e39 2647 mask_vec, first_vec_index, second_vec_index,
2635892a 2648 gsi, node, vectype, dr_chain,
faf63e39 2649 ncopies, vect_stmts_counter++);
ebfd146a 2650 }
2651 }
2652 }
2653 }
2654 }
ebfd146a 2655
2656 return true;
2657}
2658
2659
2660
2661/* Vectorize SLP instance tree in postorder. */
2662
2663static bool
2664vect_schedule_slp_instance (slp_tree node, slp_instance instance,
a70d6342 2665 unsigned int vectorization_factor)
2666{
2667 gimple stmt;
2668 bool strided_store, is_store;
2669 gimple_stmt_iterator si;
2670 stmt_vec_info stmt_info;
2671 unsigned int vec_stmts_size, nunits, group_size;
2672 tree vectype;
2673 int i;
2674 slp_tree loads_node;
d092494c 2675 slp_void_p child;
2676
2677 if (!node)
2678 return false;
2679
2680 FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
2681 vect_schedule_slp_instance ((slp_tree) child, instance,
2682 vectorization_factor);
b8698a0f 2683
2684 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2685 stmt_info = vinfo_for_stmt (stmt);
2686
2687 /* VECTYPE is the type of the destination. */
b690cc0f 2688 vectype = STMT_VINFO_VECTYPE (stmt_info);
2689 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2690 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2691
 2692 /* For each SLP instance calculate the number of vector stmts to be created
ff802fa1 2693 for the scalar stmts in each node of the SLP tree. The number of vector
 2694 elements in one vector iteration is the number of scalar elements in
 2695 one scalar iteration (GROUP_SIZE) multiplied by VF and divided by the
 2696 vector size. */
2697 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2698
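  /* Worked example for the formula above (illustrative numbers): with
     VECTORIZATION_FACTOR = 4, GROUP_SIZE = 2 and NUNITS = 4, each node
     gets (4 * 2) / 4 = 2 vector stmts.  */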
2699 /* In case of load permutation we have to allocate vectorized statements for
2700 all the nodes that participate in that permutation. */
2701 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2702 {
ac47786e 2703 FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
2704 {
2705 if (!SLP_TREE_VEC_STMTS (loads_node))
2706 {
2707 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2708 vec_stmts_size);
2709 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2710 }
2711 }
2712 }
2713
2714 if (!SLP_TREE_VEC_STMTS (node))
2715 {
2716 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2717 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2718 }
2719
2720 if (vect_print_dump_info (REPORT_DETAILS))
2721 {
2722 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2723 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
b8698a0f 2724 }
2725
2726 /* Loads should be inserted before the first load. */
2727 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2728 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2729 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
2730 && SLP_INSTANCE_LOAD_PERMUTATION (instance))
ebfd146a 2731 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
9d5e7640 2732 else if (is_pattern_stmt_p (stmt_info))
6aa904c4 2733 si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2734 else
2735 si = gsi_for_stmt (stmt);
b8698a0f 2736
2737 /* Stores should be inserted just before the last store. */
2738 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
2739 && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2740 {
2741 gimple last_store = vect_find_last_store_in_slp_instance (instance);
2742 si = gsi_for_stmt (last_store);
2743 }
2744
2745 /* Mark the first element of the reduction chain as reduction to properly
2746 transform the node. In the analysis phase only the last element of the
2747 chain is marked as reduction. */
2748 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
2749 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
2750 {
2751 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2752 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2753 }
2754
ebfd146a 2755 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
b5aeb3bb 2756 return is_store;
2757}
2758
2759
2760/* Generate vector code for all SLP instances in the loop/basic block. */
2761
ebfd146a 2762bool
a70d6342 2763vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a 2764{
a70d6342 2765 VEC (slp_instance, heap) *slp_instances;
ebfd146a 2766 slp_instance instance;
a70d6342 2767 unsigned int i, vf;
2768 bool is_store = false;
2769
2770 if (loop_vinfo)
2771 {
2772 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2773 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
b8698a0f 2774 }
2775 else
2776 {
2777 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2778 vf = 1;
b8698a0f 2779 }
a70d6342 2780
ac47786e 2781 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2782 {
2783 /* Schedule the tree of INSTANCE. */
2784 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
a70d6342 2785 instance, vf);
2786 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2787 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2788 fprintf (vect_dump, "vectorizing stmts using SLP.");
2789 }
2790
ac47786e 2791 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2792 {
2793 slp_tree root = SLP_INSTANCE_TREE (instance);
2794 gimple store;
2795 unsigned int j;
2796 gimple_stmt_iterator gsi;
2797
2798 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2799 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2800 {
2801 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2802 break;
2803
2804 /* Free the attached stmt_vec_info and remove the stmt. */
2805 gsi = gsi_for_stmt (store);
2806 gsi_remove (&gsi, true);
2807 free_stmt_vec_info (store);
2808 }
2809 }
2810
2811 return is_store;
2812}
2813
2814
2815/* Vectorize the basic block. */
2816
2817void
2818vect_slp_transform_bb (basic_block bb)
2819{
2820 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2821 gimple_stmt_iterator si;
2822
2823 gcc_assert (bb_vinfo);
2824
2825 if (vect_print_dump_info (REPORT_DETAILS))
2826 fprintf (vect_dump, "SLPing BB\n");
2827
2828 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2829 {
2830 gimple stmt = gsi_stmt (si);
2831 stmt_vec_info stmt_info;
2832
2833 if (vect_print_dump_info (REPORT_DETAILS))
2834 {
2835 fprintf (vect_dump, "------>SLPing statement: ");
2836 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2837 }
2838
2839 stmt_info = vinfo_for_stmt (stmt);
2840 gcc_assert (stmt_info);
2841
2842 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2843 if (STMT_SLP_TYPE (stmt_info))
2844 {
2845 vect_schedule_slp (NULL, bb_vinfo);
2846 break;
2847 }
2848 }
2849
2850 mark_sym_for_renaming (gimple_vop (cfun));
2851 /* The memory tags and pointers in vectorized statements need to
2852 have their SSA forms updated. FIXME, why can't this be delayed
2853 until all the loops have been transformed? */
2854 update_ssa (TODO_update_ssa);
2855
2856 if (vect_print_dump_info (REPORT_DETAILS))
e9dbe7bb 2857 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
a70d6342 2858
2859 destroy_bb_vec_info (bb_vinfo);
2860}
a70d6342 2861