/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if found, and UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}


/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */
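/* For example, for the SLP group
       a0 = b0 + c0;
       a1 = b1 + c1;
   the defs of b0/b1 are collected in DEF_STMTS0 and those of c0/c1 in
   DEF_STMTS1, and every stmt after the first must match the def kinds and
   def types recorded for the first stmt in FIRST_STMT_DT0/1 and
   FIRST_STMT_DEF0/1_TYPE.  */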

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;  /* RHS only.  */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not a memory operation (we don't call this function for
               loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0],
                                   slp_node);
        }

      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }
                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not the first stmt of the group, check that the def-stmt/s
                 match the def-stmt/s of the first stmt.  */
              if ((i == 0
                   && (*first_stmt_dt0 != dt[i]
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
                      && (*first_stmt_dt1 != dt[i]
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
                  || (!def
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}


/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if the def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies != 1)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP.  */
          if (bb_vinfo)
            return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
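          /* For example, when the target only provides a vector-by-scalar
             shift, the group {a0 = b0 << s; a1 = b1 << s;} is vectorizable
             only if all the stmts shift by the same amount S; FIRST_OP1
             records S below so that the remaining stmts can be checked
             against it.  */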
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || (optab->handlers[(int) vec_mode].insn_code
                      == CODE_FOR_nothing))
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }
                  icode = (int) optab->handlers[(int) vec_mode].insn_code;
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
                   && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
                      && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of the interleaved loads group is not
                 greater than the SLP group size.  */
              if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
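              /* For example, if this node loads {a[i+1], a[i]} from a chain
                 starting at a[i], the places are {1, 0} rather than the
                 identity {0, 1}, so PERMUTATION is set and its feasibility
                 is checked later in vect_supported_load_permutation_p.  */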
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Non-strided loads are not supported.  */
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation "
                           "unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost += TARG_VEC_PERMUTE_COST * group_size;
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}


/* Print the stmts in the SLP tree rooted at NODE to VECT_DUMP,
   recursively.  */

static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}


/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported permutation is when all the loads in a node
     access the same location, and the data-refs in the nodes of LOADS
     constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}


/* Rearrange the statements of NODE according to PERMUTATION.  */
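/* For example, with GROUP_SIZE 2 and PERMUTATION {1, 0}, the two scalar
   stmts of every node in the tree are swapped: the stmt at position I is
   moved to position PERMUTATION[I].  */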

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  if (!node)
    return;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node;
  gimple stmt;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  for (i = 0;
       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
       i++)
    if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
        != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance; not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's a reduction).  */
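  /* For example, with GROUP_SIZE 2 a supported LOAD_PERMUTATION has length
     4 and looks like {0, 0, 1, 1} or {1, 1, 0, 0}: each run of GROUP_SIZE
     entries refers to a single load position, and LOAD_INDEX below verifies
     that every position in 0..GROUP_SIZE-1 appears exactly once.  */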
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      {
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  for (i = 0;
       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
       i++)
    for (j = 0;
         VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
         j++)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));

  if (dr)
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    /* No multitypes in BB SLP.  */
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
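  /* For example, with NUNITS 4 and GROUP_SIZE 2 the unrolling factor is 2:
     two copies of the group are needed to fill one vector.  */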
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                 " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (dr)
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
                               next);
           i++)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "pushing reduction into node: ");
              print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
            }
        }
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (the number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
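  /* E.g., GROUP_SIZE 8 with NUNITS 4 gives NCOPIES_FOR_COST 2, while
     GROUP_SIZE 2 with NUNITS 4 needs unrolling factor 2 and gives
     NCOPIES_FOR_COST 1.  */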
  ncopies_for_cost = unrolling_factor * group_size / nunits;

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor))
    {
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      /* Calculate the unrolling factor based on the smallest type in the
         loop.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                           "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}


/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL;
  gimple store;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  /* Find SLP sequences starting from groups of reductions.  */
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  */

void
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances
    = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later
         we call vect_detect_hybrid_slp () to find stmts that need hybrid SLP
         and loop-based vectorization.  Such stmts will be marked as
         HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */
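/* For example, if the result of a PURE_SLP stmt is also used by a relevant
   stmt that does not belong to any SLP instance, that value must be
   produced by the loop-based vectorizer as well, so the stmt is marked
   HYBRID.  */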

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}


/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances
    = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}


/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}


/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}


/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}


/* Check if the basic block can be vectorized.  */

bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                 "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
      || min_vf > max_vf)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}


/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */
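/* For example, if an instance has unrolling factor 2 and the vectorization
   factor turns out to be 8, the inside-of-loop cost recorded for the
   instance is multiplied by 8 / 2 = 4.  */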

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances
    = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}
a70d6342 1590
b8698a0f
L
1591/* For constant and loop invariant defs of SLP_NODE this function returns
1592 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
ebfd146a 1593 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
b5aeb3bb
IR
1594 stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1595 REDUC_INDEX is the index of the reduction operand in the statements, unless
1596 it is -1. */
ebfd146a
IR
1597
1598static void
1599vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
b5aeb3bb
IR
1600 unsigned int op_num, unsigned int number_of_vectors,
1601 int reduc_index)
ebfd146a
IR
1602{
1603 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1604 gimple stmt = VEC_index (gimple, stmts, 0);
1605 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
ebfd146a
IR
1606 int nunits;
1607 tree vec_cst;
1608 tree t = NULL_TREE;
1609 int j, number_of_places_left_in_vector;
1610 tree vector_type;
1611 tree op, vop;
1612 int group_size = VEC_length (gimple, stmts);
1613 unsigned int vec_num, i;
1614 int number_of_copies = 1;
1615 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1616 bool constant_p, is_store;
b5aeb3bb
IR
1617 tree neutral_op = NULL;
1618
1619 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1620 {
1621 enum tree_code code = gimple_assign_rhs_code (stmt);
1622 if (reduc_index == -1)
1623 {
1624 VEC_free (tree, heap, *vec_oprnds);
1625 return;
1626 }
1627
1628 op_num = reduc_index - 1;
1629 op = gimple_op (stmt, op_num + 1);
1630 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1631 we need either neutral operands or the original operands. See
1632 get_initial_def_for_reduction() for details. */
1633 switch (code)
1634 {
1635 case WIDEN_SUM_EXPR:
1636 case DOT_PROD_EXPR:
1637 case PLUS_EXPR:
1638 case MINUS_EXPR:
1639 case BIT_IOR_EXPR:
1640 case BIT_XOR_EXPR:
1641 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1642 neutral_op = build_real (TREE_TYPE (op), dconst0);
1643 else
1644 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1645
1646 break;
1647
1648 case MULT_EXPR:
1649 case BIT_AND_EXPR:
1650 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1651 neutral_op = build_real (TREE_TYPE (op), dconst1);
1652 else
1653 neutral_op = build_int_cst (TREE_TYPE (op), 1);
1654
1655 break;
1656
1657 default:
1658 neutral_op = NULL;
1659 }
1660 }
ebfd146a
IR
1661
1662 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1663 {
1664 is_store = true;
1665 op = gimple_assign_rhs1 (stmt);
1666 }
1667 else
1668 {
1669 is_store = false;
1670 op = gimple_op (stmt, op_num + 1);
1671 }
1672
1673 if (CONSTANT_CLASS_P (op))
cd481d83 1674 constant_p = true;
ebfd146a 1675 else
cd481d83
IR
1676 constant_p = false;
1677
1678 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1679 gcc_assert (vector_type);
ebfd146a
IR
1680
1681 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1682
1683 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 1684 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
1685
1686 For example, we have two scalar operands, s1 and s2 (e.g., group of
1687 strided accesses of size two), while NUNITS is four (i.e., four scalars
1688 of this type can be packed in a vector). The output vector will contain
1689 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1690 will be 2).
1691
b8698a0f 1692 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
1693 containing the operands.
1694
1695 For example, NUNITS is four as before, and the group size is 8
1696 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1697 {s5, s6, s7, s8}. */
b8698a0f 1698
ebfd146a
IR
1699 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1700
1701 number_of_places_left_in_vector = nunits;
1702 for (j = 0; j < number_of_copies; j++)
1703 {
1704 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1705 {
1706 if (is_store)
1707 op = gimple_assign_rhs1 (stmt);
1708 else
1709 op = gimple_op (stmt, op_num + 1);
b8698a0f 1710
b5aeb3bb
IR
1711 if (reduc_index != -1)
1712 {
1713 struct loop *loop = (gimple_bb (stmt))->loop_father;
1714 gimple def_stmt = SSA_NAME_DEF_STMT (op);
1715
1716 gcc_assert (loop);
1717 /* Get the def before the loop. */
1718 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
1719 loop_preheader_edge (loop));
1720 if (j != (number_of_copies - 1) && neutral_op)
1721 op = neutral_op;
1722 }
1723
ebfd146a
IR
1724 /* Create 'vect_ = {op0,op1,...,opn}'. */
1725 t = tree_cons (NULL_TREE, op, t);
1726
1727 number_of_places_left_in_vector--;
1728
1729 if (number_of_places_left_in_vector == 0)
1730 {
1731 number_of_places_left_in_vector = nunits;
1732
1733 if (constant_p)
1734 vec_cst = build_vector (vector_type, t);
1735 else
1736 vec_cst = build_constructor_from_list (vector_type, t);
1737 VEC_quick_push (tree, voprnds,
1738 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1739 t = NULL_TREE;
1740 }
1741 }
1742 }

  /* Since the vectors are created in the reverse order, we should reverse
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* If the VF is greater than the unrolling factor needed for the SLP group
     of stmts, the NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
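  /* Illustrative example: if the loop VF is 8 but the SLP group only needs
     an unrolling factor of 2, twice as many vectors are requested as were
     built above.  For reductions the extra slots are filled with a vector
     of NEUTRAL_OP; otherwise the already-created vectors are pushed
     again.  */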
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            {
              t = NULL;
              for (i = 0; i < (unsigned) nunits; i++)
                t = tree_cons (NULL_TREE, neutral_op, t);
              neutral_vec = build_vector (vector_type, t);
            }

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num;
               i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}


/* Get the vectorized definitions from SLP_NODE, which contains the
   corresponding vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
       i++)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}


/* Get the vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must already be vectorized and the vectorized
   stmts must be stored in the left/right node of SLP_NODE, and we call
   vect_get_slp_vect_defs() to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand
   (from the right node); this is used when the second operand must remain
   scalar.  */

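/* Illustrative usage sketch (the operands here are hypothetical): for a
   group of additions 'a[i] = b[i] + c[i]', a caller vectorizing the '+'
   would do

       vect_get_slp_defs (slp_node, &vec_oprnds0, &vec_oprnds1, -1);

   collecting the vectorized defs of the b's in VEC_OPRNDS0 and of the c's
   in VEC_OPRNDS1, with REDUC_INDEX -1 since this is not a reduction.  */
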
void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* The number of vector stmts was calculated according to the LHS in
         vect_schedule_slp_instance(); fix it by replacing the LHS with the
         RHS, if necessary.  See vect_get_smallest_scalar_type() for
         details.  */
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }
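  /* Illustrative example: for a narrowing stmt such as
     'short = (short) int', LHS_SIZE_UNIT == 2 and RHS_SIZE_UNIT == 4, so
     if two vector stmts of shorts are produced, 2 * 4 / 2 == 4 vectors of
     ints are needed to supply their operands.  */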

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors(), since we are
     looking for the initial loop-invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need the initial values.  */
  if (reduc_index != -1)
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
}


/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
   building a vector of type MASK_TYPE from it) and two input vectors placed
   in DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy,
   shifting by STRIDE elements of DR_CHAIN for every subsequent copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number
   of copies.)
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
   where the created stmts must be inserted.  */
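/* Illustrative example: if NODE needs four vector stmts and NCOPIES is 2,
   STRIDE is 2.  The first copy permutes DR_CHAIN[FIRST_VEC_INDX] and
   DR_CHAIN[SECOND_VEC_INDX]; the second copy uses the vectors two slots
   further along DR_CHAIN, and its result is stored at index
   STRIDE * 1 + VECT_STMTS_COUNTER in the vect stmts of NODE.  */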

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;
  VEC (tree, heap) *params = NULL;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Build the argument list for the vectorized call.  */
      VEC_free (tree, heap, params);
      params = VEC_alloc (tree, heap, 3);
      VEC_quick_push (tree, params, first_vec);
      VEC_quick_push (tree, params, second_vec);
      VEC_quick_push (tree, params, mask);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call_vec (builtin_decl, params);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}


/* Given FIRST_MASK_ELEMENT - the mask element in element representation -
   return in CURRENT_MASK_ELEMENT its equivalent in the target-specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to the
   next vector, i.e., the current first vector is not needed.  */
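/* Illustrative example: with MASK_NUNITS == 4, a mask element value of 6
   refers to element 2 of the second input vector.  With only one input
   vector that is invalid; with values of 8 or more the code drops the
   current first vector, rebases the mask elements by MASK_NUNITS, and
   signals via NEED_NEXT_VECTOR that the caller should advance to the next
   pair of vectors.  */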

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector)
{
  int i;
  static int number_of_mask_fixes = 1;
  static bool mask_fixed = false;
  static bool needs_first_vector = false;

  /* Convert to the target-specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for the second and third
     vectors.  */
  *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    needs_first_vector = true;

  /* We have only one input vector to permute, but the mask accesses values
     in the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (needs_first_vector || mask_fixed)
        {
          /* We either need the first vector too, or we have already moved
             to the next vector.  In both cases, this permutation needs
             three vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * number_of_mask_fixes;

      number_of_mask_fixes++;
      mask_fixed = true;
    }

  *need_next_vector = mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      number_of_mask_fixes = 1;
      mask_fixed = false;
      needs_first_vector = false;
    }

  return true;
}


/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance,
                              bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num,
      vect_stmts_counter;

  if (!targetm.vectorize.builtin_vec_perm)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
                                                     &mask_element_type);
  if (!builtin_decl || !mask_element_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  mask_type = get_vectype_for_scalar_type (mask_element_type);
  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  scale = mask_nunits / nunits;
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
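  /* Illustrative example: on a target whose permute mask is a vector of
     bytes (e.g., Altivec), a V4SI data vector gives MASK_NUNITS == 16 and
     NUNITS == 4, so SCALE == 4: each vector element is selected by four
     consecutive byte indices in the mask.  */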

  /* The number of vector stmts to generate, based only on the unrolling
     factor of SLP_NODE_INSTANCE.  */
  orig_vec_stmts_num = group_size
                       * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance)
                       / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* The number of copies is determined by the final vectorization factor
     relative to the unrolling factor of SLP_NODE_INSTANCE.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
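  /* Illustrative example: if the loop VF is 8 while this SLP instance only
     needs an unrolling factor of 4, NCOPIES == 2 and every permutation is
     emitted twice, the second copy reading its inputs STRIDE vectors
     further along DR_CHAIN (see vect_create_mask_and_perm above).  */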

  /* Generate permutation masks for every NODE.  The number of masks for
     each NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in the
     target-specific type, e.g., in bytes for Altivec).
     The last mask is illegal since we assume two operands for the permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  for (i = 0;
       VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
                    i, node);
       i++)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = (i + j * group_size) * scale;
              for (m = 0; m < scale; m++)
                {
                  if (!vect_get_mask_element (stmt, first_mask_element, m,
                                              mask_nunits, only_one_vec,
                                              index, mask,
                                              &current_mask_element,
                                              &need_next_vector))
                    {
                      free (mask);
                      return false;
                    }

                  mask[index++] = current_mask_element;
                }

              if (index == mask_nunits)
                {
                  tree mask_vec = NULL;

                  while (--index >= 0)
                    {
                      tree t = build_int_cst (mask_element_type, mask[index]);
                      mask_vec = tree_cons (NULL, t, mask_vec);
                    }
                  mask_vec = build_vector (mask_type, mask_vec);
                  index = 0;

                  if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
                                                              mask_vec))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute ");
                          print_generic_expr (vect_dump, mask_vec, 0);
                        }
                      free (mask);
                      return false;
                    }

                  if (!analyze_only)
                    {
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                               mask_vec, first_vec_index, second_vec_index,
                               gsi, node, builtin_decl, vectype, dr_chain,
                               ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}


/* Vectorize the SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate the number of vector stmts to be
     created for the scalar stmts in each node of the SLP tree.  The number
     of vector elements needed in one vectorized iteration is the number of
     scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF,
     divided by the vector size.  */
  vec_stmts_size = (vectorization_factor * group_size) / nunits;
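  /* Illustrative example: with VF == 4, GROUP_SIZE == 2 and NUNITS == 4,
     the 4 * 2 == 8 scalar results of the unrolled group fit in
     8 / 4 == 2 vector stmts.  */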

  /* In the case of a load permutation we have to allocate vectorized
     statements for all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      for (i = 0;
           VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i,
                        loads_node);
           i++)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Vectorized loads should be inserted before the first scalar load of
     the group.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}


/* Schedule all the SLP instances found in LOOP_VINFO or BB_VINFO and
   generate their vectorized code.  Return TRUE if the last scheduled
   instance was a group of stores.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0;
           VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
           && j < SLP_INSTANCE_GROUP_SIZE (instance);
           j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}


/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is
         reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to have
     their SSA forms updated.  FIXME: why can't this be delayed until all
     the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}