]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
re PR target/44531 ([SH] Multilib configuration does not work as expected on darwin)
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
c75c517d
SB
2 Copyright (C) 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
b8698a0f 4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
5 and Ira Rosen <irar@il.ibm.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
cf835838
JM
31#include "tree-pretty-print.h"
32#include "gimple-pretty-print.h"
ebfd146a
IR
33#include "tree-flow.h"
34#include "tree-dump.h"
35#include "cfgloop.h"
36#include "cfglayout.h"
37#include "expr.h"
38#include "recog.h"
39#include "optabs.h"
40#include "tree-vectorizer.h"
41
a70d6342
IR
42/* Extract the location of the basic block in the source code.
43 Return the basic block location if succeed and NULL if not. */
44
45LOC
46find_bb_location (basic_block bb)
47{
48 gimple stmt = NULL;
49 gimple_stmt_iterator si;
50
51 if (!bb)
52 return UNKNOWN_LOC;
53
54 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
55 {
56 stmt = gsi_stmt (si);
57 if (gimple_location (stmt) != UNKNOWN_LOC)
58 return gimple_location (stmt);
59 }
60
61 return UNKNOWN_LOC;
62}
63
64
ebfd146a
IR
65/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
66
67static void
68vect_free_slp_tree (slp_tree node)
69{
70 if (!node)
71 return;
72
73 if (SLP_TREE_LEFT (node))
74 vect_free_slp_tree (SLP_TREE_LEFT (node));
b8698a0f 75
ebfd146a
IR
76 if (SLP_TREE_RIGHT (node))
77 vect_free_slp_tree (SLP_TREE_RIGHT (node));
b8698a0f 78
ebfd146a 79 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
b8698a0f 80
ebfd146a
IR
81 if (SLP_TREE_VEC_STMTS (node))
82 VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));
83
84 free (node);
85}
86
87
88/* Free the memory allocated for the SLP instance. */
89
90void
91vect_free_slp_instance (slp_instance instance)
92{
93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
94 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
95 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
96}
97
98
99/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
100 they are of a legal type and that they match the defs of the first stmt of
101 the SLP group (stored in FIRST_STMT_...). */
102
103static bool
a70d6342 104vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
b8698a0f 105 slp_tree slp_node, gimple stmt,
a70d6342 106 VEC (gimple, heap) **def_stmts0,
ebfd146a
IR
107 VEC (gimple, heap) **def_stmts1,
108 enum vect_def_type *first_stmt_dt0,
109 enum vect_def_type *first_stmt_dt1,
b8698a0f 110 tree *first_stmt_def0_type,
ebfd146a
IR
111 tree *first_stmt_def1_type,
112 tree *first_stmt_const_oprnd,
113 int ncopies_for_cost,
114 bool *pattern0, bool *pattern1)
115{
116 tree oprnd;
117 unsigned int i, number_of_oprnds;
118 tree def;
119 gimple def_stmt;
120 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
b8698a0f 121 stmt_vec_info stmt_info =
ebfd146a
IR
122 vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
123 enum gimple_rhs_class rhs_class;
a70d6342 124 struct loop *loop = NULL;
b8698a0f 125
a70d6342
IR
126 if (loop_vinfo)
127 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a
IR
128
129 rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
130 number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
131
132 for (i = 0; i < number_of_oprnds; i++)
133 {
134 oprnd = gimple_op (stmt, i + 1);
135
b8698a0f 136 if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
a70d6342 137 &dt[i])
ebfd146a
IR
138 || (!def_stmt && dt[i] != vect_constant_def))
139 {
b8698a0f 140 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
141 {
142 fprintf (vect_dump, "Build SLP failed: can't find def for ");
143 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
144 }
145
146 return false;
147 }
148
a70d6342
IR
149 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
150 from the pattern. Check that all the stmts of the node are in the
ebfd146a 151 pattern. */
a70d6342 152 if (loop && def_stmt && gimple_bb (def_stmt)
ebfd146a
IR
153 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
154 && vinfo_for_stmt (def_stmt)
155 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
156 {
157 if (!*first_stmt_dt0)
158 *pattern0 = true;
159 else
160 {
161 if (i == 1 && !*first_stmt_dt1)
162 *pattern1 = true;
163 else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
164 {
165 if (vect_print_dump_info (REPORT_DETAILS))
166 {
167 fprintf (vect_dump, "Build SLP failed: some of the stmts"
168 " are in a pattern, and others are not ");
169 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
170 }
171
172 return false;
173 }
174 }
175
176 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
177 dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
178
179 if (*dt == vect_unknown_def_type)
180 {
181 if (vect_print_dump_info (REPORT_DETAILS))
182 fprintf (vect_dump, "Unsupported pattern.");
183 return false;
184 }
185
186 switch (gimple_code (def_stmt))
187 {
188 case GIMPLE_PHI:
189 def = gimple_phi_result (def_stmt);
190 break;
191
192 case GIMPLE_ASSIGN:
193 def = gimple_assign_lhs (def_stmt);
194 break;
195
196 default:
197 if (vect_print_dump_info (REPORT_DETAILS))
198 fprintf (vect_dump, "unsupported defining stmt: ");
199 return false;
200 }
201 }
202
203 if (!*first_stmt_dt0)
204 {
205 /* op0 of the first stmt of the group - store its info. */
206 *first_stmt_dt0 = dt[i];
207 if (def)
208 *first_stmt_def0_type = TREE_TYPE (def);
209 else
210 *first_stmt_const_oprnd = oprnd;
211
212 /* Analyze costs (for the first stmt of the group only). */
213 if (rhs_class != GIMPLE_SINGLE_RHS)
214 /* Not memory operation (we don't call this functions for loads). */
215 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
216 else
217 /* Store. */
218 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
219 }
b8698a0f 220
ebfd146a
IR
221 else
222 {
223 if (!*first_stmt_dt1 && i == 1)
224 {
225 /* op1 of the first stmt of the group - store its info. */
226 *first_stmt_dt1 = dt[i];
227 if (def)
228 *first_stmt_def1_type = TREE_TYPE (def);
229 else
230 {
b8698a0f 231 /* We assume that the stmt contains only one constant
ebfd146a
IR
232 operand. We fail otherwise, to be on the safe side. */
233 if (*first_stmt_const_oprnd)
234 {
b8698a0f 235 if (vect_print_dump_info (REPORT_SLP))
ebfd146a 236 fprintf (vect_dump, "Build SLP failed: two constant "
b8698a0f 237 "oprnds in stmt");
ebfd146a
IR
238 return false;
239 }
240 *first_stmt_const_oprnd = oprnd;
241 }
242 }
243 else
244 {
b8698a0f 245 /* Not first stmt of the group, check that the def-stmt/s match
ebfd146a 246 the def-stmt/s of the first stmt. */
b8698a0f 247 if ((i == 0
ebfd146a
IR
248 && (*first_stmt_dt0 != dt[i]
249 || (*first_stmt_def0_type && def
8533c9d8
SP
250 && !types_compatible_p (*first_stmt_def0_type,
251 TREE_TYPE (def)))))
b8698a0f 252 || (i == 1
ebfd146a
IR
253 && (*first_stmt_dt1 != dt[i]
254 || (*first_stmt_def1_type && def
8533c9d8
SP
255 && !types_compatible_p (*first_stmt_def1_type,
256 TREE_TYPE (def)))))
b8698a0f 257 || (!def
8533c9d8
SP
258 && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
259 TREE_TYPE (oprnd))))
b8698a0f
L
260 {
261 if (vect_print_dump_info (REPORT_SLP))
ebfd146a 262 fprintf (vect_dump, "Build SLP failed: different types ");
b8698a0f 263
ebfd146a
IR
264 return false;
265 }
266 }
267 }
268
269 /* Check the types of the definitions. */
270 switch (dt[i])
271 {
272 case vect_constant_def:
8644a673 273 case vect_external_def:
ebfd146a 274 break;
b8698a0f 275
8644a673 276 case vect_internal_def:
b5aeb3bb 277 case vect_reduction_def:
ebfd146a
IR
278 if (i == 0)
279 VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
280 else
281 VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
282 break;
283
284 default:
285 /* FORNOW: Not supported. */
b8698a0f 286 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
287 {
288 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
289 print_generic_expr (vect_dump, def, TDF_SLIM);
290 }
291
292 return false;
293 }
294 }
295
296 return true;
297}
298
299
300/* Recursively build an SLP tree starting from NODE.
b8698a0f
L
301 Fail (and return FALSE) if def-stmts are not isomorphic, require data
302 permutation or are of unsupported types of operation. Otherwise, return
ebfd146a
IR
303 TRUE. */
304
305static bool
b8698a0f 306vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
a70d6342
IR
307 slp_tree *node, unsigned int group_size,
308 int *inside_cost, int *outside_cost,
309 int ncopies_for_cost, unsigned int *max_nunits,
ebfd146a 310 VEC (int, heap) **load_permutation,
a70d6342
IR
311 VEC (slp_tree, heap) **loads,
312 unsigned int vectorization_factor)
ebfd146a
IR
313{
314 VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
315 VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
316 unsigned int i;
317 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
318 gimple stmt = VEC_index (gimple, stmts, 0);
81f40b79
ILT
319 enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
320 enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
321 enum tree_code first_stmt_code = ERROR_MARK, rhs_code;
ebfd146a
IR
322 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
323 tree lhs;
324 bool stop_recursion = false, need_same_oprnds = false;
325 tree vectype, scalar_type, first_op1 = NULL_TREE;
a70d6342 326 unsigned int ncopies;
ebfd146a
IR
327 optab optab;
328 int icode;
329 enum machine_mode optab_op2_mode;
330 enum machine_mode vec_mode;
331 tree first_stmt_const_oprnd = NULL_TREE;
332 struct data_reference *first_dr;
333 bool pattern0 = false, pattern1 = false;
334 HOST_WIDE_INT dummy;
335 bool permutation = false;
336 unsigned int load_place;
b5aeb3bb 337 gimple first_load, prev_first_load = NULL;
ebfd146a
IR
338
339 /* For every stmt in NODE find its def stmt/s. */
340 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
341 {
b8698a0f 342 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
343 {
344 fprintf (vect_dump, "Build SLP for ");
345 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
346 }
347
4b5caab7
IR
348 /* Fail to vectorize statements marked as unvectorizable. */
349 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
350 {
351 if (vect_print_dump_info (REPORT_SLP))
352 {
353 fprintf (vect_dump,
354 "Build SLP failed: unvectorizable statement ");
355 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
356 }
357
358 return false;
359 }
360
ebfd146a
IR
361 lhs = gimple_get_lhs (stmt);
362 if (lhs == NULL_TREE)
363 {
b8698a0f 364 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
365 {
366 fprintf (vect_dump,
367 "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
368 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
369 }
b8698a0f 370
ebfd146a
IR
371 return false;
372 }
373
b8698a0f 374 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
ebfd146a
IR
375 vectype = get_vectype_for_scalar_type (scalar_type);
376 if (!vectype)
377 {
378 if (vect_print_dump_info (REPORT_SLP))
379 {
380 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
381 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
382 }
383 return false;
384 }
b8698a0f 385
ebfd146a 386 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
a70d6342
IR
387 if (ncopies != 1)
388 {
389 if (vect_print_dump_info (REPORT_SLP))
390 fprintf (vect_dump, "SLP with multiple types ");
ebfd146a 391
a70d6342
IR
392 /* FORNOW: multiple types are unsupported in BB SLP. */
393 if (bb_vinfo)
394 return false;
395 }
b8698a0f 396
ebfd146a
IR
397 /* In case of multiple types we need to detect the smallest type. */
398 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
399 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
b8698a0f 400
ebfd146a
IR
401 if (is_gimple_call (stmt))
402 rhs_code = CALL_EXPR;
403 else
404 rhs_code = gimple_assign_rhs_code (stmt);
405
406 /* Check the operation. */
407 if (i == 0)
408 {
409 first_stmt_code = rhs_code;
410
b8698a0f 411 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
412 vector shift with scalar shift operand. */
413 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
414 || rhs_code == LROTATE_EXPR
415 || rhs_code == RROTATE_EXPR)
416 {
417 vec_mode = TYPE_MODE (vectype);
418
419 /* First see if we have a vector/vector shift. */
420 optab = optab_for_tree_code (rhs_code, vectype,
421 optab_vector);
422
423 if (!optab
424 || (optab->handlers[(int) vec_mode].insn_code
425 == CODE_FOR_nothing))
426 {
427 /* No vector/vector shift, try for a vector/scalar shift. */
428 optab = optab_for_tree_code (rhs_code, vectype,
429 optab_scalar);
430
431 if (!optab)
432 {
433 if (vect_print_dump_info (REPORT_SLP))
434 fprintf (vect_dump, "Build SLP failed: no optab.");
435 return false;
436 }
437 icode = (int) optab->handlers[(int) vec_mode].insn_code;
438 if (icode == CODE_FOR_nothing)
439 {
440 if (vect_print_dump_info (REPORT_SLP))
441 fprintf (vect_dump, "Build SLP failed: "
442 "op not supported by target.");
443 return false;
444 }
445 optab_op2_mode = insn_data[icode].operand[2].mode;
446 if (!VECTOR_MODE_P (optab_op2_mode))
447 {
448 need_same_oprnds = true;
449 first_op1 = gimple_assign_rhs2 (stmt);
450 }
451 }
452 }
453 }
454 else
455 {
456 if (first_stmt_code != rhs_code
457 && (first_stmt_code != IMAGPART_EXPR
458 || rhs_code != REALPART_EXPR)
459 && (first_stmt_code != REALPART_EXPR
460 || rhs_code != IMAGPART_EXPR))
461 {
b8698a0f 462 if (vect_print_dump_info (REPORT_SLP))
ebfd146a 463 {
b8698a0f 464 fprintf (vect_dump,
ebfd146a
IR
465 "Build SLP failed: different operation in stmt ");
466 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
467 }
b8698a0f 468
ebfd146a
IR
469 return false;
470 }
b8698a0f
L
471
472 if (need_same_oprnds
ebfd146a
IR
473 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
474 {
b8698a0f 475 if (vect_print_dump_info (REPORT_SLP))
ebfd146a 476 {
b8698a0f 477 fprintf (vect_dump,
ebfd146a
IR
478 "Build SLP failed: different shift arguments in ");
479 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
480 }
b8698a0f 481
ebfd146a
IR
482 return false;
483 }
484 }
485
486 /* Strided store or load. */
487 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
488 {
489 if (REFERENCE_CLASS_P (lhs))
490 {
491 /* Store. */
b8698a0f
L
492 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
493 stmt, &def_stmts0, &def_stmts1,
494 &first_stmt_dt0,
495 &first_stmt_dt1,
496 &first_stmt_def0_type,
ebfd146a
IR
497 &first_stmt_def1_type,
498 &first_stmt_const_oprnd,
499 ncopies_for_cost,
500 &pattern0, &pattern1))
501 return false;
502 }
b5aeb3bb
IR
503 else
504 {
505 /* Load. */
506 /* FORNOW: Check that there is no gap between the loads. */
507 if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
508 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
509 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
510 && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
511 {
512 if (vect_print_dump_info (REPORT_SLP))
513 {
514 fprintf (vect_dump, "Build SLP failed: strided "
515 "loads have gaps ");
516 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
517 }
b8698a0f 518
b5aeb3bb
IR
519 return false;
520 }
2f0fa28e 521
b5aeb3bb
IR
522 /* Check that the size of interleaved loads group is not
523 greater than the SLP group size. */
524 if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
525 {
526 if (vect_print_dump_info (REPORT_SLP))
527 {
528 fprintf (vect_dump, "Build SLP failed: the number of "
529 "interleaved loads is greater than"
530 " the SLP group size ");
531 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
532 }
b8698a0f 533
b5aeb3bb
IR
534 return false;
535 }
536
537 first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
538 if (prev_first_load)
539 {
540 /* Check that there are no loads from different interleaving
541 chains in the same node. The only exception is complex
542 numbers. */
543 if (prev_first_load != first_load
544 && rhs_code != REALPART_EXPR
545 && rhs_code != IMAGPART_EXPR)
546 {
547 if (vect_print_dump_info (REPORT_SLP))
548 {
549 fprintf (vect_dump, "Build SLP failed: different "
550 "interleaving chains in one node ");
551 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
552 }
553
554 return false;
555 }
556 }
557 else
558 prev_first_load = first_load;
b8698a0f 559
ebfd146a
IR
560 if (first_load == stmt)
561 {
562 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
720f5239 563 if (vect_supportable_dr_alignment (first_dr, false)
ebfd146a
IR
564 == dr_unaligned_unsupported)
565 {
566 if (vect_print_dump_info (REPORT_SLP))
567 {
568 fprintf (vect_dump, "Build SLP failed: unsupported "
569 "unaligned load ");
570 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
571 }
b8698a0f 572
ebfd146a
IR
573 return false;
574 }
b8698a0f 575
ebfd146a
IR
576 /* Analyze costs (for the first stmt in the group). */
577 vect_model_load_cost (vinfo_for_stmt (stmt),
578 ncopies_for_cost, *node);
579 }
b8698a0f 580
ebfd146a
IR
581 /* Store the place of this load in the interleaving chain. In
582 case that permutation is needed we later decide if a specific
583 permutation is supported. */
584 load_place = vect_get_place_in_interleaving_chain (stmt,
585 first_load);
586 if (load_place != i)
587 permutation = true;
b8698a0f 588
ebfd146a 589 VEC_safe_push (int, heap, *load_permutation, load_place);
b8698a0f 590
ebfd146a
IR
591 /* We stop the tree when we reach a group of loads. */
592 stop_recursion = true;
593 continue;
594 }
595 } /* Strided access. */
596 else
597 {
598 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
599 {
600 /* Not strided load. */
b8698a0f 601 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
602 {
603 fprintf (vect_dump, "Build SLP failed: not strided load ");
604 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
605 }
606
607 /* FORNOW: Not strided loads are not supported. */
608 return false;
609 }
610
611 /* Not memory operation. */
612 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
613 && TREE_CODE_CLASS (rhs_code) != tcc_unary)
614 {
b8698a0f 615 if (vect_print_dump_info (REPORT_SLP))
ebfd146a
IR
616 {
617 fprintf (vect_dump, "Build SLP failed: operation");
618 fprintf (vect_dump, " unsupported ");
619 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
620 }
621
622 return false;
623 }
624
b8698a0f 625 /* Find the def-stmts. */
a70d6342 626 if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
ebfd146a 627 &def_stmts0, &def_stmts1,
b8698a0f
L
628 &first_stmt_dt0, &first_stmt_dt1,
629 &first_stmt_def0_type,
ebfd146a
IR
630 &first_stmt_def1_type,
631 &first_stmt_const_oprnd,
632 ncopies_for_cost,
633 &pattern0, &pattern1))
634 return false;
635 }
636 }
637
638 /* Add the costs of the node to the overall instance costs. */
b8698a0f 639 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
ebfd146a
IR
640 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
641
642 /* Strided loads were reached - stop the recursion. */
643 if (stop_recursion)
644 {
645 if (permutation)
646 {
b8698a0f 647 VEC_safe_push (slp_tree, heap, *loads, *node);
35e1a5e7 648 *inside_cost
720f5239 649 += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
35e1a5e7 650 * group_size;
ebfd146a
IR
651 }
652
653 return true;
654 }
655
b8698a0f 656 /* Create SLP_TREE nodes for the definition node/s. */
8644a673 657 if (first_stmt_dt0 == vect_internal_def)
ebfd146a
IR
658 {
659 slp_tree left_node = XNEW (struct _slp_tree);
660 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
661 SLP_TREE_VEC_STMTS (left_node) = NULL;
662 SLP_TREE_LEFT (left_node) = NULL;
663 SLP_TREE_RIGHT (left_node) = NULL;
664 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
665 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
b8698a0f
L
666 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
667 inside_cost, outside_cost, ncopies_for_cost,
a70d6342
IR
668 max_nunits, load_permutation, loads,
669 vectorization_factor))
ebfd146a 670 return false;
b8698a0f 671
ebfd146a
IR
672 SLP_TREE_LEFT (*node) = left_node;
673 }
674
8644a673 675 if (first_stmt_dt1 == vect_internal_def)
ebfd146a
IR
676 {
677 slp_tree right_node = XNEW (struct _slp_tree);
678 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
679 SLP_TREE_VEC_STMTS (right_node) = NULL;
680 SLP_TREE_LEFT (right_node) = NULL;
681 SLP_TREE_RIGHT (right_node) = NULL;
682 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
683 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
a70d6342 684 if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
ebfd146a 685 inside_cost, outside_cost, ncopies_for_cost,
a70d6342
IR
686 max_nunits, load_permutation, loads,
687 vectorization_factor))
ebfd146a 688 return false;
b8698a0f 689
ebfd146a
IR
690 SLP_TREE_RIGHT (*node) = right_node;
691 }
692
693 return true;
694}
695
696
697static void
698vect_print_slp_tree (slp_tree node)
699{
700 int i;
701 gimple stmt;
702
703 if (!node)
704 return;
705
706 fprintf (vect_dump, "node ");
707 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
708 {
709 fprintf (vect_dump, "\n\tstmt %d ", i);
b8698a0f 710 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
ebfd146a
IR
711 }
712 fprintf (vect_dump, "\n");
713
714 vect_print_slp_tree (SLP_TREE_LEFT (node));
715 vect_print_slp_tree (SLP_TREE_RIGHT (node));
716}
717
718
b8698a0f
L
719/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
720 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
721 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
722 stmts in NODE are to be marked. */
723
724static void
725vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
726{
727 int i;
728 gimple stmt;
729
730 if (!node)
731 return;
732
733 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
734 if (j < 0 || i == j)
735 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
736
737 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
738 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
739}
740
741
a70d6342
IR
742/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
743
744static void
745vect_mark_slp_stmts_relevant (slp_tree node)
746{
747 int i;
748 gimple stmt;
749 stmt_vec_info stmt_info;
750
751 if (!node)
752 return;
753
754 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
755 {
756 stmt_info = vinfo_for_stmt (stmt);
b8698a0f 757 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
758 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
759 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
760 }
761
762 vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
763 vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
764}
765
766
b8698a0f 767/* Check if the permutation required by the SLP INSTANCE is supported.
ebfd146a
IR
768 Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed. */
769
770static bool
771vect_supported_slp_permutation_p (slp_instance instance)
772{
773 slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
774 gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
775 gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
776 VEC (slp_tree, heap) *sorted_loads = NULL;
777 int index;
778 slp_tree *tmp_loads = NULL;
b8698a0f 779 int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
ebfd146a 780 slp_tree load;
b8698a0f
L
781
782 /* FORNOW: The only supported loads permutation is loads from the same
ebfd146a 783 location in all the loads in the node, when the data-refs in
b8698a0f 784 nodes of LOADS constitute an interleaving chain.
ebfd146a
IR
785 Sort the nodes according to the order of accesses in the chain. */
786 tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
b8698a0f
L
787 for (i = 0, j = 0;
788 VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
789 && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
ebfd146a
IR
790 i += group_size, j++)
791 {
792 gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
793 /* Check that the loads are all in the same interleaving chain. */
794 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
795 {
796 if (vect_print_dump_info (REPORT_DETAILS))
797 {
798 fprintf (vect_dump, "Build SLP failed: unsupported data "
799 "permutation ");
800 print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
801 }
b8698a0f 802
ebfd146a 803 free (tmp_loads);
b8698a0f 804 return false;
ebfd146a
IR
805 }
806
807 tmp_loads[index] = load;
808 }
b8698a0f 809
ebfd146a
IR
810 sorted_loads = VEC_alloc (slp_tree, heap, group_size);
811 for (i = 0; i < group_size; i++)
812 VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);
813
814 VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
815 SLP_INSTANCE_LOADS (instance) = sorted_loads;
816 free (tmp_loads);
817
818 if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
819 SLP_INSTANCE_UNROLLING_FACTOR (instance),
820 instance, true))
821 return false;
822
823 return true;
824}
825
826
b5aeb3bb
IR
827/* Rearrange the statements of NODE according to PERMUTATION. */
828
829static void
830vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
831 VEC (int, heap) *permutation)
832{
833 gimple stmt;
834 VEC (gimple, heap) *tmp_stmts;
835 unsigned int index, i;
836
837 if (!node)
838 return;
839
840 vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
841 vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);
842
843 gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
844 tmp_stmts = VEC_alloc (gimple, heap, group_size);
845
846 for (i = 0; i < group_size; i++)
847 VEC_safe_push (gimple, heap, tmp_stmts, NULL);
848
849 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
850 {
851 index = VEC_index (int, permutation, i);
852 VEC_replace (gimple, tmp_stmts, index, stmt);
853 }
854
855 VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
856 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
857}
858
859
ebfd146a
IR
860/* Check if the required load permutation is supported.
861 LOAD_PERMUTATION contains a list of indices of the loads.
862 In SLP this permutation is relative to the order of strided stores that are
863 the base of the SLP instance. */
864
865static bool
866vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
867 VEC (int, heap) *load_permutation)
868{
b5aeb3bb
IR
869 int i = 0, j, prev = -1, next, k, number_of_groups;
870 bool supported, bad_permutation = false;
7417f6c0 871 sbitmap load_index;
b5aeb3bb
IR
872 slp_tree node;
873 gimple stmt;
ebfd146a 874
a70d6342 875 /* FORNOW: permutations are only supported in SLP. */
ebfd146a
IR
876 if (!slp_instn)
877 return false;
878
879 if (vect_print_dump_info (REPORT_SLP))
880 {
881 fprintf (vect_dump, "Load permutation ");
882 for (i = 0; VEC_iterate (int, load_permutation, i, next); i++)
883 fprintf (vect_dump, "%d ", next);
884 }
885
b5aeb3bb
IR
886 /* In case of reduction every load permutation is allowed, since the order
887 of the reduction statements is not important (as opposed to the case of
888 strided stores). The only condition we need to check is that all the
889 load nodes are of the same size and have the same permutation (and then
890 rearrange all the nodes of the SLP instance according to this
891 permutation). */
892
893 /* Check that all the load nodes are of the same size. */
894 for (i = 0;
895 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node);
896 i++)
897 if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
898 != (unsigned) group_size)
899 return false;
900
901 node = SLP_INSTANCE_TREE (slp_instn);
902 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
903 /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
904 instance, not all the loads belong to the same node or interleaving
905 group. Hence, we need to divide them into groups according to
906 GROUP_SIZE. */
907 number_of_groups = VEC_length (int, load_permutation) / group_size;
908
909 /* Reduction (there are no data-refs in the root). */
910 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
911 {
912 int first_group_load_index;
913
914 /* Compare all the permutation sequences to the first one. */
915 for (i = 1; i < number_of_groups; i++)
916 {
917 k = 0;
918 for (j = i * group_size; j < i * group_size + group_size; j++)
919 {
920 next = VEC_index (int, load_permutation, j);
921 first_group_load_index = VEC_index (int, load_permutation, k);
922
923 if (next != first_group_load_index)
924 {
925 bad_permutation = true;
926 break;
927 }
928
929 k++;
930 }
931
932 if (bad_permutation)
933 break;
934 }
935
936 if (!bad_permutation)
937 {
938 /* This permutaion is valid for reduction. Since the order of the
939 statements in the nodes is not important unless they are memory
940 accesses, we can rearrange the statements in all the nodes
941 according to the order of the loads. */
942 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
943 load_permutation);
944 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
945 return true;
946 }
947 }
948
b8698a0f
L
949 /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
950 GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
b5aeb3bb 951 well (unless it's reduction). */
ebfd146a
IR
952 if (VEC_length (int, load_permutation)
953 != (unsigned int) (group_size * group_size))
954 return false;
955
956 supported = true;
7417f6c0
IR
957 load_index = sbitmap_alloc (group_size);
958 sbitmap_zero (load_index);
ebfd146a
IR
959 for (j = 0; j < group_size; j++)
960 {
961 for (i = j * group_size, k = 0;
962 VEC_iterate (int, load_permutation, i, next) && k < group_size;
963 i++, k++)
964 {
965 if (i != j * group_size && next != prev)
966 {
967 supported = false;
968 break;
969 }
970
971 prev = next;
b8698a0f 972 }
7417f6c0
IR
973
974 if (TEST_BIT (load_index, prev))
975 {
976 supported = false;
977 break;
978 }
979
980 SET_BIT (load_index, prev);
ebfd146a 981 }
59eefaa6
IR
982
983 for (j = 0; j < group_size; j++)
984 if (!TEST_BIT (load_index, j))
985 return false;
986
7417f6c0 987 sbitmap_free (load_index);
ebfd146a
IR
988
989 if (supported && i == group_size * group_size
990 && vect_supported_slp_permutation_p (slp_instn))
991 return true;
992
b8698a0f 993 return false;
ebfd146a
IR
994}
995
996
b8698a0f 997/* Find the first load in the loop that belongs to INSTANCE.
ebfd146a 998 When loads are in several SLP nodes, there can be a case in which the first
b8698a0f 999 load does not appear in the first SLP node to be transformed, causing
ebfd146a
IR
1000 incorrect order of statements. Since we generate all the loads together,
1001 they must be inserted before the first load of the SLP instance and not
1002 before the first load of the first node of the instance. */
b8698a0f
L
1003static gimple
1004vect_find_first_load_in_slp_instance (slp_instance instance)
ebfd146a
IR
1005{
1006 int i, j;
1007 slp_tree load_node;
1008 gimple first_load = NULL, load;
1009
b8698a0f
L
1010 for (i = 0;
1011 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node);
ebfd146a 1012 i++)
b8698a0f 1013 for (j = 0;
ebfd146a
IR
1014 VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load);
1015 j++)
1016 first_load = get_earlier_stmt (load, first_load);
b8698a0f 1017
ebfd146a
IR
1018 return first_load;
1019}
1020
1021
1022/* Analyze an SLP instance starting from a group of strided stores. Call
b8698a0f 1023 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1024 Return FALSE if it's impossible to SLP any stmt in the loop. */
1025
1026static bool
a70d6342
IR
1027vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1028 gimple stmt)
ebfd146a
IR
1029{
1030 slp_instance new_instance;
1031 slp_tree node = XNEW (struct _slp_tree);
1032 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1033 unsigned int unrolling_factor = 1, nunits;
b5aeb3bb 1034 tree vectype, scalar_type = NULL_TREE;
ebfd146a 1035 gimple next;
0f900dfa 1036 unsigned int vectorization_factor = 0;
b5aeb3bb 1037 int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
ebfd146a
IR
1038 unsigned int max_nunits = 0;
1039 VEC (int, heap) *load_permutation;
1040 VEC (slp_tree, heap) *loads;
b5aeb3bb
IR
1041 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
1042
1043 if (dr)
1044 {
1045 scalar_type = TREE_TYPE (DR_REF (dr));
1046 vectype = get_vectype_for_scalar_type (scalar_type);
1047 group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
1048 }
1049 else
1050 {
1051 gcc_assert (loop_vinfo);
1052 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1053 group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
1054 }
b8698a0f 1055
ebfd146a
IR
1056 if (!vectype)
1057 {
1058 if (vect_print_dump_info (REPORT_SLP))
1059 {
1060 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
1061 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
1062 }
b5aeb3bb 1063
ebfd146a
IR
1064 return false;
1065 }
1066
1067 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342
IR
1068 if (loop_vinfo)
1069 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1070 else
1071 /* No multitypes in BB SLP. */
1072 vectorization_factor = nunits;
1073
a70d6342
IR
1074 /* Calculate the unrolling factor. */
1075 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1076 if (unrolling_factor != 1 && !loop_vinfo)
1077 {
1078 if (vect_print_dump_info (REPORT_SLP))
e9dbe7bb
IR
1079 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1080 " block SLP");
b8698a0f 1081
a70d6342
IR
1082 return false;
1083 }
1084
b8698a0f 1085 /* Create a node (a root of the SLP tree) for the packed strided stores. */
ebfd146a
IR
1086 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
1087 next = stmt;
b5aeb3bb 1088 if (dr)
ebfd146a 1089 {
b5aeb3bb
IR
1090 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1091 while (next)
1092 {
1093 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1094 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1095 }
1096 }
1097 else
1098 {
1099 /* Collect reduction statements. */
1100 for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
1101 next);
1102 i++)
1103 {
1104 VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
1105 if (vect_print_dump_info (REPORT_DETAILS))
1106 {
1107 fprintf (vect_dump, "pushing reduction into node: ");
1108 print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
1109 }
1110 }
ebfd146a
IR
1111 }
1112
1113 SLP_TREE_VEC_STMTS (node) = NULL;
1114 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
1115 SLP_TREE_LEFT (node) = NULL;
1116 SLP_TREE_RIGHT (node) = NULL;
1117 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
1118 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
1119
ebfd146a
IR
1120 /* Calculate the number of vector stmts to create based on the unrolling
1121 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1122 GROUP_SIZE / NUNITS otherwise. */
1123 ncopies_for_cost = unrolling_factor * group_size / nunits;
b8698a0f
L
1124
1125 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1126 loads = VEC_alloc (slp_tree, heap, group_size);
ebfd146a
IR
1127
1128 /* Build the tree for the SLP instance. */
b8698a0f
L
1129 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1130 &inside_cost, &outside_cost, ncopies_for_cost,
1131 &max_nunits, &load_permutation, &loads,
a70d6342 1132 vectorization_factor))
ebfd146a 1133 {
b8698a0f 1134 /* Create a new SLP instance. */
ebfd146a
IR
1135 new_instance = XNEW (struct _slp_instance);
1136 SLP_INSTANCE_TREE (new_instance) = node;
1137 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
1138 /* Calculate the unrolling factor based on the smallest type in the
1139 loop. */
1140 if (max_nunits > nunits)
1141 unrolling_factor = least_common_multiple (max_nunits, group_size)
1142 / group_size;
b8698a0f 1143
ebfd146a
IR
1144 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1145 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1146 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1147 SLP_INSTANCE_LOADS (new_instance) = loads;
1148 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1149 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
1150 if (VEC_length (slp_tree, loads))
1151 {
1152 if (!vect_supported_load_permutation_p (new_instance, group_size,
b8698a0f 1153 load_permutation))
ebfd146a
IR
1154 {
1155 if (vect_print_dump_info (REPORT_SLP))
1156 {
1157 fprintf (vect_dump, "Build SLP failed: unsupported load "
1158 "permutation ");
1159 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1160 }
1161
1162 vect_free_slp_instance (new_instance);
1163 return false;
1164 }
1165
1166 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1167 = vect_find_first_load_in_slp_instance (new_instance);
1168 }
1169 else
1170 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1171
a70d6342 1172 if (loop_vinfo)
b8698a0f
L
1173 VEC_safe_push (slp_instance, heap,
1174 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
a70d6342
IR
1175 new_instance);
1176 else
1177 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1178 new_instance);
b8698a0f 1179
ebfd146a
IR
1180 if (vect_print_dump_info (REPORT_SLP))
1181 vect_print_slp_tree (node);
1182
1183 return true;
1184 }
1185
1186 /* Failed to SLP. */
1187 /* Free the allocated memory. */
1188 vect_free_slp_tree (node);
1189 VEC_free (int, heap, load_permutation);
1190 VEC_free (slp_tree, heap, loads);
b8698a0f 1191
a70d6342 1192 return false;
ebfd146a
IR
1193}
1194
1195
1196/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1197 trees of packed scalar stmts if SLP is possible. */
1198
1199bool
a70d6342 1200vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a
IR
1201{
1202 unsigned int i;
b5aeb3bb 1203 VEC (gimple, heap) *strided_stores, *reductions = NULL;
ebfd146a 1204 gimple store;
a70d6342 1205 bool ok = false;
ebfd146a
IR
1206
1207 if (vect_print_dump_info (REPORT_SLP))
1208 fprintf (vect_dump, "=== vect_analyze_slp ===");
1209
a70d6342 1210 if (loop_vinfo)
b5aeb3bb
IR
1211 {
1212 strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
1213 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1214 }
a70d6342
IR
1215 else
1216 strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
b8698a0f 1217
b5aeb3bb 1218 /* Find SLP sequences starting from groups of strided stores. */
ebfd146a 1219 for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
a70d6342
IR
1220 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
1221 ok = true;
ebfd146a 1222
b8698a0f 1223 if (bb_vinfo && !ok)
a70d6342
IR
1224 {
1225 if (vect_print_dump_info (REPORT_SLP))
1226 fprintf (vect_dump, "Failed to SLP the basic block.");
1227
1228 return false;
1229 }
ebfd146a 1230
b5aeb3bb 1231 /* Find SLP sequences starting from groups of reductions. */
74500b3e 1232 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
b5aeb3bb
IR
1233 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1234 VEC_index (gimple, reductions, 0)))
1235 ok = true;
1236
ebfd146a
IR
1237 return true;
1238}
1239
1240
1241/* For each possible SLP instance decide whether to SLP it and calculate overall
1242 unrolling factor needed to SLP the loop. */
1243
1244void
1245vect_make_slp_decision (loop_vec_info loop_vinfo)
1246{
1247 unsigned int i, unrolling_factor = 1;
1248 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1249 slp_instance instance;
1250 int decided_to_slp = 0;
1251
1252 if (vect_print_dump_info (REPORT_SLP))
1253 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1254
1255 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1256 {
1257 /* FORNOW: SLP if you can. */
1258 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1259 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1260
b8698a0f
L
1261 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1262 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ebfd146a
IR
1263 loop-based vectorization. Such stmts will be marked as HYBRID. */
1264 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1265 decided_to_slp++;
1266 }
1267
1268 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1269
b8698a0f
L
1270 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
1271 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
ebfd146a
IR
1272 decided_to_slp, unrolling_factor);
1273}
1274
1275
1276/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1277 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1278
1279static void
1280vect_detect_hybrid_slp_stmts (slp_tree node)
1281{
1282 int i;
1283 gimple stmt;
1284 imm_use_iterator imm_iter;
1285 gimple use_stmt;
99f51320 1286 stmt_vec_info stmt_vinfo;
ebfd146a
IR
1287
1288 if (!node)
1289 return;
1290
1291 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1292 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
1293 && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1294 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
99f51320
IR
1295 if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
1296 && !STMT_SLP_TYPE (stmt_vinfo)
1297 && (STMT_VINFO_RELEVANT (stmt_vinfo)
b5aeb3bb
IR
1298 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
1299 && !(gimple_code (use_stmt) == GIMPLE_PHI
1300 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
1301 == vect_reduction_def))
ebfd146a
IR
1302 vect_mark_slp_stmts (node, hybrid, i);
1303
1304 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
1305 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
1306}
1307
1308
1309/* Find stmts that must be both vectorized and SLPed. */
1310
1311void
1312vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1313{
1314 unsigned int i;
1315 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1316 slp_instance instance;
1317
1318 if (vect_print_dump_info (REPORT_SLP))
1319 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
1320
1321 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1322 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
1323}
1324
a70d6342
IR
1325
1326/* Create and initialize a new bb_vec_info struct for BB, as well as
1327 stmt_vec_info structs for all the stmts in it. */
b8698a0f 1328
a70d6342
IR
1329static bb_vec_info
1330new_bb_vec_info (basic_block bb)
1331{
1332 bb_vec_info res = NULL;
1333 gimple_stmt_iterator gsi;
1334
1335 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
1336 BB_VINFO_BB (res) = bb;
1337
1338 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1339 {
1340 gimple stmt = gsi_stmt (gsi);
1341 gimple_set_uid (stmt, 0);
1342 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
1343 }
1344
1345 BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
1346 BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);
1347
1348 bb->aux = res;
1349 return res;
1350}
1351
1352
1353/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
1354 stmts in the basic block. */
1355
1356static void
1357destroy_bb_vec_info (bb_vec_info bb_vinfo)
1358{
1359 basic_block bb;
1360 gimple_stmt_iterator si;
1361
1362 if (!bb_vinfo)
1363 return;
1364
1365 bb = BB_VINFO_BB (bb_vinfo);
1366
1367 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1368 {
1369 gimple stmt = gsi_stmt (si);
1370 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1371
1372 if (stmt_info)
1373 /* Free stmt_vec_info. */
1374 free_stmt_vec_info (stmt);
1375 }
1376
1377 VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
1378 VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
1379 free (bb_vinfo);
1380 bb->aux = NULL;
1381}
1382
1383
1384/* Analyze statements contained in SLP tree node after recursively analyzing
1385 the subtree. Return TRUE if the operations are supported. */
1386
1387static bool
1388vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
1389{
1390 bool dummy;
1391 int i;
1392 gimple stmt;
1393
1394 if (!node)
1395 return true;
1396
1397 if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
1398 || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
1399 return false;
1400
1401 for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
1402 {
1403 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1404 gcc_assert (stmt_info);
1405 gcc_assert (PURE_SLP_STMT (stmt_info));
1406
1407 if (!vect_analyze_stmt (stmt, &dummy, node))
1408 return false;
1409 }
1410
1411 return true;
1412}
1413
1414
1415/* Analyze statements in SLP instances of the basic block. Return TRUE if the
1416 operations are supported. */
1417
1418static bool
1419vect_slp_analyze_operations (bb_vec_info bb_vinfo)
1420{
1421 VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1422 slp_instance instance;
1423 int i;
1424
1425 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
1426 {
b8698a0f 1427 if (!vect_slp_analyze_node_operations (bb_vinfo,
a70d6342
IR
1428 SLP_INSTANCE_TREE (instance)))
1429 {
1430 vect_free_slp_instance (instance);
1431 VEC_ordered_remove (slp_instance, slp_instances, i);
1432 }
1433 else
1434 i++;
b8698a0f
L
1435 }
1436
a70d6342
IR
1437 if (!VEC_length (slp_instance, slp_instances))
1438 return false;
1439
1440 return true;
1441}
1442
1443
1444/* Cheick if the basic block can be vectorized. */
1445
1446bb_vec_info
1447vect_slp_analyze_bb (basic_block bb)
1448{
1449 bb_vec_info bb_vinfo;
1450 VEC (ddr_p, heap) *ddrs;
1451 VEC (slp_instance, heap) *slp_instances;
1452 slp_instance instance;
1453 int i, insns = 0;
1454 gimple_stmt_iterator gsi;
777e1f09
RG
1455 int min_vf = 2;
1456 int max_vf = MAX_VECTORIZATION_FACTOR;
a70d6342
IR
1457
1458 if (vect_print_dump_info (REPORT_DETAILS))
1459 fprintf (vect_dump, "===vect_slp_analyze_bb===\n");
1460
1461 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
c7da0354
RG
1462 {
1463 gimple stmt = gsi_stmt (gsi);
1464 if (!is_gimple_debug (stmt)
1465 && !gimple_nop_p (stmt)
69e3b47f 1466 && gimple_code (stmt) != GIMPLE_LABEL)
c7da0354
RG
1467 insns++;
1468 }
a70d6342
IR
1469
1470 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
1471 {
1472 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1473 fprintf (vect_dump, "not vectorized: too many instructions in basic "
1474 "block.\n");
1475
1476 return NULL;
1477 }
1478
1479 bb_vinfo = new_bb_vec_info (bb);
1480 if (!bb_vinfo)
1481 return NULL;
1482
777e1f09 1483 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
a70d6342
IR
1484 {
1485 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1486 fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
1487 "block.\n");
b8698a0f 1488
a70d6342
IR
1489 destroy_bb_vec_info (bb_vinfo);
1490 return NULL;
1491 }
1492
1493 ddrs = BB_VINFO_DDRS (bb_vinfo);
b8698a0f 1494 if (!VEC_length (ddr_p, ddrs))
a70d6342
IR
1495 {
1496 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1497 fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
1498 "block.\n");
1499
1500 destroy_bb_vec_info (bb_vinfo);
1501 return NULL;
1502 }
1503
777e1f09
RG
1504 if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
1505 || min_vf > max_vf)
1506 {
1507 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1508 fprintf (vect_dump, "not vectorized: unhandled data dependence "
1509 "in basic block.\n");
1510
1511 destroy_bb_vec_info (bb_vinfo);
1512 return NULL;
1513 }
1514
a70d6342
IR
1515 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
1516 {
1517 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1518 fprintf (vect_dump, "not vectorized: bad data alignment in basic "
1519 "block.\n");
b8698a0f 1520
a70d6342
IR
1521 destroy_bb_vec_info (bb_vinfo);
1522 return NULL;
1523 }
b8698a0f 1524
a70d6342
IR
1525 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
1526 {
1527 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1528 fprintf (vect_dump, "not vectorized: unhandled data access in basic "
1529 "block.\n");
b8698a0f 1530
a70d6342
IR
1531 destroy_bb_vec_info (bb_vinfo);
1532 return NULL;
1533 }
1534
1535 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
1536 {
1537 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1538 fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
1539 "block.\n");
1540
1541 destroy_bb_vec_info (bb_vinfo);
1542 return NULL;
1543 }
1544
1545 /* Check the SLP opportunities in the basic block, analyze and build SLP
1546 trees. */
1547 if (!vect_analyze_slp (NULL, bb_vinfo))
1548 {
1549 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1550 fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
1551 "in basic block.\n");
1552
1553 destroy_bb_vec_info (bb_vinfo);
1554 return NULL;
1555 }
b8698a0f 1556
a70d6342
IR
1557 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
1558
1559 /* Mark all the statements that we want to vectorize as pure SLP and
1560 relevant. */
1561 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1562 {
1563 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1564 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
b8698a0f 1565 }
a70d6342
IR
1566
1567 if (!vect_slp_analyze_operations (bb_vinfo))
1568 {
1569 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1570 fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");
1571
1572 destroy_bb_vec_info (bb_vinfo);
1573 return NULL;
1574 }
1575
1576 if (vect_print_dump_info (REPORT_DETAILS))
e9dbe7bb 1577 fprintf (vect_dump, "Basic block will be vectorized using SLP\n");
a70d6342
IR
1578
1579 return bb_vinfo;
1580}
1581
1582
b8698a0f 1583/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
ebfd146a
IR
1584 the number of created vector stmts depends on the unrolling factor). However,
1585 the actual number of vector stmts for every SLP node depends on VF which is
1586 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
b8698a0f 1587 In this function we assume that the inside costs calculated in
ebfd146a
IR
1588 vect_model_xxx_cost are linear in ncopies. */
1589
1590void
1591vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
1592{
1593 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1594 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1595 slp_instance instance;
1596
1597 if (vect_print_dump_info (REPORT_SLP))
1598 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
1599
1600 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
1601 /* We assume that costs are linear in ncopies. */
b8698a0f
L
1602 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
1603 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
ebfd146a
IR
1604}
1605
a70d6342 1606
b8698a0f
L
1607/* For constant and loop invariant defs of SLP_NODE this function returns
1608 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
ebfd146a 1609 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
b5aeb3bb
IR
1610 stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
1611 REDUC_INDEX is the index of the reduction operand in the statements, unless
1612 it is -1. */
ebfd146a
IR
1613
1614static void
1615vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
b5aeb3bb
IR
1616 unsigned int op_num, unsigned int number_of_vectors,
1617 int reduc_index)
ebfd146a
IR
1618{
1619 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1620 gimple stmt = VEC_index (gimple, stmts, 0);
1621 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
ebfd146a
IR
1622 int nunits;
1623 tree vec_cst;
1624 tree t = NULL_TREE;
1625 int j, number_of_places_left_in_vector;
1626 tree vector_type;
1627 tree op, vop;
1628 int group_size = VEC_length (gimple, stmts);
1629 unsigned int vec_num, i;
1630 int number_of_copies = 1;
1631 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1632 bool constant_p, is_store;
b5aeb3bb
IR
1633 tree neutral_op = NULL;
1634
1635 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1636 {
1637 enum tree_code code = gimple_assign_rhs_code (stmt);
1638 if (reduc_index == -1)
1639 {
1640 VEC_free (tree, heap, *vec_oprnds);
1641 return;
1642 }
1643
1644 op_num = reduc_index - 1;
1645 op = gimple_op (stmt, op_num + 1);
1646 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
1647 we need either neutral operands or the original operands. See
1648 get_initial_def_for_reduction() for details. */
1649 switch (code)
1650 {
1651 case WIDEN_SUM_EXPR:
1652 case DOT_PROD_EXPR:
1653 case PLUS_EXPR:
1654 case MINUS_EXPR:
1655 case BIT_IOR_EXPR:
1656 case BIT_XOR_EXPR:
1657 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1658 neutral_op = build_real (TREE_TYPE (op), dconst0);
1659 else
1660 neutral_op = build_int_cst (TREE_TYPE (op), 0);
1661
1662 break;
1663
1664 case MULT_EXPR:
b5aeb3bb
IR
1665 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
1666 neutral_op = build_real (TREE_TYPE (op), dconst1);
1667 else
1668 neutral_op = build_int_cst (TREE_TYPE (op), 1);
1669
1670 break;
1671
c1e822d5
IR
1672 case BIT_AND_EXPR:
1673 neutral_op = build_int_cst (TREE_TYPE (op), -1);
1674 break;
1675
b5aeb3bb
IR
1676 default:
1677 neutral_op = NULL;
1678 }
1679 }
ebfd146a
IR
1680
1681 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1682 {
1683 is_store = true;
1684 op = gimple_assign_rhs1 (stmt);
1685 }
1686 else
1687 {
1688 is_store = false;
1689 op = gimple_op (stmt, op_num + 1);
1690 }
1691
1692 if (CONSTANT_CLASS_P (op))
cd481d83 1693 constant_p = true;
ebfd146a 1694 else
cd481d83
IR
1695 constant_p = false;
1696
1697 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1698 gcc_assert (vector_type);
ebfd146a
IR
1699
1700 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
1701
1702 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 1703 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
1704
1705 For example, we have two scalar operands, s1 and s2 (e.g., group of
1706 strided accesses of size two), while NUNITS is four (i.e., four scalars
1707 of this type can be packed in a vector). The output vector will contain
1708 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1709 will be 2).
1710
b8698a0f 1711 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
1712 containing the operands.
1713
1714 For example, NUNITS is four as before, and the group size is 8
1715 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1716 {s5, s6, s7, s8}. */
b8698a0f 1717
ebfd146a
IR
1718 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1719
1720 number_of_places_left_in_vector = nunits;
1721 for (j = 0; j < number_of_copies; j++)
1722 {
1723 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1724 {
1725 if (is_store)
1726 op = gimple_assign_rhs1 (stmt);
1727 else
1728 op = gimple_op (stmt, op_num + 1);
b8698a0f 1729
b5aeb3bb
IR
1730 if (reduc_index != -1)
1731 {
1732 struct loop *loop = (gimple_bb (stmt))->loop_father;
1733 gimple def_stmt = SSA_NAME_DEF_STMT (op);
1734
1735 gcc_assert (loop);
1736 /* Get the def before the loop. */
1737 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
1738 loop_preheader_edge (loop));
1739 if (j != (number_of_copies - 1) && neutral_op)
1740 op = neutral_op;
1741 }
1742
ebfd146a
IR
1743 /* Create 'vect_ = {op0,op1,...,opn}'. */
1744 t = tree_cons (NULL_TREE, op, t);
1745
1746 number_of_places_left_in_vector--;
1747
1748 if (number_of_places_left_in_vector == 0)
1749 {
1750 number_of_places_left_in_vector = nunits;
1751
1752 if (constant_p)
1753 vec_cst = build_vector (vector_type, t);
1754 else
1755 vec_cst = build_constructor_from_list (vector_type, t);
1756 VEC_quick_push (tree, voprnds,
1757 vect_init_vector (stmt, vec_cst, vector_type, NULL));
1758 t = NULL_TREE;
1759 }
1760 }
1761 }
1762
b8698a0f 1763 /* Since the vectors are created in the reverse order, we should invert
ebfd146a
IR
1764 them. */
1765 vec_num = VEC_length (tree, voprnds);
1766 for (j = vec_num - 1; j >= 0; j--)
1767 {
1768 vop = VEC_index (tree, voprnds, j);
1769 VEC_quick_push (tree, *vec_oprnds, vop);
1770 }
1771
1772 VEC_free (tree, heap, voprnds);
1773
1774 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
1775 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1776 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a
IR
1777 to replicate the vectors. */
1778 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1779 {
b5aeb3bb
IR
1780 tree neutral_vec = NULL;
1781
1782 if (neutral_op)
1783 {
1784 if (!neutral_vec)
1785 {
1786 t = NULL;
1787 for (i = 0; i < (unsigned) nunits; i++)
1788 t = tree_cons (NULL_TREE, neutral_op, t);
1789 neutral_vec = build_vector (vector_type, t);
1790 }
1791
1792 VEC_quick_push (tree, *vec_oprnds, neutral_vec);
1793 }
1794 else
1795 {
1796 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1797 VEC_quick_push (tree, *vec_oprnds, vop);
1798 }
ebfd146a
IR
1799 }
1800}
1801
1802
1803/* Get vectorized definitions from SLP_NODE that contains corresponding
1804 vectorized def-stmts. */
1805
1806static void
1807vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1808{
1809 tree vec_oprnd;
1810 gimple vec_def_stmt;
1811 unsigned int i;
1812
1813 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1814
1815 for (i = 0;
1816 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1817 i++)
1818 {
1819 gcc_assert (vec_def_stmt);
1820 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1821 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1822 }
1823}
1824
1825
b8698a0f
L
1826/* Get vectorized definitions for SLP_NODE.
1827 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
1828 call vect_get_constant_vectors() to create vector stmts.
1829 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1830 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
b8698a0f 1831 vect_get_slp_vect_defs() to retrieve them.
ebfd146a 1832 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
b8698a0f
L
1833 the right node. This is used when the second operand must remain scalar. */
1834
ebfd146a
IR
1835void
1836vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
b5aeb3bb 1837 VEC (tree,heap) **vec_oprnds1, int reduc_index)
ebfd146a
IR
1838{
1839 gimple first_stmt;
1840 enum tree_code code;
1841 int number_of_vects;
b8698a0f 1842 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
ebfd146a
IR
1843
1844 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1845 /* The number of vector defs is determined by the number of vector statements
1846 in the node from which we get those statements. */
b8698a0f 1847 if (SLP_TREE_LEFT (slp_node))
ebfd146a
IR
1848 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1849 else
1850 {
1851 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1852 /* Number of vector stmts was calculated according to LHS in
1853 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1854 necessary. See vect_get_smallest_scalar_type() for details. */
1855 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1856 &rhs_size_unit);
1857 if (rhs_size_unit != lhs_size_unit)
1858 {
1859 number_of_vects *= rhs_size_unit;
1860 number_of_vects /= lhs_size_unit;
1861 }
1862 }
1863
1864 /* Allocate memory for vectorized defs. */
1865 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1866
1867 /* SLP_NODE corresponds either to a group of stores or to a group of
b5aeb3bb
IR
1868 unary/binary operations. We don't call this function for loads.
1869 For reduction defs we call vect_get_constant_vectors(), since we are
1870 looking for initial loop invariant values. */
1871 if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
ebfd146a
IR
1872 /* The defs are already vectorized. */
1873 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1874 else
1875 /* Build vectors from scalar defs. */
b5aeb3bb
IR
1876 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
1877 reduc_index);
ebfd146a
IR
1878
1879 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1880 /* Since we don't call this function with loads, this is a group of
1881 stores. */
1882 return;
1883
b5aeb3bb
IR
1884 /* For reductions, we only need initial values. */
1885 if (reduc_index != -1)
1886 return;
1887
ebfd146a
IR
1888 code = gimple_assign_rhs_code (first_stmt);
1889 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1890 return;
1891
1892 /* The number of vector defs is determined by the number of vector statements
1893 in the node from which we get those statements. */
1894 if (SLP_TREE_RIGHT (slp_node))
1895 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1896 else
1897 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1898
1899 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1900
1901 if (SLP_TREE_RIGHT (slp_node))
1902 /* The defs are already vectorized. */
1903 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1904 else
1905 /* Build vectors from scalar defs. */
b5aeb3bb 1906 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
ebfd146a
IR
1907}
1908
a70d6342 1909
b8698a0f 1910/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
ebfd146a
IR
1911 building a vector of type MASK_TYPE from it) and two input vectors placed in
1912 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
1913 shifting by STRIDE elements of DR_CHAIN for every copy.
1914 (STRIDE is the number of vectorized stmts for NODE divided by the number of
b8698a0f 1915 copies).
ebfd146a
IR
1916 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
1917 the created stmts must be inserted. */
1918
1919static inline void
b8698a0f 1920vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
faf63e39 1921 tree mask, int first_vec_indx, int second_vec_indx,
b8698a0f
L
1922 gimple_stmt_iterator *gsi, slp_tree node,
1923 tree builtin_decl, tree vectype,
ebfd146a
IR
1924 VEC(tree,heap) *dr_chain,
1925 int ncopies, int vect_stmts_counter)
1926{
faf63e39 1927 tree perm_dest;
ebfd146a
IR
1928 gimple perm_stmt = NULL;
1929 stmt_vec_info next_stmt_info;
0f900dfa 1930 int i, stride;
ebfd146a 1931 tree first_vec, second_vec, data_ref;
ebfd146a 1932
ebfd146a 1933 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
ebfd146a 1934
b8698a0f 1935 /* Initialize the vect stmts of NODE to properly insert the generated
ebfd146a 1936 stmts later. */
b8698a0f 1937 for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
ebfd146a
IR
1938 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
1939 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);
1940
1941 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
1942 for (i = 0; i < ncopies; i++)
1943 {
1944 first_vec = VEC_index (tree, dr_chain, first_vec_indx);
1945 second_vec = VEC_index (tree, dr_chain, second_vec_indx);
1946
ebfd146a 1947 /* Generate the permute statement. */
36c4a482
NF
1948 perm_stmt = gimple_build_call (builtin_decl,
1949 3, first_vec, second_vec, mask);
ebfd146a
IR
1950 data_ref = make_ssa_name (perm_dest, perm_stmt);
1951 gimple_call_set_lhs (perm_stmt, data_ref);
1952 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
ebfd146a 1953
b8698a0f
L
1954 /* Store the vector statement in NODE. */
1955 VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
ebfd146a
IR
1956 stride * i + vect_stmts_counter, perm_stmt);
1957
1958 first_vec_indx += stride;
1959 second_vec_indx += stride;
1960 }
1961
1962 /* Mark the scalar stmt as vectorized. */
1963 next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
1964 STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
1965}
1966
1967
b8698a0f 1968/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
ebfd146a 1969 return in CURRENT_MASK_ELEMENT its equivalent in target specific
b8698a0f 1970 representation. Check that the mask is valid and return FALSE if not.
ebfd146a
IR
1971 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
1972 the next vector, i.e., the current first vector is not needed. */
b8698a0f 1973
ebfd146a 1974static bool
b8698a0f 1975vect_get_mask_element (gimple stmt, int first_mask_element, int m,
ebfd146a 1976 int mask_nunits, bool only_one_vec, int index,
b8698a0f 1977 int *mask, int *current_mask_element,
ebfd146a
IR
1978 bool *need_next_vector)
1979{
1980 int i;
1981 static int number_of_mask_fixes = 1;
1982 static bool mask_fixed = false;
1983 static bool needs_first_vector = false;
1984
1985 /* Convert to target specific representation. */
1986 *current_mask_element = first_mask_element + m;
1987 /* Adjust the value in case it's a mask for second and third vectors. */
1988 *current_mask_element -= mask_nunits * (number_of_mask_fixes - 1);
1989
1990 if (*current_mask_element < mask_nunits)
1991 needs_first_vector = true;
1992
1993 /* We have only one input vector to permute but the mask accesses values in
1994 the next vector as well. */
1995 if (only_one_vec && *current_mask_element >= mask_nunits)
1996 {
1997 if (vect_print_dump_info (REPORT_DETAILS))
1998 {
1999 fprintf (vect_dump, "permutation requires at least two vectors ");
2000 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2001 }
2002
2003 return false;
2004 }
2005
2006 /* The mask requires the next vector. */
2007 if (*current_mask_element >= mask_nunits * 2)
2008 {
2009 if (needs_first_vector || mask_fixed)
2010 {
2011 /* We either need the first vector too or have already moved to the
b8698a0f 2012 next vector. In both cases, this permutation needs three
ebfd146a
IR
2013 vectors. */
2014 if (vect_print_dump_info (REPORT_DETAILS))
2015 {
2016 fprintf (vect_dump, "permutation requires at "
2017 "least three vectors ");
2018 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2019 }
2020
2021 return false;
2022 }
2023
2024 /* We move to the next vector, dropping the first one and working with
2025 the second and the third - we need to adjust the values of the mask
2026 accordingly. */
2027 *current_mask_element -= mask_nunits * number_of_mask_fixes;
2028
2029 for (i = 0; i < index; i++)
2030 mask[i] -= mask_nunits * number_of_mask_fixes;
2031
2032 (number_of_mask_fixes)++;
2033 mask_fixed = true;
2034 }
2035
2036 *need_next_vector = mask_fixed;
2037
2038 /* This was the last element of this mask. Start a new one. */
2039 if (index == mask_nunits - 1)
2040 {
2041 number_of_mask_fixes = 1;
2042 mask_fixed = false;
2043 needs_first_vector = false;
2044 }
2045
2046 return true;
2047}
2048
2049
2050/* Generate vector permute statements from a list of loads in DR_CHAIN.
2051 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
2052 permute statements for SLP_NODE_INSTANCE. */
2053bool
2054vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
2055 gimple_stmt_iterator *gsi, int vf,
2056 slp_instance slp_node_instance, bool analyze_only)
2057{
2058 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2059 tree mask_element_type = NULL_TREE, mask_type;
2060 int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
2061 slp_tree node;
2062 tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
2063 gimple next_scalar_stmt;
2064 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
2065 int first_mask_element;
2066 int index, unroll_factor, *mask, current_mask_element, ncopies;
2067 bool only_one_vec = false, need_next_vector = false;
2068 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
2069
2070 if (!targetm.vectorize.builtin_vec_perm)
2071 {
2072 if (vect_print_dump_info (REPORT_DETAILS))
2073 {
2074 fprintf (vect_dump, "no builtin for vect permute for ");
2075 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2076 }
2077
2078 return false;
2079 }
2080
2081 builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
2082 &mask_element_type);
2083 if (!builtin_decl || !mask_element_type)
2084 {
2085 if (vect_print_dump_info (REPORT_DETAILS))
2086 {
2087 fprintf (vect_dump, "no builtin for vect permute for ");
2088 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2089 }
2090
2091 return false;
2092 }
2093
2094 mask_type = get_vectype_for_scalar_type (mask_element_type);
2095 mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
2096 mask = (int *) xmalloc (sizeof (int) * mask_nunits);
2097 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2098 scale = mask_nunits / nunits;
2099 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
2100
2101 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
2102 unrolling factor. */
b8698a0f 2103 orig_vec_stmts_num = group_size *
ebfd146a
IR
2104 SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
2105 if (orig_vec_stmts_num == 1)
2106 only_one_vec = true;
2107
b8698a0f 2108 /* Number of copies is determined by the final vectorization factor
ebfd146a 2109 relatively to SLP_NODE_INSTANCE unrolling factor. */
b8698a0f 2110 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
ebfd146a 2111
b8698a0f
L
2112 /* Generate permutation masks for every NODE. Number of masks for each NODE
2113 is equal to GROUP_SIZE.
2114 E.g., we have a group of three nodes with three loads from the same
2115 location in each node, and the vector size is 4. I.e., we have a
2116 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
2117 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
2118 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
2119 ...
2120
2121 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in target
2122 scpecific type, e.g., in bytes for Altivec.
b8698a0f 2123 The last mask is illegal since we assume two operands for permute
ebfd146a
IR
2124 operation, and the mask element values can't be outside that range. Hence,
2125 the last mask must be converted into {2,5,5,5}.
b8698a0f 2126 For the first two permutations we need the first and the second input
ebfd146a 2127 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 2128 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
2129 {c2,a3,b3,c3}. */
2130
2131 for (i = 0;
2132 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance),
2133 i, node);
2134 i++)
2135 {
2136 scalar_index = 0;
2137 index = 0;
2138 vect_stmts_counter = 0;
2139 vec_index = 0;
2140 first_vec_index = vec_index++;
2141 if (only_one_vec)
2142 second_vec_index = first_vec_index;
2143 else
2144 second_vec_index = vec_index++;
2145
2146 for (j = 0; j < unroll_factor; j++)
2147 {
2148 for (k = 0; k < group_size; k++)
2149 {
2150 first_mask_element = (i + j * group_size) * scale;
2151 for (m = 0; m < scale; m++)
2152 {
b8698a0f 2153 if (!vect_get_mask_element (stmt, first_mask_element, m,
ebfd146a
IR
2154 mask_nunits, only_one_vec, index, mask,
2155 &current_mask_element, &need_next_vector))
2156 return false;
2157
2158 mask[index++] = current_mask_element;
b8698a0f 2159 }
ebfd146a
IR
2160
2161 if (index == mask_nunits)
2162 {
faf63e39
RH
2163 tree mask_vec = NULL;
2164
2165 while (--index >= 0)
2166 {
2167 tree t = build_int_cst (mask_element_type, mask[index]);
2168 mask_vec = tree_cons (NULL, t, mask_vec);
2169 }
2170 mask_vec = build_vector (mask_type, mask_vec);
2171 index = 0;
2172
2173 if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
2174 mask_vec))
2175 {
2176 if (vect_print_dump_info (REPORT_DETAILS))
2177 {
2178 fprintf (vect_dump, "unsupported vect permute ");
2179 print_generic_expr (vect_dump, mask_vec, 0);
2180 }
2181 free (mask);
2182 return false;
2183 }
2184
ebfd146a
IR
2185 if (!analyze_only)
2186 {
2187 if (need_next_vector)
2188 {
2189 first_vec_index = second_vec_index;
2190 second_vec_index = vec_index;
2191 }
2192
2193 next_scalar_stmt = VEC_index (gimple,
2194 SLP_TREE_SCALAR_STMTS (node), scalar_index++);
2195
2196 vect_create_mask_and_perm (stmt, next_scalar_stmt,
faf63e39
RH
2197 mask_vec, first_vec_index, second_vec_index,
2198 gsi, node, builtin_decl, vectype, dr_chain,
2199 ncopies, vect_stmts_counter++);
ebfd146a 2200 }
b8698a0f
L
2201 }
2202 }
2203 }
2204 }
ebfd146a
IR
2205
2206 free (mask);
2207 return true;
2208}
2209
2210
2211
2212/* Vectorize SLP instance tree in postorder. */
2213
2214static bool
2215vect_schedule_slp_instance (slp_tree node, slp_instance instance,
a70d6342 2216 unsigned int vectorization_factor)
ebfd146a
IR
2217{
2218 gimple stmt;
2219 bool strided_store, is_store;
2220 gimple_stmt_iterator si;
2221 stmt_vec_info stmt_info;
2222 unsigned int vec_stmts_size, nunits, group_size;
2223 tree vectype;
2224 int i;
2225 slp_tree loads_node;
2226
2227 if (!node)
2228 return false;
2229
2230 vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
2231 vectorization_factor);
2232 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
2233 vectorization_factor);
b8698a0f 2234
ebfd146a
IR
2235 stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
2236 stmt_info = vinfo_for_stmt (stmt);
2237
2238 /* VECTYPE is the type of the destination. */
b690cc0f 2239 vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
2240 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
2241 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
2242
2243 /* For each SLP instance calculate number of vector stmts to be created
2244 for the scalar stmts in each node of the SLP tree. Number of vector
2245 elements in one vector iteration is the number of scalar elements in
2246 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
2247 size. */
2248 vec_stmts_size = (vectorization_factor * group_size) / nunits;
2249
2250 /* In case of load permutation we have to allocate vectorized statements for
2251 all the nodes that participate in that permutation. */
2252 if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
2253 {
2254 for (i = 0;
2255 VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node);
2256 i++)
2257 {
2258 if (!SLP_TREE_VEC_STMTS (loads_node))
2259 {
2260 SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
2261 vec_stmts_size);
2262 SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
2263 }
2264 }
2265 }
2266
2267 if (!SLP_TREE_VEC_STMTS (node))
2268 {
2269 SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
2270 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
2271 }
2272
2273 if (vect_print_dump_info (REPORT_DETAILS))
2274 {
2275 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
2276 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
b8698a0f 2277 }
ebfd146a
IR
2278
2279 /* Loads should be inserted before the first load. */
2280 if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
2281 && STMT_VINFO_STRIDED_ACCESS (stmt_info)
2282 && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
2283 si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
2284 else
2285 si = gsi_for_stmt (stmt);
b8698a0f 2286
ebfd146a 2287 is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
b5aeb3bb 2288 return is_store;
ebfd146a
IR
2289}
2290
2291
2292bool
a70d6342 2293vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a 2294{
a70d6342 2295 VEC (slp_instance, heap) *slp_instances;
ebfd146a 2296 slp_instance instance;
a70d6342 2297 unsigned int i, vf;
ebfd146a
IR
2298 bool is_store = false;
2299
a70d6342
IR
2300 if (loop_vinfo)
2301 {
2302 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2303 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
b8698a0f 2304 }
a70d6342
IR
2305 else
2306 {
2307 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2308 vf = 1;
b8698a0f 2309 }
a70d6342 2310
ebfd146a
IR
2311 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2312 {
2313 /* Schedule the tree of INSTANCE. */
2314 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
a70d6342 2315 instance, vf);
8644a673
IR
2316 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
2317 || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
ebfd146a
IR
2318 fprintf (vect_dump, "vectorizing stmts using SLP.");
2319 }
2320
b5aeb3bb
IR
2321 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
2322 {
2323 slp_tree root = SLP_INSTANCE_TREE (instance);
2324 gimple store;
2325 unsigned int j;
2326 gimple_stmt_iterator gsi;
2327
2328 for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
2329 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
2330 {
2331 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
2332 break;
2333
2334 /* Free the attached stmt_vec_info and remove the stmt. */
2335 gsi = gsi_for_stmt (store);
2336 gsi_remove (&gsi, true);
2337 free_stmt_vec_info (store);
2338 }
2339 }
2340
ebfd146a
IR
2341 return is_store;
2342}
a70d6342
IR
2343
2344
2345/* Vectorize the basic block. */
2346
2347void
2348vect_slp_transform_bb (basic_block bb)
2349{
2350 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
2351 gimple_stmt_iterator si;
2352
2353 gcc_assert (bb_vinfo);
2354
2355 if (vect_print_dump_info (REPORT_DETAILS))
2356 fprintf (vect_dump, "SLPing BB\n");
2357
2358 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2359 {
2360 gimple stmt = gsi_stmt (si);
2361 stmt_vec_info stmt_info;
2362
2363 if (vect_print_dump_info (REPORT_DETAILS))
2364 {
2365 fprintf (vect_dump, "------>SLPing statement: ");
2366 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2367 }
2368
2369 stmt_info = vinfo_for_stmt (stmt);
2370 gcc_assert (stmt_info);
2371
2372 /* Schedule all the SLP instances when the first SLP stmt is reached. */
2373 if (STMT_SLP_TYPE (stmt_info))
2374 {
2375 vect_schedule_slp (NULL, bb_vinfo);
2376 break;
2377 }
2378 }
2379
2380 mark_sym_for_renaming (gimple_vop (cfun));
2381 /* The memory tags and pointers in vectorized statements need to
2382 have their SSA forms updated. FIXME, why can't this be delayed
2383 until all the loops have been transformed? */
2384 update_ssa (TODO_update_ssa);
2385
2386 if (vect_print_dump_info (REPORT_DETAILS))
e9dbe7bb 2387 fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");
a70d6342 2388
12aaf609
IR
2389 destroy_bb_vec_info (bb_vinfo);
2390}
a70d6342 2391