/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
cf835838
JM
31#include "tree-pretty-print.h"
32#include "gimple-pretty-print.h"
ebfd146a
IR
33#include "tree-flow.h"
34#include "tree-dump.h"
35#include "cfgloop.h"
36#include "cfglayout.h"
37#include "expr.h"
38#include "recog.h"
39#include "optabs.h"
40#include "tree-vectorizer.h"
41
/* Extract the location of the basic block in the source code.
   Return the basic block location if one is found, and UNKNOWN_LOC
   otherwise.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}

/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check that
   they are of a legal type and that they match the defs of the first stmt of
   the SLP group (stored in FIRST_STMT_...).  */
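
/* For illustration (hypothetical example): for the group of two isomorphic
   stores
       a[0] = x0 + y0;
       a[1] = x1 + y1;
   the def stmts of the first operands (x0, x1) are collected in DEF_STMTS0
   and those of the second operands (y0, y1) in DEF_STMTS1; every stmt after
   the first must match the def types recorded for the first stmt.  */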

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = NULL;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1;  /* RHS only.  */

  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, bb_vinfo, &def_stmt, &def,
                               &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (loop && def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }

                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }

      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not a memory operation (we don't call this function for
               loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                   dt[0], slp_node);
        }

      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }

                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not the first stmt of the group, check that the def-stmt/s
                 match the def-stmt/s of the first stmt.  */
              if ((i == 0
                   && (*first_stmt_dt0 != dt[i]
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
                      && (*first_stmt_dt1 != dt[i]
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
                  || (!def
                      && !types_compatible_p (TREE_TYPE (*first_stmt_const_oprnd),
                                              TREE_TYPE (oprnd))))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

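/* For example (illustrative): the two stmts
       a[0] = b[0] + c[0];
       a[1] = b[1] - c[1];
   are not isomorphic (PLUS_EXPR vs. MINUS_EXPR), so the SLP build fails,
   while two stmts with the same operation code and matching operand def
   types can be packed into a single vector statement.  */
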
static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = vect_uninitialized_def;
  enum vect_def_type first_stmt_dt1 = vect_uninitialized_def;
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies != 1)
        {
          if (vect_print_dump_info (REPORT_SLP))
            fprintf (vect_dump, "SLP with multiple types ");

          /* FORNOW: multiple types are unsupported in BB SLP.  */
          if (bb_vinfo)
            return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
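          /* Illustrative note: for a[0] = b[0] << 2 and a[1] = b[1] << 3 a
             vector/vector shift can use {2, 3} as the shift operand, but if
             the target only provides a vector/scalar shift, all the packed
             stmts must share a single shift amount; this is what the
             NEED_SAME_OPRNDS mechanism enforces.  */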
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }

                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                 "op not supported by target.");
                      return false;
                    }

                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }
        }

      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: strided "
                               "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                               "interleaved loads is greater than"
                               " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                   "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);
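
              /* Illustrative example: if the two stores of the group are fed
                 by loads from positions 1 and 0 of the interleaving chain,
                 the recorded permutation is {1, 0}; whether it can be
                 realized is decided later in
                 vect_supported_load_permutation_p.  */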

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Non-strided loads are not supported.  */
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      if (permutation)
        {
          VEC_safe_push (slp_tree, heap, *loads, *node);
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check here complex numbers chains, so we keep them in
             LOADS for further check in vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            VEC_safe_push (slp_tree, heap, *loads, *node);
        }

      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_internal_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &left_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_internal_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, bb_vinfo, &right_node, group_size,
                                inside_cost, outside_cost, ncopies_for_cost,
                                max_nunits, load_permutation, loads,
                                vectorization_factor))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}

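/* Dump the SLP tree rooted at NODE to the vectorizer dump file.  */
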
static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  vect_mark_slp_stmts_relevant (SLP_TREE_LEFT (node));
  vect_mark_slp_stmts_relevant (SLP_TREE_RIGHT (node));
}

/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: the only supported permutation is one where all the loads in a
     node access the same location in the interleaving chain, and the
     data-refs in the nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                       "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;

  if (!node)
    return;

  vect_slp_rearrange_stmts (SLP_TREE_LEFT (node), group_size, permutation);
  vect_slp_rearrange_stmts (SLP_TREE_RIGHT (node), group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of strided stores that are
   the base of the SLP instance.  */
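
/* For example (illustrative): with GROUP_SIZE == 2, a supported permutation
   is {0, 0, 1, 1} -- each load node reads one location of the interleaving
   chain GROUP_SIZE times, matching the 0..01..1.. form checked below.  The
   reduction and complex-number cases are handled specially first.  */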

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first;
  unsigned complex_numbers = 0;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     strided stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as following:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is fine, so there is no need to proceed with
     the permutation tests.  */
  if (complex_numbers == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              SET_BIT (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's a reduction).  */
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      {
        /* Free the bitmap on this early exit as well.  */
        sbitmap_free (load_index);
        return false;
      }

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}

/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}

/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}

/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));

  if (dr)
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      vectype = get_vectype_for_scalar_type (scalar_type);
      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    /* No multitypes in BB SLP.  */
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
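
  /* E.g., with NUNITS == 4 and GROUP_SIZE == 3 the unrolling factor is
     lcm (4, 3) / 3 == 4: four copies of the group (twelve scalars) are
     needed to fill three vectors exactly.  */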
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                 " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed strided stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (dr)
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i,
                               next);
           i++)
        {
          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "pushing reduction into node: ");
              print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
            }
        }
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;
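
  /* E.g., GROUP_SIZE == 8 and NUNITS == 4 give UNROLLING_FACTOR == 1 and
     NCOPIES_FOR_COST == 2: two vector stmts cover the eight scalars.  */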

  load_permutation = VEC_alloc (int, heap, group_size * group_size);
  loads = VEC_alloc (slp_tree, heap, group_size);

  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
                           &inside_cost, &outside_cost, ncopies_for_cost,
                           &max_nunits, &load_permutation, &loads,
                           vectorization_factor))
    {
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      /* Calculate the unrolling factor based on the smallest type in the
         loop.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      SLP_INSTANCE_LOADS (new_instance) = loads;
      SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
      SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
      if (VEC_length (slp_tree, loads))
        {
          if (!vect_supported_load_permutation_p (new_instance, group_size,
                                                  load_permutation))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: unsupported load "
                           "permutation ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_slp_instance (new_instance);
              return false;
            }

          SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
            = vect_find_first_load_in_slp_instance (new_instance);
        }
      else
        VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));

      if (loop_vinfo)
        VEC_safe_push (slp_instance, heap,
                       LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                       new_instance);
      else
        VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
                       new_instance);

      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);
  VEC_free (int, heap, load_permutation);
  VEC_free (slp_tree, heap, loads);

  return false;
}

/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores, *reductions = NULL;
  gimple store;
  bool ok = false;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  if (loop_vinfo)
    {
      strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
      reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
    }
  else
    strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);

  /* Find SLP sequences starting from groups of strided stores.  */
  FOR_EACH_VEC_ELT (gimple, strided_stores, i, store)
    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
      ok = true;

  if (bb_vinfo && !ok)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Failed to SLP the basic block.");

      return false;
    }

  /* Find SLP sequences starting from groups of reductions.  */
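  /* Note: all the reduction stmts of the loop are collected into a single
     SLP group rooted at the first reduction (see the reduction path in
     vect_analyze_slp_instance); a lone reduction is left to loop-based
     vectorization.  */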
  if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
      && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
                                    VEC_index (gimple, reductions, 0)))
    ok = true;

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate the
   overall unrolling factor needed to SLP the loop.  Return TRUE if decided
   to SLP at least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later
         we call vect_detect_hybrid_slp () to find stmts that need hybrid SLP
         and loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}

/* Find stmts that must be both vectorized and SLPed (since they feed stmts
   that can't be SLPed) in the tree rooted at NODE.  Mark such stmts as
   HYBRID.  */
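
/* Illustrative example: if a PURE_SLP stmt defines an SSA name that is also
   used by a relevant stmt outside any SLP instance, the defining stmt must
   be vectorized both as part of the SLP instance and by the loop-based
   vectorizer, so it is marked HYBRID below.  */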

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if ((stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (use_stmt))
                    == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}

/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}

/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;

  if (!node)
    return true;

  if (!vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_LEFT (node))
      || !vect_slp_analyze_node_operations (bb_vinfo, SLP_TREE_RIGHT (node)))
    return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}

/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}

/* Check if loads and stores are mixed in the basic block (in that case if
   we are not sure that the accesses differ, we can't vectorize the basic
   block).  Also return FALSE in case there is a statement marked as not
   vectorizable.  */

static bool
vect_bb_vectorizable_with_dependencies (bb_vec_info bb_vinfo)
{
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  gimple_stmt_iterator si;
  bool detected_store = false;
  gimple stmt;
  struct data_reference *dr;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);

      /* We can't allow unanalyzed statements, since they may contain data
         accesses.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        return false;

      if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
        continue;

      dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
      if (DR_IS_READ (dr) && detected_store)
        return false;

      if (!DR_IS_READ (dr))
        detected_store = true;
    }

  return true;
}

/* Check if vectorization of the basic block is profitable.  */
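
/* The check below is a straight cost comparison: the block is vectorized
   only if VEC_OUTSIDE_COST + VEC_INSIDE_COST is strictly smaller than
   SCALAR_COST, with the per-stmt scalar costs taken from
   targetm.vectorize.builtin_vectorization_cost.  */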

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  tree dummy_type = NULL;
  int dummy = 0;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
          else
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        }
      else
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  */
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}

/* Check if the basic block can be vectorized.  */
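
/* The analysis below proceeds in stages: find the data references, analyze
   dependences, alignment and access patterns, build SLP instances, check
   that every packed operation is supported, and finally run the cost model.
   Any failure destroys BB_VINFO and gives up on the block.  */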
a70d6342
IR
1667
bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i, insns = 0;
  gimple_stmt_iterator gsi;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;
  bool data_dependence_in_bb = false;

  current_vector_size = 0;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                 "block.\n");

      return NULL;
    }

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf,
                                          &data_dependence_in_bb)
      || min_vf > max_vf
      || (data_dependence_in_bb
          && !vect_bb_vectorizable_with_dependencies (bb_vinfo)))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                 "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "
                 "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}


/* SLP costs are calculated according to the SLP instance unrolling factor
   (i.e., the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   the VF, which is only set later, in vect_analyze_operations ().  Hence, the
   SLP costs need to be updated here.  In this function we assume that the
   inside costs calculated in vect_model_xxx_cost are linear in ncopies.  */
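/* For example (illustrative numbers): an instance built with an SLP
   unrolling factor of 2, inside a loop whose final VF is 8, emits 8/2 == 4
   times as many vector stmts as assumed at analysis time, so its
   inside-of-loop cost is scaled by that same factor below.  */
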
void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}


/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           VEC (tree, heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  int nunits;
  tree vec_cst;
  tree t = NULL_TREE;
  int j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  int number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_assign_rhs_code (stmt);

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
    {
      if (reduc_index == -1)
        {
          VEC_free (tree, heap, *vec_oprnds);
          return;
        }

      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction () for details.  */
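      /* E.g., the neutral element is 0 for sums (0.0 for floats), 1 for
         products and all-ones for BIT_AND_EXPR: combining a partial result
         with the neutral element of its operation leaves it unchanged.  */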
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst0);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 0);

            break;

          case MULT_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst1);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 1);

            break;

          case BIT_AND_EXPR:
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;
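
  /* Illustration with made-up sizes: GROUP_SIZE == 6 and NUNITS == 4 give
     least_common_multiple (4, 6) / 6 == 2 copies; the six scalars fill
     three four-element vectors, each scalar appearing exactly twice.  */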

  number_of_places_left_in_vector = nunits;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            op = gimple_op (stmt, op_num + 1);

          if (reduc_index != -1)
            {
              struct loop *loop = (gimple_bb (stmt))->loop_father;
              gimple def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);
              /* Get the def before the loop.  */
              op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                          loop_preheader_edge (loop));
              if (j != (number_of_copies - 1) && neutral_op)
                op = neutral_op;
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          t = tree_cons (NULL_TREE, op, t);

          number_of_places_left_in_vector--;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, t);
              else
                vec_cst = build_constructor_from_list (vector_type, t);
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst, vector_type,
                                                NULL));
              t = NULL_TREE;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num - 1; j >= 0; j--)
    {
      vop = VEC_index (tree, voprnds, j);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
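  /* For instance (illustrative only): if two vectors {s1,s2,s1,s2} were
     built but NUMBER_OF_VECTORS is 4, two more operand vectors are pushed
     below: the neutral vector for reductions, duplicates of the existing
     vectors otherwise.  */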
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}


/* Get the vectorized definitions from SLP_NODE, which contains the
   corresponding vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}


/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors () to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized
   stmts must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.
   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
   the right node).  This is used when the second operand must remain
   scalar.  */

void
vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
                   VEC (tree,heap) **vec_oprnds0,
                   VEC (tree,heap) **vec_oprnds1, int reduc_index)
{
  gimple first_stmt;
  enum tree_code code;
  int number_of_vects;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_LEFT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
  else
    {
      number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
      /* Number of vector stmts was calculated according to LHS in
         vect_schedule_slp_instance (), fix it by replacing LHS with RHS, if
         necessary.  See vect_get_smallest_scalar_type () for details.  */
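      /* E.g. (hypothetical types): if chars are computed from ints,
         LHS_SIZE_UNIT == 1 and RHS_SIZE_UNIT == 4, so the adjustment below
         requests four times as many operand vectors as result vectors.  */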
      vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                     &rhs_size_unit);
      if (rhs_size_unit != lhs_size_unit)
        {
          number_of_vects *= rhs_size_unit;
          number_of_vects /= lhs_size_unit;
        }
    }

  /* Allocate memory for vectorized defs.  */
  *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);

  /* SLP_NODE corresponds either to a group of stores or to a group of
     unary/binary operations.  We don't call this function for loads.
     For reduction defs we call vect_get_constant_vectors (), since we are
     looking for initial loop invariant values.  */
  if (SLP_TREE_LEFT (slp_node) && reduc_index == -1)
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
                               reduc_index);

  if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
    /* Since we don't call this function with loads, this is a group of
       stores.  */
    return;

  /* For reductions, we only need initial values.  */
  if (reduc_index != -1)
    return;

  code = gimple_assign_rhs_code (first_stmt);
  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
    return;

  /* The number of vector defs is determined by the number of vector
     statements in the node from which we get those statements.  */
  if (SLP_TREE_RIGHT (slp_node))
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
  else
    number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);

  *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);

  if (SLP_TREE_RIGHT (slp_node))
    /* The defs are already vectorized.  */
    vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
  else
    /* Build vectors from scalar defs.  */
    vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
                               -1);
}


/* Create NCOPIES permutation statements using the mask MASK (already a
   vector of type MASK_TYPE) and two input vectors placed in DR_CHAIN at
   FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy, shifting by STRIDE
   elements of DR_CHAIN for every further copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number
   of copies.)
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
   where the created stmts must be inserted.  */
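/* Illustration (made-up numbers): with 4 vector stmts for NODE and
   NCOPIES == 2, STRIDE == 2; the first copy permutes DR_CHAIN[0] and
   DR_CHAIN[1], and the second copy the two vectors STRIDE positions
   further down the chain.  */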

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree builtin_decl, tree vectype,
                           VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_call (builtin_decl,
                                     3, first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_call_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}


/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in the target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
   the next vector, i.e., the current first vector is not needed.  */
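/* For instance, with MASK_NUNITS == 4 a mask value of 9 would index into a
   third input vector; since a permute takes only two inputs, the first
   vector is dropped and the value rebased to 9 - 4 == 5 (cf. the
   {6,9,9,9} -> {2,5,5,5} example further below).  */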

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       int *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}


/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, m, scale, mask_nunits, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info), builtin_decl;
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, *mask, current_mask_element, ncopies;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;

  if (!targetm.vectorize.builtin_vec_perm)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
                                                     &mask_element_type);
  if (!builtin_decl || !mask_element_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no builtin for vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  mask_type = get_vectype_for_scalar_type (mask_element_type);
  mask_nunits = TYPE_VECTOR_SUBPARTS (mask_type);
  mask = (int *) xmalloc (sizeof (int) * mask_nunits);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  scale = mask_nunits / nunits;
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on the
     SLP_NODE_INSTANCE unrolling factor.  */
  orig_vec_stmts_num = group_size
    * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* The number of copies is determined by the final vectorization factor
     relative to the SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* Generate permutation masks for every NODE.  The number of masks for each
     NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9} (in a target
     specific type, e.g., in bytes for Altivec).
     The last mask is illegal since we assume two operands for a permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

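  /* Worked instance of the FIRST_MASK_ELEMENT formula below for the example
     above (node i == 0, GROUP_SIZE == 3, SCALE == 1): unrolling iterations
     j == 0,1,2,3 give (0 + j * 3) * 1 == 0, 3, 6, 9, each repeated
     GROUP_SIZE times by the k loop, which flattens to the a-masks
     {0,0,0,3} {3,3,6,6} {6,9,9,9} shown above.  */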
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
              first_mask_element = (i + j * group_size) * scale;
              for (m = 0; m < scale; m++)
                {
                  if (!vect_get_mask_element (stmt, first_mask_element, m,
                                   mask_nunits, only_one_vec, index, mask,
                                   &current_mask_element, &need_next_vector,
                                   &number_of_mask_fixes, &mask_fixed,
                                   &needs_first_vector))
                    {
                      /* Free the mask buffer on this failure path too.  */
                      free (mask);
                      return false;
                    }

                  mask[index++] = current_mask_element;
                }

              if (index == mask_nunits)
                {
                  tree mask_vec = NULL;

                  while (--index >= 0)
                    {
                      tree t = build_int_cst (mask_element_type, mask[index]);
                      mask_vec = tree_cons (NULL, t, mask_vec);
                    }
                  mask_vec = build_vector (mask_type, mask_vec);
                  index = 0;

                  if (!targetm.vectorize.builtin_vec_perm_ok (vectype,
                                                              mask_vec))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute ");
                          print_generic_expr (vect_dump, mask_vec, 0);
                        }
                      free (mask);
                      return false;
                    }

                  if (!analyze_only)
                    {
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                               mask_vec, first_vec_index, second_vec_index,
                               gsi, node, builtin_decl, vectype, dr_chain,
                               ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  free (mask);
  return true;
}


/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool strided_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;

  if (!node)
    return false;

  vect_schedule_slp_instance (SLP_TREE_LEFT (node), instance,
                              vectorization_factor);
  vect_schedule_slp_instance (SLP_TREE_RIGHT (node), instance,
                              vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  Number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
     size.  */
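  /* E.g. (illustrative values): GROUP_SIZE == 2, VF == 4 and NUNITS == 4
     give (4 * 2) / 4 == 2 vector stmts per node.  */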
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  /* In case of load permutation we have to allocate vectorized statements for
     all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      si = gsi_for_stmt (last_store);
    }

  is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
  return is_store;
}


/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

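  /* The original scalar stores are now dead: their values are produced by
     the vector stmts emitted above, so the group members can be removed
     from the IL.  */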
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          gsi_remove (&gsi, true);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}


/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}