/* SLP - Basic Block Vectorization
   Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "tree-vectorizer.h"
#include "langhooks.h"

/* Extract the location of the basic block in the source code.
   Return the basic block location if it succeeds and UNKNOWN_LOC if not.  */

LOC
find_bb_location (basic_block bb)
{
  gimple stmt = NULL;
  gimple_stmt_iterator si;

  if (!bb)
    return UNKNOWN_LOC;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree ((slp_tree) child);

  VEC_free (slp_void_p, heap, SLP_TREE_CHILDREN (node));
  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (instance));
  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (VEC (gimple, heap) *scalar_stmts)
{
  slp_tree node;
  gimple stmt = VEC_index (gimple, scalar_stmts, 0);
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_CHILDREN (node) = VEC_alloc (slp_void_p, heap, nops);
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  return node;
}

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static VEC (slp_oprnd_info, heap) *
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  VEC (slp_oprnd_info, heap) *oprnds_info;

  oprnds_info = VEC_alloc (slp_oprnd_info, heap, nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts = VEC_alloc (gimple, heap, group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_def_type = NULL_TREE;
      oprnd_info->first_const_oprnd = NULL_TREE;
      oprnd_info->first_pattern = false;
      VEC_quick_push (slp_oprnd_info, oprnds_info, oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (VEC (slp_oprnd_info, heap) **oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (slp_oprnd_info, *oprnds_info, i, oprnd_info)
    {
      VEC_free (gimple, heap, oprnd_info->def_stmts);
      XDELETE (oprnd_info);
    }

  VEC_free (slp_oprnd_info, heap, *oprnds_info);
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                             slp_tree slp_node, gimple stmt,
                             int ncopies_for_cost, bool first,
                             VEC (slp_oprnd_info, heap) **oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def, def_op0 = NULL_TREE;
  gimple def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  enum vect_def_type dt_op0 = vect_uninitialized_def;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree lhs = gimple_get_lhs (stmt);
  struct loop *loop = NULL;
  enum tree_code rhs_code;
  bool different_types = false;
  bool pattern = false;
  slp_oprnd_info oprnd_info, oprnd0_info, oprnd1_info;
  int op_idx = 1;
  tree compare_rhs = NULL_TREE;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        number_of_oprnds++;
    }
  else
    return false;

  for (i = 0; i < number_of_oprnds; i++)
    {
      if (compare_rhs)
        {
          oprnd = compare_rhs;
          compare_rhs = NULL_TREE;
        }
      else
        oprnd = gimple_op (stmt, op_idx++);

      oprnd_info = VEC_index (slp_oprnd_info, *oprnds_info, i);

      if (COMPARISON_CLASS_P (oprnd))
        {
          compare_rhs = TREE_OPERAND (oprnd, 1);
          oprnd = TREE_OPERAND (oprnd, 0);
        }

      if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
                               &def, &dt)
          || (!def_stmt && dt != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }

          return false;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
              || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
                  && gimple_code (def_stmt) != GIMPLE_PHI))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "Build SLP failed: some of the stmts"
                                      " are in a pattern, and others are not ");
                  print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                }

              return false;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          if (def)
            {
              oprnd_info->first_def_type = TREE_TYPE (def);
              oprnd_info->first_const_oprnd = NULL_TREE;
            }
          else
            {
              oprnd_info->first_def_type = NULL_TREE;
              oprnd_info->first_const_oprnd = oprnd;
            }

          if (i == 0)
            {
              def_op0 = def;
              dt_op0 = dt;
              /* Analyze costs (for the first stmt of the group only).  */
              if (REFERENCE_CLASS_P (lhs))
                /* Store.  */
                vect_model_store_cost (stmt_info, ncopies_for_cost, false,
                                       dt, slp_node);
              else
                {
                  enum vect_def_type dts[2];
                  dts[0] = dt;
                  dts[1] = vect_uninitialized_def;
                  /* Not memory operation (we don't call this function for
                     loads).  */
                  vect_model_simple_cost (stmt_info, ncopies_for_cost, dts,
                                          slp_node);
                }
            }
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def))
               || (oprnd_info->first_def_type != NULL_TREE
                   && def
                   && !types_compatible_p (oprnd_info->first_def_type,
                                           TREE_TYPE (def))))
              || (!def
                  && !types_compatible_p (TREE_TYPE (oprnd_info->first_const_oprnd),
                                          TREE_TYPE (oprnd)))
              || different_types)
            {
              if (number_of_oprnds != 2)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");

                  return false;
                }

              /* Try to swap operands in case of binary operation.  */
              if (i == 0)
                different_types = true;
              else
                {
                  oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
                  if (is_gimple_assign (stmt)
                      && (rhs_code = gimple_assign_rhs_code (stmt))
                      && TREE_CODE_CLASS (rhs_code) == tcc_binary
                      && commutative_tree_code (rhs_code)
                      && oprnd0_info->first_dt == dt
                      && oprnd_info->first_dt == dt_op0
                      && def_op0 && def
                      && !(oprnd0_info->first_def_type
                           && !types_compatible_p (oprnd0_info->first_def_type,
                                                   TREE_TYPE (def)))
                      && !(oprnd_info->first_def_type
                           && !types_compatible_p (oprnd_info->first_def_type,
                                                   TREE_TYPE (def_op0))))
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Swapping operands of ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      swap_tree_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                          gimple_assign_rhs2_ptr (stmt));
                    }
                  else
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: different types ");

                      return false;
                    }
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
        case vect_reduction_def:
          break;

        case vect_internal_def:
          if (different_types)
            {
              oprnd0_info = VEC_index (slp_oprnd_info, *oprnds_info, 0);
              oprnd1_info = VEC_index (slp_oprnd_info, *oprnds_info, 1);
              if (i == 0)
                VEC_quick_push (gimple, oprnd1_info->def_stmts, def_stmt);
              else
                VEC_quick_push (gimple, oprnd0_info->def_stmts, def_stmt);
            }
          else
            VEC_quick_push (gimple, oprnd_info->def_stmts, def_stmt);

          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }

          return false;
        }
    }

  return true;
}


/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation.  Otherwise, return
   TRUE.  */

static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                     slp_tree *node, unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits,
                     VEC (int, heap) **load_permutation,
                     VEC (slp_tree, heap) **loads,
                     unsigned int vectorization_factor, bool *loads_permuted)
{
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum tree_code first_stmt_code = ERROR_MARK, rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  struct data_reference *first_dr;
  HOST_WIDE_INT dummy;
  bool permutation = false;
  unsigned int load_place;
  gimple first_load, prev_first_load = NULL;
  VEC (slp_oprnd_info, heap) *oprnds_info;
  unsigned int nops;
  slp_oprnd_info oprnd_info;
  tree cond;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else
    return false;

  oprnds_info = vect_create_oprnd_info (nops, group_size);

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (gimple, stmts, i, stmt)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: unvectorizable statement ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == COND_EXPR
          && (cond = gimple_assign_rhs1 (stmt))
          && !COMPARISON_CLASS_P (cond))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: condition is not comparison ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }

          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        {
          *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (bb_vinfo)
            vectorization_factor = *max_nunits;
        }

      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);

      if (is_gimple_call (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (stmt)
              || gimple_call_tail_p (stmt)
              || gimple_call_noreturn_p (stmt)
              || !gimple_call_nothrow_p (stmt)
              || gimple_call_chain (stmt))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: unsupported call type ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
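          /* For example, "b[0] << 2" and "b[1] << 3" can be SLPed together
             only if the target provides a vector/vector shift: with a
             vector/scalar shift the differing shift amounts are rejected
             below as "different shift arguments".  */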
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: "
                                            "op not supported by target.");
                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple first_stmt = VEC_index (gimple, stmts, 0);
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump,
                               "Build SLP failed: different calls in ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node,
                                                stmt, ncopies_for_cost,
                                                (i == 0), &oprnds_info))
                {
                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }
          else
            {
              /* Load.  */
              /* FORNOW: Check that there is no gap between the loads.  */
              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: grouped "
                                          "loads have gaps ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }

              /* Check that the size of interleaved loads group is not
                 greater than the SLP group size.  */
              if (loop_vinfo
                  && GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: the number of "
                                          "interleaved loads is greater than"
                                          " the SLP group size ");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }

              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  The only exception is complex
                     numbers.  */
                  if (prev_first_load != first_load
                      && rhs_code != REALPART_EXPR
                      && rhs_code != IMAGPART_EXPR)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: different "
                                              "interleaving chains in one node ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }
                }
              else
                prev_first_load = first_load;

              if (first_load == stmt)
                {
                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                              "unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }

                      vect_free_oprnd_info (&oprnds_info);
                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, false, *node);
                }

              /* Store the place of this load in the interleaving chain.  In
                 case that permutation is needed we later decide if a specific
                 permutation is supported.  */
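              /* For example, if the interleaving chain is a[0], a[1], a[2],
                 a[3] and the node's loads access a[2], a[3], a[0], a[1], the
                 recorded places are 2, 3, 0, 1 and a permutation is
                 required.  */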
              load_place = vect_get_place_in_interleaving_chain (stmt,
                                                                 first_load);
              if (load_place != i)
                permutation = true;

              VEC_safe_push (int, heap, *load_permutation, load_place);

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not grouped load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && rhs_code != COND_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              vect_free_oprnd_info (&oprnds_info);
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (first_cond_code != TREE_CODE (cond_expr))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    {
                      fprintf (vect_dump, "Build SLP failed: different"
                                          " operation");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }

                  vect_free_oprnd_info (&oprnds_info);
                  return false;
                }
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo, *node, stmt,
                                            ncopies_for_cost, (i == 0),
                                            &oprnds_info))
            {
              vect_free_oprnd_info (&oprnds_info);
              return false;
            }
        }
    }

  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Grouped loads were reached - stop the recursion.  */
  if (stop_recursion)
    {
      VEC_safe_push (slp_tree, heap, *loads, *node);
      if (permutation)
        {
          *loads_permuted = true;
          *inside_cost
            += targetm.vectorize.builtin_vectorization_cost (vec_perm, NULL, 0)
               * group_size;
        }
      else
        {
          /* We don't check here complex numbers chains, so we set
             LOADS_PERMUTED for further check in
             vect_supported_load_permutation_p.  */
          if (rhs_code == REALPART_EXPR || rhs_code == IMAGPART_EXPR)
            *loads_permuted = true;
        }

      vect_free_oprnd_info (&oprnds_info);
      return true;
    }

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (slp_oprnd_info, oprnds_info, i, oprnd_info)
    {
      slp_tree child;

      if (oprnd_info->first_dt != vect_internal_def)
        continue;

      child = vect_create_new_slp_node (oprnd_info->def_stmts);
      if (!child
          || !vect_build_slp_tree (loop_vinfo, bb_vinfo, &child, group_size,
                                   inside_cost, outside_cost, ncopies_for_cost,
                                   max_nunits, load_permutation, loads,
                                   vectorization_factor, loads_permuted))
        {
          if (child)
            oprnd_info->def_stmts = NULL;
          vect_free_slp_tree (child);
          vect_free_oprnd_info (&oprnds_info);
          return false;
        }

      oprnd_info->def_stmts = NULL;
      VEC_quick_push (slp_void_p, SLP_TREE_CHILDREN (*node), child);
    }

  vect_free_oprnd_info (&oprnds_info);
  return true;
}


static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree ((slp_tree) child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts ((slp_tree) child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple stmt;
  stmt_vec_info stmt_info;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant ((slp_tree) child);
}


/* Check if the permutation required by the SLP INSTANCE is supported.
   Reorganize the SLP nodes stored in SLP_INSTANCE_LOADS if needed.  */

static bool
vect_supported_slp_permutation_p (slp_instance instance)
{
  slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
  gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  VEC (slp_tree, heap) *sorted_loads = NULL;
  int index;
  slp_tree *tmp_loads = NULL;
  int group_size = SLP_INSTANCE_GROUP_SIZE (instance), i, j;
  slp_tree load;

  /* FORNOW: The only supported loads permutation is loads from the same
     location in all the loads in the node, when the data-refs in
     nodes of LOADS constitute an interleaving chain.
     Sort the nodes according to the order of accesses in the chain.  */
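  /* For example, if the first recorded index for each of three load nodes is
     2, 0 and 1, the nodes are placed at slots 2, 0 and 1 of TMP_LOADS below,
     i.e., back in the order of accesses in the interleaving chain.  */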
  tmp_loads = (slp_tree *) xmalloc (sizeof (slp_tree) * group_size);
  for (i = 0, j = 0;
       VEC_iterate (int, SLP_INSTANCE_LOAD_PERMUTATION (instance), i, index)
       && VEC_iterate (slp_tree, SLP_INSTANCE_LOADS (instance), j, load);
       i += group_size, j++)
    {
      gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
      /* Check that the loads are all in the same interleaving chain.  */
      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data "
                                  "permutation ");
              print_gimple_stmt (vect_dump, scalar_stmt, 0, TDF_SLIM);
            }

          free (tmp_loads);
          return false;
        }

      tmp_loads[index] = load;
    }

  sorted_loads = VEC_alloc (slp_tree, heap, group_size);
  for (i = 0; i < group_size; i++)
    VEC_safe_push (slp_tree, heap, sorted_loads, tmp_loads[i]);

  VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (instance));
  SLP_INSTANCE_LOADS (instance) = sorted_loads;
  free (tmp_loads);

  if (!vect_transform_slp_perm_load (stmt, NULL, NULL,
                                     SLP_INSTANCE_UNROLLING_FACTOR (instance),
                                     instance, true))
    return false;

  return true;
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          VEC (int, heap) *permutation)
{
  gimple stmt;
  VEC (gimple, heap) *tmp_stmts;
  unsigned int index, i;
  slp_void_p child;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts ((slp_tree) child, group_size, permutation);

  gcc_assert (group_size == VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node)));
  tmp_stmts = VEC_alloc (gimple, heap, group_size);

  for (i = 0; i < group_size; i++)
    VEC_safe_push (gimple, heap, tmp_stmts, NULL);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      index = VEC_index (int, permutation, i);
      VEC_replace (gimple, tmp_stmts, index, stmt);
    }

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Check if the required load permutation is supported.
   LOAD_PERMUTATION contains a list of indices of the loads.
   In SLP this permutation is relative to the order of grouped stores that are
   the base of the SLP instance.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                                   VEC (int, heap) *load_permutation)
{
  int i = 0, j, prev = -1, next, k, number_of_groups;
  bool supported, bad_permutation = false;
  sbitmap load_index;
  slp_tree node, other_complex_node;
  gimple stmt, first = NULL, other_node_first, load, next_load, first_load;
  unsigned complex_numbers = 0;
  struct data_reference *dr;
  bb_vec_info bb_vinfo;

  /* FORNOW: permutations are only supported in SLP.  */
  if (!slp_instn)
    return false;

  if (vect_print_dump_info (REPORT_SLP))
    {
      fprintf (vect_dump, "Load permutation ");
      FOR_EACH_VEC_ELT (int, load_permutation, i, next)
        fprintf (vect_dump, "%d ", next);
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      if (VEC_length (gimple, SLP_TREE_SCALAR_STMTS (node))
          != (unsigned) group_size)
        return false;

      stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
      if (is_gimple_assign (stmt)
          && (gimple_assign_rhs_code (stmt) == REALPART_EXPR
              || gimple_assign_rhs_code (stmt) == IMAGPART_EXPR))
        complex_numbers++;
    }

  /* Complex operands can be swapped as follows:
      real_c = real_b + real_a;
      imag_c = imag_a + imag_b;
     i.e., we have {real_b, imag_a} and {real_a, imag_b} instead of
     {real_a, imag_a} and {real_b, imag_b}.  We check here that if interleaving
     chains are mixed, they match the above pattern.  */
  if (complex_numbers)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, stmt)
            {
              if (j == 0)
                first = stmt;
              else
                {
                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                    {
                      if (complex_numbers != 2)
                        return false;

                      if (i == 0)
                        k = 1;
                      else
                        k = 0;

                      other_complex_node = VEC_index (slp_tree,
                                            SLP_INSTANCE_LOADS (slp_instn), k);
                      other_node_first = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (other_complex_node), 0);

                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                          != other_node_first)
                        return false;
                    }
                }
            }
        }
    }

  /* We checked that this case is ok, so there is no need to proceed with
     permutation tests.  */
  if (complex_numbers == 2
      && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
    {
      VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
      VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
      return true;
    }

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  /* LOAD_PERMUTATION is a list of indices of all the loads of the SLP
     instance, not all the loads belong to the same node or interleaving
     group.  Hence, we need to divide them into groups according to
     GROUP_SIZE.  */
  number_of_groups = VEC_length (int, load_permutation) / group_size;

  /* Reduction (there are no data-refs in the root).
     In a reduction chain the order of the loads is important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      int first_group_load_index;

      /* Compare all the permutation sequences to the first one.  */
      for (i = 1; i < number_of_groups; i++)
        {
          k = 0;
          for (j = i * group_size; j < i * group_size + group_size; j++)
            {
              next = VEC_index (int, load_permutation, j);
              first_group_load_index = VEC_index (int, load_permutation, k);

              if (next != first_group_load_index)
                {
                  bad_permutation = true;
                  break;
                }

              k++;
            }

          if (bad_permutation)
            break;
        }

      if (!bad_permutation)
        {
          /* Check that the loads in the first sequence are different and there
             are no gaps between them.  */
          load_index = sbitmap_alloc (group_size);
          sbitmap_zero (load_index);
          for (k = 0; k < group_size; k++)
            {
              first_group_load_index = VEC_index (int, load_permutation, k);
              if (TEST_BIT (load_index, first_group_load_index))
                {
                  bad_permutation = true;
                  break;
                }

              SET_BIT (load_index, first_group_load_index);
            }

          if (!bad_permutation)
            for (k = 0; k < group_size; k++)
              if (!TEST_BIT (load_index, k))
                {
                  bad_permutation = true;
                  break;
                }

          sbitmap_free (load_index);
        }

      if (!bad_permutation)
        {
          /* This permutation is valid for reduction.  Since the order of the
             statements in the nodes is not important unless they are memory
             accesses, we can rearrange the statements in all the nodes
             according to the order of the loads.  */
          vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                                    load_permutation);
          VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
          return true;
        }
    }

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  bb_vinfo = STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt));
  bad_permutation = false;
  /* Check that for every node in the instance the loads form a subchain.  */
  if (bb_vinfo)
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          next_load = NULL;
          first_load = NULL;
          FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (!first_load)
                first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (load));
              else if (first_load
                       != GROUP_FIRST_ELEMENT (vinfo_for_stmt (load)))
                {
                  bad_permutation = true;
                  break;
                }

              if (j != 0 && next_load != load)
                {
                  bad_permutation = true;
                  break;
                }

              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }

          if (bad_permutation)
            break;
        }

      /* Check that the alignment of the first load in every subchain, i.e.,
         the first statement in every load node, is supported.  */
      if (!bad_permutation)
        {
          FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_instn), i, node)
            {
              first_load = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
              if (first_load
                  != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
                {
                  dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
                  if (vect_supportable_dr_alignment (dr, false)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "unsupported unaligned load ");
                          print_gimple_stmt (vect_dump, first_load, 0,
                                             TDF_SLIM);
                        }
                      bad_permutation = true;
                      break;
                    }
                }
            }

          if (!bad_permutation)
            {
              VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
              return true;
            }
        }
    }

  /* FORNOW: the only supported permutation is 0..01..1.. of length equal to
     GROUP_SIZE and where each sequence of same drs is of GROUP_SIZE length as
     well (unless it's reduction).  */
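  /* For example, for GROUP_SIZE 2 the permutation must consist of
     GROUP_SIZE * GROUP_SIZE indices, constant within each run of GROUP_SIZE
     and covering every index exactly once: {0 0 1 1} or {1 1 0 0}.  */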
  if (VEC_length (int, load_permutation)
      != (unsigned int) (group_size * group_size))
    return false;

  supported = true;
  load_index = sbitmap_alloc (group_size);
  sbitmap_zero (load_index);
  for (j = 0; j < group_size; j++)
    {
      for (i = j * group_size, k = 0;
           VEC_iterate (int, load_permutation, i, next) && k < group_size;
           i++, k++)
        {
          if (i != j * group_size && next != prev)
            {
              supported = false;
              break;
            }

          prev = next;
        }

      if (TEST_BIT (load_index, prev))
        {
          supported = false;
          break;
        }

      SET_BIT (load_index, prev);
    }

  for (j = 0; j < group_size; j++)
    if (!TEST_BIT (load_index, j))
      return false;

  sbitmap_free (load_index);

  if (supported && i == group_size * group_size
      && vect_supported_slp_permutation_p (slp_instn))
    return true;

  return false;
}


/* Find the first load in the loop that belongs to INSTANCE.
   When loads are in several SLP nodes, there can be a case in which the first
   load does not appear in the first SLP node to be transformed, causing
   incorrect order of statements.  Since we generate all the loads together,
   they must be inserted before the first load of the SLP instance and not
   before the first load of the first node of the instance.  */

static gimple
vect_find_first_load_in_slp_instance (slp_instance instance)
{
  int i, j;
  slp_tree load_node;
  gimple first_load = NULL, load;

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, load_node)
    FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (load_node), j, load)
      first_load = get_earlier_stmt (load, first_load);

  return first_load;
}


/* Find the last store in SLP INSTANCE.  */

static gimple
vect_find_last_store_in_slp_instance (slp_instance instance)
{
  int i;
  slp_tree node;
  gimple last_store = NULL, store;

  node = SLP_INSTANCE_TREE (instance);
  for (i = 0;
       VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, store);
       i++)
    last_store = get_later_stmt (store, last_store);

  return last_store;
}


/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                           gimple stmt)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple next;
  unsigned int vectorization_factor = 0;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost, i;
  unsigned int max_nunits = 0;
  VEC (int, heap) *load_permutation;
  VEC (slp_tree, heap) *loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  bool loads_permuted = false;
  VEC (gimple, heap) *scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (loop_vinfo);
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (loop_vinfo);
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo));
    }

  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }

      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (loop_vinfo)
    vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  else
    vectorization_factor = nunits;

  /* Calculate the unrolling factor.  */
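  /* For example, for NUNITS 4 and GROUP_SIZE 6, least_common_multiple
     returns 12, so the group must be unrolled 12 / 6 = 2 times to fill
     whole vectors.  */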
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
  if (unrolling_factor != 1 && !loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
                            " block SLP");

      return false;
    }

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            VEC_safe_push (gimple, heap, scalar_stmts,
                           STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            VEC_safe_push (gimple, heap, scalar_stmts, next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
    }
  else
    {
      /* Collect reduction statements.  */
      VEC (gimple, heap) *reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
      for (i = 0; VEC_iterate (gimple, reductions, i, next); i++)
        VEC_safe_push (gimple, heap, scalar_stmts, next);
    }

  node = vect_create_new_slp_node (scalar_stmts);

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
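  /* For example, GROUP_SIZE 8 with NUNITS 4 and UNROLLING_FACTOR 1 gives
     NCOPIES_FOR_COST 2: two vector stmts cover the eight scalars.  */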
1550 ncopies_for_cost = unrolling_factor * group_size / nunits;
b8698a0f
L
1551
1552 load_permutation = VEC_alloc (int, heap, group_size * group_size);
1553 loads = VEC_alloc (slp_tree, heap, group_size);
ebfd146a
IR
1554
1555 /* Build the tree for the SLP instance. */
b8698a0f
L
1556 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
1557 &inside_cost, &outside_cost, ncopies_for_cost,
1558 &max_nunits, &load_permutation, &loads,
6aa904c4 1559 vectorization_factor, &loads_permuted))
ebfd146a 1560 {
4ef69dfc 1561 /* Calculate the unrolling factor based on the smallest type. */
ebfd146a
IR
1562 if (max_nunits > nunits)
1563 unrolling_factor = least_common_multiple (max_nunits, group_size)
1564 / group_size;
b8698a0f 1565
4ef69dfc
IR
1566 if (unrolling_factor != 1 && !loop_vinfo)
1567 {
1568 if (vect_print_dump_info (REPORT_SLP))
1569 fprintf (vect_dump, "Build SLP failed: unrolling required in basic"
1570 " block SLP");
1571 return false;
1572 }
1573
1574 /* Create a new SLP instance. */
1575 new_instance = XNEW (struct _slp_instance);
1576 SLP_INSTANCE_TREE (new_instance) = node;
1577 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a
IR
1578 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
1579 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
1580 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
1581 SLP_INSTANCE_LOADS (new_instance) = loads;
1582 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance) = NULL;
1583 SLP_INSTANCE_LOAD_PERMUTATION (new_instance) = load_permutation;
6aa904c4
IR
1584
1585 if (loads_permuted)
ebfd146a
IR
1586 {
1587 if (!vect_supported_load_permutation_p (new_instance, group_size,
b8698a0f 1588 load_permutation))
ebfd146a
IR
1589 {
1590 if (vect_print_dump_info (REPORT_SLP))
1591 {
1592 fprintf (vect_dump, "Build SLP failed: unsupported load "
1593 "permutation ");
1594 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1595 }
1596
1597 vect_free_slp_instance (new_instance);
1598 return false;
1599 }
1600
1601 SLP_INSTANCE_FIRST_LOAD_STMT (new_instance)
1602 = vect_find_first_load_in_slp_instance (new_instance);
1603 }
1604 else
1605 VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (new_instance));
1606
a70d6342 1607 if (loop_vinfo)
b8698a0f
L
1608 VEC_safe_push (slp_instance, heap,
1609 LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
a70d6342
IR
1610 new_instance);
1611 else
1612 VEC_safe_push (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo),
1613 new_instance);
b8698a0f 1614
ebfd146a
IR
1615 if (vect_print_dump_info (REPORT_SLP))
1616 vect_print_slp_tree (node);
1617
1618 return true;
1619 }
1620
1621 /* Failed to SLP. */
1622 /* Free the allocated memory. */
1623 vect_free_slp_tree (node);
1624 VEC_free (int, heap, load_permutation);
1625 VEC_free (slp_tree, heap, loads);
b8698a0f 1626
a70d6342 1627 return false;
ebfd146a
IR
1628}
1629
1630
ff802fa1 1631/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
1632 trees of packed scalar stmts if SLP is possible. */
1633
1634bool
a70d6342 1635vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a
IR
1636{
1637 unsigned int i;
0d0293ac 1638 VEC (gimple, heap) *grouped_stores, *reductions = NULL, *reduc_chains = NULL;
b010117a 1639 gimple first_element;
a70d6342 1640 bool ok = false;
ebfd146a
IR
1641
1642 if (vect_print_dump_info (REPORT_SLP))
1643 fprintf (vect_dump, "=== vect_analyze_slp ===");
1644
a70d6342 1645 if (loop_vinfo)
b5aeb3bb 1646 {
0d0293ac 1647 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
b010117a 1648 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
b5aeb3bb
IR
1649 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1650 }
a70d6342 1651 else
0d0293ac 1652 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
b8698a0f 1653
0d0293ac
MM
1654 /* Find SLP sequences starting from groups of grouped stores. */
1655 FOR_EACH_VEC_ELT (gimple, grouped_stores, i, first_element)
b010117a 1656 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
a70d6342 1657 ok = true;
ebfd146a 1658
b8698a0f 1659 if (bb_vinfo && !ok)
a70d6342
IR
1660 {
1661 if (vect_print_dump_info (REPORT_SLP))
1662 fprintf (vect_dump, "Failed to SLP the basic block.");
1663
1664 return false;
1665 }
ebfd146a 1666
b010117a
IR
1667 if (loop_vinfo
1668 && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
1669 {
1670 /* Find SLP sequences starting from reduction chains. */
1671 FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
1672 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
1673 ok = true;
1674 else
1675 return false;
1676
1677 /* Don't try to vectorize SLP reductions if reduction chain was
1678 detected. */
1679 return ok;
1680 }
1681
b5aeb3bb 1682 /* Find SLP sequences starting from groups of reductions. */
74500b3e 1683 if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
b5aeb3bb
IR
1684 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo,
1685 VEC_index (gimple, reductions, 0)))
1686 ok = true;
1687
ebfd146a
IR
1688 return true;
1689}
1690
1691
1692/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
1693 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1694 least one instance. */
ebfd146a 1695
437f4a00 1696bool
ebfd146a
IR
1697vect_make_slp_decision (loop_vec_info loop_vinfo)
1698{
1699 unsigned int i, unrolling_factor = 1;
1700 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1701 slp_instance instance;
1702 int decided_to_slp = 0;
1703
1704 if (vect_print_dump_info (REPORT_SLP))
1705 fprintf (vect_dump, "=== vect_make_slp_decision ===");
1706
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later
         we call vect_detect_hybrid_slp () to find stmts that need hybrid SLP
         and loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts
   that can't be SLPed) in the tree rooted at NODE.  Mark such stmts as
   HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  imm_use_iterator imm_iter;
  gimple use_stmt;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  slp_void_p child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = NULL;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  basic_block bb = NULL;

  if (!node)
    return;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);
  else
    bb = BB_VINFO_BB (bb_vinfo);

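  /* A stmt is marked hybrid if its scalar result is also consumed, inside
     the same loop or basic block, by a stmt that is relevant to
     vectorization (or part of a reduction cycle) but not itself part of an
     SLP instance; such a stmt must additionally be vectorized by the
     loop-based scheme.  */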
  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if (gimple_bb (use_stmt)
            && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
                || bb == gimple_bb (use_stmt))
            && (stmt_vinfo = vinfo_for_stmt (use_stmt))
            && !STMT_SLP_TYPE (stmt_vinfo)
            && (STMT_VINFO_RELEVANT (stmt_vinfo)
                || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_vinfo)))
            && !(gimple_code (use_stmt) == GIMPLE_PHI
                 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def))
          vect_mark_slp_stmts (node, hybrid, i);

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_detect_hybrid_slp_stmts ((slp_tree) child);
}


/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}


/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (basic_block bb)
{
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  BB_VINFO_BB (res) = bb;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
    }

  BB_VINFO_GROUPED_STORES (res) = VEC_alloc (gimple, heap, 10);
  BB_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 2);

  bb->aux = res;
  return res;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  basic_block bb;
  gimple_stmt_iterator si;

  if (!bb_vinfo)
    return;

  bb = BB_VINFO_BB (bb_vinfo);

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);
    }

  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  VEC_free (gimple, heap, BB_VINFO_GROUPED_STORES (bb_vinfo));
  VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
  free (bb_vinfo);
  bb->aux = NULL;
}


/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (bb_vec_info bb_vinfo, slp_tree node)
{
  bool dummy;
  int i;
  gimple stmt;
  slp_void_p child;

  if (!node)
    return true;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (bb_vinfo, (slp_tree) child))
      return false;

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);
      gcc_assert (PURE_SLP_STMT (stmt_info));

      if (!vect_analyze_stmt (stmt, &dummy, node))
        return false;
    }

  return true;
}


/* Analyze statements in SLP instances of the basic block.  Return TRUE if
   the operations are supported.  */

static bool
vect_slp_analyze_operations (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;

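  /* Iterate without an automatic increment: when an instance fails,
     VEC_ordered_remove shifts the remaining instances down onto index I,
     so I is only advanced when the current instance is kept.  */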
  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); )
    {
      if (!vect_slp_analyze_node_operations (bb_vinfo,
                                             SLP_INSTANCE_TREE (instance)))
        {
          vect_free_slp_instance (instance);
          VEC_ordered_remove (slp_instance, slp_instances, i);
        }
      else
        i++;
    }

  if (!VEC_length (slp_instance, slp_instances))
    return false;

  return true;
}

/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_outside_cost = 0, vec_inside_cost = 0, scalar_cost = 0;
  unsigned int stmt_cost;
  gimple stmt;
  gimple_stmt_iterator si;
  basic_block bb = BB_VINFO_BB (bb_vinfo);
  stmt_vec_info stmt_info = NULL;
  tree dummy_type = NULL;
  int dummy = 0;

  /* Calculate vector costs.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
      vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
    }

  /* Calculate scalar cost.  */
  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      stmt_info = vinfo_for_stmt (stmt);

      if (!stmt_info || !STMT_VINFO_VECTORIZABLE (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;

      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_load, dummy_type, dummy);
          else
            stmt_cost = targetm.vectorize.builtin_vectorization_cost
                          (scalar_store, dummy_type, dummy);
        }
      else
        stmt_cost = targetm.vectorize.builtin_vectorization_cost
                      (scalar_stmt, dummy_type, dummy);

      scalar_cost += stmt_cost;
    }

  if (vect_print_dump_info (REPORT_COST))
    {
      fprintf (vect_dump, "Cost model analysis: \n");
      fprintf (vect_dump, "  Vector inside of basic block cost: %d\n",
               vec_inside_cost);
      fprintf (vect_dump, "  Vector outside of basic block cost: %d\n",
               vec_outside_cost);
      fprintf (vect_dump, "  Scalar cost of basic block: %d", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  */
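  /* E.g., with a vector inside cost of 2, a vector outside cost of 1 and a
     scalar cost of 4, we have 2 + 1 < 4 and the block is vectorized.  */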
  if (vec_outside_cost + vec_inside_cost >= scalar_cost)
    return false;

  return true;
}

/* Check if the basic block can be vectorized.  */

static bb_vec_info
vect_slp_analyze_bb_1 (basic_block bb)
{
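  /* The analysis below mirrors the loop-vectorizer pipeline: analyze data
     refs, recognize patterns, check dependences, alignment and accesses,
     build the SLP trees, verify the operations, and finally run the cost
     model.  Any failure destroys BB_VINFO and returns NULL.  */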
  bb_vec_info bb_vinfo;
  VEC (ddr_p, heap) *ddrs;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  int i;
  int min_vf = 2;
  int max_vf = MAX_VECTORIZATION_FACTOR;

  bb_vinfo = new_bb_vec_info (bb);
  if (!bb_vinfo)
    return NULL;

  if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data-ref in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  ddrs = BB_VINFO_DDRS (bb_vinfo);
  if (!VEC_length (ddr_p, ddrs))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: not enough data-refs in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  vect_pattern_recog (NULL, bb_vinfo);

  if (!vect_analyze_data_ref_dependences (NULL, bb_vinfo, &max_vf)
      || min_vf > max_vf)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data dependence "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad data alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unhandled data access in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: failed to find SLP opportunities "
                            "in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);

  /* Mark all the statements that we want to vectorize as pure SLP and
     relevant.  */
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
    }

  if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: unsupported alignment in basic "
                            "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (flag_vect_cost_model
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: vectorization is not "
                            "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}


bb_vec_info
vect_slp_analyze_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  int insns = 0;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===vect_slp_analyze_bb===\n");

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_debug (stmt)
          && !gimple_nop_p (stmt)
          && gimple_code (stmt) != GIMPLE_LABEL)
        insns++;
    }

  if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "not vectorized: too many instructions in basic "
                            "block.\n");

      return NULL;
    }

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

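  /* VECTOR_SIZES is a bitmask of the vector byte sizes the target supports
     (e.g., 32 | 16 when AVX is enabled).  Every failed attempt below clears
     the size just tried and the analysis is retried with the largest size
     that remains.  */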
  while (1)
    {
      bb_vinfo = vect_slp_analyze_bb_1 (bb);
      if (bb_vinfo)
        return bb_vinfo;

      destroy_bb_vec_info (bb_vinfo);

      vector_sizes &= ~current_vector_size;
      if (vector_sizes == 0
          || current_vector_size == 0)
        return NULL;

      /* Try the next biggest vector size.  */
      current_vector_size = 1 << floor_log2 (vector_sizes);
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "***** Re-trying analysis with "
                 "vector size %d\n", current_vector_size);
    }
}


/* SLP costs are calculated according to the SLP instance unrolling factor
   (i.e., the number of created vector stmts depends on the unrolling
   factor).  However, the actual number of vector stmts for every SLP node
   depends on VF, which is set later in vect_analyze_operations ().  Hence,
   SLP costs should be updated.  In this function we assume that the inside
   costs calculated in vect_model_xxx_cost are linear in ncopies.  */

void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

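  /* E.g., an instance with unrolling factor 2 in a loop with VF 8 creates
     8 / 2 = 4 times the number of vector stmts assumed during the SLP
     analysis, so its inside cost is scaled by that ratio.  */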
  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}


/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines which operand of the RHS of the scalar stmts we gather
   defs for.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements,
   unless it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           VEC (tree, heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                           int reduc_index)
{
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned nunits;
  tree vec_cst;
  tree *elts;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = VEC_length (gimple, stmts);
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple def_stmt;
  struct loop *loop;

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && reduc_index != -1)
    {
      op_num = reduc_index - 1;
      op = gimple_op (stmt, reduc_index);
      /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
         we need either neutral operands or the original operands.  See
         get_initial_def_for_reduction () for details.  */
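      /* E.g., the neutral operand is 0 for PLUS_EXPR, 1 for MULT_EXPR and
         -1 (all ones) for BIT_AND_EXPR; MIN_EXPR/MAX_EXPR have no neutral
         element, so the initial value from the loop preheader edge is used
         instead.  */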
      switch (code)
        {
          case WIDEN_SUM_EXPR:
          case DOT_PROD_EXPR:
          case PLUS_EXPR:
          case MINUS_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst0);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 0);

            break;

          case MULT_EXPR:
            if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
              neutral_op = build_real (TREE_TYPE (op), dconst1);
            else
              neutral_op = build_int_cst (TREE_TYPE (op), 1);

            break;

          case BIT_AND_EXPR:
            neutral_op = build_int_cst (TREE_TYPE (op), -1);
            break;

          case MAX_EXPR:
          case MIN_EXPR:
            def_stmt = SSA_NAME_DEF_STMT (op);
            loop = (gimple_bb (stmt))->loop_father;
            neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                                loop_preheader_edge (loop));
            break;

          default:
            neutral_op = NULL;
        }
    }

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  if (CONSTANT_CLASS_P (op))
    constant_p = true;
  else
    constant_p = false;

  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  gcc_assert (vector_type);
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values
     in created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = least_common_multiple (nunits, group_size) / group_size;

  number_of_places_left_in_vector = nunits;
  elts = XALLOCAVEC (tree, nunits);
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            {
              switch (code)
                {
                  case COND_EXPR:
                    if (op_num == 0 || op_num == 1)
                      {
                        tree cond = gimple_assign_rhs1 (stmt);
                        op = TREE_OPERAND (cond, op_num);
                      }
                    else
                      {
                        if (op_num == 2)
                          op = gimple_assign_rhs2 (stmt);
                        else
                          op = gimple_assign_rhs3 (stmt);
                      }
                    break;

                  case CALL_EXPR:
                    op = gimple_call_arg (stmt, op_num);
                    break;

                  case LSHIFT_EXPR:
                  case RSHIFT_EXPR:
                  case LROTATE_EXPR:
                  case RROTATE_EXPR:
                    op = gimple_op (stmt, op_num + 1);
                    /* Unlike the other binary operators, shifts/rotates have
                       the shift count being int, instead of the same type as
                       the lhs, so make sure the scalar is the right type if
                       we are dealing with vectors of
                       long long/long/short/char.  */
                    if (op_num == 1 && constant_p)
                      op = fold_convert (TREE_TYPE (vector_type), op);
                    break;

                  default:
                    op = gimple_op (stmt, op_num + 1);
                    break;
                }
            }

          if (reduc_index != -1)
            {
              loop = (gimple_bb (stmt))->loop_father;
              def_stmt = SSA_NAME_DEF_STMT (op);

              gcc_assert (loop);

              /* Get the def before the loop.  In a reduction chain we have
                 only one initial value.  */
              if ((j != (number_of_copies - 1)
                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                       && i != 0))
                  && neutral_op)
                op = neutral_op;
              else
                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
                                            loop_preheader_edge (loop));
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
          if (constant_p
              && !types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              op = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
              gcc_assert (op && CONSTANT_CLASS_P (op));
            }
          elts[number_of_places_left_in_vector] = op;

          if (number_of_places_left_in_vector == 0)
            {
              number_of_places_left_in_vector = nunits;

              if (constant_p)
                vec_cst = build_vector (vector_type, elts);
              else
                {
                  VEC(constructor_elt,gc) *v;
                  unsigned k;
                  v = VEC_alloc (constructor_elt, gc, nunits);
                  for (k = 0; k < nunits; ++k)
                    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
                  vec_cst = build_constructor (vector_type, v);
                }
              VEC_quick_push (tree, voprnds,
                              vect_init_vector (stmt, vec_cst,
                                                vector_type, NULL));
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
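  /* The inner loop above walks the scalar stmts from last to first, so the
     vectors covering the later scalars were pushed onto VOPRNDS first;
     copying them out back to front restores the original order.  */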
  vec_num = VEC_length (tree, voprnds);
  for (j = vec_num; j != 0; j--)
    {
      vop = VEC_index (tree, voprnds, j - 1);
      VEC_quick_push (tree, *vec_oprnds, vop);
    }

  VEC_free (tree, heap, voprnds);

  /* In case VF is greater than the unrolling factor needed for the SLP group
     of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > VEC_length (tree, *vec_oprnds))
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          VEC_quick_push (tree, *vec_oprnds, neutral_vec);
        }
      else
        {
          for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
            VEC_quick_push (tree, *vec_oprnds, vop);
        }
    }
}


/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
{
  tree vec_oprnd;
  gimple vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node));

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      vec_oprnd = gimple_get_lhs (vec_def_stmt);
      VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
    }
}


/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors () to create vector stmts.
   Otherwise, the def-stmts must already be vectorized and the vectorized
   stmts must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (VEC (tree, heap) *ops, slp_tree slp_node,
                   VEC (slp_void_p, heap) **vec_oprnds, int reduc_index)
{
  gimple first_stmt, first_def;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  VEC (tree, heap) *vec_defs;
  tree oprnd, def_lhs;
  bool vectorized_defs;

  first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
  FOR_EACH_VEC_ELT (tree, ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors () and do not advance CHILD_INDEX, so that
         this child node is checked again for the next operand.  */
      vectorized_defs = false;
      if (VEC_length (slp_void_p, SLP_TREE_CHILDREN (slp_node)) > child_index)
        {
          child = (slp_tree) VEC_index (slp_void_p,
                                        SLP_TREE_CHILDREN (slp_node),
                                        child_index);
          first_def = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (child), 0);

          /* At the end of a pattern sequence we have a use of the original
             stmt, so we need to compare OPRND with the original def.  */
          if (is_pattern_stmt_p (vinfo_for_stmt (first_def))
              && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first_stmt))
              && !is_pattern_stmt_p (vinfo_for_stmt (first_stmt)))
            first_def = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));

          if (is_gimple_call (first_def))
            def_lhs = gimple_call_lhs (first_def);
          else
            def_lhs = gimple_assign_lhs (first_def);

          if (operand_equal_p (oprnd, def_lhs, 0))
            {
              /* The number of vector defs is determined by the number of
                 vector statements in the node from which we get those
                 statements.  */
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
              vectorized_defs = true;
              child_index++;
            }
        }

      if (!vectorized_defs)
        {
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = VEC_alloc (tree, heap, number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs && reduc_index == -1)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects, reduc_index);

      VEC_quick_push (slp_void_p, *vec_oprnds, (slp_void_p) vec_defs);

      /* For reductions, we only need initial values.  */
      if (reduc_index != -1)
        return;
    }
}


/* Create NCOPIES permutation statements using the mask MASK (a vector
   constant of type MASK_TYPE) and two input vectors placed in DR_CHAIN at
   FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy, shifting by STRIDE
   elements of DR_CHAIN for every copy.
   (STRIDE is the number of vectorized stmts for NODE divided by the number
   of copies).
   VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE
   where the created stmts must be inserted.  */

static inline void
vect_create_mask_and_perm (gimple stmt, gimple next_scalar_stmt,
                           tree mask, int first_vec_indx, int second_vec_indx,
                           gimple_stmt_iterator *gsi, slp_tree node,
                           tree vectype, VEC(tree,heap) *dr_chain,
                           int ncopies, int vect_stmts_counter)
{
  tree perm_dest;
  gimple perm_stmt = NULL;
  stmt_vec_info next_stmt_info;
  int i, stride;
  tree first_vec, second_vec, data_ref;

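  /* E.g., with 4 vector stmts for NODE and NCOPIES 2, STRIDE is 2: the
     second copy reads its two input vectors two positions further down
     DR_CHAIN.  */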
  stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  for (i = VEC_length (gimple, SLP_TREE_VEC_STMTS (node));
       i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
    VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (node), NULL);

  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
  for (i = 0; i < ncopies; i++)
    {
      first_vec = VEC_index (tree, dr_chain, first_vec_indx);
      second_vec = VEC_index (tree, dr_chain, second_vec_indx);

      /* Generate the permute statement.  */
      perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
                                                 first_vec, second_vec, mask);
      data_ref = make_ssa_name (perm_dest, perm_stmt);
      gimple_set_lhs (perm_stmt, data_ref);
      vect_finish_stmt_generation (stmt, perm_stmt, gsi);

      /* Store the vector statement in NODE.  */
      VEC_replace (gimple, SLP_TREE_VEC_STMTS (node),
                   stride * i + vect_stmts_counter, perm_stmt);

      first_vec_indx += stride;
      second_vec_indx += stride;
    }

  /* Mark the scalar stmt as vectorized.  */
  next_stmt_info = vinfo_for_stmt (next_scalar_stmt);
  STMT_VINFO_VEC_STMT (next_stmt_info) = perm_stmt;
}


/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
   return in CURRENT_MASK_ELEMENT its equivalent in target specific
   representation.  Check that the mask is valid and return FALSE if not.
   Return TRUE in NEED_NEXT_VECTOR if the permutation requires moving to
   the next vector, i.e., the current first vector is not needed.  */

static bool
vect_get_mask_element (gimple stmt, int first_mask_element, int m,
                       int mask_nunits, bool only_one_vec, int index,
                       unsigned char *mask, int *current_mask_element,
                       bool *need_next_vector, int *number_of_mask_fixes,
                       bool *mask_fixed, bool *needs_first_vector)
{
  int i;

  /* Convert to target specific representation.  */
  *current_mask_element = first_mask_element + m;
  /* Adjust the value in case it's a mask for second and third vectors.  */
  *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);

  if (*current_mask_element < mask_nunits)
    *needs_first_vector = true;

  /* We have only one input vector to permute but the mask accesses values in
     the next vector as well.  */
  if (only_one_vec && *current_mask_element >= mask_nunits)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "permutation requires at least two vectors ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      return false;
    }

  /* The mask requires the next vector.  */
  if (*current_mask_element >= mask_nunits * 2)
    {
      if (*needs_first_vector || *mask_fixed)
        {
          /* We either need the first vector too or have already moved to the
             next vector.  In both cases, this permutation needs three
             vectors.  */
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "permutation requires at "
                       "least three vectors ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          return false;
        }

      /* We move to the next vector, dropping the first one and working with
         the second and the third - we need to adjust the values of the mask
         accordingly.  */
      *current_mask_element -= mask_nunits * *number_of_mask_fixes;

      for (i = 0; i < index; i++)
        mask[i] -= mask_nunits * *number_of_mask_fixes;

      (*number_of_mask_fixes)++;
      *mask_fixed = true;
    }

  *need_next_vector = *mask_fixed;

  /* This was the last element of this mask.  Start a new one.  */
  if (index == mask_nunits - 1)
    {
      *number_of_mask_fixes = 1;
      *mask_fixed = false;
      *needs_first_vector = false;
    }

  return true;
}


/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for SLP_NODE_INSTANCE.  */
bool
vect_transform_slp_perm_load (gimple stmt, VEC (tree, heap) *dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int i, j, k, nunits, vec_index = 0, scalar_index;
  slp_tree node;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  gimple next_scalar_stmt;
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int first_mask_element;
  int index, unroll_factor, current_mask_element, ncopies;
  unsigned char *mask;
  bool only_one_vec = false, need_next_vector = false;
  int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
  int number_of_mask_fixes = 1;
  bool mask_fixed = false;
  bool needs_first_vector = false;
  enum machine_mode mode;

  mode = TYPE_MODE (vectype);

  if (!can_vec_perm_p (mode, false, NULL))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "no vect permute for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }
      return false;
    }

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = XALLOCAVEC (unsigned char, nunits);
  unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* The number of vector stmts to generate based only on the
     SLP_NODE_INSTANCE unrolling factor.  */
  orig_vec_stmts_num = group_size
    * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance) / nunits;
  if (orig_vec_stmts_num == 1)
    only_one_vec = true;

  /* The number of copies is determined by the final vectorization factor
     relative to the SLP_NODE_INSTANCE unrolling factor.  */
  ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);

  /* Generate permutation masks for every NODE.  The number of masks for each
     NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for the permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (slp_node_instance), i, node)
    {
      scalar_index = 0;
      index = 0;
      vect_stmts_counter = 0;
      vec_index = 0;
      first_vec_index = vec_index++;
      if (only_one_vec)
        second_vec_index = first_vec_index;
      else
        second_vec_index = vec_index++;

      for (j = 0; j < unroll_factor; j++)
        {
          for (k = 0; k < group_size; k++)
            {
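              /* Load I of the group appears every GROUP_SIZE elements in
                 the interleaved sequence, so copy J of its mask starts at
                 element I + J * GROUP_SIZE.  */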
              first_mask_element = i + j * group_size;
              if (!vect_get_mask_element (stmt, first_mask_element, 0,
                                          nunits, only_one_vec, index,
                                          mask, &current_mask_element,
                                          &need_next_vector,
                                          &number_of_mask_fixes, &mask_fixed,
                                          &needs_first_vector))
                return false;
              mask[index++] = current_mask_element;

              if (index == nunits)
                {
                  tree mask_vec, *mask_elts;
                  int l;

                  if (!can_vec_perm_p (mode, false, mask))
                    {
                      if (vect_print_dump_info (REPORT_DETAILS))
                        {
                          fprintf (vect_dump, "unsupported vect permute { ");
                          for (i = 0; i < nunits; ++i)
                            fprintf (vect_dump, "%d ", mask[i]);
                          fprintf (vect_dump, "}\n");
                        }
                      return false;
                    }

                  mask_elts = XALLOCAVEC (tree, nunits);
                  for (l = 0; l < nunits; ++l)
                    mask_elts[l] = build_int_cst (mask_element_type, mask[l]);
                  mask_vec = build_vector (mask_type, mask_elts);
                  index = 0;

                  if (!analyze_only)
                    {
                      if (need_next_vector)
                        {
                          first_vec_index = second_vec_index;
                          second_vec_index = vec_index;
                        }

                      next_scalar_stmt = VEC_index (gimple,
                                SLP_TREE_SCALAR_STMTS (node), scalar_index++);

                      vect_create_mask_and_perm (stmt, next_scalar_stmt,
                               mask_vec, first_vec_index, second_vec_index,
                               gsi, node, vectype, dr_chain,
                               ncopies, vect_stmts_counter++);
                    }
                }
            }
        }
    }

  return true;
}


/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i;
  slp_tree loads_node;
  slp_void_p child;

  if (!node)
    return false;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance ((slp_tree) child, instance,
                                vectorization_factor);

  stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate the number of vector stmts to be created
     for the scalar stmts in each node of the SLP tree.  The number of vector
     elements in one vector iteration is the number of scalar elements in
     one scalar iteration (GROUP_SIZE) multiplied by VF divided by the vector
     size.  */
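  /* E.g., a group of 2 scalar stmts, a VF of 4 and 4-element vectors give
     (4 * 2) / 4 = 2 vector stmts per node.  */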
  vec_stmts_size = (vectorization_factor * group_size) / nunits;

  /* In case of load permutation we have to allocate vectorized statements
     for all the nodes that participate in that permutation.  */
  if (SLP_INSTANCE_LOAD_PERMUTATION (instance))
    {
      FOR_EACH_VEC_ELT (slp_tree, SLP_INSTANCE_LOADS (instance), i, loads_node)
        {
          if (!SLP_TREE_VEC_STMTS (loads_node))
            {
              SLP_TREE_VEC_STMTS (loads_node) = VEC_alloc (gimple, heap,
                                                           vec_stmts_size);
              SLP_TREE_NUMBER_OF_VEC_STMTS (loads_node) = vec_stmts_size;
            }
        }
    }

  if (!SLP_TREE_VEC_STMTS (node))
    {
      SLP_TREE_VEC_STMTS (node) = VEC_alloc (gimple, heap, vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }

  /* Loads should be inserted before the first load.  */
  if (SLP_INSTANCE_FIRST_LOAD_STMT (instance)
      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && !REFERENCE_CLASS_P (gimple_get_lhs (stmt))
      && SLP_INSTANCE_LOAD_PERMUTATION (instance))
    si = gsi_for_stmt (SLP_INSTANCE_FIRST_LOAD_STMT (instance));
  else if (is_pattern_stmt_p (stmt_info))
    si = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
  else
    si = gsi_for_stmt (stmt);

  /* Stores should be inserted just before the last store.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REFERENCE_CLASS_P (gimple_get_lhs (stmt)))
    {
      gimple last_store = vect_find_last_store_in_slp_instance (instance);
      if (is_pattern_stmt_p (vinfo_for_stmt (last_store)))
        last_store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_store));
      si = gsi_for_stmt (last_store);
    }

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
  return is_store;
}

/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until the end of vect_schedule_slp, because
   multiple SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple stmt, new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_void_p child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (!node)
    return;

  FOR_EACH_VEC_ELT (slp_void_p, SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls ((slp_tree) child);

  FOR_EACH_VEC_ELT (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  if (loop_vinfo)
    {
      slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
    }
  else
    {
      slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
      vf = 1;
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)
          || vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump, "vectorizing stmts using SLP.");
    }

  FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      vect_remove_slp_scalar_calls (root);

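      /* The scalar stores of this instance are dead now that their vector
         counterparts have been emitted, so remove them here; the remaining
         scalar stmts are left for later cleanup passes.  */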
      for (j = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (root), j, store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}


/* Vectorize the basic block.  */

void
vect_slp_transform_bb (basic_block bb)
{
  bb_vec_info bb_vinfo = vec_info_for_bb (bb);
  gimple_stmt_iterator si;

  gcc_assert (bb_vinfo);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "SLPing BB\n");

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      gimple stmt = gsi_stmt (si);
      stmt_vec_info stmt_info;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "------>SLPing statement: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (stmt_info);

      /* Schedule all the SLP instances when the first SLP stmt is reached.  */
      if (STMT_SLP_TYPE (stmt_info))
        {
          vect_schedule_slp (NULL, bb_vinfo);
          break;
        }
    }

  mark_sym_for_renaming (gimple_vop (cfun));
  /* The memory tags and pointers in vectorized statements need to
     have their SSA forms updated.  FIXME, why can't this be delayed
     until all the loops have been transformed?  */
  update_ssa (TODO_update_ssa);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "BASIC BLOCK VECTORIZED\n");

  destroy_bb_vec_info (bb_vinfo);
}