1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "gimple-pretty-print.h"
39 #include "fold-const.h"
40 #include "stor-layout.h"
41 #include "internal-fn.h"
42 #include "gimple-iterator.h"
45 #include "tree-vectorizer.h"
46 #include "langhooks.h"
47 #include "gimple-walk.h"
49 /* Extract the location of the basic block in the source code.
50 Return the basic block location if succeed and NULL if not. */
53 find_bb_location (basic_block bb
)
56 gimple_stmt_iterator si
;
59 return UNKNOWN_LOCATION
;
61 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
64 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
65 return gimple_location (stmt
);
68 return UNKNOWN_LOCATION
;
72 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
75 vect_free_slp_tree (slp_tree node
)
83 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
84 vect_free_slp_tree (child
);
86 SLP_TREE_CHILDREN (node
).release ();
87 SLP_TREE_SCALAR_STMTS (node
).release ();
88 SLP_TREE_VEC_STMTS (node
).release ();
89 SLP_TREE_LOAD_PERMUTATION (node
).release ();
95 /* Free the memory allocated for the SLP instance. */
98 vect_free_slp_instance (slp_instance instance
)
100 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
101 SLP_INSTANCE_LOADS (instance
).release ();
106 /* Create an SLP node for SCALAR_STMTS. */
109 vect_create_new_slp_node (vec
<gimple
*> scalar_stmts
)
112 gimple
*stmt
= scalar_stmts
[0];
115 if (is_gimple_call (stmt
))
116 nops
= gimple_call_num_args (stmt
);
117 else if (is_gimple_assign (stmt
))
119 nops
= gimple_num_ops (stmt
) - 1;
120 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
126 node
= XNEW (struct _slp_tree
);
127 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
128 SLP_TREE_VEC_STMTS (node
).create (0);
129 SLP_TREE_CHILDREN (node
).create (nops
);
130 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
131 SLP_TREE_TWO_OPERATORS (node
) = false;
137 /* This structure is used in creation of an SLP tree. Each instance
138 corresponds to the same operand in a group of scalar stmts in an SLP
140 typedef struct _slp_oprnd_info
142 /* Def-stmts for the operands. */
143 vec
<gimple
*> def_stmts
;
144 /* Information about the first statement, its vector def-type, type, the
145 operand itself in case it's constant, and an indication if it's a pattern
147 enum vect_def_type first_dt
;
154 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
156 static vec
<slp_oprnd_info
>
157 vect_create_oprnd_info (int nops
, int group_size
)
160 slp_oprnd_info oprnd_info
;
161 vec
<slp_oprnd_info
> oprnds_info
;
163 oprnds_info
.create (nops
);
164 for (i
= 0; i
< nops
; i
++)
166 oprnd_info
= XNEW (struct _slp_oprnd_info
);
167 oprnd_info
->def_stmts
.create (group_size
);
168 oprnd_info
->first_dt
= vect_uninitialized_def
;
169 oprnd_info
->first_op_type
= NULL_TREE
;
170 oprnd_info
->first_pattern
= false;
171 oprnd_info
->second_pattern
= false;
172 oprnds_info
.quick_push (oprnd_info
);
179 /* Free operands info. */
182 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
185 slp_oprnd_info oprnd_info
;
187 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
189 oprnd_info
->def_stmts
.release ();
190 XDELETE (oprnd_info
);
193 oprnds_info
.release ();
197 /* Find the place of the data-ref in STMT in the interleaving chain that starts
198 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
201 vect_get_place_in_interleaving_chain (gimple
*stmt
, gimple
*first_stmt
)
203 gimple
*next_stmt
= first_stmt
;
206 if (first_stmt
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
211 if (next_stmt
== stmt
)
213 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
215 result
+= GROUP_GAP (vinfo_for_stmt (next_stmt
));
223 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
224 they are of a valid type and that they match the defs of the first stmt of
225 the SLP group (stored in OPRNDS_INFO). If there was a fatal error
226 return -1, if the error could be corrected by swapping operands of the
227 operation return 1, if everything is ok return 0. */
230 vect_get_and_check_slp_defs (vec_info
*vinfo
,
231 gimple
*stmt
, unsigned stmt_num
,
232 vec
<slp_oprnd_info
> *oprnds_info
)
235 unsigned int i
, number_of_oprnds
;
237 enum vect_def_type dt
= vect_uninitialized_def
;
238 struct loop
*loop
= NULL
;
239 bool pattern
= false;
240 slp_oprnd_info oprnd_info
;
241 int first_op_idx
= 1;
242 bool commutative
= false;
243 bool first_op_cond
= false;
244 bool first
= stmt_num
== 0;
245 bool second
= stmt_num
== 1;
247 if (is_a
<loop_vec_info
> (vinfo
))
248 loop
= LOOP_VINFO_LOOP (as_a
<loop_vec_info
> (vinfo
));
250 if (is_gimple_call (stmt
))
252 number_of_oprnds
= gimple_call_num_args (stmt
);
255 else if (is_gimple_assign (stmt
))
257 enum tree_code code
= gimple_assign_rhs_code (stmt
);
258 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
259 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
261 first_op_cond
= true;
266 commutative
= commutative_tree_code (code
);
271 bool swapped
= false;
272 for (i
= 0; i
< number_of_oprnds
; i
++)
277 if (i
== 0 || i
== 1)
278 oprnd
= TREE_OPERAND (gimple_op (stmt
, first_op_idx
),
281 oprnd
= gimple_op (stmt
, first_op_idx
+ i
- 1);
284 oprnd
= gimple_op (stmt
, first_op_idx
+ (swapped
? !i
: i
));
286 oprnd_info
= (*oprnds_info
)[i
];
288 if (!vect_is_simple_use (oprnd
, vinfo
, &def_stmt
, &dt
))
290 if (dump_enabled_p ())
292 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
293 "Build SLP failed: can't analyze def for ");
294 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
295 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
301 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
302 from the pattern. Check that all the stmts of the node are in the
304 if (def_stmt
&& gimple_bb (def_stmt
)
305 && ((is_a
<loop_vec_info
> (vinfo
)
306 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
307 || (is_a
<bb_vec_info
> (vinfo
)
308 && gimple_bb (def_stmt
) == as_a
<bb_vec_info
> (vinfo
)->bb
309 && gimple_code (def_stmt
) != GIMPLE_PHI
))
310 && vinfo_for_stmt (def_stmt
)
311 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt
))
312 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt
))
313 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt
)))
316 if (!first
&& !oprnd_info
->first_pattern
317 /* Allow different pattern state for the defs of the
318 first stmt in reduction chains. */
319 && (oprnd_info
->first_dt
!= vect_reduction_def
320 || (!second
&& !oprnd_info
->second_pattern
)))
330 if (dump_enabled_p ())
332 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
333 "Build SLP failed: some of the stmts"
334 " are in a pattern, and others are not ");
335 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
336 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
342 def_stmt
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt
));
343 dt
= STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt
));
345 if (dt
== vect_unknown_def_type
)
347 if (dump_enabled_p ())
348 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
349 "Unsupported pattern.\n");
353 switch (gimple_code (def_stmt
))
360 if (dump_enabled_p ())
361 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
362 "unsupported defining stmt:\n");
368 oprnd_info
->second_pattern
= pattern
;
372 oprnd_info
->first_dt
= dt
;
373 oprnd_info
->first_pattern
= pattern
;
374 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
378 /* Not first stmt of the group, check that the def-stmt/s match
379 the def-stmt/s of the first stmt. Allow different definition
380 types for reduction chains: the first stmt must be a
381 vect_reduction_def (a phi node), and the rest
382 vect_internal_def. */
383 if (((oprnd_info
->first_dt
!= dt
384 && !(oprnd_info
->first_dt
== vect_reduction_def
385 && dt
== vect_internal_def
)
386 && !((oprnd_info
->first_dt
== vect_external_def
387 || oprnd_info
->first_dt
== vect_constant_def
)
388 && (dt
== vect_external_def
389 || dt
== vect_constant_def
)))
390 || !types_compatible_p (oprnd_info
->first_op_type
,
393 /* Try swapping operands if we got a mismatch. */
402 if (dump_enabled_p ())
403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
404 "Build SLP failed: different types\n");
410 /* Check the types of the definitions. */
413 case vect_constant_def
:
414 case vect_external_def
:
415 case vect_reduction_def
:
418 case vect_internal_def
:
419 oprnd_info
->def_stmts
.quick_push (def_stmt
);
423 /* FORNOW: Not supported. */
424 if (dump_enabled_p ())
426 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
427 "Build SLP failed: illegal type of def ");
428 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, oprnd
);
429 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
441 tree cond
= gimple_assign_rhs1 (stmt
);
442 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
443 &TREE_OPERAND (cond
, 1));
444 TREE_SET_CODE (cond
, swap_tree_comparison (TREE_CODE (cond
)));
447 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
448 gimple_assign_rhs2_ptr (stmt
));
455 /* Verify if the scalar stmts STMTS are isomorphic, require data
456 permutation or are of unsupported types of operation. Return
457 true if they are, otherwise return false and indicate in *MATCHES
458 which stmts are not isomorphic to the first one. If MATCHES[0]
459 is false then this indicates the comparison could not be
460 carried out or the stmts will never be vectorized by SLP. */
463 vect_build_slp_tree_1 (vec_info
*vinfo
,
464 vec
<gimple
*> stmts
, unsigned int group_size
,
465 unsigned nops
, unsigned int *max_nunits
,
466 unsigned int vectorization_factor
, bool *matches
,
470 gimple
*first_stmt
= stmts
[0], *stmt
= stmts
[0];
471 enum tree_code first_stmt_code
= ERROR_MARK
;
472 enum tree_code alt_stmt_code
= ERROR_MARK
;
473 enum tree_code rhs_code
= ERROR_MARK
;
474 enum tree_code first_cond_code
= ERROR_MARK
;
476 bool need_same_oprnds
= false;
477 tree vectype
= NULL_TREE
, scalar_type
, first_op1
= NULL_TREE
;
480 machine_mode optab_op2_mode
;
481 machine_mode vec_mode
;
483 gimple
*first_load
= NULL
, *prev_first_load
= NULL
;
486 /* For every stmt in NODE find its def stmt/s. */
487 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
491 if (dump_enabled_p ())
493 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for ");
494 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
495 dump_printf (MSG_NOTE
, "\n");
498 /* Fail to vectorize statements marked as unvectorizable. */
499 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)))
501 if (dump_enabled_p ())
503 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
504 "Build SLP failed: unvectorizable statement ");
505 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
506 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
508 /* Fatal mismatch. */
513 lhs
= gimple_get_lhs (stmt
);
514 if (lhs
== NULL_TREE
)
516 if (dump_enabled_p ())
518 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
519 "Build SLP failed: not GIMPLE_ASSIGN nor "
521 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
522 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
524 /* Fatal mismatch. */
529 if (is_gimple_assign (stmt
)
530 && gimple_assign_rhs_code (stmt
) == COND_EXPR
531 && (cond
= gimple_assign_rhs1 (stmt
))
532 && !COMPARISON_CLASS_P (cond
))
534 if (dump_enabled_p ())
536 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
537 "Build SLP failed: condition is not "
539 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
540 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
542 /* Fatal mismatch. */
547 scalar_type
= vect_get_smallest_scalar_type (stmt
, &dummy
, &dummy
);
548 vectype
= get_vectype_for_scalar_type (scalar_type
);
551 if (dump_enabled_p ())
553 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
554 "Build SLP failed: unsupported data-type ");
555 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
557 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
559 /* Fatal mismatch. */
564 /* If populating the vector type requires unrolling then fail
565 before adjusting *max_nunits for basic-block vectorization. */
566 if (is_a
<bb_vec_info
> (vinfo
)
567 && TYPE_VECTOR_SUBPARTS (vectype
) > group_size
)
569 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
570 "Build SLP failed: unrolling required "
571 "in basic block SLP\n");
572 /* Fatal mismatch. */
577 /* In case of multiple types we need to detect the smallest type. */
578 if (*max_nunits
< TYPE_VECTOR_SUBPARTS (vectype
))
580 *max_nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
581 if (is_a
<bb_vec_info
> (vinfo
))
582 vectorization_factor
= *max_nunits
;
585 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
587 rhs_code
= CALL_EXPR
;
588 if (gimple_call_internal_p (call_stmt
)
589 || gimple_call_tail_p (call_stmt
)
590 || gimple_call_noreturn_p (call_stmt
)
591 || !gimple_call_nothrow_p (call_stmt
)
592 || gimple_call_chain (call_stmt
))
594 if (dump_enabled_p ())
596 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
597 "Build SLP failed: unsupported call type ");
598 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
600 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
602 /* Fatal mismatch. */
608 rhs_code
= gimple_assign_rhs_code (stmt
);
610 /* Check the operation. */
613 first_stmt_code
= rhs_code
;
615 /* Shift arguments should be equal in all the packed stmts for a
616 vector shift with scalar shift operand. */
617 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
618 || rhs_code
== LROTATE_EXPR
619 || rhs_code
== RROTATE_EXPR
)
621 vec_mode
= TYPE_MODE (vectype
);
623 /* First see if we have a vector/vector shift. */
624 optab
= optab_for_tree_code (rhs_code
, vectype
,
628 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
630 /* No vector/vector shift, try for a vector/scalar shift. */
631 optab
= optab_for_tree_code (rhs_code
, vectype
,
636 if (dump_enabled_p ())
637 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
638 "Build SLP failed: no optab.\n");
639 /* Fatal mismatch. */
643 icode
= (int) optab_handler (optab
, vec_mode
);
644 if (icode
== CODE_FOR_nothing
)
646 if (dump_enabled_p ())
647 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
649 "op not supported by target.\n");
650 /* Fatal mismatch. */
654 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
655 if (!VECTOR_MODE_P (optab_op2_mode
))
657 need_same_oprnds
= true;
658 first_op1
= gimple_assign_rhs2 (stmt
);
662 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
664 need_same_oprnds
= true;
665 first_op1
= gimple_assign_rhs2 (stmt
);
670 if (first_stmt_code
!= rhs_code
671 && alt_stmt_code
== ERROR_MARK
)
672 alt_stmt_code
= rhs_code
;
673 if (first_stmt_code
!= rhs_code
674 && (first_stmt_code
!= IMAGPART_EXPR
675 || rhs_code
!= REALPART_EXPR
)
676 && (first_stmt_code
!= REALPART_EXPR
677 || rhs_code
!= IMAGPART_EXPR
)
678 /* Handle mismatches in plus/minus by computing both
679 and merging the results. */
680 && !((first_stmt_code
== PLUS_EXPR
681 || first_stmt_code
== MINUS_EXPR
)
682 && (alt_stmt_code
== PLUS_EXPR
683 || alt_stmt_code
== MINUS_EXPR
)
684 && rhs_code
== alt_stmt_code
)
685 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
686 && (first_stmt_code
== ARRAY_REF
687 || first_stmt_code
== BIT_FIELD_REF
688 || first_stmt_code
== INDIRECT_REF
689 || first_stmt_code
== COMPONENT_REF
690 || first_stmt_code
== MEM_REF
)))
692 if (dump_enabled_p ())
694 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
695 "Build SLP failed: different operation "
697 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
698 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
700 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
708 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
710 if (dump_enabled_p ())
712 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
713 "Build SLP failed: different shift "
715 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
716 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
722 if (rhs_code
== CALL_EXPR
)
724 gimple
*first_stmt
= stmts
[0];
725 if (gimple_call_num_args (stmt
) != nops
726 || !operand_equal_p (gimple_call_fn (first_stmt
),
727 gimple_call_fn (stmt
), 0)
728 || gimple_call_fntype (first_stmt
)
729 != gimple_call_fntype (stmt
))
731 if (dump_enabled_p ())
733 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
734 "Build SLP failed: different calls in ");
735 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
737 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
745 /* Grouped store or load. */
746 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
748 if (REFERENCE_CLASS_P (lhs
))
756 /* Check that the size of interleaved loads group is not
757 greater than the SLP group size. */
759 = vectorization_factor
/ TYPE_VECTOR_SUBPARTS (vectype
);
760 if (is_a
<loop_vec_info
> (vinfo
)
761 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
762 && ((GROUP_SIZE (vinfo_for_stmt (stmt
))
763 - GROUP_GAP (vinfo_for_stmt (stmt
)))
764 > ncopies
* group_size
))
766 if (dump_enabled_p ())
768 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
769 "Build SLP failed: the number "
770 "of interleaved loads is greater than "
771 "the SLP group size ");
772 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
774 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
776 /* Fatal mismatch. */
781 first_load
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
784 /* Check that there are no loads from different interleaving
785 chains in the same node. */
786 if (prev_first_load
!= first_load
)
788 if (dump_enabled_p ())
790 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
792 "Build SLP failed: different "
793 "interleaving chains in one node ");
794 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
796 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
803 prev_first_load
= first_load
;
805 } /* Grouped access. */
808 if (TREE_CODE_CLASS (rhs_code
) == tcc_reference
)
810 /* Not grouped load. */
811 if (dump_enabled_p ())
813 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
814 "Build SLP failed: not grouped load ");
815 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
816 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
819 /* FORNOW: Not grouped loads are not supported. */
820 /* Fatal mismatch. */
825 /* Not memory operation. */
826 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
827 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
828 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
829 && rhs_code
!= CALL_EXPR
)
831 if (dump_enabled_p ())
833 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
834 "Build SLP failed: operation");
835 dump_printf (MSG_MISSED_OPTIMIZATION
, " unsupported ");
836 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
837 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
839 /* Fatal mismatch. */
844 if (rhs_code
== COND_EXPR
)
846 tree cond_expr
= gimple_assign_rhs1 (stmt
);
849 first_cond_code
= TREE_CODE (cond_expr
);
850 else if (first_cond_code
!= TREE_CODE (cond_expr
))
852 if (dump_enabled_p ())
854 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
855 "Build SLP failed: different"
857 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
859 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
870 for (i
= 0; i
< group_size
; ++i
)
874 /* If we allowed a two-operation SLP node verify the target can cope
875 with the permute we are going to use. */
876 if (alt_stmt_code
!= ERROR_MARK
877 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
880 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype
));
881 for (i
= 0; i
< TYPE_VECTOR_SUBPARTS (vectype
); ++i
)
884 if (gimple_assign_rhs_code (stmts
[i
% group_size
]) == alt_stmt_code
)
885 sel
[i
] += TYPE_VECTOR_SUBPARTS (vectype
);
887 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
889 for (i
= 0; i
< group_size
; ++i
)
890 if (gimple_assign_rhs_code (stmts
[i
]) == alt_stmt_code
)
893 if (dump_enabled_p ())
895 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
896 "Build SLP failed: different operation "
898 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
900 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
902 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
908 *two_operators
= true;
914 /* Recursively build an SLP tree starting from NODE.
915 Fail (and return a value not equal to zero) if def-stmts are not
916 isomorphic, require data permutation or are of unsupported types of
917 operation. Otherwise, return 0.
918 The value returned is the depth in the SLP tree where a mismatch
922 vect_build_slp_tree (vec_info
*vinfo
,
923 slp_tree
*node
, unsigned int group_size
,
924 unsigned int *max_nunits
,
925 vec
<slp_tree
> *loads
,
926 unsigned int vectorization_factor
,
927 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
928 unsigned max_tree_size
)
930 unsigned nops
, i
, this_tree_size
= 0;
935 stmt
= SLP_TREE_SCALAR_STMTS (*node
)[0];
936 if (is_gimple_call (stmt
))
937 nops
= gimple_call_num_args (stmt
);
938 else if (is_gimple_assign (stmt
))
940 nops
= gimple_num_ops (stmt
) - 1;
941 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
947 bool two_operators
= false;
948 if (!vect_build_slp_tree_1 (vinfo
,
949 SLP_TREE_SCALAR_STMTS (*node
), group_size
, nops
,
950 max_nunits
, vectorization_factor
, matches
,
953 SLP_TREE_TWO_OPERATORS (*node
) = two_operators
;
955 /* If the SLP node is a load, terminate the recursion. */
956 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
))
957 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))))
959 loads
->safe_push (*node
);
963 /* Get at the operands, verifying they are compatible. */
964 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
965 slp_oprnd_info oprnd_info
;
966 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node
), i
, stmt
)
968 switch (vect_get_and_check_slp_defs (vinfo
, stmt
, i
, &oprnds_info
))
974 vect_free_oprnd_info (oprnds_info
);
981 for (i
= 0; i
< group_size
; ++i
)
984 vect_free_oprnd_info (oprnds_info
);
988 stmt
= SLP_TREE_SCALAR_STMTS (*node
)[0];
990 /* Create SLP_TREE nodes for the definition node/s. */
991 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
994 unsigned old_nloads
= loads
->length ();
995 unsigned old_max_nunits
= *max_nunits
;
997 if (oprnd_info
->first_dt
!= vect_internal_def
)
1000 if (++this_tree_size
> max_tree_size
)
1002 vect_free_oprnd_info (oprnds_info
);
1006 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1009 vect_free_oprnd_info (oprnds_info
);
1013 if (vect_build_slp_tree (vinfo
, &child
,
1014 group_size
, max_nunits
, loads
,
1015 vectorization_factor
, matches
,
1016 npermutes
, &this_tree_size
, max_tree_size
))
1018 /* If we have all children of child built up from scalars then just
1019 throw that away and build it up this node from scalars. */
1020 if (!SLP_TREE_CHILDREN (child
).is_empty ())
1023 slp_tree grandchild
;
1025 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1026 if (grandchild
!= NULL
)
1031 *max_nunits
= old_max_nunits
;
1032 loads
->truncate (old_nloads
);
1033 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1034 vect_free_slp_tree (grandchild
);
1035 SLP_TREE_CHILDREN (child
).truncate (0);
1037 dump_printf_loc (MSG_NOTE
, vect_location
,
1038 "Building parent vector operands from "
1039 "scalars instead\n");
1040 oprnd_info
->def_stmts
= vNULL
;
1041 vect_free_slp_tree (child
);
1042 SLP_TREE_CHILDREN (*node
).quick_push (NULL
);
1047 oprnd_info
->def_stmts
= vNULL
;
1048 SLP_TREE_CHILDREN (*node
).quick_push (child
);
1052 /* If the SLP build failed fatally and we analyze a basic-block
1053 simply treat nodes we fail to build as externally defined
1054 (and thus build vectors from the scalar defs).
1055 The cost model will reject outright expensive cases.
1056 ??? This doesn't treat cases where permutation ultimatively
1057 fails (or we don't try permutation below). Ideally we'd
1058 even compute a permutation that will end up with the maximum
1060 if (is_a
<bb_vec_info
> (vinfo
)
1062 /* ??? Rejecting patterns this way doesn't work. We'd have to
1063 do extra work to cancel the pattern so the uses see the
1065 && !is_pattern_stmt_p (vinfo_for_stmt (stmt
)))
1068 slp_tree grandchild
;
1071 *max_nunits
= old_max_nunits
;
1072 loads
->truncate (old_nloads
);
1073 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1074 vect_free_slp_tree (grandchild
);
1075 SLP_TREE_CHILDREN (child
).truncate (0);
1077 dump_printf_loc (MSG_NOTE
, vect_location
,
1078 "Building vector operands from scalars\n");
1079 oprnd_info
->def_stmts
= vNULL
;
1080 vect_free_slp_tree (child
);
1081 SLP_TREE_CHILDREN (*node
).quick_push (NULL
);
1085 /* If the SLP build for operand zero failed and operand zero
1086 and one can be commutated try that for the scalar stmts
1087 that failed the match. */
1089 /* A first scalar stmt mismatch signals a fatal mismatch. */
1091 /* ??? For COND_EXPRs we can swap the comparison operands
1092 as well as the arms under some constraints. */
1094 && oprnds_info
[1]->first_dt
== vect_internal_def
1095 && is_gimple_assign (stmt
)
1096 && commutative_tree_code (gimple_assign_rhs_code (stmt
))
1097 && !SLP_TREE_TWO_OPERATORS (*node
)
1098 /* Do so only if the number of not successful permutes was nor more
1099 than a cut-ff as re-trying the recursive match on
1100 possibly each level of the tree would expose exponential
1105 slp_tree grandchild
;
1108 *max_nunits
= old_max_nunits
;
1109 loads
->truncate (old_nloads
);
1110 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1111 vect_free_slp_tree (grandchild
);
1112 SLP_TREE_CHILDREN (child
).truncate (0);
1114 /* Swap mismatched definition stmts. */
1115 dump_printf_loc (MSG_NOTE
, vect_location
,
1116 "Re-trying with swapped operands of stmts ");
1117 for (j
= 0; j
< group_size
; ++j
)
1120 std::swap (oprnds_info
[0]->def_stmts
[j
],
1121 oprnds_info
[1]->def_stmts
[j
]);
1122 dump_printf (MSG_NOTE
, "%d ", j
);
1124 dump_printf (MSG_NOTE
, "\n");
1125 /* And try again with scratch 'matches' ... */
1126 bool *tem
= XALLOCAVEC (bool, group_size
);
1127 if (vect_build_slp_tree (vinfo
, &child
,
1128 group_size
, max_nunits
, loads
,
1129 vectorization_factor
,
1130 tem
, npermutes
, &this_tree_size
,
1133 /* ... so if successful we can apply the operand swapping
1134 to the GIMPLE IL. This is necessary because for example
1135 vect_get_slp_defs uses operand indexes and thus expects
1136 canonical operand order. */
1137 for (j
= 0; j
< group_size
; ++j
)
1140 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (*node
)[j
];
1141 swap_ssa_operands (stmt
, gimple_assign_rhs1_ptr (stmt
),
1142 gimple_assign_rhs2_ptr (stmt
));
1144 oprnd_info
->def_stmts
= vNULL
;
1145 SLP_TREE_CHILDREN (*node
).quick_push (child
);
1152 oprnd_info
->def_stmts
= vNULL
;
1153 vect_free_slp_tree (child
);
1154 vect_free_oprnd_info (oprnds_info
);
1159 *tree_size
+= this_tree_size
;
1161 vect_free_oprnd_info (oprnds_info
);
1165 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1168 vect_print_slp_tree (int dump_kind
, slp_tree node
)
1177 dump_printf (dump_kind
, "node ");
1178 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1180 dump_printf (dump_kind
, "\n\tstmt %d ", i
);
1181 dump_gimple_stmt (dump_kind
, TDF_SLIM
, stmt
, 0);
1183 dump_printf (dump_kind
, "\n");
1185 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1186 vect_print_slp_tree (dump_kind
, child
);
1190 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1191 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
1192 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
1193 stmts in NODE are to be marked. */
1196 vect_mark_slp_stmts (slp_tree node
, enum slp_vect_type mark
, int j
)
1205 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1206 if (j
< 0 || i
== j
)
1207 STMT_SLP_TYPE (vinfo_for_stmt (stmt
)) = mark
;
1209 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1210 vect_mark_slp_stmts (child
, mark
, j
);
1214 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1217 vect_mark_slp_stmts_relevant (slp_tree node
)
1221 stmt_vec_info stmt_info
;
1227 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1229 stmt_info
= vinfo_for_stmt (stmt
);
1230 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1231 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1232 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1235 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1236 vect_mark_slp_stmts_relevant (child
);
1240 /* Rearrange the statements of NODE according to PERMUTATION. */
1243 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1244 vec
<unsigned> permutation
)
1247 vec
<gimple
*> tmp_stmts
;
1251 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1252 vect_slp_rearrange_stmts (child
, group_size
, permutation
);
1254 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1255 tmp_stmts
.create (group_size
);
1256 tmp_stmts
.quick_grow_cleared (group_size
);
1258 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
1259 tmp_stmts
[permutation
[i
]] = stmt
;
1261 SLP_TREE_SCALAR_STMTS (node
).release ();
1262 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
/* NOTE(review): this chunk is a corrupted extraction -- statements are split
   across lines, original line numbers are embedded in the text, and several
   original lines (braces, returns) are missing.  Code kept byte-identical;
   only comments added.
   Purpose (from the surviving block comment): for a reduction chain, try to
   reorder the scalar stmts of the SLP instance so no load permutation is
   needed; returns true on success -- TODO confirm against pristine source.  */
1266 /* Attempt to reorder stmts in a reduction chain so that we don't
1267 require any load permutation. Return true if that was possible,
1268 otherwise return false. */
1271 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1273 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1277 slp_tree node
, load
;
/* All load nodes must share the permutation of the first load node;
   bail out otherwise (the failing branches are lost in this extraction).  */
1279 /* Compare all the permutation sequences to the first one. We know
1280 that at least one load is permuted. */
1281 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1282 if (!node
->load_permutation
.exists ())
1284 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1286 if (!load
->load_permutation
.exists ())
1288 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1289 if (lidx
!= node
->load_permutation
[j
])
/* Use an sbitmap over GROUP_SIZE to verify the first permutation is a
   true permutation: no duplicate indices and no gaps.  */
1293 /* Check that the loads in the first sequence are different and there
1294 are no gaps between them. */
1295 load_index
= sbitmap_alloc (group_size
);
1296 bitmap_clear (load_index
);
1297 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1299 if (bitmap_bit_p (load_index
, lidx
))
1301 sbitmap_free (load_index
);
1304 bitmap_set_bit (load_index
, lidx
);
1306 for (i
= 0; i
< group_size
; i
++)
1307 if (!bitmap_bit_p (load_index
, i
))
1309 sbitmap_free (load_index
);
1312 sbitmap_free (load_index
);
1314 /* This permutation is valid for reduction. Since the order of the
1315 statements in the nodes is not important unless they are memory
1316 accesses, we can rearrange the statements in all the nodes
1317 according to the order of the loads. */
1318 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1319 node
->load_permutation
);
/* The rearrangement made the loads in-order, so drop the recorded
   permutations on every load node.  */
1321 /* We are done, no actual permutations need to be generated. */
1322 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1323 SLP_TREE_LOAD_PERMUTATION (node
).release ();
/* NOTE(review): corrupted extraction -- lines split, original line numbers
   embedded, several source lines missing (returns, braces, else arms).
   Code kept byte-identical; only comments added.
   Purpose (from surviving comments): decide whether the load permutations
   required by SLP instance SLP_INSTN can be supported, handling the
   reduction, basic-block, and loop cases separately.  */
1327 /* Check if the required load permutations in the SLP instance
1328 SLP_INSTN are supported. */
1331 vect_supported_load_permutation_p (slp_instance slp_instn
)
1333 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1334 unsigned int i
, j
, k
, next
;
1336 gimple
*stmt
, *load
, *next_load
, *first_load
;
1337 struct data_reference
*dr
;
/* Dump the recorded permutation of every load node (identity indices
   0..group_size-1 are printed when no permutation exists).  */
1339 if (dump_enabled_p ())
1341 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1342 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1343 if (node
->load_permutation
.exists ())
1344 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1345 dump_printf (MSG_NOTE
, "%d ", next
);
1347 for (k
= 0; k
< group_size
; ++k
)
1348 dump_printf (MSG_NOTE
, "%d ", k
);
1349 dump_printf (MSG_NOTE
, "\n");
1352 /* In case of reduction every load permutation is allowed, since the order
1353 of the reduction statements is not important (as opposed to the case of
1354 grouped stores). The only condition we need to check is that all the
1355 load nodes are of the same size and have the same permutation (and then
1356 rearrange all the nodes of the SLP instance according to this
1359 /* Check that all the load nodes are of the same size. */
1360 /* ??? Can't we assert this? */
1361 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1362 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1365 node
= SLP_INSTANCE_TREE (slp_instn
);
1366 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1368 /* Reduction (there are no data-refs in the root).
1369 In reduction chain the order of the loads is not important. */
1370 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
))
1371 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1373 if (vect_attempt_slp_rearrange_stmts (slp_instn
))
1376 /* Fallthru to general load permutation handling. */
1379 /* In basic block vectorization we allow any subchain of an interleaving
1381 FORNOW: not supported in loop SLP because of realignment compications. */
1382 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt
)))
/* BB SLP: a load node whose stmts form a contiguous subchain
   (consecutive GROUP_NEXT_ELEMENT links, gap 1) needs no permutation.  */
1384 /* Check whether the loads in an instance form a subchain and thus
1385 no permutation is necessary. */
1386 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1388 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1390 bool subchain_p
= true;
1392 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load
)
1395 && (next_load
!= load
1396 || GROUP_GAP (vinfo_for_stmt (load
)) != 1))
1401 next_load
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (load
));
1404 SLP_TREE_LOAD_PERMUTATION (node
).release ();
/* Not a subchain: ask the target (dry run, last arg TRUE) whether the
   permutation can actually be emitted.  */
1407 /* Verify the permutation can be generated. */
1409 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1410 1, slp_instn
, true))
1412 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1414 "unsupported load permutation\n");
1420 /* Check that the alignment of the first load in every subchain, i.e.,
1421 the first statement in every load node, is supported.
1422 ??? This belongs in alignment checking. */
1423 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1425 first_load
= SLP_TREE_SCALAR_STMTS (node
)[0];
1426 if (first_load
!= GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load
)))
1428 dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load
));
1429 if (vect_supportable_dr_alignment (dr
, false)
1430 == dr_unaligned_unsupported
)
1432 if (dump_enabled_p ())
1434 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1436 "unsupported unaligned load ");
1437 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
1439 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Loop SLP: every permuted load node must pass the target dry run,
   using the instance unrolling factor for the copy count.  */
1449 /* For loop vectorization verify we can generate the permutation. */
1450 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1451 if (node
->load_permutation
.exists ()
1452 && !vect_transform_slp_perm_load
1454 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
), slp_instn
, true))
/* NOTE(review): corrupted extraction -- lines split, original line numbers
   embedded, some source lines missing.  Code kept byte-identical.
   Walks the scalar stmts of NODE and returns the one latest in the basic
   block (via get_later_stmt); for pattern stmts the original (related)
   stmt is compared instead -- return stmt lost in this extraction.  */
1461 /* Find the last store in SLP INSTANCE. */
1464 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1466 gimple
*last
= NULL
, *stmt
;
1468 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt
); i
++)
1470 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1471 if (is_pattern_stmt_p (stmt_vinfo
))
/* For a pattern stmt, position is judged by the original stmt it
   replaced (STMT_VINFO_RELATED_STMT).  */
1472 last
= get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo
), last
);
1474 last
= get_later_stmt (stmt
, last
);
/* NOTE(review): corrupted extraction -- statements split across lines,
   original line numbers embedded, several lines missing (braces, else arms,
   some declarations).  Code kept byte-identical; only comments added.
   Recursive worker of vect_analyze_slp_cost: accumulates prologue and body
   costs for NODE and its children into the two cost vectors.  */
1480 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1483 vect_analyze_slp_cost_1 (slp_instance instance
, slp_tree node
,
1484 stmt_vector_for_cost
*prologue_cost_vec
,
1485 stmt_vector_for_cost
*body_cost_vec
,
1486 unsigned ncopies_for_cost
)
1491 stmt_vec_info stmt_info
;
1493 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1495 /* Recurse down the SLP tree. */
1496 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1498 vect_analyze_slp_cost_1 (instance
, child
, prologue_cost_vec
,
1499 body_cost_vec
, ncopies_for_cost
);
/* Grouped accesses: cost as a store or load depending on the DR
   direction of the representative (first) stmt.  */
1501 /* Look at the first scalar stmt to determine the cost. */
1502 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1503 stmt_info
= vinfo_for_stmt (stmt
);
1504 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1506 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info
)))
1507 vect_model_store_cost (stmt_info
, ncopies_for_cost
, false,
1508 vect_uninitialized_def
,
1509 node
, prologue_cost_vec
, body_cost_vec
);
1513 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)));
1514 vect_model_load_cost (stmt_info
, ncopies_for_cost
, false,
1515 node
, prologue_cost_vec
, body_cost_vec
);
1516 /* If the load is permuted record the cost for the permutation.
1517 ??? Loads from multiple chains are let through here only
1518 for a single special case involving complex numbers where
1519 in the end no permutation is necessary. */
1520 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, s
)
1521 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s
))
1522 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
))
1523 && vect_get_place_in_interleaving_chain
1524 (s
, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
)) != i
)
1526 record_stmt_cost (body_cost_vec
, group_size
, vec_perm
,
1527 stmt_info
, 0, vect_body
);
/* Non-grouped node: one vector_stmt per copy; a two-operator node
   additionally pays a second vector_stmt plus a blend (vec_perm).  */
1534 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1535 stmt_info
, 0, vect_body
);
1536 if (SLP_TREE_TWO_OPERATORS (node
))
1538 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vector_stmt
,
1539 stmt_info
, 0, vect_body
);
1540 record_stmt_cost (body_cost_vec
, ncopies_for_cost
, vec_perm
,
1541 stmt_info
, 0, vect_body
);
1545 /* Scan operands and account for prologue cost of constants/externals.
1546 ??? This over-estimates cost for multiple uses and should be
1548 lhs
= gimple_get_lhs (stmt
);
1549 for (i
= 0; i
< gimple_num_ops (stmt
); ++i
)
1551 tree op
= gimple_op (stmt
, i
);
1553 enum vect_def_type dt
;
1554 if (!op
|| op
== lhs
)
1556 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &def_stmt
, &dt
))
1558 /* Without looking at the actual initializer a vector of
1559 constants can be implemented as load from the constant pool.
1560 ??? We need to pass down stmt_info for a vector type
1561 even if it points to the wrong stmt. */
1562 if (dt
== vect_constant_def
)
1563 record_stmt_cost (prologue_cost_vec
, 1, vector_load
,
1564 stmt_info
, 0, vect_prologue
);
1565 else if (dt
== vect_external_def
)
1566 record_stmt_cost (prologue_cost_vec
, 1, vec_construct
,
1567 stmt_info
, 0, vect_prologue
);
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, some lines missing (e.g. the ncopies argument line of
   the vect_analyze_slp_cost_1 call).  Code kept byte-identical.
   Driver: computes ncopies from nunits/group_size, collects costs via
   vect_analyze_slp_cost_1, then feeds them to the target cost model DATA
   through add_stmt_cost.  */
1572 /* Compute the cost for the SLP instance INSTANCE. */
1575 vect_analyze_slp_cost (slp_instance instance
, void *data
)
1577 stmt_vector_for_cost body_cost_vec
, prologue_cost_vec
;
1578 unsigned ncopies_for_cost
;
1579 stmt_info_for_cost
*si
;
1582 if (dump_enabled_p ())
1583 dump_printf_loc (MSG_NOTE
, vect_location
,
1584 "=== vect_analyze_slp_cost ===\n");
1586 /* Calculate the number of vector stmts to create based on the unrolling
1587 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1588 GROUP_SIZE / NUNITS otherwise. */
1589 unsigned group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
1590 slp_tree node
= SLP_INSTANCE_TREE (instance
);
1591 stmt_vec_info stmt_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node
)[0]);
1592 /* Adjust the group_size by the vectorization factor which is always one
1593 for basic-block vectorization. */
1594 if (STMT_VINFO_LOOP_VINFO (stmt_info
))
1595 group_size
*= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info
));
1596 unsigned nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1597 /* For reductions look at a reduction operand in case the reduction
1598 operation is widening like DOT_PROD or SAD. */
1599 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1601 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
1602 switch (gimple_assign_rhs_code (stmt
))
/* NOTE(review): the case labels of this switch (presumably DOT_PROD_EXPR /
   SAD_EXPR -- TODO confirm) are missing from this extraction.  */
1606 nunits
= TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1607 (TREE_TYPE (gimple_assign_rhs1 (stmt
))));
1612 ncopies_for_cost
= least_common_multiple (nunits
, group_size
) / nunits
;
1614 prologue_cost_vec
.create (10);
1615 body_cost_vec
.create (10);
1616 vect_analyze_slp_cost_1 (instance
, SLP_INSTANCE_TREE (instance
),
1617 &prologue_cost_vec
, &body_cost_vec
,
1620 /* Record the prologue costs, which were delayed until we were
1621 sure that SLP was successful. */
1622 FOR_EACH_VEC_ELT (prologue_cost_vec
, i
, si
)
1624 struct _stmt_vec_info
*stmt_info
1625 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1626 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1627 si
->misalign
, vect_prologue
);
1630 /* Record the instance's instructions in the target cost model. */
1631 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1633 struct _stmt_vec_info
*stmt_info
1634 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1635 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1636 si
->misalign
, vect_body
);
1639 prologue_cost_vec
.release ();
1640 body_cost_vec
.release ();
/* NOTE(review): corrupted extraction -- statements split across lines,
   original line numbers embedded, and many lines missing (conditionals
   distinguishing the store/reduction-chain/reduction cases, returns,
   braces).  Code kept byte-identical; only comments added.
   Purpose (from the surviving comment): starting from STMT (a grouped
   store, reduction chain head, or reduction group), build an SLP tree,
   compute load permutations, and register a new slp_instance in VINFO.  */
1643 /* Analyze an SLP instance starting from a group of grouped stores. Call
1644 vect_build_slp_tree to build a tree of packed stmts if possible.
1645 Return FALSE if it's impossible to SLP any stmt in the loop. */
1648 vect_analyze_slp_instance (vec_info
*vinfo
,
1649 gimple
*stmt
, unsigned max_tree_size
)
1651 slp_instance new_instance
;
1653 unsigned int group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1654 unsigned int unrolling_factor
= 1, nunits
;
1655 tree vectype
, scalar_type
= NULL_TREE
;
1657 unsigned int vectorization_factor
= 0;
1659 unsigned int max_nunits
= 0;
1660 vec
<slp_tree
> loads
;
1661 struct data_reference
*dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt
));
1662 vec
<gimple
*> scalar_stmts
;
/* Determine VECTYPE and GROUP_SIZE per starting-point kind: grouped
   store (uses DR type), reduction chain, or plain reduction group.  */
1664 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1668 scalar_type
= TREE_TYPE (DR_REF (dr
));
1669 vectype
= get_vectype_for_scalar_type (scalar_type
);
1673 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1674 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1677 group_size
= GROUP_SIZE (vinfo_for_stmt (stmt
));
1681 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1682 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1683 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
1688 if (dump_enabled_p ())
1690 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1691 "Build SLP failed: unsupported data-type ");
1692 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, scalar_type
);
1693 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1699 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1700 if (is_a
<loop_vec_info
> (vinfo
))
1701 vectorization_factor
= as_a
<loop_vec_info
> (vinfo
)->vectorization_factor
;
1703 vectorization_factor
= nunits
;
1705 /* Calculate the unrolling factor. */
1706 unrolling_factor
= least_common_multiple (nunits
, group_size
) / group_size
;
/* BB vectorization cannot unroll, so any unrolling requirement fails.  */
1707 if (unrolling_factor
!= 1 && is_a
<bb_vec_info
> (vinfo
))
1709 if (dump_enabled_p ())
1710 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1711 "Build SLP failed: unrolling required in basic"
1717 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
1718 scalar_stmts
.create (group_size
);
1720 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
1722 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
/* Pattern stmts replace their original stmts in the SLP node.  */
1725 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next
))
1726 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)))
1727 scalar_stmts
.safe_push (
1728 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next
)));
1730 scalar_stmts
.safe_push (next
);
1731 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
1733 /* Mark the first element of the reduction chain as reduction to properly
1734 transform the node. In the reduction analysis phase only the last
1735 element of the chain is marked as reduction. */
1736 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt
)))
1737 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt
)) = vect_reduction_def
;
1741 /* Collect reduction statements. */
1742 vec
<gimple
*> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
1743 for (i
= 0; reductions
.iterate (i
, &next
); i
++)
1744 scalar_stmts
.safe_push (next
);
1747 node
= vect_create_new_slp_node (scalar_stmts
);
1749 loads
.create (group_size
);
1751 /* Build the tree for the SLP instance. */
1752 bool *matches
= XALLOCAVEC (bool, group_size
);
1753 unsigned npermutes
= 0;
1754 if (vect_build_slp_tree (vinfo
, &node
, group_size
,
1755 &max_nunits
, &loads
,
1756 vectorization_factor
, matches
, &npermutes
, NULL
,
1759 /* Calculate the unrolling factor based on the smallest type. */
1760 if (max_nunits
> nunits
)
1761 unrolling_factor
= least_common_multiple (max_nunits
, group_size
)
1764 if (unrolling_factor
!= 1 && is_a
<bb_vec_info
> (vinfo
))
1766 if (dump_enabled_p ())
1767 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1768 "Build SLP failed: unrolling required in basic"
1770 vect_free_slp_tree (node
);
1775 /* Create a new SLP instance. */
1776 new_instance
= XNEW (struct _slp_instance
);
1777 SLP_INSTANCE_TREE (new_instance
) = node
;
1778 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
1779 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
1780 SLP_INSTANCE_LOADS (new_instance
) = loads
;
/* Record, per load node, where each load sits in its interleaving
   chain; keep the permutation vector only when it is non-identity.  */
1782 /* Compute the load permutation. */
1784 bool loads_permuted
= false;
1785 FOR_EACH_VEC_ELT (loads
, i
, load_node
)
1787 vec
<unsigned> load_permutation
;
1789 gimple
*load
, *first_stmt
;
1790 bool this_load_permuted
= false;
1791 load_permutation
.create (group_size
);
1792 first_stmt
= GROUP_FIRST_ELEMENT
1793 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node
)[0]));
1794 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load
)
1797 = vect_get_place_in_interleaving_chain (load
, first_stmt
);
1798 gcc_assert (load_place
!= -1);
1799 if (load_place
!= j
)
1800 this_load_permuted
= true;
1801 load_permutation
.safe_push (load_place
);
1803 if (!this_load_permuted
1804 /* The load requires permutation when unrolling exposes
1805 a gap either because the group is larger than the SLP
1806 group-size or because there is a gap between the groups. */
1807 && (unrolling_factor
== 1
1808 || (group_size
== GROUP_SIZE (vinfo_for_stmt (first_stmt
))
1809 && GROUP_GAP (vinfo_for_stmt (first_stmt
)) == 0)))
1811 load_permutation
.release ();
1814 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
1815 loads_permuted
= true;
1820 if (!vect_supported_load_permutation_p (new_instance
))
1822 if (dump_enabled_p ())
1824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1825 "Build SLP failed: unsupported load "
1827 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
1828 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1830 vect_free_slp_instance (new_instance
);
1835 vinfo
->slp_instances
.safe_push (new_instance
);
1837 if (dump_enabled_p ())
1838 vect_print_slp_tree (MSG_NOTE
, node
);
1843 /* Failed to SLP. */
1844 /* Free the allocated memory. */
1845 vect_free_slp_tree (node
);
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, trailing lines of the function missing.  Code kept
   byte-identical; only comments added.
   Entry point: tries to build SLP instances from grouped stores, then (for
   loops) from reduction chains and reduction groups.  */
1852 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
1853 trees of packed scalar stmts if SLP is possible. */
1856 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
1859 gimple
*first_element
;
1862 if (dump_enabled_p ())
1863 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_analyze_slp ===\n");
1865 /* Find SLP sequences starting from groups of grouped stores. */
1866 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
1867 if (vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
))
/* Reduction-based starting points only exist for loop vectorization.  */
1870 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
1872 if (loop_vinfo
->reduction_chains
.length () > 0)
1874 /* Find SLP sequences starting from reduction chains. */
1875 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
1876 if (vect_analyze_slp_instance (vinfo
, first_element
,
1882 /* Don't try to vectorize SLP reductions if reduction chain was
1887 /* Find SLP sequences starting from groups of reductions. */
1888 if (loop_vinfo
->reductions
.length () > 1
1889 && vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, some lines missing (e.g. the decided_to_slp increment).
   Code kept byte-identical; only comments added.  */
1898 /* For each possible SLP instance decide whether to SLP it and calculate overall
1899 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1900 least one instance. */
1903 vect_make_slp_decision (loop_vec_info loop_vinfo
)
1905 unsigned int i
, unrolling_factor
= 1;
1906 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
1907 slp_instance instance
;
1908 int decided_to_slp
= 0;
1910 if (dump_enabled_p ())
1911 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_make_slp_decision ==="
/* The loop unrolling factor is the maximum over all instances.  */
1914 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
1916 /* FORNOW: SLP if you can. */
1917 if (unrolling_factor
< SLP_INSTANCE_UNROLLING_FACTOR (instance
))
1918 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (instance
);
1920 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
1921 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
1922 loop-based vectorization. Such stmts will be marked as HYBRID. */
1923 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
1927 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
1929 if (decided_to_slp
&& dump_enabled_p ())
1930 dump_printf_loc (MSG_NOTE
, vect_location
,
1931 "Decided to SLP %d instances. Unrolling factor %d\n",
1932 decided_to_slp
, unrolling_factor
);
1934 return (decided_to_slp
> 0);
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, many lines missing (returns, the stype=hybrid
   assignment, braces).  Code kept byte-identical; only comments added.
   Marks stmt I of NODE as HYBRID when a pure-SLP def is used by a non-SLP
   stmt inside the loop, then recurses into the children.  */
1938 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
1939 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
1942 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
1944 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[i
];
1945 imm_use_iterator imm_iter
;
1947 stmt_vec_info use_vinfo
, stmt_vinfo
= vinfo_for_stmt (stmt
);
1949 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1950 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1953 /* Propagate hybrid down the SLP tree. */
1954 if (stype
== hybrid
)
1956 else if (HYBRID_SLP_STMT (stmt_vinfo
))
1960 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
1961 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
1962 /* We always get the pattern stmt here, but for immediate
1963 uses we have to use the LHS of the original stmt. */
1964 gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo
));
1965 if (STMT_VINFO_RELATED_STMT (stmt_vinfo
))
1966 stmt
= STMT_VINFO_RELATED_STMT (stmt_vinfo
);
/* Scan all immediate uses of the SSA def inside the loop.  */
1967 if (TREE_CODE (gimple_op (stmt
, 0)) == SSA_NAME
)
1968 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, gimple_op (stmt
, 0))
1970 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
1972 use_vinfo
= vinfo_for_stmt (use_stmt
);
1973 if (STMT_VINFO_IN_PATTERN_P (use_vinfo
)
1974 && STMT_VINFO_RELATED_STMT (use_vinfo
))
1975 use_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo
));
/* A relevant non-SLP use (that is not the reduction PHI itself)
   forces the def to be hybrid.  */
1976 if (!STMT_SLP_TYPE (use_vinfo
)
1977 && (STMT_VINFO_RELEVANT (use_vinfo
)
1978 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
1979 && !(gimple_code (use_stmt
) == GIMPLE_PHI
1980 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
1982 if (dump_enabled_p ())
1984 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
1985 "def in non-SLP stmt: ");
1986 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, use_stmt
, 0);
1994 && !HYBRID_SLP_STMT (stmt_vinfo
))
1996 if (dump_enabled_p ())
1998 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
1999 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
2001 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2004 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2006 vect_detect_hybrid_slp_stmts (child
, i
, stype
);
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, return lines missing.  Code kept byte-identical.
   walk_tree callback (DATA is a walk_stmt_info whose info field is the
   loop): for each SSA operand defined by a pure-SLP stmt inside the loop,
   mark that def stmt HYBRID.  */
2009 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2012 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2014 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2015 struct loop
*loopp
= (struct loop
*)wi
->info
;
2020 if (TREE_CODE (*tp
) == SSA_NAME
2021 && !SSA_NAME_IS_DEFAULT_DEF (*tp
))
2023 gimple
*def_stmt
= SSA_NAME_DEF_STMT (*tp
);
2024 if (flow_bb_inside_loop_p (loopp
, gimple_bb (def_stmt
))
2025 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt
)))
2027 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: ");
2030 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
2032 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt
)) = hybrid
;
/* NOTE(review): corrupted extraction -- the parameter list tail, body
   braces and return statements of this walk_gimple_stmt callback are
   missing.  Code kept byte-identical; only comments added.
   Skips operand-walking (vect_detect_hybrid_slp_1) for stmts that already
   belong to an SLP instance.  */
2040 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2043 /* If the stmt is in a SLP instance then this isn't a reason
2044 to mark use definitions in other SLP instances as hybrid. */
2045 if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi
))) != loop_vect
)
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, some lines missing (walk_stmt_info declaration, loop
   increment, final argument of the last call).  Code kept byte-identical.
   Two passes: (1) walk pattern stmts marking pure-SLP defs they use as
   hybrid; (2) walk each SLP instance tree via
   vect_detect_hybrid_slp_stmts.  */
2050 /* Find stmts that must be both vectorized and SLPed. */
2053 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2056 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
)
;
2057 slp_instance instance
;
2059 if (dump_enabled_p ())
2060 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vect_detect_hybrid_slp ==="
2063 /* First walk all pattern stmt in the loop and mark defs of uses as
2064 hybrid because immediate uses in them are not recorded. */
2065 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2067 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2068 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2071 gimple
*stmt
= gsi_stmt (gsi
);
2072 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2073 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2076 memset (&wi
, 0, sizeof (wi
));
2077 wi
.info
= LOOP_VINFO_LOOP (loop_vinfo
);
2078 gimple_stmt_iterator gsi2
2079 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
2080 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2081 vect_detect_hybrid_slp_1
, &wi
);
2082 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2083 vect_detect_hybrid_slp_2
,
2084 vect_detect_hybrid_slp_1
, &wi
);
2089 /* Then walk the SLP instance trees marking stmts with uses in
2090 non-SLP stmts as hybrid, also propagating hybrid down the
2091 SLP tree, collecting the above info on-the-fly. */
2092 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2094 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2095 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, the return statement missing.  Code kept byte-identical.
   Allocates a zeroed _bb_vec_info for BB, creates a stmt_vec_info for each
   stmt in the block, and initializes its vectors and cost data.  */
2101 /* Create and initialize a new bb_vec_info struct for BB, as well as
2102 stmt_vec_info structs for all the stmts in it. */
2105 new_bb_vec_info (basic_block bb
)
2107 bb_vec_info res
= NULL
;
2108 gimple_stmt_iterator gsi
;
2110 res
= (bb_vec_info
) xcalloc (1, sizeof (struct _bb_vec_info
));
2111 res
->kind
= vec_info::bb
;
2112 BB_VINFO_BB (res
) = bb
;
/* Reset each stmt's uid and attach a fresh stmt_vec_info owned by RES.  */
2114 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2116 gimple
*stmt
= gsi_stmt (gsi
);
2117 gimple_set_uid (stmt
, 0);
2118 set_vinfo_for_stmt (stmt
, new_stmt_vec_info (stmt
, res
));
2121 BB_VINFO_GROUPED_STORES (res
).create (10);
2122 BB_VINFO_SLP_INSTANCES (res
).create (2);
2123 BB_VINFO_TARGET_COST_DATA (res
) = init_cost (NULL
);
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, some lines missing (early-return guard, the final
   free of bb_vinfo itself).  Code kept byte-identical.
   Tears down a bb_vec_info: frees per-stmt info, data references,
   dependence relations, SLP instances and the target cost data.  */
2130 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2131 stmts in the basic block. */
2134 destroy_bb_vec_info (bb_vec_info bb_vinfo
)
2136 vec
<slp_instance
> slp_instances
;
2137 slp_instance instance
;
2139 gimple_stmt_iterator si
;
2145 bb
= BB_VINFO_BB (bb_vinfo
);
2147 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
2149 gimple
*stmt
= gsi_stmt (si
);
2150 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2153 /* Free stmt_vec_info. */
2154 free_stmt_vec_info (stmt
);
2157 vect_destroy_datarefs (bb_vinfo
);
2158 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo
));
2159 BB_VINFO_GROUPED_STORES (bb_vinfo
).release ();
2160 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2161 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2162 vect_free_slp_instance (instance
);
2163 BB_VINFO_SLP_INSTANCES (bb_vinfo
).release ();
2164 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo
));
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, declarations and return statements missing.  Code kept
   byte-identical; only comments added.
   Recursively verifies children first, then checks each scalar stmt of
   NODE with vect_analyze_stmt.  */
2170 /* Analyze statements contained in SLP tree node after recursively analyzing
2171 the subtree. Return TRUE if the operations are supported. */
2174 vect_slp_analyze_node_operations (slp_tree node
)
2184 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2185 if (!vect_slp_analyze_node_operations (child
))
2188 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2190 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2191 gcc_assert (stmt_info
);
2192 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2194 if (!vect_analyze_stmt (stmt
, &dummy
, node
))
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, some lines missing (the i++ advance on the success
   path, return statements).  Code kept byte-identical.
   Removes instances whose tree fails vect_slp_analyze_node_operations
   (note the loop has no increment in its header: removal keeps the index),
   otherwise records the instance's costs.  */
2202 /* Analyze statements in SLP instances of the basic block. Return TRUE if the
2203 operations are supported. */
2206 vect_slp_analyze_operations (vec
<slp_instance
> slp_instances
, void *data
)
2208 slp_instance instance
;
2211 if (dump_enabled_p ())
2212 dump_printf_loc (MSG_NOTE
, vect_location
,
2213 "=== vect_slp_analyze_operations ===\n");
2215 for (i
= 0; slp_instances
.iterate (i
, &instance
); )
2217 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance
)))
2219 dump_printf_loc (MSG_NOTE
, vect_location
,
2220 "removing SLP instance operations starting from: ");
2221 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
,
2222 SLP_TREE_SCALAR_STMTS
2223 (SLP_INSTANCE_TREE (instance
))[0], 0);
2224 vect_free_slp_instance (instance
);
2225 slp_instances
.ordered_remove (i
);
2229 /* Compute the costs of the SLP instance. */
2230 vect_analyze_slp_cost (instance
, data
);
2235 if (!slp_instances
.length ())
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, some lines missing (the LIFE-marking assignment, the
   continue on live stmts, return).  Code kept byte-identical.
   Sums the scalar cost of NODE's stmts (load/store/plain stmt rates),
   skipping stmts whose defs escape the block (those stay live), and
   recurses into children.  */
2242 /* Compute the scalar cost of the SLP node NODE and its children
2243 and return it. Do not account defs that are marked in LIFE and
2244 update LIFE according to uses of NODE. */
2247 vect_bb_slp_scalar_cost (basic_block bb
,
2248 slp_tree node
, vec
<bool, va_heap
> *life
)
2250 unsigned scalar_cost
= 0;
2255 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
2258 ssa_op_iter op_iter
;
2259 def_operand_p def_p
;
2260 stmt_vec_info stmt_info
;
2265 /* If there is a non-vectorized use of the defs then the scalar
2266 stmt is kept live in which case we do not account it or any
2267 required defs in the SLP children in the scalar cost. This
2268 way we make the vectorization more costly when compared to
2270 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2272 imm_use_iterator use_iter
;
2274 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2275 if (!is_gimple_debug (use_stmt
)
2276 && (gimple_code (use_stmt
) == GIMPLE_PHI
2277 || gimple_bb (use_stmt
) != bb
2278 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt
))))
2281 BREAK_FROM_IMM_USE_STMT (use_iter
);
/* Classify the stmt's scalar cost by its data-reference direction.  */
2287 stmt_info
= vinfo_for_stmt (stmt
);
2288 if (STMT_VINFO_DATA_REF (stmt_info
))
2290 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2291 stmt_cost
= vect_get_stmt_cost (scalar_load
);
2293 stmt_cost
= vect_get_stmt_cost (scalar_store
);
2296 stmt_cost
= vect_get_stmt_cost (scalar_stmt
);
2298 scalar_cost
+= stmt_cost
;
2301 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2303 scalar_cost
+= vect_bb_slp_scalar_cost (bb
, child
, life
);
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, some lines missing (the &life argument, returns).
   Code kept byte-identical; only comments added.
   Compares accumulated scalar cost against target-model vector costs;
   profitable only when vec_outside + vec_inside < scalar_cost.  */
2308 /* Check if vectorization of the basic block is profitable. */
2311 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2313 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2314 slp_instance instance
;
2316 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2317 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2319 /* Calculate scalar cost. */
2320 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2322 auto_vec
<bool, 20> life
;
2323 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2324 scalar_cost
+= vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2325 SLP_INSTANCE_TREE (instance
),
2329 /* Complete the target-specific cost calculation. */
2330 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2331 &vec_inside_cost
, &vec_epilogue_cost
);
2333 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2335 if (dump_enabled_p ())
2337 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2338 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2340 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2341 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2342 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2345 /* Vectorization is profitable if its cost is less than the cost of scalar
2347 if (vec_outside_cost
+ vec_inside_cost
>= scalar_cost
)
/* NOTE(review): corrupted extraction -- statements split, original line
   numbers embedded, many lines missing (all `return NULL;` / final return
   lines, braces, min_vf declaration).  Code kept byte-identical.
   Gatekeeper pipeline for BB SLP: each analysis stage destroys the
   bb_vec_info and (in the pristine source) bails out on failure.  */
2353 /* Check if the basic block can be vectorized. */
2356 vect_slp_analyze_bb_1 (basic_block bb
)
2358 bb_vec_info bb_vinfo
;
2359 vec
<slp_instance
> slp_instances
;
2360 slp_instance instance
;
2363 unsigned n_stmts
= 0;
2365 bb_vinfo
= new_bb_vec_info (bb
);
/* Stage 1: data references must all be analyzable.  */
2369 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, &n_stmts
))
2371 if (dump_enabled_p ())
2372 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2373 "not vectorized: unhandled data-ref in basic "
2376 destroy_bb_vec_info (bb_vinfo
);
/* Stage 2: need at least two data-refs for BB vectorization.  */
2380 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2382 if (dump_enabled_p ())
2383 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2384 "not vectorized: not enough data-refs in "
2387 destroy_bb_vec_info (bb_vinfo
);
2391 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2393 if (dump_enabled_p ())
2394 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2395 "not vectorized: unhandled data access in "
2398 destroy_bb_vec_info (bb_vinfo
);
2402 vect_pattern_recog (bb_vinfo
);
2404 if (!vect_analyze_data_refs_alignment (bb_vinfo
))
2406 if (dump_enabled_p ())
2407 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2408 "not vectorized: bad data alignment in basic "
2411 destroy_bb_vec_info (bb_vinfo
);
2415 /* Check the SLP opportunities in the basic block, analyze and build SLP
2417 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2419 if (dump_enabled_p ())
2421 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2422 "Failed to SLP the basic block.\n");
2423 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2424 "not vectorized: failed to find SLP opportunities "
2425 "in basic block.\n");
2428 destroy_bb_vec_info (bb_vinfo
);
2432 slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2434 /* Mark all the statements that we want to vectorize as pure SLP and
2436 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2438 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
), pure_slp
, -1);
2439 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
2442 /* Mark all the statements that we do not want to vectorize. */
2443 for (gimple_stmt_iterator gsi
= gsi_start_bb (BB_VINFO_BB (bb_vinfo
));
2444 !gsi_end_p (gsi
); gsi_next (&gsi
))
2446 stmt_vec_info vinfo
= vinfo_for_stmt (gsi_stmt (gsi
));
2447 if (STMT_SLP_TYPE (vinfo
) != pure_slp
)
2448 STMT_VINFO_VECTORIZABLE (vinfo
) = false;
2451 /* Analyze dependences. At this point all stmts not participating in
2452 vectorization have to be marked. Dependence analysis assumes
2453 that we either vectorize all SLP instances or none at all. */
2454 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo
))
2456 if (dump_enabled_p ())
2457 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2458 "not vectorized: unhandled data dependence "
2459 "in basic block.\n");
2461 destroy_bb_vec_info (bb_vinfo
);
2465 if (!vect_verify_datarefs_alignment (bb_vinfo
))
2467 if (dump_enabled_p ())
2468 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2469 "not vectorized: unsupported alignment in basic "
2471 destroy_bb_vec_info (bb_vinfo
);
2475 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo
),
2476 BB_VINFO_TARGET_COST_DATA (bb_vinfo
)))
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2480 "not vectorized: bad operation in basic block.\n");
2482 destroy_bb_vec_info (bb_vinfo
);
2486 /* Cost model: check if the vectorization is worthwhile. */
2487 if (!unlimited_cost_model (NULL
)
2488 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
2490 if (dump_enabled_p ())
2491 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2492 "not vectorized: vectorization is not "
2495 destroy_bb_vec_info (bb_vinfo
);
2499 if (dump_enabled_p ())
2500 dump_printf_loc (MSG_NOTE
, vect_location
,
2501 "Basic block will be vectorized using SLP\n");
2508 vect_slp_analyze_bb (basic_block bb
)
2510 bb_vec_info bb_vinfo
;
2512 gimple_stmt_iterator gsi
;
2513 unsigned int vector_sizes
;
2515 if (dump_enabled_p ())
2516 dump_printf_loc (MSG_NOTE
, vect_location
, "===vect_slp_analyze_bb===\n");
2518 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2520 gimple
*stmt
= gsi_stmt (gsi
);
2521 if (!is_gimple_debug (stmt
)
2522 && !gimple_nop_p (stmt
)
2523 && gimple_code (stmt
) != GIMPLE_LABEL
)
2527 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
2529 if (dump_enabled_p ())
2530 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2531 "not vectorized: too many instructions in "
2537 /* Autodetect first vector size we try. */
2538 current_vector_size
= 0;
2539 vector_sizes
= targetm
.vectorize
.autovectorize_vector_sizes ();
2543 bb_vinfo
= vect_slp_analyze_bb_1 (bb
);
2547 destroy_bb_vec_info (bb_vinfo
);
2549 vector_sizes
&= ~current_vector_size
;
2550 if (vector_sizes
== 0
2551 || current_vector_size
== 0)
2554 /* Try the next biggest vector size. */
2555 current_vector_size
= 1 << floor_log2 (vector_sizes
);
2556 if (dump_enabled_p ())
2557 dump_printf_loc (MSG_NOTE
, vect_location
,
2558 "***** Re-trying analysis with "
2559 "vector size %d\n", current_vector_size
);
2564 /* For constant and loop invariant defs of SLP_NODE this function returns
2565 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
2566 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2567 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
2568 REDUC_INDEX is the index of the reduction operand in the statements, unless
2572 vect_get_constant_vectors (tree op
, slp_tree slp_node
,
2573 vec
<tree
> *vec_oprnds
,
2574 unsigned int op_num
, unsigned int number_of_vectors
,
2577 vec
<gimple
*> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
2578 gimple
*stmt
= stmts
[0];
2579 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
2583 unsigned j
, number_of_places_left_in_vector
;
2586 int group_size
= stmts
.length ();
2587 unsigned int vec_num
, i
;
2588 unsigned number_of_copies
= 1;
2590 voprnds
.create (number_of_vectors
);
2591 bool constant_p
, is_store
;
2592 tree neutral_op
= NULL
;
2593 enum tree_code code
= gimple_expr_code (stmt
);
2596 gimple_seq ctor_seq
= NULL
;
2598 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
2599 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
2601 if (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
2602 && reduc_index
!= -1)
2604 op_num
= reduc_index
;
2605 op
= gimple_op (stmt
, op_num
+ 1);
2606 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
2607 we need either neutral operands or the original operands. See
2608 get_initial_def_for_reduction() for details. */
2611 case WIDEN_SUM_EXPR
:
2618 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2619 neutral_op
= build_real (TREE_TYPE (op
), dconst0
);
2621 neutral_op
= build_int_cst (TREE_TYPE (op
), 0);
2626 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op
)))
2627 neutral_op
= build_real (TREE_TYPE (op
), dconst1
);
2629 neutral_op
= build_int_cst (TREE_TYPE (op
), 1);
2634 neutral_op
= build_int_cst (TREE_TYPE (op
), -1);
2637 /* For MIN/MAX we don't have an easy neutral operand but
2638 the initial values can be used fine here. Only for
2639 a reduction chain we have to force a neutral element. */
2642 if (!GROUP_FIRST_ELEMENT (stmt_vinfo
))
2646 def_stmt
= SSA_NAME_DEF_STMT (op
);
2647 loop
= (gimple_bb (stmt
))->loop_father
;
2648 neutral_op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2649 loop_preheader_edge (loop
));
2654 gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo
));
2659 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
2662 op
= gimple_assign_rhs1 (stmt
);
2669 if (CONSTANT_CLASS_P (op
))
2674 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
2675 created vectors. It is greater than 1 if unrolling is performed.
2677 For example, we have two scalar operands, s1 and s2 (e.g., group of
2678 strided accesses of size two), while NUNITS is four (i.e., four scalars
2679 of this type can be packed in a vector). The output vector will contain
2680 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
2683 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
2684 containing the operands.
2686 For example, NUNITS is four as before, and the group size is 8
2687 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
2688 {s5, s6, s7, s8}. */
2690 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
2692 number_of_places_left_in_vector
= nunits
;
2693 elts
= XALLOCAVEC (tree
, nunits
);
2694 bool place_after_defs
= false;
2695 for (j
= 0; j
< number_of_copies
; j
++)
2697 for (i
= group_size
- 1; stmts
.iterate (i
, &stmt
); i
--)
2700 op
= gimple_assign_rhs1 (stmt
);
2706 if (op_num
== 0 || op_num
== 1)
2708 tree cond
= gimple_assign_rhs1 (stmt
);
2709 op
= TREE_OPERAND (cond
, op_num
);
2714 op
= gimple_assign_rhs2 (stmt
);
2716 op
= gimple_assign_rhs3 (stmt
);
2721 op
= gimple_call_arg (stmt
, op_num
);
2728 op
= gimple_op (stmt
, op_num
+ 1);
2729 /* Unlike the other binary operators, shifts/rotates have
2730 the shift count being int, instead of the same type as
2731 the lhs, so make sure the scalar is the right type if
2732 we are dealing with vectors of
2733 long long/long/short/char. */
2734 if (op_num
== 1 && TREE_CODE (op
) == INTEGER_CST
)
2735 op
= fold_convert (TREE_TYPE (vector_type
), op
);
2739 op
= gimple_op (stmt
, op_num
+ 1);
2744 if (reduc_index
!= -1)
2746 loop
= (gimple_bb (stmt
))->loop_father
;
2747 def_stmt
= SSA_NAME_DEF_STMT (op
);
2751 /* Get the def before the loop. In reduction chain we have only
2752 one initial value. */
2753 if ((j
!= (number_of_copies
- 1)
2754 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
))
2759 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
,
2760 loop_preheader_edge (loop
));
2763 /* Create 'vect_ = {op0,op1,...,opn}'. */
2764 number_of_places_left_in_vector
--;
2766 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
2768 if (CONSTANT_CLASS_P (op
))
2770 op
= fold_unary (VIEW_CONVERT_EXPR
,
2771 TREE_TYPE (vector_type
), op
);
2772 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
2776 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
2778 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
), op
);
2780 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
, op
);
2781 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
2785 elts
[number_of_places_left_in_vector
] = op
;
2786 if (!CONSTANT_CLASS_P (op
))
2788 if (TREE_CODE (orig_op
) == SSA_NAME
2789 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
2790 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
2791 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
2792 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
2793 place_after_defs
= true;
2795 if (number_of_places_left_in_vector
== 0)
2797 number_of_places_left_in_vector
= nunits
;
2800 vec_cst
= build_vector (vector_type
, elts
);
2803 vec
<constructor_elt
, va_gc
> *v
;
2805 vec_alloc (v
, nunits
);
2806 for (k
= 0; k
< nunits
; ++k
)
2807 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, elts
[k
]);
2808 vec_cst
= build_constructor (vector_type
, v
);
2811 gimple_stmt_iterator gsi
;
2812 if (place_after_defs
)
2815 (vect_find_last_scalar_stmt_in_slp (slp_node
));
2816 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, &gsi
);
2819 init
= vect_init_vector (stmt
, vec_cst
, vector_type
, NULL
);
2820 if (ctor_seq
!= NULL
)
2822 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
2823 gsi_insert_seq_before_without_update (&gsi
, ctor_seq
,
2827 voprnds
.quick_push (init
);
2828 place_after_defs
= false;
2833 /* Since the vectors are created in the reverse order, we should invert
2835 vec_num
= voprnds
.length ();
2836 for (j
= vec_num
; j
!= 0; j
--)
2838 vop
= voprnds
[j
- 1];
2839 vec_oprnds
->quick_push (vop
);
2844 /* In case that VF is greater than the unrolling factor needed for the SLP
2845 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2846 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
2847 to replicate the vectors. */
2848 while (number_of_vectors
> vec_oprnds
->length ())
2850 tree neutral_vec
= NULL
;
2855 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
2857 vec_oprnds
->quick_push (neutral_vec
);
2861 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
2862 vec_oprnds
->quick_push (vop
);
2868 /* Get vectorized definitions from SLP_NODE that contains corresponding
2869 vectorized def-stmts. */
2872 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
2875 gimple
*vec_def_stmt
;
2878 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
2880 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt
)
2882 gcc_assert (vec_def_stmt
);
2883 vec_oprnd
= gimple_get_lhs (vec_def_stmt
);
2884 vec_oprnds
->quick_push (vec_oprnd
);
2889 /* Get vectorized definitions for SLP_NODE.
2890 If the scalar definitions are loop invariants or constants, collect them and
2891 call vect_get_constant_vectors() to create vector stmts.
2892 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
2893 must be stored in the corresponding child of SLP_NODE, and we call
2894 vect_get_slp_vect_defs () to retrieve them. */
2897 vect_get_slp_defs (vec
<tree
> ops
, slp_tree slp_node
,
2898 vec
<vec
<tree
> > *vec_oprnds
, int reduc_index
)
2901 int number_of_vects
= 0, i
;
2902 unsigned int child_index
= 0;
2903 HOST_WIDE_INT lhs_size_unit
, rhs_size_unit
;
2904 slp_tree child
= NULL
;
2907 bool vectorized_defs
;
2909 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
2910 FOR_EACH_VEC_ELT (ops
, i
, oprnd
)
2912 /* For each operand we check if it has vectorized definitions in a child
2913 node or we need to create them (for invariants and constants). We
2914 check if the LHS of the first stmt of the next child matches OPRND.
2915 If it does, we found the correct child. Otherwise, we call
2916 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2917 to check this child node for the next operand. */
2918 vectorized_defs
= false;
2919 if (SLP_TREE_CHILDREN (slp_node
).length () > child_index
)
2921 child
= SLP_TREE_CHILDREN (slp_node
)[child_index
];
2923 /* We have to check both pattern and original def, if available. */
2926 gimple
*first_def
= SLP_TREE_SCALAR_STMTS (child
)[0];
2928 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def
));
2930 if (operand_equal_p (oprnd
, gimple_get_lhs (first_def
), 0)
2932 && operand_equal_p (oprnd
, gimple_get_lhs (related
), 0)))
2934 /* The number of vector defs is determined by the number of
2935 vector statements in the node from which we get those
2937 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (child
);
2938 vectorized_defs
= true;
2946 if (!vectorized_defs
)
2950 number_of_vects
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
2951 /* Number of vector stmts was calculated according to LHS in
2952 vect_schedule_slp_instance (), fix it by replacing LHS with
2953 RHS, if necessary. See vect_get_smallest_scalar_type () for
2955 vect_get_smallest_scalar_type (first_stmt
, &lhs_size_unit
,
2957 if (rhs_size_unit
!= lhs_size_unit
)
2959 number_of_vects
*= rhs_size_unit
;
2960 number_of_vects
/= lhs_size_unit
;
2965 /* Allocate memory for vectorized defs. */
2967 vec_defs
.create (number_of_vects
);
2969 /* For reduction defs we call vect_get_constant_vectors (), since we are
2970 looking for initial loop invariant values. */
2971 if (vectorized_defs
&& reduc_index
== -1)
2972 /* The defs are already vectorized. */
2973 vect_get_slp_vect_defs (child
, &vec_defs
);
2975 /* Build vectors from scalar defs. */
2976 vect_get_constant_vectors (oprnd
, slp_node
, &vec_defs
, i
,
2977 number_of_vects
, reduc_index
);
2979 vec_oprnds
->quick_push (vec_defs
);
2981 /* For reductions, we only need initial values. */
2982 if (reduc_index
!= -1)
2988 /* Create NCOPIES permutation statements using the mask MASK_BYTES (by
2989 building a vector of type MASK_TYPE from it) and two input vectors placed in
2990 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2991 shifting by STRIDE elements of DR_CHAIN for every copy.
2992 (STRIDE is the number of vectorized stmts for NODE divided by the number of
2994 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2995 the created stmts must be inserted. */
2998 vect_create_mask_and_perm (gimple
*stmt
,
2999 tree mask
, int first_vec_indx
, int second_vec_indx
,
3000 gimple_stmt_iterator
*gsi
, slp_tree node
,
3001 tree vectype
, vec
<tree
> dr_chain
,
3002 int ncopies
, int vect_stmts_counter
)
3005 gimple
*perm_stmt
= NULL
;
3007 tree first_vec
, second_vec
, data_ref
;
3009 stride
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
) / ncopies
;
3011 /* Initialize the vect stmts of NODE to properly insert the generated
3013 for (i
= SLP_TREE_VEC_STMTS (node
).length ();
3014 i
< (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3015 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3017 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
3018 for (i
= 0; i
< ncopies
; i
++)
3020 first_vec
= dr_chain
[first_vec_indx
];
3021 second_vec
= dr_chain
[second_vec_indx
];
3023 /* Generate the permute statement. */
3024 perm_stmt
= gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
3025 first_vec
, second_vec
, mask
);
3026 data_ref
= make_ssa_name (perm_dest
, perm_stmt
);
3027 gimple_set_lhs (perm_stmt
, data_ref
);
3028 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
3030 /* Store the vector statement in NODE. */
3031 SLP_TREE_VEC_STMTS (node
)[stride
* i
+ vect_stmts_counter
] = perm_stmt
;
3033 first_vec_indx
+= stride
;
3034 second_vec_indx
+= stride
;
3039 /* Given FIRST_MASK_ELEMENT - the mask element in element representation,
3040 return in CURRENT_MASK_ELEMENT its equivalent in target specific
3041 representation. Check that the mask is valid and return FALSE if not.
3042 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
3043 the next vector, i.e., the current first vector is not needed. */
3046 vect_get_mask_element (gimple
*stmt
, int first_mask_element
, int m
,
3047 int mask_nunits
, bool only_one_vec
, int index
,
3048 unsigned char *mask
, int *current_mask_element
,
3049 bool *need_next_vector
, int *number_of_mask_fixes
,
3050 bool *mask_fixed
, bool *needs_first_vector
)
3054 /* Convert to target specific representation. */
3055 *current_mask_element
= first_mask_element
+ m
;
3056 /* Adjust the value in case it's a mask for second and third vectors. */
3057 *current_mask_element
-= mask_nunits
* (*number_of_mask_fixes
- 1);
3059 if (*current_mask_element
< 0)
3061 if (dump_enabled_p ())
3063 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3064 "permutation requires past vector ");
3065 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3066 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3071 if (*current_mask_element
< mask_nunits
)
3072 *needs_first_vector
= true;
3074 /* We have only one input vector to permute but the mask accesses values in
3075 the next vector as well. */
3076 if (only_one_vec
&& *current_mask_element
>= mask_nunits
)
3078 if (dump_enabled_p ())
3080 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3081 "permutation requires at least two vectors ");
3082 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3083 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3089 /* The mask requires the next vector. */
3090 while (*current_mask_element
>= mask_nunits
* 2)
3092 if (*needs_first_vector
|| *mask_fixed
)
3094 /* We either need the first vector too or have already moved to the
3095 next vector. In both cases, this permutation needs three
3097 if (dump_enabled_p ())
3099 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3100 "permutation requires at "
3101 "least three vectors ");
3102 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3103 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3109 /* We move to the next vector, dropping the first one and working with
3110 the second and the third - we need to adjust the values of the mask
3112 *current_mask_element
-= mask_nunits
* *number_of_mask_fixes
;
3114 for (i
= 0; i
< index
; i
++)
3115 mask
[i
] -= mask_nunits
* *number_of_mask_fixes
;
3117 (*number_of_mask_fixes
)++;
3121 *need_next_vector
= *mask_fixed
;
3123 /* This was the last element of this mask. Start a new one. */
3124 if (index
== mask_nunits
- 1)
3126 *number_of_mask_fixes
= 1;
3127 *mask_fixed
= false;
3128 *needs_first_vector
= false;
3135 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3136 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3137 permute statements for the SLP node NODE of the SLP instance
3138 SLP_NODE_INSTANCE. */
3141 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3142 gimple_stmt_iterator
*gsi
, int vf
,
3143 slp_instance slp_node_instance
, bool analyze_only
)
3145 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3146 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3147 tree mask_element_type
= NULL_TREE
, mask_type
;
3148 int i
, j
, k
, nunits
, vec_index
= 0;
3149 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3150 int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3151 int first_mask_element
;
3152 int index
, unroll_factor
, current_mask_element
, ncopies
;
3153 unsigned char *mask
;
3154 bool only_one_vec
= false, need_next_vector
= false;
3155 int first_vec_index
, second_vec_index
, orig_vec_stmts_num
, vect_stmts_counter
;
3156 int number_of_mask_fixes
= 1;
3157 bool mask_fixed
= false;
3158 bool needs_first_vector
= false;
3161 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3164 stmt_info
= vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
));
3166 mode
= TYPE_MODE (vectype
);
3168 if (!can_vec_perm_p (mode
, false, NULL
))
3170 if (dump_enabled_p ())
3172 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3173 "no vect permute for ");
3174 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3175 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3180 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3181 same size as the vector element being permuted. */
3182 mask_element_type
= lang_hooks
.types
.type_for_mode
3183 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
3184 mask_type
= get_vectype_for_scalar_type (mask_element_type
);
3185 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3186 mask
= XALLOCAVEC (unsigned char, nunits
);
3187 unroll_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
3189 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
3190 unrolling factor. */
3192 = (STMT_VINFO_GROUP_SIZE (stmt_info
)
3193 * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
)
3194 + nunits
- 1) / nunits
;
3195 if (orig_vec_stmts_num
== 1)
3196 only_one_vec
= true;
3198 /* Number of copies is determined by the final vectorization factor
3199 relatively to SLP_NODE_INSTANCE unrolling factor. */
3200 ncopies
= vf
/ SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance
);
3202 /* Generate permutation masks for every NODE. Number of masks for each NODE
3203 is equal to GROUP_SIZE.
3204 E.g., we have a group of three nodes with three loads from the same
3205 location in each node, and the vector size is 4. I.e., we have a
3206 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3207 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3208 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3211 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3212 The last mask is illegal since we assume two operands for permute
3213 operation, and the mask element values can't be outside that range.
3214 Hence, the last mask must be converted into {2,5,5,5}.
3215 For the first two permutations we need the first and the second input
3216 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3217 we need the second and the third vectors: {b1,c1,a2,b2} and
3222 vect_stmts_counter
= 0;
3224 first_vec_index
= vec_index
++;
3226 second_vec_index
= first_vec_index
;
3228 second_vec_index
= vec_index
++;
3230 for (j
= 0; j
< unroll_factor
; j
++)
3232 for (k
= 0; k
< group_size
; k
++)
3234 i
= SLP_TREE_LOAD_PERMUTATION (node
)[k
];
3235 first_mask_element
= i
+ j
* STMT_VINFO_GROUP_SIZE (stmt_info
);
3236 if (!vect_get_mask_element (stmt
, first_mask_element
, 0,
3237 nunits
, only_one_vec
, index
,
3238 mask
, ¤t_mask_element
,
3240 &number_of_mask_fixes
, &mask_fixed
,
3241 &needs_first_vector
))
3243 gcc_assert (current_mask_element
>= 0
3244 && current_mask_element
< 2 * nunits
);
3245 mask
[index
++] = current_mask_element
;
3247 if (index
== nunits
)
3250 if (!can_vec_perm_p (mode
, false, mask
))
3252 if (dump_enabled_p ())
3254 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3256 "unsupported vect permute { ");
3257 for (i
= 0; i
< nunits
; ++i
)
3258 dump_printf (MSG_MISSED_OPTIMIZATION
, "%d ",
3260 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3268 tree mask_vec
, *mask_elts
;
3269 mask_elts
= XALLOCAVEC (tree
, nunits
);
3270 for (l
= 0; l
< nunits
; ++l
)
3271 mask_elts
[l
] = build_int_cst (mask_element_type
,
3273 mask_vec
= build_vector (mask_type
, mask_elts
);
3275 if (need_next_vector
)
3277 first_vec_index
= second_vec_index
;
3278 second_vec_index
= vec_index
;
3281 vect_create_mask_and_perm (stmt
,
3282 mask_vec
, first_vec_index
, second_vec_index
,
3283 gsi
, node
, vectype
, dr_chain
,
3284 ncopies
, vect_stmts_counter
++);
3296 /* Vectorize SLP instance tree in postorder. */
3299 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3300 unsigned int vectorization_factor
)
3303 bool grouped_store
, is_store
;
3304 gimple_stmt_iterator si
;
3305 stmt_vec_info stmt_info
;
3306 unsigned int vec_stmts_size
, nunits
, group_size
;
3314 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3315 vect_schedule_slp_instance (child
, instance
, vectorization_factor
);
3317 stmt
= SLP_TREE_SCALAR_STMTS (node
)[0];
3318 stmt_info
= vinfo_for_stmt (stmt
);
3320 /* VECTYPE is the type of the destination. */
3321 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3322 nunits
= (unsigned int) TYPE_VECTOR_SUBPARTS (vectype
);
3323 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3325 /* For each SLP instance calculate number of vector stmts to be created
3326 for the scalar stmts in each node of the SLP tree. Number of vector
3327 elements in one vector iteration is the number of scalar elements in
3328 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
3330 Unless this is a SLP reduction in which case the number of vector
3331 stmts is equal to the number of vector stmts of the children. */
3332 if (GROUP_FIRST_ELEMENT (stmt_info
)
3333 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3334 vec_stmts_size
= SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[0]);
3336 vec_stmts_size
= (vectorization_factor
* group_size
) / nunits
;
3338 if (!SLP_TREE_VEC_STMTS (node
).exists ())
3340 SLP_TREE_VEC_STMTS (node
).create (vec_stmts_size
);
3341 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = vec_stmts_size
;
3344 if (dump_enabled_p ())
3346 dump_printf_loc (MSG_NOTE
,vect_location
,
3347 "------>vectorizing SLP node starting from: ");
3348 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3349 dump_printf (MSG_NOTE
, "\n");
3352 /* Vectorized stmts go before the last scalar stmt which is where
3353 all uses are ready. */
3354 si
= gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node
));
3356 /* Mark the first element of the reduction chain as reduction to properly
3357 transform the node. In the analysis phase only the last element of the
3358 chain is marked as reduction. */
3359 if (GROUP_FIRST_ELEMENT (stmt_info
) && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3360 && GROUP_FIRST_ELEMENT (stmt_info
) == stmt
)
3362 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
3363 STMT_VINFO_TYPE (stmt_info
) = reduc_vec_info_type
;
3366 /* Handle two-operation SLP nodes by vectorizing the group with
3367 both operations and then performing a merge. */
3368 if (SLP_TREE_TWO_OPERATORS (node
))
3370 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3371 enum tree_code ocode
;
3373 unsigned char *mask
= XALLOCAVEC (unsigned char, group_size
);
3374 bool allsame
= true;
3375 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt
)
3376 if (gimple_assign_rhs_code (ostmt
) != code0
)
3380 ocode
= gimple_assign_rhs_code (ostmt
);
3389 tree tmask
= NULL_TREE
;
3390 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3391 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3392 SLP_TREE_VEC_STMTS (node
).truncate (0);
3393 gimple_assign_set_rhs_code (stmt
, ocode
);
3394 vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3395 gimple_assign_set_rhs_code (stmt
, code0
);
3396 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3397 SLP_TREE_VEC_STMTS (node
).truncate (0);
3398 tree meltype
= build_nonstandard_integer_type
3399 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
))), 1);
3400 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3402 for (j
= 0; j
< v0
.length (); ++j
)
3404 tree
*melts
= XALLOCAVEC (tree
, TYPE_VECTOR_SUBPARTS (vectype
));
3405 for (l
= 0; l
< TYPE_VECTOR_SUBPARTS (vectype
); ++l
)
3407 if (k
>= group_size
)
3409 melts
[l
] = build_int_cst
3410 (meltype
, mask
[k
++] * TYPE_VECTOR_SUBPARTS (vectype
) + l
);
3412 tmask
= build_vector (mvectype
, melts
);
3414 /* ??? Not all targets support a VEC_PERM_EXPR with a
3415 constant mask that would translate to a vec_merge RTX
3416 (with their vec_perm_const_ok). We can either not
3417 vectorize in that case or let veclower do its job.
3418 Unfortunately that isn't too great and at least for
3419 plus/minus we'd eventually like to match targets
3420 vector addsub instructions. */
3422 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3424 gimple_assign_lhs (v0
[j
]),
3425 gimple_assign_lhs (v1
[j
]), tmask
);
3426 vect_finish_stmt_generation (stmt
, vstmt
, &si
);
3427 SLP_TREE_VEC_STMTS (node
).quick_push (vstmt
);
3434 is_store
= vect_transform_stmt (stmt
, &si
, &grouped_store
, node
, instance
);
3438 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3439 For loop vectorization this is done in vectorizable_call, but for SLP
3440 it needs to be deferred until end of vect_schedule_slp, because multiple
3441 SLP instances may refer to the same scalar stmt. */
3444 vect_remove_slp_scalar_calls (slp_tree node
)
3446 gimple
*stmt
, *new_stmt
;
3447 gimple_stmt_iterator gsi
;
3451 stmt_vec_info stmt_info
;
3456 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3457 vect_remove_slp_scalar_calls (child
);
3459 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt
)
3461 if (!is_gimple_call (stmt
) || gimple_bb (stmt
) == NULL
)
3463 stmt_info
= vinfo_for_stmt (stmt
);
3464 if (stmt_info
== NULL
3465 || is_pattern_stmt_p (stmt_info
)
3466 || !PURE_SLP_STMT (stmt_info
))
3468 lhs
= gimple_call_lhs (stmt
);
3469 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3470 set_vinfo_for_stmt (new_stmt
, stmt_info
);
3471 set_vinfo_for_stmt (stmt
, NULL
);
3472 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
3473 gsi
= gsi_for_stmt (stmt
);
3474 gsi_replace (&gsi
, new_stmt
, false);
3475 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
3479 /* Generate vector code for all SLP instances in the loop/basic block. */
3482 vect_schedule_slp (vec_info
*vinfo
)
3484 vec
<slp_instance
> slp_instances
;
3485 slp_instance instance
;
3487 bool is_store
= false;
3489 slp_instances
= vinfo
->slp_instances
;
3490 if (is_a
<loop_vec_info
> (vinfo
))
3491 vf
= as_a
<loop_vec_info
> (vinfo
)->vectorization_factor
;
3495 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3497 /* Schedule the tree of INSTANCE. */
3498 is_store
= vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
3500 if (dump_enabled_p ())
3501 dump_printf_loc (MSG_NOTE
, vect_location
,
3502 "vectorizing stmts using SLP.\n");
3505 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3507 slp_tree root
= SLP_INSTANCE_TREE (instance
);
3510 gimple_stmt_iterator gsi
;
3512 /* Remove scalar call stmts. Do not do this for basic-block
3513 vectorization as not all uses may be vectorized.
3514 ??? Why should this be necessary? DCE should be able to
3515 remove the stmts itself.
3516 ??? For BB vectorization we can as well remove scalar
3517 stmts starting from the SLP tree root if they have no
3519 if (is_a
<loop_vec_info
> (vinfo
))
3520 vect_remove_slp_scalar_calls (root
);
3522 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store
)
3523 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
3525 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store
)))
3528 if (is_pattern_stmt_p (vinfo_for_stmt (store
)))
3529 store
= STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store
));
3530 /* Free the attached stmt_vec_info and remove the stmt. */
3531 gsi
= gsi_for_stmt (store
);
3532 unlink_stmt_vdef (store
);
3533 gsi_remove (&gsi
, true);
3534 release_defs (store
);
3535 free_stmt_vec_info (store
);
3543 /* Vectorize the basic block. */
3546 vect_slp_transform_bb (basic_block bb
)
3548 bb_vec_info bb_vinfo
= vec_info_for_bb (bb
);
3549 gimple_stmt_iterator si
;
3551 gcc_assert (bb_vinfo
);
3553 if (dump_enabled_p ())
3554 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB\n");
3556 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
3558 gimple
*stmt
= gsi_stmt (si
);
3559 stmt_vec_info stmt_info
;
3561 if (dump_enabled_p ())
3563 dump_printf_loc (MSG_NOTE
, vect_location
,
3564 "------>SLPing statement: ");
3565 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3566 dump_printf (MSG_NOTE
, "\n");
3569 stmt_info
= vinfo_for_stmt (stmt
);
3570 gcc_assert (stmt_info
);
3572 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3573 if (STMT_SLP_TYPE (stmt_info
))
3575 vect_schedule_slp (bb_vinfo
);
3580 if (dump_enabled_p ())
3581 dump_printf_loc (MSG_NOTE
, vect_location
,
3582 "BASIC BLOCK VECTORIZED\n");
3584 destroy_bb_vec_info (bb_vinfo
);