/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2018 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"

/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}

/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}

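/* An illustrative note (an example added for clarity, not from the original
   sources): for a first stmt "x_1 = a_2 + b_3" the node above gets two
   operand slots, while for "x_1 = c_4 < d_5 ? a_2 : b_3" the embedded
   comparison bumps NOPS to four: the two comparison operands plus the two
   arms of the COND_EXPR.  */
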
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}

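/* As an illustration (an example added for clarity): in a chain whose
   elements access a[0], a[1] and a[3] (so the third element carries a
   GROUP_GAP of 2), the places returned for the three stmts are 0, 1 and 3
   respectively.  */
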
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the parent node, return 1; if everything is ok
   return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     gimple *stmt, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could end up with a different comparison code from the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	{
	  first_op_cond = true;
	  number_of_oprnds++;
	}
      else
	commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
	{
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];

	  if (i < 2)
	    oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
	  else
	    oprnd = gimple_op (stmt, map[i]);
	}
      else
	oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: can't analyze def for ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
	 from the pattern.  Check that all the stmts of the node are in the
	 same pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
	  && vect_stmt_in_region_p (vinfo, def_stmt)
	  && vinfo_for_stmt (def_stmt)
	  && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
	  && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
	{
	  pattern = true;
	  if (!first && !oprnd_info->first_pattern
	      /* Allow different pattern state for the defs of the
		 first stmt in reduction chains.  */
	      && (oprnd_info->first_dt != vect_reduction_def
		  || (!second && !oprnd_info->second_pattern)))
	    {
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: some of the stmts"
				   " are in a pattern, and others are not ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}

	      return 1;
	    }

	  def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
	  dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

	  if (dt == vect_unknown_def_type)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Unsupported pattern.\n");
	      return -1;
	    }

	  switch (gimple_code (def_stmt))
	    {
	    case GIMPLE_PHI:
	    case GIMPLE_ASSIGN:
	      break;

	    default:
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "unsupported defining stmt:\n");
	      return -1;
	    }
	}

      if (second)
	oprnd_info->second_pattern = pattern;

      if (first)
	{
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_pattern = pattern;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
	}
      else
	{
	  /* Not first stmt of the group, check that the def-stmt/s match
	     the def-stmt/s of the first stmt.  Allow different definition
	     types for reduction chains: the first stmt must be a
	     vect_reduction_def (a phi node), and the rest
	     vect_internal_def.  */
	  if (((oprnd_info->first_dt != dt
		&& !(oprnd_info->first_dt == vect_reduction_def
		     && dt == vect_internal_def)
		&& !((oprnd_info->first_dt == vect_external_def
		      || oprnd_info->first_dt == vect_constant_def)
		     && (dt == vect_external_def
			 || dt == vect_constant_def)))
	       || !types_compatible_p (oprnd_info->first_op_type,
				       TREE_TYPE (oprnd))))
	    {
	      /* Try swapping operands if we got a mismatch.  */
	      if (i == 0
		  && !swapped
		  && commutative)
		{
		  swapped = true;
		  goto again;
		}

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different types\n");

	      return 1;
	    }
	}

      /* Check the types of the definitions.  */
      switch (dt)
	{
	case vect_constant_def:
	case vect_external_def:
	  /* We must already have set a vector size by now.  */
	  gcc_checking_assert (maybe_ne (current_vector_size, 0U));
	  if (!current_vector_size.is_constant ())
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: invalid type of def "
				   "for variable-length SLP ");
		  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
		  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
		}
	      return -1;
	    }
	  break;

	case vect_reduction_def:
	case vect_induction_def:
	case vect_internal_def:
	  oprnd_info->def_stmts.quick_push (def_stmt);
	  break;

	default:
	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: illegal type of def ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  return -1;
	}
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
	 we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: cannot swap operands of "
			       "shared stmt ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  return -1;
	}

      if (first_op_cond)
	{
	  tree cond = gimple_assign_rhs1 (stmt);
	  enum tree_code code = TREE_CODE (cond);

	  /* Swap.  */
	  if (*swap == 1)
	    {
	      swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
				 &TREE_OPERAND (cond, 1));
	      TREE_SET_CODE (cond, swap_tree_comparison (code));
	    }
	  /* Invert.  */
	  else
	    {
	      swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
				 gimple_assign_rhs3_ptr (stmt));
	      bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
	      code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
	      gcc_assert (code != ERROR_MARK);
	      TREE_SET_CODE (cond, code);
	    }
	}
      else
	swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
			   gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "swapped operands to match def types in ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
    }

  *swap = swapped;
  return 0;
}

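/* A small worked example of the swapping above (added for clarity): with a
   first stmt "x = a_1 + 2" and a second stmt "y = 3 + b_2", operand zero
   sees an internal def followed by a constant and operand one the reverse.
   Since PLUS_EXPR is commutative the second stmt's rhs operands are read
   swapped (and finally exchanged in the IL), after which both operand slots
   collect defs of matching kinds.  */
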
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for VINFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  VINFO, GROUP_SIZE and MAX_NUNITS are
   as for vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type in ");
	  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: unrolling required "
		       "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are isomorphic, otherwise return false and indicate in
   *MATCHES which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<gimple *> stmts, unsigned int group_size,
		       unsigned nops, poly_uint64 *max_nunits,
		       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: unvectorizable statement ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: not GIMPLE_ASSIGN nor "
			       "GIMPLE_CALL ");
	      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
	    }
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	{
	  /* Fatal mismatch.  */
	  matches[0] = false;
	  return false;
	}

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
	{
	  rhs_code = CALL_EXPR;
	  if (gimple_call_internal_p (call_stmt)
	      || gimple_call_tail_p (call_stmt)
	      || gimple_call_noreturn_p (call_stmt)
	      || !gimple_call_nothrow_p (call_stmt)
	      || gimple_call_chain (call_stmt))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: unsupported call type ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }
	}
      else
	rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
	{
	  first_stmt_code = rhs_code;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	    {
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
					   optab_vector);

	      if (!optab
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		{
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
					       optab_scalar);

		  if (!optab)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		    {
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: "
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		      matches[0] = false;
		      return false;
		    }
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		    {
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
		    }
		}
	    }
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	    {
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
	    }
	}
      else
	{
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if (first_stmt_code != rhs_code
	      && (first_stmt_code != IMAGPART_EXPR
		  || rhs_code != REALPART_EXPR)
	      && (first_stmt_code != REALPART_EXPR
		  || rhs_code != IMAGPART_EXPR)
	      /* Handle mismatches in plus/minus by computing both
		 and merging the results.  */
	      && !((first_stmt_code == PLUS_EXPR
		    || first_stmt_code == MINUS_EXPR)
		   && (alt_stmt_code == PLUS_EXPR
		       || alt_stmt_code == MINUS_EXPR)
		   && rhs_code == alt_stmt_code)
	      && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
		   && (first_stmt_code == ARRAY_REF
		       || first_stmt_code == BIT_FIELD_REF
		       || first_stmt_code == INDIRECT_REF
		       || first_stmt_code == COMPONENT_REF
		       || first_stmt_code == MEM_REF)))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
				   "in stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				    first_stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (need_same_oprnds
	      && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different shift "
				   "arguments in ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Mismatch.  */
	      continue;
	    }

	  if (rhs_code == CALL_EXPR)
	    {
	      gimple *first_stmt = stmts[0];
	      if (gimple_call_num_args (stmt) != nops
		  || !operand_equal_p (gimple_call_fn (first_stmt),
				       gimple_call_fn (stmt), 0)
		  || gimple_call_fntype (first_stmt)
		     != gimple_call_fntype (stmt))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different calls in ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	{
	  if (REFERENCE_CLASS_P (lhs))
	    {
	      /* Store.  */
	      ;
	    }
	  else
	    {
	      /* Load.  */
	      first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
	      if (prev_first_load)
		{
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		    {
		      if (dump_enabled_p ())
			{
			  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					   vect_location,
					   "Build SLP failed: different "
					   "interleaving chains in one node ");
			  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					    stmt, 0);
			}
		      /* Mismatch.  */
		      continue;
		    }
		}
	      else
		prev_first_load = first_load;
	    }
	} /* Grouped access.  */
      else
	{
	  if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
	    {
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: not grouped load ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != CALL_EXPR)
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: operation");
		  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
		  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
		}
	      /* Fatal mismatch.  */
	      matches[0] = false;
	      return false;
	    }

	  if (rhs_code == COND_EXPR)
	    {
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

	      if (i == 0)
		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		{
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);
		}

	      if (first_cond_code == cond_code)
		;
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
		swap[i] = 1;
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		swap[i] = 2;
	      else
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: different"
				       " operation ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
					stmt, 0);
		    }
		  /* Mismatch.  */
		  continue;
		}
	    }
	}

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned HOST_WIDE_INT count;
      if (!nunits.is_constant (&count))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: different operations "
			     "not allowed with variable-length SLP.\n");
	  return false;
	}
      vec_perm_builder sel (count, count, 1);
      for (i = 0; i < count; ++i)
	{
	  unsigned int elt = i;
	  if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
	    elt += count;
	  sel.quick_push (elt);
	}
      vec_perm_indices indices (sel, 2, count);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	{
	  for (i = 0; i < group_size; ++i)
	    if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
	      {
		matches[i] = false;
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different operation "
				     "in stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[i], 0);
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "original stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      first_stmt, 0);
		  }
	      }
	  return false;
	}
      *two_operators = true;
    }

  return true;
}

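/* To illustrate the two-operator case (an example added for clarity): the
   group { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } is accepted with
   ALT_STMT_CODE MINUS_EXPR.  Both a vector addition and a vector
   subtraction are generated later, and with COUNT == 4 the mask built above
   is { 0, 5, 2, 7 }, blending the even lanes of the add result with the odd
   lanes of the subtract result.  */
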
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <gimple *> value_type;
  typedef vec <gimple *> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
static scalar_stmts_set_t *bst_fail;

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
		     vec<gimple *> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits, vec<slp_tree> *loads,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     unsigned max_tree_size)
{
  if (bst_fail->contains (stmts))
    return NULL;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
					loads, matches, npermutes, tree_size,
					max_tree_size);
  /* When SLP build fails for stmts record this, otherwise SLP build
     can be exponential in time when we allow to construct parts from
     scalars, see PR81723.  */
  if (! res)
    {
      vec <gimple *> x;
      x.create (stmts.length ());
      x.splice (stmts);
      bst_fail->add (x);
    }
  return res;
}

/* Recursively build an SLP tree for the group of stmts STMTS.
   Fail (and return NULL) if the def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise return
   the built SLP tree; *MATCHES records which stmts failed to match.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<gimple *> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       vec<slp_tree> *loads,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
	nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype,
				   max_nunits))
	return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	{
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    if (stmt != stmts[0])
	      return NULL;
	}
      else
	{
	  /* Else def types have to match.  */
	  FOR_EACH_VEC_ELT (stmts, i, stmt)
	    {
	      /* But for reduction chains only check on the first stmt.  */
	      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
		  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
		continue;
	      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
		return NULL;
	    }
	}
      node = vect_create_new_slp_node (stmts);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
			      stmts, group_size, nops,
			      &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmt, i, &oprnds_info);
      if (res != 0)
	matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
	break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
	vect_free_oprnd_info (oprnds_info);
	return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  if (tree_size)
    max_tree_size -= *tree_size;

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	continue;

      if (++this_tree_size > max_tree_size)
	{
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child);
	  vect_free_oprnd_info (oprnds_info);
	  return NULL;
	}

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_loads, matches, npermutes,
					&this_tree_size,
					max_tree_size)) != NULL)
	{
	  /* If we have all children of child built up from scalars then just
	     throw that away and build it up this node from scalars.  */
	  if (!SLP_TREE_CHILDREN (child).is_empty ()
	      /* ??? Rejecting patterns this way doesn't work.  We'd have to
		 do extra work to cancel the pattern so the uses see the
		 scalar version.  */
	      && !is_pattern_stmt_p
		    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
	    {
	      slp_tree grandchild;

	      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		  break;
	      if (!grandchild)
		{
		  /* Roll back.  */
		  this_loads.truncate (old_nloads);
		  this_tree_size = old_tree_size;
		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    vect_free_slp_tree (grandchild);
		  SLP_TREE_CHILDREN (child).truncate (0);

		  dump_printf_loc (MSG_NOTE, vect_location,
				   "Building parent vector operands from "
				   "scalars instead\n");
		  oprnd_info->def_stmts = vNULL;
		  SLP_TREE_DEF_TYPE (child) = vect_external_def;
		  children.safe_push (child);
		  continue;
		}
	    }

	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
	  continue;
	}

      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  && !matches[0]
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->def_stmts);
	  SLP_TREE_DEF_TYPE (child) = vect_external_def;
	  children.safe_push (child);
	  oprnd_info->def_stmts = vNULL;
	  continue;
	}

      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
      if (i == 0
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  && matches[0]
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && nops == 2
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt)
	  && commutative_tree_code (gimple_assign_rhs_code (stmt))
	  && ! two_operators
	  /* Do so only if the number of not successful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  && *npermutes < 4)
	{
	  /* Verify if we can safely swap or if we committed to a specific
	     operand order already.  */
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j]
		&& (swap[j] != 0
		    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
	      {
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: cannot swap operands "
				     "of shared stmt ");
		    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				      stmts[j], 0);
		  }
		goto fail;
	      }

	  /* Swap mismatched definition stmts.  */
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (!matches[j])
	      {
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		dump_printf (MSG_NOTE, "%d ", j);
	      }
	  dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ... */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_loads, tem, npermutes,
					    &this_tree_size,
					    max_tree_size)) != NULL)
	    {
	      /* ... so if successful we can apply the operand swapping
		 to the GIMPLE IL.  This is necessary because for example
		 vect_get_slp_defs uses operand indexes and thus expects
		 canonical operand order.  This is also necessary even
		 if we end up building the operand from scalars as
		 we'll continue to process swapped operand two.  */
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  gimple_set_plf (stmt, GF_PLF_1, false);
		}
	      for (j = 0; j < group_size; ++j)
		{
		  gimple *stmt = stmts[j];
		  if (!matches[j])
		    {
		      /* Avoid swapping operands twice.  */
		      if (gimple_plf (stmt, GF_PLF_1))
			continue;
		      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
					 gimple_assign_rhs2_ptr (stmt));
		      gimple_set_plf (stmt, GF_PLF_1, true);
		    }
		}
	      /* Verify we swap all duplicates or none.  */
	      if (flag_checking)
		for (j = 0; j < group_size; ++j)
		  {
		    gimple *stmt = stmts[j];
		    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
		  }

	      /* If we have all children of child built up from scalars then
		 just throw that away and build it up this node from scalars.  */
	      if (!SLP_TREE_CHILDREN (child).is_empty ()
		  /* ??? Rejecting patterns this way doesn't work.  We'd have
		     to do extra work to cancel the pattern so the uses see the
		     scalar version.  */
		  && !is_pattern_stmt_p
			(vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
		{
		  unsigned int j;
		  slp_tree grandchild;

		  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
		    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
		      break;
		  if (!grandchild)
		    {
		      /* Roll back.  */
		      this_loads.truncate (old_nloads);
		      this_tree_size = old_tree_size;
		      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
			vect_free_slp_tree (grandchild);
		      SLP_TREE_CHILDREN (child).truncate (0);

		      dump_printf_loc (MSG_NOTE, vect_location,
				       "Building parent vector operands from "
				       "scalars instead\n");
		      oprnd_info->def_stmts = vNULL;
		      SLP_TREE_DEF_TYPE (child) = vect_external_def;
		      children.safe_push (child);
		      continue;
		    }
		}

	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
	      continue;
	    }

	  ++*npermutes;
	}

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
	vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
		   SLP_TREE_DEF_TYPE (node) != vect_internal_def
		   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}

/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}

/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}

/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
	return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
	if (lidx != node->load_permutation[j])
	  return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
	return false;
      if (bitmap_bit_p (load_index, lidx))
	return false;
      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
	      && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	if (node->load_permutation.exists ())
	  FOR_EACH_VEC_ELT (node->load_permutation, j, next)
	    dump_printf (MSG_NOTE, "%d ", next);
	else
	  for (k = 0; k < group_size; ++k)
	    dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this?  */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
	 no permutation is necessary.  */
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
	{
	  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    continue;
	  bool subchain_p = true;
	  next_load = NULL;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
	    {
	      if (j != 0
		  && (next_load != load
		      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
	    }
	  if (subchain_p)
	    SLP_TREE_LOAD_PERMUTATION (node).release ();
	  else
	    {
	      stmt_vec_info group_info
		= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
	      group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
	      unsigned HOST_WIDE_INT nunits
		= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
	      unsigned k, maxk = 0;
	      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
		if (k > maxk)
		  maxk = k;
	      /* In BB vectorization we may not actually use a loaded vector
		 accessing elements in excess of GROUP_SIZE.  */
	      if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "BB vectorization with gaps at the end of "
				   "a load is not supported\n");
		  return false;
		}

	      /* Verify the permutation can be generated.  */
	      vec<tree> tem;
	      unsigned n_perms;
	      if (!vect_transform_slp_perm_load (node, tem, NULL,
						 1, slp_instn, true, &n_perms))
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported load permutation\n");
		  return false;
		}
	    }
	}
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  Be
     conservative about the vectorization factor, there are permutations
     that will use three vector inputs only starting from a specific factor
     and the vectorization factor is not yet final.
     ??? The SLP instance unrolling factor might not be the maximum one.  */
  unsigned n_perms;
  poly_uint64 test_vf
    = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
			     LOOP_VINFO_VECT_FACTOR
			     (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt))));
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
	&& !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
					  slp_instn, true, &n_perms))
      return false;

  return true;
}

/* Find the last scalar stmt in the SLP tree NODE.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
	last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
	last = get_later_stmt (stmt, last);
    }

  return last;
}

/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
			 stmt_vector_for_cost *prologue_cost_vec,
			 stmt_vector_for_cost *body_cost_vec,
			 unsigned ncopies_for_cost,
			 scalar_stmts_set_t *visited)
{
  unsigned i, j;
  slp_tree child;
  gimple *stmt;
  stmt_vec_info stmt_info;
  tree lhs;

  /* If we already costed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.  */
  if (visited->contains (SLP_TREE_SCALAR_STMTS (node)))
    return;

  visited->add (SLP_TREE_SCALAR_STMTS (node).copy ());

  /* Recurse down the SLP tree.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
			       body_cost_vec, ncopies_for_cost, visited);

  /* Look at the first scalar stmt to determine the cost.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      vect_memory_access_type memory_access_type
	= (STMT_VINFO_STRIDED_P (stmt_info)
	   ? VMAT_STRIDED_SLP
	   : VMAT_CONTIGUOUS);
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
	vect_model_store_cost (stmt_info, ncopies_for_cost,
			       memory_access_type, vect_uninitialized_def,
			       node, prologue_cost_vec, body_cost_vec);
      else
	{
	  gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
	  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
	    {
	      /* If the load is permuted then the alignment is determined by
		 the first group element not by the first scalar stmt DR.  */
	      stmt = GROUP_FIRST_ELEMENT (stmt_info);
	      stmt_info = vinfo_for_stmt (stmt);
	      /* Record the cost for the permutation.  */
	      unsigned n_perms;
	      vect_transform_slp_perm_load (node, vNULL, NULL,
					    ncopies_for_cost, instance, true,
					    &n_perms);
	      record_stmt_cost (body_cost_vec, n_perms, vec_perm,
				stmt_info, 0, vect_body);
	      unsigned assumed_nunits
		= vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info));
	      /* And adjust the number of loads performed.  This handles
		 redundancies as well as loads that are later dead.  */
	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
	      bitmap_clear (perm);
	      for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
		bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
	      ncopies_for_cost = 0;
	      bool load_seen = false;
	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
		{
		  if (i % assumed_nunits == 0)
		    {
		      if (load_seen)
			ncopies_for_cost++;
		      load_seen = false;
		    }
		  if (bitmap_bit_p (perm, i))
		    load_seen = true;
		}
	      if (load_seen)
		ncopies_for_cost++;
	      gcc_assert (ncopies_for_cost
			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
			      + assumed_nunits - 1) / assumed_nunits);
	      poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance);
	      ncopies_for_cost *= estimated_poly_value (uf);
	    }
	  /* Record the cost for the vector loads.  */
	  vect_model_load_cost (stmt_info, ncopies_for_cost,
				memory_access_type, node, prologue_cost_vec,
				body_cost_vec);
	  return;
	}
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
			     (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
	 to be generated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
	{
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			    stmt_info, 0, vect_body);
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
			    stmt_info, 0, vect_body);
	}
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
	continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
	{
	  /* Without looking at the actual initializer a vector of
	     constants can be implemented as load from the constant pool.
	     ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  if (dt == vect_constant_def)
	    record_stmt_cost (prologue_cost_vec, 1, vector_load,
			      stmt_info, 0, vect_prologue);
	  else if (dt == vect_external_def)
	    record_stmt_cost (prologue_cost_vec, 1, vec_construct,
			      stmt_info, 0, vect_prologue);
	}
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Get the estimated vectorization factor, which is always one for
     basic-block vectorization.  */
  unsigned int assumed_vf;
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info));
  else
    assumed_vf = 1;
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
  tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info);
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
	{
	case DOT_PROD_EXPR:
	case SAD_EXPR:
	  vectype_for_cost = get_vectype_for_scalar_type
	    (TREE_TYPE (gimple_assign_rhs1 (stmt)));
	  break;
	default:;
	}
    }
  unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
  ncopies_for_cost = (least_common_multiple (assumed_nunits,
					     group_size * assumed_vf)
		      / assumed_nunits);

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  scalar_stmts_set_t *visited = new scalar_stmts_set_t ();
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
			   &prologue_cost_vec, &body_cost_vec,
			   ncopies_for_cost, visited);
  delete visited;

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}

/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
   one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
   the first GROUP1_SIZE stmts, since stores are consecutive), the second
   containing the remainder.
   Return the first stmt in the second group.  */

static gimple *
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
{
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  GROUP_GAP (vinfo_for_stmt (group2))
    = GROUP_GAP (first_vinfo) + group1_size;

  /* GROUP_GAP of the first group now has to skip over the second group too.  */
  GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}

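/* A quick sanity check of the gap bookkeeping above (an example added for
   clarity): splitting a group of 8 consecutive stores with an initial
   GROUP_GAP of 0 at GROUP1_SIZE 4 gives the second group GROUP_GAP
   0 + 4 = 4 (it skips over the first group's vector) and bumps the first
   group's GROUP_GAP to 0 + 4 = 4 as well (it now skips the second group's
   elements).  */
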
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */

static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}

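/* E.g. for GROUP_SIZE 6 and a V4SI vector type (NUNITS 4) the common
   multiple is 12, so the SLP instance has to be unrolled 12 / 6 = 2 times
   before whole vectors are filled.  */
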
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
                           gimple *stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  tree vectype, scalar_type = NULL_TREE;
  gimple *next;
  unsigned int i;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple *> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
        {
          scalar_type = TREE_TYPE (DR_REF (dr));
          vectype = get_vectype_for_scalar_type (scalar_type);
        }
      else
        {
          gcc_assert (is_a <loop_vec_info> (vinfo));
          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
        }

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unsupported data-type ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return false;
    }
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
        {
          if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
              && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
            scalar_stmts.safe_push (
                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
          else
            scalar_stmts.safe_push (next);
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
        }
      /* Mark the first element of the reduction chain as reduction to properly
         transform the node.  In the reduction analysis phase only the last
         element of the chain is marked as reduction.  */
      if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next); i++)
        scalar_stmts.safe_push (next);
    }

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  bst_fail = new scalar_stmts_set_t ();
  poly_uint64 max_nunits = nunits;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
                              &max_nunits, &loads, matches, &npermutes,
                              NULL, max_tree_size);
  delete bst_fail;
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      poly_uint64 unrolling_factor
        = calculate_unrolling_factor (max_nunits, group_size);

      if (maybe_ne (unrolling_factor, 1U)
          && is_a <bb_vec_info> (vinfo))
        {
          unsigned HOST_WIDE_INT const_max_nunits;
          if (!max_nunits.is_constant (&const_max_nunits)
              || const_max_nunits > group_size)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: store group "
                                 "size not a multiple of the vector size "
                                 "in basic block SLP\n");
              vect_free_slp_tree (node);
              loads.release ();
              return false;
            }
          /* Fatal mismatch.  */
          matches[group_size / const_max_nunits * const_max_nunits] = false;
          vect_free_slp_tree (node);
          loads.release ();
        }
      else
        {
          /* Create a new SLP instance.  */
          new_instance = XNEW (struct _slp_instance);
          SLP_INSTANCE_TREE (new_instance) = node;
          SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
          SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
          SLP_INSTANCE_LOADS (new_instance) = loads;

          /* Compute the load permutation.  */
          slp_tree load_node;
          bool loads_permuted = false;
          FOR_EACH_VEC_ELT (loads, i, load_node)
            {
              vec<unsigned> load_permutation;
              int j;
              gimple *load, *first_stmt;
              bool this_load_permuted = false;
              load_permutation.create (group_size);
              first_stmt = GROUP_FIRST_ELEMENT
                  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
              FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
                {
                  int load_place = vect_get_place_in_interleaving_chain
                                     (load, first_stmt);
                  gcc_assert (load_place != -1);
                  if (load_place != j)
                    this_load_permuted = true;
                  load_permutation.safe_push (load_place);
                }
              if (!this_load_permuted
                  /* The load requires permutation when unrolling exposes
                     a gap either because the group is larger than the SLP
                     group-size or because there is a gap between the
                     groups.  */
                  && (known_eq (unrolling_factor, 1U)
                      || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
                          && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
                {
                  load_permutation.release ();
                  continue;
                }
              SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
              loads_permuted = true;
            }

          if (loads_permuted)
            {
              if (!vect_supported_load_permutation_p (new_instance))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: unsupported load "
                                       "permutation ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
                                        TDF_SLIM, stmt, 0);
                    }
                  vect_free_slp_instance (new_instance);
                  return false;
                }
            }

          /* If the loads and stores can be handled with load/store-lanes
             instructions do not generate this SLP instance.  */
          if (is_a <loop_vec_info> (vinfo)
              && loads_permuted
              && dr && vect_store_lanes_supported (vectype, group_size))
            {
              slp_tree load_node;
              FOR_EACH_VEC_ELT (loads, i, load_node)
                {
                  gimple *first_stmt = GROUP_FIRST_ELEMENT
                      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
                  stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
                  /* Use SLP for strided accesses (or if we
                     can't load-lanes).  */
                  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
                      || ! vect_load_lanes_supported
                            (STMT_VINFO_VECTYPE (stmt_vinfo),
                             GROUP_SIZE (stmt_vinfo)))
                    break;
                }
              if (i == loads.length ())
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Built SLP cancelled: can use "
                                     "load/store-lanes\n");
                  vect_free_slp_instance (new_instance);
                  return false;
                }
            }

          vinfo->slp_instances.safe_push (new_instance);

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Final SLP tree for instance:\n");
              vect_print_slp_tree (MSG_NOTE, vect_location, node);
            }

          return true;
        }
    }
  else
    {
      /* Failed to SLP.  */
      /* Free the allocated memory.  */
      scalar_stmts.release ();
      loads.release ();
    }

  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
      && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && nunits.is_constant (&const_nunits))
    {
      /* We consider breaking the group only on VF boundaries from the existing
         start.  */
      for (i = 0; i < group_size; i++)
        if (!matches[i]) break;

      if (i >= const_nunits && i < group_size)
        {
          /* Split into two groups at the first vector boundary before i.  */
          gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
          unsigned group1_size = i & ~(const_nunits - 1);

          gimple *rest = vect_split_slp_store_group (stmt, group1_size);
          bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
          /* If the first non-match was in the middle of a vector,
             skip the rest of that vector.  */
          if (group1_size < i)
            {
              i = group1_size + const_nunits;
              if (i < group_size)
                rest = vect_split_slp_store_group (rest, const_nunits);
            }
          if (i < group_size)
            res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
          return res;
        }
      /* Even though the first vector did not all match, we might be able to
         SLP (some) of the remainder.  FORNOW ignore this possibility.  */
    }

  return false;
}

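/* Illustrative walk-through of the splitting above (not from the original
   source): with const_nunits == 4 and a group of 10 stores whose first
   mismatch is at i == 6, group1_size becomes 6 & ~3 == 4.  Stores 0..3 are
   split off and re-analyzed as their own instance, the vector containing
   the mismatch (elements 4..7) is split off and skipped, and analysis
   recurses on the remaining stores 8..9.  */
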
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  gimple *first_element;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    vect_analyze_slp_instance (vinfo, first_element, max_tree_size);

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
        {
          /* Find SLP sequences starting from reduction chains.  */
          FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
            if (! vect_analyze_slp_instance (vinfo, first_element,
                                             max_tree_size))
              {
                /* Dissolve reduction chain group.  */
                gimple *next, *stmt = first_element;
                while (stmt)
                  {
                    stmt_vec_info vinfo = vinfo_for_stmt (stmt);
                    next = GROUP_NEXT_ELEMENT (vinfo);
                    GROUP_FIRST_ELEMENT (vinfo) = NULL;
                    GROUP_NEXT_ELEMENT (vinfo) = NULL;
                    stmt = next;
                  }
                STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
                  = vect_internal_def;
              }
        }

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1)
        vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
                                   max_tree_size);
    }

  return true;
}

/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i;
  poly_uint64 unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_make_slp_decision ===\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      /* All unroll factors have the form current_vector_size * X for some
         rational X, so they must have a common multiple.  */
      unrolling_factor
        = force_common_multiple (unrolling_factor,
                                 SLP_INSTANCE_UNROLLING_FACTOR (instance));

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "Decided to SLP %d instances. Unrolling factor ",
                       decided_to_slp);
      dump_dec (MSG_NOTE, unrolling_factor);
      dump_printf (MSG_NOTE, "\n");
    }

  return (decided_to_slp > 0);
}

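/* E.g. (illustrative): two instances whose unrolling factors are 2 and 3
   force a common multiple of 6, so the loop must be unrolled six times
   for both SLP instances to operate on whole vectors.  */
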
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int j;

  /* Propagate hybrid down the SLP tree.  */
  if (stype == hybrid)
    ;
  else if (HYBRID_SLP_STMT (stmt_vinfo))
    stype = hybrid;
  else
    {
      /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
      gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
      /* If we get a pattern stmt here we have to use the LHS of the
         original stmt for immediate uses.  */
      if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
          && STMT_VINFO_RELATED_STMT (stmt_vinfo))
        stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      tree def;
      if (gimple_code (stmt) == GIMPLE_PHI)
        def = gimple_phi_result (stmt);
      else
        def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
      if (def)
        FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
          {
            if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
              continue;
            use_vinfo = vinfo_for_stmt (use_stmt);
            if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
                && STMT_VINFO_RELATED_STMT (use_vinfo))
              use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
            if (!STMT_SLP_TYPE (use_vinfo)
                && (STMT_VINFO_RELEVANT (use_vinfo)
                    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
                && !(gimple_code (use_stmt) == GIMPLE_PHI
                     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
                                     "def in non-SLP stmt: ");
                    dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
                  }
                stype = hybrid;
              }
          }
    }

  if (stype == hybrid
      && !HYBRID_SLP_STMT (stmt_vinfo))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
      STMT_SLP_TYPE (stmt_vinfo) = hybrid;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
      vect_detect_hybrid_slp_stmts (child, i, stype);
}

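/* A hypothetical situation that triggers the marking above:

     a[i] = t;        // t's def feeds an SLP group (pure SLP)
     sum += t;        // non-SLP use: loop-vectorized reduction

   The def of t is used by both an SLP group and a non-SLP stmt, so it must
   be vectorized by both schemes and is therefore marked hybrid.  */
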
/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */

static tree
vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *) data;
  struct loop *loopp = (struct loop *) wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  if (TREE_CODE (*tp) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (*tp))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
      if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
          && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
              dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
            }
          STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
        }
    }

  return NULL_TREE;
}

static tree
vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
                          walk_stmt_info *)
{
  stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
  /* If the stmt is in a SLP instance then this isn't a reason
     to mark use definitions in other SLP instances as hybrid.  */
  if (! STMT_SLP_TYPE (use_vinfo)
      && (STMT_VINFO_RELEVANT (use_vinfo)
          || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
      && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
            && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
    ;
  else
    *handled = true;
  return NULL_TREE;
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_detect_hybrid_slp ===\n");

  /* First walk all pattern stmts in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          if (STMT_VINFO_IN_PATTERN_P (stmt_info))
            {
              walk_stmt_info wi;
              memset (&wi, 0, sizeof (wi));
              wi.info = LOOP_VINFO_LOOP (loop_vinfo);
              gimple_stmt_iterator gsi2
                = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
              walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
                                vect_detect_hybrid_slp_1, &wi);
              walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                               vect_detect_hybrid_slp_2,
                               vect_detect_hybrid_slp_1, &wi);
            }
        }
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
        vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
                                      i, pure_slp);
    }
}

/* Initialize a bb_vec_info struct for the statements between
   REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */

_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
                            gimple_stmt_iterator region_end_in)
  : vec_info (vec_info::bb, init_cost (NULL)),
    bb (gsi_bb (region_begin_in)),
    region_begin (region_begin_in),
    region_end (region_end_in)
{
  gimple_stmt_iterator gsi;

  for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
    }

  bb->aux = this;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

_bb_vec_info::~_bb_vec_info ()
{
  for (gimple_stmt_iterator si = region_begin;
       gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
    {
      gimple *stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
        /* Free stmt_vec_info.  */
        free_stmt_vec_info (stmt);

      /* Reset region marker.  */
      gimple_set_uid (stmt, -1);
    }

  bb->aux = NULL;
}

/* Analyze statements contained in SLP tree NODE after recursively analyzing
   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.

   Return true if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
                                  slp_instance node_instance)
{
  bool dummy;
  int i, j;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
      return false;

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gcc_assert (stmt_info);
  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

  /* For BB vectorization vector types are assigned here.
     Memory accesses already got their vector type assigned
     in vect_analyze_data_refs.  */
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  if (bb_vinfo
      && ! STMT_VINFO_DATA_REF (stmt_info))
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "get vectype for scalar type:  ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
          dump_printf (MSG_NOTE, "\n");
        }

      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "not SLPed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "vectype:  ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
          dump_printf (MSG_NOTE, "\n");
        }

      gimple *sstmt;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
        STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
    }

  /* Calculate the number of vector statements to be created for the
     scalar stmts in this node.  For SLP reductions it is equal to the
     number of vector statements in the children (which has already been
     calculated by the recursive call).  Otherwise it is the number of
     scalar elements in one scalar iteration (GROUP_SIZE) multiplied by
     VF divided by the number of elements in a vector.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    SLP_TREE_NUMBER_OF_VEC_STMTS (node)
      = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
  else
    {
      poly_uint64 vf;
      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
        vf = loop_vinfo->vectorization_factor;
      else
        vf = 1;
      unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
        = vect_get_num_vectors (vf * group_size, vectype);
    }

  /* Push SLP node def-type to stmt operands.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
        = SLP_TREE_DEF_TYPE (child);
  bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
  /* Restore def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
        = vect_internal_def;

  return res;
}

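/* Worked instance of the computation above (illustrative): with
   GROUP_SIZE == 2, a vectorization factor of 8 and a 4-lane vectype,
   vect_get_num_vectors (8 * 2, vectype) yields 16 / 4 == 4 vector
   stmts for the node.  */
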
/* Analyze statements in SLP instances of VINFO.  Return true if the
   operations are supported.  */

bool
vect_slp_analyze_operations (vec_info *vinfo)
{
  slp_instance instance;
  int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_slp_analyze_operations ===\n");

  for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
    {
      if (!vect_slp_analyze_node_operations (vinfo,
                                             SLP_INSTANCE_TREE (instance),
                                             instance))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "removing SLP instance operations starting from: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                            SLP_TREE_SCALAR_STMTS
                              (SLP_INSTANCE_TREE (instance))[0], 0);
          vect_free_slp_instance (instance);
          vinfo->slp_instances.ordered_remove (i);
        }
      else
        {
          /* Compute the costs of the SLP instance.  */
          vect_analyze_slp_cost (instance, vinfo->target_cost_data);
          i++;
        }
    }

  return !vinfo->slp_instances.is_empty ();
}

/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  */

static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
                         slp_tree node, vec<bool, va_heap> *life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple *stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      if ((*life)[i])
        continue;

      /* If there is a non-vectorized use of the defs then the scalar
         stmt is kept live in which case we do not account it or any
         required defs in the SLP children in the scalar cost.  This
         way we make the vectorization more costly when compared to
         the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
        {
          imm_use_iterator use_iter;
          gimple *use_stmt;
          FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
            if (!is_gimple_debug (use_stmt)
                && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
                                             use_stmt)
                    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
              {
                (*life)[i] = true;
                BREAK_FROM_IMM_USE_STMT (use_iter);
              }
        }
      if ((*life)[i])
        continue;

      /* Count scalar stmts only once.  */
      if (gimple_visited_p (stmt))
        continue;
      gimple_set_visited (stmt, true);

      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            stmt_cost = vect_get_stmt_cost (scalar_load);
          else
            stmt_cost = vect_get_stmt_cost (scalar_store);
        }
      else
        stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  auto_vec<bool, 20> subtree_life;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          /* Do not directly pass LIFE to the recursive call, copy it to
             confine changes in the callee to the current child/subtree.  */
          subtree_life.safe_splice (*life);
          scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
          subtree_life.truncate (0);
        }
    }

  return scalar_cost;
}

/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
                                              SLP_INSTANCE_TREE (instance),
                                              &life);
    }

  /* Unset visited flag.  */
  for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    gimple_set_visited (gsi_stmt (gsi), false);

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
               &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
                   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost does not exceed the cost of the
     scalar version.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant uses
     are free on the scalar side but cost a load on the vector side for
     example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}

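/* For example (illustrative): with vec_prologue_cost == 1,
   vec_inside_cost == 4, vec_epilogue_cost == 0 and scalar_cost == 6,
   the vector cost 1 + 4 + 0 == 5 <= 6, so the block is vectorized;
   had both sides been 6, the tie would also go to the vector side.  */
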
/* Check if the basic block can be vectorized.  Returns a bb_vec_info
   if so and sets fatal to true if failure is independent of
   current_vector_size.  */

static bb_vec_info
vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
                       gimple_stmt_iterator region_end,
                       vec<data_reference_p> datarefs, int n_stmts,
                       bool &fatal)
{
  bb_vec_info bb_vinfo;
  slp_instance instance;
  int i;
  poly_uint64 min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: too many instructions in "
                         "basic block.\n");
      free_data_refs (datarefs);
      return NULL;
    }

  bb_vinfo = new _bb_vec_info (region_begin, region_end);
  if (!bb_vinfo)
    return NULL;

  BB_VINFO_DATAREFS (bb_vinfo) = datarefs;

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data-ref in basic "
                         "block.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: not enough data-refs in "
                         "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data access in "
                         "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* If there are no grouped stores in the region there is no need
     to continue with pattern recog as vect_analyze_slp will fail
     anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: no grouped stores in "
                         "basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Failed to SLP the basic block.\n");
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: failed to find SLP opportunities "
                           "in basic block.\n");
        }
      delete bb_vinfo;
      return NULL;
    }

  vect_record_base_alignments (bb_vinfo);

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      if (! vect_slp_analyze_and_verify_instance_alignment (instance)
          || ! vect_slp_analyze_instance_dependence (instance))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "removing SLP instance operations starting from: ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
                            SLP_TREE_SCALAR_STMTS
                              (SLP_INSTANCE_TREE (instance))[0], 0);
          vect_free_slp_instance (instance);
          BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
          continue;
        }

      /* Mark all the statements that we want to vectorize as pure SLP and
         relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    {
      delete bb_vinfo;
      return NULL;
    }

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad operation in basic block.\n");
      delete bb_vinfo;
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization is not "
                         "profitable.\n");
      delete bb_vinfo;
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}

/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  gimple_stmt_iterator gsi;
  bool any_vectorized = false;
  auto_vector_sizes vector_sizes;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
  unsigned int next_size = 0;

  gsi = gsi_start_bb (bb);

  poly_uint64 autodetected_vector_size = 0;
  while (1)
    {
      if (gsi_end_p (gsi))
        break;

      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      for (; !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;
          insns++;

          if (gimple_location (stmt) != UNKNOWN_LOCATION)
            vect_location = gimple_location (stmt);

          if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
            break;
        }

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
        {
          gsi_next (&gsi);
          continue;
        }

      gimple_stmt_iterator region_end = gsi;

      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
                                        datarefs, insns, fatal);
      if (bb_vinfo
          && dbg_cnt (vect_slp))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");

          vect_schedule_slp (bb_vinfo);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "basic block part vectorized\n");

          vectorized = true;
        }
      delete bb_vinfo;

      any_vectorized |= vectorized;

      if (next_size == 0)
        autodetected_vector_size = current_vector_size;

      if (next_size < vector_sizes.length ()
          && known_eq (vector_sizes[next_size], autodetected_vector_size))
        next_size += 1;

      if (vectorized
          || next_size == vector_sizes.length ()
          || known_eq (current_vector_size, 0U)
          /* If vect_slp_analyze_bb_1 signaled that analysis for all
             vector sizes will fail do not bother iterating.  */
          || fatal)
        {
          if (gsi_end_p (region_end))
            break;

          /* Skip the unhandled stmt.  */
          gsi_next (&gsi);

          /* And reset vector sizes.  */
          current_vector_size = 0;
          next_size = 0;
        }
      else
        {
          /* Try the next biggest vector size.  */
          current_vector_size = vector_sizes[next_size++];
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "***** Re-trying analysis with "
                               "vector size ");
              dump_dec (MSG_NOTE, current_vector_size);
              dump_printf (MSG_NOTE, "\n");
            }

          /* Start over.  */
          gsi = region_begin;
        }
    }

  return any_vectorized;
}

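/* As an illustration (target-dependent, so only an assumption): on a
   target advertising 32-, 16- and 8-byte vectors, a region that fails
   to SLP with the autodetected 32-byte size is re-analyzed with 16 and
   then 8 bytes before the vectorizer gives up on it.  */
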
/* Return true if the vector type of the boolean constant that is the OPNUM
   operand in statement STMT is a boolean vector.  */

static bool
vect_mask_constant_operand_p (gimple *stmt, int opnum)
{
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  enum tree_code code = gimple_expr_code (stmt);
  tree op, vectype;
  gimple *def_stmt;
  enum vect_def_type dt;

  /* For comparison and COND_EXPR type is chosen depending
     on the other comparison operand.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    {
      if (opnum)
        op = gimple_assign_rhs1 (stmt);
      else
        op = gimple_assign_rhs2 (stmt);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
                               &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  if (code == COND_EXPR)
    {
      tree cond = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (cond) == SSA_NAME)
        op = cond;
      else if (opnum)
        op = TREE_OPERAND (cond, 1);
      else
        op = TREE_OPERAND (cond, 0);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
                               &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}

/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
   REDUC_INDEX is the index of the reduction operand in the statements, unless
   it is -1.  */

static void
vect_get_constant_vectors (tree op, slp_tree slp_node,
                           vec<tree> *vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors)
{
  vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
  gimple *stmt = stmts[0];
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  unsigned int nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = stmts.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  vec<tree> voprnds;
  voprnds.create (number_of_vectors);
  bool constant_p, is_store;
  tree neutral_op = NULL;
  enum tree_code code = gimple_expr_code (stmt);
  gimple_seq ctor_seq = NULL;

  /* Check if vector type is a boolean vector.  */
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (stmt, op_num))
    vector_type
      = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
  else
    vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
  /* Enforced by vect_get_and_check_slp_defs.  */
  nunits = TYPE_VECTOR_SUBPARTS (vector_type);

  if (STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      is_store = true;
      op = gimple_assign_rhs1 (stmt);
    }
  else
    is_store = false;

  gcc_assert (op);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
        {
          if (is_store)
            op = gimple_assign_rhs1 (stmt);
          else
            {
              switch (code)
                {
                  case COND_EXPR:
                    {
                      tree cond = gimple_assign_rhs1 (stmt);
                      if (TREE_CODE (cond) == SSA_NAME)
                        op = gimple_op (stmt, op_num + 1);
                      else if (op_num == 0 || op_num == 1)
                        op = TREE_OPERAND (cond, op_num);
                      else
                        {
                          if (op_num == 2)
                            op = gimple_assign_rhs2 (stmt);
                          else
                            op = gimple_assign_rhs3 (stmt);
                        }
                    }
                    break;

                  case CALL_EXPR:
                    op = gimple_call_arg (stmt, op_num);
                    break;

                  case LSHIFT_EXPR:
                  case RSHIFT_EXPR:
                  case LROTATE_EXPR:
                  case RROTATE_EXPR:
                    op = gimple_op (stmt, op_num + 1);
                    /* Unlike the other binary operators, shifts/rotates have
                       the shift count being int, instead of the same type as
                       the lhs, so make sure the scalar is the right type if
                       we are dealing with vectors of
                       long long/long/short/char.  */
                    if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
                      op = fold_convert (TREE_TYPE (vector_type), op);
                    break;

                  default:
                    op = gimple_op (stmt, op_num + 1);
                    break;
                }
            }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
          tree orig_op = op;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
                {
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      /* Can't use VIEW_CONVERT_EXPR for booleans because
                         of possibly different sizes of scalar value and
                         vector element.  */
                      if (integer_zerop (op))
                        op = build_int_cst (TREE_TYPE (vector_type), 0);
                      else if (integer_onep (op))
                        op = build_all_ones_cst (TREE_TYPE (vector_type));
                      else
                        gcc_unreachable ();
                    }
                  else
                    op = fold_unary (VIEW_CONVERT_EXPR,
                                     TREE_TYPE (vector_type), op);
                  gcc_assert (op && CONSTANT_CLASS_P (op));
                }
              else
                {
                  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
                  gimple *init_stmt;
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      tree true_val
                        = build_all_ones_cst (TREE_TYPE (vector_type));
                      tree false_val
                        = build_zero_cst (TREE_TYPE (vector_type));
                      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
                      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                       op, true_val,
                                                       false_val);
                    }
                  else
                    {
                      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
                                   op);
                      init_stmt
                        = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
                                               op);
                    }
                  gimple_seq_add_stmt (&ctor_seq, init_stmt);
                  op = new_temp;
                }
            }
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
          if (TREE_CODE (orig_op) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
              && STMT_VINFO_BB_VINFO (stmt_vinfo)
              && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
                  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
            place_after_defs = true;

          if (number_of_places_left_in_vector == 0)
            {
              if (constant_p)
                vec_cst = elts.build ();
              else
                {
                  vec<constructor_elt, va_gc> *v;
                  unsigned k;
                  vec_alloc (v, nunits);
                  for (k = 0; k < nunits; ++k)
                    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
                  vec_cst = build_constructor (vector_type, v);
                }
              tree init;
              gimple_stmt_iterator gsi;
              if (place_after_defs)
                {
                  gsi = gsi_for_stmt
                          (vect_find_last_scalar_stmt_in_slp (slp_node));
                  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
                }
              else
                init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
              if (ctor_seq != NULL)
                {
                  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
                  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
                                                        GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
              voprnds.quick_push (init);
              place_after_defs = false;
              number_of_places_left_in_vector = nunits;
              constant_p = true;
              elts.new_vector (vector_type, nunits, 1);
              elts.quick_grow (nunits);
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}

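/* Worked instance of the NUMBER_OF_COPIES formula above (illustrative):
   with NUNITS == 4, NUMBER_OF_VECTORS == 1 and GROUP_SIZE == 2, we get
   4 * 1 / 2 == 2 copies, i.e. the vector {s1, s2, s1, s2} from the
   comment above.  */
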
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  tree vec_oprnd;
  gimple *vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
        vec_oprnd = gimple_phi_result (vec_def_stmt);
      else
        vec_oprnd = gimple_get_lhs (vec_def_stmt);
      vec_oprnds->quick_push (vec_oprnd);
    }
}

/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
                   vec<vec<tree> > *vec_oprnds)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  unsigned int child_index = 0;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool vectorized_defs;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  We
         check if the LHS of the first stmt of the next child matches OPRND.
         If it does, we found the correct child.  Otherwise, we call
         vect_get_constant_vectors (), and not advance CHILD_INDEX in order
         to check this child node for the next operand.  */
      vectorized_defs = false;
      if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

          /* We have to check both pattern and original def, if available.  */
          if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
            {
              gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
              gimple *related
                = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
              tree first_def_op;

              if (gimple_code (first_def) == GIMPLE_PHI)
                first_def_op = gimple_phi_result (first_def);
              else
                first_def_op = gimple_get_lhs (first_def);
              if (operand_equal_p (oprnd, first_def_op, 0)
                  || (related
                      && operand_equal_p (oprnd,
                                          gimple_get_lhs (related), 0)))
                {
                  /* The number of vector defs is determined by the number of
                     vector statements in the node from which we get those
                     statements.  */
                  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
                  vectorized_defs = true;
                  child_index++;
                }
            }
          else
            child_index++;
        }

      if (!vectorized_defs)
        {
          if (i == 0)
            {
              number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
              /* Number of vector stmts was calculated according to LHS in
                 vect_schedule_slp_instance (), fix it by replacing LHS with
                 RHS, if necessary.  See vect_get_smallest_scalar_type () for
                 details.  */
              vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                             &rhs_size_unit);
              if (rhs_size_unit != lhs_size_unit)
                {
                  number_of_vects *= rhs_size_unit;
                  number_of_vects /= lhs_size_unit;
                }
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we are
         looking for initial loop invariant values.  */
      if (vectorized_defs)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects);

      vec_oprnds->quick_push (vec_defs);
    }
}

/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, poly_uint64 vf,
                              slp_instance slp_node_instance, bool analyze_only,
                              unsigned *n_perms)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int nunits, vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int mask_element;
  machine_mode mode;
  unsigned HOST_WIDE_INT const_vf;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));

  mode = TYPE_MODE (vectype);

  /* At the moment, all permutations are represented using per-element
     indices, so we can't cope with variable vectorization factors.  */
  if (!vf.is_constant (&const_vf))
    return false;

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  vec_perm_builder mask (nunits, nunits, 1);
  mask.quick_grow (nunits);
  vec_perm_indices indices;

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
         i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  for (unsigned int j = 0; j < const_vf; j++)
    {
      for (int k = 0; k < group_size; k++)
        {
          int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
                   + j * STMT_VINFO_GROUP_SIZE (stmt_info));
          vec_index = i / nunits;
          mask_element = i % nunits;
          if (vec_index == first_vec_index
              || first_vec_index == -1)
            {
              first_vec_index = vec_index;
            }
          else if (vec_index == second_vec_index
                   || second_vec_index == -1)
            {
              second_vec_index = vec_index;
              mask_element += nunits;
            }
          else
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "permutation requires at "
                                   "least three vectors ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              gcc_assert (analyze_only);
              return false;
            }

          gcc_assert (mask_element >= 0
                      && mask_element < 2 * nunits);
          if (mask_element != index)
            noop_p = false;
          mask[index++] = mask_element;

          if (index == nunits && !noop_p)
            {
              indices.new_vector (mask, 2, nunits);
              if (!can_vec_perm_const_p (mode, indices))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported vect permute { ");
                      for (i = 0; i < nunits; ++i)
                        {
                          dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
                          dump_printf (MSG_MISSED_OPTIMIZATION, " ");
                        }
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
                  gcc_assert (analyze_only);
                  return false;
                }

              ++*n_perms;
            }

          if (index == nunits)
            {
              if (!analyze_only)
                {
                  tree mask_vec = NULL_TREE;

                  if (! noop_p)
                    mask_vec = vec_perm_indices_to_tree (mask_type, indices);

                  if (second_vec_index == -1)
                    second_vec_index = first_vec_index;

                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index];
                  tree second_vec = dr_chain[second_vec_index];
                  gimple *perm_stmt;
                  if (! noop_p)
                    {
                      tree perm_dest
                        = vect_create_destination_var (gimple_assign_lhs (stmt),
                                                       vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      perm_stmt = gimple_build_assign (perm_dest,
                                                       VEC_PERM_EXPR,
                                                       first_vec, second_vec,
                                                       mask_vec);
                      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
                    }
                  else
                    /* If mask was NULL_TREE generate the requested
                       identity transform.  */
                    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
                }

              index = 0;
              noop_p = true;
              first_vec_index = -1;
              second_vec_index = -1;
            }
        }
    }

  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            scalar_stmts_to_slp_tree_map_t *bst_map)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  /* See if we have already vectorized the same set of stmts and reuse their
     vectorized stmts.  */
  slp_tree &leader
    = bst_map->get_or_insert (SLP_TREE_SCALAR_STMTS (node).copy ());
  if (leader)
    {
      SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (leader));
      return false;
    }

  leader = node;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, bst_map);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE,vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
        if (gimple_assign_rhs_code (ostmt) != code0)
          {
            mask.quick_push (1);
            ocode = gimple_assign_rhs_code (ostmt);
          }
        else
          mask.quick_push (0);
      if (ocode != ERROR_MARK)
        {
          vec<gimple *> v0;
          vec<gimple *> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
              (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              /* Enforced by vect_build_slp_tree, which rejects variable-length
                 vectors for SLP_TREE_TWO_OPERATORS.  */
              unsigned int const_nunits = nunits.to_constant ();
              tree_vector_builder melts (mvectype, const_nunits, 1);
              for (l = 0; l < const_nunits; ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  tree t = build_int_cst (meltype,
                                          mask[k++] * const_nunits + l);
                  melts.quick_push (t);
                }
              tmask = melts.build ();

              /* ??? Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]),
                                           gimple_assign_lhs (v1[j]), tmask);
              vect_finish_stmt_generation (stmt, vstmt, &si);
              SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
            }
          v0.release ();
          v1.release ();
          return false;
        }
    }
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}

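/* Sketch of the two-operator merge above (illustrative): for the group
   { a0+b0, a1-b1, a2+b2, a3-b3 } with a 4-lane vectype, MASK is
   {0,1,0,1}; v0 holds the all-PLUS vector, v1 the all-MINUS one, and the
   VEC_PERM_EXPR selector {0,5,2,7} picks lane l from v0 when mask[l] is
   0 and from v1 when it is 1.  */
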
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      scalar_stmts_to_slp_tree_map_t *bst_map
        = new scalar_stmts_to_slp_tree_map_t ();
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, bst_map);
      delete bst_map;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ???  Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ???  For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);