/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
#include "dump-context.h"
static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *,
					  slp_tree, stmt_vector_for_cost *);
/* Initialize a SLP node.  */

_slp_tree::_slp_tree ()
  SLP_TREE_SCALAR_STMTS (this) = vNULL;
  SLP_TREE_SCALAR_OPS (this) = vNULL;
  SLP_TREE_VEC_STMTS (this) = vNULL;
  SLP_TREE_VEC_DEFS (this) = vNULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
  SLP_TREE_CHILDREN (this) = vNULL;
  SLP_TREE_LOAD_PERMUTATION (this) = vNULL;
  SLP_TREE_LANE_PERMUTATION (this) = vNULL;
  SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def;
  SLP_TREE_CODE (this) = ERROR_MARK;
  SLP_TREE_VECTYPE (this) = NULL_TREE;
  SLP_TREE_REPRESENTATIVE (this) = NULL;
/* Tear down a SLP node.  */

_slp_tree::~_slp_tree ()
  SLP_TREE_CHILDREN (this).release ();
  SLP_TREE_SCALAR_STMTS (this).release ();
  SLP_TREE_SCALAR_OPS (this).release ();
  SLP_TREE_VEC_STMTS (this).release ();
  SLP_TREE_VEC_DEFS (this).release ();
  SLP_TREE_LOAD_PERMUTATION (this).release ();
  SLP_TREE_LANE_PERMUTATION (this).release ();
/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

vect_free_slp_tree (slp_tree node, bool final_p)
  if (--node->refcnt != 0)

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	  gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
	  STMT_VINFO_NUM_SLP_USES (stmt_info)--;
/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

vect_free_slp_instance (slp_instance instance, bool final_p)
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
/* Create an SLP node for SCALAR_STMTS.  */

vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
  slp_tree node = new _slp_tree;
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  SLP_TREE_REPRESENTATIVE (node) = scalar_stmts[0];
  SLP_TREE_LANES (node) = scalar_stmts.length ();

  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;
/* Create an SLP node for OPS.  */

vect_create_new_slp_node (vec<tree> ops)
  slp_tree node = new _slp_tree;
  SLP_TREE_SCALAR_OPS (node) = ops;
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  SLP_TREE_LANES (node) = ops.length ();
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   stmt.  */
typedef struct _slp_oprnd_info
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  enum vect_def_type first_dt;
/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */

static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->ops.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->any_pattern = false;
      oprnds_info.quick_push (oprnd_info);
/* Free operands info.  */

vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
      oprnd_info->def_stmts.release ();
      oprnd_info->ops.release ();
      XDELETE (oprnd_info);

  oprnds_info.release ();
/* Return true if STMTS contains a pattern statement.  */

vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
  stmt_vec_info stmt_info;

  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (is_pattern_stmt_p (stmt_info))
/* Return true when all lanes in the external or constant NODE have
   the same value.  */

vect_slp_tree_uniform_p (slp_tree node)
  gcc_assert (SLP_TREE_DEF_TYPE (node) == vect_constant_def
	      || SLP_TREE_DEF_TYPE (node) == vect_external_def);

  /* Pre-existing vectors.  */
  if (SLP_TREE_SCALAR_OPS (node).is_empty ())

  tree op, first = NULL_TREE;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
    else if (!operand_equal_p (first, op, 0))
/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */
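/* For example (illustrative note, not from the original sources): for an
   interleaving chain accessing a[0], a[1], a[3] (with a gap at a[2]) and
   FIRST_STMT_INFO being the a[0] access, the place of the a[1] access is 1
   and the place of the a[3] access is 3, since the DR_GROUP_GAP of each
   element counts the skipped slots.  */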
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
				      stmt_vec_info first_stmt_info)
  stmt_vec_info next_stmt_info = first_stmt_info;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))

      if (next_stmt_info == stmt_info)
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	result += DR_GROUP_GAP (next_stmt_info);
  while (next_stmt_info);
/* Check whether it is possible to load COUNT elements of type ELT_TYPE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
				tree elt_type, unsigned int *nvectors_out,
				tree *vector_type_out,
  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))

  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
  unsigned int nvectors = 1;
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
	  /* Get the natural vector type for this SLP group size.  */
	  tree int_type = build_nonstandard_integer_type
	    (GET_MODE_BITSIZE (int_mode), 1);
	    = get_vectype_for_scalar_type (vinfo, int_type, count);
	      && VECTOR_MODE_P (TYPE_MODE (vector_type))
	      && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
			   GET_MODE_SIZE (base_vector_mode)))
	      /* Try fusing consecutive sequences of COUNT / NVECTORS elements
		 together into elements of type INT_TYPE and using the result
		 to build NVECTORS vectors.  */
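	      /* Illustrative note (not from the original sources): the two
		 selectors built below encode the usual interleave-low and
		 interleave-high permutes of two input vectors, e.g. for
		 NELTS == 8 roughly { 0, 8, 1, 9, 2, 10, ... } and
		 { 4, 12, 5, 13, 6, 14, ... }.  */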
	      poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
	      vec_perm_builder sel1 (nelts, 2, 3);
	      vec_perm_builder sel2 (nelts, 2, 3);
	      poly_int64 half_nelts = exact_div (nelts, 2);
	      for (unsigned int i = 0; i < 3; ++i)
		  sel1.quick_push (i + nelts);
		  sel2.quick_push (half_nelts + i);
		  sel2.quick_push (half_nelts + i + nelts);
	      vec_perm_indices indices1 (sel1, 2, nelts);
	      vec_perm_indices indices2 (sel2, 2, nelts);
	      if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
		  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
		    *nvectors_out = nvectors;
		    *vector_type_out = vector_type;
		  permutes[0] = vect_gen_perm_mask_checked (vector_type,
		  permutes[1] = vect_gen_perm_mask_checked (vector_type,
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
			     vec<stmt_vec_info> stmts, unsigned stmt_num,
			     vec<slp_oprnd_info> *oprnds_info)
  stmt_vec_info stmt_info = stmts[stmt_num];
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
      number_of_oprnds = gimple_call_num_args (stmt);
      if (gimple_call_internal_p (stmt))
	  internal_fn ifn = gimple_call_internal_fn (stmt);
	  commutative_op = first_commutative_argument (ifn);

	  /* Masked load, only look at mask.  */
	  if (ifn == IFN_MASK_LOAD)
	      number_of_oprnds = 1;
	      /* Mask operand index.  */
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
	 could result in different comparison code to the first stmt.  */
      if (code == COND_EXPR
	  && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
	  first_op_cond = true;
	commutative_op = commutative_tree_code (code) ? 0U : -1U;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
	  /* Map indicating how operands of cond_expr should be swapped.  */
	  int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
	  int *map = maps[*swap];
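	  /* Illustrative note (not from the original sources): maps[1]
	     swaps the two comparison operands while maps[2] swaps the
	     then/else arms; *SWAP selects which map is used, as set up
	     for COND_EXPR stmts by vect_build_slp_tree_1.  */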
	    oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
					     first_op_idx), map[i]);
	    oprnd = gimple_op (stmt_info->stmt, map[i]);
	oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
      if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
	oprnd = TREE_OPERAND (oprnd, 0);

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: can't analyze def for %T\n",

      if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
	oprnd_info->any_pattern = true;

	  /* For the swapping logic below force vect_reduction_def
	     for the reduction op in a SLP reduction group.  */
	  if (!STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
	    dt = vect_reduction_def;
	  oprnd_info->first_dt = dt;
	  oprnd_info->first_op_type = TREE_TYPE (oprnd);
      /* Not first stmt of the group, check that the def-stmt/s match
	 the def-stmt/s of the first stmt.  Allow different definition
	 types for reduction chains: the first stmt must be a
	 vect_reduction_def (a phi node), and the rest
	 end in the reduction chain.  */
      tree type = TREE_TYPE (oprnd);
      if ((oprnd_info->first_dt != dt
	   && !(oprnd_info->first_dt == vect_reduction_def
		&& !STMT_VINFO_DATA_REF (stmt_info)
		&& REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		&& !STMT_VINFO_DATA_REF (def_stmt_info)
		&& (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
		    == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	   && !((oprnd_info->first_dt == vect_external_def
		 || oprnd_info->first_dt == vect_constant_def)
		&& (dt == vect_external_def
		    || dt == vect_constant_def)))
	  || !types_compatible_p (oprnd_info->first_op_type, type)
	  || (!STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
		   || STMT_VINFO_DATA_REF (def_stmt_info)
		   || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
		       != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
		  != (oprnd_info->first_dt != vect_reduction_def))))
	  /* Try swapping operands if we got a mismatch.  */
	  if (i == commutative_op && !swapped)
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "trying swapped operands\n");

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: different types\n");

      if ((dt == vect_constant_def
	   || dt == vect_external_def)
	  && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
	  && (TREE_CODE (type) == BOOLEAN_TYPE
	      || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: invalid type of def "
			     "for variable-length SLP %T\n", oprnd);
      /* Check the types of the definitions.  */
	case vect_external_def:
	  /* Make sure to demote the overall operand to external.  */
	  oprnd_info->first_dt = vect_external_def;
	case vect_constant_def:
	  oprnd_info->def_stmts.quick_push (NULL);
	  oprnd_info->ops.quick_push (oprnd);

	case vect_internal_def:
	case vect_reduction_def:
	  if (oprnd_info->first_dt == vect_reduction_def
	      && !STMT_VINFO_DATA_REF (stmt_info)
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && !STMT_VINFO_DATA_REF (def_stmt_info)
	      && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
		  == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
	      /* For a SLP reduction chain we want to duplicate the
		 reduction to each of the chain members.  That gets
		 us a sane SLP graph (still the stmts are not 100%
		 correct wrt the initial values).  */
	      oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
	      oprnd_info->ops.quick_push (oprnd_info->ops[0]);

	case vect_induction_def:
	  oprnd_info->def_stmts.quick_push (def_stmt_info);
	  oprnd_info->ops.quick_push (oprnd);

	  /* FORNOW: Not supported.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: illegal type of def %T\n",

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "swapped operands to match def types in %G",
/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
   Return true if we can, meaning that this choice doesn't conflict with
   existing SLP nodes that use STMT_INFO.  */

vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
  tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (old_vectype && useless_type_conversion_p (vectype, old_vectype))

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
      /* We maintain the invariant that if any statement in the group is
	 used, all other members of the group have the same vector type.  */
      stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info member_info = first_info;
      for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
	    || is_pattern_stmt_p (member_info))

	  for (member_info = first_info; member_info;
	       member_info = DR_GROUP_NEXT_ELEMENT (member_info))
	    STMT_VINFO_VECTYPE (member_info) = vectype;
  else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
	   && !is_pattern_stmt_p (stmt_info))
      STMT_VINFO_VECTYPE (stmt_info) = vectype;

  if (dump_enabled_p ())
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		       "Build SLP failed: incompatible vector"
		       " types for: %G", stmt_info->stmt);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "  old vector type: %T\n", old_vectype);
      dump_printf_loc (MSG_NOTE, vect_location,
		       "  new vector type: %T\n", vectype);
/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

compatible_calls_p (gcall *call1, gcall *call2)
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))

  if (gimple_call_internal_p (call1))
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
			       TREE_TYPE (gimple_call_lhs (call2))))
      for (unsigned int i = 0; i < nargs; ++i)
	if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
				 TREE_TYPE (gimple_call_arg (call2, i))))

      if (!operand_equal_p (gimple_call_fn (call1),
			    gimple_call_fn (call2), 0))

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
			unsigned int group_size,
			tree vectype, poly_uint64 *max_nunits)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type in %G\n",
      /* Fatal mismatch.  */

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
	  || const_nunits > group_size))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unrolling required "
			 "in basic block SLP\n");
      /* Fatal mismatch.  */

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */
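/* Illustrative example (not from the original sources): the scalar stmts
   a[0] = b[0] + c[0] and a[1] = b[1] + c[1] are isomorphic (same operation
   code, compatible operand types), whereas a[0] = b[0] + c[0] and
   a[1] = b[1] * c[1] are not.  */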
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits, bool *matches,
		       bool *two_operators, tree *node_vectype)
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;
  bool first_stmt_load_p = false, load_p = false;
  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
      gimple *stmt = stmt_info->stmt;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: unvectorizable statement %G",
	  /* Fatal mismatch.  */

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "Build SLP failed: not GIMPLE_ASSIGN nor "
			     "GIMPLE_CALL %G", stmt);
	  /* Fatal mismatch.  */

      if (!vect_get_vector_types_for_stmt (vinfo, stmt_info, &vectype,
					   &nunits_vectype, group_size)
	      && !vect_record_max_nunits (vinfo, stmt_info, group_size,
					  nunits_vectype, max_nunits)))
	  /* Fatal mismatch.  */

      gcc_assert (vectype);

      if (is_a <bb_vec_info> (vinfo)
	  && !vect_update_shared_vectype (stmt_info, vectype))

      gcall *call_stmt = dyn_cast <gcall *> (stmt);
	  rhs_code = CALL_EXPR;
	  if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
	  else if ((gimple_call_internal_p (call_stmt)
		    && (!vectorizable_internal_fn_p
			  (gimple_call_internal_fn (call_stmt))))
		   || gimple_call_tail_p (call_stmt)
		   || gimple_call_noreturn_p (call_stmt)
		   || !gimple_call_nothrow_p (call_stmt)
		   || gimple_call_chain (call_stmt))
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: unsupported call type %G",
	      /* Fatal mismatch.  */

	  rhs_code = gimple_assign_rhs_code (stmt);
	  load_p = gimple_vuse (stmt);
      /* Check the operation.  */
	  *node_vectype = vectype;
	  first_stmt_code = rhs_code;
	  first_stmt_load_p = load_p;

	  /* Shift arguments should be equal in all the packed stmts for a
	     vector shift with scalar shift operand.  */
	  if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
	      || rhs_code == LROTATE_EXPR
	      || rhs_code == RROTATE_EXPR)
	      vec_mode = TYPE_MODE (vectype);

	      /* First see if we have a vector/vector shift.  */
	      optab = optab_for_tree_code (rhs_code, vectype,
		  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
		  /* No vector/vector shift, try for a vector/scalar shift.  */
		  optab = optab_for_tree_code (rhs_code, vectype,
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "Build SLP failed: no optab.\n");
		      /* Fatal mismatch.  */
		  icode = (int) optab_handler (optab, vec_mode);
		  if (icode == CODE_FOR_nothing)
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
					 "op not supported by target.\n");
		      /* Fatal mismatch.  */
		  optab_op2_mode = insn_data[icode].operand[2].mode;
		  if (!VECTOR_MODE_P (optab_op2_mode))
		      need_same_oprnds = true;
		      first_op1 = gimple_assign_rhs2 (stmt);
	  else if (rhs_code == WIDEN_LSHIFT_EXPR)
	      need_same_oprnds = true;
	      first_op1 = gimple_assign_rhs2 (stmt);
		   && rhs_code == BIT_FIELD_REF)
	      tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0);
	      if (TREE_CODE (vec) != SSA_NAME
		  || !types_compatible_p (vectype, TREE_TYPE (vec)))
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "BIT_FIELD_REF not supported\n");
		  /* Fatal mismatch.  */
		   && gimple_call_internal_p (call_stmt, IFN_DIV_POW2))
	      need_same_oprnds = true;
	      first_op1 = gimple_call_arg (call_stmt, 1);
	  if (first_stmt_code != rhs_code
	      && alt_stmt_code == ERROR_MARK)
	    alt_stmt_code = rhs_code;
	  if ((first_stmt_code != rhs_code
	       && (first_stmt_code != IMAGPART_EXPR
		   || rhs_code != REALPART_EXPR)
	       && (first_stmt_code != REALPART_EXPR
		   || rhs_code != IMAGPART_EXPR)
	       /* Handle mismatches in plus/minus by computing both
		  and merging the results.  */
	       && !((first_stmt_code == PLUS_EXPR
		     || first_stmt_code == MINUS_EXPR)
		    && (alt_stmt_code == PLUS_EXPR
			|| alt_stmt_code == MINUS_EXPR)
		    && rhs_code == alt_stmt_code)
	       && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
		    && (first_stmt_code == ARRAY_REF
			|| first_stmt_code == BIT_FIELD_REF
			|| first_stmt_code == INDIRECT_REF
			|| first_stmt_code == COMPONENT_REF
			|| first_stmt_code == MEM_REF)))
	      || first_stmt_load_p != load_p)
	      if (dump_enabled_p ())
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "Build SLP failed: different operation "
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				   "original stmt %G", first_stmt_info->stmt);
	  if (need_same_oprnds)
	      tree other_op1 = (call_stmt
				? gimple_call_arg (call_stmt, 1)
				: gimple_assign_rhs2 (stmt));
	      if (!operand_equal_p (first_op1, other_op1, 0))
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different shift "
				     "arguments in %G", stmt);
		   && first_stmt_code == BIT_FIELD_REF
		   && (TREE_OPERAND (gimple_assign_rhs1 (first_stmt_info->stmt), 0)
		       != TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0)))
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: different BIT_FIELD_REF "
				 "arguments in %G", stmt);

	  if (!load_p && rhs_code == CALL_EXPR)
	      if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
				       as_a <gcall *> (stmt)))
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different calls in %G",
      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	  if (REFERENCE_CLASS_P (lhs))
	      first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
	      if (prev_first_load)
		  /* Check that there are no loads from different interleaving
		     chains in the same node.  */
		  if (prev_first_load != first_load)
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_MISSED_OPTIMIZATION,
					 "Build SLP failed: different "
					 "interleaving chains in one node %G",
		prev_first_load = first_load;
	} /* Grouped access.  */
	      /* Not grouped load.  */
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: not grouped load %G", stmt);

	      /* FORNOW: Not grouped loads are not supported.  */
	      /* Fatal mismatch.  */
	  /* Not memory operation.  */
	  if (TREE_CODE_CLASS (rhs_code) != tcc_binary
	      && TREE_CODE_CLASS (rhs_code) != tcc_unary
	      && TREE_CODE_CLASS (rhs_code) != tcc_expression
	      && TREE_CODE_CLASS (rhs_code) != tcc_comparison
	      && rhs_code != VIEW_CONVERT_EXPR
	      && rhs_code != CALL_EXPR
	      && rhs_code != BIT_FIELD_REF)
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: operation unsupported %G",
	      /* Fatal mismatch.  */
	  if (rhs_code == COND_EXPR)
	      tree cond_expr = gimple_assign_rhs1 (stmt);
	      enum tree_code cond_code = TREE_CODE (cond_expr);
	      enum tree_code swap_code = ERROR_MARK;
	      enum tree_code invert_code = ERROR_MARK;

		first_cond_code = TREE_CODE (cond_expr);
	      else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
		  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
		  swap_code = swap_tree_comparison (cond_code);
		  invert_code = invert_tree_comparison (cond_code, honor_nans);

	      if (first_cond_code == cond_code)
	      /* Isomorphic can be achieved by swapping.  */
	      else if (first_cond_code == swap_code)
	      /* Isomorphic can be achieved by inverting.  */
	      else if (first_cond_code == invert_code)
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: different"
				     " operation %G", stmt);
  for (i = 0; i < group_size; ++i)
  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
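  /* Illustrative example (not from the original sources): a group
     { a0+b0, a1-b1, a2+b2, a3-b3 } is handled by computing one vector
     with the PLUS results and one with the MINUS results and then
     selecting the required lanes from the two with a VEC_PERM_EXPR.  */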
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
      *two_operators = true;
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
  typedef vec<stmt_vec_info> value_type;
  typedef vec<stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static const bool empty_zero_p = true;
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
bst_traits::hash (value_type x)
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));

bst_traits::equal (value_type existing, value_type candidate)
  if (existing.length () != candidate.length ())
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map);
vect_build_slp_tree (vec_info *vinfo,
		     vec<stmt_vec_info> stmts, unsigned int group_size,
		     poly_uint64 *max_nunits,
		     bool *matches, unsigned *npermutes, unsigned *tree_size,
		     scalar_stmts_to_slp_tree_map_t *bst_map)
  if (slp_tree *leader = bst_map->get (stmts))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
			 *leader ? "" : "failed ", *leader);
	  (*leader)->refcnt++;
	  vect_update_max_nunits (max_nunits, (*leader)->max_nunits);

  poly_uint64 this_max_nunits = 1;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
					matches, npermutes, tree_size, bst_map);
      res->max_nunits = this_max_nunits;
      vect_update_max_nunits (max_nunits, this_max_nunits);
      /* Keep a reference for the bst_map use.  */
  bst_map->put (stmts.copy (), res);
/* Recursively build an SLP tree starting from NODE.
   Fail (and return a value not equal to zero) if def-stmts are not
   isomorphic, require data permutation or are of unsupported types of
   operation.  Otherwise, return 0.
   The value returned is the depth in the SLP tree where a mismatch
   was found.  */
vect_build_slp_tree_2 (vec_info *vinfo,
		       vec<stmt_vec_info> stmts, unsigned int group_size,
		       poly_uint64 *max_nunits,
		       bool *matches, unsigned *npermutes, unsigned *tree_size,
		       scalar_stmts_to_slp_tree_map_t *bst_map)
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
  else if (is_a <gphi *> (stmt_info->stmt))
  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (stmt_info != other_info)
      else if (def_type == vect_reduction_def
	       || def_type == vect_double_reduction_def
	       || def_type == vect_nested_cycle)
	  /* Else def types have to match.  */
	  stmt_vec_info other_info;
	  FOR_EACH_VEC_ELT (stmts, i, other_info)
	    if (STMT_VINFO_DEF_TYPE (other_info) != def_type)

      node = vect_create_new_slp_node (stmts, 0);
      SLP_TREE_VECTYPE (node) = vectype;
  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  tree vectype = NULL_TREE;
  if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
			      &this_max_nunits, matches, &two_operators,
  /* If the SLP node is a load, terminate the recursion unless masked.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
	  gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));

	  *max_nunits = this_max_nunits;
	  node = vect_create_new_slp_node (stmts, 0);
	  SLP_TREE_VECTYPE (node) = vectype;
	  /* And compute the load permutation.  Whether it is actually
	     a permutation depends on the unrolling factor which is
	     decided later.  */
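	  /* For example (illustrative note): if the node's scalar stmts load
	     a[2], a[3], a[0], a[1] from a group whose first element is the
	     load of a[0], the load permutation becomes { 2, 3, 0, 1 }.  */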
	  vec<unsigned> load_permutation;
	  stmt_vec_info load_info;
	  load_permutation.create (group_size);
	  stmt_vec_info first_stmt_info
	    = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
	      int load_place = vect_get_place_in_interleaving_chain
		  (load_info, first_stmt_info);
	      gcc_assert (load_place != -1);
	      load_permutation.safe_push (load_place);
	  SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
  else if (gimple_assign_single_p (stmt_info->stmt)
	   && !gimple_vuse (stmt_info->stmt)
	   && gimple_assign_rhs_code (stmt_info->stmt) == BIT_FIELD_REF)
      /* vect_build_slp_tree_2 determined all BIT_FIELD_REFs reference
	 the same SSA name vector of a compatible type to vectype.  */
      vec<std::pair<unsigned, unsigned> > lperm = vNULL;
      tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0);
      stmt_vec_info estmt_info;
      FOR_EACH_VEC_ELT (stmts, i, estmt_info)
	  gassign *estmt = as_a <gassign *> (estmt_info->stmt);
	  tree bfref = gimple_assign_rhs1 (estmt);
	  if (!known_eq (bit_field_size (bfref),
			 tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (vectype))))
	      || !constant_multiple_p (bit_field_offset (bfref),
				       bit_field_size (bfref), &lane))
	  lperm.safe_push (std::make_pair (0, (unsigned)lane));
      slp_tree vnode = vect_create_new_slp_node (vNULL);
      SLP_TREE_VECTYPE (vnode) = TREE_TYPE (vec);
      SLP_TREE_VEC_DEFS (vnode).safe_push (vec);
      /* We are always building a permutation node even if it is an identity
	 permute to shield the rest of the vectorizer from the odd node
	 representing an actual vector without any scalar ops.
	 ??? We could hide it completely with making the permute node
	 external?  */
      node = vect_create_new_slp_node (stmts, 1);
      SLP_TREE_CODE (node) = VEC_PERM_EXPR;
      SLP_TREE_LANE_PERMUTATION (node) = lperm;
      SLP_TREE_VECTYPE (node) = vectype;
      SLP_TREE_CHILDREN (node).quick_push (vnode);
  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
					     stmts, i, &oprnds_info);
	  matches[(res == -1) ? 0 : i] = false;
      for (i = 0; i < group_size; ++i)
	  vect_free_oprnd_info (oprnds_info);
  auto_vec<slp_tree, 4> children;

  stmt_info = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
      if (oprnd_info->first_dt == vect_uninitialized_def)
	  /* COND_EXPR have one too many eventually if the condition
	     is shared.  */
	  gcc_assert (i == 3 && nops == 4);

      if (oprnd_info->first_dt != vect_internal_def
	  && oprnd_info->first_dt != vect_reduction_def
	  && oprnd_info->first_dt != vect_induction_def)
	  slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
	  SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
	  oprnd_info->ops = vNULL;
	  children.safe_push (invnode);

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					group_size, &this_max_nunits,
					&this_tree_size, bst_map)) != NULL)
	  oprnd_info->def_stmts = vNULL;
	  children.safe_push (child);
      /* If the SLP build failed fatally and we analyze a basic-block
	 simply treat nodes we fail to build as externally defined
	 (and thus build vectors from the scalar defs).
	 The cost model will reject outright expensive cases.
	 ??? This doesn't treat cases where permutation ultimately
	 fails (or we don't try permutation below).  Ideally we'd
	 even compute a permutation that will end up with the maximum
	 SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
	  /* ??? Rejecting patterns this way doesn't work.  We'd have to
	     do extra work to cancel the pattern so the uses see the
	     scalar version.  */
	  && !is_pattern_stmt_p (stmt_info)
	  && !oprnd_info->any_pattern)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Building vector operands from scalars\n");
	  child = vect_create_new_slp_node (oprnd_info->ops);
	  children.safe_push (child);
	  oprnd_info->ops = vNULL;
	  oprnd_info->def_stmts = vNULL;
      /* If the SLP build for operand zero failed and operand zero
	 and one can be commutated try that for the scalar stmts
	 that failed the match.  */
	  /* A first scalar stmt mismatch signals a fatal mismatch.  */
	  /* ??? For COND_EXPRs we can swap the comparison operands
	     as well as the arms under some constraints.  */
	  && oprnds_info[1]->first_dt == vect_internal_def
	  && is_gimple_assign (stmt_info->stmt)
	  /* Swapping operands for reductions breaks assumptions later on.  */
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
	  && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
	  /* Do so only if the number of unsuccessful permutes was no more
	     than a cut-off as re-trying the recursive match on
	     possibly each level of the tree would expose exponential
	     behavior.  */
	  /* See whether we can swap the matching or the non-matching
	     stmt operands.  */
	  bool swap_not_matching = true;
	      for (j = 0; j < group_size; ++j)
		  if (matches[j] != !swap_not_matching)
		      stmt_vec_info stmt_info = stmts[j];
		      /* Verify if we can swap operands of this stmt.  */
		      gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
			  || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
			  if (!swap_not_matching)
			  swap_not_matching = false;
	  while (j != group_size);
	  /* Swap mismatched definition stmts.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Re-trying with swapped operands of stmts ");
	  for (j = 0; j < group_size; ++j)
	    if (matches[j] == !swap_not_matching)
		std::swap (oprnds_info[0]->def_stmts[j],
			   oprnds_info[1]->def_stmts[j]);
		std::swap (oprnds_info[0]->ops[j],
			   oprnds_info[1]->ops[j]);
		if (dump_enabled_p ())
		  dump_printf (MSG_NOTE, "%d ", j);
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "\n");
	  /* And try again with scratch 'matches' ...  */
	  bool *tem = XALLOCAVEC (bool, group_size);
	  if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
					    group_size, &this_max_nunits,
					    &this_tree_size, bst_map)) != NULL)
	      oprnd_info->def_stmts = vNULL;
	      children.safe_push (child);
  gcc_assert (child == NULL);
  FOR_EACH_VEC_ELT (children, j, child)
    vect_free_slp_tree (child, false);
  vect_free_oprnd_info (oprnds_info);

  vect_free_oprnd_info (oprnds_info);
  /* If we have all children of a child built up from uniform scalars
     then just throw that away, causing it to be built up from scalars.
     The exception is the SLP node for the vector store.  */
  if (is_a <bb_vec_info> (vinfo)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      /* ??? Rejecting patterns this way doesn't work.  We'd have to
	 do extra work to cancel the pattern so the uses see the
	 scalar version.  */
      && !is_pattern_stmt_p (stmt_info))
      FOR_EACH_VEC_ELT (children, j, child)
	if (SLP_TREE_DEF_TYPE (child) == vect_internal_def
	    || !vect_slp_tree_uniform_p (child))
	  FOR_EACH_VEC_ELT (children, j, child)
	    vect_free_slp_tree (child, false);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Building parent vector operands from "
			     "scalars instead\n");
  *tree_size += this_tree_size + 1;
  *max_nunits = this_max_nunits;
      /* ??? We'd likely want to either cache in bst_map sth like
	 { a+b, NULL, a+b, NULL } and { NULL, a-b, NULL, a-b } or
	 the true { a+b, a+b, a+b, a+b } ... but there we don't have
	 explicit stmts to put in so the keying on 'stmts' doesn't
	 work (but we have the same issue with nodes that use 'ops').  */
      slp_tree one = new _slp_tree;
      slp_tree two = new _slp_tree;
      SLP_TREE_DEF_TYPE (one) = vect_internal_def;
      SLP_TREE_DEF_TYPE (two) = vect_internal_def;
      SLP_TREE_VECTYPE (one) = vectype;
      SLP_TREE_VECTYPE (two) = vectype;
      SLP_TREE_CHILDREN (one).safe_splice (children);
      SLP_TREE_CHILDREN (two).safe_splice (children);
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (two), i, child)

      /* Here we record the original defs since this
	 node represents the final lane configuration.  */
      node = vect_create_new_slp_node (stmts, 2);
      SLP_TREE_VECTYPE (node) = vectype;
      SLP_TREE_CODE (node) = VEC_PERM_EXPR;
      SLP_TREE_CHILDREN (node).quick_push (one);
      SLP_TREE_CHILDREN (node).quick_push (two);
      gassign *stmt = as_a <gassign *> (stmts[0]->stmt);
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      stmt_vec_info ostmt_info;
      FOR_EACH_VEC_ELT (stmts, i, ostmt_info)
	  gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
	  if (gimple_assign_rhs_code (ostmt) != code0)
	      SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (1, i));
	      ocode = gimple_assign_rhs_code (ostmt);
	    SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (0, i));
      SLP_TREE_CODE (one) = code0;
      SLP_TREE_CODE (two) = ocode;
      SLP_TREE_LANES (one) = stmts.length ();
      SLP_TREE_LANES (two) = stmts.length ();
      SLP_TREE_REPRESENTATIVE (one) = stmts[0];
      SLP_TREE_REPRESENTATIVE (two) = stmts[j];
  node = vect_create_new_slp_node (stmts, nops);
  SLP_TREE_VECTYPE (node) = vectype;
  SLP_TREE_CHILDREN (node).splice (children);
/* Dump a single SLP tree NODE.  */

vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
  stmt_vec_info stmt_info;
  dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
  dump_user_location_t user_loc = loc.get_user_location ();
  dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n",
		   SLP_TREE_DEF_TYPE (node) == vect_external_def
		   : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
		   estimated_poly_value (node->max_nunits), node->refcnt);
  if (SLP_TREE_SCALAR_STMTS (node).exists ())
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
      dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
      dump_printf_loc (metadata, user_loc, "\t{ ");
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	dump_printf (metadata, "%T%s ", op,
		     i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
      dump_printf (metadata, "}\n");
  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
      dump_printf_loc (metadata, user_loc, "\tload permutation {");
      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), i, j)
	dump_printf (dump_kind, " %u", j);
      dump_printf (dump_kind, " }\n");
  if (SLP_TREE_LANE_PERMUTATION (node).exists ())
      dump_printf_loc (metadata, user_loc, "\tlane permutation {");
      for (i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i)
	dump_printf (dump_kind, " %u[%u]",
		     SLP_TREE_LANE_PERMUTATION (node)[i].first,
		     SLP_TREE_LANE_PERMUTATION (node)[i].second);
      dump_printf (dump_kind, " }\n");
  if (SLP_TREE_CHILDREN (node).is_empty ())
  dump_printf_loc (metadata, user_loc, "\tchildren");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    dump_printf (dump_kind, " %p", (void *)child);
  dump_printf (dump_kind, "\n");
debug (slp_tree node)
  debug_dump_context ctx;
  vect_print_slp_tree (MSG_NOTE,
		       dump_location_t::from_location_t (UNKNOWN_LOCATION),

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc,
		      slp_tree node, hash_set<slp_tree> &visited)
  if (visited.add (node))

  vect_print_slp_tree (dump_kind, loc, node);

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_graph (dump_kind, loc, child, visited);

vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc,
  hash_set<slp_tree> visited;
  vect_print_slp_graph (dump_kind, loc, entry, visited);
/* Mark the tree rooted at NODE with PURE_SLP.  */

vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)

  if (visited.add (node))

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    STMT_SLP_TYPE (stmt_info) = pure_slp;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, visited);

vect_mark_slp_stmts (slp_tree node)
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts (node, visited);
/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)

  if (visited.add (node))

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
		  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child, visited);

vect_mark_slp_stmts_relevant (slp_tree node)
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts_relevant (node, visited);
/* Copy the SLP subtree rooted at NODE.  */

slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
  slp_tree &copy_ref = map.get_or_insert (node, &existed_p);
  copy_ref = new _slp_tree;
  slp_tree copy = copy_ref;
  SLP_TREE_DEF_TYPE (copy) = SLP_TREE_DEF_TYPE (node);
  SLP_TREE_VECTYPE (copy) = SLP_TREE_VECTYPE (node);
  SLP_TREE_REPRESENTATIVE (copy) = SLP_TREE_REPRESENTATIVE (node);
  SLP_TREE_LANES (copy) = SLP_TREE_LANES (node);
  copy->max_nunits = node->max_nunits;

  if (SLP_TREE_SCALAR_STMTS (node).exists ())
      SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	STMT_VINFO_NUM_SLP_USES (stmt_info)++;
  if (SLP_TREE_SCALAR_OPS (node).exists ())
    SLP_TREE_SCALAR_OPS (copy) = SLP_TREE_SCALAR_OPS (node).copy ();
  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
    SLP_TREE_LOAD_PERMUTATION (copy) = SLP_TREE_LOAD_PERMUTATION (node).copy ();
  if (SLP_TREE_LANE_PERMUTATION (node).exists ())
    SLP_TREE_LANE_PERMUTATION (copy) = SLP_TREE_LANE_PERMUTATION (node).copy ();
  if (SLP_TREE_CHILDREN (node).exists ())
    SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
  gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
      SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map);
      SLP_TREE_CHILDREN (copy)[i]->refcnt++;
/* Rearrange the statements of NODE according to PERMUTATION.  */

vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
			  vec<unsigned> permutation,
			  hash_set<slp_tree> &visited)
  if (visited.add (node))

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation, visited);

  if (SLP_TREE_SCALAR_STMTS (node).exists ())
      gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
      vec<stmt_vec_info> tmp_stmts;
      tmp_stmts.create (group_size);
      tmp_stmts.quick_grow (group_size);
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
	tmp_stmts[permutation[i]] = stmt_info;
      SLP_TREE_SCALAR_STMTS (node).release ();
      SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
  if (SLP_TREE_SCALAR_OPS (node).exists ())
      gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
      tmp_ops.create (group_size);
      tmp_ops.quick_grow (group_size);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
	tmp_ops[permutation[i]] = op;
      SLP_TREE_SCALAR_OPS (node).release ();
      SLP_TREE_SCALAR_OPS (node) = tmp_ops;
  if (SLP_TREE_LANE_PERMUTATION (node).exists ())
      gcc_assert (group_size == SLP_TREE_LANE_PERMUTATION (node).length ());
      for (i = 0; i < group_size; ++i)
	SLP_TREE_LANE_PERMUTATION (node)[i].second
	  = permutation[SLP_TREE_LANE_PERMUTATION (node)[i].second];
/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
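/* For example (illustrative note): if every load node in the instance uses
   the same load permutation, say { 1, 0, 3, 2 }, the scalar stmts of all
   nodes can instead be reordered by that permutation so the loads become
   in-order and the load permutations can be dropped; this is only valid
   for reduction chains, where the lane order of the non-memory stmts does
   not matter.  */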
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
  slp_tree node, load;

  if (SLP_INSTANCE_LOADS (slp_instn).is_empty ())

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
  unsigned int group_size = SLP_TREE_LOAD_PERMUTATION (node).length ();
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
      if (!SLP_TREE_LOAD_PERMUTATION (load).exists ()
	  || SLP_TREE_LOAD_PERMUTATION (load).length () != group_size)
      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load), j, lidx)
	if (lidx != SLP_TREE_LOAD_PERMUTATION (node)[j])

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
      if (lidx >= group_size)
      if (bitmap_bit_p (load_index, lidx))
      bitmap_set_bit (load_index, lidx);
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */

  /* We have to unshare the SLP tree we modify.  */
  hash_map<slp_tree, slp_tree> map;
  slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map);
  vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn), false);
  SLP_INSTANCE_TREE (slp_instn) = unshared;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    SLP_INSTANCE_LOADS (slp_instn)[i] = *map.get (node);
  node = SLP_INSTANCE_LOADS (slp_instn)[0];

  /* Do the actual re-arrangement.  */
  hash_set<slp_tree> visited;
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
			    node->load_permutation, visited);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
      /* But we have to keep those permutations that are required because
	 of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
	  || (group_size == DR_GROUP_SIZE (first_stmt_info)
	      && DR_GROUP_GAP (first_stmt_info) == 0))
	SLP_TREE_LOAD_PERMUTATION (node).release ();
	for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
	  SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
/* Gather loads in the SLP graph NODE and populate the INST loads array.  */

vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
		       hash_set<slp_tree> &visited)
  if (visited.add (node))

  if (SLP_TREE_CHILDREN (node).length () == 0)
      if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
      stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
	  && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	loads.safe_push (node);

      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
	vect_gather_slp_loads (loads, child, visited);

vect_gather_slp_loads (slp_instance inst, slp_tree node)
  hash_set<slp_tree> visited;
  vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst), node, visited);
/* Find the last store in SLP INSTANCE.  */

vect_find_last_scalar_stmt_in_slp (slp_tree node)
  stmt_vec_info last = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
      last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;

/* Find the first stmt in NODE.  */

vect_find_first_scalar_stmt_in_slp (slp_tree node)
  stmt_vec_info first = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
	  || get_later_stmt (stmt_vinfo, first) == first)
/* Splits a group of stores, currently beginning at FIRST_VINFO, into
   two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
   (also containing the first GROUP1_SIZE stmts, since stores are
   consecutive), the second containing the remainder.
   Return the first stmt in the second group.  */
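/* For example (illustrative note): splitting an 8-store group at
   GROUP1_SIZE == 4 yields two groups of 4 stores each; the DR_GROUP_GAP
   of each group is then increased by the other group's size so that the
   other group's elements are stepped over.  */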
2039 static stmt_vec_info
2040 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
2042 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
2043 gcc_assert (group1_size
> 0);
2044 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
2045 gcc_assert (group2_size
> 0);
2046 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
2048 stmt_vec_info stmt_info
= first_vinfo
;
2049 for (unsigned i
= group1_size
; i
> 1; i
--)
2051 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2052 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
2054 /* STMT is now the last element of the first group. */
2055 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2056 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
2058 DR_GROUP_SIZE (group2
) = group2_size
;
2059 for (stmt_info
= group2
; stmt_info
;
2060 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
2062 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
2063 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
2066 /* For the second group, the DR_GROUP_GAP is that before the original group,
2067 plus skipping over the first vector. */
2068 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
2070 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
2071 DR_GROUP_GAP (first_vinfo
) += group2_size
;
2073 if (dump_enabled_p ())
2074 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
2075 group1_size
, group2_size
);
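
/* Illustrative sketch only, not part of the vectorizer: the gap bookkeeping
   above on plain integers.  Splitting a group of size 8 with gap 0 at
   GROUP1_SIZE == 4 gives a first group of 4 stmts whose gap now skips the
   second group, and a second group of 4 whose gap skips the first group.
   All names below are hypothetical.  */

static inline void
slp_split_group_gaps_sketch (unsigned group_size, unsigned group1_size,
			     unsigned orig_gap,
			     unsigned *gap1, unsigned *gap2)
{
  unsigned group2_size = group_size - group1_size;
  /* Second group: the original gap plus skipping over the first group.  */
  *gap2 = orig_gap + group1_size;
  /* First group: its gap now also has to skip over the second group.  */
  *gap1 = orig_gap + group2_size;
}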
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */

static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}
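
/* Illustrative sketch only: for constant NUNITS the computation above is
   just lcm (nunits, group_size) / group_size.  E.g. nunits == 4 and
   group_size == 6 gives lcm == 12 and an unrolling factor of 2, i.e. two
   copies of the group are needed to fill whole vectors.  Hypothetical
   helper, not used by the vectorizer.  */

static inline unsigned
slp_unrolling_factor_sketch (unsigned nunits, unsigned group_size)
{
  /* gcd by Euclid's algorithm.  */
  unsigned a = nunits, b = group_size;
  while (b != 0)
    {
      unsigned t = a % b;
      a = b;
      b = t;
    }
  unsigned lcm = nunits / a * group_size;
  return lcm / group_size;
}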
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
			   scalar_stmts_to_slp_tree_map_t *bst_map,
			   stmt_vec_info stmt_info, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size;
  tree vectype, scalar_type = NULL_TREE;
  unsigned int i;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  vec<stmt_vec_info> scalar_stmts;
  bool constructor = false;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      group_size = DR_GROUP_SIZE (stmt_info);
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      group_size = REDUC_GROUP_SIZE (stmt_info);
    }
  else if (is_gimple_assign (stmt_info->stmt)
	   && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
    {
      vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
      group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
      constructor = true;
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type %T\n",
			 scalar_type);
      return false;
    }
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  stmt_vec_info next_info = stmt_info;
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
	{
	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
	  next_info = DR_GROUP_NEXT_ELEMENT (next_info);
	}
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      /* Collect the reduction stmts and store them in
	 SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
	{
	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
	  next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
	}
      /* Mark the first element of the reduction chain as reduction to properly
	 transform the node.  In the reduction analysis phase only the last
	 element of the chain is marked as reduction.  */
      STMT_VINFO_DEF_TYPE (stmt_info)
	= STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
      STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
	= STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
    }
  else if (constructor)
    {
      tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
      tree val;
      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
	{
	  if (TREE_CODE (val) == SSA_NAME)
	    {
	      gimple *def = SSA_NAME_DEF_STMT (val);
	      stmt_vec_info def_info = vinfo->lookup_stmt (def);
	      /* Value is defined in another basic block.  */
	      if (!def_info)
		return false;
	      def_info = vect_stmt_to_vectorize (def_info);
	      scalar_stmts.safe_push (def_info);
	    }
	  else
	    return false;
	}
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Analyzing vectorizable constructor: %G\n",
			 stmt_info->stmt);
    }
  else
    {
      /* Collect reduction statements.  */
      vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next_info); i++)
	scalar_stmts.safe_push (next_info);
    }

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  poly_uint64 max_nunits = nunits;
  unsigned tree_size = 0;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
			      &max_nunits, matches, &npermutes,
			      &tree_size, bst_map);
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      poly_uint64 unrolling_factor
	= calculate_unrolling_factor (max_nunits, group_size);

      if (maybe_ne (unrolling_factor, 1U)
	  && is_a <bb_vec_info> (vinfo))
	{
	  unsigned HOST_WIDE_INT const_max_nunits;
	  if (!max_nunits.is_constant (&const_max_nunits)
	      || const_max_nunits > group_size)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: store group "
				 "size not a multiple of the vector size "
				 "in basic block SLP\n");
	      vect_free_slp_tree (node, false);
	      return false;
	    }
	  /* Fatal mismatch.  */
	  matches[group_size / const_max_nunits * const_max_nunits] = false;
	  vect_free_slp_tree (node, false);
	}
      else
	{
	  /* Create a new SLP instance.  */
	  new_instance = XNEW (class _slp_instance);
	  SLP_INSTANCE_TREE (new_instance) = node;
	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
	  SLP_INSTANCE_LOADS (new_instance) = vNULL;
	  SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;

	  vect_gather_slp_loads (new_instance, node);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "SLP size %u vs. limit %u.\n",
			     tree_size, max_tree_size);

	  /* Check whether any load is possibly permuted.  */
	  slp_tree load_node;
	  bool loads_permuted = false;
	  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
	    {
	      if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
		continue;
	      unsigned j;
	      stmt_vec_info load_info;
	      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
		if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
		  {
		    loads_permuted = true;
		    break;
		  }
	    }

	  /* If the loads and stores can be handled with load/store-lane
	     instructions do not generate this SLP instance.  */
	  if (is_a <loop_vec_info> (vinfo)
	      && loads_permuted
	      && dr && vect_store_lanes_supported (vectype, group_size, false))
	    {
	      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
		{
		  stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
		      (SLP_TREE_SCALAR_STMTS (load_node)[0]);
		  /* Use SLP for strided accesses (or if we can't
		     load-lanes).  */
		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
		      || ! vect_load_lanes_supported
			    (STMT_VINFO_VECTYPE (stmt_vinfo),
			     DR_GROUP_SIZE (stmt_vinfo), false))
		    break;
		}
	      if (i == SLP_INSTANCE_LOADS (new_instance).length ())
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Built SLP cancelled: can use "
				     "load/store-lanes\n");
		  vect_free_slp_instance (new_instance, false);
		  return false;
		}
	    }

	  /* If this is a reduction chain with a conversion in front
	     amend the SLP tree with a node for that.  */
	  if (!dr
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
	    {
	      /* Get at the conversion stmt - we know it's the single use
		 of the last stmt of the reduction chain.  */
	      gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
	      use_operand_p use_p;
	      gimple *use_stmt;
	      bool r = single_imm_use (gimple_assign_lhs (tem),
				       &use_p, &use_stmt);
	      gcc_assert (r);
	      next_info = vinfo->lookup_stmt (use_stmt);
	      next_info = vect_stmt_to_vectorize (next_info);
	      scalar_stmts = vNULL;
	      scalar_stmts.create (group_size);
	      for (unsigned i = 0; i < group_size; ++i)
		scalar_stmts.quick_push (next_info);
	      slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
	      SLP_TREE_VECTYPE (conv) = STMT_VINFO_VECTYPE (next_info);
	      SLP_TREE_CHILDREN (conv).quick_push (node);
	      SLP_INSTANCE_TREE (new_instance) = conv;
	      /* We also have to fake this conversion stmt as SLP reduction
		 group so we don't have to mess with too much code
		 elsewhere.  */
	      REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
	      REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
	    }

	  vinfo->slp_instances.safe_push (new_instance);

	  /* ???  We've replaced the old SLP_INSTANCE_GROUP_SIZE with
	     the number of scalar stmts in the root in a few places.
	     Verify that assumption holds.  */
	  gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
		      .length () == group_size);

	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "Final SLP tree for instance:\n");
	      vect_print_slp_graph (MSG_NOTE, vect_location,
				    SLP_INSTANCE_TREE (new_instance));
	    }

	  return true;
	}
    }
  else
    {
      /* Failed to SLP.  */
      /* Free the allocated memory.  */
      scalar_stmts.release ();
    }

  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_GROUP_FIRST_ELEMENT (stmt_info)
      && nunits.is_constant (&const_nunits))
    {
      /* We consider breaking the group only on VF boundaries from the existing
	 start.  */
      for (i = 0; i < group_size; i++)
	if (!matches[i]) break;

      if (i >= const_nunits && i < group_size)
	{
	  /* Split into two groups at the first vector boundary before i.  */
	  gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
	  unsigned group1_size = i & ~(const_nunits - 1);

	  stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
							   group1_size);
	  bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
						max_tree_size);
	  /* If the first non-match was in the middle of a vector,
	     skip the rest of that vector.  */
	  if (group1_size < i)
	    {
	      i = group1_size + const_nunits;
	      if (i < group_size)
		rest = vect_split_slp_store_group (rest, const_nunits);
	    }
	  if (i < group_size)
	    res |= vect_analyze_slp_instance (vinfo, bst_map,
					      rest, max_tree_size);
	  return res;
	}
      /* Even though the first vector did not all match, we might be able to SLP
	 (some) of the remainder.  FORNOW ignore this possibility.  */
    }

  return false;
}
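
/* Illustrative sketch only: given the index I of the first non-matching
   scalar stmt and a power-of-two CONST_NUNITS, the splitting code above
   splits the store group at the last vector boundary at or before I.
   E.g. const_nunits == 4 and i == 6 puts 4 stmts in the first sub-group
   and retries the analysis on both halves.  Hypothetical helper.  */

static inline unsigned
slp_split_point_sketch (unsigned i, unsigned const_nunits)
{
  /* Round I down to a multiple of the (power-of-two) vector size.  */
  return i & ~(const_nunits - 1);
}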
/* Check if there are stmts in the loop can be vectorized using SLP.  Build SLP
   trees of packed scalar stmts if SLP is possible.  */

opt_result
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  stmt_vec_info first_element;

  DUMP_VECT_SCOPE ("vect_analyze_slp");

  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
	{
	  /* Find SLP sequences starting from reduction chains.  */
	  FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
	    if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
					     max_tree_size))
	      {
		/* Dissolve reduction chain group.  */
		stmt_vec_info vinfo = first_element;
		stmt_vec_info last = NULL;
		while (vinfo)
		  {
		    stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
		    REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
		    REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
		    last = vinfo;
		    vinfo = next;
		  }
		STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
		/* It can be still vectorized as part of an SLP reduction.  */
		loop_vinfo->reductions.safe_push (last);
	      }
	}

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1)
	vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
				   max_tree_size);
    }

  /* The map keeps a reference on SLP nodes built, release that.  */
  for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
       it != bst_map->end (); ++it)
    if ((*it).second)
      vect_free_slp_tree ((*it).second, false);
  delete bst_map;

  /* Optimize permutations in SLP reductions.  */
  slp_instance instance;
  FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
    {
      slp_tree node = SLP_INSTANCE_TREE (instance);
      stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      /* Reduction (there are no data-refs in the root).
	 In reduction chain the order of the loads is not important.  */
      if (!STMT_VINFO_DATA_REF (stmt_info)
	  && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
	vect_attempt_slp_rearrange_stmts (instance);
    }

  /* Gather all loads in the SLP graph.  */
  hash_set<slp_tree> visited;
  FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
    vect_gather_slp_loads (vinfo->slp_loads, SLP_INSTANCE_TREE (instance),
			   visited);

  return opt_result::success ();
}
void
vect_optimize_slp (vec_info *vinfo)
{
  slp_tree node;
  unsigned i;
  FOR_EACH_VEC_ELT (vinfo->slp_loads, i, node)
    {
      if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
	continue;

      /* In basic block vectorization we allow any subchain of an interleaving
	 chain.
	 FORNOW: not in loop SLP because of realignment complications.  */
      if (is_a <bb_vec_info> (vinfo))
	{
	  bool subchain_p = true;
	  stmt_vec_info next_load_info = NULL;
	  stmt_vec_info load_info;
	  unsigned j;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
	    {
	      if (j != 0
		  && (next_load_info != load_info
		      || DR_GROUP_GAP (load_info) != 1))
		{
		  subchain_p = false;
		  break;
		}
	      next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
	    }
	  if (subchain_p)
	    {
	      SLP_TREE_LOAD_PERMUTATION (node).release ();
	      continue;
	    }
	}
      else
	{
	  stmt_vec_info load_info;
	  bool this_load_permuted = false;
	  unsigned j;
	  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
	    if (SLP_TREE_LOAD_PERMUTATION (node)[j] != j)
	      {
		this_load_permuted = true;
		break;
	      }
	  stmt_vec_info first_stmt_info
	    = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
	  if (!this_load_permuted
	      /* The load requires permutation when unrolling exposes
		 a gap either because the group is larger than the SLP
		 group-size or because there is a gap between the groups.  */
	      && (known_eq (LOOP_VINFO_VECT_FACTOR (as_a <loop_vec_info> (vinfo)), 1U)
		  || ((SLP_TREE_LANES (node) == DR_GROUP_SIZE (first_stmt_info))
		      && DR_GROUP_GAP (first_stmt_info) == 0)))
	    {
	      SLP_TREE_LOAD_PERMUTATION (node).release ();
	      continue;
	    }
	}
    }
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i;
  poly_uint64 unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  DUMP_VECT_SCOPE ("vect_make_slp_decision");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      /* All unroll factors have the form:

	   GET_MODE_SIZE (vinfo->vector_mode) * X

	 for some rational X, so they must have a common multiple.  */
      unrolling_factor
	= force_common_multiple (unrolling_factor,
				 SLP_INSTANCE_UNROLLING_FACTOR (instance));

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "Decided to SLP %d instances. Unrolling factor ",
		       decided_to_slp);
      dump_dec (MSG_NOTE, unrolling_factor);
      dump_printf (MSG_NOTE, "\n");
    }

  return (decided_to_slp > 0);
}
/* Private data for vect_detect_hybrid_slp.  */
struct vdhs_data
{
  loop_vec_info loop_vinfo;
  vec<stmt_vec_info> *worklist;
};

/* Walker for walk_gimple_op.  */

static tree
vect_detect_hybrid_slp (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *)data;
  vdhs_data *dat = (vdhs_data *)wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  stmt_vec_info def_stmt_info = dat->loop_vinfo->lookup_def (*tp);
  if (!def_stmt_info)
    return NULL_TREE;
  def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
  if (PURE_SLP_STMT (def_stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
			 def_stmt_info->stmt);
      STMT_SLP_TYPE (def_stmt_info) = hybrid;
      dat->worklist->safe_push (def_stmt_info);
    }

  return NULL_TREE;
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");

  /* All stmts participating in SLP are marked pure_slp, all other
     stmts are loop_vect.
     First collect all loop_vect stmts into a worklist.  */
  auto_vec<stmt_vec_info> worklist;
  for (unsigned i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gphi *phi = gsi.phi ();
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (phi);
	  if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
	    worklist.safe_push (stmt_info);
	}
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (is_gimple_debug (stmt))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
	    {
	      for (gimple_stmt_iterator gsi2
		     = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
		   !gsi_end_p (gsi2); gsi_next (&gsi2))
		{
		  stmt_vec_info patt_info
		    = loop_vinfo->lookup_stmt (gsi_stmt (gsi2));
		  if (!STMT_SLP_TYPE (patt_info)
		      && STMT_VINFO_RELEVANT (patt_info))
		    worklist.safe_push (patt_info);
		}
	      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
	    }
	  if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
	    worklist.safe_push (stmt_info);
	}
    }

  /* Now we have a worklist of non-SLP stmts, follow use->def chains and
     mark any SLP vectorized stmt as hybrid.
     ???  We're visiting def stmts N times (once for each non-SLP and
     once for each hybrid-SLP use).  */
  walk_stmt_info wi;
  vdhs_data dat;
  dat.worklist = &worklist;
  dat.loop_vinfo = loop_vinfo;
  memset (&wi, 0, sizeof (wi));
  wi.info = (void *)&dat;
  while (!worklist.is_empty ())
    {
      stmt_vec_info stmt_info = worklist.pop ();
      /* Since SSA operands are not set up for pattern stmts we need
	 to use walk_gimple_op.  */
      walk_gimple_op (stmt_info->stmt, vect_detect_hybrid_slp, &wi);
    }
}
/* Initialize a bb_vec_info struct for the statements between
   REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */

_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
			    gimple_stmt_iterator region_end_in,
			    vec_info_shared *shared)
  : vec_info (vec_info::bb, init_cost (NULL), shared),
    bb (gsi_bb (region_begin_in)),
    region_begin (region_begin_in),
    region_end (region_end_in)
{
  for (gimple *stmt : this->region_stmts ())
    {
      gimple_set_uid (stmt, 0);
      if (is_gimple_debug (stmt))
	continue;
      add_stmt (stmt);
    }

  bb->aux = this;
}

/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

_bb_vec_info::~_bb_vec_info ()
{
  for (gimple *stmt : this->region_stmts ())
    /* Reset region marker.  */
    gimple_set_uid (stmt, -1);

  bb->aux = NULL;
}
/* Subroutine of vect_slp_analyze_node_operations.  Handle the root of NODE,
   given then that child nodes have already been processed, and that
   their def types currently match their SLP node's def type.  */

static bool
vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
				    slp_instance node_instance,
				    stmt_vector_for_cost *cost_vec)
{
  stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

  /* Calculate the number of vector statements to be created for the
     scalar stmts in this node.  For SLP reductions it is equal to the
     number of vector statements in the children (which has already been
     calculated by the recursive call).  Otherwise it is the number of
     scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
     VF divided by the number of elements in a vector.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
	if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
	  {
	    SLP_TREE_NUMBER_OF_VEC_STMTS (node)
	      = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
	    break;
	  }
    }
  else
    {
      poly_uint64 vf;
      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
	vf = loop_vinfo->vectorization_factor;
      else
	vf = 1;
      unsigned int group_size = SLP_TREE_LANES (node);
      tree vectype = SLP_TREE_VECTYPE (node);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
	= vect_get_num_vectors (vf * group_size, vectype);
    }

  /* Handle purely internal nodes.  */
  if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
    return vectorizable_slp_permutation (vinfo, NULL, node, cost_vec);

  bool dummy;
  return vect_analyze_stmt (vinfo, stmt_info, &dummy,
			    node, node_instance, cost_vec);
}
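
/* Illustrative sketch only: for constant sizes the number of vector stmts
   computed above is simply VF * GROUP_SIZE / NUNITS.  E.g. a group of 4
   scalar stmts with VF == 8 and 16 elements per vector needs 2 vector
   stmts.  Hypothetical helper; the real code uses vect_get_num_vectors on
   poly_uint64 values.  */

static inline unsigned
slp_num_vec_stmts_sketch (unsigned vf, unsigned group_size, unsigned nunits)
{
  return vf * group_size / nunits;
}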
/* Try to build NODE from scalars, returning true on success.
   NODE_INSTANCE is the SLP instance that contains NODE.  */

static bool
vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
			      slp_instance node_instance)
{
  stmt_vec_info stmt_info;
  unsigned int i;

  if (!is_a <bb_vec_info> (vinfo)
      || node == SLP_INSTANCE_TREE (node_instance)
      || !SLP_TREE_SCALAR_STMTS (node).exists ()
      || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
    return false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Building vector operands from scalars instead\n");

  /* Don't remove and free the child nodes here, since they could be
     referenced by other structures.  The analysis and scheduling phases
     (need to) ignore child nodes of anything that isn't vect_internal_def.  */
  unsigned int group_size = SLP_TREE_LANES (node);
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  SLP_TREE_SCALAR_OPS (node).safe_grow (group_size, true);
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
      SLP_TREE_SCALAR_OPS (node)[i] = lhs;
    }
  return true;
}
/* Compute the prologue cost for invariant or constant operands represented
   by NODE.  */

static void
vect_prologue_cost_for_slp (slp_tree node,
			    stmt_vector_for_cost *cost_vec)
{
  /* There's a special case of an existing vector, that costs nothing.  */
  if (SLP_TREE_SCALAR_OPS (node).length () == 0
      && !SLP_TREE_VEC_DEFS (node).is_empty ())
    return;
  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = SLP_TREE_VECTYPE (node);
  unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
	 are composed of repeated whole groups we only need to
	 cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
	elt = SLP_TREE_SCALAR_OPS (node)[si];
      /* ???  We're just tracking whether all operands of a single
	 vector initializer are the same, ideally we'd check if
	 we emitted the same one already.  */
      else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
	elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
	{
	  record_stmt_cost (cost_vec, 1,
			    SLP_TREE_DEF_TYPE (node) == vect_external_def
			    ? (elt ? scalar_to_vec : vec_construct)
			    : vector_load,
			    NULL, vectype, 0, vect_prologue);
	  nelt = 0;
	}
    }
}
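
/* Illustrative sketch only: as reconstructed above, an external-def vector
   whose NELT_LIMIT operands are all the same is costed as a single splat
   (scalar_to_vec), otherwise as a vec_construct; constant operands are
   charged a load from the constant pool.  The hypothetical helper below
   mirrors the "all elements equal" test on a plain array.  */

static inline bool
slp_uniform_operands_sketch (const int *ops, unsigned nelt_limit)
{
  for (unsigned i = 1; i < nelt_limit; ++i)
    if (ops[i] != ops[0])
      return false;
  return true;
}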
/* Analyze statements contained in SLP tree NODE after recursively analyzing
   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.

   Return true if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
				  slp_instance node_instance,
				  hash_set<slp_tree> &visited,
				  hash_set<slp_tree> &lvisited,
				  stmt_vector_for_cost *cost_vec)
{
  int i, j;
  slp_tree child;

  /* Assume we can code-generate all invariants.  */
  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  /* If we already analyzed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.
     The SLP graph is acyclic so not caching whether we failed or succeeded
     doesn't result in any issue since we throw away the lvisited set
     when we fail.  */
  if (visited.contains (node)
      || lvisited.contains (node))
    return true;

  bool res = true;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      res = vect_slp_analyze_node_operations (vinfo, child, node_instance,
					      visited, lvisited, cost_vec);
      if (!res)
	break;
    }

  if (res)
    {
      res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
						cost_vec);
      if (res)
	lvisited.add (node);
    }

  /* When the node can be vectorized cost invariant nodes it references.
     This is not done in DFS order to allow the refering node
     vectorizable_* calls to nail down the invariant nodes vector type
     and possibly unshare it if it needs a different vector type than
     the first use.  */
  if (res)
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
      if ((SLP_TREE_DEF_TYPE (child) == vect_constant_def
	   || SLP_TREE_DEF_TYPE (child) == vect_external_def)
	  /* Perform usual caching, note code-generation still
	     code-gens these nodes multiple times but we expect
	     to CSE them later.  */
	  && !visited.contains (child)
	  && !lvisited.add (child))
	{
	  /* ???  After auditing more code paths make a "default"
	     and push the vector type from NODE to all children
	     if it is not already set.  */
	  /* Compute the number of vectors to be generated.  */
	  tree vector_type = SLP_TREE_VECTYPE (child);
	  if (!vector_type)
	    {
	      /* For shifts with a scalar argument we don't need
		 to cost or code-generate anything.
		 ???  Represent this more explicitely.  */
	      gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
			   == shift_vec_info_type)
			  && j == 1);
	      continue;
	    }
	  unsigned group_size = SLP_TREE_LANES (child);
	  poly_uint64 vf = 1;
	  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
	    vf = loop_vinfo->vectorization_factor;
	  SLP_TREE_NUMBER_OF_VEC_STMTS (child)
	    = vect_get_num_vectors (vf * group_size, vector_type);
	  /* And cost them.  */
	  vect_prologue_cost_for_slp (child, cost_vec);
	}

  /* If this node or any of its children can't be vectorized, try pruning
     the tree here rather than felling the whole thing.  */
  if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
    {
      /* We'll need to revisit this for invariant costing and number
	 of vectorized stmt setting.  */
      res = true;
    }

  return res;
}
/* Analyze statements in SLP instances of VINFO.  Return true if the
   operations are supported.  */

bool
vect_slp_analyze_operations (vec_info *vinfo)
{
  slp_instance instance;
  int i;

  DUMP_VECT_SCOPE ("vect_slp_analyze_operations");

  hash_set<slp_tree> visited;
  for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
    {
      hash_set<slp_tree> lvisited;
      stmt_vector_for_cost cost_vec;
      cost_vec.create (2);
      if (!vect_slp_analyze_node_operations (vinfo,
					     SLP_INSTANCE_TREE (instance),
					     instance, visited, lvisited,
					     &cost_vec)
	  /* Instances with a root stmt require vectorized defs for the
	     SLP tree root.  */
	  || (SLP_INSTANCE_ROOT_STMT (instance)
	      && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
		  != vect_internal_def)))
	{
	  slp_tree node = SLP_INSTANCE_TREE (instance);
	  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "removing SLP instance operations starting from: %G",
			     stmt_info->stmt);
	  vect_free_slp_instance (instance, false);
	  vinfo->slp_instances.ordered_remove (i);
	  cost_vec.release ();
	}
      else
	{
	  for (hash_set<slp_tree>::iterator x = lvisited.begin ();
	       x != lvisited.end (); ++x)
	    visited.add (*x);
	  i++;

	  add_stmt_costs (vinfo, vinfo->target_cost_data, &cost_vec);
	  cost_vec.release ();
	}
    }

  return !vinfo->slp_instances.is_empty ();
}
/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  */

static void
vect_bb_slp_scalar_cost (vec_info *vinfo,
			 slp_tree node, vec<bool, va_heap> *life,
			 stmt_vector_for_cost *cost_vec,
			 hash_set<slp_tree> &visited)
{
  unsigned i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      ssa_op_iter op_iter;
      def_operand_p def_p;

      if ((*life)[i])
	continue;

      /* If there is a non-vectorized use of the defs then the scalar
	 stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
      stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
      gimple *orig_stmt = orig_stmt_info->stmt;
      FOR_EACH_SSA_DEF_OPERAND (def_p, orig_stmt, op_iter, SSA_OP_DEF)
	{
	  imm_use_iterator use_iter;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
	    if (!is_gimple_debug (use_stmt))
	      {
		stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
		if (!use_stmt_info
		    || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info)))
		  {
		    (*life)[i] = true;
		    BREAK_FROM_IMM_USE_STMT (use_iter);
		  }
	      }
	}
      if ((*life)[i])
	continue;

      /* Count scalar stmts only once.  */
      if (gimple_visited_p (orig_stmt))
	continue;
      gimple_set_visited (orig_stmt, true);

      vect_cost_for_stmt kind;
      if (STMT_VINFO_DATA_REF (orig_stmt_info))
	{
	  if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info)))
	    kind = scalar_load;
	  else
	    kind = scalar_store;
	}
      else if (vect_nop_conversion_p (orig_stmt_info))
	continue;
      else
	kind = scalar_stmt;
      record_stmt_cost (cost_vec, 1, kind, orig_stmt_info, 0, vect_body);
    }

  auto_vec<bool, 20> subtree_life;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	{
	  /* Do not directly pass LIFE to the recursive call, copy it to
	     confine changes in the callee to the current child/subtree.  */
	  if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
	    {
	      subtree_life.safe_grow_cleared (SLP_TREE_LANES (child), true);
	      for (unsigned j = 0;
		   j < SLP_TREE_LANE_PERMUTATION (node).length (); ++j)
		{
		  auto perm = SLP_TREE_LANE_PERMUTATION (node)[j];
		  if (perm.first == i)
		    subtree_life[perm.second] = (*life)[j];
		}
	    }
	  else
	    {
	      gcc_assert (SLP_TREE_LANES (node) == SLP_TREE_LANES (child));
	      subtree_life.safe_splice (*life);
	    }
	  vect_bb_slp_scalar_cost (vinfo, child, &subtree_life, cost_vec,
				   visited);
	  subtree_life.truncate (0);
	}
    }
}
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  */
  stmt_vector_for_cost scalar_costs;
  scalar_costs.create (0);
  hash_set<slp_tree> visited;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_TREE_LANES (SLP_INSTANCE_TREE (instance)),
			      true);
      vect_bb_slp_scalar_cost (bb_vinfo,
			       SLP_INSTANCE_TREE (instance),
			       &life, &scalar_costs, visited);
    }
  /* Unset visited flag.  */
  stmt_info_for_cost *si;
  FOR_EACH_VEC_ELT (scalar_costs, i, si)
    gimple_set_visited (si->stmt_info->stmt, false);

  void *target_cost_data = init_cost (NULL);
  add_stmt_costs (bb_vinfo, target_cost_data, &scalar_costs);
  scalar_costs.release ();
  unsigned dummy;
  finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
  destroy_cost_data (target_cost_data);

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
	       &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
		   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is no more than the cost of the
     scalar version.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant uses
     are free on the scalar side but cost a load on the vector side for
     example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}
/* Find any vectorizable constructors and add them to the grouped_store
   array.  */

static void
vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
{
  for (gimple *stmt : bb_vinfo->region_stmts ())
    {
      gassign *assign = dyn_cast<gassign *> (stmt);
      if (!assign || gimple_assign_rhs_code (assign) != CONSTRUCTOR)
	continue;

      tree rhs = gimple_assign_rhs1 (assign);
      if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
		       CONSTRUCTOR_NELTS (rhs))
	  || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
	  || uniform_vector_p (rhs))
	continue;

      stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (assign);
      BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
    }
}
/* Check if the region described by BB_VINFO can be vectorized, returning
   true if so.  When returning false, set FATAL to true if the same failure
   would prevent vectorization at other vector sizes, false if it is still
   worth trying other sizes.  N_STMTS is the number of statements in the
   region.  */

static bool
vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
		       vec<int> *dataref_groups)
{
  DUMP_VECT_SCOPE ("vect_slp_analyze_bb");

  slp_instance instance;
  int i;
  poly_uint64 min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data-ref in basic "
			 "block.\n");
      return false;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo, dataref_groups))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data access in "
			 "basic block.\n");
      return false;
    }

  vect_slp_check_for_constructors (bb_vinfo);

  /* If there are no grouped stores and no constructors in the region
     there is no need to continue with pattern recog as vect_analyze_slp
     will fail anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: no grouped stores in "
			 "basic block.\n");
      return false;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Failed to SLP the basic block.\n");
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: failed to find SLP opportunities "
			   "in basic block.\n");
	}
      return false;
    }

  /* Optimize permutations.  */
  vect_optimize_slp (bb_vinfo);

  vect_record_base_alignments (bb_vinfo);

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      if (! vect_slp_analyze_instance_alignment (bb_vinfo, instance)
	  || ! vect_slp_analyze_instance_dependence (bb_vinfo, instance))
	{
	  slp_tree node = SLP_INSTANCE_TREE (instance);
	  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "removing SLP instance operations starting from: %G",
			     stmt_info->stmt);
	  vect_free_slp_instance (instance, false);
	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
	  continue;
	}

      /* Mark all the statements that we want to vectorize as pure SLP and
	 relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
      if (SLP_INSTANCE_ROOT_STMT (instance))
	STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    return false;

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: bad operation in basic block.\n");
      return false;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization is not "
			 "profitable.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Basic block will be vectorized using SLP\n");
  return true;
}
/* Subroutine of vect_slp_bb.  Try to vectorize the statements between
   REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
   on success.  The region has N_STMTS statements and has the datarefs
   given by DATAREFS.  */

static bool
vect_slp_region (gimple_stmt_iterator region_begin,
		 gimple_stmt_iterator region_end,
		 vec<data_reference_p> datarefs,
		 vec<int> *dataref_groups,
		 unsigned int n_stmts)
{
  bb_vec_info bb_vinfo;
  auto_vector_modes vector_modes;

  /* Autodetect first vector size we try.  */
  machine_mode next_vector_mode = VOIDmode;
  targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
  unsigned int mode_i = 0;

  vec_info_shared shared;

  machine_mode autodetected_vector_mode = VOIDmode;
  while (1)
    {
      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);

      bool first_time_p = shared.datarefs.is_empty ();
      BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
      if (first_time_p)
	bb_vinfo->shared->save_datarefs ();
      else
	bb_vinfo->shared->check_datarefs ();
      bb_vinfo->vector_mode = next_vector_mode;

      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups)
	  && dbg_cnt (vect_slp))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "***** Analysis succeeded with vector mode"
			       " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
	      dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
	    }

	  bb_vinfo->shared->check_datarefs ();
	  vect_schedule_slp (bb_vinfo);

	  unsigned HOST_WIDE_INT bytes;
	  if (dump_enabled_p ())
	    {
	      if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
		dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
				 "basic block part vectorized using %wu byte "
				 "vectors\n", bytes);
	      else
		dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
				 "basic block part vectorized using variable "
				 "length vectors\n");
	    }

	  vectorized = true;
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "***** Analysis failed with vector mode %s\n",
			     GET_MODE_NAME (bb_vinfo->vector_mode));
	}

      if (mode_i == 0)
	autodetected_vector_mode = bb_vinfo->vector_mode;

      while (mode_i < vector_modes.length ()
	     && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "***** The result for vector mode %s would"
			     " be the same\n",
			     GET_MODE_NAME (vector_modes[mode_i]));
	  mode_i += 1;
	}

      delete bb_vinfo;

      if (mode_i < vector_modes.length ()
	  && VECTOR_MODE_P (autodetected_vector_mode)
	  && (related_vector_mode (vector_modes[mode_i],
				   GET_MODE_INNER (autodetected_vector_mode))
	      == autodetected_vector_mode)
	  && (related_vector_mode (autodetected_vector_mode,
				   GET_MODE_INNER (vector_modes[mode_i]))
	      == vector_modes[mode_i]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "***** Skipping vector mode %s, which would"
			     " repeat the analysis for %s\n",
			     GET_MODE_NAME (vector_modes[mode_i]),
			     GET_MODE_NAME (autodetected_vector_mode));
	  mode_i += 1;
	}

      if (vectorized
	  || mode_i == vector_modes.length ()
	  || autodetected_vector_mode == VOIDmode
	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
	     vector sizes will fail do not bother iterating.  */
	  || fatal)
	return vectorized;

      /* Try the next biggest vector size.  */
      next_vector_mode = vector_modes[mode_i++];
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "***** Re-trying analysis with vector mode %s\n",
			 GET_MODE_NAME (next_vector_mode));
    }
}
/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  vec<data_reference_p> datarefs = vNULL;
  vec<int> dataref_groups = vNULL;
  int insns = 0;
  int current_group = 0;
  gimple_stmt_iterator region_begin = gsi_start_nondebug_after_labels_bb (bb);
  gimple_stmt_iterator region_end = gsi_last_bb (bb);
  if (!gsi_end_p (region_end))
    gsi_next (&region_end);

  for (gimple_stmt_iterator gsi = gsi_after_labels (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (is_gimple_debug (stmt))
	continue;
      insns++;

      if (gimple_location (stmt) != UNKNOWN_LOCATION)
	vect_location = stmt;

      if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs,
					  &dataref_groups, current_group))
	++current_group;
    }

  if (insns > param_slp_max_insns_in_bb)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: too many instructions in "
			 "basic block.\n");
      return false;
    }

  return vect_slp_region (region_begin, region_end, datarefs,
			  &dataref_groups, insns);
}
/* Build a variable-length vector in which the elements in ELTS are repeated
   to a fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
   RESULTS and add any new instructions to SEQ.

   The approach we use is:

   (1) Find a vector mode VM with integer elements of mode IM.

   (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
       ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
       from small vectors to IM.

   (3) Duplicate each ELTS'[I] into a vector of mode VM.

   (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
       correct byte contents.

   (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.

   We try to find the largest IM for which this sequence works, in order
   to cut down on the number of interleaves.  */

void
duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
			  vec<tree> elts, unsigned int nresults,
			  vec<tree> &results)
{
  unsigned int nelts = elts.length ();
  tree element_type = TREE_TYPE (vector_type);

  /* (1) Find a vector mode VM with integer elements of mode IM.  */
  unsigned int nvectors = 1;
  tree new_vector_type;
  tree permutes[2];
  if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
				       &nvectors, &new_vector_type,
				       permutes))
    gcc_unreachable ();

  /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
  unsigned int partial_nelts = nelts / nvectors;
  tree partial_vector_type = build_vector_type (element_type, partial_nelts);

  tree_vector_builder partial_elts;
  auto_vec<tree, 32> pieces (nvectors * 2);
  pieces.quick_grow (nvectors * 2);
  for (unsigned int i = 0; i < nvectors; ++i)
    {
      /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
	     ELTS' has mode IM.  */
      partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
      for (unsigned int j = 0; j < partial_nelts; ++j)
	partial_elts.quick_push (elts[i * partial_nelts + j]);
      tree t = gimple_build_vector (seq, &partial_elts);
      t = gimple_build (seq, VIEW_CONVERT_EXPR,
			TREE_TYPE (new_vector_type), t);

      /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
      pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
    }

  /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
	 correct byte contents.

     We need to repeat the following operation log2(nvectors) times:

       out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
       out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);

     However, if each input repeats every N elements and the VF is
     a multiple of N * 2, the HI result is the same as the LO.  */
  unsigned int in_start = 0;
  unsigned int out_start = nvectors;
  unsigned int hi_start = nvectors / 2;
  /* A bound on the number of outputs needed to produce NRESULTS results
     in the final iteration.  */
  unsigned int noutputs_bound = nvectors * nresults;
  for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
    {
      noutputs_bound /= 2;
      unsigned int limit = MIN (noutputs_bound, nvectors);
      for (unsigned int i = 0; i < limit; ++i)
	{
	  if ((i & 1) != 0
	      && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
			     2 * in_repeat))
	    {
	      pieces[out_start + i] = pieces[out_start + i - 1];
	      continue;
	    }

	  tree output = make_ssa_name (new_vector_type);
	  tree input1 = pieces[in_start + (i / 2)];
	  tree input2 = pieces[in_start + (i / 2) + hi_start];
	  gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
					       input1, input2,
					       permutes[i & 1]);
	  gimple_seq_add_stmt (seq, stmt);
	  pieces[out_start + i] = output;
	}
      std::swap (in_start, out_start);
    }

  /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.  */
  results.reserve (nresults);
  for (unsigned int i = 0; i < nresults; ++i)
    if (i < nvectors)
      results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
					pieces[in_start + i]));
    else
      results.quick_push (results[i - nvectors]);
}
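
/* Illustrative sketch only: step (4) above repeats a lo/hi interleave
   log2 (nvectors) times.  The hypothetical helper below shows one such
   round on plain arrays of N elements: the "lo" permute interleaves the
   low halves of its two inputs and the "hi" permute the high halves,
   which after all rounds yields the requested repeating byte pattern.  */

static inline void
slp_interleave_round_sketch (const int *lo_in, const int *hi_in,
			     int *lo_out, int *hi_out, unsigned n)
{
  for (unsigned i = 0; i < n / 2; ++i)
    {
      /* Low-half interleave of the two inputs ...  */
      lo_out[2 * i] = lo_in[i];
      lo_out[2 * i + 1] = hi_in[i];
      /* ... and high-half interleave of the same inputs.  */
      hi_out[2 * i] = lo_in[n / 2 + i];
      hi_out[2 * i + 1] = hi_in[n / 2 + i];
    }
}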
/* For constant and loop invariant defs in OP_NODE this function creates
   vector defs that will be used in the vectorized stmts and stores them
   to SLP_TREE_VEC_DEFS of OP_NODE.  */

static void
vect_create_constant_vectors (vec_info *vinfo, slp_tree op_node)
{
  unsigned HOST_WIDE_INT nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = op_node->ops.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  bool constant_p;
  gimple_seq ctor_seq = NULL;
  auto_vec<tree, 16> permute_results;

  /* We always want SLP_TREE_VECTYPE (op_node) here correctly set.  */
  vector_type = SLP_TREE_VECTYPE (op_node);

  unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (op_node);
  SLP_TREE_VEC_DEFS (op_node).create (number_of_vectors);
  auto_vec<tree> voprnds (number_of_vectors);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  /* When using duplicate_and_interleave, we just need one element for
     each scalar statement.  */
  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
    nunits = group_size;

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  stmt_vec_info insert_after = NULL;
  for (j = 0; j < number_of_copies; j++)
    {
      tree op;
      for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
	{
	  /* Create 'vect_ = {op0,op1,...,opn}'.  */
	  number_of_places_left_in_vector--;
	  tree orig_op = op;
	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
	    {
	      if (CONSTANT_CLASS_P (op))
		{
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      /* Can't use VIEW_CONVERT_EXPR for booleans because
			 of possibly different sizes of scalar value and
			 vector element.  */
		      if (integer_zerop (op))
			op = build_int_cst (TREE_TYPE (vector_type), 0);
		      else if (integer_onep (op))
			op = build_all_ones_cst (TREE_TYPE (vector_type));
		      else
			gcc_unreachable ();
		    }
		  else
		    op = fold_unary (VIEW_CONVERT_EXPR,
				     TREE_TYPE (vector_type), op);
		  gcc_assert (op && CONSTANT_CLASS_P (op));
		}
	      else
		{
		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
		  gimple *init_stmt;
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      tree true_val
			= build_all_ones_cst (TREE_TYPE (vector_type));
		      tree false_val
			= build_zero_cst (TREE_TYPE (vector_type));
		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						       op, true_val,
						       false_val);
		    }
		  else
		    {
		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
				   op);
		      init_stmt
			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
					       op);
		    }
		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
		  op = new_temp;
		}
	    }
	  elts[number_of_places_left_in_vector] = op;
	  if (!CONSTANT_CLASS_P (op))
	    constant_p = false;
	  /* For BB vectorization we have to compute an insert location
	     when a def is inside the analyzed region since we cannot
	     simply insert at the BB start in this case.  */
	  stmt_vec_info opdef;
	  if (TREE_CODE (orig_op) == SSA_NAME
	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
	      && is_a <bb_vec_info> (vinfo)
	      && (opdef = vinfo->lookup_def (orig_op)))
	    {
	      if (!insert_after)
		insert_after = opdef;
	      else
		insert_after = get_later_stmt (insert_after, opdef);
	    }

	  if (number_of_places_left_in_vector == 0)
	    {
	      if (constant_p
		  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
		  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
		vec_cst = gimple_build_vector (&ctor_seq, &elts);
	      else
		{
		  if (permute_results.is_empty ())
		    duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
					      elts, number_of_vectors,
					      permute_results);
		  vec_cst = permute_results[number_of_vectors - j - 1];
		}
	      if (!gimple_seq_empty_p (ctor_seq))
		{
		  if (insert_after)
		    {
		      gimple_stmt_iterator gsi
			= gsi_for_stmt (insert_after->stmt);
		      gsi_insert_seq_after (&gsi, ctor_seq,
					    GSI_CONTINUE_LINKING);
		    }
		  else
		    vinfo->insert_seq_on_entry (NULL, ctor_seq);
		  ctor_seq = NULL;
		}
	      voprnds.quick_push (vec_cst);
	      insert_after = NULL;
	      number_of_places_left_in_vector = nunits;
	      constant_p = true;
	      elts.new_vector (vector_type, nunits, 1);
	      elts.quick_grow (nunits);
	    }
	}
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      SLP_TREE_VEC_DEFS (op_node).quick_push (vop);
    }

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > SLP_TREE_VEC_DEFS (op_node).length ())
    for (i = 0; SLP_TREE_VEC_DEFS (op_node).iterate (i, &vop) && i < vec_num;
	 i++)
      SLP_TREE_VEC_DEFS (op_node).quick_push (vop);
}
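
/* Illustrative sketch only: NUMBER_OF_COPIES above is
   NUNITS * NUMBER_OF_VECTORS / GROUP_SIZE.  For the example in the comment
   (group of 2 scalars, 4 elements per vector, 1 vector needed) this gives
   2, i.e. {s1, s2, s1, s2}.  Hypothetical helper, not used by the
   vectorizer.  */

static inline unsigned
slp_number_of_copies_sketch (unsigned nunits, unsigned number_of_vectors,
			     unsigned group_size)
{
  return nunits * number_of_vectors / group_size;
}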
/* Get the Ith vectorized definition from SLP_NODE.  */

tree
vect_get_slp_vect_def (slp_tree slp_node, unsigned i)
{
  if (SLP_TREE_VEC_STMTS (slp_node).exists ())
    return gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]);
  else
    return SLP_TREE_VEC_DEFS (slp_node)[i];
}

/* Get the vectorized definitions of SLP_NODE in *VEC_DEFS.  */

void
vect_get_slp_defs (slp_tree slp_node, vec<tree> *vec_defs)
{
  vec_defs->create (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
  if (SLP_TREE_DEF_TYPE (slp_node) == vect_internal_def)
    {
      unsigned j;
      gimple *vec_def_stmt;
      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), j, vec_def_stmt)
	vec_defs->quick_push (gimple_get_lhs (vec_def_stmt));
    }
  else
    vec_defs->splice (SLP_TREE_VEC_DEFS (slp_node));
}

/* Get N vectorized definitions for SLP_NODE.  */

void
vect_get_slp_defs (vec_info *,
		   slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
{
  if (n == -1U)
    n = SLP_TREE_CHILDREN (slp_node).length ();

  for (unsigned i = 0; i < n; ++i)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
      vec<tree> vec_defs = vNULL;
      vect_get_slp_defs (child, &vec_defs);
      vec_oprnds->quick_push (vec_defs);
    }
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE.  */

bool
vect_transform_slp_perm_load (vec_info *vinfo,
			      slp_tree node, vec<tree> dr_chain,
			      gimple_stmt_iterator *gsi, poly_uint64 vf,
			      bool analyze_only, unsigned *n_perms)
{
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  int vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned int mask_element;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  mode = TYPE_MODE (vectype);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (!analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */

  int vect_stmts_counter = 0;
  unsigned int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  vec_perm_builder mask;
  unsigned int nelts_to_build;
  unsigned int nvectors_per_build;
  bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
		      && multiple_p (nunits, group_size));
  if (repeating_p)
    {
      /* A single vector contains a whole number of copies of the node, so:
	 (a) all permutes can use the same mask; and
	 (b) the permutes only need a single vector input.  */
      mask.new_vector (nunits, group_size, 3);
      nelts_to_build = mask.encoded_nelts ();
      nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
    }
  else
    {
      /* We need to construct a separate mask for each vector statement.  */
      unsigned HOST_WIDE_INT const_nunits, const_vf;
      if (!nunits.is_constant (&const_nunits)
	  || !vf.is_constant (&const_vf))
	return false;
      mask.new_vector (const_nunits, const_nunits, 1);
      nelts_to_build = const_vf * group_size;
      nvectors_per_build = 1;
    }

  unsigned int count = mask.encoded_nelts ();
  mask.quick_grow (count);
  vec_perm_indices indices;

  for (unsigned int j = 0; j < nelts_to_build; j++)
    {
      unsigned int iter_num = j / group_size;
      unsigned int stmt_num = j % group_size;
      unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
			+ SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
      if (repeating_p)
	{
	  first_vec_index = 0;
	  mask_element = i;
	}
      else
	{
	  /* Enforced before the loop when !repeating_p.  */
	  unsigned int const_nunits = nunits.to_constant ();
	  vec_index = i / const_nunits;
	  mask_element = i % const_nunits;
	  if (vec_index == first_vec_index
	      || first_vec_index == -1)
	    first_vec_index = vec_index;
	  else if (vec_index == second_vec_index
		   || second_vec_index == -1)
	    {
	      second_vec_index = vec_index;
	      mask_element += const_nunits;
	    }
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "permutation requires at "
				 "least three vectors %G",
				 stmt_info->stmt);
	      gcc_assert (analyze_only);
	      return false;
	    }

	  gcc_assert (mask_element < 2 * const_nunits);
	}

      if (mask_element != index)
	noop_p = false;
      mask[index++] = mask_element;

      if (index == count && !noop_p)
	{
	  indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
	  if (!can_vec_perm_const_p (mode, indices))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported vect permute { ");
		  for (i = 0; i < count; ++i)
		    {
		      dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
		      dump_printf (MSG_MISSED_OPTIMIZATION, " ");
		    }
		  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		}
	      gcc_assert (analyze_only);
	      return false;
	    }

	  ++*n_perms;
	}

      if (index == count)
	{
	  if (!analyze_only)
	    {
	      tree mask_vec = NULL_TREE;

	      if (! noop_p)
		mask_vec = vect_gen_perm_mask_checked (vectype, indices);

	      if (second_vec_index == -1)
		second_vec_index = first_vec_index;

	      for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
		{
		  /* Generate the permute statement if necessary.  */
		  tree first_vec = dr_chain[first_vec_index + ri];
		  tree second_vec = dr_chain[second_vec_index + ri];
		  gimple *perm_stmt;
		  if (! noop_p)
		    {
		      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
		      tree perm_dest
			= vect_create_destination_var (gimple_assign_lhs (stmt),
						       vectype);
		      perm_dest = make_ssa_name (perm_dest);
		      perm_stmt
			= gimple_build_assign (perm_dest, VEC_PERM_EXPR,
					       first_vec, second_vec,
					       mask_vec);
		      vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt,
						   gsi);
		    }
		  else
		    /* If mask was NULL_TREE generate the requested
		       identity transform.  */
		    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

		  /* Store the vector statement in NODE.  */
		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
		}
	    }

	  index = 0;
	  first_vec_index = -1;
	  second_vec_index = -1;
	  noop_p = true;
	}
    }

  return true;
}
/* Vectorize the SLP permutations in NODE as specified
   in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP
   child number and lane number.
   Interleaving of two two-lane two-child SLP subtrees (not supported):
     [ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ]
   A blend of two four-lane two-child SLP subtrees:
     [ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
   Highpart of a four-lane one-child SLP subtree (not supported):
     [ { 0, 2 }, { 0, 3 } ]
   Where currently only a subset is supported by code generating below.  */

static bool
vectorizable_slp_permutation (vec_info *vinfo, gimple_stmt_iterator *gsi,
			      slp_tree node, stmt_vector_for_cost *cost_vec)
{
  tree vectype = SLP_TREE_VECTYPE (node);

  /* ??? We currently only support all same vector input and output types
     while the SLP IL should really do a concat + select and thus accept
     arbitrary mismatches.  */
  slp_tree child;
  unsigned i;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!types_compatible_p (SLP_TREE_VECTYPE (child), vectype))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Unsupported lane permutation\n");
	return false;
      }

  vec<std::pair<unsigned, unsigned> > &perm = SLP_TREE_LANE_PERMUTATION (node);
  gcc_assert (perm.length () == SLP_TREE_LANES (node));
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vectorizing permutation");
      for (unsigned i = 0; i < perm.length (); ++i)
	dump_printf (MSG_NOTE, " op%u[%u]", perm[i].first, perm[i].second);
      dump_printf (MSG_NOTE, "\n");
    }

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  if (!nunits.is_constant ())
    return false;
  unsigned HOST_WIDE_INT vf = 1;
  if (loop_vec_info linfo = dyn_cast <loop_vec_info> (vinfo))
    if (!LOOP_VINFO_VECT_FACTOR (linfo).is_constant (&vf))
      return false;
  unsigned olanes = vf * SLP_TREE_LANES (node);
  gcc_assert (multiple_p (olanes, nunits));

  /* Compute the { { SLP operand, vector index}, lane } permutation sequence
     from the { SLP operand, scalar lane } permutation as recorded in the
     SLP node as intermediate step.  This part should already work
     with SLP children with arbitrary number of lanes.  */
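
  /* For illustration (added commentary, derived from the "blend" example
     above): with the permutation [ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ],
     four-lane children and VF 1 the intermediate sequence becomes

       { { { 0, 0 }, 0 }, { { 1, 0 }, 1 }, { { 0, 0 }, 2 }, { { 1, 0 }, 3 } }

     i.e. every element still comes from vector 0 of its child, selecting
     lanes 0 and 2 from child 0 and lanes 1 and 3 from child 1.  */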
  auto_vec<std::pair<std::pair<unsigned, unsigned>, unsigned> > vperm;
  auto_vec<unsigned> active_lane;
  vperm.create (olanes);
  active_lane.safe_grow_cleared (SLP_TREE_CHILDREN (node).length (), true);
  for (unsigned i = 0; i < vf; ++i)
    {
      for (unsigned pi = 0; pi < perm.length (); ++pi)
	{
	  std::pair<unsigned, unsigned> p = perm[pi];
	  tree vtype = SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (node)[p.first]);
	  unsigned vnunits = TYPE_VECTOR_SUBPARTS (vtype).to_constant ();
	  unsigned vi = (active_lane[p.first] + p.second) / vnunits;
	  unsigned vl = (active_lane[p.first] + p.second) % vnunits;
	  vperm.quick_push (std::make_pair (std::make_pair (p.first, vi), vl));
	}
      /* Advance to the next group.  */
      for (unsigned j = 0; j < SLP_TREE_CHILDREN (node).length (); ++j)
	active_lane[j] += SLP_TREE_LANES (SLP_TREE_CHILDREN (node)[j]);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "as");
      for (unsigned i = 0; i < vperm.length (); ++i)
	{
	  if (i != 0 && multiple_p (i, TYPE_VECTOR_SUBPARTS (vectype)))
	    dump_printf (MSG_NOTE, ",");
	  dump_printf (MSG_NOTE, " vops%u[%u][%u]",
		       vperm[i].first.first, vperm[i].first.second,
		       vperm[i].second);
	}
      dump_printf (MSG_NOTE, "\n");
    }

  /* We can only handle two-vector permutes, everything else should
     be lowered on the SLP level.  The following is closely inspired
     by vect_transform_slp_perm_load and is supposed to eventually
     replace it.
     ???  As intermediate step do code-gen in the SLP tree representation
     somehow?  */
  std::pair<unsigned, unsigned> first_vec = std::make_pair (-1U, -1U);
  std::pair<unsigned, unsigned> second_vec = std::make_pair (-1U, -1U);
  unsigned int const_nunits = nunits.to_constant ();
  unsigned int index = 0;
  unsigned int mask_element;
  vec_perm_builder mask;
  mask.new_vector (const_nunits, const_nunits, 1);
  unsigned int count = mask.encoded_nelts ();
  mask.quick_grow (count);
  vec_perm_indices indices;
  unsigned nperms = 0;
  for (unsigned i = 0; i < vperm.length (); ++i)
    {
      mask_element = vperm[i].second;
      if (first_vec.first == -1U
	  || first_vec == vperm[i].first)
	first_vec = vperm[i].first;
      else if (second_vec.first == -1U
	       || second_vec == vperm[i].first)
	{
	  second_vec = vperm[i].first;
	  mask_element += const_nunits;
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "permutation requires at "
			     "least three vectors");
	  gcc_assert (!gsi);
	  return false;
	}

      mask[index++] = mask_element;

      if (index == count)
	{
	  indices.new_vector (mask, second_vec.first == -1U ? 1 : 2,
			      const_nunits);
	  bool identity_p = indices.series_p (0, 1, 0, 1);
	  if (!identity_p
	      && !can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported vect permute { ");
		  for (i = 0; i < count; ++i)
		    {
		      dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
		      dump_printf (MSG_MISSED_OPTIMIZATION, " ");
		    }
		  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		}
	      gcc_assert (!gsi);
	      return false;
	    }

	  nperms++;
	  if (gsi)
	    {
	      if (second_vec.first == -1U)
		second_vec = first_vec;

	      /* Generate the permute statement if necessary.  */
	      slp_tree first_node = SLP_TREE_CHILDREN (node)[first_vec.first];
	      tree first_def
		= vect_get_slp_vect_def (first_node, first_vec.second);
	      gassign *perm_stmt;
	      tree perm_dest = make_ssa_name (vectype);
	      if (!identity_p)
		{
		  slp_tree second_node
		    = SLP_TREE_CHILDREN (node)[second_vec.first];
		  tree second_def
		    = vect_get_slp_vect_def (second_node, second_vec.second);
		  tree mask_vec = vect_gen_perm_mask_checked (vectype, indices);
		  perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
						   first_def, second_def,
						   mask_vec);
		}
	      else
		/* We need a copy here in case the def was external.  */
		perm_stmt = gimple_build_assign (perm_dest, first_def);
	      vect_finish_stmt_generation (vinfo, NULL, perm_stmt, gsi);
	      /* Store the vector statement in NODE.  */
	      SLP_TREE_VEC_STMTS (node).quick_push (perm_stmt);
	    }

	  index = 0;
	  first_vec = std::make_pair (-1U, -1U);
	  second_vec = std::make_pair (-1U, -1U);
	}
    }

  if (!gsi)
    record_stmt_cost (cost_vec, nperms, vec_perm, NULL, vectype, 0, vect_body);

  return true;
}
/* Vectorize SLP instance tree in postorder.  */

static void
vect_schedule_slp_instance (vec_info *vinfo,
			    slp_tree node, slp_instance instance)
{
  gimple_stmt_iterator si;
  int i;
  slp_tree child;

  /* See if we have already vectorized the node in the graph of the
     SLP instance.  */
  if ((SLP_TREE_DEF_TYPE (node) == vect_internal_def
       && SLP_TREE_VEC_STMTS (node).exists ())
      || SLP_TREE_VEC_DEFS (node).exists ())
    return;

  /* Vectorize externals and constants.  */
  if (SLP_TREE_DEF_TYPE (node) == vect_constant_def
      || SLP_TREE_DEF_TYPE (node) == vect_external_def)
    {
      /* ??? vectorizable_shift can end up using a scalar operand which is
	 currently denoted as !SLP_TREE_VECTYPE.  No need to vectorize the
	 node in this case.  */
      if (!SLP_TREE_VECTYPE (node))
	return;

      vect_create_constant_vectors (vinfo, node);
      return;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (vinfo, child, instance);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  stmt_vec_info stmt_info = SLP_TREE_REPRESENTATIVE (node);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "------>vectorizing SLP node starting from: %G",
		     stmt_info->stmt);

  if (STMT_VINFO_DATA_REF (stmt_info)
      && SLP_TREE_CODE (node) != VEC_PERM_EXPR)
    {
      /* Vectorized loads go before the first scalar load to make it
	 ready early, vectorized stores go before the last scalar
	 stmt which is where all uses are ready.  */
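      /* For example (illustration only): for a two-lane load group

	     x_1 = p[0];   <- the vector load is emitted before this stmt
	     y_2 = p[1];

	 while for a two-lane store group

	     p[0] = a_3;
	     p[1] = b_4;   <- the vector store is emitted before this stmt

	 so that all scalar defs feeding the store are already available.  */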
      stmt_vec_info last_stmt_info = NULL;
      if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	last_stmt_info = vect_find_first_scalar_stmt_in_slp (node);
      else /* DR_IS_WRITE */
	last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
      si = gsi_for_stmt (last_stmt_info->stmt);
    }
  else if (SLP_TREE_CHILDREN (node).is_empty ())
    {
      /* This happens for reduction and induction PHIs where we do not use the
	 insertion iterator.  */
      gcc_assert (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
		  == cycle_phi_info_type
		  || (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node))
		      == induc_vec_info_type));
      si = gsi_none ();
    }
  else
    {
      /* Emit other stmts after the children vectorized defs which is
	 earliest possible.  */
      gimple *last_stmt = NULL;
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
	if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	  {
	    /* For fold-left reductions we are retaining the scalar
	       reduction PHI but we still have SLP_TREE_NUM_VEC_STMTS
	       set so the representation isn't perfect.  Resort to the
	       last scalar def here.  */
	    if (SLP_TREE_VEC_STMTS (child).is_empty ())
	      {
		gcc_assert (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (child))
			    == cycle_phi_info_type);
		gphi *phi = as_a <gphi *>
			      (vect_find_last_scalar_stmt_in_slp (child)->stmt);
		if (!last_stmt
		    || vect_stmt_dominates_stmt_p (last_stmt, phi))
		  last_stmt = phi;
	      }
	    /* We are emitting all vectorized stmts in the same place and
	       the last one is the last.
	       ???  Unless we have a load permutation applied and that
	       figures to re-use an earlier generated load.  */
	    unsigned j;
	    gimple *vstmt;
	    FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (child), j, vstmt)
	      if (!last_stmt
		  || vect_stmt_dominates_stmt_p (last_stmt, vstmt))
		last_stmt = vstmt;
	  }
	else if (!SLP_TREE_VECTYPE (child))
	  {
	    /* For externals we use unvectorized at all scalar defs.  */
	    unsigned j;
	    tree def;
	    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (child), j, def)
	      if (TREE_CODE (def) == SSA_NAME
		  && !SSA_NAME_IS_DEFAULT_DEF (def))
		{
		  gimple *stmt = SSA_NAME_DEF_STMT (def);
		  if (!last_stmt
		      || vect_stmt_dominates_stmt_p (last_stmt, stmt))
		    last_stmt = stmt;
		}
	  }
	else
	  {
	    /* For externals we have to look at all defs since their
	       insertion place is decided per vector.  But beware
	       of pre-existing vectors where we need to make sure
	       we do not insert before the region boundary.  */
	    if (SLP_TREE_SCALAR_OPS (child).is_empty ()
		&& !vinfo->lookup_def (SLP_TREE_VEC_DEFS (child)[0]))
	      last_stmt = gsi_stmt (as_a <bb_vec_info> (vinfo)->region_begin);
	    else
	      {
		unsigned j;
		tree vdef;
		FOR_EACH_VEC_ELT (SLP_TREE_VEC_DEFS (child), j, vdef)
		  if (TREE_CODE (vdef) == SSA_NAME
		      && !SSA_NAME_IS_DEFAULT_DEF (vdef))
		    {
		      gimple *vstmt = SSA_NAME_DEF_STMT (vdef);
		      if (!last_stmt
			  || vect_stmt_dominates_stmt_p (last_stmt, vstmt))
			last_stmt = vstmt;
		    }
	      }
	  }
      /* This can happen when all children are pre-existing vectors or
	 constants.  */
      if (!last_stmt)
	last_stmt = vect_find_first_scalar_stmt_in_slp (node)->stmt;
      if (is_a <gphi *> (last_stmt))
	si = gsi_after_labels (gimple_bb (last_stmt));
      else
	{
	  si = gsi_for_stmt (last_stmt);
	  gsi_next (&si);
	}
    }

  bool done_p = false;

  /* Handle purely internal nodes.  */
  if (SLP_TREE_CODE (node) == VEC_PERM_EXPR)
    {
      /* ??? the transform kind is stored to STMT_VINFO_TYPE which might
	 be shared with different SLP nodes (but usually it's the same
	 operation apart from the case the stmt is only there for denoting
	 the actual scalar lane defs ...).  So do not call vect_transform_stmt
	 but open-code it here (partly).  */
      bool done = vectorizable_slp_permutation (vinfo, &si, node, NULL);
      gcc_assert (done);
      done_p = true;
    }
  if (!done_p)
    vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
}
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */
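
/* For illustration (added commentary): after the SLP copies of a call have
   been vectorized, a remaining scalar statement such as

     x_1 = __builtin_sqrtf (a_2);

   is rewritten to

     x_1 = 0.0;

   leaving it to later DCE to remove once no scalar uses remain.  */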
static void
vect_remove_slp_scalar_calls (vec_info *vinfo,
			      slp_tree node, hash_set<slp_tree> &visited)
{
  gimple *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (vinfo, child, visited);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
      if (!stmt || gimple_bb (stmt) == NULL)
	continue;
      if (is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      gsi = gsi_for_stmt (stmt);
      vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

static void
vect_remove_slp_scalar_calls (vec_info *vinfo, slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_remove_slp_scalar_calls (vinfo, node, visited);
}
/* Vectorize the instance root.  */
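
/* For illustration (added commentary): a BB-SLP instance rooted at a vector
   CONSTRUCTOR such as

     x_5 = {a_1, b_2, c_3, d_4};

   has its element defs vectorized by the SLP tree; the routine below then
   re-points the root statement at the single generated vector def (possibly
   through a VIEW_CONVERT_EXPR), or at a CONSTRUCTOR of several vector defs
   when more than one vector statement was generated.  */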
static void
vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
{
  gassign *rstmt = NULL;

  if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
    {
      gimple *child_stmt;
      int j;

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt)
	{
	  tree vect_lhs = gimple_get_lhs (child_stmt);
	  tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
	  if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
					  TREE_TYPE (vect_lhs)))
	    vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
			       vect_lhs);
	  rstmt = gimple_build_assign (root_lhs, vect_lhs);
	  break;
	}
    }
  else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
    {
      int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      gimple *child_stmt;
      int j;
      vec<constructor_elt, va_gc> *v;
      vec_alloc (v, nelts);

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt)
	CONSTRUCTOR_APPEND_ELT (v,
				NULL_TREE,
				gimple_get_lhs (child_stmt));
      tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
      tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
      tree r_constructor = build_constructor (rtype, v);
      rstmt = gimple_build_assign (lhs, r_constructor);
    }

  gcc_assert (rstmt);

  gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
  gsi_replace (&rgsi, rstmt, true);
}
/* Generate vector code for all SLP instances in the loop/basic block.  */

void
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree node = SLP_INSTANCE_TREE (instance);
      /* Schedule the tree of INSTANCE.  */
      vect_schedule_slp_instance (vinfo, node, instance);

      if (SLP_INSTANCE_ROOT_STMT (instance))
	vectorize_slp_instance_root_stmt (node, instance);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ???  Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ???  For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (vinfo, root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info); j++)
	{
	  if (!STMT_VINFO_DATA_REF (store_info))
	    break;

	  if (SLP_INSTANCE_ROOT_STMT (instance))
	    continue;

	  store_info = vect_orig_stmt (store_info);
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  vinfo->remove_stmt (store_info);