1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
47 #include "dump-context.h"
50 /* Initialize a SLP node. */
/* Default-construct an SLP node: every vec<> member starts out as vNULL
   (empty), the vector-stmt count is zero, the def type is
   vect_uninitialized_def, and no vectype or representative stmt is set yet.
   The vec<> members are released in the matching ~_slp_tree destructor.
   NOTE(review): the source extraction appears to drop trailing member
   initializations here (e.g. a refcnt used by vect_free_slp_tree) — confirm
   against the original file.  */
52 _slp_tree::_slp_tree ()
54 SLP_TREE_SCALAR_STMTS (this) = vNULL
;
55 SLP_TREE_SCALAR_OPS (this) = vNULL
;
56 SLP_TREE_VEC_STMTS (this) = vNULL
;
57 SLP_TREE_VEC_DEFS (this) = vNULL
;
58 SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
59 SLP_TREE_CHILDREN (this) = vNULL
;
60 SLP_TREE_LOAD_PERMUTATION (this) = vNULL
;
61 SLP_TREE_TWO_OPERATORS (this) = false;
62 SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def
;
63 SLP_TREE_VECTYPE (this) = NULL_TREE
;
64 SLP_TREE_REPRESENTATIVE (this) = NULL
;
69 /* Tear down a SLP node. */
/* Release every vec<> member the constructor initialized to vNULL.
   Only the vectors themselves are released here; child nodes are freed
   recursively via vect_free_slp_tree, which manages node lifetime.  */
71 _slp_tree::~_slp_tree ()
73 SLP_TREE_CHILDREN (this).release ();
74 SLP_TREE_SCALAR_STMTS (this).release ();
75 SLP_TREE_SCALAR_OPS (this).release ();
76 SLP_TREE_VEC_STMTS (this).release ();
77 SLP_TREE_VEC_DEFS (this).release ();
78 SLP_TREE_LOAD_PERMUTATION (this).release ();
81 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
82 FINAL_P is true if we have vectorized the instance or if we have
83 made a final decision not to vectorize the statements in any way. */
86 vect_free_slp_tree (slp_tree node
, bool final_p
)
/* Nodes are reference-counted: only the drop of the last reference
   actually tears the node down; earlier callers just return here.  */
91 if (--node
->refcnt
!= 0)
/* Recurse into the children first so the whole subtree is released.  */
94 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
95 vect_free_slp_tree (child
, final_p
)
;
97 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
98 Some statements might no longer exist, after having been
99 removed by vect_transform_stmt. Updating the remaining
100 statements would be redundant. */
/* Undo the per-stmt use count bumped in vect_create_new_slp_node.
   NOTE(review): extraction seems to drop the FINAL_P guard around this
   loop — confirm the decrement is skipped when FINAL_P is true.  */
103 stmt_vec_info stmt_info
;
104 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
106 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info
) > 0);
107 STMT_VINFO_NUM_SLP_USES (stmt_info
)--;
114 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
115 have vectorized the instance or if we have made a final decision not
116 to vectorize the statements in any way. */
/* Drops the instance's reference on its SLP tree (freeing it when this was
   the last reference) and releases the instance's loads vector.  */
119 vect_free_slp_instance (slp_instance instance
, bool final_p
)
121 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
), final_p
)
;
122 SLP_INSTANCE_LOADS (instance
).release ();
127 /* Create an SLP node for SCALAR_STMTS. */
/* Allocate a new internal-def SLP node that takes over the SCALAR_STMTS
   vector, reserves room for NOPS children, and uses the first scalar stmt
   as the node's representative.  Each scalar stmt's SLP use count is
   incremented; vect_free_slp_tree decrements it again.  */
130 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
, unsigned nops
)
132 slp_tree node
= new _slp_tree
;
133 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
134 SLP_TREE_CHILDREN (node
).create (nops
);
135 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
136 SLP_TREE_REPRESENTATIVE (node
) = scalar_stmts
[0];
139 stmt_vec_info stmt_info
;
140 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt_info
)
141 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
146 /* Create an SLP node for OPS. */
/* Overload for external (invariant) operands: the node owns the OPS vector
   of trees and is marked vect_external_def, so it has no scalar stmts,
   children or representative.  */
149 vect_create_new_slp_node (vec
<tree
> ops
)
151 slp_tree node
= new _slp_tree
;
152 SLP_TREE_SCALAR_OPS (node
) = ops
;
153 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
158 /* This structure is used in creation of an SLP tree. Each instance
159 corresponds to the same operand in a group of scalar stmts in an SLP
161 typedef struct _slp_oprnd_info
163 /* Def-stmts for the operands. */
164 vec
<stmt_vec_info
> def_stmts
;
167 /* Information about the first statement, its vector def-type, type, the
168 operand itself in case it's constant, and an indication if it's a pattern
171 enum vect_def_type first_dt
;
/* NOTE(review): this definition looks truncated by the extraction —
   vect_create_oprnd_info below also initializes members ->ops,
   ->first_op_type and ->any_pattern, which must be declared here in the
   original file; confirm against the full source.  */
176 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
/* Build a vector of NOPS freshly XNEW'ed _slp_oprnd_info records, each with
   room for GROUP_SIZE def stmts and ops, the def type still uninitialized,
   no first-operand type, and no pattern stmt seen yet.  The caller releases
   everything via vect_free_oprnd_info.  */
178 static vec
<slp_oprnd_info
>
179 vect_create_oprnd_info (int nops
, int group_size
)
182 slp_oprnd_info oprnd_info
;
183 vec
<slp_oprnd_info
> oprnds_info
;
185 oprnds_info
.create (nops
);
186 for (i
= 0; i
< nops
; i
++)
188 oprnd_info
= XNEW (struct _slp_oprnd_info
);
189 oprnd_info
->def_stmts
.create (group_size
);
190 oprnd_info
->ops
.create (group_size
);
191 oprnd_info
->first_dt
= vect_uninitialized_def
;
192 oprnd_info
->first_op_type
= NULL_TREE
;
193 oprnd_info
->any_pattern
= false;
194 oprnds_info
.quick_push (oprnd_info
);
201 /* Free operands info. */
/* Inverse of vect_create_oprnd_info: release each record's def_stmts and
   ops vectors, XDELETE the record itself, then release the outer vector.  */
204 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
207 slp_oprnd_info oprnd_info
;
209 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
211 oprnd_info
->def_stmts
.release ();
212 oprnd_info
->ops
.release ();
213 XDELETE (oprnd_info
);
216 oprnds_info
.release ();
220 /* Return true if STMTS contains a pattern statement. */
/* Linear scan over STMTS; answers true as soon as any element satisfies
   is_pattern_stmt_p.  */
223 vect_contains_pattern_stmt_p (vec
<stmt_vec_info
> stmts
)
225 stmt_vec_info stmt_info
;
227 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
228 if (is_pattern_stmt_p (stmt_info
))
233 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
234 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
/* Walks the DR_GROUP_NEXT_ELEMENT chain from FIRST_STMT_INFO, accumulating
   DR_GROUP_GAP of each element into the result until STMT_INFO is found.
   Bails out early if STMT_INFO's group leader is not FIRST_STMT_INFO.  */
238 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info
,
239 stmt_vec_info first_stmt_info
)
241 stmt_vec_info next_stmt_info
= first_stmt_info
;
/* Not part of this chain at all: the stmt's group leader differs.  */
244 if (first_stmt_info
!= DR_GROUP_FIRST_ELEMENT (stmt_info
))
249 if (next_stmt_info
== stmt_info
)
251 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
)
;
/* Each member's gap contributes to the element's position.  */
253 result
+= DR_GROUP_GAP (next_stmt_info
);
255 while (next_stmt_info
);
260 /* Check whether it is possible to load COUNT elements of type ELT_TYPE
261 using the method implemented by duplicate_and_interleave. Return true
262 if so, returning the number of intermediate vectors in *NVECTORS_OUT
263 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
267 can_duplicate_and_interleave_p (vec_info
*vinfo
, unsigned int count
,
268 tree elt_type
, unsigned int *nvectors_out
,
269 tree
*vector_type_out
,
/* First obtain the natural vector type for COUNT elements of ELT_TYPE;
   without a real vector mode there is nothing we can do.  */
272 tree base_vector_type
= get_vectype_for_scalar_type (vinfo
, elt_type
, count
);
273 if (!base_vector_type
|| !VECTOR_MODE_P (TYPE_MODE (base_vector_type
)))
276 machine_mode base_vector_mode
= TYPE_MODE (base_vector_type
);
277 poly_int64 elt_bytes
= count
* GET_MODE_UNIT_SIZE (base_vector_mode
);
278 unsigned int nvectors
= 1;
/* Try successively halving ELT_BYTES: fuse COUNT/NVECTORS consecutive
   elements into one integer of that many bytes and check the target can
   perform the two interleaving permutations on the resulting vectors.  */
281 scalar_int_mode int_mode
;
282 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
283 if (int_mode_for_size (elt_bits
, 1).exists (&int_mode
))
285 /* Get the natural vector type for this SLP group size. */
286 tree int_type
= build_nonstandard_integer_type
287 (GET_MODE_BITSIZE (int_mode
), 1);
289 = get_vectype_for_scalar_type (vinfo
, int_type
, count
);
/* The fused vector type must exist, have a vector mode, and occupy
   exactly the same number of bytes as the base vector mode.  */
291 && VECTOR_MODE_P (TYPE_MODE (vector_type
))
292 && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
293 GET_MODE_SIZE (base_vector_mode
)))
295 /* Try fusing consecutive sequences of COUNT / NVECTORS elements
296 together into elements of type INT_TYPE and using the result
297 to build NVECTORS vectors. */
298 poly_uint64 nelts
= GET_MODE_NUNITS (TYPE_MODE (vector_type
));
/* sel1 interleaves low halves, sel2 interleaves high halves; both are
   encoded as 2 interleaved series of 3 elements each.  */
299 vec_perm_builder
sel1 (nelts
, 2, 3);
300 vec_perm_builder
sel2 (nelts
, 2, 3);
301 poly_int64 half_nelts
= exact_div (nelts
, 2);
302 for (unsigned int i
= 0; i
< 3; ++i
)
305 sel1
.quick_push (i
+ nelts
);
306 sel2
.quick_push (half_nelts
+ i
);
307 sel2
.quick_push (half_nelts
+ i
+ nelts
);
309 vec_perm_indices
indices1 (sel1
, 2, nelts
);
310 vec_perm_indices
indices2 (sel2
, 2, nelts
);
/* Success: both permutations are supported for this mode; report the
   chosen vector type and permute masks through the out parameters.  */
311 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
312 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
315 *nvectors_out
= nvectors
;
317 *vector_type_out
= vector_type
;
320 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
322 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
/* Otherwise halve the fused element size (doubling NVECTORS in the
   original loop) and retry; give up when ELT_BYTES is no longer even.  */
329 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
335 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
336 they are of a valid type and that they match the defs of the first stmt of
337 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
338 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
339 indicates swap is required for cond_expr stmts. Specifically, *SWAP
340 is 1 if STMT is cond and operands of comparison need to be swapped;
341 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
342 If there is any operand swap in this function, *SWAP is set to non-zero
344 If there was a fatal error return -1; if the error could be corrected by
345 swapping operands of father node of this one, return 1; if everything is
348 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
349 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
350 vec
<slp_oprnd_info
> *oprnds_info
)
352 stmt_vec_info stmt_info
= stmts
[stmt_num
];
354 unsigned int i
, number_of_oprnds
;
355 enum vect_def_type dt
= vect_uninitialized_def
;
356 slp_oprnd_info oprnd_info
;
357 int first_op_idx
= 1;
358 unsigned int commutative_op
= -1U;
359 bool first_op_cond
= false;
/* Only the group's first stmt (STMT_NUM == 0) records the reference
   def-types; later stmts are checked against those records.  */
360 bool first
= stmt_num
== 0;
/* Determine the operand count and, where known, which operand is
   commutative, based on whether STMT is a call or an assignment.  */
362 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
364 number_of_oprnds
= gimple_call_num_args (stmt
);
366 if (gimple_call_internal_p (stmt
))
368 internal_fn ifn
= gimple_call_internal_fn (stmt
);
369 commutative_op
= first_commutative_argument (ifn
);
371 /* Masked load, only look at mask. */
372 if (ifn
== IFN_MASK_LOAD
)
374 number_of_oprnds
= 1;
375 /* Mask operand index. */
380 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
382 enum tree_code code
= gimple_assign_rhs_code (stmt
);
383 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
384 /* Swap can only be done for cond_expr if asked to, otherwise we
385 could result in different comparison code to the first stmt. */
386 if (code
== COND_EXPR
387 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
389 first_op_cond
= true;
393 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
/* A pre-set *SWAP is only meaningful for the cond_expr case.  */
398 bool swapped
= (*swap
!= 0);
399 gcc_assert (!swapped
|| first_op_cond
);
400 for (i
= 0; i
< number_of_oprnds
; i
++)
405 /* Map indicating how operands of cond_expr should be swapped. */
406 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
407 int *map
= maps
[*swap
];
/* For cond_exprs the comparison operands live inside rhs1, so pick
   them through the swap map; otherwise index the gimple ops directly
   (honoring a requested operand swap of the two commutative ops).  */
410 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
411 first_op_idx
), map
[i
]);
413 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
416 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
417 if (TREE_CODE (oprnd
) == VIEW_CONVERT_EXPR
)
418 oprnd
= TREE_OPERAND (oprnd
, 0);
420 oprnd_info
= (*oprnds_info
)[i
];
/* Classify the operand's definition; bail out if it can't even be
   analyzed (fatal for the whole group).  */
422 stmt_vec_info def_stmt_info
;
423 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
425 if (dump_enabled_p ())
426 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
427 "Build SLP failed: can't analyze def for %T\n",
433 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
434 oprnd_info
->any_pattern
= true;
438 /* For the swapping logic below force vect_reduction_def
439 for the reduction op in a SLP reduction group. */
440 if (!STMT_VINFO_DATA_REF (stmt_info
)
441 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
442 && (int)i
== STMT_VINFO_REDUC_IDX (stmt_info
)
444 dt
= vect_reduction_def
;
/* First stmt of the group: record the reference def type and operand
   type that all later group members must match.  */
445 oprnd_info
->first_dt
= dt
;
446 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
450 /* Not first stmt of the group, check that the def-stmt/s match
451 the def-stmt/s of the first stmt. Allow different definition
452 types for reduction chains: the first stmt must be a
453 vect_reduction_def (a phi node), and the rest
454 end in the reduction chain. */
455 tree type
= TREE_TYPE (oprnd
);
456 if ((oprnd_info
->first_dt
!= dt
457 && !(oprnd_info
->first_dt
== vect_reduction_def
458 && !STMT_VINFO_DATA_REF (stmt_info
)
459 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
461 && !STMT_VINFO_DATA_REF (def_stmt_info
)
462 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
463 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
464 && !((oprnd_info
->first_dt
== vect_external_def
465 || oprnd_info
->first_dt
== vect_constant_def
)
466 && (dt
== vect_external_def
467 || dt
== vect_constant_def
)))
468 || !types_compatible_p (oprnd_info
->first_op_type
, type
)
469 || (!STMT_VINFO_DATA_REF (stmt_info
)
470 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
472 || STMT_VINFO_DATA_REF (def_stmt_info
)
473 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
474 != REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
475 != (oprnd_info
->first_dt
!= vect_reduction_def
))))
477 /* Try swapping operands if we got a mismatch. */
/* A mismatch on the commutative operand (when we haven't swapped
   already) is recoverable: retry with swapped operands.  */
478 if (i
== commutative_op
&& !swapped
)
480 if (dump_enabled_p ())
481 dump_printf_loc (MSG_NOTE
, vect_location
,
482 "trying swapped operands\n");
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
489 "Build SLP failed: different types\n");
/* With variable-length vectors, constant/external defs must be
   buildable via duplicate_and_interleave; BOOLEAN_TYPE never is.  */
493 if ((dt
== vect_constant_def
494 || dt
== vect_external_def
)
495 && !GET_MODE_SIZE (vinfo
->vector_mode
).is_constant ()
496 && (TREE_CODE (type
) == BOOLEAN_TYPE
497 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
500 if (dump_enabled_p ())
501 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
502 "Build SLP failed: invalid type of def "
503 "for variable-length SLP %T\n", oprnd
);
508 /* Check the types of the definitions. */
511 case vect_external_def
:
512 /* Make sure to demote the overall operand to external. */
513 oprnd_info
->first_dt
= vect_external_def
;
515 case vect_constant_def
:
516 oprnd_info
->def_stmts
.quick_push (NULL
);
517 oprnd_info
->ops
.quick_push (oprnd
);
520 case vect_internal_def
:
521 case vect_reduction_def
:
522 if (oprnd_info
->first_dt
== vect_reduction_def
523 && !STMT_VINFO_DATA_REF (stmt_info
)
524 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
525 && !STMT_VINFO_DATA_REF (def_stmt_info
)
526 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
527 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
529 /* For a SLP reduction chain we want to duplicate the
530 reduction to each of the chain members. That gets
531 us a sane SLP graph (still the stmts are not 100%
532 correct wrt the initial values). */
534 oprnd_info
->def_stmts
.quick_push (oprnd_info
->def_stmts
[0]);
535 oprnd_info
->ops
.quick_push (oprnd_info
->ops
[0]);
539 case vect_induction_def
:
540 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
541 oprnd_info
->ops
.quick_push (oprnd
);
545 /* FORNOW: Not supported. */
546 if (dump_enabled_p ())
547 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
548 "Build SLP failed: illegal type of def %T\n",
558 if (dump_enabled_p ())
559 dump_printf_loc (MSG_NOTE
, vect_location
,
560 "swapped operands to match def types in %G",
568 /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
569 Return true if we can, meaning that this choice doesn't conflict with
570 existing SLP nodes that use STMT_INFO. */
573 vect_update_shared_vectype (stmt_vec_info stmt_info
, tree vectype
)
/* Nothing to do if the stmt already has a compatible vector type.  */
575 tree old_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
576 if (old_vectype
&& useless_type_conversion_p (vectype
, old_vectype
))
579 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
580 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
582 /* We maintain the invariant that if any statement in the group is
583 used, all other members of the group have the same vector type. */
584 stmt_vec_info first_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
585 stmt_vec_info member_info
= first_info
;
/* First pass: refuse if any group member is already in use by an SLP
   node or is a pattern stmt (its vectype can't be changed freely).  */
586 for (; member_info
; member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
587 if (STMT_VINFO_NUM_SLP_USES (member_info
) > 0
588 || is_pattern_stmt_p (member_info
))
/* Second pass: safe to update, so give every member the new type.  */
593 for (member_info
= first_info
; member_info
;
594 member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
595 STMT_VINFO_VECTYPE (member_info
) = vectype
;
/* Non-grouped stmt: updatable only while unused and not a pattern.  */
599 else if (STMT_VINFO_NUM_SLP_USES (stmt_info
) == 0
600 && !is_pattern_stmt_p (stmt_info
))
602 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
/* Conflict: report the old vs. proposed vector type and fail.  */
606 if (dump_enabled_p ())
608 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
609 "Build SLP failed: incompatible vector"
610 " types for: %G", stmt_info
->stmt
);
611 dump_printf_loc (MSG_NOTE
, vect_location
,
612 " old vector type: %T\n", old_vectype
);
613 dump_printf_loc (MSG_NOTE
, vect_location
,
614 " new vector type: %T\n", vectype
);
619 /* Try to infer and assign a vector type to all the statements in STMTS.
620 Used only for BB vectorization. */
/* Derives the vector type from STMTS[0] (for the whole group size) and then
   applies it to every stmt via vect_update_shared_vectype, failing if any
   stmt cannot accept it.  */
623 vect_update_all_shared_vectypes (vec_info
*vinfo
, vec
<stmt_vec_info
> stmts
)
625 tree vectype
, nunits_vectype
;
626 if (!vect_get_vector_types_for_stmt (vinfo
, stmts
[0], &vectype
,
627 &nunits_vectype
, stmts
.length ()))
630 stmt_vec_info stmt_info
;
632 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
633 if (!vect_update_shared_vectype (stmt_info
, vectype
))
639 /* Return true if call statements CALL1 and CALL2 are similar enough
640 to be combined into the same SLP group. */
/* Requires: same argument count, same combined function, compatible lhs and
   per-argument types for internal calls, and for ordinary calls the same
   callee expression and fntype.  */
643 compatible_calls_p (gcall
*call1
, gcall
*call2
)
645 unsigned int nargs
= gimple_call_num_args (call1
);
646 if (nargs
!= gimple_call_num_args (call2
))
649 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
/* Internal calls: structural type checks on result and arguments.  */
652 if (gimple_call_internal_p (call1
))
654 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
655 TREE_TYPE (gimple_call_lhs (call2
))))
657 for (unsigned int i
= 0; i
< nargs
; ++i
)
658 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
659 TREE_TYPE (gimple_call_arg (call2
, i
))))
/* Ordinary calls: the callee and the call's function type must agree.  */
664 if (!operand_equal_p (gimple_call_fn (call1
),
665 gimple_call_fn (call2
), 0))
668 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
674 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
675 caller's attempt to find the vector type in STMT_INFO with the narrowest
676 element type. Return true if VECTYPE is nonnull and if it is valid
677 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
678 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
679 vect_build_slp_tree. */
682 vect_record_max_nunits (vec_info
*vinfo
, stmt_vec_info stmt_info
,
683 unsigned int group_size
,
684 tree vectype
, poly_uint64
*max_nunits
)
/* A null VECTYPE means the stmt's data-type isn't vectorizable at all.  */
688 if (dump_enabled_p ())
689 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
690 "Build SLP failed: unsupported data-type in %G\n",
692 /* Fatal mismatch. */
696 /* If populating the vector type requires unrolling then fail
697 before adjusting *max_nunits for basic-block vectorization. */
698 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
699 unsigned HOST_WIDE_INT const_nunits
;
/* BB SLP cannot unroll: the vector must not hold more lanes than the
   group provides, and the lane count must be compile-time constant.  */
700 if (is_a
<bb_vec_info
> (vinfo
)
701 && (!nunits
.is_constant (&const_nunits
)
702 || const_nunits
> group_size
))
704 if (dump_enabled_p ())
705 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
706 "Build SLP failed: unrolling required "
707 "in basic block SLP\n");
708 /* Fatal mismatch. */
712 /* In case of multiple types we need to detect the smallest type. */
713 vect_update_max_nunits (max_nunits
, vectype
);
717 /* STMTS is a group of GROUP_SIZE SLP statements in which some
718 statements do the same operation as the first statement and in which
719 the others do ALT_STMT_CODE. Return true if we can take one vector
720 of the first operation and one vector of the second and permute them
721 to get the required result. VECTYPE is the type of the vector that
722 would be permuted. */
725 vect_two_operations_perm_ok_p (vec
<stmt_vec_info
> stmts
,
726 unsigned int group_size
, tree vectype
,
727 tree_code alt_stmt_code
)
/* The blend mask needs a compile-time-constant lane count.  */
729 unsigned HOST_WIDE_INT count
;
730 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&count
))
/* Build a select mask: lane i takes the element from the first vector
   for the primary opcode, and from the second for ALT_STMT_CODE.  */
733 vec_perm_builder
sel (count
, count
, 1);
734 for (unsigned int i
= 0; i
< count
; ++i
)
736 unsigned int elt
= i
;
737 gassign
*stmt
= as_a
<gassign
*> (stmts
[i
% group_size
]->stmt
);
738 if (gimple_assign_rhs_code (stmt
) == alt_stmt_code
)
740 sel
.quick_push (elt
);
/* Ask the target whether this constant two-input permute is supported.  */
742 vec_perm_indices
indices (sel
, 2, count
);
743 return can_vec_perm_const_p (TYPE_MODE (vectype
), indices
);
746 /* Verify if the scalar stmts STMTS are isomorphic, require data
747 permutation or are of unsupported types of operation. Return
748 true if they are, otherwise return false and indicate in *MATCHES
749 which stmts are not isomorphic to the first one. If MATCHES[0]
750 is false then this indicates the comparison could not be
751 carried out or the stmts will never be vectorized by SLP.
753 Note COND_EXPR is possibly isomorphic to another one after swapping its
754 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
755 the first stmt by swapping the two operands of comparison; set SWAP[i]
756 to 2 if stmt I is isormorphic to the first stmt by inverting the code
757 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
758 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
761 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
762 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
763 poly_uint64
*max_nunits
, bool *matches
,
767 stmt_vec_info first_stmt_info
= stmts
[0];
768 enum tree_code first_stmt_code
= ERROR_MARK
;
769 enum tree_code alt_stmt_code
= ERROR_MARK
;
770 enum tree_code rhs_code
= ERROR_MARK
;
771 enum tree_code first_cond_code
= ERROR_MARK
;
773 bool need_same_oprnds
= false;
774 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
777 machine_mode optab_op2_mode
;
778 machine_mode vec_mode
;
779 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
782 /* For every stmt in NODE find its def stmt/s. */
783 stmt_vec_info stmt_info
;
784 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
786 gimple
*stmt
= stmt_info
->stmt
;
790 if (dump_enabled_p ())
791 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
793 /* Fail to vectorize statements marked as unvectorizable. */
794 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
796 if (dump_enabled_p ())
797 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
798 "Build SLP failed: unvectorizable statement %G",
800 /* Fatal mismatch. */
/* Every SLP member must produce a value; reject stmts without a lhs
   (neither GIMPLE_ASSIGN nor a value-producing GIMPLE_CALL).  */
805 lhs
= gimple_get_lhs (stmt
);
806 if (lhs
== NULL_TREE
)
808 if (dump_enabled_p ())
809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
810 "Build SLP failed: not GIMPLE_ASSIGN nor "
811 "GIMPLE_CALL %G", stmt
);
812 /* Fatal mismatch. */
818 if (!vect_get_vector_types_for_stmt (vinfo
, stmt_info
, &vectype
,
819 &nunits_vectype
, group_size
)
821 && !vect_record_max_nunits (vinfo
, stmt_info
, group_size
,
822 nunits_vectype
, max_nunits
)))
824 /* Fatal mismatch. */
829 gcc_assert (vectype
);
/* For BB vectorization the chosen vectype must not conflict with
   other SLP uses of this stmt (or its DR group).  */
831 if (is_a
<bb_vec_info
> (vinfo
)
832 && !vect_update_shared_vectype (stmt_info
, vectype
))
/* Classify the stmt: calls are treated as CALL_EXPR "operations" and
   vetted for vectorizability; assignments use their rhs code.  */
835 gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
);
838 rhs_code
= CALL_EXPR
;
840 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
842 else if ((gimple_call_internal_p (call_stmt
)
843 && (!vectorizable_internal_fn_p
844 (gimple_call_internal_fn (call_stmt
))))
845 || gimple_call_tail_p (call_stmt
)
846 || gimple_call_noreturn_p (call_stmt
)
847 || !gimple_call_nothrow_p (call_stmt
)
848 || gimple_call_chain (call_stmt
))
850 if (dump_enabled_p ())
851 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
852 "Build SLP failed: unsupported call type %G",
854 /* Fatal mismatch. */
861 rhs_code
= gimple_assign_rhs_code (stmt
);
862 load_p
= TREE_CODE_CLASS (rhs_code
) == tcc_reference
;
865 /* Check the operation. */
/* First stmt establishes the reference operation for the group.  */
868 first_stmt_code
= rhs_code
;
870 /* Shift arguments should be equal in all the packed stmts for a
871 vector shift with scalar shift operand. */
872 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
873 || rhs_code
== LROTATE_EXPR
874 || rhs_code
== RROTATE_EXPR
)
876 vec_mode
= TYPE_MODE (vectype
);
878 /* First see if we have a vector/vector shift. */
879 optab
= optab_for_tree_code (rhs_code
, vectype
,
883 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
885 /* No vector/vector shift, try for a vector/scalar shift. */
886 optab
= optab_for_tree_code (rhs_code
, vectype
,
891 if (dump_enabled_p ())
892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
893 "Build SLP failed: no optab.\n");
894 /* Fatal mismatch. */
898 icode
= (int) optab_handler (optab
, vec_mode
);
899 if (icode
== CODE_FOR_nothing
)
901 if (dump_enabled_p ())
902 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
904 "op not supported by target.\n");
905 /* Fatal mismatch. */
/* Vector/scalar shift: operand 2 is scalar, so all group members
   must use the identical shift amount.  */
909 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
910 if (!VECTOR_MODE_P (optab_op2_mode
))
912 need_same_oprnds
= true;
913 first_op1
= gimple_assign_rhs2 (stmt
);
917 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
919 need_same_oprnds
= true;
920 first_op1
= gimple_assign_rhs2 (stmt
);
923 && gimple_call_internal_p (call_stmt
, IFN_DIV_POW2
))
925 need_same_oprnds
= true;
926 first_op1
= gimple_call_arg (call_stmt
, 1);
/* Subsequent stmts: allowed to differ from the first only as the
   single recorded alternate opcode (two-operator SLP), as the
   REALPART/IMAGPART pairing, or as another ref code within a
   grouped access; everything else is a (non-fatal) mismatch.  */
931 if (first_stmt_code
!= rhs_code
932 && alt_stmt_code
== ERROR_MARK
)
933 alt_stmt_code
= rhs_code
;
934 if (first_stmt_code
!= rhs_code
935 && (first_stmt_code
!= IMAGPART_EXPR
936 || rhs_code
!= REALPART_EXPR
)
937 && (first_stmt_code
!= REALPART_EXPR
938 || rhs_code
!= IMAGPART_EXPR
)
939 /* Handle mismatches in plus/minus by computing both
940 and merging the results. */
941 && !((first_stmt_code
== PLUS_EXPR
942 || first_stmt_code
== MINUS_EXPR
)
943 && (alt_stmt_code
== PLUS_EXPR
944 || alt_stmt_code
== MINUS_EXPR
)
945 && rhs_code
== alt_stmt_code
)
946 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
947 && (first_stmt_code
== ARRAY_REF
948 || first_stmt_code
== BIT_FIELD_REF
949 || first_stmt_code
== INDIRECT_REF
950 || first_stmt_code
== COMPONENT_REF
951 || first_stmt_code
== MEM_REF
)))
953 if (dump_enabled_p ())
955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
956 "Build SLP failed: different operation "
958 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
959 "original stmt %G", first_stmt_info
->stmt
);
/* Scalar shift amount (or IFN_DIV_POW2 divisor) must be identical
   across the group when flagged above.  */
965 if (need_same_oprnds
)
967 tree other_op1
= (call_stmt
968 ? gimple_call_arg (call_stmt
, 1)
969 : gimple_assign_rhs2 (stmt
));
970 if (!operand_equal_p (first_op1
, other_op1
, 0))
972 if (dump_enabled_p ())
973 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
974 "Build SLP failed: different shift "
975 "arguments in %G", stmt
);
981 if (!load_p
&& rhs_code
== CALL_EXPR
)
983 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
984 as_a
<gcall
*> (stmt
)))
986 if (dump_enabled_p ())
987 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
988 "Build SLP failed: different calls in %G",
996 /* Grouped store or load. */
997 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
999 if (REFERENCE_CLASS_P (lhs
))
/* Grouped load: all loads in one SLP node must come from the same
   interleaving chain (same DR group leader).  */
1007 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1008 if (prev_first_load
)
1010 /* Check that there are no loads from different interleaving
1011 chains in the same node. */
1012 if (prev_first_load
!= first_load
)
1014 if (dump_enabled_p ())
1015 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1017 "Build SLP failed: different "
1018 "interleaving chains in one node %G",
1025 prev_first_load
= first_load
;
1027 } /* Grouped access. */
1032 /* Not grouped load. */
1033 if (dump_enabled_p ())
1034 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1035 "Build SLP failed: not grouped load %G", stmt
);
1037 /* FORNOW: Not grouped loads are not supported. */
1038 /* Fatal mismatch. */
1043 /* Not memory operation. */
1044 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
1045 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
1046 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
1047 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
1048 && rhs_code
!= VIEW_CONVERT_EXPR
1049 && rhs_code
!= CALL_EXPR
)
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1053 "Build SLP failed: operation unsupported %G",
1055 /* Fatal mismatch. */
/* cond_exprs may still be isomorphic after swapping the comparison
   operands (SWAP[i] = 1) or inverting the comparison (SWAP[i] = 2);
   record which transformation makes this stmt match the first.  */
1060 if (rhs_code
== COND_EXPR
)
1062 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1063 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1064 enum tree_code swap_code
= ERROR_MARK
;
1065 enum tree_code invert_code
= ERROR_MARK
;
1068 first_cond_code
= TREE_CODE (cond_expr
);
1069 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1071 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1072 swap_code
= swap_tree_comparison (cond_code
);
1073 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1076 if (first_cond_code
== cond_code
)
1078 /* Isomorphic can be achieved by swapping. */
1079 else if (first_cond_code
== swap_code
)
1081 /* Isomorphic can be achieved by inverting. */
1082 else if (first_cond_code
== invert_code
)
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1088 "Build SLP failed: different"
1089 " operation %G", stmt
);
1099 for (i
= 0; i
< group_size
; ++i
)
1103 /* If we allowed a two-operation SLP node verify the target can cope
1104 with the permute we are going to use. */
1105 if (alt_stmt_code
!= ERROR_MARK
1106 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1108 if (!vect_two_operations_perm_ok_p (stmts
, group_size
,
1109 vectype
, alt_stmt_code
))
/* Permute unsupported: mark exactly the alternate-opcode stmts as
   mismatched so the caller can split the group there.  */
1111 for (i
= 0; i
< group_size
; ++i
)
1112 if (gimple_assign_rhs_code (stmts
[i
]->stmt
) == alt_stmt_code
)
1115 if (dump_enabled_p ())
1117 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1118 "Build SLP failed: different operation "
1119 "in stmt %G", stmts
[i
]->stmt
);
1120 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1121 "original stmt %G", first_stmt_info
->stmt
);
1126 *two_operators
= true;
1132 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1133 Note we never remove apart from at destruction time so we do not
1134 need a special value for deleted that differs from empty. */
/* Keys are stmt vectors; an unallocated vec (!exists) doubles as both the
   empty and the deleted marker, and mark/remove release the vec storage.  */
1137 typedef vec
<stmt_vec_info
> value_type
;
1138 typedef vec
<stmt_vec_info
> compare_type
;
1139 static inline hashval_t
hash (value_type
);
1140 static inline bool equal (value_type existing
, value_type candidate
);
1141 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1142 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1143 static const bool empty_zero_p
= true;
1144 static inline void mark_empty (value_type
&x
) { x
.release (); }
1145 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1146 static inline void remove (value_type
&x
) { x
.release (); }
/* Hash a stmt vector by folding each stmt's gimple uid into the hash, so
   equal stmt sequences (same stmts, same order) hash equally.  */
1149 bst_traits::hash (value_type x
)
1152 for (unsigned i
= 0; i
< x
.length (); ++i
)
1153 h
.add_int (gimple_uid (x
[i
]->stmt
));
/* Two keys are equal iff they have the same length and identical
   stmt_vec_info pointers element-for-element.  */
1157 bst_traits::equal (value_type existing
, value_type candidate
)
1159 if (existing
.length () != candidate
.length ())
1161 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1162 if (existing
[i
] != candidate
[i
])
/* Map from a scalar-stmt vector to the SLP tree built for it (or NULL for a
   recorded failed build), using bst_traits for hashing/equality.  Shared
   across vect_build_slp_tree recursion to reuse/CSE subtrees.  */
1167 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1168 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1169 scalar_stmts_to_slp_tree_map_t
;
/* Forward declaration: the worker that actually builds a node; the
   vect_build_slp_tree wrapper below adds the BST_MAP caching layer.  */
1172 vect_build_slp_tree_2 (vec_info
*vinfo
,
1173 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1174 poly_uint64
*max_nunits
,
1175 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1176 scalar_stmts_to_slp_tree_map_t
*bst_map
);
/* Caching wrapper around vect_build_slp_tree_2: if BST_MAP already has an
   entry for STMTS, reuse it (a NULL entry means a previously failed build);
   otherwise build the node, record its max_nunits, and store it in BST_MAP
   under a copy of STMTS, keeping an extra reference for the map itself.  */
1179 vect_build_slp_tree (vec_info
*vinfo
,
1180 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1181 poly_uint64
*max_nunits
,
1182 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1183 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1185 if (slp_tree
*leader
= bst_map
->get (stmts
))
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1189 *leader
? "" : "failed ", *leader
)
;
/* Reusing a cached node: take another reference and fold its lane
   count into the caller's MAX_NUNITS.  */
1192 (*leader
)->refcnt
++;
1193 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
/* Cache miss: build the subtree, tracking its own max_nunits locally so
   it can be stored on the node before merging into *MAX_NUNITS.  */
1197 poly_uint64 this_max_nunits
= 1;
1198 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
,
1200 matches
, npermutes
, tree_size
, bst_map
)
;
1203 res
->max_nunits
= this_max_nunits
;
1204 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1205 /* Keep a reference for the bst_map use. */
1208 bst_map
->put (stmts
.copy (), res
);
1212 /* Recursively build an SLP tree starting from NODE.
1213 Fail (and return a value not equal to zero) if def-stmts are not
1214 isomorphic, require data permutation or are of unsupported types of
1215 operation. Otherwise, return 0.
1216 The value returned is the depth in the SLP tree where a mismatch
1220 vect_build_slp_tree_2 (vec_info
*vinfo
,
1221 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1222 poly_uint64
*max_nunits
,
1223 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1224 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1226 unsigned nops
, i
, this_tree_size
= 0;
1227 poly_uint64 this_max_nunits
= *max_nunits
;
1232 stmt_vec_info stmt_info
= stmts
[0];
1233 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1234 nops
= gimple_call_num_args (stmt
);
1235 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1237 nops
= gimple_num_ops (stmt
) - 1;
1238 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1241 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1246 /* If the SLP node is a PHI (induction or reduction), terminate
1248 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1250 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1251 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
1252 if (!vect_record_max_nunits (vinfo
, stmt_info
, group_size
, vectype
,
1256 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1257 /* Induction from different IVs is not supported. */
1258 if (def_type
== vect_induction_def
)
1260 stmt_vec_info other_info
;
1261 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1262 if (stmt_info
!= other_info
)
1265 else if (def_type
== vect_reduction_def
1266 || def_type
== vect_double_reduction_def
1267 || def_type
== vect_nested_cycle
)
1269 /* Else def types have to match. */
1270 stmt_vec_info other_info
;
1271 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1272 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1278 node
= vect_create_new_slp_node (stmts
, 0);
1283 bool two_operators
= false;
1284 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1285 if (!vect_build_slp_tree_1 (vinfo
, swap
, stmts
, group_size
,
1286 &this_max_nunits
, matches
, &two_operators
))
1289 /* If the SLP node is a load, terminate the recursion unless masked. */
1290 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1291 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1293 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1296 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1301 *max_nunits
= this_max_nunits
;
1303 node
= vect_create_new_slp_node (stmts
, 0);
1304 /* And compute the load permutation. Whether it is actually
1305 a permutation depends on the unrolling factor which is
1307 vec
<unsigned> load_permutation
;
1309 stmt_vec_info load_info
;
1310 load_permutation
.create (group_size
);
1311 stmt_vec_info first_stmt_info
1312 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
1313 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1315 int load_place
= vect_get_place_in_interleaving_chain
1316 (load_info
, first_stmt_info
);
1317 gcc_assert (load_place
!= -1);
1318 load_permutation
.safe_push (load_place
);
1320 SLP_TREE_LOAD_PERMUTATION (node
) = load_permutation
;
1325 /* Get at the operands, verifying they are compatible. */
1326 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1327 slp_oprnd_info oprnd_info
;
1328 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1330 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1331 stmts
, i
, &oprnds_info
);
1333 matches
[(res
== -1) ? 0 : i
] = false;
1337 for (i
= 0; i
< group_size
; ++i
)
1340 vect_free_oprnd_info (oprnds_info
);
1344 auto_vec
<slp_tree
, 4> children
;
1346 stmt_info
= stmts
[0];
1348 /* Create SLP_TREE nodes for the definition node/s. */
1349 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1352 unsigned old_tree_size
= this_tree_size
;
1355 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1357 /* COND_EXPR have one too many eventually if the condition
1359 gcc_assert (i
== 3 && nops
== 4);
1363 if (oprnd_info
->first_dt
!= vect_internal_def
1364 && oprnd_info
->first_dt
!= vect_reduction_def
1365 && oprnd_info
->first_dt
!= vect_induction_def
)
1367 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1368 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1369 oprnd_info
->ops
= vNULL
;
1370 children
.safe_push (invnode
);
1374 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1375 group_size
, &this_max_nunits
,
1377 &this_tree_size
, bst_map
)) != NULL
)
1379 /* If we have all children of a non-unary child built up from
1380 scalars then just throw that away and build it up this node
1382 if (is_a
<bb_vec_info
> (vinfo
)
1383 && SLP_TREE_CHILDREN (child
).length () > 1
1384 /* ??? Rejecting patterns this way doesn't work. We'd have to
1385 do extra work to cancel the pattern so the uses see the
1387 && !oprnd_info
->any_pattern
)
1389 slp_tree grandchild
;
1391 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1392 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1395 && vect_update_all_shared_vectypes (vinfo
,
1396 oprnd_info
->def_stmts
))
1399 this_tree_size
= old_tree_size
;
1400 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1401 vect_free_slp_tree (grandchild
, false);
1402 SLP_TREE_CHILDREN (child
).truncate (0);
1404 if (dump_enabled_p ())
1405 dump_printf_loc (MSG_NOTE
, vect_location
,
1406 "Building parent vector operands from "
1407 "scalars instead\n");
1408 oprnd_info
->def_stmts
= vNULL
;
1409 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1410 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1411 oprnd_info
->ops
= vNULL
;
1413 children
.safe_push (child
);
1418 oprnd_info
->def_stmts
= vNULL
;
1419 children
.safe_push (child
);
1423 /* If the SLP build failed fatally and we analyze a basic-block
1424 simply treat nodes we fail to build as externally defined
1425 (and thus build vectors from the scalar defs).
1426 The cost model will reject outright expensive cases.
1427 ??? This doesn't treat cases where permutation ultimatively
1428 fails (or we don't try permutation below). Ideally we'd
1429 even compute a permutation that will end up with the maximum
1431 if (is_a
<bb_vec_info
> (vinfo
)
1433 /* ??? Rejecting patterns this way doesn't work. We'd have to
1434 do extra work to cancel the pattern so the uses see the
1436 && !is_pattern_stmt_p (stmt_info
)
1437 && !oprnd_info
->any_pattern
1438 && vect_update_all_shared_vectypes (vinfo
, oprnd_info
->def_stmts
))
1440 if (dump_enabled_p ())
1441 dump_printf_loc (MSG_NOTE
, vect_location
,
1442 "Building vector operands from scalars\n");
1444 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
, 0);
1445 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1446 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1447 children
.safe_push (child
);
1448 oprnd_info
->ops
= vNULL
;
1449 oprnd_info
->def_stmts
= vNULL
;
1453 /* If the SLP build for operand zero failed and operand zero
1454 and one can be commutated try that for the scalar stmts
1455 that failed the match. */
1457 /* A first scalar stmt mismatch signals a fatal mismatch. */
1459 /* ??? For COND_EXPRs we can swap the comparison operands
1460 as well as the arms under some constraints. */
1462 && oprnds_info
[1]->first_dt
== vect_internal_def
1463 && is_gimple_assign (stmt_info
->stmt
)
1464 /* Swapping operands for reductions breaks assumptions later on. */
1465 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1466 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
1467 /* Do so only if the number of not successful permutes was nor more
1468 than a cut-ff as re-trying the recursive match on
1469 possibly each level of the tree would expose exponential
1473 /* See whether we can swap the matching or the non-matching
1475 bool swap_not_matching
= true;
1478 for (j
= 0; j
< group_size
; ++j
)
1480 if (matches
[j
] != !swap_not_matching
)
1482 stmt_vec_info stmt_info
= stmts
[j
];
1483 /* Verify if we can swap operands of this stmt. */
1484 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1486 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1488 if (!swap_not_matching
)
1490 swap_not_matching
= false;
1495 while (j
!= group_size
);
1497 /* Swap mismatched definition stmts. */
1498 if (dump_enabled_p ())
1499 dump_printf_loc (MSG_NOTE
, vect_location
,
1500 "Re-trying with swapped operands of stmts ");
1501 for (j
= 0; j
< group_size
; ++j
)
1502 if (matches
[j
] == !swap_not_matching
)
1504 std::swap (oprnds_info
[0]->def_stmts
[j
],
1505 oprnds_info
[1]->def_stmts
[j
]);
1506 std::swap (oprnds_info
[0]->ops
[j
],
1507 oprnds_info
[1]->ops
[j
]);
1508 if (dump_enabled_p ())
1509 dump_printf (MSG_NOTE
, "%d ", j
);
1511 if (dump_enabled_p ())
1512 dump_printf (MSG_NOTE
, "\n");
1513 /* And try again with scratch 'matches' ... */
1514 bool *tem
= XALLOCAVEC (bool, group_size
);
1515 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1516 group_size
, &this_max_nunits
,
1518 &this_tree_size
, bst_map
)) != NULL
)
1520 /* If we have all children of a non-unary child built up from
1521 scalars then just throw that away and build it up this node
1523 if (is_a
<bb_vec_info
> (vinfo
)
1524 && SLP_TREE_CHILDREN (child
).length () > 1
1525 /* ??? Rejecting patterns this way doesn't work. We'd have
1526 to do extra work to cancel the pattern so the uses see the
1528 && !oprnd_info
->any_pattern
)
1531 slp_tree grandchild
;
1533 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1534 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1537 && (vect_update_all_shared_vectypes
1538 (vinfo
, oprnd_info
->def_stmts
)))
1541 this_tree_size
= old_tree_size
;
1542 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1543 vect_free_slp_tree (grandchild
, false);
1544 SLP_TREE_CHILDREN (child
).truncate (0);
1546 if (dump_enabled_p ())
1547 dump_printf_loc (MSG_NOTE
, vect_location
,
1548 "Building parent vector operands from "
1549 "scalars instead\n");
1550 oprnd_info
->def_stmts
= vNULL
;
1551 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1552 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1553 oprnd_info
->ops
= vNULL
;
1555 children
.safe_push (child
);
1560 oprnd_info
->def_stmts
= vNULL
;
1561 children
.safe_push (child
);
1569 gcc_assert (child
== NULL
);
1570 FOR_EACH_VEC_ELT (children
, j
, child
)
1571 vect_free_slp_tree (child
, false);
1572 vect_free_oprnd_info (oprnds_info
);
1576 vect_free_oprnd_info (oprnds_info
);
1578 *tree_size
+= this_tree_size
+ 1;
1579 *max_nunits
= this_max_nunits
;
1581 node
= vect_create_new_slp_node (stmts
, nops
);
1582 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1583 SLP_TREE_CHILDREN (node
).splice (children
);
1587 /* Dump a single SLP tree NODE. */
1590 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1595 stmt_vec_info stmt_info
;
1598 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1599 dump_user_location_t user_loc
= loc
.get_user_location ();
1600 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u, refcnt=%u)\n",
1601 SLP_TREE_DEF_TYPE (node
) == vect_external_def
1603 : (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
1606 estimated_poly_value (node
->max_nunits
), node
->refcnt
);
1607 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1608 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1609 dump_printf_loc (metadata
, user_loc
, "\tstmt %u %G", i
, stmt_info
->stmt
);
1612 dump_printf_loc (metadata
, user_loc
, "\t{ ");
1613 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1614 dump_printf (metadata
, "%T%s ", op
,
1615 i
< SLP_TREE_SCALAR_OPS (node
).length () - 1 ? "," : "");
1616 dump_printf (metadata
, "}\n");
1618 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1620 dump_printf_loc (metadata
, user_loc
, "\tload permutation {");
1621 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), i
, j
)
1622 dump_printf (dump_kind
, " %u", j
);
1623 dump_printf (dump_kind
, " }\n");
1625 if (SLP_TREE_CHILDREN (node
).is_empty ())
1627 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1628 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1629 dump_printf (dump_kind
, " %p", (void *)child
);
1630 dump_printf (dump_kind
, "\n");
1634 debug (slp_tree node
)
1636 debug_dump_context ctx
;
1637 vect_print_slp_tree (MSG_NOTE
,
1638 dump_location_t::from_location_t (UNKNOWN_LOCATION
),
1642 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1645 vect_print_slp_graph (dump_flags_t dump_kind
, dump_location_t loc
,
1646 slp_tree node
, hash_set
<slp_tree
> &visited
)
1651 if (visited
.add (node
))
1654 vect_print_slp_tree (dump_kind
, loc
, node
);
1656 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1657 vect_print_slp_graph (dump_kind
, loc
, child
, visited
);
1661 vect_print_slp_graph (dump_flags_t dump_kind
, dump_location_t loc
,
1664 hash_set
<slp_tree
> visited
;
1665 vect_print_slp_graph (dump_kind
, loc
, entry
, visited
);
1668 /* Mark the tree rooted at NODE with PURE_SLP. */
1671 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
1674 stmt_vec_info stmt_info
;
1677 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1680 if (visited
.add (node
))
1683 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1684 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
1686 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1687 vect_mark_slp_stmts (child
, visited
);
1691 vect_mark_slp_stmts (slp_tree node
)
1693 hash_set
<slp_tree
> visited
;
1694 vect_mark_slp_stmts (node
, visited
);
1697 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1700 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
1703 stmt_vec_info stmt_info
;
1706 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1709 if (visited
.add (node
))
1712 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1714 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1715 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1716 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1719 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1720 vect_mark_slp_stmts_relevant (child
, visited
);
1724 vect_mark_slp_stmts_relevant (slp_tree node
)
1726 hash_set
<slp_tree
> visited
;
1727 vect_mark_slp_stmts_relevant (node
, visited
);
1730 /* Copy the SLP subtree rooted at NODE. */
1733 slp_copy_subtree (slp_tree node
, hash_map
<slp_tree
, slp_tree
> &map
)
1738 slp_tree
©_ref
= map
.get_or_insert (node
, &existed_p
);
1742 copy_ref
= new _slp_tree
;
1743 slp_tree copy
= copy_ref
;
1744 SLP_TREE_DEF_TYPE (copy
) = SLP_TREE_DEF_TYPE (node
);
1745 SLP_TREE_VECTYPE (copy
) = SLP_TREE_VECTYPE (node
);
1746 SLP_TREE_REPRESENTATIVE (copy
) = SLP_TREE_REPRESENTATIVE (node
);
1747 copy
->max_nunits
= node
->max_nunits
;
1749 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1751 SLP_TREE_SCALAR_STMTS (copy
) = SLP_TREE_SCALAR_STMTS (node
).copy ();
1752 stmt_vec_info stmt_info
;
1753 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1754 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
1756 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1757 SLP_TREE_SCALAR_OPS (copy
) = SLP_TREE_SCALAR_OPS (node
).copy ();
1758 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1759 SLP_TREE_LOAD_PERMUTATION (copy
) = SLP_TREE_LOAD_PERMUTATION (node
).copy ();
1760 if (SLP_TREE_CHILDREN (node
).exists ())
1761 SLP_TREE_CHILDREN (copy
) = SLP_TREE_CHILDREN (node
).copy ();
1762 gcc_assert (!SLP_TREE_VEC_STMTS (node
).exists ());
1765 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy
), i
, child
)
1767 SLP_TREE_CHILDREN (copy
)[i
] = slp_copy_subtree (child
, map
);
1768 SLP_TREE_CHILDREN (copy
)[i
]->refcnt
++;
1773 /* Rearrange the statements of NODE according to PERMUTATION. */
1776 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1777 vec
<unsigned> permutation
,
1778 hash_set
<slp_tree
> &visited
)
1783 if (visited
.add (node
))
1786 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1787 vect_slp_rearrange_stmts (child
, group_size
, permutation
, visited
);
1789 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1791 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1792 vec
<stmt_vec_info
> tmp_stmts
;
1793 tmp_stmts
.create (group_size
);
1794 tmp_stmts
.quick_grow (group_size
);
1795 stmt_vec_info stmt_info
;
1796 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1797 tmp_stmts
[permutation
[i
]] = stmt_info
;
1798 SLP_TREE_SCALAR_STMTS (node
).release ();
1799 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1801 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1803 gcc_assert (group_size
== SLP_TREE_SCALAR_OPS (node
).length ());
1805 tmp_ops
.create (group_size
);
1806 tmp_ops
.quick_grow (group_size
);
1808 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1809 tmp_ops
[permutation
[i
]] = op
;
1810 SLP_TREE_SCALAR_OPS (node
).release ();
1811 SLP_TREE_SCALAR_OPS (node
) = tmp_ops
;
1816 /* Attempt to reorder stmts in a reduction chain so that we don't
1817 require any load permutation. Return true if that was possible,
1818 otherwise return false. */
1821 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1825 slp_tree node
, load
;
1827 if (SLP_INSTANCE_LOADS (slp_instn
).is_empty ())
1830 /* Compare all the permutation sequences to the first one. We know
1831 that at least one load is permuted. */
1832 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1833 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1835 unsigned int group_size
= SLP_TREE_LOAD_PERMUTATION (node
).length ();
1836 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1838 if (!SLP_TREE_LOAD_PERMUTATION (load
).exists ()
1839 || SLP_TREE_LOAD_PERMUTATION (load
).length () != group_size
)
1841 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load
), j
, lidx
)
1842 if (lidx
!= SLP_TREE_LOAD_PERMUTATION (node
)[j
])
1846 /* Check that the loads in the first sequence are different and there
1847 are no gaps between them. */
1848 auto_sbitmap
load_index (group_size
);
1849 bitmap_clear (load_index
);
1850 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1852 if (lidx
>= group_size
)
1854 if (bitmap_bit_p (load_index
, lidx
))
1857 bitmap_set_bit (load_index
, lidx
);
1859 for (i
= 0; i
< group_size
; i
++)
1860 if (!bitmap_bit_p (load_index
, i
))
1863 /* This permutation is valid for reduction. Since the order of the
1864 statements in the nodes is not important unless they are memory
1865 accesses, we can rearrange the statements in all the nodes
1866 according to the order of the loads. */
1868 /* We have to unshare the SLP tree we modify. */
1869 hash_map
<slp_tree
, slp_tree
> map
;
1870 slp_tree unshared
= slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn
), map
);
1871 vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn
), false);
1873 SLP_INSTANCE_TREE (slp_instn
) = unshared
;
1874 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1875 SLP_INSTANCE_LOADS (slp_instn
)[i
] = *map
.get (node
);
1876 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1878 /* Do the actual re-arrangement. */
1879 hash_set
<slp_tree
> visited
;
1880 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1881 node
->load_permutation
, visited
);
1883 /* We are done, no actual permutations need to be generated. */
1884 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1885 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1887 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1888 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1889 /* But we have to keep those permutations that are required because
1890 of handling of gaps. */
1891 if (known_eq (unrolling_factor
, 1U)
1892 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1893 && DR_GROUP_GAP (first_stmt_info
) == 0))
1894 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1896 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1897 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1903 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1906 vect_gather_slp_loads (vec
<slp_tree
> &loads
, slp_tree node
,
1907 hash_set
<slp_tree
> &visited
)
1909 if (visited
.add (node
))
1912 if (SLP_TREE_CHILDREN (node
).length () == 0)
1914 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1916 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1917 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1918 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1919 loads
.safe_push (node
);
1925 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1926 vect_gather_slp_loads (loads
, child
, visited
);
1931 vect_gather_slp_loads (slp_instance inst
, slp_tree node
)
1933 hash_set
<slp_tree
> visited
;
1934 vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst
), node
, visited
);
1938 /* Find the last store in SLP INSTANCE. */
1941 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1943 stmt_vec_info last
= NULL
;
1944 stmt_vec_info stmt_vinfo
;
1946 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
1948 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
1949 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
1955 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1956 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1957 (also containing the first GROUP1_SIZE stmts, since stores are
1958 consecutive), the second containing the remainder.
1959 Return the first stmt in the second group. */
1961 static stmt_vec_info
1962 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
1964 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
1965 gcc_assert (group1_size
> 0);
1966 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
1967 gcc_assert (group2_size
> 0);
1968 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
1970 stmt_vec_info stmt_info
= first_vinfo
;
1971 for (unsigned i
= group1_size
; i
> 1; i
--)
1973 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1974 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1976 /* STMT is now the last element of the first group. */
1977 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1978 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
1980 DR_GROUP_SIZE (group2
) = group2_size
;
1981 for (stmt_info
= group2
; stmt_info
;
1982 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
1984 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
1985 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1988 /* For the second group, the DR_GROUP_GAP is that before the original group,
1989 plus skipping over the first vector. */
1990 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
1992 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1993 DR_GROUP_GAP (first_vinfo
) += group2_size
;
1995 if (dump_enabled_p ())
1996 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1997 group1_size
, group2_size
);
2002 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
2003 statements and a vector of NUNITS elements. */
2006 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
2008 return exact_div (common_multiple (nunits
, group_size
), group_size
);
2011 /* Analyze an SLP instance starting from a group of grouped stores. Call
2012 vect_build_slp_tree to build a tree of packed stmts if possible.
2013 Return FALSE if it's impossible to SLP any stmt in the loop. */
2016 vect_analyze_slp_instance (vec_info
*vinfo
,
2017 scalar_stmts_to_slp_tree_map_t
*bst_map
,
2018 stmt_vec_info stmt_info
, unsigned max_tree_size
)
2020 slp_instance new_instance
;
2022 unsigned int group_size
;
2023 tree vectype
, scalar_type
= NULL_TREE
;
2025 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2026 vec
<stmt_vec_info
> scalar_stmts
;
2027 bool constructor
= false;
2029 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2031 scalar_type
= TREE_TYPE (DR_REF (dr
));
2032 group_size
= DR_GROUP_SIZE (stmt_info
);
2033 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
2035 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2037 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2038 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2039 group_size
= REDUC_GROUP_SIZE (stmt_info
);
2041 else if (is_gimple_assign (stmt_info
->stmt
)
2042 && gimple_assign_rhs_code (stmt_info
->stmt
) == CONSTRUCTOR
)
2044 vectype
= TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
));
2045 group_size
= CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info
->stmt
));
2050 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2051 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2052 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2057 if (dump_enabled_p ())
2058 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2059 "Build SLP failed: unsupported data-type %T\n",
2064 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2066 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2067 scalar_stmts
.create (group_size
);
2068 stmt_vec_info next_info
= stmt_info
;
2069 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2071 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2074 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2075 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
2078 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2080 /* Collect the reduction stmts and store them in
2081 SLP_TREE_SCALAR_STMTS. */
2084 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2085 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
2087 /* Mark the first element of the reduction chain as reduction to properly
2088 transform the node. In the reduction analysis phase only the last
2089 element of the chain is marked as reduction. */
2090 STMT_VINFO_DEF_TYPE (stmt_info
)
2091 = STMT_VINFO_DEF_TYPE (scalar_stmts
.last ());
2092 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
2093 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
2095 else if (constructor
)
2097 tree rhs
= gimple_assign_rhs1 (stmt_info
->stmt
);
2099 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs
), i
, val
)
2101 if (TREE_CODE (val
) == SSA_NAME
)
2103 gimple
* def
= SSA_NAME_DEF_STMT (val
);
2104 stmt_vec_info def_info
= vinfo
->lookup_stmt (def
);
2105 /* Value is defined in another basic block. */
2108 def_info
= vect_stmt_to_vectorize (def_info
);
2109 scalar_stmts
.safe_push (def_info
);
2114 if (dump_enabled_p ())
2115 dump_printf_loc (MSG_NOTE
, vect_location
,
2116 "Analyzing vectorizable constructor: %G\n",
2121 /* Collect reduction statements. */
2122 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2123 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2124 scalar_stmts
.safe_push (next_info
);
2127 /* Build the tree for the SLP instance. */
2128 bool *matches
= XALLOCAVEC (bool, group_size
);
2129 unsigned npermutes
= 0;
2130 poly_uint64 max_nunits
= nunits
;
2131 unsigned tree_size
= 0;
2132 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2133 &max_nunits
, matches
, &npermutes
,
2134 &tree_size
, bst_map
);
2137 /* Calculate the unrolling factor based on the smallest type. */
2138 poly_uint64 unrolling_factor
2139 = calculate_unrolling_factor (max_nunits
, group_size
);
2141 if (maybe_ne (unrolling_factor
, 1U)
2142 && is_a
<bb_vec_info
> (vinfo
))
2144 unsigned HOST_WIDE_INT const_max_nunits
;
2145 if (!max_nunits
.is_constant (&const_max_nunits
)
2146 || const_max_nunits
> group_size
)
2148 if (dump_enabled_p ())
2149 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2150 "Build SLP failed: store group "
2151 "size not a multiple of the vector size "
2152 "in basic block SLP\n");
2153 vect_free_slp_tree (node
, false);
2156 /* Fatal mismatch. */
2157 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2158 vect_free_slp_tree (node
, false);
2162 /* Create a new SLP instance. */
2163 new_instance
= XNEW (class _slp_instance
);
2164 SLP_INSTANCE_TREE (new_instance
) = node
;
2165 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2166 SLP_INSTANCE_LOADS (new_instance
) = vNULL
;
2167 SLP_INSTANCE_ROOT_STMT (new_instance
) = constructor
? stmt_info
: NULL
;
2169 vect_gather_slp_loads (new_instance
, node
);
2170 if (dump_enabled_p ())
2171 dump_printf_loc (MSG_NOTE
, vect_location
,
2172 "SLP size %u vs. limit %u.\n",
2173 tree_size
, max_tree_size
);
2175 /* Check whether any load is possibly permuted. */
2177 bool loads_permuted
= false;
2178 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2180 if (!SLP_TREE_LOAD_PERMUTATION (load_node
).exists ())
2183 stmt_vec_info load_info
;
2184 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2185 if (SLP_TREE_LOAD_PERMUTATION (load_node
)[j
] != j
)
2187 loads_permuted
= true;
2192 /* If the loads and stores can be handled with load/store-lane
2193 instructions do not generate this SLP instance. */
2194 if (is_a
<loop_vec_info
> (vinfo
)
2196 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2199 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2201 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2202 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2203 /* Use SLP for strided accesses (or if we can't load-lanes). */
2204 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2205 || ! vect_load_lanes_supported
2206 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2207 DR_GROUP_SIZE (stmt_vinfo
), false))
2210 if (i
== SLP_INSTANCE_LOADS (new_instance
).length ())
2212 if (dump_enabled_p ())
2213 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2214 "Built SLP cancelled: can use "
2215 "load/store-lanes\n");
2216 vect_free_slp_instance (new_instance
, false);
2221 /* If this is a reduction chain with a conversion in front
2222 amend the SLP tree with a node for that. */
2224 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
2225 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
2227 /* Get at the conversion stmt - we know it's the single use
2228 of the last stmt of the reduction chain. */
2229 gimple
*tem
= vect_orig_stmt (scalar_stmts
[group_size
- 1])->stmt
;
2230 use_operand_p use_p
;
2232 bool r
= single_imm_use (gimple_assign_lhs (tem
),
2235 next_info
= vinfo
->lookup_stmt (use_stmt
);
2236 next_info
= vect_stmt_to_vectorize (next_info
);
2237 scalar_stmts
= vNULL
;
2238 scalar_stmts
.create (group_size
);
2239 for (unsigned i
= 0; i
< group_size
; ++i
)
2240 scalar_stmts
.quick_push (next_info
);
2241 slp_tree conv
= vect_create_new_slp_node (scalar_stmts
, 1);
2242 SLP_TREE_CHILDREN (conv
).quick_push (node
);
2243 SLP_INSTANCE_TREE (new_instance
) = conv
;
2244 /* We also have to fake this conversion stmt as SLP reduction
2245 group so we don't have to mess with too much code
2247 REDUC_GROUP_FIRST_ELEMENT (next_info
) = next_info
;
2248 REDUC_GROUP_NEXT_ELEMENT (next_info
) = NULL
;
2251 vinfo
->slp_instances
.safe_push (new_instance
);
2253 /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
2254 the number of scalar stmts in the root in a few places.
2255 Verify that assumption holds. */
2256 gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance
))
2257 .length () == group_size
);
2259 if (dump_enabled_p ())
2261 dump_printf_loc (MSG_NOTE
, vect_location
,
2262 "Final SLP tree for instance:\n");
2263 vect_print_slp_graph (MSG_NOTE
, vect_location
,
2264 SLP_INSTANCE_TREE (new_instance
));
2272 /* Failed to SLP. */
2273 /* Free the allocated memory. */
2274 scalar_stmts
.release ();
2277 /* For basic block SLP, try to break the group up into multiples of the
2279 unsigned HOST_WIDE_INT const_nunits
;
2280 if (is_a
<bb_vec_info
> (vinfo
)
2281 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2282 && DR_GROUP_FIRST_ELEMENT (stmt_info
)
2283 && nunits
.is_constant (&const_nunits
))
2285 /* We consider breaking the group only on VF boundaries from the existing
2287 for (i
= 0; i
< group_size
; i
++)
2288 if (!matches
[i
]) break;
2290 if (i
>= const_nunits
&& i
< group_size
)
2292 /* Split into two groups at the first vector boundary before i. */
2293 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2294 unsigned group1_size
= i
& ~(const_nunits
- 1);
2296 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2298 bool res
= vect_analyze_slp_instance (vinfo
, bst_map
, stmt_info
,
2300 /* If the first non-match was in the middle of a vector,
2301 skip the rest of that vector. */
2302 if (group1_size
< i
)
2304 i
= group1_size
+ const_nunits
;
2306 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2309 res
|= vect_analyze_slp_instance (vinfo
, bst_map
,
2310 rest
, max_tree_size
);
2313 /* Even though the first vector did not all match, we might be able to SLP
2314 (some) of the remainder. FORNOW ignore this possibility. */
/* NOTE(review): this chunk is extraction-garbled — the original file's line
   numbers (2325, 2328, ...) are fused into the text, statements are split
   across physical lines, and spans where the embedded numbering skips
   (e.g. 2326-2327, 2340, 2346-2347, 2356-2358) are missing entirely.
   Restore from upstream GCC tree-vect-slp.c before compiling.

   Purpose (from the visible comments/code): entry point of SLP discovery.
   Builds SLP instances from grouped stores, reduction chains, and reduction
   groups; dissolves reduction chains that fail SLP so they can still be
   vectorized as plain reductions; releases the BST map's node references;
   rearranges load permutations for pure reductions; and finally gathers all
   SLP loads into vinfo->slp_loads.  */
2321 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2322 trees of packed scalar stmts if SLP is possible. */
2325 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2328 stmt_vec_info first_element
;
2330 DUMP_VECT_SCOPE ("vect_analyze_slp");
/* The BST map caches SLP nodes by their scalar-stmt set so identical
   subtrees are shared across instances.  */
2332 scalar_stmts_to_slp_tree_map_t
*bst_map
2333 = new scalar_stmts_to_slp_tree_map_t ();
2335 /* Find SLP sequences starting from groups of grouped stores. */
2336 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2337 vect_analyze_slp_instance (vinfo
, bst_map
, first_element
, max_tree_size
);
2339 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2341 if (loop_vinfo
->reduction_chains
.length () > 0)
2343 /* Find SLP sequences starting from reduction chains. */
2344 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2345 if (! vect_analyze_slp_instance (vinfo
, bst_map
, first_element
,
/* On failure the chain is dissolved below: each element's REDUC_GROUP
   links are cleared so the stmts become ordinary reduction candidates.
   NOTE(review): the loop walking NEXT elements (orig lines 2351-2352,
   2356-2358) is partly missing from this garbled view.  */
2348 /* Dissolve reduction chain group. */
2349 stmt_vec_info vinfo
= first_element
;
2350 stmt_vec_info last
= NULL
;
2353 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2354 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2355 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2359 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2360 /* It can be still vectorized as part of an SLP reduction. */
2361 loop_vinfo
->reductions
.safe_push (last
);
2365 /* Find SLP sequences starting from groups of reductions. */
2366 if (loop_vinfo
->reductions
.length () > 1)
2367 vect_analyze_slp_instance (vinfo
, bst_map
, loop_vinfo
->reductions
[0],
2371 /* The map keeps a reference on SLP nodes built, release that. */
2372 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
2373 it
!= bst_map
->end (); ++it
)
2375 vect_free_slp_tree ((*it
).second
, false);
2378 /* Optimize permutations in SLP reductions. */
2379 slp_instance instance
;
2380 FOR_EACH_VEC_ELT (vinfo
->slp_instances
, i
, instance
)
2382 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2383 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2384 /* Reduction (there are no data-refs in the root).
2385 In reduction chain the order of the loads is not important. */
2386 if (!STMT_VINFO_DATA_REF (stmt_info
)
2387 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2388 vect_attempt_slp_rearrange_stmts (instance
);
2391 /* Gather all loads in the SLP graph. */
2392 hash_set
<slp_tree
> visited
;
2393 FOR_EACH_VEC_ELT (vinfo
->slp_instances
, i
, instance
)
2394 vect_gather_slp_loads (vinfo
->slp_loads
, SLP_INSTANCE_TREE (instance
),
2397 return opt_result::success ();
/* NOTE(review): extraction-garbled; the function's opening lines
   (orig 2402-2404, including its declared locals i/node and opening brace)
   are missing, and several interior lines are too (e.g. 2420-2421,
   2424-2427, 2429-2431, 2433-2437).  Restore from upstream before use.

   Purpose: post-discovery cleanup of load permutations.  For each recorded
   SLP load node, drop SLP_TREE_LOAD_PERMUTATION when it is unnecessary —
   in BB SLP when the loads form a contiguous subchain of an interleaving
   group, and in loop SLP when the permutation is the identity and
   unrolling exposes no gap.  */
2401 vect_optimize_slp (vec_info
*vinfo
)
2405 FOR_EACH_VEC_ELT (vinfo
->slp_loads
, i
, node
)
2407 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
2410 /* In basic block vectorization we allow any subchain of an interleaving
2412 FORNOW: not in loop SLP because of realignment complications. */
2413 if (is_a
<bb_vec_info
> (vinfo
))
/* subchain_p tracks whether the node's loads follow the group's
   NEXT_ELEMENT chain with a gap of exactly 1 between neighbours.  */
2415 bool subchain_p
= true;
2416 stmt_vec_info next_load_info
= NULL
;
2417 stmt_vec_info load_info
;
2419 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
2422 && (next_load_info
!= load_info
2423 || DR_GROUP_GAP (load_info
) != 1))
2428 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
)
;
2432 SLP_TREE_LOAD_PERMUTATION (node
).release ();
2438 stmt_vec_info load_info
;
2439 bool this_load_permuted
= false;
2441 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
2442 if (SLP_TREE_LOAD_PERMUTATION (node
)[j
] != j
)
2444 this_load_permuted
= true;
2447 stmt_vec_info first_stmt_info
2448 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
2449 if (!this_load_permuted
2450 /* The load requires permutation when unrolling exposes
2451 a gap either because the group is larger than the SLP
2452 group-size or because there is a gap between the groups. */
2453 && (known_eq (LOOP_VINFO_VECT_FACTOR (as_a
<loop_vec_info
> (vinfo
)), 1U)
2454 || ((SLP_TREE_SCALAR_STMTS (node
).length ()
2455 == DR_GROUP_SIZE (first_stmt_info
))
2456 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2458 SLP_TREE_LOAD_PERMUTATION (node
).release ();
/* NOTE(review): extraction-garbled (fused original line numbers, split
   statements, missing lines where the numbering skips, e.g. 2497-2499).
   Purpose is per the visible comment: decide per instance whether to SLP,
   accumulate the common unrolling factor over all instances, mark the
   chosen instances' stmts PURE_SLP, and return whether anything was SLPed.  */
2466 /* For each possible SLP instance decide whether to SLP it and calculate overall
2467 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2468 least one instance. */
2471 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2474 poly_uint64 unrolling_factor
= 1;
2475 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2476 slp_instance instance
;
2477 int decided_to_slp
= 0;
2479 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2481 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2483 /* FORNOW: SLP if you can. */
2484 /* All unroll factors have the form:
2486 GET_MODE_SIZE (vinfo->vector_mode) * X
2488 for some rational X, so they must have a common multiple. */
2490 = force_common_multiple (unrolling_factor
,
2491 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2493 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2494 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2495 loop-based vectorization. Such stmts will be marked as HYBRID. */
2496 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
2500 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2502 if (decided_to_slp
&& dump_enabled_p ())
2504 dump_printf_loc (MSG_NOTE
, vect_location
,
2505 "Decided to SLP %d instances. Unrolling factor ",
2507 dump_dec (MSG_NOTE
, unrolling_factor
);
2508 dump_printf (MSG_NOTE
, "\n");
2511 return (decided_to_slp
> 0);
/* NOTE(review): the `struct vdhs_data` declaration line (orig 2516) and its
   braces are missing from this garbled extraction — only the two fields
   survive.  The struct carries context for the vect_detect_hybrid_slp
   tree walker: the enclosing loop's vinfo and the worklist of non-SLP
   stmts whose operands are being traced.  */
2514 /* Private data for vect_detect_hybrid_slp. */
2517 loop_vec_info loop_vinfo
;
2518 vec
<stmt_vec_info
> *worklist
;
/* NOTE(review): extraction-garbled; lines where the embedded numbering
   skips (e.g. 2525, 2528-2531, 2533-2534, 2537, 2543-2547 — guards,
   braces and the return) are missing.  Restore from upstream.

   Purpose: walk_gimple_op callback.  For an SSA operand *TP of a non-SLP
   stmt, look up its defining stmt; if that def is currently PURE_SLP it
   must also be emitted as a scalar, so it is demoted to `hybrid` and
   pushed on the worklist to propagate further along use->def chains.  */
2521 /* Walker for walk_gimple_op. */
2524 vect_detect_hybrid_slp (tree
*tp
, int *, void *data
)
2526 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2527 vdhs_data
*dat
= (vdhs_data
*)wi
->info
;
2532 stmt_vec_info def_stmt_info
= dat
->loop_vinfo
->lookup_def (*tp
);
/* Resolve to the pattern stmt if the def participates in a pattern.  */
2535 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
2536 if (PURE_SLP_STMT (def_stmt_info
))
2538 if (dump_enabled_p ())
2539 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2540 def_stmt_info
->stmt
);
2541 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
2542 dat
->worklist
->safe_push (def_stmt_info
);
/* NOTE(review): extraction-garbled; fused line numbers, split statements,
   and missing lines at every numbering gap (braces, gsi_next calls, the
   declarations of `dat`/`wi` around orig 2598-2599, etc.).

   Purpose: seed a worklist with every relevant stmt that is NOT pure-SLP
   (scanning all PHIs and stmts of all loop blocks, including pattern def
   sequences), then drain it through walk_gimple_op with the walker above
   so every SLP stmt feeding a scalar use gets demoted to hybrid.  */
2548 /* Find stmts that must be both vectorized and SLPed. */
2551 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2553 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2555 /* All stmts participating in SLP are marked pure_slp, all other
2556 stmts are loop_vect.
2557 First collect all loop_vect stmts into a worklist. */
2558 auto_vec
<stmt_vec_info
> worklist
;
2559 for (unsigned i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2561 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2562 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
2565 gphi
*phi
= gsi
.phi ();
2566 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (phi
);
2567 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
2568 worklist
.safe_push (stmt_info
);
2570 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2573 gimple
*stmt
= gsi_stmt (gsi
);
2574 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
/* Pattern stmts replace the original: scan the pattern def sequence
   and then switch to the related (pattern) stmt for the main check.  */
2575 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2577 for (gimple_stmt_iterator gsi2
2578 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
2579 !gsi_end_p (gsi2
); gsi_next (&gsi2
))
2581 stmt_vec_info patt_info
2582 = loop_vinfo
->lookup_stmt (gsi_stmt (gsi2
));
2583 if (!STMT_SLP_TYPE (patt_info
)
2584 && STMT_VINFO_RELEVANT (patt_info
))
2585 worklist
.safe_push (patt_info
);
2587 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
2589 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
2590 worklist
.safe_push (stmt_info
);
2594 /* Now we have a worklist of non-SLP stmts, follow use->def chains and
2595 mark any SLP vectorized stmt as hybrid.
2596 ??? We're visiting def stmts N times (once for each non-SLP and
2597 once for each hybrid-SLP use). */
2600 dat
.worklist
= &worklist
;
2601 dat
.loop_vinfo
= loop_vinfo
;
2602 memset (&wi
, 0, sizeof (wi
));
2603 wi
.info
= (void *)&dat
;
2604 while (!worklist
.is_empty ())
2606 stmt_vec_info stmt_info
= worklist
.pop ();
2607 /* Since SSA operands are not set up for pattern stmts we need
2608 to use walk_gimple_op. */
2610 walk_gimple_op (stmt_info
->stmt
, vect_detect_hybrid_slp
, &wi
);
/* NOTE(review): extraction-garbled; lines at numbering gaps are missing
   (orig 2625, 2627, 2629-2630, 2633-2634 — braces and the gsi_next
   increment).  Restore from upstream.

   Purpose: constructor for the BB-vectorization info object.  Delegates to
   vec_info with kind `bb` and a fresh cost model, records the region
   bounds, then tags every stmt in [region_begin, region_end) with uid 0 as
   a region membership marker (the destructor resets them to -1).  */
2615 /* Initialize a bb_vec_info struct for the statements between
2616 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2618 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2619 gimple_stmt_iterator region_end_in
,
2620 vec_info_shared
*shared
)
2621 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2622 bb (gsi_bb (region_begin_in
)),
2623 region_begin (region_begin_in
),
2624 region_end (region_end_in
)
2626 gimple_stmt_iterator gsi
;
2628 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2631 gimple
*stmt
= gsi_stmt (gsi
);
2632 gimple_set_uid (stmt
, 0);
/* NOTE(review): extraction-garbled; trailing lines (orig 2649-2652,
   including the closing brace) are missing from this view.
   Purpose: destructor — walks the region and resets each stmt's uid to -1,
   undoing the region marker set by the constructor.  */
2640 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2641 stmts in the basic block. */
2643 _bb_vec_info::~_bb_vec_info ()
2645 for (gimple_stmt_iterator si
= region_begin
;
2646 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2647 /* Reset region marker. */
2648 gimple_set_uid (gsi_stmt (si
), -1);
/* NOTE(review): extraction-garbled; missing lines at numbering gaps
   (orig 2661, 2664, 2673, 2676, 2679-2684, 2687-2688, 2693-2695 — braces,
   the `vf` declaration/initialisation, and the `bool dummy` used below).

   Purpose: analyze the root stmt of NODE after its children.  Computes
   SLP_TREE_NUMBER_OF_VEC_STMTS — copied from an internal-def child for SLP
   reductions (non-grouped access inside a REDUC_GROUP), otherwise
   VF * group_size / nunits via vect_get_num_vectors — then delegates the
   actual operation check to vect_analyze_stmt.  */
2653 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2654 given then that child nodes have already been processed, and that
2655 their def types currently match their SLP node's def type. */
2658 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2659 slp_instance node_instance
,
2660 stmt_vector_for_cost
*cost_vec
)
2662 stmt_vec_info stmt_info
= SLP_TREE_REPRESENTATIVE (node
);
2663 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2665 /* Calculate the number of vector statements to be created for the
2666 scalar stmts in this node. For SLP reductions it is equal to the
2667 number of vector statements in the children (which has already been
2668 calculated by the recursive call). Otherwise it is the number of
2669 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2670 VF divided by the number of elements in a vector. */
2671 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2672 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2674 for (unsigned i
= 0; i
< SLP_TREE_CHILDREN (node
).length (); ++i
)
2675 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node
)[i
]) == vect_internal_def
)
2677 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2678 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[i
]);
/* In the BB-vectorization case (no loop_vinfo) vf stays at its
   default — presumably 1; the declaration is in a missing line.  */
2685 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2686 vf
= loop_vinfo
->vectorization_factor
;
2689 unsigned int group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
2690 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2691 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2692 = vect_get_num_vectors (vf
* group_size
, vectype
);
2696 return vect_analyze_stmt (vinfo
, stmt_info
, &dummy
,
2697 node
, node_instance
, cost_vec
);
/* NOTE(review): extraction-garbled; missing lines at numbering gaps
   (orig 2706, 2708-2709, 2713-2714, 2718, 2726, 2729-2731 — braces, the
   `unsigned int i` declaration, early `return false`, final `return true`).

   Purpose: fall back to building NODE's vectors from scalar values.  Bails
   out for loop SLP, for the instance root, or when the node contains
   pattern stmts (their scalar lhs is not available); otherwise flips the
   node to vect_external_def and records each original stmt's lhs as the
   scalar operand.  Children are deliberately kept (see comment below).  */
2700 /* Try to build NODE from scalars, returning true on success.
2701 NODE_INSTANCE is the SLP instance that contains NODE. */
2704 vect_slp_convert_to_external (vec_info
*vinfo
, slp_tree node
,
2705 slp_instance node_instance
)
2707 stmt_vec_info stmt_info
;
2710 if (!is_a
<bb_vec_info
> (vinfo
)
2711 || node
== SLP_INSTANCE_TREE (node_instance
)
2712 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node
)))
2715 if (dump_enabled_p ())
2716 dump_printf_loc (MSG_NOTE
, vect_location
,
2717 "Building vector operands from scalars instead\n");
2719 /* Don't remove and free the child nodes here, since they could be
2720 referenced by other structures. The analysis and scheduling phases
2721 (need to) ignore child nodes of anything that isn't vect_internal_def. */
2722 unsigned int group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
2723 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
2724 SLP_TREE_SCALAR_OPS (node
).safe_grow (group_size
);
2725 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2727 tree lhs
= gimple_get_lhs (vect_orig_stmt (stmt_info
)->stmt
);
2728 SLP_TREE_SCALAR_OPS (node
)[i
] = lhs
;
/* NOTE(review): extraction-garbled; missing lines at numbering gaps
   (orig 2734-2736 rest of the header comment, 2739, 2750, 2753-2755,
   2761, 2763, 2765, 2767, 2773-2774, 2776, 2780 the vec_construct/else
   arm, 2782-2783 — braces and the `nelt` counter maintenance).

   Purpose: cost the prologue construction of an invariant/constant SLP
   operand node.  When nunits doesn't evenly cover the group each vector
   differs, so every vector is scanned; otherwise one representative
   vector suffices.  Per completed vector, records scalar_to_vec when all
   lanes were the same element (splat) and vec_construct otherwise.  */
2733 /* Compute the prologue cost for invariant or constant operands represented
2737 vect_prologue_cost_for_slp (slp_tree node
,
2738 stmt_vector_for_cost
*cost_vec
)
2740 /* Without looking at the actual initializer a vector of
2741 constants can be implemented as load from the constant pool.
2742 When all elements are the same we can use a splat. */
2743 tree vectype
= SLP_TREE_VECTYPE (node
);
2744 unsigned group_size
= SLP_TREE_SCALAR_OPS (node
).length ();
2745 unsigned num_vects_to_check
;
2746 unsigned HOST_WIDE_INT const_nunits
;
2747 unsigned nelt_limit
;
2748 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
2749 && ! multiple_p (const_nunits
, group_size
))
2751 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
2752 nelt_limit
= const_nunits
;
2756 /* If either the vector has variable length or the vectors
2757 are composed of repeated whole groups we only need to
2758 cost construction once. All vectors will be the same. */
2759 num_vects_to_check
= 1;
2760 nelt_limit
= group_size
;
2762 tree elt
= NULL_TREE
;
2764 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
2766 unsigned si
= j
% group_size
;
2768 elt
= SLP_TREE_SCALAR_OPS (node
)[si
];
2769 /* ??? We're just tracking whether all operands of a single
2770 vector initializer are the same, ideally we'd check if
2771 we emitted the same one already. */
2772 else if (elt
!= SLP_TREE_SCALAR_OPS (node
)[si
])
2775 if (nelt
== nelt_limit
)
2777 record_stmt_cost (cost_vec
, 1,
2778 SLP_TREE_DEF_TYPE (node
) == vect_external_def
2779 ? (elt
? scalar_to_vec
: vec_construct
)
2781 NULL
, vectype
, 0, vect_prologue
);
/* NOTE(review): extraction-garbled; numerous missing lines at numbering
   gaps (orig 2798-2801 locals/braces, 2804-2805, 2810, 2813-2814, 2818-2819
   return false, 2836-2838 the `res` declaration, 2841/2843/2846-2851 parts
   of the def-type verification, 2859-2860, 2864-2871, 2886-2887, 2893-2895,
   2897, 2904-2905, 2913-2918 and the final return).  Restore upstream.

   Purpose: recursive post-order analysis of an SLP subtree.  Skips
   non-internal nodes, caches visited nodes across instances, recurses into
   children, then temporarily pushes each child's SLP def-type onto its
   first scalar stmt (verifying two children never demand conflicting def
   types for the same stmt), analyzes the node root via
   vect_slp_analyze_node_operations_1, restores the def-types, costs
   constant/external children, and on failure tries demoting the node to
   external scalars rather than failing the whole tree.  */
2787 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2788 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2790 Return true if the operations are supported. */
2793 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2794 slp_instance node_instance
,
2795 hash_set
<slp_tree
> &visited
,
2796 hash_set
<slp_tree
> &lvisited
,
2797 stmt_vector_for_cost
*cost_vec
)
2802 /* Assume we can code-generate all invariants. */
2803 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2806 /* If we already analyzed the exact same set of scalar stmts we're done.
2807 We share the generated vector stmts for those.
2808 The SLP graph is acyclic so not caching whether we failed or succeeded
2809 doesn't result in any issue since we throw away the lvisited set
2811 if (visited
.contains (node
)
2812 || lvisited
.add (node
))
2815 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2816 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2817 visited
, lvisited
, cost_vec
))
2820 /* ??? We have to catch the case late where two first scalar stmts appear
2821 in multiple SLP children with different def type and fail. Remember
2822 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2823 match it when that is vect_internal_def. */
2824 auto_vec
<vect_def_type
, 4> dt
;
2825 dt
.safe_grow (SLP_TREE_CHILDREN (node
).length ());
2826 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2827 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2828 dt
[j
] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]);
2830 /* Push SLP node def-type to stmt operands. */
2831 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2832 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
2833 && SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2834 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2835 = SLP_TREE_DEF_TYPE (child
);
2837 /* Check everything worked out. */
2839 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2840 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2842 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2844 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2845 != SLP_TREE_DEF_TYPE (child
))
2848 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2852 if (!res
&& dump_enabled_p ())
2853 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2854 "not vectorized: same operand with different "
2855 "def type in stmt.\n");
2858 res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2861 /* Restore def-types. */
2862 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2863 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2864 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]) = dt
[j
];
2866 /* When the node can be vectorized cost invariant nodes it references.
2867 This is not done in DFS order to allow the refering node
2868 vectorizable_* calls to nail down the invariant nodes vector type
2869 and possibly unshare it if it needs a different vector type than
2872 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2873 if ((SLP_TREE_DEF_TYPE (child
) == vect_constant_def
2874 || SLP_TREE_DEF_TYPE (child
) == vect_external_def
)
2875 /* Perform usual caching, note code-generation still
2876 code-gens these nodes multiple times but we expect
2877 to CSE them later. */
2878 && !visited
.contains (child
)
2879 && !lvisited
.add (child
))
2881 /* ??? After auditing more code paths make a "default"
2882 and push the vector type from NODE to all children
2883 if it is not already set. */
2884 /* Compute the number of vectors to be generated. */
2885 tree vector_type
= SLP_TREE_VECTYPE (child
);
2888 /* For shifts with a scalar argument we don't need
2889 to cost or code-generate anything.
2890 ??? Represent this more explicitely. */
2891 gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_SCALAR_STMTS (node
)[0])
2892 == shift_vec_info_type
)
2896 unsigned group_size
= SLP_TREE_SCALAR_OPS (child
).length ();
2898 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2899 vf
= loop_vinfo
->vectorization_factor
;
2900 SLP_TREE_NUMBER_OF_VEC_STMTS (child
)
2901 = vect_get_num_vectors (vf
* group_size
, vector_type
);
2902 /* And cost them. */
2903 vect_prologue_cost_for_slp (child
, cost_vec
);
2906 /* If this node can't be vectorized, try pruning the tree here rather
2907 than felling the whole thing. */
2908 if (!res
&& vect_slp_convert_to_external (vinfo
, node
, node_instance
))
2910 /* We'll need to revisit this for invariant costing and number
2911 of vectorized stmt setting. */
2912 lvisited
.remove (node
);
/* NOTE(review): extraction-garbled; missing lines at numbering gaps
   (orig 2922-2928 signature tail/locals, 2930, 2933, 2940/2942 the `else`
   linkage of the root-stmt condition, 2946, 2952 the %G argument, 2956-2958,
   2961-2963 the visited.add loop body, 2966-2968 increment and braces).

   Purpose: driver over all SLP instances.  Analyzes each instance tree with
   a per-instance `lvisited` cache; instances that fail (or whose root stmt
   lacks internal-def vectorized defs) are dumped, freed and removed from
   the instance vector, otherwise their lvisited nodes are merged into the
   cross-instance `visited` set and their costs committed to the target
   cost model.  Returns whether any instance survived.  */
2920 /* Analyze statements in SLP instances of VINFO. Return true if the
2921 operations are supported. */
2924 vect_slp_analyze_operations (vec_info
*vinfo
)
2926 slp_instance instance
;
2929 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2931 hash_set
<slp_tree
> visited
;
2932 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2934 hash_set
<slp_tree
> lvisited
;
2935 stmt_vector_for_cost cost_vec
;
2936 cost_vec
.create (2);
2937 if (!vect_slp_analyze_node_operations (vinfo
,
2938 SLP_INSTANCE_TREE (instance
),
2939 instance
, visited
, lvisited
,
2941 /* Instances with a root stmt require vectorized defs for the
2943 || (SLP_INSTANCE_ROOT_STMT (instance
)
2944 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance
))
2945 != vect_internal_def
)))
2947 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2948 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2949 if (dump_enabled_p ())
2950 dump_printf_loc (MSG_NOTE
, vect_location
,
2951 "removing SLP instance operations starting from: %G",
2953 vect_free_slp_instance (instance
, false);
2954 vinfo
->slp_instances
.ordered_remove (i
);
2955 cost_vec
.release ();
2959 for (hash_set
<slp_tree
>::iterator x
= lvisited
.begin();
2960 x
!= lvisited
.end(); ++x
)
2964 add_stmt_costs (vinfo
, vinfo
->target_cost_data
, &cost_vec
);
2965 cost_vec
.release ();
2969 return !vinfo
->slp_instances
.is_empty ();
/* NOTE(review): extraction-garbled; missing lines at numbering gaps
   (orig 2982-2983 opening brace/locals, 2985-2986, 2988-2989, 2991, 2995-2998
   including the `if ((*life)[i]) continue;` guard, 3005, 3007, 3010,
   3013-3021 the life-marking and `continue`, 3024/3026 braces, 3029/3031-3034
   scalar_load arm, 3036-3038 kind assignments, 3040-3041, 3044/3046/3051/
   3053-3056 the recursion tail and braces).  Restore upstream before use.

   Purpose: accumulate the scalar (non-vectorized) cost of NODE's stmts into
   COST_VEC.  A stmt whose def has a non-SLP use is considered kept live and
   excluded (tracked via LIFE so its feeding defs are excluded too); each
   scalar stmt is counted once globally via the gimple visited flag; the
   cost kind is scalar_load/scalar_store for data refs, nothing for no-op
   conversions, scalar_stmt otherwise.  Recurses into internal-def children
   with a copy of LIFE to confine mutations to the subtree.  */
2973 /* Compute the scalar cost of the SLP node NODE and its children
2974 and return it. Do not account defs that are marked in LIFE and
2975 update LIFE according to uses of NODE. */
2978 vect_bb_slp_scalar_cost (vec_info
*vinfo
,
2979 slp_tree node
, vec
<bool, va_heap
> *life
,
2980 stmt_vector_for_cost
*cost_vec
,
2981 hash_set
<slp_tree
> &visited
)
2984 stmt_vec_info stmt_info
;
2987 if (visited
.add (node
))
2990 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2992 gimple
*stmt
= stmt_info
->stmt
;
2993 ssa_op_iter op_iter
;
2994 def_operand_p def_p
;
2999 /* If there is a non-vectorized use of the defs then the scalar
3000 stmt is kept live in which case we do not account it or any
3001 required defs in the SLP children in the scalar cost. This
3002 way we make the vectorization more costly when compared to
3004 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
3006 imm_use_iterator use_iter
;
3008 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
3009 if (!is_gimple_debug (use_stmt
))
3011 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
3012 if (!use_stmt_info
|| !PURE_SLP_STMT (use_stmt_info
))
3015 BREAK_FROM_IMM_USE_STMT (use_iter
);
3022 /* Count scalar stmts only once. */
3023 if (gimple_visited_p (stmt
))
3025 gimple_set_visited (stmt
, true);
3027 vect_cost_for_stmt kind
;
3028 if (STMT_VINFO_DATA_REF (stmt_info
))
3030 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
3033 kind
= scalar_store
;
3035 else if (vect_nop_conversion_p (stmt_info
))
3039 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
3042 auto_vec
<bool, 20> subtree_life
;
3043 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3045 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3047 /* Do not directly pass LIFE to the recursive call, copy it to
3048 confine changes in the callee to the current child/subtree. */
3049 subtree_life
.safe_splice (*life
);
3050 vect_bb_slp_scalar_cost (vinfo
, child
, &subtree_life
, cost_vec
,
3052 subtree_life
.truncate (0);
/* NOTE(review): extraction-garbled; missing lines at numbering gaps
   (orig 3061/3064/3067 locals incl. `i` and `dummy`, 3073/3080 braces,
   3084, 3087, 3092, 3096, 3098, 3100/3103 dump args, 3107-3108,
   3113/3115-3118 the return false/return true tail).

   Purpose: BB vectorization cost model.  Sums the scalar cost of all
   instance trees (via vect_bb_slp_scalar_cost through a throwaway target
   cost model), clears the per-stmt visited flags used for once-only
   counting, finalizes the vector prologue/body/epilogue costs from the
   bb_vinfo's own cost data, dumps the numbers, and compares
   vec_outside + vec_inside against the scalar cost (ties favor vector).  */
3057 /* Check if vectorization of the basic block is profitable. */
3060 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
3062 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
)
;
3063 slp_instance instance
;
3065 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
3066 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
3068 /* Calculate scalar cost. */
3069 stmt_vector_for_cost scalar_costs
;
3070 scalar_costs
.create (0);
3071 hash_set
<slp_tree
> visited
;
3072 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3074 auto_vec
<bool, 20> life
;
3075 life
.safe_grow_cleared
3076 (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance
)).length ());
3077 vect_bb_slp_scalar_cost (bb_vinfo
,
3078 SLP_INSTANCE_TREE (instance
),
3079 &life
, &scalar_costs
, visited
);
3081 void *target_cost_data
= init_cost (NULL
);
3082 add_stmt_costs (bb_vinfo
, target_cost_data
, &scalar_costs
);
3083 scalar_costs
.release ();
3085 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
3086 destroy_cost_data (target_cost_data
);
3088 /* Unset visited flag. */
3089 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
3090 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
3091 gimple_set_visited (gsi_stmt (gsi
), false);
3093 /* Complete the target-specific cost calculation. */
3094 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
3095 &vec_inside_cost
, &vec_epilogue_cost
);
3097 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
3099 if (dump_enabled_p ())
3101 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
3102 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
3104 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
3105 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
3106 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
3109 /* Vectorization is profitable if its cost is more than the cost of scalar
3110 version. Note that we err on the vector side for equal cost because
3111 the cost estimate is otherwise quite pessimistic (constant uses are
3112 free on the scalar side but cost a load on the vector side for
3114 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
/* NOTE(review): extraction-garbled; missing lines at numbering gaps
   (orig 3121-3123 rest of header comment and signature line, 3125/3127
   braces, 3130, 3133-3134 `continue`, 3141-3142, 3145-3147 braces/end).

   Purpose: scan the BB region for vector CONSTRUCTOR assignments that can
   seed SLP: the rhs must be a vector type whose element count equals the
   constructor's element count, with non-vector (scalar) elements, and not
   a uniform splat.  Matching stmts are pushed onto the grouped-stores list
   so vect_analyze_slp picks them up as instance roots.  */
3120 /* Find any vectorizable constructors and add them to the grouped_store
3124 vect_slp_check_for_constructors (bb_vec_info bb_vinfo
)
3126 gimple_stmt_iterator gsi
;
3128 for (gsi
= bb_vinfo
->region_begin
;
3129 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
3131 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (gsi
));
3132 if (!stmt
|| gimple_assign_rhs_code (stmt
) != CONSTRUCTOR
)
3135 tree rhs
= gimple_assign_rhs1 (stmt
);
3136 if (!VECTOR_TYPE_P (TREE_TYPE (rhs
))
3137 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs
)),
3138 CONSTRUCTOR_NELTS (rhs
))
3139 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs
, 0)->value
))
3140 || uniform_vector_p (rhs
))
3143 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (stmt
);
3144 BB_VINFO_GROUPED_STORES (bb_vinfo
).safe_push (stmt_info
);
/* NOTE(review): extraction-garbled; missing lines at numbering gaps
   (orig 3152-3154, 3156/3158 locals incl. `i`, 3160/3162, 3164-3165
   `fatal = true`, 3167/3169 braces, 3173-3176 returns, 3178/3182-3185,
   3187/3191-3194, 3196/3199/3201/3205-3208, 3210-3211, 3213/3215/3217/3219,
   3225-3228, 3231/3233, 3237/3240/3246/3249-3251, 3253/3258-3260/3262-3263,
   3265/3269-3271, 3275/3279-3282, 3286-3288 — guards, `return`s with
   opt_result payloads, and braces).  Restore from upstream.

   Purpose: full BB-SLP analysis pipeline for one region: analyze data refs
   (>= 2 required), analyze accesses, detect vectorizable constructors,
   require grouped stores, run pattern recog, build SLP instances, optimize
   permutations, record base alignments, verify per-instance alignment and
   dependences (pruning failures), mark surviving stmts pure-SLP/relevant,
   check operations, and finally apply the cost model.  */
3148 /* Check if the region described by BB_VINFO can be vectorized, returning
3149 true if so. When returning false, set FATAL to true if the same failure
3150 would prevent vectorization at other vector sizes, false if it is still
3151 worth trying other sizes. N_STMTS is the number of statements in the
3155 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo
, int n_stmts
, bool &fatal
)
3157 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3159 slp_instance instance
;
3161 poly_uint64 min_vf
= 2;
3163 /* The first group of checks is independent of the vector size. */
3166 /* Analyze the data references. */
3168 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, NULL
))
3170 if (dump_enabled_p ())
3171 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3172 "not vectorized: unhandled data-ref in basic "
3177 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
3179 if (dump_enabled_p ())
3180 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3181 "not vectorized: not enough data-refs in "
3186 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
3188 if (dump_enabled_p ())
3189 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3190 "not vectorized: unhandled data access in "
3195 vect_slp_check_for_constructors (bb_vinfo
);
3197 /* If there are no grouped stores in the region there is no need
3198 to continue with pattern recog as vect_analyze_slp will fail
3200 if (bb_vinfo
->grouped_stores
.is_empty ())
3202 if (dump_enabled_p ())
3203 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3204 "not vectorized: no grouped stores in "
3209 /* While the rest of the analysis below depends on it in some way. */
3212 vect_pattern_recog (bb_vinfo
);
3214 /* Check the SLP opportunities in the basic block, analyze and build SLP
3216 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
3218 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3221 "Failed to SLP the basic block.\n");
3222 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3223 "not vectorized: failed to find SLP opportunities "
3224 "in basic block.\n");
3229 /* Optimize permutations. */
3230 vect_optimize_slp (bb_vinfo
);
3232 vect_record_base_alignments (bb_vinfo
);
3234 /* Analyze and verify the alignment of data references and the
3235 dependence in the SLP instances. */
3236 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
3238 if (! vect_slp_analyze_and_verify_instance_alignment (bb_vinfo
, instance
)
3239 || ! vect_slp_analyze_instance_dependence (bb_vinfo
, instance
))
3241 slp_tree node
= SLP_INSTANCE_TREE (instance
);
3242 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3243 if (dump_enabled_p ())
3244 dump_printf_loc (MSG_NOTE
, vect_location
,
3245 "removing SLP instance operations starting from: %G",
3247 vect_free_slp_instance (instance
, false);
3248 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
3252 /* Mark all the statements that we want to vectorize as pure SLP and
3254 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
3255 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3256 if (SLP_INSTANCE_ROOT_STMT (instance
))
3257 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance
)) = pure_slp
;
3261 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3264 if (!vect_slp_analyze_operations (bb_vinfo
))
3266 if (dump_enabled_p ())
3267 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3268 "not vectorized: bad operation in basic block.\n");
3272 /* Cost model: check if the vectorization is worthwhile. */
3273 if (!unlimited_cost_model (NULL
)
3274 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3276 if (dump_enabled_p ())
3277 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3278 "not vectorized: vectorization is not "
3283 if (dump_enabled_p ())
3284 dump_printf_loc (MSG_NOTE
, vect_location
,
3285 "Basic block will be vectorized using SLP\n");
3289 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3290 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3291 on success. The region has N_STMTS statements and has the datarefs
3292 given by DATAREFS. */
/* NOTE(review): this excerpt is a lossy extraction.  Several original
   lines are missing (the return type, braces, the declaration of
   "fatal", and the surrounding mode-retry loop header) -- verify any
   change against the complete tree-vect-slp.c.  */
3295 vect_slp_bb_region (gimple_stmt_iterator region_begin
,
3296 gimple_stmt_iterator region_end
,
3297 vec
<data_reference_p
> datarefs
,
3298 unsigned int n_stmts
)
3300 bb_vec_info bb_vinfo
;
3301 auto_vector_modes vector_modes
;
3303 /* Autodetect first vector size we try. */
3304 machine_mode next_vector_mode
= VOIDmode
;
3305 targetm
.vectorize
.autovectorize_vector_modes (&vector_modes
, false);
3306 unsigned int mode_i
= 0;
3308 vec_info_shared shared
;
3310 machine_mode autodetected_vector_mode
= VOIDmode
;
3313 bool vectorized
= false;
/* A fresh _bb_vec_info is built per attempted vector mode; "shared"
   carries the dataref analysis across attempts so it is computed once.  */
3315 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, &shared
);
3317 bool first_time_p
= shared
.datarefs
.is_empty ();
3318 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
3320 bb_vinfo
->shared
->save_datarefs ();
3322 bb_vinfo
->shared
->check_datarefs ();
3323 bb_vinfo
->vector_mode
= next_vector_mode
;
3325 if (vect_slp_analyze_bb_1 (bb_vinfo
, n_stmts
, fatal
)
3326 && dbg_cnt (vect_slp
))
3328 if (dump_enabled_p ())
3330 dump_printf_loc (MSG_NOTE
, vect_location
,
3331 "***** Analysis succeeded with vector mode"
3332 " %s\n", GET_MODE_NAME (bb_vinfo
->vector_mode
));
3333 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3336 bb_vinfo
->shared
->check_datarefs ();
/* Analysis succeeded: emit the vector code for this region.  */
3337 vect_schedule_slp (bb_vinfo
);
3339 unsigned HOST_WIDE_INT bytes
;
3340 if (dump_enabled_p ())
3342 if (GET_MODE_SIZE (bb_vinfo
->vector_mode
).is_constant (&bytes
))
3343 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3344 "basic block part vectorized using %wu byte "
3345 "vectors\n", bytes
);
3347 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3348 "basic block part vectorized using variable "
3349 "length vectors\n");
3356 if (dump_enabled_p ())
3357 dump_printf_loc (MSG_NOTE
, vect_location
,
3358 "***** Analysis failed with vector mode %s\n",
3359 GET_MODE_NAME (bb_vinfo
->vector_mode
));
/* Remember the mode the target chose on the first (autodetect) pass so
   later candidate modes that would repeat it can be skipped.  */
3363 autodetected_vector_mode
= bb_vinfo
->vector_mode
;
3366 while (mode_i
< vector_modes
.length ()
3367 && vect_chooses_same_modes_p (bb_vinfo
, vector_modes
[mode_i
]))
3369 if (dump_enabled_p ())
3370 dump_printf_loc (MSG_NOTE
, vect_location
,
3371 "***** The result for vector mode %s would"
3373 GET_MODE_NAME (vector_modes
[mode_i
]));
/* Skip a candidate mode when it and the autodetected mode are mutually
   "related" -- analyzing it would just repeat the previous analysis.  */
3379 if (mode_i
< vector_modes
.length ()
3380 && VECTOR_MODE_P (autodetected_vector_mode
)
3381 && (related_vector_mode (vector_modes
[mode_i
],
3382 GET_MODE_INNER (autodetected_vector_mode
))
3383 == autodetected_vector_mode
)
3384 && (related_vector_mode (autodetected_vector_mode
,
3385 GET_MODE_INNER (vector_modes
[mode_i
]))
3386 == vector_modes
[mode_i
]))
3388 if (dump_enabled_p ())
3389 dump_printf_loc (MSG_NOTE
, vect_location
,
3390 "***** Skipping vector mode %s, which would"
3391 " repeat the analysis for %s\n",
3392 GET_MODE_NAME (vector_modes
[mode_i
]),
3393 GET_MODE_NAME (autodetected_vector_mode
));
/* Loop-exit conditions (the enclosing "if" is partly missing from this
   excerpt): stop when vectorization succeeded, all modes are exhausted,
   or autodetection yielded no vector mode.  */
3398 || mode_i
== vector_modes
.length ()
3399 || autodetected_vector_mode
== VOIDmode
3400 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3401 vector sizes will fail do not bother iterating. */
3405 /* Try the next biggest vector size. */
3406 next_vector_mode
= vector_modes
[mode_i
++];
3407 if (dump_enabled_p ())
3408 dump_printf_loc (MSG_NOTE
, vect_location
,
3409 "***** Re-trying analysis with vector mode %s\n",
3410 GET_MODE_NAME (next_vector_mode
));
3414 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3415 true if anything in the basic-block was vectorized. */
/* NOTE(review): lossy excerpt -- the return type, braces, the "insns"
   counter declaration/accumulation and several "continue"/"break"
   statements are missing; confirm against the full source before
   editing.  */
3418 vect_slp_bb (basic_block bb
)
3420 gimple_stmt_iterator gsi
;
3421 bool any_vectorized
= false;
3423 gsi
= gsi_after_labels (bb
);
/* Walk the block, carving it into maximal regions of statements whose
   data references we can analyze, and try to vectorize each region.  */
3424 while (!gsi_end_p (gsi
))
3426 gimple_stmt_iterator region_begin
= gsi
;
3427 vec
<data_reference_p
> datarefs
= vNULL
;
3430 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3432 gimple
*stmt
= gsi_stmt (gsi
);
3433 if (is_gimple_debug (stmt
))
3437 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3438 vect_location
= stmt
;
/* A statement whose data reference cannot be represented ends the
   current region.  */
3440 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3444 /* Skip leading unhandled stmts. */
3445 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3451 gimple_stmt_iterator region_end
= gsi
;
/* NOTE(review): "insns" is computed on lines elided from this excerpt;
   presumably it counts the region's statements -- TODO confirm.  */
3453 if (insns
> param_slp_max_insns_in_bb
)
3455 if (dump_enabled_p ())
3456 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3457 "not vectorized: too many instructions in "
3460 else if (vect_slp_bb_region (region_begin
, region_end
, datarefs
, insns
))
3461 any_vectorized
= true;
3463 if (gsi_end_p (region_end
))
3466 /* Skip the unhandled stmt. */
3470 return any_vectorized
;
3474 /* Build a variable-length vector in which the elements in ELTS are repeated
3475 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3476 RESULTS and add any new instructions to SEQ.
3478 The approach we use is:
3480 (1) Find a vector mode VM with integer elements of mode IM.
3482 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3483 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3484 from small vectors to IM.
3486 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3488 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3489 correct byte contents.
3491 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3493 We try to find the largest IM for which this sequence works, in order
3494 to cut down on the number of interleaves. */
/* NOTE(review): lossy excerpt -- the RESULTS parameter line, several
   braces and the permute-selector construction inside step (4) are
   missing; verify against the full source.  */
3497 duplicate_and_interleave (vec_info
*vinfo
, gimple_seq
*seq
, tree vector_type
,
3498 vec
<tree
> elts
, unsigned int nresults
,
3501 unsigned int nelts
= elts
.length ();
3502 tree element_type
= TREE_TYPE (vector_type
);
3504 /* (1) Find a vector mode VM with integer elements of mode IM. */
3505 unsigned int nvectors
= 1;
3506 tree new_vector_type
;
3508 if (!can_duplicate_and_interleave_p (vinfo
, nelts
, element_type
,
3509 &nvectors
, &new_vector_type
,
3513 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3514 unsigned int partial_nelts
= nelts
/ nvectors
;
3515 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3517 tree_vector_builder partial_elts
;
/* "pieces" ping-pongs between input and output halves of the interleave
   tree, hence 2 * nvectors slots.  */
3518 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3519 pieces
.quick_grow (nvectors
* 2);
3520 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3522 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3523 ELTS' has mode IM. */
3524 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3525 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3526 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3527 tree t
= gimple_build_vector (seq
, &partial_elts
);
3528 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3529 TREE_TYPE (new_vector_type
), t
);
3531 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3532 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3535 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3536 correct byte contents.
3538 We need to repeat the following operation log2(nvectors) times:
3540 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3541 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3543 However, if each input repeats every N elements and the VF is
3544 a multiple of N * 2, the HI result is the same as the LO. */
3545 unsigned int in_start
= 0;
3546 unsigned int out_start
= nvectors
;
3547 unsigned int hi_start
= nvectors
/ 2;
3548 /* A bound on the number of outputs needed to produce NRESULTS results
3549 in the final iteration. */
3550 unsigned int noutputs_bound
= nvectors
* nresults
;
3551 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3553 noutputs_bound
/= 2;
3554 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3555 for (unsigned int i
= 0; i
< limit
; ++i
)
/* Odd outputs can reuse the previous (LO) result when the repeat
   pattern divides the vector length -- condition partly elided here.  */
3558 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3561 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3565 tree output
= make_ssa_name (new_vector_type
);
3566 tree input1
= pieces
[in_start
+ (i
/ 2)];
3567 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3568 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3571 gimple_seq_add_stmt (seq
, stmt
);
3572 pieces
[out_start
+ i
] = output
;
/* Swap roles of the two halves of "pieces" for the next level of the
   interleave tree.  */
3574 std::swap (in_start
, out_start
);
3577 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3578 results
.reserve (nresults
);
3579 for (unsigned int i
= 0; i
< nresults
; ++i
)
3581 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3582 pieces
[in_start
+ i
]));
3584 results
.quick_push (results
[i
- nvectors
]);
3588 /* For constant and loop invariant defs in OP_NODE this function creates
3589 vector defs that will be used in the vectorized stmts and stores them
3590 to SLP_TREE_VEC_DEFS of OP_NODE. */
/* NOTE(review): lossy excerpt -- declarations of "op", "orig_op",
   "vector_type", "vec_cst", "init", "vop" and several braces/else
   branches are on lines missing from this extraction; verify against
   the full source.  */
3593 vect_create_constant_vectors (vec_info
*vinfo
, slp_tree op_node
)
3595 unsigned HOST_WIDE_INT nunits
;
3597 unsigned j
, number_of_places_left_in_vector
;
3600 int group_size
= op_node
->ops
.length ();
3601 unsigned int vec_num
, i
;
3602 unsigned number_of_copies
= 1;
3604 gimple_seq ctor_seq
= NULL
;
3605 auto_vec
<tree
, 16> permute_results
;
3607 /* We always want SLP_TREE_VECTYPE (op_node) here correctly set. */
3608 vector_type
= SLP_TREE_VECTYPE (op_node
);
3610 unsigned int number_of_vectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (op_node
);
3611 SLP_TREE_VEC_DEFS (op_node
).create (number_of_vectors
);
3612 auto_vec
<tree
> voprnds (number_of_vectors
);
3614 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3615 created vectors. It is greater than 1 if unrolling is performed.
3617 For example, we have two scalar operands, s1 and s2 (e.g., group of
3618 strided accesses of size two), while NUNITS is four (i.e., four scalars
3619 of this type can be packed in a vector). The output vector will contain
3620 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3623 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3624 containing the operands.
3626 For example, NUNITS is four as before, and the group size is 8
3627 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3628 {s5, s6, s7, s8}. */
3630 /* When using duplicate_and_interleave, we just need one element for
3631 each scalar statement. */
3632 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3633 nunits
= group_size
;
3635 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3637 number_of_places_left_in_vector
= nunits
;
3639 tree_vector_builder
elts (vector_type
, nunits
, 1);
3640 elts
.quick_grow (nunits
);
3641 stmt_vec_info insert_after
= NULL
;
/* Fill vectors back-to-front; operands are walked in reverse so the
   generated vectors come out in reverse order (inverted below).  */
3642 for (j
= 0; j
< number_of_copies
; j
++)
3645 for (i
= group_size
- 1; op_node
->ops
.iterate (i
, &op
); i
--)
3647 /* Create 'vect_ = {op0,op1,...,opn}'. */
3648 number_of_places_left_in_vector
--;
3650 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3652 if (CONSTANT_CLASS_P (op
))
3654 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3656 /* Can't use VIEW_CONVERT_EXPR for booleans because
3657 of possibly different sizes of scalar value and
3659 if (integer_zerop (op
))
3660 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3661 else if (integer_onep (op
))
3662 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3667 op
= fold_unary (VIEW_CONVERT_EXPR
,
3668 TREE_TYPE (vector_type
), op
);
3669 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
/* Non-constant operand of mismatched type: emit an explicit conversion
   statement into CTOR_SEQ.  */
3673 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3675 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3678 = build_all_ones_cst (TREE_TYPE (vector_type
));
3680 = build_zero_cst (TREE_TYPE (vector_type
));
3681 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3682 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3688 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3691 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3694 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3698 elts
[number_of_places_left_in_vector
] = op
;
3699 if (!CONSTANT_CLASS_P (op
))
3701 /* For BB vectorization we have to compute an insert location
3702 when a def is inside the analyzed region since we cannot
3703 simply insert at the BB start in this case. */
3704 stmt_vec_info opdef
;
3705 if (TREE_CODE (orig_op
) == SSA_NAME
3706 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3707 && is_a
<bb_vec_info
> (vinfo
)
3708 && (opdef
= vinfo
->lookup_def (orig_op
)))
3711 insert_after
= opdef
;
3713 insert_after
= get_later_stmt (insert_after
, opdef
);
/* Vector is full: materialize it, either directly or via
   duplicate_and_interleave for variable-length vectors.  */
3716 if (number_of_places_left_in_vector
== 0)
3719 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3720 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3721 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3724 if (permute_results
.is_empty ())
3725 duplicate_and_interleave (vinfo
, &ctor_seq
, vector_type
,
3726 elts
, number_of_vectors
,
3728 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3733 gimple_stmt_iterator gsi
= gsi_for_stmt (insert_after
->stmt
);
3734 /* vect_init_vector inserts before. */
3736 init
= vect_init_vector (vinfo
, NULL
, vec_cst
,
3740 init
= vect_init_vector (vinfo
, NULL
, vec_cst
,
3742 if (ctor_seq
!= NULL
)
3744 gimple_stmt_iterator gsi
3745 = gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3746 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3749 voprnds
.quick_push (init
);
3750 insert_after
= NULL
;
3751 number_of_places_left_in_vector
= nunits
;
3753 elts
.new_vector (vector_type
, nunits
, 1);
3754 elts
.quick_grow (nunits
);
3759 /* Since the vectors are created in the reverse order, we should invert
3761 vec_num
= voprnds
.length ();
3762 for (j
= vec_num
; j
!= 0; j
--)
3764 vop
= voprnds
[j
- 1];
3765 SLP_TREE_VEC_DEFS (op_node
).quick_push (vop
);
3768 /* In case that VF is greater than the unrolling factor needed for the SLP
3769 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3770 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3771 to replicate the vectors. */
3772 while (number_of_vectors
> SLP_TREE_VEC_DEFS (op_node
).length ())
3773 for (i
= 0; SLP_TREE_VEC_DEFS (op_node
).iterate (i
, &vop
) && i
< vec_num
;
3775 SLP_TREE_VEC_DEFS (op_node
).quick_push (vop
);
3779 /* Get vectorized definitions from SLP_NODE that contains corresponding
3780 vectorized def-stmts. */
/* Appends to *VEC_OPRNDS the scalar lhs of every vectorized stmt already
   recorded in SLP_TREE_VEC_STMTS (SLP_NODE), in order.  Asserts the node
   has been vectorized.  (Return type and "i" declaration are on lines
   missing from this excerpt.)  */
3783 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3785 stmt_vec_info vec_def_stmt_info
;
3788 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3790 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt_info
)
3791 vec_oprnds
->quick_push (gimple_get_lhs (vec_def_stmt_info
->stmt
))
;
3795 /* Get N vectorized definitions for SLP_NODE. */
/* For each of the first N children of SLP_NODE, collect that child's
   vector defs into a fresh vec<tree> and push it onto *VEC_OPRNDS:
   internal defs come from the child's vectorized stmts, others from
   SLP_TREE_VEC_DEFS.  NOTE(review): the guard that only rewrites N
   when it is the "all children" sentinel appears to be on a line
   missing from this excerpt -- confirm before editing.  */
3798 vect_get_slp_defs (vec_info
*,
3799 slp_tree slp_node
, vec
<vec
<tree
> > *vec_oprnds
, unsigned n
)
3802 n
= SLP_TREE_CHILDREN (slp_node
).length ();
3804 for (unsigned i
= 0; i
< n
; ++i
)
3806 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[i
];
3808 vec
<tree
> vec_defs
= vNULL
;
3810 /* For each operand we check if it has vectorized definitions in a child
3811 node or we need to create them (for invariants and constants). */
3812 vec_defs
.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child
));
3813 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3814 vect_get_slp_vect_defs (child
, &vec_defs
);
3816 vec_defs
.splice (SLP_TREE_VEC_DEFS (child
));
3818 vec_oprnds
->quick_push (vec_defs
);
3822 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3823 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3824 permute statements for the SLP node NODE. */
/* NOTE(review): lossy excerpt -- declarations of "mode", "vec_index",
   "noop_p", "perm_dest"/"perm_stmt", several braces, "return false"
   paths and the *n_perms updates are on lines missing from this
   extraction; verify against the full source.  */
3827 vect_transform_slp_perm_load (vec_info
*vinfo
,
3828 slp_tree node
, vec
<tree
> dr_chain
,
3829 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3830 bool analyze_only
, unsigned *n_perms
)
3832 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3834 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3835 unsigned int group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
3836 unsigned int mask_element
;
3839 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3842 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
3844 mode
= TYPE_MODE (vectype
);
3845 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3847 /* Initialize the vect stmts of NODE to properly insert the generated
3850 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3851 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3852 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3854 /* Generate permutation masks for every NODE. Number of masks for each NODE
3855 is equal to GROUP_SIZE.
3856 E.g., we have a group of three nodes with three loads from the same
3857 location in each node, and the vector size is 4. I.e., we have a
3858 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3859 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3860 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3863 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3864 The last mask is illegal since we assume two operands for permute
3865 operation, and the mask element values can't be outside that range.
3866 Hence, the last mask must be converted into {2,5,5,5}.
3867 For the first two permutations we need the first and the second input
3868 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3869 we need the second and the third vectors: {b1,c1,a2,b2} and
3872 int vect_stmts_counter
= 0;
3873 unsigned int index
= 0;
3874 int first_vec_index
= -1;
3875 int second_vec_index
= -1;
3879 vec_perm_builder mask
;
3880 unsigned int nelts_to_build
;
3881 unsigned int nvectors_per_build
;
3882 bool repeating_p
= (group_size
== DR_GROUP_SIZE (stmt_info
)
3883 && multiple_p (nunits
, group_size
));
3886 /* A single vector contains a whole number of copies of the node, so:
3887 (a) all permutes can use the same mask; and
3888 (b) the permutes only need a single vector input. */
3889 mask
.new_vector (nunits
, group_size
, 3);
3890 nelts_to_build
= mask
.encoded_nelts ();
3891 nvectors_per_build
= SLP_TREE_VEC_STMTS (node
).length ();
3895 /* We need to construct a separate mask for each vector statement. */
3896 unsigned HOST_WIDE_INT const_nunits
, const_vf
;
/* Non-repeating case requires constant NUNITS and VF (failure path
   elided in this excerpt).  */
3897 if (!nunits
.is_constant (&const_nunits
)
3898 || !vf
.is_constant (&const_vf
))
3900 mask
.new_vector (const_nunits
, const_nunits
, 1);
3901 nelts_to_build
= const_vf
* group_size
;
3902 nvectors_per_build
= 1;
3905 unsigned int count
= mask
.encoded_nelts ();
3906 mask
.quick_grow (count
);
3907 vec_perm_indices indices
;
3909 for (unsigned int j
= 0; j
= 0; j
< nelts_to_build
; j
++)
3911 unsigned int iter_num
= j
/ group_size
;
3912 unsigned int stmt_num
= j
% group_size
;
3913 unsigned int i
= (iter_num
* DR_GROUP_SIZE (stmt_info
)
3914 + SLP_TREE_LOAD_PERMUTATION (node
)[stmt_num
]);
3917 first_vec_index
= 0;
3922 /* Enforced before the loop when !repeating_p. */
3923 unsigned int const_nunits
= nunits
.to_constant ();
3924 vec_index
= i
/ const_nunits
;
3925 mask_element
= i
% const_nunits
;
3926 if (vec_index
== first_vec_index
3927 || first_vec_index
== -1)
3929 first_vec_index
= vec_index
;
3931 else if (vec_index
== second_vec_index
3932 || second_vec_index
== -1)
3934 second_vec_index
= vec_index
;
/* Elements from the second input vector are addressed with an offset
   of const_nunits in the two-operand permute mask.  */
3935 mask_element
+= const_nunits
;
3939 if (dump_enabled_p ())
3940 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3941 "permutation requires at "
3942 "least three vectors %G",
3944 gcc_assert (analyze_only
);
3948 gcc_assert (mask_element
< 2 * const_nunits
);
3951 if (mask_element
!= index
)
3953 mask
[index
++] = mask_element
;
3955 if (index
== count
&& !noop_p
)
3957 indices
.new_vector (mask
, second_vec_index
== -1 ? 1 : 2, nunits
);
3958 if (!can_vec_perm_const_p (mode
, indices
))
3960 if (dump_enabled_p ())
3962 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3964 "unsupported vect permute { ");
3965 for (i
= 0; i
< count
; ++i
)
3967 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3968 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3970 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3972 gcc_assert (analyze_only
);
3983 tree mask_vec
= NULL_TREE
;
3986 mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
3988 if (second_vec_index
== -1)
3989 second_vec_index
= first_vec_index
;
3991 for (unsigned int ri
= 0; ri
< nvectors_per_build
; ++ri
)
3993 /* Generate the permute statement if necessary. */
3994 tree first_vec
= dr_chain
[first_vec_index
+ ri
];
3995 tree second_vec
= dr_chain
[second_vec_index
+ ri
];
3996 stmt_vec_info perm_stmt_info
;
3999 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
4001 = vect_create_destination_var (gimple_assign_lhs (stmt
),
4003 perm_dest
= make_ssa_name (perm_dest
);
4005 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
4006 first_vec
, second_vec
,
4009 = vect_finish_stmt_generation (vinfo
,
4010 stmt_info
, perm_stmt
,
4014 /* If mask was NULL_TREE generate the requested
4015 identity transform. */
4016 perm_stmt_info
= vinfo
->lookup_def (first_vec
);
4018 /* Store the vector statement in NODE. */
4019 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++]
4025 first_vec_index
= -1;
4026 second_vec_index
= -1;
4034 /* Vectorize SLP instance tree in postorder. */
/* NOTE(review): lossy excerpt -- the return type, early-return
   statements, declarations of "i", "j", "k", "l", "child" and "vstmt",
   and several braces are on lines missing from this extraction; verify
   against the full source.  */
4037 vect_schedule_slp_instance (vec_info
*vinfo
,
4038 slp_tree node
, slp_instance instance
)
4040 gimple_stmt_iterator si
;
4044 /* See if we have already vectorized the node in the graph of the
4046 if ((SLP_TREE_DEF_TYPE (node
) == vect_internal_def
4047 && SLP_TREE_VEC_STMTS (node
).exists ())
4048 || SLP_TREE_VEC_DEFS (node
).exists ())
4051 /* Vectorize externals and constants. */
4052 if (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
4053 || SLP_TREE_DEF_TYPE (node
) == vect_external_def
)
4055 /* ??? vectorizable_shift can end up using a scalar operand which is
4056 currently denoted as !SLP_TREE_VECTYPE. No need to vectorize the
4057 node in this case. */
4058 if (!SLP_TREE_VECTYPE (node
))
4061 vect_create_constant_vectors (vinfo
, node
);
/* Postorder: vectorize all children before this node.  */
4065 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4066 vect_schedule_slp_instance (vinfo
, child
, instance
);
4068 /* Push SLP node def-type to stmts. */
4069 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4070 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4072 stmt_vec_info child_stmt_info
;
4073 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
4074 STMT_VINFO_DEF_TYPE (child_stmt_info
) = SLP_TREE_DEF_TYPE (child
);
4077 stmt_vec_info stmt_info
= SLP_TREE_REPRESENTATIVE (node
);
4079 /* VECTYPE is the type of the destination. */
4080 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4081 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4082 unsigned group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
4084 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
4085 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
4087 if (dump_enabled_p ())
4088 dump_printf_loc (MSG_NOTE
, vect_location
,
4089 "------>vectorizing SLP node starting from: %G",
4092 /* Vectorized stmts go before the last scalar stmt which is where
4093 all uses are ready. */
4094 stmt_vec_info last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
4095 si
= gsi_for_stmt (last_stmt_info
->stmt
);
4097 /* Handle two-operation SLP nodes by vectorizing the group with
4098 both operations and then performing a merge. */
4099 bool done_p
= false;
4100 if (SLP_TREE_TWO_OPERATORS (node
))
4102 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
4103 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
4104 enum tree_code ocode
= ERROR_MARK
;
4105 stmt_vec_info ostmt_info
;
/* Build a 0/1 selection mask: 0 = lane uses CODE0, 1 = lane uses the
   other operation OCODE.  */
4106 vec_perm_builder
mask (group_size
, group_size
, 1);
4107 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt_info
)
4109 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
4110 if (gimple_assign_rhs_code (ostmt
) != code0
)
4112 mask
.quick_push (1);
4113 ocode
= gimple_assign_rhs_code (ostmt
);
4116 mask
.quick_push (0);
4118 if (ocode
!= ERROR_MARK
)
4120 vec
<stmt_vec_info
> v0
;
4121 vec
<stmt_vec_info
> v1
;
4123 tree tmask
= NULL_TREE
;
/* Vectorize the whole group once with CODE0 (results in V0), then
   temporarily flip the representative's code to OCODE and vectorize
   again (results in V1); merge V0/V1 per-lane below.  */
4124 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
4125 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
4126 SLP_TREE_VEC_STMTS (node
).truncate (0);
4127 gimple_assign_set_rhs_code (stmt
, ocode
);
4128 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
4129 gimple_assign_set_rhs_code (stmt
, code0
);
4130 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
4131 SLP_TREE_VEC_STMTS (node
).truncate (0);
4132 tree meltype
= build_nonstandard_integer_type
4133 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
4134 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
4136 for (j
= 0; j
< v0
.length (); ++j
)
4138 /* Enforced by vect_build_slp_tree, which rejects variable-length
4139 vectors for SLP_TREE_TWO_OPERATORS. */
4140 unsigned int const_nunits
= nunits
.to_constant ();
4141 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
4142 for (l
= 0; l
< const_nunits
; ++l
)
4144 if (k
>= group_size
)
4146 tree t
= build_int_cst (meltype
,
4147 mask
[k
++] * const_nunits
+ l
);
4148 melts
.quick_push (t
);
4150 tmask
= melts
.build ();
4152 /* ??? Not all targets support a VEC_PERM_EXPR with a
4153 constant mask that would translate to a vec_merge RTX
4154 (with their vec_perm_const_ok). We can either not
4155 vectorize in that case or let veclower do its job.
4156 Unfortunately that isn't too great and at least for
4157 plus/minus we'd eventually like to match targets
4158 vector addsub instructions. */
4160 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
4162 gimple_assign_lhs (v0
[j
]->stmt
),
4163 gimple_assign_lhs (v1
[j
]->stmt
),
4165 SLP_TREE_VEC_STMTS (node
).quick_push
4166 (vect_finish_stmt_generation (vinfo
, stmt_info
, vstmt
, &si
));
4174 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
4176 /* Restore stmt def-types. */
4177 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4178 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4180 stmt_vec_info child_stmt_info
;
4181 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
4182 STMT_VINFO_DEF_TYPE (child_stmt_info
) = vect_internal_def
;
4186 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4187 For loop vectorization this is done in vectorizable_call, but for SLP
4188 it needs to be deferred until end of vect_schedule_slp, because multiple
4189 SLP instances may refer to the same scalar stmt. */
/* Recursive worker: VISITED guards against processing a shared SLP node
   twice.  Only pure-SLP, non-pattern call stmts still in the IL are
   replaced.  (Declarations of "i", "child", "lhs", "new_stmt" and some
   continue/return lines are elided in this excerpt.)  */
4192 vect_remove_slp_scalar_calls (vec_info
*vinfo
,
4193 slp_tree node
, hash_set
<slp_tree
> &visited
)
4196 gimple_stmt_iterator gsi
;
4200 stmt_vec_info stmt_info
;
4202 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4205 if (visited
.add (node
))
4208 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4209 vect_remove_slp_scalar_calls (vinfo
, child
, visited
);
4211 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
4213 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
/* gimple_bb == NULL means the call was already removed from the IL.  */
4214 if (!stmt
|| gimple_bb (stmt
) == NULL
)
4216 if (is_pattern_stmt_p (stmt_info
)
4217 || !PURE_SLP_STMT (stmt_info
))
4219 lhs
= gimple_call_lhs (stmt
);
/* Replace the call with "lhs = 0" so later DCE can delete it; the real
   result is produced by the vectorized code.  */
4220 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
4221 gsi
= gsi_for_stmt (stmt
);
4222 vinfo
->replace_stmt (&gsi
, stmt_info
, new_stmt
);
4223 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
/* Convenience overload: start the recursive removal above with an empty
   visited set.  */
4228 vect_remove_slp_scalar_calls (vec_info
*vinfo
, slp_tree node
)
4230 hash_set
<slp_tree
> visited
;
4231 vect_remove_slp_scalar_calls (vinfo
, node
, visited
)
;
4234 /* Vectorize the instance root. */
/* Replaces the instance's root statement (e.g. a CONSTRUCTOR that the
   SLP instance was built from) with an assignment from the vectorized
   result: a direct (possibly VIEW_CONVERTed) copy when there is a
   single vector stmt, or a new CONSTRUCTOR of all vector lhs values
   when there are several.  (Return type, "j" declaration and some
   braces are elided in this excerpt.)  */
4237 vectorize_slp_instance_root_stmt (slp_tree node
, slp_instance instance
)
4239 gassign
*rstmt
= NULL
;
4241 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) == 1)
4243 stmt_vec_info child_stmt_info
;
4246 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt_info
)
4248 tree vect_lhs
= gimple_get_lhs (child_stmt_info
->stmt
);
4249 tree root_lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4250 if (!useless_type_conversion_p (TREE_TYPE (root_lhs
),
4251 TREE_TYPE (vect_lhs
)))
4252 vect_lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (root_lhs
),
4254 rstmt
= gimple_build_assign (root_lhs
, vect_lhs
);
4258 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) > 1)
4260 int nelts
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
4261 stmt_vec_info child_stmt_info
;
4263 vec
<constructor_elt
, va_gc
> *v
;
4264 vec_alloc (v
, nelts
);
4266 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt_info
)
4268 CONSTRUCTOR_APPEND_ELT (v
,
4270 gimple_get_lhs (child_stmt_info
->stmt
));
4272 tree lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4273 tree rtype
= TREE_TYPE (gimple_assign_rhs1 (instance
->root_stmt
->stmt
));
4274 tree r_constructor
= build_constructor (rtype
, v
);
4275 rstmt
= gimple_build_assign (lhs
, r_constructor
);
/* Swap the new assignment in place of the original root stmt.  */
4280 gimple_stmt_iterator rgsi
= gsi_for_stmt (instance
->root_stmt
->stmt
);
4281 gsi_replace (&rgsi
, rstmt
, true);
4284 /* Generate vector code for all SLP instances in the loop/basic block. */
4287 vect_schedule_slp (vec_info
*vinfo
)
4289 vec
<slp_instance
> slp_instances
;
4290 slp_instance instance
;
4293 slp_instances
= vinfo
->slp_instances
;
4294 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4296 slp_tree node
= SLP_INSTANCE_TREE (instance
);
4297 /* Schedule the tree of INSTANCE. */
4298 vect_schedule_slp_instance (vinfo
, node
, instance
);
4300 if (SLP_INSTANCE_ROOT_STMT (instance
))
4301 vectorize_slp_instance_root_stmt (node
, instance
);
4303 if (dump_enabled_p ())
4304 dump_printf_loc (MSG_NOTE
, vect_location
,
4305 "vectorizing stmts using SLP.\n");
4308 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4310 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4311 stmt_vec_info store_info
;
4314 /* Remove scalar call stmts. Do not do this for basic-block
4315 vectorization as not all uses may be vectorized.
4316 ??? Why should this be necessary? DCE should be able to
4317 remove the stmts itself.
4318 ??? For BB vectorization we can as well remove scalar
4319 stmts starting from the SLP tree root if they have no
4321 if (is_a
<loop_vec_info
> (vinfo
))
4322 vect_remove_slp_scalar_calls (vinfo
, root
);
4324 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
); j
++)
4326 if (!STMT_VINFO_DATA_REF (store_info
))
4329 if (SLP_INSTANCE_ROOT_STMT (instance
))
4332 store_info
= vect_orig_stmt (store_info
);
4333 /* Free the attached stmt_vec_info and remove the stmt. */
4334 vinfo
->remove_stmt (store_info
);