/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "coretypes.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
#include "dump-context.h"

/* Initialize a SLP node.  */

_slp_tree::_slp_tree ()
{
  SLP_TREE_SCALAR_STMTS (this) = vNULL;
  SLP_TREE_SCALAR_OPS (this) = vNULL;
  SLP_TREE_VEC_STMTS (this).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
  SLP_TREE_CHILDREN (this) = vNULL;
  SLP_TREE_LOAD_PERMUTATION (this) = vNULL;
  SLP_TREE_TWO_OPERATORS (this) = false;
  SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def;
  SLP_TREE_VECTYPE (this) = NULL_TREE;
}

/* Tear down a SLP node.  */

_slp_tree::~_slp_tree ()
{
  SLP_TREE_CHILDREN (this).release ();
  SLP_TREE_SCALAR_STMTS (this).release ();
  SLP_TREE_SCALAR_OPS (this).release ();
  SLP_TREE_VEC_STMTS (this).release ();
  SLP_TREE_LOAD_PERMUTATION (this).release ();
}

/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  if (--node->refcnt != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
        {
          gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
          STMT_VINFO_NUM_SLP_USES (stmt_info)--;
        }
    }

  delete node;
}

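/* Note: SLP nodes are reference counted (see the refcnt decrement
   above); a node can have multiple parents in the SLP graph whenever
   vect_build_slp_tree re-uses a cached subtree, so it is only freed
   once the last reference goes away.  */
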
/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}

/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops)
{
  slp_tree node = new _slp_tree;
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}

/* Create an SLP node for OPS.  */

static slp_tree
vect_create_new_slp_node (vec<tree> ops)
{
  slp_tree node = new _slp_tree;
  SLP_TREE_SCALAR_OPS (node) = ops;
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  return node;
}

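/* A node built from OPS carries scalar operands instead of scalar
   stmts: it represents constant or external defs that will later be
   materialized as vector constants or built up from scalars rather
   than vectorized.  */
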
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Operands.  */
  vec<tree> ops;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool any_pattern;
} *slp_oprnd_info;

/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->ops.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->any_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}

/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      oprnd_info->ops.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}

/* Return true if STMTS contains a pattern statement.  */

static bool
vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
{
  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (is_pattern_stmt_p (stmt_info))
      return true;
  return false;
}

/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */

int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
                                      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
        return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
        result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}

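/* For example, for a group of four consecutive loads from a[0..3] with
   no gaps, the members get places 0, 1, 2 and 3; DR_GROUP_GAP accounts
   for elements of the chain that no statement accesses.  */
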
/* Check whether it is possible to load COUNT elements of type ELT_TYPE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
                                tree elt_type, unsigned int *nvectors_out,
                                tree *vector_type_out,
                                tree *permutes)
{
  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
    return false;

  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
        {
          /* Get the natural vector type for this SLP group size.  */
          tree int_type = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (int_mode), 1);
          tree vector_type
            = get_vectype_for_scalar_type (vinfo, int_type, count);
          if (vector_type
              && VECTOR_MODE_P (TYPE_MODE (vector_type))
              && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
                           GET_MODE_SIZE (base_vector_mode)))
            {
              /* Try fusing consecutive sequences of COUNT / NVECTORS elements
                 together into elements of type INT_TYPE and using the result
                 to build NVECTORS vectors.  */
              poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
              vec_perm_builder sel1 (nelts, 2, 3);
              vec_perm_builder sel2 (nelts, 2, 3);
              poly_int64 half_nelts = exact_div (nelts, 2);
              for (unsigned int i = 0; i < 3; ++i)
                {
                  sel1.quick_push (i);
                  sel1.quick_push (i + nelts);
                  sel2.quick_push (half_nelts + i);
                  sel2.quick_push (half_nelts + i + nelts);
                }
              vec_perm_indices indices1 (sel1, 2, nelts);
              vec_perm_indices indices2 (sel2, 2, nelts);
              if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
                  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
                {
                  if (nvectors_out)
                    *nvectors_out = nvectors;
                  if (vector_type_out)
                    *vector_type_out = vector_type;
                  if (permutes)
                    {
                      permutes[0] = vect_gen_perm_mask_checked (vector_type,
                                                                indices1);
                      permutes[1] = vect_gen_perm_mask_checked (vector_type,
                                                                indices2);
                    }
                  return true;
                }
            }
        }
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
        return false;
      nvectors *= 2;
    }
}

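/* The two permutes built above are the even/odd interleave (zip)
   patterns: sel1 picks elements { 0, n, 1, n+1, ... } and sel2
   { n/2, n+n/2, n/2+1, n+n/2+1, ... } of two concatenated n-element
   vectors, which is how duplicate_and_interleave spreads a repeated
   sequence of COUNT scalars across the intermediate vectors.  */
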
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the parent node of this one, return 1; if everything
   is ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             vec<stmt_vec_info> stmts, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  stmt_vec_info stmt_info = stmts[stmt_num];
  tree oprnd;
  unsigned int i, number_of_oprnds;
  enum vect_def_type dt = vect_uninitialized_def;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  unsigned int commutative_op = -1U;
  bool first_op_cond = false;
  bool first = stmt_num == 0;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
      if (gimple_call_internal_p (stmt))
        {
          internal_fn ifn = gimple_call_internal_fn (stmt);
          commutative_op = first_commutative_argument (ifn);

          /* Masked load, only look at mask.  */
          if (ifn == IFN_MASK_LOAD)
            {
              number_of_oprnds = 1;
              /* Mask operand index.  */
              first_op_idx = 5;
            }
        }
    }
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could result in different comparison code to the first stmt.  */
      if (code == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative_op = commutative_tree_code (code) ? 0U : -1U;
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
          int *map = maps[*swap];

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
                                             first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt_info->stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
      if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
        oprnd = TREE_OPERAND (oprnd, 0);

      oprnd_info = (*oprnds_info)[i];

      stmt_vec_info def_stmt_info;
      if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: can't analyze def for %T\n",
                             oprnd);

          return -1;
        }

      if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
        oprnd_info->any_pattern = true;

      if (first)
        {
          /* For the swapping logic below force vect_reduction_def
             for the reduction op in a SLP reduction group.  */
          if (!STMT_VINFO_DATA_REF (stmt_info)
              && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
              && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
              && def_stmt_info)
            dt = vect_reduction_def;
          oprnd_info->first_dt = dt;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             end in the reduction chain.  */
          tree type = TREE_TYPE (oprnd);
          if ((oprnd_info->first_dt != dt
               && !(oprnd_info->first_dt == vect_reduction_def
                    && !STMT_VINFO_DATA_REF (stmt_info)
                    && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
                    && def_stmt_info
                    && !STMT_VINFO_DATA_REF (def_stmt_info)
                    && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
                        == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
               && !((oprnd_info->first_dt == vect_external_def
                     || oprnd_info->first_dt == vect_constant_def)
                    && (dt == vect_external_def
                        || dt == vect_constant_def)))
              || !types_compatible_p (oprnd_info->first_op_type, type)
              || (!STMT_VINFO_DATA_REF (stmt_info)
                  && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
                  && ((!def_stmt_info
                       || STMT_VINFO_DATA_REF (def_stmt_info)
                       || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
                           != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
                      != (oprnd_info->first_dt != vect_reduction_def))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == commutative_op && !swapped)
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "trying swapped operands\n");
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
          if ((dt == vect_constant_def
               || dt == vect_external_def)
              && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
              && (TREE_CODE (type) == BOOLEAN_TYPE
                  || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
                                                      type)))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: invalid type of def "
                                 "for variable-length SLP %T\n", oprnd);
              return -1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_external_def:
          /* Make sure to demote the overall operand to external.  */
          oprnd_info->first_dt = vect_external_def;
          /* Fallthru.  */
        case vect_constant_def:
          oprnd_info->def_stmts.quick_push (NULL);
          oprnd_info->ops.quick_push (oprnd);
          break;

        case vect_internal_def:
        case vect_reduction_def:
          if (oprnd_info->first_dt == vect_reduction_def
              && !STMT_VINFO_DATA_REF (stmt_info)
              && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
              && !STMT_VINFO_DATA_REF (def_stmt_info)
              && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
                  == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
            {
              /* For a SLP reduction chain we want to duplicate the
                 reduction to each of the chain members.  That gets
                 us a sane SLP graph (still the stmts are not 100%
                 correct wrt the initial values).  */
              gcc_assert (!first);
              oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
              oprnd_info->ops.quick_push (oprnd_info->ops[0]);
              break;
            }
          /* Fallthru.  */
        case vect_induction_def:
          oprnd_info->def_stmts.quick_push (def_stmt_info);
          oprnd_info->ops.quick_push (oprnd);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: illegal type of def %T\n",
                             oprnd);

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "swapped operands to match def types in %G",
                         stmt_info->stmt);
    }

  *swap = swapped;
  return 0;
}

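/* The cond_expr swap maps above encode the two legal adjustments:
   *SWAP == 1 exchanges the two comparison operands (a < b ? x : y
   matches b > a ? x : y), while *SWAP == 2 exchanges the then/else
   arms, which pairs with the inverted comparison code.  */
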
/* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
   Return true if we can, meaning that this choice doesn't conflict with
   existing SLP nodes that use STMT_INFO.  */

static bool
vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
{
  tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
  if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
    return true;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      /* We maintain the invariant that if any statement in the group is
         used, all other members of the group have the same vector type.  */
      stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      stmt_vec_info member_info = first_info;
      for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
        if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
            || is_pattern_stmt_p (member_info))
          break;

      if (!member_info)
        {
          for (member_info = first_info; member_info;
               member_info = DR_GROUP_NEXT_ELEMENT (member_info))
            STMT_VINFO_VECTYPE (member_info) = vectype;
          return true;
        }
    }
  else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
           && !is_pattern_stmt_p (stmt_info))
    {
      STMT_VINFO_VECTYPE (stmt_info) = vectype;
      return true;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "Build SLP failed: incompatible vector"
                       " types for: %G", stmt_info->stmt);
      dump_printf_loc (MSG_NOTE, vect_location,
                       "  old vector type: %T\n", old_vectype);
      dump_printf_loc (MSG_NOTE, vect_location,
                       "  new vector type: %T\n", vectype);
    }
  return false;
}

/* Try to infer and assign a vector type to all the statements in STMTS.
   Used only for BB vectorization.  */

static bool
vect_update_all_shared_vectypes (vec_info *vinfo, vec<stmt_vec_info> stmts)
{
  tree vectype, nunits_vectype;
  if (!vect_get_vector_types_for_stmt (vinfo, stmts[0], &vectype,
                                       &nunits_vectype, stmts.length ()))
    return false;

  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (!vect_update_shared_vectype (stmt_info, vectype))
      return false;

  return true;
}

/* Return true if call statements CALL1 and CALL2 are similar enough
   to be combined into the same SLP group.  */

static bool
compatible_calls_p (gcall *call1, gcall *call2)
{
  unsigned int nargs = gimple_call_num_args (call1);
  if (nargs != gimple_call_num_args (call2))
    return false;

  if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
    return false;

  if (gimple_call_internal_p (call1))
    {
      if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
                               TREE_TYPE (gimple_call_lhs (call2))))
        return false;
      for (unsigned int i = 0; i < nargs; ++i)
        if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
                                 TREE_TYPE (gimple_call_arg (call2, i))))
          return false;
    }
  else
    {
      if (!operand_equal_p (gimple_call_fn (call1),
                            gimple_call_fn (call2), 0))
        return false;

      if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
        return false;
    }
  return true;
}

/* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
   caller's attempt to find the vector type in STMT_INFO with the narrowest
   element type.  Return true if VECTYPE is nonnull and if it is valid
   for STMT_INFO.  When returning true, update MAX_NUNITS to reflect the
   number of units in VECTYPE.  GROUP_SIZE and MAX_NUNITS are as for
   vect_build_slp_tree.  */

static bool
vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info,
                        unsigned int group_size,
                        tree vectype, poly_uint64 *max_nunits)
{
  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unsupported data-type in %G\n",
                         stmt_info->stmt);
      /* Fatal mismatch.  */
      return false;
    }

  /* If populating the vector type requires unrolling then fail
     before adjusting *max_nunits for basic-block vectorization.  */
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && (!nunits.is_constant (&const_nunits)
          || const_nunits > group_size))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unrolling required "
                         "in basic block SLP\n");
      /* Fatal mismatch.  */
      return false;
    }

  /* In case of multiple types we need to detect the smallest type.  */
  vect_update_max_nunits (max_nunits, vectype);
  return true;
}

/* STMTS is a group of GROUP_SIZE SLP statements in which some
   statements do the same operation as the first statement and in which
   the others do ALT_STMT_CODE.  Return true if we can take one vector
   of the first operation and one vector of the second and permute them
   to get the required result.  VECTYPE is the type of the vector that
   would be permuted.  */

static bool
vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
                               unsigned int group_size, tree vectype,
                               tree_code alt_stmt_code)
{
  unsigned HOST_WIDE_INT count;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
    return false;

  vec_perm_builder sel (count, count, 1);
  for (unsigned int i = 0; i < count; ++i)
    {
      unsigned int elt = i;
      gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
      if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
        elt += count;
      sel.quick_push (elt);
    }

  vec_perm_indices indices (sel, 2, count);
  return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
}

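/* Example: the group { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } has
   PLUS_EXPR as the first operation and MINUS_EXPR as ALT_STMT_CODE;
   the permute then selects lanes { 0, count+1, 2, count+3 } from the
   concatenated PLUS and MINUS result vectors, i.e. an addsub-style
   blend.  */
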
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
   to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits, bool *matches,
                       bool *two_operators)
{
  unsigned int i;
  stmt_vec_info first_stmt_info = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  stmt_vec_info first_load = NULL, prev_first_load = NULL;
  bool load_p = false;

  /* For every stmt in NODE find its def stmt/s.  */
  stmt_vec_info stmt_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (stmt_info))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: unvectorizable statement %G",
                             stmt);
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "Build SLP failed: not GIMPLE_ASSIGN nor "
                             "GIMPLE_CALL %G", stmt);
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      tree nunits_vectype;
      if (!vect_get_vector_types_for_stmt (vinfo, stmt_info, &vectype,
                                           &nunits_vectype, group_size)
          || (nunits_vectype
              && !vect_record_max_nunits (vinfo, stmt_info, group_size,
                                          nunits_vectype, max_nunits)))
        {
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      gcc_assert (vectype);

      if (is_a <bb_vec_info> (vinfo)
          && !vect_update_shared_vectype (stmt_info, vectype))
        continue;

      gcall *call_stmt = dyn_cast <gcall *> (stmt);
      if (call_stmt)
        {
          rhs_code = CALL_EXPR;

          if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
            load_p = true;
          else if ((gimple_call_internal_p (call_stmt)
                    && (!vectorizable_internal_fn_p
                        (gimple_call_internal_fn (call_stmt))))
                   || gimple_call_tail_p (call_stmt)
                   || gimple_call_noreturn_p (call_stmt)
                   || !gimple_call_nothrow_p (call_stmt)
                   || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: unsupported call type %G",
                                 call_stmt);
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        {
          rhs_code = gimple_assign_rhs_code (stmt);
          load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
        }

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype, optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
          else if (call_stmt
                   && gimple_call_internal_p (call_stmt, IFN_DIV_POW2))
            {
              need_same_oprnds = true;
              first_op1 = gimple_call_arg (call_stmt, 1);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt %G", stmt);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt %G", first_stmt_info->stmt);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds)
            {
              tree other_op1 = (call_stmt
                                ? gimple_call_arg (call_stmt, 1)
                                : gimple_assign_rhs2 (stmt));
              if (!operand_equal_p (first_op1, other_op1, 0))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different shift "
                                     "arguments in %G", stmt);
                  /* Mismatch.  */
                  continue;
                }
            }

          if (!load_p && rhs_code == CALL_EXPR)
            {
              if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
                                       as_a <gcall *> (stmt)))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different calls in %G",
                                     stmt);
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                         vect_location,
                                         "Build SLP failed: different "
                                         "interleaving chains in one node %G",
                                         stmt);
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (load_p)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: not grouped load %G", stmt);

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != VIEW_CONVERT_EXPR
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: operation unsupported %G",
                                 stmt);
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different"
                                     " operation %G", stmt);
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      if (!vect_two_operations_perm_ok_p (stmts, group_size,
                                          vectype, alt_stmt_code))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt %G", stmts[i]->stmt);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt %G", first_stmt_info->stmt);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}

/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static const bool empty_zero_p = true;
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}

typedef hash_map <vec <gimple *>, slp_tree,
                  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;

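/* This map memoizes vect_build_slp_tree results keyed on the scalar
   stmt set: a successful build is re-used (bumping the node's refcnt)
   and a failed build is cached as a NULL slp_tree so that it is not
   re-attempted.  */
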
static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map);

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<stmt_vec_info> stmts, unsigned int group_size,
                     poly_uint64 *max_nunits,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     scalar_stmts_to_slp_tree_map_t *bst_map)
{
  if (slp_tree *leader = bst_map->get (stmts))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
                         *leader ? "" : "failed ", *leader);
      if (*leader)
        {
          (*leader)->refcnt++;
          vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
        }
      return *leader;
    }
  poly_uint64 this_max_nunits = 1;
  slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
                                        &this_max_nunits,
                                        matches, npermutes, tree_size, bst_map);
  if (res)
    {
      res->max_nunits = this_max_nunits;
      vect_update_max_nunits (max_nunits, this_max_nunits);
      /* Keep a reference for the bst_map use.  */
      res->refcnt++;
    }
  bst_map->put (stmts.copy (), res);
  return res;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return a value not equal to zero) if def-stmts are not
   isomorphic, require data permutation or are of unsupported types of
   operation.  Otherwise, return 0.
   The value returned is the depth in the SLP tree where a mismatch
   was found.  */

static slp_tree
vect_build_slp_tree_2 (vec_info *vinfo,
                       vec<stmt_vec_info> stmts, unsigned int group_size,
                       poly_uint64 *max_nunits,
                       bool *matches, unsigned *npermutes, unsigned *tree_size,
                       scalar_stmts_to_slp_tree_map_t *bst_map)
{
  unsigned nops, i, this_tree_size = 0;
  poly_uint64 this_max_nunits = *max_nunits;
  slp_tree node;

  matches[0] = false;

  stmt_vec_info stmt_info = stmts[0];
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
    {
      tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
      tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
      if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype,
                                   max_nunits))
        return NULL;

      vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          stmt_vec_info other_info;
          FOR_EACH_VEC_ELT (stmts, i, other_info)
            if (stmt_info != other_info)
              return NULL;
        }
      else if (def_type == vect_reduction_def
               || def_type == vect_double_reduction_def
               || def_type == vect_nested_cycle)
        {
          /* Else def types have to match.  */
          stmt_vec_info other_info;
          FOR_EACH_VEC_ELT (stmts, i, other_info)
            if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
              return NULL;
        }
      else
        return NULL;
      (*tree_size)++;
      node = vect_create_new_slp_node (stmts, 0);
      return node;
    }

  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion unless masked.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
    {
      if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
        {
          /* Masked load.  */
          gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
          nops = 1;
        }
      else
        {
          *max_nunits = this_max_nunits;
          (*tree_size)++;
          node = vect_create_new_slp_node (stmts, 0);
          /* And compute the load permutation.  Whether it is actually
             a permutation depends on the unrolling factor which is
             decided later.  */
          vec<unsigned> load_permutation;
          int j;
          stmt_vec_info load_info;
          load_permutation.create (group_size);
          stmt_vec_info first_stmt_info
            = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              int load_place = vect_get_place_in_interleaving_chain
                  (load_info, first_stmt_info);
              gcc_assert (load_place != -1);
              load_permutation.safe_push (load_place);
            }
          SLP_TREE_LOAD_PERMUTATION (node) = load_permutation;
          return node;
        }
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmts, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;

  stmt_info = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt == vect_uninitialized_def)
        {
          /* COND_EXPR have one too many eventually if the condition
             is a SSA name.  */
          gcc_assert (i == 3 && nops == 4);
          continue;
        }

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        {
          slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
          SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
          oprnd_info->ops = vNULL;
          children.safe_push (invnode);
          continue;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        matches, npermutes,
                                        &this_tree_size, bst_map)) != NULL)
        {
          /* If we have all children of a non-unary child built up from
             scalars then just throw that away and build it up this node
             from scalars.  */
          if (is_a <bb_vec_info> (vinfo)
              && SLP_TREE_CHILDREN (child).length () > 1
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !oprnd_info->any_pattern)
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
                  break;
              if (!grandchild
                  && vect_update_all_shared_vectypes (vinfo,
                                                      oprnd_info->def_stmts))
                {
                  /* Roll back.  */
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild, false);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                                     "Building parent vector operands from "
                                     "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
                  oprnd_info->ops = vNULL;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (stmt_info)
          && !oprnd_info->any_pattern
          && vect_update_all_shared_vectypes (vinfo, oprnd_info->def_stmts))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Building vector operands from scalars\n");
          this_tree_size++;
          child = vect_create_new_slp_node (oprnd_info->def_stmts, 0);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
          children.safe_push (child);
          oprnd_info->ops = vNULL;
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt_info->stmt)
          /* Swapping operands for reductions breaks assumptions later on.  */
          && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
          && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
          /* Do so only if the number of not successful permutes was no more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* See whether we can swap the matching or the non-matching
             stmt operands.  */
          bool swap_not_matching = true;
          do
            {
              for (j = 0; j < group_size; ++j)
                {
                  if (matches[j] != !swap_not_matching)
                    continue;
                  stmt_vec_info stmt_info = stmts[j];
                  /* Verify if we can swap operands of this stmt.  */
                  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
                  if (!stmt
                      || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
                    {
                      if (!swap_not_matching)
                        goto fail;
                      swap_not_matching = false;
                      break;
                    }
                }
            }
          while (j != group_size);

          /* Swap mismatched definition stmts.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (matches[j] == !swap_not_matching)
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                std::swap (oprnds_info[0]->ops[j],
                           oprnds_info[1]->ops[j]);
                if (dump_enabled_p ())
                  dump_printf (MSG_NOTE, "%d ", j);
              }
          if (dump_enabled_p ())
            dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            tem, npermutes,
                                            &this_tree_size, bst_map)) != NULL)
            {
              /* If we have all children of a non-unary child built up from
                 scalars then just throw that away and build it up this node
                 from scalars.  */
              if (is_a <bb_vec_info> (vinfo)
                  && SLP_TREE_CHILDREN (child).length () > 1
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !oprnd_info->any_pattern)
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
                      break;
                  if (!grandchild
                      && (vect_update_all_shared_vectypes
                          (vinfo, oprnd_info->def_stmts)))
                    {
                      /* Roll back.  */
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                        vect_free_slp_tree (grandchild, false);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_NOTE, vect_location,
                                         "Building parent vector operands from "
                                         "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
                      oprnd_info->ops = vNULL;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child, false);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  *tree_size += this_tree_size + 1;
  *max_nunits = this_max_nunits;

  node = vect_create_new_slp_node (stmts, nops);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump a single SLP tree NODE.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
                     slp_tree node)
{
  unsigned i, j;
  stmt_vec_info stmt_info;
  slp_tree child;
  tree op;

  dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
  dump_user_location_t user_loc = loc.get_user_location ();
  dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n",
                   SLP_TREE_DEF_TYPE (node) == vect_external_def
                   ? " (external)"
                   : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
                      ? " (constant)"
                      : ""), node,
                   estimated_poly_value (node->max_nunits), node->refcnt);
  if (SLP_TREE_SCALAR_STMTS (node).exists ())
    FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
      dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
  else
    {
      dump_printf_loc (metadata, user_loc, "\t{ ");
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
        dump_printf (metadata, "%T%s ", op,
                     i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
      dump_printf (metadata, "}\n");
    }
  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
    {
      dump_printf_loc (metadata, user_loc, "\tload permutation {");
      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), i, j)
        dump_printf (dump_kind, " %u", j);
      dump_printf (dump_kind, " }\n");
    }
  if (SLP_TREE_CHILDREN (node).is_empty ())
    return;
  dump_printf_loc (metadata, user_loc, "\tchildren");
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    dump_printf (dump_kind, " %p", (void *)child);
  dump_printf (dump_kind, "\n");
}

DEBUG_FUNCTION void
debug (slp_tree node)
{
  debug_dump_context ctx;
  vect_print_slp_tree (MSG_NOTE,
                       dump_location_t::from_location_t (UNKNOWN_LOCATION),
                       node);
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc,
                      slp_tree node, hash_set<slp_tree> &visited)
{
  unsigned i;
  slp_tree child;

  if (visited.add (node))
    return;

  vect_print_slp_tree (dump_kind, loc, node);

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_graph (dump_kind, loc, child, visited);
}

static void
vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc,
                      slp_tree entry)
{
  hash_set<slp_tree> visited;
  vect_print_slp_graph (dump_kind, loc, entry, visited);
}

/* Mark the tree rooted at NODE with PURE_SLP.  */

static void
vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    STMT_SLP_TYPE (stmt_info) = pure_slp;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, visited);
}

static void
vect_mark_slp_stmts (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts (node, visited);
}

/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
{
  int i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child, visited);
}

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_mark_slp_stmts_relevant (node, visited);
}

/* Copy the SLP subtree rooted at NODE.  */

static slp_tree
slp_copy_subtree (slp_tree node, hash_map<slp_tree, slp_tree> &map)
{
  unsigned i;

  bool existed_p;
  slp_tree &copy_ref = map.get_or_insert (node, &existed_p);
  if (existed_p)
    return copy_ref;

  copy_ref = new _slp_tree;
  slp_tree copy = copy_ref;
  SLP_TREE_DEF_TYPE (copy) = SLP_TREE_DEF_TYPE (node);
  SLP_TREE_VECTYPE (copy) = SLP_TREE_VECTYPE (node);
  copy->max_nunits = node->max_nunits;
  copy->refcnt = 0;
  if (SLP_TREE_SCALAR_STMTS (node).exists ())
    {
      SLP_TREE_SCALAR_STMTS (copy) = SLP_TREE_SCALAR_STMTS (node).copy ();
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
        STMT_VINFO_NUM_SLP_USES (stmt_info)++;
    }
  if (SLP_TREE_SCALAR_OPS (node).exists ())
    SLP_TREE_SCALAR_OPS (copy) = SLP_TREE_SCALAR_OPS (node).copy ();
  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
    SLP_TREE_LOAD_PERMUTATION (copy) = SLP_TREE_LOAD_PERMUTATION (node).copy ();
  if (SLP_TREE_CHILDREN (node).exists ())
    SLP_TREE_CHILDREN (copy) = SLP_TREE_CHILDREN (node).copy ();
  gcc_assert (!SLP_TREE_VEC_STMTS (node).exists ());

  slp_tree child;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy), i, child)
    {
      SLP_TREE_CHILDREN (copy)[i] = slp_copy_subtree (child, map);
      SLP_TREE_CHILDREN (copy)[i]->refcnt++;
    }
  return copy;
}

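/* Each parent that links to a copied child bumps its refcnt (see the
   increment above), so the sharing structure of the original graph is
   preserved in the copy.  */
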
/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation,
                          hash_set<slp_tree> &visited)
{
  unsigned int i;
  slp_tree child;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation, visited);

  if (SLP_TREE_SCALAR_STMTS (node).exists ())
    {
      gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
      /* ??? Computation nodes are isomorphic and need no rearrangement.
         This is a quick hack to cover those where rearrangement breaks
         semantics because only the first stmt is guaranteed to have the
         correct operation code due to others being swapped or inverted.  */
      stmt_vec_info first = SLP_TREE_SCALAR_STMTS (node)[0];
      if (is_gimple_assign (first->stmt)
          && gimple_assign_rhs_code (first->stmt) == COND_EXPR)
        return;
      vec<stmt_vec_info> tmp_stmts;
      tmp_stmts.create (group_size);
      tmp_stmts.quick_grow (group_size);
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
        tmp_stmts[permutation[i]] = stmt_info;
      SLP_TREE_SCALAR_STMTS (node).release ();
      SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
    }
  if (SLP_TREE_SCALAR_OPS (node).exists ())
    {
      gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
      vec<tree> tmp_ops;
      tmp_ops.create (group_size);
      tmp_ops.quick_grow (group_size);
      tree op;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
        tmp_ops[permutation[i]] = op;
      SLP_TREE_SCALAR_OPS (node).release ();
      SLP_TREE_SCALAR_OPS (node) = tmp_ops;
    }
}

/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  if (SLP_INSTANCE_LOADS (slp_instn).is_empty ())
    return false;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
    return false;
  unsigned int group_size = SLP_TREE_LOAD_PERMUTATION (node).length ();
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!SLP_TREE_LOAD_PERMUTATION (load).exists ()
          || SLP_TREE_LOAD_PERMUTATION (load).length () != group_size)
        return false;
      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load), j, lidx)
        if (lidx != SLP_TREE_LOAD_PERMUTATION (node)[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */

  /* We have to unshare the SLP tree we modify.  */
  hash_map<slp_tree, slp_tree> map;
  slp_tree unshared = slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn), map);
  vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn), false);
  unshared->refcnt++;
  SLP_INSTANCE_TREE (slp_instn) = unshared;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    SLP_INSTANCE_LOADS (slp_instn)[i] = *map.get (node);
  node = SLP_INSTANCE_LOADS (slp_instn)[0];

  /* Do the actual re-arrangement.  */
  hash_set<slp_tree> visited;
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation, visited);

  /* We are done, no actual permutations need to be generated.  */
  poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (known_eq (unrolling_factor, 1U)
          || (group_size == DR_GROUP_SIZE (first_stmt_info)
              && DR_GROUP_GAP (first_stmt_info) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

/* Gather loads in the SLP graph NODE and populate the INST loads array.  */

static void
vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node,
                       hash_set<slp_tree> &visited)
{
  if (visited.add (node))
    return;

  if (SLP_TREE_CHILDREN (node).length () == 0)
    {
      if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
        return;
      stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
        loads.safe_push (node);
    }
  else
    {
      unsigned i;
      slp_tree child;
      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
        vect_gather_slp_loads (loads, child, visited);
    }
}

static void
vect_gather_slp_loads (slp_instance inst, slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst), node, visited);
}

/* Find the last store in SLP INSTANCE.  */

stmt_vec_info
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  stmt_vec_info last = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
    {
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
      last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
    }

  return last;
}

/* Splits a group of stores, currently beginning at FIRST_VINFO, into
   two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
   (also containing the first GROUP1_SIZE stmts, since stores are
   consecutive), the second containing the remainder.
   Return the first stmt in the second group.  */

static stmt_vec_info
vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
{
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  stmt_vec_info stmt_info = first_vinfo;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }
  /* STMT is now the last element of the first group.  */
  stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
  DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;

  DR_GROUP_SIZE (group2) = group2_size;
  for (stmt_info = group2; stmt_info;
       stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group
     too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
                     group1_size, group2_size);

  return group2;
}

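/* Example: splitting a store group of size 8 with GROUP1_SIZE == 4
   yields two groups of four; the first group's gap grows by 4 so that
   it steps over the second group, while the second group's gap is the
   original gap plus the 4 elements now owned by the first group.  */
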
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */

static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}

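/* For example, GROUP_SIZE == 3 with 4-element vectors gives
   common_multiple (4, 3) / 3 == 4: the group must be unrolled four
   times so that the 12 scalars fill three vectors exactly.  */
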
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
                           scalar_stmts_to_slp_tree_map_t *bst_map,
                           stmt_vec_info stmt_info, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size;
  tree vectype, scalar_type = NULL_TREE;
  unsigned int i;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  vec<stmt_vec_info> scalar_stmts;
  bool constructor = false;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      group_size = DR_GROUP_SIZE (stmt_info);
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      group_size = REDUC_GROUP_SIZE (stmt_info);
    }
  else if (is_gimple_assign (stmt_info->stmt)
           && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
    {
      vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
      group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
      constructor = true;
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Build SLP failed: unsupported data-type %T\n",
                         scalar_type);
      return false;
    }
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  stmt_vec_info next_info = stmt_info;
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
        {
          scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
          next_info = DR_GROUP_NEXT_ELEMENT (next_info);
        }
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      /* Collect the reduction stmts and store them in
         SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
        {
          scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
          next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
        }
      /* Mark the first element of the reduction chain as reduction to properly
         transform the node.  In the reduction analysis phase only the last
         element of the chain is marked as reduction.  */
      STMT_VINFO_DEF_TYPE (stmt_info)
        = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
      STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
        = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
    }
  else if (constructor)
    {
      tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
      tree val;
      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
        {
          if (TREE_CODE (val) == SSA_NAME)
            {
              gimple *def = SSA_NAME_DEF_STMT (val);
              stmt_vec_info def_info = vinfo->lookup_stmt (def);
              /* Value is defined in another basic block.  */
              if (!def_info)
                return false;
              def_info = vect_stmt_to_vectorize (def_info);
              scalar_stmts.safe_push (def_info);
            }
          else
            return false;
        }
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Analyzing vectorizable constructor: %G\n",
                         stmt_info->stmt);
    }
  else
    {
      /* Collect reduction statements.  */
      vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next_info); i++)
        scalar_stmts.safe_push (next_info);
    }

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  poly_uint64 max_nunits = nunits;
  unsigned tree_size = 0;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
                              &max_nunits, matches, &npermutes,
                              &tree_size, bst_map);
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      poly_uint64 unrolling_factor
        = calculate_unrolling_factor (max_nunits, group_size);

      if (maybe_ne (unrolling_factor, 1U)
          && is_a <bb_vec_info> (vinfo))
        {
          unsigned HOST_WIDE_INT const_max_nunits;
          if (!max_nunits.is_constant (&const_max_nunits)
              || const_max_nunits > group_size)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: store group "
                                 "size not a multiple of the vector size "
                                 "in basic block SLP\n");
              vect_free_slp_tree (node, false);
              return false;
            }
          /* Fatal mismatch.  */
          matches[group_size / const_max_nunits * const_max_nunits] = false;
          vect_free_slp_tree (node, false);
        }
      else
        {
          /* Create a new SLP instance.  */
          new_instance = XNEW (class _slp_instance);
          SLP_INSTANCE_TREE (new_instance) = node;
          SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
          SLP_INSTANCE_LOADS (new_instance) = vNULL;
          SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;

          vect_gather_slp_loads (new_instance, node);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "SLP size %u vs. limit %u.\n",
                             tree_size, max_tree_size);

          /* Check whether any load is possibly permuted.  */
          slp_tree load_node;
          bool loads_permuted = false;
          FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
            {
              if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
                continue;
              unsigned j;
              stmt_vec_info load_info;
              FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
                if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
                  {
                    loads_permuted = true;
                    break;
                  }
            }

          /* If the loads and stores can be handled with load/store-lane
             instructions do not generate this SLP instance.  */
          if (is_a <loop_vec_info> (vinfo)
              && loads_permuted
              && dr && vect_store_lanes_supported (vectype, group_size, false))
            {
              FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
                {
                  stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
                      (SLP_TREE_SCALAR_STMTS (load_node)[0]);
                  /* Use SLP for strided accesses (or if we can't load-lanes).  */
                  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
                      || ! vect_load_lanes_supported
                             (STMT_VINFO_VECTYPE (stmt_vinfo),
                              DR_GROUP_SIZE (stmt_vinfo), false))
                    break;
                }
              if (i == SLP_INSTANCE_LOADS (new_instance).length ())
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Built SLP cancelled: can use "
                                     "load/store-lanes\n");
                  vect_free_slp_instance (new_instance, false);
                  return false;
                }
            }

          /* If this is a reduction chain with a conversion in front
             amend the SLP tree with a node for that.  */
          if (!dr
              && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
            {
              /* Get at the conversion stmt - we know it's the single use
                 of the last stmt of the reduction chain.  */
              gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
              use_operand_p use_p;
              gimple *use_stmt;
              bool r = single_imm_use (gimple_assign_lhs (tem),
                                       &use_p, &use_stmt);
              gcc_assert (r);
              next_info = vinfo->lookup_stmt (use_stmt);
              next_info = vect_stmt_to_vectorize (next_info);
              scalar_stmts = vNULL;
              scalar_stmts.create (group_size);
              for (unsigned i = 0; i < group_size; ++i)
                scalar_stmts.quick_push (next_info);
              slp_tree conv = vect_create_new_slp_node (scalar_stmts, 1);
              SLP_TREE_CHILDREN (conv).quick_push (node);
              SLP_INSTANCE_TREE (new_instance) = conv;
              /* We also have to fake this conversion stmt as SLP reduction
                 group so we don't have to mess with too much code
                 elsewhere.  */
              REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
              REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
            }

          vinfo->slp_instances.safe_push (new_instance);

          /* ???  We've replaced the old SLP_INSTANCE_GROUP_SIZE with
             the number of scalar stmts in the root in a few places.
             Verify that assumption holds.  */
          gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance))
                        .length () == group_size);

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Final SLP tree for instance:\n");
              vect_print_slp_graph (MSG_NOTE, vect_location,
                                    SLP_INSTANCE_TREE (new_instance));
            }

          return true;
        }
    }
  else
    {
      /* Failed to SLP.  */
      /* Free the allocated memory.  */
      scalar_stmts.release ();
    }

  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_GROUP_FIRST_ELEMENT (stmt_info)
      && nunits.is_constant (&const_nunits))
    {
      /* We consider breaking the group only on VF boundaries from the existing
         start.  */
      for (i = 0; i < group_size; i++)
        if (!matches[i]) break;

      if (i >= const_nunits && i < group_size)
        {
          /* Split into two groups at the first vector boundary before i.  */
          gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
          unsigned group1_size = i & ~(const_nunits - 1);

          stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
                                                           group1_size);
          bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
                                                max_tree_size);
          /* If the first non-match was in the middle of a vector,
             skip the rest of that vector.  */
          if (group1_size < i)
            {
              i = group1_size + const_nunits;
              if (i < group_size)
                rest = vect_split_slp_store_group (rest, const_nunits);
            }
          if (i < group_size)
            res |= vect_analyze_slp_instance (vinfo, bst_map,
                                              rest, max_tree_size);
          return res;
        }
      /* Even though the first vector did not all match, we might be able to SLP
         (some) of the remainder.  FORNOW ignore this possibility.  */
    }

  return false;
}
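/* Editorial illustration (not part of the original source): with
   CONST_NUNITS == 4 and a 16-stmt store group whose first mismatch is at
   i == 6, GROUP1_SIZE becomes 6 & ~3 == 4; the group is split at that
   vector boundary, stmts 0-3 are re-analyzed as their own instance, the
   partly-matching vector (stmts 4-7) is skipped, and stmts 8-15 are
   retried as the remainder.  */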
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

opt_result
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  stmt_vec_info first_element;

  DUMP_VECT_SCOPE ("vect_analyze_slp");

  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
        {
          /* Find SLP sequences starting from reduction chains.  */
          FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
            if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
                                             max_tree_size))
              {
                /* Dissolve reduction chain group.  */
                stmt_vec_info vinfo = first_element;
                stmt_vec_info last = NULL;
                while (vinfo)
                  {
                    stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
                    REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
                    REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
                    last = vinfo;
                    vinfo = next;
                  }
                STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
                /* It can be still vectorized as part of an SLP reduction.  */
                loop_vinfo->reductions.safe_push (last);
              }
        }

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1)
        vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
                                   max_tree_size);
    }

  /* The map keeps a reference on SLP nodes built, release that.  */
  for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
       it != bst_map->end (); ++it)
    if ((*it).second)
      vect_free_slp_tree ((*it).second, false);
  delete bst_map;

  /* Optimize permutations in SLP reductions.  */
  slp_instance instance;
  FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
    {
      slp_tree node = SLP_INSTANCE_TREE (instance);
      stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
      /* Reduction (there are no data-refs in the root).
         In reduction chain the order of the loads is not important.  */
      if (!STMT_VINFO_DATA_REF (stmt_info)
          && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
        vect_attempt_slp_rearrange_stmts (instance);
    }

  /* Gather all loads in the SLP graph.  */
  hash_set<slp_tree> visited;
  FOR_EACH_VEC_ELT (vinfo->slp_instances, i, instance)
    vect_gather_slp_loads (vinfo->slp_loads, SLP_INSTANCE_TREE (instance),
                           visited);

  return opt_result::success ();
}
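/* Editorial note (not in the original source): when the SLP build fails for
   a reduction chain, the chain is dissolved above by clearing the
   REDUC_GROUP links of every element; only the last stmt is kept and pushed
   to loop_vinfo->reductions so it can still be vectorized as part of a
   regular SLP reduction.  */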
void
vect_optimize_slp (vec_info *vinfo)
{
  slp_tree node;
  unsigned i;
  FOR_EACH_VEC_ELT (vinfo->slp_loads, i, node)
    {
      if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
        continue;

      /* In basic block vectorization we allow any subchain of an interleaving
         chain.
         FORNOW: not in loop SLP because of realignment complications.  */
      if (is_a <bb_vec_info> (vinfo))
        {
          bool subchain_p = true;
          stmt_vec_info next_load_info = NULL;
          stmt_vec_info load_info;
          unsigned j;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            {
              if (j != 0
                  && (next_load_info != load_info
                      || DR_GROUP_GAP (load_info) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
            }
          if (subchain_p)
            {
              SLP_TREE_LOAD_PERMUTATION (node).release ();
              continue;
            }
        }
      else
        {
          stmt_vec_info load_info;
          bool this_load_permuted = false;
          unsigned j;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
            if (SLP_TREE_LOAD_PERMUTATION (node)[j] != j)
              {
                this_load_permuted = true;
                break;
              }
          stmt_vec_info first_stmt_info
            = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]);
          if (!this_load_permuted
              /* The load requires permutation when unrolling exposes
                 a gap either because the group is larger than the SLP
                 group-size or because there is a gap between the groups.  */
              && (known_eq (LOOP_VINFO_VECT_FACTOR (as_a <loop_vec_info> (vinfo)), 1U)
                  || ((SLP_TREE_SCALAR_STMTS (node).length ()
                       == DR_GROUP_SIZE (first_stmt_info))
                      && DR_GROUP_GAP (first_stmt_info) == 0)))
            SLP_TREE_LOAD_PERMUTATION (node).release ();
        }
    }
}
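/* Editorial illustration (not part of the original source): for a BB SLP
   node loading b[1] and b[2] from an interleaving group b[0..3], the loads
   form a contiguous subchain (each stmt is the DR_GROUP_NEXT_ELEMENT of the
   previous one with gap 1), so the load permutation is released above and
   the loads are emitted as a plain unpermuted vector load.  */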
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i;
  poly_uint64 unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  DUMP_VECT_SCOPE ("vect_make_slp_decision");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      /* All unroll factors have the form:

           GET_MODE_SIZE (vinfo->vector_mode) * X

         for some rational X, so they must have a common multiple.  */
      unrolling_factor
        = force_common_multiple (unrolling_factor,
                                 SLP_INSTANCE_UNROLLING_FACTOR (instance));

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
         call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
         loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "Decided to SLP %d instances. Unrolling factor ",
                       decided_to_slp);
      dump_dec (MSG_NOTE, unrolling_factor);
      dump_printf (MSG_NOTE, "\n");
    }

  return (decided_to_slp > 0);
}
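/* Editorial illustration (not part of the original source): two instances
   with unrolling factors 2 and 3 force a common multiple of 6, so the loop
   as a whole must be unrolled six times for both SLP instances to operate
   on whole vectors.  */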
/* Private data for vect_detect_hybrid_slp.  */
struct vdhs_data
{
  loop_vec_info loop_vinfo;
  vec<stmt_vec_info> *worklist;
};

/* Walker for walk_gimple_op.  */

static tree
vect_detect_hybrid_slp (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *)data;
  vdhs_data *dat = (vdhs_data *)wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  stmt_vec_info def_stmt_info = dat->loop_vinfo->lookup_def (*tp);
  if (!def_stmt_info)
    return NULL_TREE;
  def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
  if (PURE_SLP_STMT (def_stmt_info))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
                         def_stmt_info->stmt);
      STMT_SLP_TYPE (def_stmt_info) = hybrid;
      dat->worklist->safe_push (def_stmt_info);
    }

  return NULL_TREE;
}
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");

  /* All stmts participating in SLP are marked pure_slp, all other
     stmts are loop_vect.
     First collect all loop_vect stmts into a worklist.  */
  auto_vec<stmt_vec_info> worklist;
  for (unsigned i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gphi *phi = gsi.phi ();
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (phi);
          if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
            worklist.safe_push (stmt_info);
        }
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
          if (STMT_VINFO_IN_PATTERN_P (stmt_info))
            {
              for (gimple_stmt_iterator gsi2
                     = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
                   !gsi_end_p (gsi2); gsi_next (&gsi2))
                {
                  stmt_vec_info patt_info
                    = loop_vinfo->lookup_stmt (gsi_stmt (gsi2));
                  if (!STMT_SLP_TYPE (patt_info)
                      && STMT_VINFO_RELEVANT (patt_info))
                    worklist.safe_push (patt_info);
                }
              stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
            }
          if (!STMT_SLP_TYPE (stmt_info) && STMT_VINFO_RELEVANT (stmt_info))
            worklist.safe_push (stmt_info);
        }
    }

  /* Now we have a worklist of non-SLP stmts, follow use->def chains and
     mark any SLP vectorized stmt as hybrid.
     ???  We're visiting def stmts N times (once for each non-SLP and
     once for each hybrid-SLP use).  */
  walk_stmt_info wi;
  vdhs_data dat;
  dat.worklist = &worklist;
  dat.loop_vinfo = loop_vinfo;
  memset (&wi, 0, sizeof (wi));
  wi.info = (void *)&dat;
  while (!worklist.is_empty ())
    {
      stmt_vec_info stmt_info = worklist.pop ();
      /* Since SSA operands are not set up for pattern stmts we need
         to use walk_gimple_op.  */
      wi.is_lhs = 0;
      walk_gimple_op (stmt_info->stmt, vect_detect_hybrid_slp, &wi);
    }
}
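/* Editorial illustration (not part of the original source): if a pure-SLP
   stmt computes t = a[i] + b[i] and a non-SLP (loop_vect) stmt also uses t,
   the walker reaches t's definition from the worklist, re-marks it hybrid,
   and pushes it back so that its own operands are re-examined in turn.  */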
/* Initialize a bb_vec_info struct for the statements between
   REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */

_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
                            gimple_stmt_iterator region_end_in,
                            vec_info_shared *shared)
  : vec_info (vec_info::bb, init_cost (NULL), shared),
    bb (gsi_bb (region_begin_in)),
    region_begin (region_begin_in),
    region_end (region_end_in)
{
  gimple_stmt_iterator gsi;

  for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      add_stmt (stmt);
    }

  bb->aux = this;
}
/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

_bb_vec_info::~_bb_vec_info ()
{
  for (gimple_stmt_iterator si = region_begin;
       gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
    /* Reset region marker.  */
    gimple_set_uid (gsi_stmt (si), -1);

  bb->aux = NULL;
}
/* Subroutine of vect_slp_analyze_node_operations.  Handle the root of NODE,
   given that child nodes have already been processed, and that
   their def types currently match their SLP node's def type.  */

static bool
vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
                                    slp_instance node_instance,
                                    stmt_vector_for_cost *cost_vec)
{
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

  /* Calculate the number of vector statements to be created for the
     scalar stmts in this node.  For SLP reductions it is equal to the
     number of vector statements in the children (which has already been
     calculated by the recursive call).  Otherwise it is the number of
     scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
     VF divided by the number of elements in a vector.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
        if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
          {
            SLP_TREE_NUMBER_OF_VEC_STMTS (node)
              = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
            break;
          }
    }
  else
    {
      poly_uint64 vf;
      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
        vf = loop_vinfo->vectorization_factor;
      else
        vf = 1;
      unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
        = vect_get_num_vectors (vf * group_size, vectype);
    }

  bool dummy;
  return vect_analyze_stmt (vinfo, stmt_info, &dummy,
                            node, node_instance, cost_vec);
}
/* Try to build NODE from scalars, returning true on success.
   NODE_INSTANCE is the SLP instance that contains NODE.  */

static bool
vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
                              slp_instance node_instance)
{
  stmt_vec_info stmt_info;
  unsigned int i;

  if (!is_a <bb_vec_info> (vinfo)
      || node == SLP_INSTANCE_TREE (node_instance)
      || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
    return false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Building vector operands from scalars instead\n");

  /* Don't remove and free the child nodes here, since they could be
     referenced by other structures.  The analysis and scheduling phases
     (need to) ignore child nodes of anything that isn't vect_internal_def.  */
  unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
      SLP_TREE_SCALAR_OPS (node)[i] = lhs;
    }
  return true;
}
/* Compute the prologue cost for invariant or constant operands represented
   by NODE.  */

static void
vect_prologue_cost_for_slp (vec_info *vinfo,
                            slp_tree node,
                            stmt_vector_for_cost *cost_vec)
{
  /* Without looking at the actual initializer a vector of
     constants can be implemented as load from the constant pool.
     When all elements are the same we can use a splat.  */
  tree vectype = SLP_TREE_VECTYPE (node);
  /* ???  Ideally we'd want all invariant nodes to have a vectype.  */
  if (!vectype)
    vectype = get_vectype_for_scalar_type (vinfo,
                                           TREE_TYPE (SLP_TREE_SCALAR_OPS
                                                        (node)[0]));
  unsigned group_size = SLP_TREE_SCALAR_OPS (node).length ();
  unsigned num_vects_to_check;
  unsigned HOST_WIDE_INT const_nunits;
  unsigned nelt_limit;
  if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits)
      && ! multiple_p (const_nunits, group_size))
    {
      num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      nelt_limit = const_nunits;
    }
  else
    {
      /* If either the vector has variable length or the vectors
         are composed of repeated whole groups we only need to
         cost construction once.  All vectors will be the same.  */
      num_vects_to_check = 1;
      nelt_limit = group_size;
    }
  tree elt = NULL_TREE;
  unsigned nelt = 0;
  for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j)
    {
      unsigned si = j % group_size;
      if (nelt == 0)
        elt = SLP_TREE_SCALAR_OPS (node)[si];
      /* ???  We're just tracking whether all operands of a single
         vector initializer are the same, ideally we'd check if
         we emitted the same one already.  */
      else if (elt != SLP_TREE_SCALAR_OPS (node)[si])
        elt = NULL_TREE;
      nelt++;
      if (nelt == nelt_limit)
        {
          record_stmt_cost (cost_vec, 1,
                            SLP_TREE_DEF_TYPE (node) == vect_external_def
                            ? (elt ? scalar_to_vec : vec_construct)
                            : vector_load,
                            NULL, vectype, 0, vect_prologue);
          nelt = 0;
        }
    }
}
/* Analyze statements contained in SLP tree NODE after recursively analyzing
   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.

   Return true if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
                                  slp_instance node_instance,
                                  hash_set<slp_tree> &visited,
                                  hash_set<slp_tree> &lvisited,
                                  stmt_vector_for_cost *cost_vec)
{
  int i, j;
  slp_tree child;

  /* Assume we can code-generate all invariants.  */
  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  /* If we already analyzed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.
     The SLP graph is acyclic so not caching whether we failed or succeeded
     doesn't result in any issue since we throw away the lvisited set
     when we fail.  */
  if (visited.contains (node)
      || lvisited.add (node))
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
                                           visited, lvisited, cost_vec))
      return false;

  /* ???  We have to catch the case late where two first scalar stmts appear
     in multiple SLP children with different def type and fail.  Remember
     original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
     match it when that is vect_internal_def.  */
  auto_vec<vect_def_type, 4> dt;
  dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
      dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);

  /* Push SLP node def-type to stmt operands.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
        && SLP_TREE_SCALAR_STMTS (child).length () != 0)
      STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
        = SLP_TREE_DEF_TYPE (child);

  /* Check everything worked out.  */
  bool res = true;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
      {
        if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
          {
            if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
                != SLP_TREE_DEF_TYPE (child))
              res = false;
          }
        else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
                 != dt[j])
          res = false;
      }
  if (!res && dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                     "not vectorized: same operand with different "
                     "def type in stmt.\n");

  if (res)
    res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
                                              cost_vec);

  /* Restore def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
      STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];

  /* When the node can be vectorized cost invariant nodes it references.
     This is not done in DFS order to allow the referring node
     vectorizable_* calls to nail down the invariant nodes vector type
     and possibly unshare it if it needs a different vector type than
     a uniform one.  */
  if (res)
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
      if ((SLP_TREE_DEF_TYPE (child) == vect_constant_def
           || SLP_TREE_DEF_TYPE (child) == vect_external_def)
          /* Perform usual caching, note code-generation still
             code-gens these nodes multiple times but we expect
             to CSE them later.  */
          && !visited.contains (child)
          && !lvisited.add (child))
        {
          /* ???  After auditing more code paths make a "default"
             and push the vector type from NODE to all children
             if it is not already set.  */
          /* Compute the number of vectors to be generated.  */
          tree vector_type = SLP_TREE_VECTYPE (child);
          if (!vector_type)
            {
              /* For shifts with a scalar argument we don't need
                 to cost or code-generate anything.
                 ???  Represent this more explicitly.  */
              gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_SCALAR_STMTS (node)[0])
                           == shift_vec_info_type)
                          && j == 1);
              continue;
            }
          unsigned group_size = SLP_TREE_SCALAR_OPS (child).length ();
          poly_uint64 vf = 1;
          if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
            vf = loop_vinfo->vectorization_factor;
          SLP_TREE_NUMBER_OF_VEC_STMTS (child)
            = vect_get_num_vectors (vf * group_size, vector_type);
          /* And cost them.  */
          vect_prologue_cost_for_slp (vinfo, child, cost_vec);
        }

  /* If this node can't be vectorized, try pruning the tree here rather
     than felling the whole thing.  */
  if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
    {
      /* We'll need to revisit this for invariant costing and number
         of vectorized stmt setting.  */
      lvisited.remove (node);
      res = true;
    }

  return res;
}
/* Analyze statements in SLP instances of VINFO.  Return true if the
   operations are supported.  */

bool
vect_slp_analyze_operations (vec_info *vinfo)
{
  slp_instance instance;
  int i;

  DUMP_VECT_SCOPE ("vect_slp_analyze_operations");

  hash_set<slp_tree> visited;
  for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
    {
      hash_set<slp_tree> lvisited;
      stmt_vector_for_cost cost_vec;
      cost_vec.create (2);
      if (!vect_slp_analyze_node_operations (vinfo,
                                             SLP_INSTANCE_TREE (instance),
                                             instance, visited, lvisited,
                                             &cost_vec)
          /* Instances with a root stmt require vectorized defs for the
             SLP tree root.  */
          || (SLP_INSTANCE_ROOT_STMT (instance)
              && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
                  != vect_internal_def)))
        {
          slp_tree node = SLP_INSTANCE_TREE (instance);
          stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "removing SLP instance operations starting from: %G",
                             stmt_info->stmt);
          vect_free_slp_instance (instance, false);
          vinfo->slp_instances.ordered_remove (i);
          cost_vec.release ();
        }
      else
        {
          for (hash_set<slp_tree>::iterator x = lvisited.begin();
               x != lvisited.end(); ++x)
            visited.add (*x);
          i++;

          add_stmt_costs (vinfo, vinfo->target_cost_data, &cost_vec);
          cost_vec.release ();
        }
    }

  return !vinfo->slp_instances.is_empty ();
}
/* Compute the scalar cost of the SLP node NODE and its children,
   recording it in COST_VEC.  Do not account defs that are marked in LIFE
   and update LIFE according to uses of NODE.  */

static void
vect_bb_slp_scalar_cost (vec_info *vinfo,
                         slp_tree node, vec<bool, va_heap> *life,
                         stmt_vector_for_cost *cost_vec,
                         hash_set<slp_tree> &visited)
{
  unsigned i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      ssa_op_iter op_iter;
      def_operand_p def_p;

      if ((*life)[i])
        continue;

      /* If there is a non-vectorized use of the defs then the scalar
         stmt is kept live in which case we do not account it or any
         required defs in the SLP children in the scalar cost.  This
         way we make the vectorization more costly when compared to
         the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
        {
          imm_use_iterator use_iter;
          gimple *use_stmt;
          FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
            if (!is_gimple_debug (use_stmt))
              {
                stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
                if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
                  {
                    (*life)[i] = true;
                    BREAK_FROM_IMM_USE_STMT (use_iter);
                  }
              }
        }
      if ((*life)[i])
        continue;

      /* Count scalar stmts only once.  */
      if (gimple_visited_p (stmt))
        continue;
      gimple_set_visited (stmt, true);

      vect_cost_for_stmt kind;
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
            kind = scalar_load;
          else
            kind = scalar_store;
        }
      else if (vect_nop_conversion_p (stmt_info))
        continue;
      else
        kind = scalar_stmt;
      record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
    }

  auto_vec<bool, 20> subtree_life;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          /* Do not directly pass LIFE to the recursive call, copy it to
             confine changes in the callee to the current child/subtree.  */
          subtree_life.safe_splice (*life);
          vect_bb_slp_scalar_cost (vinfo, child, &subtree_life, cost_vec,
                                   visited);
          subtree_life.truncate (0);
        }
    }
}
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  */
  stmt_vector_for_cost scalar_costs;
  scalar_costs.create (0);
  hash_set<slp_tree> visited;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared
        (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance)).length ());
      vect_bb_slp_scalar_cost (bb_vinfo,
                               SLP_INSTANCE_TREE (instance),
                               &life, &scalar_costs, visited);
    }
  void *target_cost_data = init_cost (NULL);
  add_stmt_costs (bb_vinfo, target_cost_data, &scalar_costs);
  scalar_costs.release ();
  unsigned dummy;
  finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
  destroy_cost_data (target_cost_data);

  /* Unset visited flag.  */
  for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    gimple_set_visited (gsi_stmt (gsi), false);

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
               &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
                   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than or equal to the
     cost of the scalar version.  Note that we err on the vector side for
     equal cost because the cost estimate is otherwise quite pessimistic
     (constant uses are free on the scalar side but cost a load on the
     vector side for example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}
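/* Editorial illustration (not part of the original source): with a scalar
   cost of 8 and vector costs of prologue 2, body 4, epilogue 1, the test
   above compares 2 + 1 + 4 = 7 against 8 and accepts; at 8 versus 8 it
   still accepts, erring on the vector side as the comment explains.  */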
/* Find any vectorizable constructors and add them to the grouped_store
   array.  */

static void
vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
{
  gimple_stmt_iterator gsi;

  for (gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    {
      gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
      if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
        continue;

      tree rhs = gimple_assign_rhs1 (stmt);
      if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
          || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
                       CONSTRUCTOR_NELTS (rhs))
          || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
          || uniform_vector_p (rhs))
        continue;

      stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
      BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
    }
}
/* Check if the region described by BB_VINFO can be vectorized, returning
   true if so.  When returning false, set FATAL to true if the same failure
   would prevent vectorization at other vector sizes, false if it is still
   worth trying other sizes.  N_STMTS is the number of statements in the
   region.  */

static bool
vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
{
  DUMP_VECT_SCOPE ("vect_slp_analyze_bb");

  slp_instance instance;
  int i;
  poly_uint64 min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data-ref in basic "
                         "block.\n");
      return false;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: not enough data-refs in "
                         "basic block.\n");
      return false;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: unhandled data access in "
                         "basic block.\n");
      return false;
    }

  vect_slp_check_for_constructors (bb_vinfo);

  /* If there are no grouped stores in the region there is no need
     to continue with pattern recog as vect_analyze_slp will fail
     anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: no grouped stores in "
                         "basic block.\n");
      return false;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Failed to SLP the basic block.\n");
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized: failed to find SLP opportunities "
                           "in basic block.\n");
        }
      return false;
    }

  /* Optimize permutations.  */
  vect_optimize_slp (bb_vinfo);

  vect_record_base_alignments (bb_vinfo);

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      if (! vect_slp_analyze_and_verify_instance_alignment (bb_vinfo, instance)
          || ! vect_slp_analyze_instance_dependence (bb_vinfo, instance))
        {
          slp_tree node = SLP_INSTANCE_TREE (instance);
          stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "removing SLP instance operations starting from: %G",
                             stmt_info->stmt);
          vect_free_slp_instance (instance, false);
          BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
          continue;
        }

      /* Mark all the statements that we want to vectorize as pure SLP and
         relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
      if (SLP_INSTANCE_ROOT_STMT (instance))
        STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    return false;

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: bad operation in basic block.\n");
      return false;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "not vectorized: vectorization is not "
                         "profitable.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "Basic block will be vectorized using SLP\n");
  return true;
}
/* Subroutine of vect_slp_bb.  Try to vectorize the statements between
   REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
   on success.  The region has N_STMTS statements and has the datarefs
   given by DATAREFS.  */

static bool
vect_slp_bb_region (gimple_stmt_iterator region_begin,
                    gimple_stmt_iterator region_end,
                    vec<data_reference_p> datarefs,
                    unsigned int n_stmts)
{
  bb_vec_info bb_vinfo;
  auto_vector_modes vector_modes;

  /* Autodetect first vector size we try.  */
  machine_mode next_vector_mode = VOIDmode;
  targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
  unsigned int mode_i = 0;

  vec_info_shared shared;

  machine_mode autodetected_vector_mode = VOIDmode;
  while (1)
    {
      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);

      bool first_time_p = shared.datarefs.is_empty ();
      BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
      if (first_time_p)
        bb_vinfo->shared->save_datarefs ();
      else
        bb_vinfo->shared->check_datarefs ();
      bb_vinfo->vector_mode = next_vector_mode;

      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
          && dbg_cnt (vect_slp))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "***** Analysis succeeded with vector mode"
                               " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
              dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
            }

          bb_vinfo->shared->check_datarefs ();
          vect_schedule_slp (bb_vinfo);

          unsigned HOST_WIDE_INT bytes;
          if (dump_enabled_p ())
            {
              if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
                dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                                 "basic block part vectorized using %wu byte "
                                 "vectors\n", bytes);
              else
                dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                                 "basic block part vectorized using variable "
                                 "length vectors\n");
            }

          vectorized = true;
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "***** Analysis failed with vector mode %s\n",
                             GET_MODE_NAME (bb_vinfo->vector_mode));
        }

      if (mode_i == 0)
        autodetected_vector_mode = bb_vinfo->vector_mode;

      if (!fatal)
        while (mode_i < vector_modes.length ()
               && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
          {
            if (dump_enabled_p ())
              dump_printf_loc (MSG_NOTE, vect_location,
                               "***** The result for vector mode %s would"
                               " be the same\n",
                               GET_MODE_NAME (vector_modes[mode_i]));
            mode_i += 1;
          }

      delete bb_vinfo;

      if (mode_i < vector_modes.length ()
          && VECTOR_MODE_P (autodetected_vector_mode)
          && (related_vector_mode (vector_modes[mode_i],
                                   GET_MODE_INNER (autodetected_vector_mode))
              == autodetected_vector_mode)
          && (related_vector_mode (autodetected_vector_mode,
                                   GET_MODE_INNER (vector_modes[mode_i]))
              == vector_modes[mode_i]))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "***** Skipping vector mode %s, which would"
                             " repeat the analysis for %s\n",
                             GET_MODE_NAME (vector_modes[mode_i]),
                             GET_MODE_NAME (autodetected_vector_mode));
          mode_i += 1;
        }

      if (vectorized
          || mode_i == vector_modes.length ()
          || autodetected_vector_mode == VOIDmode
          /* If vect_slp_analyze_bb_1 signaled that analysis for all
             vector sizes will fail do not bother iterating.  */
          || fatal)
        return vectorized;

      /* Try the next biggest vector size.  */
      next_vector_mode = vector_modes[mode_i++];
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "***** Re-trying analysis with vector mode %s\n",
                         GET_MODE_NAME (next_vector_mode));
    }
}
/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  bool any_vectorized = false;

  gsi = gsi_after_labels (bb);
  while (!gsi_end_p (gsi))
    {
      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      for (; !gsi_end_p (gsi); gsi_next (&gsi))
        {
          gimple *stmt = gsi_stmt (gsi);
          if (is_gimple_debug (stmt))
            continue;
          insns++;

          if (gimple_location (stmt) != UNKNOWN_LOCATION)
            vect_location = stmt;

          if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
            break;
        }

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
        {
          gsi_next (&gsi);
          continue;
        }

      gimple_stmt_iterator region_end = gsi;

      if (insns > param_slp_max_insns_in_bb)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "not vectorized: too many instructions in "
                             "basic block.\n");
        }
      else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
        any_vectorized = true;

      if (gsi_end_p (region_end))
        break;

      /* Skip the unhandled stmt.  */
      gsi_next (&gsi);
    }

  return any_vectorized;
}
/* Return true if the constant operand OP_NUM of STMT_VINFO requires a
   boolean (mask) vector type.  */

static bool
vect_mask_constant_operand_p (vec_info *vinfo,
                              stmt_vec_info stmt_vinfo, unsigned op_num)
{
  enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
  tree op, vectype;
  enum vect_def_type dt;

  /* For comparison and COND_EXPR type is chosen depending
     on the non-constant other comparison operand.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    {
      gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
      op = gimple_assign_rhs1 (stmt);

      if (!vect_is_simple_use (op, vinfo, &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  if (code == COND_EXPR)
    {
      gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
      tree cond = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (cond) == SSA_NAME)
        {
          if (op_num > 0)
            return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
          op = cond;
        }
      else
        {
          if (op_num > 1)
            return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
          op = TREE_OPERAND (cond, 0);
        }

      if (!vect_is_simple_use (op, vinfo, &dt, &vectype))
        gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}
/* Build a variable-length vector in which the elements in ELTS are repeated
   to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
   RESULTS and add any new instructions to SEQ.

   The approach we use is:

   (1) Find a vector mode VM with integer elements of mode IM.

   (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
       ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
       from small vectors to IM.

   (3) Duplicate each ELTS'[I] into a vector of mode VM.

   (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
       correct byte contents.

   (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.

   We try to find the largest IM for which this sequence works, in order
   to cut down on the number of interleaves.  */

void
duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
                          vec<tree> elts, unsigned int nresults,
                          vec<tree> &results)
{
  unsigned int nelts = elts.length ();
  tree element_type = TREE_TYPE (vector_type);

  /* (1) Find a vector mode VM with integer elements of mode IM.  */
  unsigned int nvectors = 1;
  tree new_vector_type;
  tree permutes[2];
  if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
                                       &nvectors, &new_vector_type,
                                       permutes))
    gcc_unreachable ();

  /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
  unsigned int partial_nelts = nelts / nvectors;
  tree partial_vector_type = build_vector_type (element_type, partial_nelts);

  tree_vector_builder partial_elts;
  auto_vec<tree, 32> pieces (nvectors * 2);
  pieces.quick_grow (nvectors * 2);
  for (unsigned int i = 0; i < nvectors; ++i)
    {
      /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
             ELTS' has mode IM.  */
      partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
      for (unsigned int j = 0; j < partial_nelts; ++j)
        partial_elts.quick_push (elts[i * partial_nelts + j]);
      tree t = gimple_build_vector (seq, &partial_elts);
      t = gimple_build (seq, VIEW_CONVERT_EXPR,
                        TREE_TYPE (new_vector_type), t);

      /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
      pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
    }

  /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
         correct byte contents.

     We need to repeat the following operation log2(nvectors) times:

     out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
     out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);

     However, if each input repeats every N elements and the VF is
     a multiple of N * 2, the HI result is the same as the LO.  */
  unsigned int in_start = 0;
  unsigned int out_start = nvectors;
  unsigned int hi_start = nvectors / 2;
  /* A bound on the number of outputs needed to produce NRESULTS results
     in the final iteration.  */
  unsigned int noutputs_bound = nvectors * nresults;
  for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
    {
      noutputs_bound /= 2;
      unsigned int limit = MIN (noutputs_bound, nvectors);
      for (unsigned int i = 0; i < limit; ++i)
        {
          if ((i & 1) != 0
              && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
                             2 * in_repeat))
            {
              pieces[out_start + i] = pieces[out_start + i - 1];
              continue;
            }

          tree output = make_ssa_name (new_vector_type);
          tree input1 = pieces[in_start + (i / 2)];
          tree input2 = pieces[in_start + (i / 2) + hi_start];
          gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
                                               input1, input2,
                                               permutes[i & 1]);
          gimple_seq_add_stmt (seq, stmt);
          pieces[out_start + i] = output;
        }
      std::swap (in_start, out_start);
    }

  /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.  */
  results.reserve (nresults);
  for (unsigned int i = 0; i < nresults; ++i)
    if (i < nvectors)
      results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
                                        pieces[in_start + i]));
    else
      results.quick_push (results[i - nvectors]);
}
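/* Editorial illustration (not part of the original source): to fill
   variable-length vectors with {s1, s2, s3, s4} repeated, each si is first
   splat into its own VM vector; one interleave round then produces
   {s1, s3, s1, s3, ...} and {s2, s4, s2, s4, ...}, and a second round
   interleaves those into {s1, s2, s3, s4, s1, ...}, i.e. log2 (4) == 2
   rounds for nvectors == 4.  */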
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NODE determines the node for the operand containing the scalar
   operands.  */

static void
vect_get_constant_vectors (vec_info *vinfo,
                           slp_tree slp_node, unsigned op_num,
                           vec<tree> *vec_oprnds)
{
  slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
  stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  unsigned HOST_WIDE_INT nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = op_node->ops.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  bool constant_p;
  tree neutral_op = NULL;
  gimple_seq ctor_seq = NULL;
  auto_vec<tree, 16> permute_results;

  /* We always want SLP_TREE_VECTYPE (op_node) here correctly set.  */
  vector_type = SLP_TREE_VECTYPE (op_node);

  tree op = op_node->ops[0];
  tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (vinfo, stmt_vinfo, op_num))
    gcc_assert (vector_type
                && types_compatible_p (vector_type,
                                       truth_type_for (stmt_vectype)));
  else
    gcc_assert (vector_type
                && types_compatible_p (vector_type,
                                       get_vectype_for_scalar_type
                                         (vinfo, TREE_TYPE (op), op_node)));

  unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (op_node);
  vec_oprnds->create (number_of_vectors);
  auto_vec<tree> voprnds (number_of_vectors);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  /* When using duplicate_and_interleave, we just need one element for
     each scalar statement.  */
  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
    nunits = group_size;

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  stmt_vec_info insert_after = NULL;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
        {
          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
          tree orig_op = op;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
                {
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      /* Can't use VIEW_CONVERT_EXPR for booleans because
                         of possibly different sizes of scalar value and
                         vector element.  */
                      if (integer_zerop (op))
                        op = build_int_cst (TREE_TYPE (vector_type), 0);
                      else if (integer_onep (op))
                        op = build_all_ones_cst (TREE_TYPE (vector_type));
                      else
                        gcc_unreachable ();
                    }
                  else
                    op = fold_unary (VIEW_CONVERT_EXPR,
                                     TREE_TYPE (vector_type), op);
                  gcc_assert (op && CONSTANT_CLASS_P (op));
                }
              else
                {
                  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
                  gimple *init_stmt;
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      tree true_val
                        = build_all_ones_cst (TREE_TYPE (vector_type));
                      tree false_val
                        = build_zero_cst (TREE_TYPE (vector_type));
                      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
                      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                       op, true_val,
                                                       false_val);
                    }
                  else
                    {
                      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
                                   op);
                      init_stmt
                        = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
                                               op);
                    }
                  gimple_seq_add_stmt (&ctor_seq, init_stmt);
                  op = new_temp;
                }
            }
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
          /* For BB vectorization we have to compute an insert location
             when a def is inside the analyzed region since we cannot
             simply insert at the BB start in this case.  */
          stmt_vec_info opdef;
          if (TREE_CODE (orig_op) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
              && is_a <bb_vec_info> (vinfo)
              && (opdef = vinfo->lookup_def (orig_op)))
            {
              if (!insert_after)
                insert_after = opdef;
              else
                insert_after = get_later_stmt (insert_after, opdef);
            }

          if (number_of_places_left_in_vector == 0)
            {
              if (constant_p
                  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
                  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
                vec_cst = gimple_build_vector (&ctor_seq, &elts);
              else
                {
                  if (permute_results.is_empty ())
                    duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
                                              elts, number_of_vectors,
                                              permute_results);
                  vec_cst = permute_results[number_of_vectors - j - 1];
                }
              tree init;
              if (insert_after)
                {
                  gimple_stmt_iterator gsi = gsi_for_stmt (insert_after->stmt);
                  /* vect_init_vector inserts before.  */
                  gsi_next (&gsi);
                  init = vect_init_vector (vinfo, stmt_vinfo, vec_cst,
                                           vector_type, &gsi);
                }
              else
                init = vect_init_vector (vinfo, stmt_vinfo, vec_cst,
                                         vector_type, NULL);
              if (ctor_seq != NULL)
                {
                  gimple_stmt_iterator gsi
                    = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
                  gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
              voprnds.quick_push (init);
              insert_after = NULL;
              number_of_places_left_in_vector = nunits;
              constant_p = true;
              elts.new_vector (vector_type, nunits, 1);
              elts.quick_grow (nunits);
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;
      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);
          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
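/* Editorial illustration (not part of the original source): for operands
   {s1, s2} of a group of size 2 with NUNITS == 4 and a single vector stmt,
   NUMBER_OF_COPIES is 4 * 1 / 2 == 2 and the built vector is
   {s1, s2, s1, s2}, matching the example in the comment above.  */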
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  stmt_vec_info vec_def_stmt_info;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
    vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
}
/* Get N vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them and
   call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized stmts
   must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec_info *vinfo,
                   slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
{
  if (n == -1U)
    n = SLP_TREE_CHILDREN (slp_node).length ();

  for (unsigned i = 0; i < n; ++i)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];

      vec<tree> vec_defs = vNULL;

      /* For each operand we check if it has vectorized definitions in a child
         node or we need to create them (for invariants and constants).  */
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
        {
          vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
          vect_get_slp_vect_defs (child, &vec_defs);
        }
      else
        vect_get_constant_vectors (vinfo, slp_node, i, &vec_defs);

      vec_oprnds->quick_push (vec_defs);
    }
}
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE.  */

bool
vect_transform_slp_perm_load (vec_info *vinfo,
			      slp_tree node, vec<tree> dr_chain,
			      gimple_stmt_iterator *gsi, poly_uint64 vf,
			      bool analyze_only, unsigned *n_perms)
{
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  int vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  unsigned int mask_element;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  mode = TYPE_MODE (vectype);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);
  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
  int vect_stmts_counter = 0;
  unsigned int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  vec_perm_builder mask;
  unsigned int nelts_to_build;
  unsigned int nvectors_per_build;
  bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
		      && multiple_p (nunits, group_size));
  if (repeating_p)
    {
      /* A single vector contains a whole number of copies of the node, so:
	 (a) all permutes can use the same mask; and
	 (b) the permutes only need a single vector input.  */
      mask.new_vector (nunits, group_size, 3);
      nelts_to_build = mask.encoded_nelts ();
      nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
    }
  else
    {
      /* We need to construct a separate mask for each vector statement.  */
      unsigned HOST_WIDE_INT const_nunits, const_vf;
      if (!nunits.is_constant (&const_nunits)
	  || !vf.is_constant (&const_vf))
	return false;
      mask.new_vector (const_nunits, const_nunits, 1);
      nelts_to_build = const_vf * group_size;
      nvectors_per_build = 1;
    }

  unsigned int count = mask.encoded_nelts ();
  mask.quick_grow (count);
  vec_perm_indices indices;
  for (unsigned int j = 0; j < nelts_to_build; j++)
    {
      unsigned int iter_num = j / group_size;
      unsigned int stmt_num = j % group_size;
      unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
			+ SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
      if (repeating_p)
	{
	  first_vec_index = 0;
	  mask_element = i;
	}
      else
	{
	  /* Enforced before the loop when !repeating_p.  */
	  unsigned int const_nunits = nunits.to_constant ();
	  vec_index = i / const_nunits;
	  mask_element = i % const_nunits;
	  if (vec_index == first_vec_index
	      || first_vec_index == -1)
	    first_vec_index = vec_index;
	  else if (vec_index == second_vec_index
		   || second_vec_index == -1)
	    {
	      second_vec_index = vec_index;
	      mask_element += const_nunits;
	    }
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "permutation requires at "
				 "least three vectors %G",
				 stmt_info->stmt);
	      gcc_assert (analyze_only);
	      return false;
	    }

	  gcc_assert (mask_element < 2 * const_nunits);
	}
      if (mask_element != index)
	noop_p = false;
      mask[index++] = mask_element;

      if (index == count && !noop_p)
	{
	  indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
	  if (!can_vec_perm_const_p (mode, indices))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported vect permute { ");
		  for (i = 0; i < count; ++i)
		    {
		      dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
		      dump_printf (MSG_MISSED_OPTIMIZATION, " ");
		    }
		  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		}
	      gcc_assert (analyze_only);
	      return false;
	    }

	  ++*n_perms;
	}
      if (index == count)
	{
	  if (!analyze_only)
	    {
	      tree mask_vec = NULL_TREE;

	      if (! noop_p)
		mask_vec = vect_gen_perm_mask_checked (vectype, indices);

	      if (second_vec_index == -1)
		second_vec_index = first_vec_index;

	      for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
		{
		  /* Generate the permute statement if necessary.  */
		  tree first_vec = dr_chain[first_vec_index + ri];
		  tree second_vec = dr_chain[second_vec_index + ri];
		  stmt_vec_info perm_stmt_info;
		  if (! noop_p)
		    {
		      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
		      tree perm_dest
			= vect_create_destination_var (gimple_assign_lhs (stmt),
						       vectype);
		      perm_dest = make_ssa_name (perm_dest);
		      gassign *perm_stmt
			= gimple_build_assign (perm_dest, VEC_PERM_EXPR,
					       first_vec, second_vec,
					       mask_vec);
		      perm_stmt_info
			= vect_finish_stmt_generation (vinfo,
						       stmt_info, perm_stmt,
						       gsi);
		    }
		  else
		    /* If mask was NULL_TREE generate the requested
		       identity transform.  */
		    perm_stmt_info = vinfo->lookup_def (first_vec);

		  /* Store the vector statement in NODE.  */
		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
		    = perm_stmt_info;
		}
	    }

	  index = 0;
	  first_vec_index = -1;
	  second_vec_index = -1;
	  noop_p = true;
	}
    }

  return true;
}
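
/* A self-contained sketch (not GCC code) of the mask computation above,
   instantiated for the a's node of the comment's example: SLP group_size
   = 3, DR_GROUP_SIZE = 3, nunits = 4, vf = 4 and load permutation
   {0, 0, 0}.  It prints {0,0,0,3} vecs 0/0, {3,3,6,6} vecs 0/1 and the
   converted {2,5,5,5} vecs 1/2.  All names are illustrative.  */
#if 0
#include <cstdio>

int
main ()
{
  const unsigned group_size = 3, dr_group_size = 3, nunits = 4, vf = 4;
  const unsigned load_perm[group_size] = { 0, 0, 0 };
  unsigned mask[nunits], index = 0;
  int first_vec = -1, second_vec = -1;

  for (unsigned j = 0; j < vf * group_size; j++)
    {
      unsigned i = (j / group_size) * dr_group_size
		   + load_perm[j % group_size];
      unsigned vec_index = i / nunits, elt = i % nunits;
      if ((int) vec_index == first_vec || first_vec == -1)
	first_vec = vec_index;
      else if ((int) vec_index == second_vec || second_vec == -1)
	{
	  second_vec = vec_index;
	  elt += nunits;	/* select from the second vector operand */
	}
      /* A third distinct input vector would be rejected here, matching
	 the "permutation requires at least three vectors" diagnostic.  */
      mask[index++] = elt;
      if (index == nunits)
	{
	  std::printf ("{%u,%u,%u,%u} vecs %d/%d\n", mask[0], mask[1],
		       mask[2], mask[3], first_vec,
		       second_vec == -1 ? first_vec : second_vec);
	  index = 0;
	  first_vec = second_vec = -1;
	}
    }
  return 0;
}
#endif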
/* Vectorize SLP instance tree in postorder.  */

static void
vect_schedule_slp_instance (vec_info *vinfo,
			    slp_tree node, slp_instance instance)
{
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  /* See if we have already vectorized the node in the graph of the
     SLP instance.  */
  if (SLP_TREE_VEC_STMTS (node).exists ())
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (vinfo, child, instance);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
	stmt_vec_info child_stmt_info;
	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
	  STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
      }

  stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_TREE_SCALAR_STMTS (node).length ();

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "------>vectorizing SLP node starting from: %G",
		     stmt_info->stmt);

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
  si = gsi_for_stmt (last_stmt_info->stmt);
  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  bool done_p = false;
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      stmt_vec_info ostmt_info;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
	{
	  gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
	  if (gimple_assign_rhs_code (ostmt) != code0)
	    {
	      mask.quick_push (1);
	      ocode = gimple_assign_rhs_code (ostmt);
	    }
	  else
	    mask.quick_push (0);
	}
      if (ocode != ERROR_MARK)
	{
	  vec<stmt_vec_info> v0;
	  vec<stmt_vec_info> v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (vinfo, stmt_info, &si, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
	  for (j = 0; j < v0.length (); ++j)
	    {
	      /* Enforced by vect_build_slp_tree, which rejects variable-length
		 vectors for SLP_TREE_TWO_OPERATORS.  */
	      unsigned int const_nunits = nunits.to_constant ();
	      tree_vector_builder melts (mvectype, const_nunits, 1);
	      for (l = 0; l < const_nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  tree t = build_int_cst (meltype,
					  mask[k++] * const_nunits + l);
		  melts.quick_push (t);
		}
	      tmask = melts.build ();

	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]->stmt),
					   gimple_assign_lhs (v1[j]->stmt),
					   tmask);
	      SLP_TREE_VEC_STMTS (node).quick_push
		(vect_finish_stmt_generation (vinfo, stmt_info, vstmt, &si));
	    }
	  done_p = true;
	}
    }
  if (!done_p)
    vect_transform_stmt (vinfo, stmt_info, &si, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
	stmt_vec_info child_stmt_info;
	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
	  STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
      }
}
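
/* A self-contained sketch (not GCC code) of the two-operator merge above:
   a group like { a+b, a-b, a+b, a-b } is vectorized once with all-plus
   (v0) and once with all-minus (v1), and a VEC_PERM_EXPR whose element is
   mask[k] * const_nunits + l then selects lane l from v1 exactly where the
   scalar group used the other code.  Here group_size == const_nunits == 4;
   all names are illustrative.  */
#if 0
#include <cstdio>

int
main ()
{
  const unsigned n = 4;
  const unsigned two_op_mask[n] = { 0, 1, 0, 1 };	/* +, -, +, - */
  double a[n] = { 1, 2, 3, 4 }, b[n] = { 10, 20, 30, 40 };
  double v0[n], v1[n], merged[n];

  for (unsigned l = 0; l < n; l++)
    {
      v0[l] = a[l] + b[l];	/* whole group with code0 (PLUS_EXPR)  */
      v1[l] = a[l] - b[l];	/* whole group with ocode (MINUS_EXPR) */
      /* Permute index 0 * n + l picks lane l of the first operand,
	 1 * n + l lane l of the second.  */
      merged[l] = two_op_mask[l] ? v1[l] : v0[l];
    }
  for (unsigned l = 0; l < n; l++)
    std::printf ("%g ", merged[l]);	/* prints: 11 -18 33 -36 */
  std::printf ("\n");
  return 0;
}
#endif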
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (vec_info *vinfo,
			      slp_tree node, hash_set<slp_tree> &visited)
{
  gimple *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (vinfo, child, visited);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
      if (!stmt || gimple_bb (stmt) == NULL)
	continue;
      if (is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      gsi = gsi_for_stmt (stmt);
      vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

static void
vect_remove_slp_scalar_calls (vec_info *vinfo, slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_remove_slp_scalar_calls (vinfo, node, visited);
}
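
/* For illustration (not from the sources): after this pass a pure-SLP
   scalar call such as

     x_1 = __builtin_sqrtf (a_2);

   has been rewritten to

     x_1 = 0.0f;

   which keeps the SSA web valid while making the now-dead scalar
   definition trivially removable by later DCE.  */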
/* Vectorize the instance root.  */

static void
vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
{
  gassign *rstmt = NULL;

  if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
    {
      stmt_vec_info child_stmt_info;
      int j;

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
	{
	  tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
	  tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
	  if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
					  TREE_TYPE (vect_lhs)))
	    vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
			       vect_lhs);
	  rstmt = gimple_build_assign (root_lhs, vect_lhs);
	  break;
	}
    }
  else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
    {
      int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      stmt_vec_info child_stmt_info;
      int j;
      vec<constructor_elt, va_gc> *v;
      vec_alloc (v, nelts);

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
	{
	  CONSTRUCTOR_APPEND_ELT (v,
				  NULL_TREE,
				  gimple_get_lhs (child_stmt_info->stmt));
	}
      tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
      tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
      tree r_constructor = build_constructor (rtype, v);
      rstmt = gimple_build_assign (lhs, r_constructor);
    }

  gcc_assert (rstmt);

  gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
  gsi_replace (&rgsi, rstmt, true);
}
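
/* For illustration (not from the sources): for a BB SLP instance rooted
   at a vector constructor such as

     x_1 = {a_2, b_3, c_4, d_5};

   a single vector stmt simply replaces the root (possibly through a
   VIEW_CONVERT_EXPR when the types differ), while several vector stmts
   are recombined into a new constructor of their lhs, e.g.

     x_1 = {vect_a_6, vect_b_7};  */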
/* Generate vector code for all SLP instances in the loop/basic block.  */

void
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree node = SLP_INSTANCE_TREE (instance);
      /* Schedule the tree of INSTANCE.  */
      vect_schedule_slp_instance (vinfo, node, instance);

      if (SLP_INSTANCE_ROOT_STMT (instance))
	vectorize_slp_instance_root_stmt (node, instance);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ???  Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ???  For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (vinfo, root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info);
	   j++)
	{
	  if (!STMT_VINFO_DATA_REF (store_info))
	    break;

	  if (SLP_INSTANCE_ROOT_STMT (instance))
	    continue;

	  store_info = vect_orig_stmt (store_info);
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  vinfo->remove_stmt (store_info);
	}
    }
}