/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2020 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "tree-pass.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-fold.h"
#include "internal-fn.h"
/* Recursively free the memory allocated for the SLP tree rooted at NODE.
   FINAL_P is true if we have vectorized the instance or if we have
   made a final decision not to vectorize the statements in any way.  */

static void
vect_free_slp_tree (slp_tree node, bool final_p)
{
  int i;
  slp_tree child;

  if (--node->refcnt != 0)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child, final_p);

  /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
     Some statements might no longer exist, after having been
     removed by vect_transform_stmt.  Updating the remaining
     statements would be redundant.  */
  if (!final_p)
    {
      stmt_vec_info stmt_info;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
        {
          gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
          STMT_VINFO_NUM_SLP_USES (stmt_info)--;
        }
    }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_OPS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}
/* Free the memory allocated for the SLP instance.  FINAL_P is true if we
   have vectorized the instance or if we have made a final decision not
   to vectorize the statements in any way.  */

void
vect_free_slp_instance (slp_instance instance, bool final_p)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}
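/* Note that SLP nodes are reference counted: vect_free_slp_tree above only
   tears a node down once the last reference to it (for example the one the
   bst_map cache takes in vect_build_slp_tree below) has been dropped.  */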
/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
{
  slp_tree node;
  stmt_vec_info stmt_info = scalar_stmts[0];
  unsigned int nops;

  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    nops = gimple_call_num_args (stmt);
  else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (is_a <gphi *> (stmt_info->stmt))
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_SCALAR_OPS (node) = vNULL;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
    STMT_VINFO_NUM_SLP_USES (stmt_info)++;

  return node;
}
/* Create an SLP node for OPS.  */

static slp_tree
vect_create_new_slp_node (vec<tree> ops)
{
  slp_tree node;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = vNULL;
  SLP_TREE_SCALAR_OPS (node) = ops;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_CHILDREN (node) = vNULL;
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  node->refcnt = 1;
  node->max_nunits = 1;

  return node;
}
/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<stmt_vec_info> def_stmts;
  /* Operands.  */
  vec<tree> ops;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool any_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->ops.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->any_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}
/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      oprnd_info->ops.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}
/* Return true if STMTS contains a pattern statement.  */

static bool
vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
{
  stmt_vec_info stmt_info;
  unsigned int i;
  FOR_EACH_VEC_ELT (stmts, i, stmt_info)
    if (is_pattern_stmt_p (stmt_info))
      return true;
  return false;
}
/* Find the place of the data-ref in STMT_INFO in the interleaving chain
   that starts from FIRST_STMT_INFO.  Return -1 if the data-ref is not a part
   of the chain.  */

int
vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
                                      stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;
  int result = 0;

  if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
    return -1;

  do
    {
      if (next_stmt_info == stmt_info)
        return result;
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      if (next_stmt_info)
        result += DR_GROUP_GAP (next_stmt_info);
    }
  while (next_stmt_info);

  return -1;
}
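/* For example, in a group that loads a[0], a[1] and a[3] (so the third
   member carries a DR_GROUP_GAP of 2), the members are assigned places
   0, 1 and 3; a stmt that is not part of the chain yields -1.  */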
/* Check whether it is possible to load COUNT elements of type ELT_TYPE
   using the method implemented by duplicate_and_interleave.  Return true
   if so, returning the number of intermediate vectors in *NVECTORS_OUT
   (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
   (if nonnull).  */

bool
can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
                                tree elt_type, unsigned int *nvectors_out,
                                tree *vector_type_out,
                                tree *permutes)
{
  tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
  if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
    return false;

  machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
  poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
  unsigned int nvectors = 1;
  for (;;)
    {
      scalar_int_mode int_mode;
      poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
      if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
        {
          /* Get the natural vector type for this SLP group size.  */
          tree int_type = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (int_mode), 1);
          tree vector_type
            = get_vectype_for_scalar_type (vinfo, int_type, count);
          if (vector_type
              && VECTOR_MODE_P (TYPE_MODE (vector_type))
              && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
                           GET_MODE_SIZE (base_vector_mode)))
            {
              /* Try fusing consecutive sequences of COUNT / NVECTORS elements
                 together into elements of type INT_TYPE and using the result
                 to build NVECTORS vectors.  */
              poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
              vec_perm_builder sel1 (nelts, 2, 3);
              vec_perm_builder sel2 (nelts, 2, 3);
              poly_int64 half_nelts = exact_div (nelts, 2);
              for (unsigned int i = 0; i < 3; ++i)
                {
                  sel1.quick_push (i);
                  sel1.quick_push (i + nelts);
                  sel2.quick_push (half_nelts + i);
                  sel2.quick_push (half_nelts + i + nelts);
                }
              vec_perm_indices indices1 (sel1, 2, nelts);
              vec_perm_indices indices2 (sel2, 2, nelts);
              if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
                  && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
                {
                  if (nvectors_out)
                    *nvectors_out = nvectors;
                  if (vector_type_out)
                    *vector_type_out = vector_type;
                  if (permutes)
                    {
                      permutes[0] = vect_gen_perm_mask_checked (vector_type,
                                                                indices1);
                      permutes[1] = vect_gen_perm_mask_checked (vector_type,
                                                                indices2);
                    }
                  return true;
                }
            }
        }
      if (!multiple_p (elt_bytes, 2, &elt_bytes))
        return false;
      nvectors *= 2;
    }
}
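/* Roughly, the two permutes built above treat the group as two fused input
   vectors: the first interleaves their low halves ({0, nelts, 1, nelts + 1,
   ...}) and the second their high halves, which is the shuffle pattern
   duplicate_and_interleave relies on to spread COUNT scalars over NVECTORS
   intermediate vectors.  */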
/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMTS[STMT_NUM] when possible.  Non-zero *SWAP
   indicates swap is required for cond_expr stmts.  Specifically, *SWAP
   is 1 if STMT is cond and operands of comparison need to be swapped;
   *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
   If there is any operand swap in this function, *SWAP is set to non-zero
   value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of father node of this one, return 1; if everything is
   ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             vec<stmt_vec_info> stmts, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
359 stmt_vec_info stmt_info
= stmts
[stmt_num
];
361 unsigned int i
, number_of_oprnds
;
362 enum vect_def_type dt
= vect_uninitialized_def
;
363 slp_oprnd_info oprnd_info
;
364 int first_op_idx
= 1;
365 unsigned int commutative_op
= -1U;
366 bool first_op_cond
= false;
367 bool first
= stmt_num
== 0;
369 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
371 number_of_oprnds
= gimple_call_num_args (stmt
);
373 if (gimple_call_internal_p (stmt
))
375 internal_fn ifn
= gimple_call_internal_fn (stmt
);
376 commutative_op
= first_commutative_argument (ifn
);
378 /* Masked load, only look at mask. */
379 if (ifn
== IFN_MASK_LOAD
)
381 number_of_oprnds
= 1;
382 /* Mask operand index. */
387 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
389 enum tree_code code
= gimple_assign_rhs_code (stmt
);
390 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
391 /* Swap can only be done for cond_expr if asked to, otherwise we
392 could result in different comparison code to the first stmt. */
393 if (code
== COND_EXPR
394 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
396 first_op_cond
= true;
400 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
405 bool swapped
= (*swap
!= 0);
406 gcc_assert (!swapped
|| first_op_cond
);
407 for (i
= 0; i
< number_of_oprnds
; i
++)
412 /* Map indicating how operands of cond_expr should be swapped. */
413 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
414 int *map
= maps
[*swap
];
417 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
418 first_op_idx
), map
[i
]);
420 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
423 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
424 if (TREE_CODE (oprnd
) == VIEW_CONVERT_EXPR
)
425 oprnd
= TREE_OPERAND (oprnd
, 0);
427 oprnd_info
= (*oprnds_info
)[i
];
429 stmt_vec_info def_stmt_info
;
430 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
432 if (dump_enabled_p ())
433 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
434 "Build SLP failed: can't analyze def for %T\n",
440 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
441 oprnd_info
->any_pattern
= true;
445 /* For the swapping logic below force vect_reduction_def
446 for the reduction op in a SLP reduction group. */
447 if (!STMT_VINFO_DATA_REF (stmt_info
)
448 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
449 && (int)i
== STMT_VINFO_REDUC_IDX (stmt_info
)
451 dt
= vect_reduction_def
;
452 oprnd_info
->first_dt
= dt
;
453 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
457 /* Not first stmt of the group, check that the def-stmt/s match
458 the def-stmt/s of the first stmt. Allow different definition
459 types for reduction chains: the first stmt must be a
460 vect_reduction_def (a phi node), and the rest
461 end in the reduction chain. */
462 tree type
= TREE_TYPE (oprnd
);
463 if ((oprnd_info
->first_dt
!= dt
464 && !(oprnd_info
->first_dt
== vect_reduction_def
465 && !STMT_VINFO_DATA_REF (stmt_info
)
466 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
468 && !STMT_VINFO_DATA_REF (def_stmt_info
)
469 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
470 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
471 && !((oprnd_info
->first_dt
== vect_external_def
472 || oprnd_info
->first_dt
== vect_constant_def
)
473 && (dt
== vect_external_def
474 || dt
== vect_constant_def
)))
475 || !types_compatible_p (oprnd_info
->first_op_type
, type
)
476 || (!STMT_VINFO_DATA_REF (stmt_info
)
477 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
479 || STMT_VINFO_DATA_REF (def_stmt_info
)
480 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
481 != REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
482 != (oprnd_info
->first_dt
!= vect_reduction_def
))))
484 /* Try swapping operands if we got a mismatch. */
485 if (i
== commutative_op
&& !swapped
)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE
, vect_location
,
489 "trying swapped operands\n");
494 if (dump_enabled_p ())
495 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
496 "Build SLP failed: different types\n");
500 if ((dt
== vect_constant_def
501 || dt
== vect_external_def
)
502 && !GET_MODE_SIZE (vinfo
->vector_mode
).is_constant ()
503 && (TREE_CODE (type
) == BOOLEAN_TYPE
504 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
509 "Build SLP failed: invalid type of def "
510 "for variable-length SLP %T\n", oprnd
);
515 /* Check the types of the definitions. */
518 case vect_external_def
:
519 /* Make sure to demote the overall operand to external. */
520 oprnd_info
->first_dt
= vect_external_def
;
522 case vect_constant_def
:
523 oprnd_info
->def_stmts
.quick_push (NULL
);
524 oprnd_info
->ops
.quick_push (oprnd
);
527 case vect_internal_def
:
528 case vect_reduction_def
:
529 if (oprnd_info
->first_dt
== vect_reduction_def
530 && !STMT_VINFO_DATA_REF (stmt_info
)
531 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
532 && !STMT_VINFO_DATA_REF (def_stmt_info
)
533 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
534 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
536 /* For a SLP reduction chain we want to duplicate the
537 reduction to each of the chain members. That gets
538 us a sane SLP graph (still the stmts are not 100%
539 correct wrt the initial values). */
541 oprnd_info
->def_stmts
.quick_push (oprnd_info
->def_stmts
[0]);
542 oprnd_info
->ops
.quick_push (oprnd_info
->ops
[0]);
546 case vect_induction_def
:
547 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
548 oprnd_info
->ops
.quick_push (oprnd
);
552 /* FORNOW: Not supported. */
553 if (dump_enabled_p ())
554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
555 "Build SLP failed: illegal type of def %T\n",
567 /* If there are already uses of this stmt in a SLP instance then
568 we've committed to the operand order and can't swap it. */
569 if (STMT_VINFO_NUM_SLP_USES (stmt_info
) != 0)
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
573 "Build SLP failed: cannot swap operands of "
574 "shared stmt %G", stmt_info
->stmt
);
578 /* To get rid of this swapping we have to move the stmt code
579 to the SLP tree as well (and gather it here per stmt). */
580 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
581 tree cond
= gimple_assign_rhs1 (stmt
);
582 enum tree_code code
= TREE_CODE (cond
);
587 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
588 &TREE_OPERAND (cond
, 1));
589 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
594 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
595 gimple_assign_rhs3_ptr (stmt
));
596 if (STMT_VINFO_REDUC_IDX (stmt_info
) == 1)
597 STMT_VINFO_REDUC_IDX (stmt_info
) = 2;
598 else if (STMT_VINFO_REDUC_IDX (stmt_info
) == 2)
599 STMT_VINFO_REDUC_IDX (stmt_info
) = 1;
600 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
601 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
602 gcc_assert (code
!= ERROR_MARK
);
603 TREE_SET_CODE (cond
, code
);
608 /* Commutative ops need not reflect swapping, ops are in
611 if (dump_enabled_p ())
612 dump_printf_loc (MSG_NOTE
, vect_location
,
613 "swapped operands to match def types in %G",
621 /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
622 Return true if we can, meaning that this choice doesn't conflict with
623 existing SLP nodes that use STMT_INFO. */
626 vect_update_shared_vectype (stmt_vec_info stmt_info
, tree vectype
)
628 tree old_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
629 if (old_vectype
&& useless_type_conversion_p (vectype
, old_vectype
))
632 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
633 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
635 /* We maintain the invariant that if any statement in the group is
636 used, all other members of the group have the same vector type. */
637 stmt_vec_info first_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
638 stmt_vec_info member_info
= first_info
;
639 for (; member_info
; member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
640 if (STMT_VINFO_NUM_SLP_USES (member_info
) > 0
641 || is_pattern_stmt_p (member_info
))
646 for (member_info
= first_info
; member_info
;
647 member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
648 STMT_VINFO_VECTYPE (member_info
) = vectype
;
652 else if (STMT_VINFO_NUM_SLP_USES (stmt_info
) == 0
653 && !is_pattern_stmt_p (stmt_info
))
655 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
659 if (dump_enabled_p ())
661 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
662 "Build SLP failed: incompatible vector"
663 " types for: %G", stmt_info
->stmt
);
664 dump_printf_loc (MSG_NOTE
, vect_location
,
665 " old vector type: %T\n", old_vectype
);
666 dump_printf_loc (MSG_NOTE
, vect_location
,
667 " new vector type: %T\n", vectype
);
672 /* Try to infer and assign a vector type to all the statements in STMTS.
673 Used only for BB vectorization. */
676 vect_update_all_shared_vectypes (vec
<stmt_vec_info
> stmts
)
678 tree vectype
, nunits_vectype
;
679 if (!vect_get_vector_types_for_stmt (stmts
[0], &vectype
,
680 &nunits_vectype
, stmts
.length ()))
683 stmt_vec_info stmt_info
;
685 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
686 if (!vect_update_shared_vectype (stmt_info
, vectype
))
692 /* Return true if call statements CALL1 and CALL2 are similar enough
693 to be combined into the same SLP group. */
696 compatible_calls_p (gcall
*call1
, gcall
*call2
)
698 unsigned int nargs
= gimple_call_num_args (call1
);
699 if (nargs
!= gimple_call_num_args (call2
))
702 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
705 if (gimple_call_internal_p (call1
))
707 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
708 TREE_TYPE (gimple_call_lhs (call2
))))
710 for (unsigned int i
= 0; i
< nargs
; ++i
)
711 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
712 TREE_TYPE (gimple_call_arg (call2
, i
))))
717 if (!operand_equal_p (gimple_call_fn (call1
),
718 gimple_call_fn (call2
), 0))
721 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
727 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
728 caller's attempt to find the vector type in STMT_INFO with the narrowest
729 element type. Return true if VECTYPE is nonnull and if it is valid
730 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
731 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
732 vect_build_slp_tree. */
735 vect_record_max_nunits (stmt_vec_info stmt_info
, unsigned int group_size
,
736 tree vectype
, poly_uint64
*max_nunits
)
740 if (dump_enabled_p ())
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
742 "Build SLP failed: unsupported data-type in %G\n",
744 /* Fatal mismatch. */
748 /* If populating the vector type requires unrolling then fail
749 before adjusting *max_nunits for basic-block vectorization. */
750 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
751 unsigned HOST_WIDE_INT const_nunits
;
752 if (STMT_VINFO_BB_VINFO (stmt_info
)
753 && (!nunits
.is_constant (&const_nunits
)
754 || const_nunits
> group_size
))
756 if (dump_enabled_p ())
757 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
758 "Build SLP failed: unrolling required "
759 "in basic block SLP\n");
760 /* Fatal mismatch. */
764 /* In case of multiple types we need to detect the smallest type. */
765 vect_update_max_nunits (max_nunits
, vectype
);
769 /* STMTS is a group of GROUP_SIZE SLP statements in which some
770 statements do the same operation as the first statement and in which
771 the others do ALT_STMT_CODE. Return true if we can take one vector
772 of the first operation and one vector of the second and permute them
773 to get the required result. VECTYPE is the type of the vector that
774 would be permuted. */
777 vect_two_operations_perm_ok_p (vec
<stmt_vec_info
> stmts
,
778 unsigned int group_size
, tree vectype
,
779 tree_code alt_stmt_code
)
781 unsigned HOST_WIDE_INT count
;
782 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&count
))
785 vec_perm_builder
sel (count
, count
, 1);
786 for (unsigned int i
= 0; i
< count
; ++i
)
788 unsigned int elt
= i
;
789 gassign
*stmt
= as_a
<gassign
*> (stmts
[i
% group_size
]->stmt
);
790 if (gimple_assign_rhs_code (stmt
) == alt_stmt_code
)
792 sel
.quick_push (elt
);
794 vec_perm_indices
indices (sel
, 2, count
);
795 return can_vec_perm_const_p (TYPE_MODE (vectype
), indices
);
/* Verify if the scalar stmts STMTS are isomorphic, require data
   permutation or are of unsupported types of operation.  Return
   true if they are, otherwise return false and indicate in *MATCHES
   which stmts are not isomorphic to the first one.  If MATCHES[0]
   is false then this indicates the comparison could not be
   carried out or the stmts will never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
   to (B1 <= A1 ? X1 : Y1), or be inverted to (A1 < B1) ? Y1 : X1.  */
813 vect_build_slp_tree_1 (unsigned char *swap
,
814 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
815 poly_uint64
*max_nunits
, bool *matches
,
819 stmt_vec_info first_stmt_info
= stmts
[0];
820 enum tree_code first_stmt_code
= ERROR_MARK
;
821 enum tree_code alt_stmt_code
= ERROR_MARK
;
822 enum tree_code rhs_code
= ERROR_MARK
;
823 enum tree_code first_cond_code
= ERROR_MARK
;
825 bool need_same_oprnds
= false;
826 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
829 machine_mode optab_op2_mode
;
830 machine_mode vec_mode
;
831 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
834 /* For every stmt in NODE find its def stmt/s. */
835 stmt_vec_info stmt_info
;
836 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
838 vec_info
*vinfo
= stmt_info
->vinfo
;
839 gimple
*stmt
= stmt_info
->stmt
;
843 if (dump_enabled_p ())
844 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
846 /* Fail to vectorize statements marked as unvectorizable. */
847 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
849 if (dump_enabled_p ())
850 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
851 "Build SLP failed: unvectorizable statement %G",
853 /* Fatal mismatch. */
858 lhs
= gimple_get_lhs (stmt
);
859 if (lhs
== NULL_TREE
)
861 if (dump_enabled_p ())
862 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
863 "Build SLP failed: not GIMPLE_ASSIGN nor "
864 "GIMPLE_CALL %G", stmt
);
865 /* Fatal mismatch. */
871 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
872 &nunits_vectype
, group_size
)
874 && !vect_record_max_nunits (stmt_info
, group_size
,
875 nunits_vectype
, max_nunits
)))
877 /* Fatal mismatch. */
882 gcc_assert (vectype
);
884 if (is_a
<bb_vec_info
> (vinfo
)
885 && !vect_update_shared_vectype (stmt_info
, vectype
))
888 gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
);
891 rhs_code
= CALL_EXPR
;
893 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
895 else if ((gimple_call_internal_p (call_stmt
)
896 && (!vectorizable_internal_fn_p
897 (gimple_call_internal_fn (call_stmt
))))
898 || gimple_call_tail_p (call_stmt
)
899 || gimple_call_noreturn_p (call_stmt
)
900 || !gimple_call_nothrow_p (call_stmt
)
901 || gimple_call_chain (call_stmt
))
903 if (dump_enabled_p ())
904 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
905 "Build SLP failed: unsupported call type %G",
907 /* Fatal mismatch. */
914 rhs_code
= gimple_assign_rhs_code (stmt
);
915 load_p
= TREE_CODE_CLASS (rhs_code
) == tcc_reference
;
918 /* Check the operation. */
921 first_stmt_code
= rhs_code
;
923 /* Shift arguments should be equal in all the packed stmts for a
924 vector shift with scalar shift operand. */
925 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
926 || rhs_code
== LROTATE_EXPR
927 || rhs_code
== RROTATE_EXPR
)
929 vec_mode
= TYPE_MODE (vectype
);
931 /* First see if we have a vector/vector shift. */
932 optab
= optab_for_tree_code (rhs_code
, vectype
,
936 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
938 /* No vector/vector shift, try for a vector/scalar shift. */
939 optab
= optab_for_tree_code (rhs_code
, vectype
,
944 if (dump_enabled_p ())
945 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
946 "Build SLP failed: no optab.\n");
947 /* Fatal mismatch. */
951 icode
= (int) optab_handler (optab
, vec_mode
);
952 if (icode
== CODE_FOR_nothing
)
954 if (dump_enabled_p ())
955 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
957 "op not supported by target.\n");
958 /* Fatal mismatch. */
962 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
963 if (!VECTOR_MODE_P (optab_op2_mode
))
965 need_same_oprnds
= true;
966 first_op1
= gimple_assign_rhs2 (stmt
);
970 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
972 need_same_oprnds
= true;
973 first_op1
= gimple_assign_rhs2 (stmt
);
976 && gimple_call_internal_p (call_stmt
, IFN_DIV_POW2
))
978 need_same_oprnds
= true;
979 first_op1
= gimple_call_arg (call_stmt
, 1);
984 if (first_stmt_code
!= rhs_code
985 && alt_stmt_code
== ERROR_MARK
)
986 alt_stmt_code
= rhs_code
;
987 if (first_stmt_code
!= rhs_code
988 && (first_stmt_code
!= IMAGPART_EXPR
989 || rhs_code
!= REALPART_EXPR
)
990 && (first_stmt_code
!= REALPART_EXPR
991 || rhs_code
!= IMAGPART_EXPR
)
992 /* Handle mismatches in plus/minus by computing both
993 and merging the results. */
994 && !((first_stmt_code
== PLUS_EXPR
995 || first_stmt_code
== MINUS_EXPR
)
996 && (alt_stmt_code
== PLUS_EXPR
997 || alt_stmt_code
== MINUS_EXPR
)
998 && rhs_code
== alt_stmt_code
)
999 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1000 && (first_stmt_code
== ARRAY_REF
1001 || first_stmt_code
== BIT_FIELD_REF
1002 || first_stmt_code
== INDIRECT_REF
1003 || first_stmt_code
== COMPONENT_REF
1004 || first_stmt_code
== MEM_REF
)))
1006 if (dump_enabled_p ())
1008 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1009 "Build SLP failed: different operation "
1010 "in stmt %G", stmt
);
1011 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1012 "original stmt %G", first_stmt_info
->stmt
);
1018 if (need_same_oprnds
)
1020 tree other_op1
= (call_stmt
1021 ? gimple_call_arg (call_stmt
, 1)
1022 : gimple_assign_rhs2 (stmt
));
1023 if (!operand_equal_p (first_op1
, other_op1
, 0))
1025 if (dump_enabled_p ())
1026 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1027 "Build SLP failed: different shift "
1028 "arguments in %G", stmt
);
1034 if (!load_p
&& rhs_code
== CALL_EXPR
)
1036 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
1037 as_a
<gcall
*> (stmt
)))
1039 if (dump_enabled_p ())
1040 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1041 "Build SLP failed: different calls in %G",
1049 /* Grouped store or load. */
1050 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1052 if (REFERENCE_CLASS_P (lhs
))
1060 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1061 if (prev_first_load
)
1063 /* Check that there are no loads from different interleaving
1064 chains in the same node. */
1065 if (prev_first_load
!= first_load
)
1067 if (dump_enabled_p ())
1068 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1070 "Build SLP failed: different "
1071 "interleaving chains in one node %G",
1078 prev_first_load
= first_load
;
1080 } /* Grouped access. */
1085 /* Not grouped load. */
1086 if (dump_enabled_p ())
1087 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1088 "Build SLP failed: not grouped load %G", stmt
);
/* FORNOW: Non-grouped loads are not supported.  */
1091 /* Fatal mismatch. */
1096 /* Not memory operation. */
1097 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
1098 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
1099 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
1100 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
1101 && rhs_code
!= VIEW_CONVERT_EXPR
1102 && rhs_code
!= CALL_EXPR
)
1104 if (dump_enabled_p ())
1105 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1106 "Build SLP failed: operation unsupported %G",
1108 /* Fatal mismatch. */
1113 if (rhs_code
== COND_EXPR
)
1115 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1116 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1117 enum tree_code swap_code
= ERROR_MARK
;
1118 enum tree_code invert_code
= ERROR_MARK
;
1121 first_cond_code
= TREE_CODE (cond_expr
);
1122 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1124 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1125 swap_code
= swap_tree_comparison (cond_code
);
1126 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1129 if (first_cond_code
== cond_code
)
1131 /* Isomorphic can be achieved by swapping. */
1132 else if (first_cond_code
== swap_code
)
1134 /* Isomorphic can be achieved by inverting. */
1135 else if (first_cond_code
== invert_code
)
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1141 "Build SLP failed: different"
1142 " operation %G", stmt
);
1152 for (i
= 0; i
< group_size
; ++i
)
1156 /* If we allowed a two-operation SLP node verify the target can cope
1157 with the permute we are going to use. */
1158 if (alt_stmt_code
!= ERROR_MARK
1159 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1161 if (!vect_two_operations_perm_ok_p (stmts
, group_size
,
1162 vectype
, alt_stmt_code
))
1164 for (i
= 0; i
< group_size
; ++i
)
1165 if (gimple_assign_rhs_code (stmts
[i
]->stmt
) == alt_stmt_code
)
1168 if (dump_enabled_p ())
1170 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1171 "Build SLP failed: different operation "
1172 "in stmt %G", stmts
[i
]->stmt
);
1173 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1174 "original stmt %G", first_stmt_info
->stmt
);
1179 *two_operators
= true;
/* Traits for the hash_set to record failed SLP builds for a stmt set.
   Note we never remove apart from at destruction time so we do not
   need a special value for deleted that differs from empty.  */
struct bst_traits
{
  typedef vec <stmt_vec_info> value_type;
  typedef vec <stmt_vec_info> compare_type;
  static inline hashval_t hash (value_type);
  static inline bool equal (value_type existing, value_type candidate);
  static inline bool is_empty (value_type x) { return !x.exists (); }
  static inline bool is_deleted (value_type x) { return !x.exists (); }
  static const bool empty_zero_p = true;
  static inline void mark_empty (value_type &x) { x.release (); }
  static inline void mark_deleted (value_type &x) { x.release (); }
  static inline void remove (value_type &x) { x.release (); }
};
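/* Hash a stmt set by combining the UIDs of its members; this keys the
   cache of already-attempted SLP builds used by vect_build_slp_tree.  */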
inline hashval_t
bst_traits::hash (value_type x)
{
  inchash::hash h;
  for (unsigned i = 0; i < x.length (); ++i)
    h.add_int (gimple_uid (x[i]->stmt));
  return h.end ();
}
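/* Two stmt sets compare equal when they have the same length and list the
   same stmts in the same order.  */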
inline bool
bst_traits::equal (value_type existing, value_type candidate)
{
  if (existing.length () != candidate.length ())
    return false;
  for (unsigned i = 0; i < existing.length (); ++i)
    if (existing[i] != candidate[i])
      return false;
  return true;
}
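/* Map from a vector of scalar stmts to the SLP tree built for it, or to
   NULL if building it failed, so that repeated attempts on the same stmt
   set can re-use the earlier result instead of redoing the work.  */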
typedef hash_map <vec <gimple *>, slp_tree,
		  simple_hashmap_traits <bst_traits, slp_tree> >
  scalar_stmts_to_slp_tree_map_t;
1225 vect_build_slp_tree_2 (vec_info
*vinfo
,
1226 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1227 poly_uint64
*max_nunits
,
1228 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1229 scalar_stmts_to_slp_tree_map_t
*bst_map
);
1232 vect_build_slp_tree (vec_info
*vinfo
,
1233 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1234 poly_uint64
*max_nunits
,
1235 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1236 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1238 if (slp_tree
*leader
= bst_map
->get (stmts
))
1240 if (dump_enabled_p ())
1241 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1242 *leader
? "" : "failed ", *leader
);
1245 (*leader
)->refcnt
++;
1246 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
1250 poly_uint64 this_max_nunits
= 1;
1251 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
,
1253 matches
, npermutes
, tree_size
, bst_map
);
1256 res
->max_nunits
= this_max_nunits
;
1257 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1258 /* Keep a reference for the bst_map use. */
1261 bst_map
->put (stmts
.copy (), res
);
1265 /* Recursively build an SLP tree starting from NODE.
1266 Fail (and return a value not equal to zero) if def-stmts are not
1267 isomorphic, require data permutation or are of unsupported types of
1268 operation. Otherwise, return 0.
1269 The value returned is the depth in the SLP tree where a mismatch
1273 vect_build_slp_tree_2 (vec_info
*vinfo
,
1274 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1275 poly_uint64
*max_nunits
,
1276 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1277 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1279 unsigned nops
, i
, this_tree_size
= 0;
1280 poly_uint64 this_max_nunits
= *max_nunits
;
1285 stmt_vec_info stmt_info
= stmts
[0];
1286 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1287 nops
= gimple_call_num_args (stmt
);
1288 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1290 nops
= gimple_num_ops (stmt
) - 1;
1291 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1294 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1299 /* If the SLP node is a PHI (induction or reduction), terminate
1301 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1303 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1304 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
1305 if (!vect_record_max_nunits (stmt_info
, group_size
, vectype
, max_nunits
))
1308 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1309 /* Induction from different IVs is not supported. */
1310 if (def_type
== vect_induction_def
)
1312 stmt_vec_info other_info
;
1313 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1314 if (stmt_info
!= other_info
)
1317 else if (def_type
== vect_reduction_def
1318 || def_type
== vect_double_reduction_def
1319 || def_type
== vect_nested_cycle
)
1321 /* Else def types have to match. */
1322 stmt_vec_info other_info
;
1323 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1324 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1330 node
= vect_create_new_slp_node (stmts
);
1335 bool two_operators
= false;
1336 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1337 if (!vect_build_slp_tree_1 (swap
, stmts
, group_size
,
1338 &this_max_nunits
, matches
, &two_operators
))
1341 /* If the SLP node is a load, terminate the recursion unless masked. */
1342 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1343 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1345 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1348 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1353 *max_nunits
= this_max_nunits
;
1355 node
= vect_create_new_slp_node (stmts
);
1356 /* And compute the load permutation. Whether it is actually
1357 a permutation depends on the unrolling factor which is
1359 vec
<unsigned> load_permutation
;
1361 stmt_vec_info load_info
;
1362 load_permutation
.create (group_size
);
1363 stmt_vec_info first_stmt_info
1364 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
1365 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1367 int load_place
= vect_get_place_in_interleaving_chain
1368 (load_info
, first_stmt_info
);
1369 gcc_assert (load_place
!= -1);
1370 load_permutation
.safe_push (load_place
);
1372 SLP_TREE_LOAD_PERMUTATION (node
) = load_permutation
;
1377 /* Get at the operands, verifying they are compatible. */
1378 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1379 slp_oprnd_info oprnd_info
;
1380 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1382 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1383 stmts
, i
, &oprnds_info
);
1385 matches
[(res
== -1) ? 0 : i
] = false;
1389 for (i
= 0; i
< group_size
; ++i
)
1392 vect_free_oprnd_info (oprnds_info
);
1396 auto_vec
<slp_tree
, 4> children
;
1398 stmt_info
= stmts
[0];
1400 /* Create SLP_TREE nodes for the definition node/s. */
1401 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1404 unsigned old_tree_size
= this_tree_size
;
1407 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1409 /* COND_EXPR have one too many eventually if the condition
1411 gcc_assert (i
== 3 && nops
== 4);
1415 if (oprnd_info
->first_dt
!= vect_internal_def
1416 && oprnd_info
->first_dt
!= vect_reduction_def
1417 && oprnd_info
->first_dt
!= vect_induction_def
)
1419 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1420 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1421 oprnd_info
->ops
= vNULL
;
1422 children
.safe_push (invnode
);
1426 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1427 group_size
, &this_max_nunits
,
1429 &this_tree_size
, bst_map
)) != NULL
)
1431 /* If we have all children of a non-unary child built up from
1432 scalars then just throw that away and build it up this node
1434 if (is_a
<bb_vec_info
> (vinfo
)
1435 && SLP_TREE_CHILDREN (child
).length () > 1
1436 /* ??? Rejecting patterns this way doesn't work. We'd have to
1437 do extra work to cancel the pattern so the uses see the
1439 && !oprnd_info
->any_pattern
)
1441 slp_tree grandchild
;
1443 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1444 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1447 && vect_update_all_shared_vectypes (oprnd_info
->def_stmts
))
1450 this_tree_size
= old_tree_size
;
1451 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1452 vect_free_slp_tree (grandchild
, false);
1453 SLP_TREE_CHILDREN (child
).truncate (0);
1455 if (dump_enabled_p ())
1456 dump_printf_loc (MSG_NOTE
, vect_location
,
1457 "Building parent vector operands from "
1458 "scalars instead\n");
1459 oprnd_info
->def_stmts
= vNULL
;
1460 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1461 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1462 oprnd_info
->ops
= vNULL
;
1464 children
.safe_push (child
);
1469 oprnd_info
->def_stmts
= vNULL
;
1470 children
.safe_push (child
);
/* If the SLP build failed fatally and we analyze a basic-block
   simply treat nodes we fail to build as externally defined
   (and thus build vectors from the scalar defs).
   The cost model will reject outright expensive cases.
   ??? This doesn't treat cases where permutation ultimately
   fails (or we don't try permutation below).  Ideally we'd
   even compute a permutation that will end up with the maximum
   SLP tree size...  */
1482 if (is_a
<bb_vec_info
> (vinfo
)
1484 /* ??? Rejecting patterns this way doesn't work. We'd have to
1485 do extra work to cancel the pattern so the uses see the
1487 && !is_pattern_stmt_p (stmt_info
)
1488 && !oprnd_info
->any_pattern
1489 && vect_update_all_shared_vectypes (oprnd_info
->def_stmts
))
1491 if (dump_enabled_p ())
1492 dump_printf_loc (MSG_NOTE
, vect_location
,
1493 "Building vector operands from scalars\n");
1495 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1496 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1497 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1498 children
.safe_push (child
);
1499 oprnd_info
->ops
= vNULL
;
1500 oprnd_info
->def_stmts
= vNULL
;
/* If the SLP build for operand zero failed and operand zero
   and one can be commuted, try that for the scalar stmts
   that failed the match.  */
1508 /* A first scalar stmt mismatch signals a fatal mismatch. */
1510 /* ??? For COND_EXPRs we can swap the comparison operands
1511 as well as the arms under some constraints. */
1513 && oprnds_info
[1]->first_dt
== vect_internal_def
1514 && is_gimple_assign (stmt_info
->stmt
)
1515 /* Swapping operands for reductions breaks assumptions later on. */
1516 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1517 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
/* Do so only if the number of not successful permutes was not more
   than a cut-off as re-trying the recursive match on
   possibly each level of the tree would expose exponential
   behavior.  */
1524 /* See whether we can swap the matching or the non-matching
1526 bool swap_not_matching
= true;
1529 for (j
= 0; j
< group_size
; ++j
)
1531 if (matches
[j
] != !swap_not_matching
)
1533 stmt_vec_info stmt_info
= stmts
[j
];
1534 /* Verify if we can swap operands of this stmt. */
1535 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1537 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1539 if (!swap_not_matching
)
1541 swap_not_matching
= false;
1546 while (j
!= group_size
);
1548 /* Swap mismatched definition stmts. */
1549 if (dump_enabled_p ())
1550 dump_printf_loc (MSG_NOTE
, vect_location
,
1551 "Re-trying with swapped operands of stmts ");
1552 for (j
= 0; j
< group_size
; ++j
)
1553 if (matches
[j
] == !swap_not_matching
)
1555 std::swap (oprnds_info
[0]->def_stmts
[j
],
1556 oprnds_info
[1]->def_stmts
[j
]);
1557 std::swap (oprnds_info
[0]->ops
[j
],
1558 oprnds_info
[1]->ops
[j
]);
1559 if (dump_enabled_p ())
1560 dump_printf (MSG_NOTE
, "%d ", j
);
1562 if (dump_enabled_p ())
1563 dump_printf (MSG_NOTE
, "\n");
1564 /* And try again with scratch 'matches' ... */
1565 bool *tem
= XALLOCAVEC (bool, group_size
);
1566 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1567 group_size
, &this_max_nunits
,
1569 &this_tree_size
, bst_map
)) != NULL
)
1571 /* If we have all children of a non-unary child built up from
1572 scalars then just throw that away and build it up this node
1574 if (is_a
<bb_vec_info
> (vinfo
)
1575 && SLP_TREE_CHILDREN (child
).length () > 1
1576 /* ??? Rejecting patterns this way doesn't work. We'd have
1577 to do extra work to cancel the pattern so the uses see the
1579 && !oprnd_info
->any_pattern
)
1582 slp_tree grandchild
;
1584 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1585 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1588 && (vect_update_all_shared_vectypes
1589 (oprnd_info
->def_stmts
)))
1592 this_tree_size
= old_tree_size
;
1593 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1594 vect_free_slp_tree (grandchild
, false);
1595 SLP_TREE_CHILDREN (child
).truncate (0);
1597 if (dump_enabled_p ())
1598 dump_printf_loc (MSG_NOTE
, vect_location
,
1599 "Building parent vector operands from "
1600 "scalars instead\n");
1601 oprnd_info
->def_stmts
= vNULL
;
1602 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1603 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1604 oprnd_info
->ops
= vNULL
;
1606 children
.safe_push (child
);
1611 oprnd_info
->def_stmts
= vNULL
;
1612 children
.safe_push (child
);
1620 gcc_assert (child
== NULL
);
1621 FOR_EACH_VEC_ELT (children
, j
, child
)
1622 vect_free_slp_tree (child
, false);
1623 vect_free_oprnd_info (oprnds_info
);
1627 vect_free_oprnd_info (oprnds_info
);
1629 *tree_size
+= this_tree_size
+ 1;
1630 *max_nunits
= this_max_nunits
;
1632 node
= vect_create_new_slp_node (stmts
);
1633 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1634 SLP_TREE_CHILDREN (node
).splice (children
);
1638 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1641 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1642 slp_tree node
, hash_set
<slp_tree
> &visited
)
1645 stmt_vec_info stmt_info
;
1649 if (visited
.add (node
))
1652 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1653 dump_user_location_t user_loc
= loc
.get_user_location ();
1654 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u)\n",
1655 SLP_TREE_DEF_TYPE (node
) == vect_external_def
1657 : (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
1660 estimated_poly_value (node
->max_nunits
));
1661 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1662 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1663 dump_printf_loc (metadata
, user_loc
, "\tstmt %u %G", i
, stmt_info
->stmt
);
1666 dump_printf_loc (metadata
, user_loc
, "\t{ ");
1667 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1668 dump_printf (metadata
, "%T%s ", op
,
1669 i
< SLP_TREE_SCALAR_OPS (node
).length () - 1 ? "," : "");
1670 dump_printf (metadata
, "}\n");
1672 if (SLP_TREE_CHILDREN (node
).is_empty ())
1674 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1675 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1676 dump_printf (dump_kind
, " %p", (void *)child
);
1677 dump_printf (dump_kind
, "\n");
1678 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1679 vect_print_slp_tree (dump_kind
, loc
, child
, visited
);
1683 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1686 hash_set
<slp_tree
> visited
;
1687 vect_print_slp_tree (dump_kind
, loc
, node
, visited
);
1690 /* Mark the tree rooted at NODE with PURE_SLP. */
1693 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
1696 stmt_vec_info stmt_info
;
1699 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1702 if (visited
.add (node
))
1705 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1706 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
1708 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1709 vect_mark_slp_stmts (child
, visited
);
1713 vect_mark_slp_stmts (slp_tree node
)
1715 hash_set
<slp_tree
> visited
;
1716 vect_mark_slp_stmts (node
, visited
);
1719 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1722 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
1725 stmt_vec_info stmt_info
;
1728 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1731 if (visited
.add (node
))
1734 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1736 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1737 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1738 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1741 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1742 vect_mark_slp_stmts_relevant (child
, visited
);
1746 vect_mark_slp_stmts_relevant (slp_tree node
)
1748 hash_set
<slp_tree
> visited
;
1749 vect_mark_slp_stmts_relevant (node
, visited
);
1752 /* Copy the SLP subtree rooted at NODE. */
1755 slp_copy_subtree (slp_tree node
, hash_map
<slp_tree
, slp_tree
> &map
)
1760 slp_tree
©
= map
.get_or_insert (node
, &existed_p
);
1764 copy
= XNEW (_slp_tree
);
1765 memcpy (copy
, node
, sizeof (_slp_tree
));
1766 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1768 SLP_TREE_SCALAR_STMTS (copy
) = SLP_TREE_SCALAR_STMTS (node
).copy ();
1769 stmt_vec_info stmt_info
;
1770 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1771 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
1773 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1774 SLP_TREE_SCALAR_OPS (copy
) = SLP_TREE_SCALAR_OPS (node
).copy ();
1775 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1776 SLP_TREE_LOAD_PERMUTATION (copy
) = SLP_TREE_LOAD_PERMUTATION (node
).copy ();
1777 if (SLP_TREE_CHILDREN (node
).exists ())
1778 SLP_TREE_CHILDREN (copy
) = SLP_TREE_CHILDREN (node
).copy ();
1779 gcc_assert (!SLP_TREE_VEC_STMTS (node
).exists ());
1783 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy
), i
, child
)
1785 SLP_TREE_CHILDREN (copy
)[i
] = slp_copy_subtree (child
, map
);
1786 SLP_TREE_CHILDREN (copy
)[i
]->refcnt
++;
1791 /* Rearrange the statements of NODE according to PERMUTATION. */
1794 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1795 vec
<unsigned> permutation
,
1796 hash_set
<slp_tree
> &visited
)
1801 if (visited
.add (node
))
1804 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1805 vect_slp_rearrange_stmts (child
, group_size
, permutation
, visited
);
1807 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1809 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1810 vec
<stmt_vec_info
> tmp_stmts
;
1811 tmp_stmts
.create (group_size
);
1812 tmp_stmts
.quick_grow (group_size
);
1813 stmt_vec_info stmt_info
;
1814 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1815 tmp_stmts
[permutation
[i
]] = stmt_info
;
1816 SLP_TREE_SCALAR_STMTS (node
).release ();
1817 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1819 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1821 gcc_assert (group_size
== SLP_TREE_SCALAR_OPS (node
).length ());
1823 tmp_ops
.create (group_size
);
1824 tmp_ops
.quick_grow (group_size
);
1826 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1827 tmp_ops
[permutation
[i
]] = op
;
1828 SLP_TREE_SCALAR_OPS (node
).release ();
1829 SLP_TREE_SCALAR_OPS (node
) = tmp_ops
;
1834 /* Attempt to reorder stmts in a reduction chain so that we don't
1835 require any load permutation. Return true if that was possible,
1836 otherwise return false. */
1839 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1841 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1844 slp_tree node
, load
;
1846 /* Compare all the permutation sequences to the first one. We know
1847 that at least one load is permuted. */
1848 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1849 if (!node
->load_permutation
.exists ())
1851 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1853 if (!load
->load_permutation
.exists ())
1855 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1856 if (lidx
!= node
->load_permutation
[j
])
1860 /* Check that the loads in the first sequence are different and there
1861 are no gaps between them. */
1862 auto_sbitmap
load_index (group_size
);
1863 bitmap_clear (load_index
);
1864 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1866 if (lidx
>= group_size
)
1868 if (bitmap_bit_p (load_index
, lidx
))
1871 bitmap_set_bit (load_index
, lidx
);
1873 for (i
= 0; i
< group_size
; i
++)
1874 if (!bitmap_bit_p (load_index
, i
))
1877 /* This permutation is valid for reduction. Since the order of the
1878 statements in the nodes is not important unless they are memory
1879 accesses, we can rearrange the statements in all the nodes
1880 according to the order of the loads. */
1882 /* We have to unshare the SLP tree we modify. */
1883 hash_map
<slp_tree
, slp_tree
> map
;
1884 slp_tree unshared
= slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn
), map
);
1885 vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn
), false);
1887 SLP_INSTANCE_TREE (slp_instn
) = unshared
;
1888 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1889 SLP_INSTANCE_LOADS (slp_instn
)[i
] = *map
.get (node
);
1890 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1892 /* Do the actual re-arrangement. */
1893 hash_set
<slp_tree
> visited
;
1894 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1895 node
->load_permutation
, visited
);
1897 /* We are done, no actual permutations need to be generated. */
1898 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1899 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1901 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1902 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1903 /* But we have to keep those permutations that are required because
1904 of handling of gaps. */
1905 if (known_eq (unrolling_factor
, 1U)
1906 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1907 && DR_GROUP_GAP (first_stmt_info
) == 0))
1908 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1910 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1911 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1917 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1920 vect_gather_slp_loads (slp_instance inst
, slp_tree node
,
1921 hash_set
<slp_tree
> &visited
)
1923 if (visited
.add (node
))
1926 if (SLP_TREE_CHILDREN (node
).length () == 0)
1928 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1930 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1931 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1932 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1933 SLP_INSTANCE_LOADS (inst
).safe_push (node
);
1939 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1940 vect_gather_slp_loads (inst
, child
, visited
);
1945 vect_gather_slp_loads (slp_instance inst
, slp_tree node
)
1947 hash_set
<slp_tree
> visited
;
1948 vect_gather_slp_loads (inst
, node
, visited
);
1951 /* Check if the required load permutations in the SLP instance
1952 SLP_INSTN are supported. */
1955 vect_supported_load_permutation_p (slp_instance slp_instn
)
1957 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1958 unsigned int i
, j
, k
, next
;
1961 if (dump_enabled_p ())
1963 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1964 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1965 if (node
->load_permutation
.exists ())
1966 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1967 dump_printf (MSG_NOTE
, "%d ", next
);
1969 for (k
= 0; k
< group_size
; ++k
)
1970 dump_printf (MSG_NOTE
, "%d ", k
);
1971 dump_printf (MSG_NOTE
, "\n");
1974 /* In case of reduction every load permutation is allowed, since the order
1975 of the reduction statements is not important (as opposed to the case of
1976 grouped stores). The only condition we need to check is that all the
1977 load nodes are of the same size and have the same permutation (and then
1978 rearrange all the nodes of the SLP instance according to this
1981 /* Check that all the load nodes are of the same size. */
1982 /* ??? Can't we assert this? */
1983 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1984 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1987 node
= SLP_INSTANCE_TREE (slp_instn
);
1988 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1990 /* Reduction (there are no data-refs in the root).
1991 In reduction chain the order of the loads is not important. */
1992 if (!STMT_VINFO_DATA_REF (stmt_info
)
1993 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1994 vect_attempt_slp_rearrange_stmts (slp_instn
);
/* In basic block vectorization we allow any subchain of an interleaving
   chain.
   FORNOW: not supported in loop SLP because of realignment complications.  */
1999 if (STMT_VINFO_BB_VINFO (stmt_info
))
2001 /* Check whether the loads in an instance form a subchain and thus
2002 no permutation is necessary. */
2003 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
2005 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
2007 bool subchain_p
= true;
2008 stmt_vec_info next_load_info
= NULL
;
2009 stmt_vec_info load_info
;
2010 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
2013 && (next_load_info
!= load_info
2014 || DR_GROUP_GAP (load_info
) != 1))
2019 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
2022 SLP_TREE_LOAD_PERMUTATION (node
).release ();
2025 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2026 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
2027 unsigned HOST_WIDE_INT nunits
;
2028 unsigned k
, maxk
= 0;
2029 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
2032 /* In BB vectorization we may not actually use a loaded vector
2033 accessing elements in excess of DR_GROUP_SIZE. */
2034 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
2035 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
2036 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
2038 if (dump_enabled_p ())
2039 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2040 "BB vectorization with gaps at the end of "
2041 "a load is not supported\n");
2045 /* Verify the permutation can be generated. */
2048 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
2049 1, slp_instn
, true, &n_perms
))
2051 if (dump_enabled_p ())
2052 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
2054 "unsupported load permutation\n");
2062 /* For loop vectorization verify we can generate the permutation. Be
2063 conservative about the vectorization factor, there are permutations
2064 that will use three vector inputs only starting from a specific factor
2065 and the vectorization factor is not yet final.
2066 ??? The SLP instance unrolling factor might not be the maximum one. */
2069 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
2070 LOOP_VINFO_VECT_FACTOR
2071 (STMT_VINFO_LOOP_VINFO (stmt_info
)));
2072 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
2073 if (node
->load_permutation
.exists ()
2074 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
2075 slp_instn
, true, &n_perms
))
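/* Illustrative note (editorial, not from the original sources): for BB SLP
   the subchain test above accepts, for example, a group over a[0..7] where
   one SLP node loads a[2], a[3], a[4], a[5].  Those loads are consecutive
   group members with gap 1, so the recorded load permutation {2,3,4,5} can
   be released and the vector load simply starts at a[2].  */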
/* Find the last store in SLP INSTANCE.  */

stmt_vec_info
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  stmt_vec_info last = NULL;
  stmt_vec_info stmt_vinfo;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
    {
      stmt_vinfo = vect_orig_stmt (stmt_vinfo);
      last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
    }

  return last;
}
/* Splits a group of stores, currently beginning at FIRST_VINFO, into
   two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
   (also containing the first GROUP1_SIZE stmts, since stores are
   consecutive), the second containing the remainder.
   Return the first stmt in the second group.  */

static stmt_vec_info
vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
{
  gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
  gcc_assert (group1_size > 0);
  int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  DR_GROUP_SIZE (first_vinfo) = group1_size;

  stmt_vec_info stmt_info = first_vinfo;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }
  /* STMT is now the last element of the first group.  */
  stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
  DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;

  DR_GROUP_SIZE (group2) = group2_size;
  for (stmt_info = group2; stmt_info;
       stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
      gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
    }

  /* For the second group, the DR_GROUP_GAP is that before the original group,
     plus skipping over the first vector.  */
  DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;

  /* DR_GROUP_GAP of the first group now has to skip over the second group
     too.  */
  DR_GROUP_GAP (first_vinfo) += group2_size;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}
/* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
   statements and a vector of NUNITS elements.  */

static poly_uint64
calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
{
  return exact_div (common_multiple (nunits, group_size), group_size);
}
/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
			   scalar_stmts_to_slp_tree_map_t *bst_map,
			   stmt_vec_info stmt_info, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size;
  tree vectype, scalar_type = NULL_TREE;
  unsigned int i;
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  vec<stmt_vec_info> scalar_stmts;
  bool constructor = false;

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      scalar_type = TREE_TYPE (DR_REF (dr));
      group_size = DR_GROUP_SIZE (stmt_info);
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      group_size = REDUC_GROUP_SIZE (stmt_info);
    }
  else if (is_gimple_assign (stmt_info->stmt)
	   && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
    {
      vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
      group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
      constructor = true;
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Build SLP failed: unsupported data-type %T\n",
			 scalar_type);
      return false;
    }
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  stmt_vec_info next_info = stmt_info;
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
	{
	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
	  next_info = DR_GROUP_NEXT_ELEMENT (next_info);
	}
    }
  else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      /* Collect the reduction stmts and store them in
	 SLP_TREE_SCALAR_STMTS.  */
      while (next_info)
	{
	  scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
	  next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
	}
      /* Mark the first element of the reduction chain as reduction to properly
	 transform the node.  In the reduction analysis phase only the last
	 element of the chain is marked as reduction.  */
      STMT_VINFO_DEF_TYPE (stmt_info)
	= STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
      STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
	= STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
    }
  else if (constructor)
    {
      tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
      tree val;
      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
	{
	  if (TREE_CODE (val) == SSA_NAME)
	    {
	      gimple *def = SSA_NAME_DEF_STMT (val);
	      stmt_vec_info def_info = vinfo->lookup_stmt (def);
	      /* Value is defined in another basic block.  */
	      if (!def_info)
		return false;
	      scalar_stmts.safe_push (def_info);
	    }
	  else
	    return false;
	}
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Analyzing vectorizable constructor: %G\n",
			 stmt_info->stmt);
    }
  else
    {
      /* Collect reduction statements.  */
      vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next_info); i++)
	scalar_stmts.safe_push (next_info);
    }

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  poly_uint64 max_nunits = nunits;
  unsigned tree_size = 0;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
			      &max_nunits, matches, &npermutes,
			      &tree_size, bst_map);
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      poly_uint64 unrolling_factor
	= calculate_unrolling_factor (max_nunits, group_size);

      if (maybe_ne (unrolling_factor, 1U)
	  && is_a <bb_vec_info> (vinfo))
	{
	  unsigned HOST_WIDE_INT const_max_nunits;
	  if (!max_nunits.is_constant (&const_max_nunits)
	      || const_max_nunits > group_size)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "Build SLP failed: store group "
				 "size not a multiple of the vector size "
				 "in basic block SLP\n");
	      vect_free_slp_tree (node, false);
	      return false;
	    }
	  /* Fatal mismatch.  */
	  matches[group_size / const_max_nunits * const_max_nunits] = false;
	  vect_free_slp_tree (node, false);
	}
      else
	{
	  /* Create a new SLP instance.  */
	  new_instance = XNEW (class _slp_instance);
	  SLP_INSTANCE_TREE (new_instance) = node;
	  SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
	  SLP_INSTANCE_LOADS (new_instance) = vNULL;
	  SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;

	  vect_gather_slp_loads (new_instance, node);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "SLP size %u vs. limit %u.\n",
			     tree_size, max_tree_size);

	  /* Compute the load permutation.  */
	  slp_tree load_node;
	  bool loads_permuted = false;
	  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
	    {
	      if (!SLP_TREE_LOAD_PERMUTATION (load_node).exists ())
		continue;
	      unsigned j;
	      stmt_vec_info load_info;
	      bool this_load_permuted = false;
	      stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
		(SLP_TREE_SCALAR_STMTS (load_node)[0]);
	      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
		if (SLP_TREE_LOAD_PERMUTATION (load_node)[j] != j)
		  {
		    this_load_permuted = true;
		    break;
		  }
	      if (!this_load_permuted
		  /* The load requires permutation when unrolling exposes
		     a gap either because the group is larger than the SLP
		     group-size or because there is a gap between the groups.  */
		  && (known_eq (unrolling_factor, 1U)
		      || (group_size == DR_GROUP_SIZE (first_stmt_info)
			  && DR_GROUP_GAP (first_stmt_info) == 0)))
		{
		  SLP_TREE_LOAD_PERMUTATION (load_node).release ();
		  continue;
		}
	      loads_permuted = true;
	    }

	  if (loads_permuted)
	    {
	      if (!vect_supported_load_permutation_p (new_instance))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Build SLP failed: unsupported load "
				     "permutation %G", stmt_info->stmt);
		  vect_free_slp_instance (new_instance, false);
		  return false;
		}
	    }

	  /* If the loads and stores can be handled with load/store-lanes
	     instructions do not generate this SLP instance.  */
	  if (is_a <loop_vec_info> (vinfo)
	      && loads_permuted
	      && dr && vect_store_lanes_supported (vectype, group_size, false))
	    {
	      slp_tree load_node;
	      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
		{
		  stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
		    (SLP_TREE_SCALAR_STMTS (load_node)[0]);
		  /* Use SLP for strided accesses (or if we can't
		     load-lanes).  */
		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
		      || ! vect_load_lanes_supported
			     (STMT_VINFO_VECTYPE (stmt_vinfo),
			      DR_GROUP_SIZE (stmt_vinfo), false))
		    break;
		}
	      if (i == SLP_INSTANCE_LOADS (new_instance).length ())
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Built SLP cancelled: can use "
				     "load/store-lanes\n");
		  vect_free_slp_instance (new_instance, false);
		  return false;
		}
	    }

	  /* If this is a reduction chain with a conversion in front
	     amend the SLP tree with a node for that.  */
	  if (!dr
	      && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
	      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
	    {
	      /* Get at the conversion stmt - we know it's the single use
		 of the last stmt of the reduction chain.  */
	      gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
	      use_operand_p use_p;
	      gimple *use_stmt;
	      bool r = single_imm_use (gimple_assign_lhs (tem),
				       &use_p, &use_stmt);
	      gcc_assert (r);
	      next_info = vinfo->lookup_stmt (use_stmt);
	      next_info = vect_stmt_to_vectorize (next_info);
	      scalar_stmts = vNULL;
	      scalar_stmts.create (group_size);
	      for (unsigned i = 0; i < group_size; ++i)
		scalar_stmts.quick_push (next_info);
	      slp_tree conv = vect_create_new_slp_node (scalar_stmts);
	      SLP_TREE_CHILDREN (conv).quick_push (node);
	      SLP_INSTANCE_TREE (new_instance) = conv;
	      /* We also have to fake this conversion stmt as SLP reduction
		 group so we don't have to mess with too much code
		 elsewhere.  */
	      REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
	      REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
	    }

	  vinfo->slp_instances.safe_push (new_instance);

	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "Final SLP tree for instance:\n");
	      vect_print_slp_tree (MSG_NOTE, vect_location, node);
	    }

	  return true;
	}
    }
  else
    {
      /* Failed to SLP.  */
      /* Free the allocated memory.  */
      scalar_stmts.release ();
    }

  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
  unsigned HOST_WIDE_INT const_nunits;
  if (is_a <bb_vec_info> (vinfo)
      && STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && DR_GROUP_FIRST_ELEMENT (stmt_info)
      && nunits.is_constant (&const_nunits))
    {
      /* We consider breaking the group only on VF boundaries from the existing
	 start.  */
      for (i = 0; i < group_size; i++)
	if (!matches[i])
	  break;

      if (i >= const_nunits && i < group_size)
	{
	  /* Split into two groups at the first vector boundary before i.  */
	  gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
	  unsigned group1_size = i & ~(const_nunits - 1);

	  stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
							   group1_size);
	  bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
						max_tree_size);
	  /* If the first non-match was in the middle of a vector,
	     skip the rest of that vector.  */
	  if (group1_size < i)
	    {
	      i = group1_size + const_nunits;
	      if (i < group_size)
		rest = vect_split_slp_store_group (rest, const_nunits);
	    }
	  if (i < group_size)
	    res |= vect_analyze_slp_instance (vinfo, bst_map,
					      rest, max_tree_size);
	  return res;
	}
      /* Even though the first vector did not all match, we might be able to SLP
	 (some) of the remainder.  FORNOW ignore this possibility.  */
    }

  return false;
}
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

opt_result
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  stmt_vec_info first_element;

  DUMP_VECT_SCOPE ("vect_analyze_slp");

  scalar_stmts_to_slp_tree_map_t *bst_map
    = new scalar_stmts_to_slp_tree_map_t ();

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
	{
	  /* Find SLP sequences starting from reduction chains.  */
	  FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
	    if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
					     max_tree_size))
	      {
		/* Dissolve reduction chain group.  */
		stmt_vec_info vinfo = first_element;
		stmt_vec_info last = NULL;
		while (vinfo)
		  {
		    stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
		    REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
		    REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
		    last = vinfo;
		    vinfo = next;
		  }
		STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
		/* It can be still vectorized as part of an SLP reduction.  */
		loop_vinfo->reductions.safe_push (last);
	      }
	}

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1)
	vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
				   max_tree_size);
    }

  /* The map keeps a reference on SLP nodes built, release that.  */
  for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
       it != bst_map->end (); ++it)
    if ((*it).second)
      vect_free_slp_tree ((*it).second, false);
  delete bst_map;

  return opt_result::success ();
}
/* For each possible SLP instance decide whether to SLP it and calculate overall
   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
   least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i;
  poly_uint64 unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  DUMP_VECT_SCOPE ("vect_make_slp_decision");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      /* All unroll factors have the form:

	   GET_MODE_SIZE (vinfo->vector_mode) * X

	 for some rational X, so they must have a common multiple.  */
      unrolling_factor
	= force_common_multiple (unrolling_factor,
				 SLP_INSTANCE_UNROLLING_FACTOR (instance));

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "Decided to SLP %d instances. Unrolling factor ",
		       decided_to_slp);
      dump_dec (MSG_NOTE, unrolling_factor);
      dump_printf (MSG_NOTE, "\n");
    }

  return (decided_to_slp > 0);
}
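/* Editorial note: the unrolling factor accumulated above is a common
   multiple of the per-instance factors, e.g. instances with factors 2 and 4
   yield 4.  With variable-length vectors the factors are poly_uint64 values
   of the form GET_MODE_SIZE (vinfo->vector_mode) * X, which is why
   force_common_multiple is used rather than a plain LCM on constants.  */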
/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
			      hash_map<slp_tree, unsigned> &visited)
{
  stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  stmt_vec_info use_vinfo;
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  int j;

  /* We need to union stype over the incoming graph edges but we still
     want to limit recursion to stay O(N+E).  */
  unsigned visited_cnt = ++visited.get_or_insert (node);
  gcc_assert (visited_cnt <= node->refcnt);
  bool only_edge = (visited_cnt != node->refcnt);

  /* Propagate hybrid down the SLP tree.  */
  if (stype == hybrid)
    ;
  else if (HYBRID_SLP_STMT (stmt_vinfo))
    stype = hybrid;
  else if (!only_edge)
    {
      /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
      gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
      /* If we get a pattern stmt here we have to use the LHS of the
	 original stmt for immediate uses.  */
      gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
      tree def;
      if (gimple_code (stmt) == GIMPLE_PHI)
	def = gimple_phi_result (stmt);
      else
	def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
      if (def)
	FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
	  {
	    use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
	    if (!use_vinfo)
	      continue;
	    use_vinfo = vect_stmt_to_vectorize (use_vinfo);
	    if (!STMT_SLP_TYPE (use_vinfo)
		&& (STMT_VINFO_RELEVANT (use_vinfo)
		    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
		&& !(gimple_code (use_stmt) == GIMPLE_PHI
		     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
	      {
		if (dump_enabled_p ())
		  dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
				   "def in non-SLP stmt: %G", use_stmt);
		stype = hybrid;
	      }
	  }
    }

  if (stype == hybrid
      && !HYBRID_SLP_STMT (stmt_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
			 stmt_vinfo->stmt);
      STMT_SLP_TYPE (stmt_vinfo) = hybrid;
    }

  if (!only_edge)
    FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
      if (SLP_TREE_DEF_TYPE (child) != vect_external_def
	  && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
	vect_detect_hybrid_slp_stmts (child, i, stype, visited);
}
/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */

static tree
vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *) data;
  loop_vec_info loop_vinfo = (loop_vec_info) wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
  if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
			 def_stmt_info->stmt);
      STMT_SLP_TYPE (def_stmt_info) = hybrid;
    }

  return NULL_TREE;
}
static tree
vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
			  walk_stmt_info *wi)
{
  loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
  stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
  /* If the stmt is in a SLP instance then this isn't a reason
     to mark use definitions in other SLP instances as hybrid.  */
  if (! STMT_SLP_TYPE (use_vinfo)
      && (STMT_VINFO_RELEVANT (use_vinfo)
	  || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
      && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
	    && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
    ;
  else
    *handled = true;
  return NULL_TREE;
}
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");

  /* First walk all pattern stmt in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
	    {
	      walk_stmt_info wi;
	      memset (&wi, 0, sizeof (wi));
	      wi.info = loop_vinfo;
	      gimple_stmt_iterator gsi2
		= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
	      walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
				vect_detect_hybrid_slp_1, &wi);
	      walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
			       vect_detect_hybrid_slp_2,
			       vect_detect_hybrid_slp_1, &wi);
	    }
	}
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  for (unsigned j = 0;; ++j)
    {
      bool any = false;
      hash_map<slp_tree, unsigned> visited;
      FOR_EACH_VEC_ELT (slp_instances, i, instance)
	if (j < SLP_INSTANCE_GROUP_SIZE (instance))
	  {
	    any = true;
	    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
					  j, pure_slp, visited);
	  }
      if (!any)
	break;
    }
}
/* Initialize a bb_vec_info struct for the statements between
   REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive).  */

_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
			    gimple_stmt_iterator region_end_in,
			    vec_info_shared *shared)
  : vec_info (vec_info::bb, init_cost (NULL), shared),
    bb (gsi_bb (region_begin_in)),
    region_begin (region_begin_in),
    region_end (region_end_in)
{
  gimple_stmt_iterator gsi;

  for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      add_stmt (stmt);
    }

  bb->aux = this;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

_bb_vec_info::~_bb_vec_info ()
{
  for (gimple_stmt_iterator si = region_begin;
       gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
    /* Reset region marker.  */
    gimple_set_uid (gsi_stmt (si), -1);

  bb->aux = NULL;
}
/* Subroutine of vect_slp_analyze_node_operations.  Handle the root of NODE,
   given that child nodes have already been processed, and that
   their def types currently match their SLP node's def type.  */

static bool
vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
				    slp_instance node_instance,
				    stmt_vector_for_cost *cost_vec)
{
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

  /* Calculate the number of vector statements to be created for the
     scalar stmts in this node.  For SLP reductions it is equal to the
     number of vector statements in the children (which has already been
     calculated by the recursive call).  Otherwise it is the number of
     scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
     VF divided by the number of elements in a vector.  */
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
    {
      for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
	if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
	  {
	    SLP_TREE_NUMBER_OF_VEC_STMTS (node)
	      = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
	    break;
	  }
    }
  else
    {
      poly_uint64 vf;
      if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
	vf = loop_vinfo->vectorization_factor;
      else
	vf = 1;
      unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node)
	= vect_get_num_vectors (vf * group_size, vectype);
    }

  bool dummy;
  return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
}
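/* Worked example (editorial): in a loop with VF 4, an SLP node covering a
   group of 2 scalars and a vector type with 8 elements needs
   vect_get_num_vectors (4 * 2, vectype) == 1 vector statement; with VF 16
   it would need 16 * 2 / 8 == 4 of them.  */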
/* Try to build NODE from scalars, returning true on success.
   NODE_INSTANCE is the SLP instance that contains NODE.  */

static bool
vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
			      slp_instance node_instance)
{
  stmt_vec_info stmt_info;
  unsigned int i;

  if (!is_a <bb_vec_info> (vinfo)
      || node == SLP_INSTANCE_TREE (node_instance)
      || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
    return false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Building vector operands from scalars instead\n");

  /* Don't remove and free the child nodes here, since they could be
     referenced by other structures.  The analysis and scheduling phases
     (need to) ignore child nodes of anything that isn't vect_internal_def.  */
  unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
  SLP_TREE_DEF_TYPE (node) = vect_external_def;
  SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
      SLP_TREE_SCALAR_OPS (node)[i] = lhs;
    }
  return true;
}
/* Analyze statements contained in SLP tree NODE after recursively analyzing
   the subtree.  NODE_INSTANCE contains NODE and VINFO contains INSTANCE.

   Return true if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
				  slp_instance node_instance,
				  hash_set<slp_tree> &visited,
				  hash_set<slp_tree> &lvisited,
				  stmt_vector_for_cost *cost_vec)
{
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  /* If we already analyzed the exact same set of scalar stmts we're done.
     We share the generated vector stmts for those.
     The SLP graph is acyclic so not caching whether we failed or succeeded
     doesn't result in any issue since we throw away the lvisited set
     when we fail.  */
  if (visited.contains (node)
      || lvisited.add (node))
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
					   visited, lvisited, cost_vec))
      return false;

  /* ??? We have to catch the case late where two first scalar stmts appear
     in multiple SLP children with different def type and fail.  Remember
     original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
     match it when that is vect_internal_def.  */
  auto_vec<vect_def_type, 4> dt;
  dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
      dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);

  /* Push SLP node def-type to stmt operands.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
	&& SLP_TREE_SCALAR_STMTS (child).length () != 0)
      STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
	= SLP_TREE_DEF_TYPE (child);

  /* Check everything worked out.  */
  bool res = true;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
      {
	if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
	  {
	    if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
		!= SLP_TREE_DEF_TYPE (child))
	      res = false;
	  }
	else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
		 != dt[j])
	  res = false;
      }
  if (!res && dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		     "not vectorized: same operand with different "
		     "def type in stmt.\n");

  if (res)
    res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
					      cost_vec);

  /* Restore def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
      STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];

  /* If this node can't be vectorized, try pruning the tree here rather
     than felling the whole thing.  */
  if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
    res = true;

  return res;
}
/* Analyze statements in SLP instances of VINFO.  Return true if the
   operations are supported.  */

bool
vect_slp_analyze_operations (vec_info *vinfo)
{
  slp_instance instance;
  int i;

  DUMP_VECT_SCOPE ("vect_slp_analyze_operations");

  hash_set<slp_tree> visited;
  for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
    {
      hash_set<slp_tree> lvisited;
      stmt_vector_for_cost cost_vec;
      cost_vec.create (2);
      if (!vect_slp_analyze_node_operations (vinfo,
					     SLP_INSTANCE_TREE (instance),
					     instance, visited, lvisited,
					     &cost_vec)
	  /* Instances with a root stmt require vectorized defs for the
	     SLP tree root.  */
	  || (SLP_INSTANCE_ROOT_STMT (instance)
	      && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
		  != vect_internal_def)))
	{
	  slp_tree node = SLP_INSTANCE_TREE (instance);
	  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "removing SLP instance operations starting from: %G",
			     stmt_info->stmt);
	  vect_free_slp_instance (instance, false);
	  vinfo->slp_instances.ordered_remove (i);
	  cost_vec.release ();
	}
      else
	{
	  for (hash_set<slp_tree>::iterator x = lvisited.begin ();
	       x != lvisited.end (); ++x)
	    visited.add (*x);
	  i++;

	  add_stmt_costs (vinfo->target_cost_data, &cost_vec);
	  cost_vec.release ();
	}
    }

  return !vinfo->slp_instances.is_empty ();
}
/* Compute the scalar cost of the SLP node NODE and its children
   and record it into COST_VEC.  Do not account defs that are marked
   in LIFE and update LIFE according to uses of NODE.  */

static void
vect_bb_slp_scalar_cost (basic_block bb,
			 slp_tree node, vec<bool, va_heap> *life,
			 stmt_vector_for_cost *cost_vec,
			 hash_set<slp_tree> &visited)
{
  unsigned i;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gimple *stmt = stmt_info->stmt;
      vec_info *vinfo = stmt_info->vinfo;
      ssa_op_iter op_iter;
      def_operand_p def_p;

      if ((*life)[i])
	continue;

      /* If there is a non-vectorized use of the defs then the scalar
	 stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
	{
	  imm_use_iterator use_iter;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
	    if (!is_gimple_debug (use_stmt))
	      {
		stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
		if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
		  {
		    (*life)[i] = true;
		    BREAK_FROM_IMM_USE_STMT (use_iter);
		  }
	      }
	}
      if ((*life)[i])
	continue;

      /* Count scalar stmts only once.  */
      if (gimple_visited_p (stmt))
	continue;
      gimple_set_visited (stmt, true);

      vect_cost_for_stmt kind;
      if (STMT_VINFO_DATA_REF (stmt_info))
	{
	  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	    kind = scalar_load;
	  else
	    kind = scalar_store;
	}
      else if (vect_nop_conversion_p (stmt_info))
	continue;
      else
	kind = scalar_stmt;
      record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
    }

  auto_vec<bool, 20> subtree_life;
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    {
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	{
	  /* Do not directly pass LIFE to the recursive call, copy it to
	     confine changes in the callee to the current child/subtree.  */
	  subtree_life.safe_splice (*life);
	  vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
				   visited);
	  subtree_life.truncate (0);
	}
    }
}
/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  */
  stmt_vector_for_cost scalar_costs;
  scalar_costs.create (0);
  hash_set<slp_tree> visited;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
			       SLP_INSTANCE_TREE (instance),
			       &life, &scalar_costs, visited);
    }
  void *target_cost_data = init_cost (NULL);
  add_stmt_costs (target_cost_data, &scalar_costs);
  scalar_costs.release ();
  unsigned dummy;
  finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
  destroy_cost_data (target_cost_data);

  /* Unset visited flag.  */
  for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    gimple_set_visited (gsi_stmt (gsi), false);

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
	       &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
		   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is less than the cost of the
     scalar version.  Note that we err on the vector side for equal cost
     because the cost estimate is otherwise quite pessimistic (constant uses
     are free on the scalar side but cost a load on the vector side for
     example).  */
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}
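/* Editorial note: with the costs dumped above the decision is simply
   vec_prologue_cost + vec_epilogue_cost + vec_inside_cost <= scalar_cost.
   For example a vector body of 6 plus a prologue of 2 is accepted against a
   scalar cost of 8 (ties go to the vector side, as explained above) but
   rejected against a scalar cost of 7.  */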
/* Find any vectorizable constructors and add them to the grouped_store
   array.  */

static void
vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
{
  gimple_stmt_iterator gsi;

  for (gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    {
      gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
      if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
	continue;

      tree rhs = gimple_assign_rhs1 (stmt);
      if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
	  || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
		       CONSTRUCTOR_NELTS (rhs))
	  || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
	  || uniform_vector_p (rhs))
	continue;

      stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
      BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
    }
}
/* Check if the region described by BB_VINFO can be vectorized, returning
   true if so.  When returning false, set FATAL to true if the same failure
   would prevent vectorization at other vector sizes, false if it is still
   worth trying other sizes.  N_STMTS is the number of statements in the
   region.  */

static bool
vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
{
  DUMP_VECT_SCOPE ("vect_slp_analyze_bb");

  slp_instance instance;
  int i;
  poly_uint64 min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data-ref in basic "
			 "block.\n");
      return false;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: not enough data-refs in "
			 "basic block.\n");
      return false;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data access in "
			 "basic block.\n");
      return false;
    }

  vect_slp_check_for_constructors (bb_vinfo);

  /* If there are no grouped stores in the region there is no need
     to continue with pattern recog as vect_analyze_slp will fail
     anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: no grouped stores in "
			 "basic block.\n");
      return false;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Failed to SLP the basic block.\n");
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: failed to find SLP opportunities "
			   "in basic block.\n");
	}
      return false;
    }

  vect_record_base_alignments (bb_vinfo);

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      if (! vect_slp_analyze_and_verify_instance_alignment (instance)
	  || ! vect_slp_analyze_instance_dependence (instance))
	{
	  slp_tree node = SLP_INSTANCE_TREE (instance);
	  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "removing SLP instance operations starting from: %G",
			     stmt_info->stmt);
	  vect_free_slp_instance (instance, false);
	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
	  continue;
	}

      /* Mark all the statements that we want to vectorize as pure SLP and
	 relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
      if (SLP_INSTANCE_ROOT_STMT (instance))
	STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    return false;

  if (!vect_slp_analyze_operations (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: bad operation in basic block.\n");
      return false;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization is not "
			 "profitable.\n");
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Basic block will be vectorized using SLP\n");
  return true;
}
/* Subroutine of vect_slp_bb.  Try to vectorize the statements between
   REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
   on success.  The region has N_STMTS statements and has the datarefs
   given by DATAREFS.  */

static bool
vect_slp_bb_region (gimple_stmt_iterator region_begin,
		    gimple_stmt_iterator region_end,
		    vec<data_reference_p> datarefs,
		    unsigned int n_stmts)
{
  bb_vec_info bb_vinfo;
  auto_vector_modes vector_modes;

  /* Autodetect first vector size we try.  */
  machine_mode next_vector_mode = VOIDmode;
  targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
  unsigned int mode_i = 0;

  vec_info_shared shared;

  machine_mode autodetected_vector_mode = VOIDmode;
  while (1)
    {
      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);

      bool first_time_p = shared.datarefs.is_empty ();
      BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
      if (first_time_p)
	bb_vinfo->shared->save_datarefs ();
      else
	bb_vinfo->shared->check_datarefs ();
      bb_vinfo->vector_mode = next_vector_mode;

      if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
	  && dbg_cnt (vect_slp))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "***** Analysis succeeded with vector mode"
			       " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
	      dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
	    }

	  bb_vinfo->shared->check_datarefs ();
	  vect_schedule_slp (bb_vinfo);

	  unsigned HOST_WIDE_INT bytes;
	  if (dump_enabled_p ())
	    {
	      if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
		dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
				 "basic block part vectorized using %wu byte "
				 "vectors\n", bytes);
	      else
		dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
				 "basic block part vectorized using variable "
				 "length vectors\n");
	    }

	  vectorized = true;
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "***** Analysis failed with vector mode %s\n",
			     GET_MODE_NAME (bb_vinfo->vector_mode));
	}

      if (mode_i == 0)
	autodetected_vector_mode = bb_vinfo->vector_mode;

      if (!fatal)
	while (mode_i < vector_modes.length ()
	       && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
	  {
	    if (dump_enabled_p ())
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "***** The result for vector mode %s would"
			       " be the same\n",
			       GET_MODE_NAME (vector_modes[mode_i]));
	    mode_i += 1;
	  }

      delete bb_vinfo;

      if (mode_i < vector_modes.length ()
	  && VECTOR_MODE_P (autodetected_vector_mode)
	  && (related_vector_mode (vector_modes[mode_i],
				   GET_MODE_INNER (autodetected_vector_mode))
	      == autodetected_vector_mode)
	  && (related_vector_mode (autodetected_vector_mode,
				   GET_MODE_INNER (vector_modes[mode_i]))
	      == vector_modes[mode_i]))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "***** Skipping vector mode %s, which would"
			     " repeat the analysis for %s\n",
			     GET_MODE_NAME (vector_modes[mode_i]),
			     GET_MODE_NAME (autodetected_vector_mode));
	  mode_i += 1;
	}

      if (vectorized
	  || mode_i == vector_modes.length ()
	  || autodetected_vector_mode == VOIDmode
	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
	     vector sizes will fail do not bother iterating.  */
	  || fatal)
	return vectorized;

      /* Try the next biggest vector size.  */
      next_vector_mode = vector_modes[mode_i++];
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "***** Re-trying analysis with vector mode %s\n",
			 GET_MODE_NAME (next_vector_mode));
    }
}
/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  bool any_vectorized = false;

  gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi))
    {
      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      for (; !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (is_gimple_debug (stmt))
	    continue;
	  insns++;

	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
	    vect_location = stmt;

	  if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
	    break;
	}

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
	{
	  gsi_next (&gsi);
	  continue;
	}

      gimple_stmt_iterator region_end = gsi;

      if (insns > param_slp_max_insns_in_bb)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: too many instructions in "
			     "basic block.\n");
	}
      else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
	any_vectorized = true;

      if (gsi_end_p (region_end))
	break;

      /* Skip the unhandled stmt.  */
      gsi_next (&gsi);
    }

  return any_vectorized;
}
/* Return true if constant operand OP_NUM of STMT_VINFO requires a boolean
   (mask) vector type.  */

static bool
vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, unsigned op_num)
{
  enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
  tree op, vectype;
  enum vect_def_type dt;

  /* For comparison and COND_EXPR type is chosen depending
     on the non-constant other comparison operand.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    {
      gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
      op = gimple_assign_rhs1 (stmt);

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
	gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  if (code == COND_EXPR)
    {
      gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
      tree cond = gimple_assign_rhs1 (stmt);

      if (TREE_CODE (cond) == SSA_NAME)
	{
	  if (op_num > 0)
	    return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
	  op = cond;
	}
      else
	{
	  if (op_num > 1)
	    return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
	  op = TREE_OPERAND (cond, 0);
	}

      if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
	gcc_unreachable ();

      return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
    }

  return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
}
/* Build a variable-length vector in which the elements in ELTS are repeated
   to fill NRESULTS vectors of type VECTOR_TYPE.  Store the vectors in
   RESULTS and add any new instructions to SEQ.

   The approach we use is:

   (1) Find a vector mode VM with integer elements of mode IM.

   (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
       ELTS' has mode IM.  This involves creating NELTS' VIEW_CONVERT_EXPRs
       from small vectors to IM.

   (3) Duplicate each ELTS'[I] into a vector of mode VM.

   (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
       correct byte contents.

   (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.

   We try to find the largest IM for which this sequence works, in order
   to cut down on the number of interleaves.  */

void
duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
			  vec<tree> elts, unsigned int nresults,
			  vec<tree> &results)
{
  unsigned int nelts = elts.length ();
  tree element_type = TREE_TYPE (vector_type);

  /* (1) Find a vector mode VM with integer elements of mode IM.  */
  unsigned int nvectors = 1;
  tree new_vector_type;
  tree permutes[2];
  if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
				       &nvectors, &new_vector_type,
				       permutes))
    gcc_unreachable ();

  /* Get a vector type that holds ELTS[0:NELTS/NELTS'].  */
  unsigned int partial_nelts = nelts / nvectors;
  tree partial_vector_type = build_vector_type (element_type, partial_nelts);

  tree_vector_builder partial_elts;
  auto_vec<tree, 32> pieces (nvectors * 2);
  pieces.quick_grow (nvectors * 2);
  for (unsigned int i = 0; i < nvectors; ++i)
    {
      /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
	     ELTS' has mode IM.  */
      partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
      for (unsigned int j = 0; j < partial_nelts; ++j)
	partial_elts.quick_push (elts[i * partial_nelts + j]);
      tree t = gimple_build_vector (seq, &partial_elts);
      t = gimple_build (seq, VIEW_CONVERT_EXPR,
			TREE_TYPE (new_vector_type), t);

      /* (3) Duplicate each ELTS'[I] into a vector of mode VM.  */
      pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
    }

  /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
	 correct byte contents.

     We need to repeat the following operation log2(nvectors) times:

       out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
       out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);

     However, if each input repeats every N elements and the VF is
     a multiple of N * 2, the HI result is the same as the LO.  */
  unsigned int in_start = 0;
  unsigned int out_start = nvectors;
  unsigned int hi_start = nvectors / 2;
  /* A bound on the number of outputs needed to produce NRESULTS results
     in the final iteration.  */
  unsigned int noutputs_bound = nvectors * nresults;
  for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
    {
      noutputs_bound /= 2;
      unsigned int limit = MIN (noutputs_bound, nvectors);
      for (unsigned int i = 0; i < limit; ++i)
	{
	  if ((i & 1) != 0
	      && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
			     2 * in_repeat))
	    {
	      pieces[out_start + i] = pieces[out_start + i - 1];
	      continue;
	    }

	  tree output = make_ssa_name (new_vector_type);
	  tree input1 = pieces[in_start + (i / 2)];
	  tree input2 = pieces[in_start + (i / 2) + hi_start];
	  gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
					       input1, input2,
					       permutes[i & 1]);
	  gimple_seq_add_stmt (seq, stmt);
	  pieces[out_start + i] = output;
	}
      std::swap (in_start, out_start);
    }

  /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type.  */
  results.reserve (nresults);
  for (unsigned int i = 0; i < nresults; ++i)
    if (i < nvectors)
      results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
					pieces[in_start + i]));
    else
      results.quick_push (results[i - nvectors]);
}
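/* Worked example (editorial, hypothetical variable-length target): for ELTS
   {a, b} and NVECTORS 2, step (3) builds the two broadcast vectors
   {a,a,a,...} and {b,b,b,...}; one interleaving VEC_PERM_EXPR pass in
   step (4) then produces {a,b,a,b,...}, and step (5) VIEW_CONVERTs that
   result back to the requested VECTOR_TYPE.  */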
/* For constant and loop invariant defs of SLP_NODE this function returns
   (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
   OP_NODE determines the node for the operand containing the scalar
   operands.  */

static void
vect_get_constant_vectors (slp_tree slp_node, unsigned op_num,
			   vec<tree> *vec_oprnds)
{
  slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
  stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  vec_info *vinfo = stmt_vinfo->vinfo;
  unsigned HOST_WIDE_INT nunits;
  tree vec_cst;
  unsigned j, number_of_places_left_in_vector;
  tree vector_type;
  tree vop;
  int group_size = op_node->ops.length ();
  unsigned int vec_num, i;
  unsigned number_of_copies = 1;
  bool constant_p;
  tree neutral_op = NULL;
  gimple_seq ctor_seq = NULL;
  auto_vec<tree, 16> permute_results;

  /* ??? SLP analysis should compute the vector type for the
     constant / invariant and store it in the SLP node.  */
  tree op = op_node->ops[0];
  /* Check if vector type is a boolean vector.  */
  tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
      && vect_mask_constant_operand_p (stmt_vinfo, op_num))
    vector_type = truth_type_for (stmt_vectype);
  else
    vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);

  /* ??? For lane-reducing ops we should also have the required number
     of vector stmts initialized rather than second-guessing here.  */
  unsigned int number_of_vectors;
  if (is_gimple_assign (stmt_vinfo->stmt)
      && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
	  || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
	  || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
    number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  else
    number_of_vectors
      = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
			      * TYPE_VECTOR_SUBPARTS (stmt_vectype),
			      vector_type);
  vec_oprnds->create (number_of_vectors);
  auto_vec<tree> voprnds (number_of_vectors);

  /* NUMBER_OF_COPIES is the number of times we need to use the same values in
     created vectors.  It is greater than 1 if unrolling is performed.

     For example, we have two scalar operands, s1 and s2 (e.g., group of
     strided accesses of size two), while NUNITS is four (i.e., four scalars
     of this type can be packed in a vector).  The output vector will contain
     two copies of each scalar operand: {s1, s2, s1, s2}.  (NUMBER_OF_COPIES
     will be 2).

     If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
     containing the operands.

     For example, NUNITS is four as before, and the group size is 8
     (s1, s2, ..., s8).  We will create two vectors {s1, s2, s3, s4} and
     {s5, s6, s7, s8}.  */

  /* When using duplicate_and_interleave, we just need one element for
     each scalar statement.  */
  if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
    nunits = group_size;

  number_of_copies = nunits * number_of_vectors / group_size;

  number_of_places_left_in_vector = nunits;
  constant_p = true;
  tree_vector_builder elts (vector_type, nunits, 1);
  elts.quick_grow (nunits);
  bool place_after_defs = false;
  for (j = 0; j < number_of_copies; j++)
    {
      for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
	{
	  /* Create 'vect_ = {op0,op1,...,opn}'.  */
	  number_of_places_left_in_vector--;
	  tree orig_op = op;
	  if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
	    {
	      if (CONSTANT_CLASS_P (op))
		{
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      /* Can't use VIEW_CONVERT_EXPR for booleans because
			 of possibly different sizes of scalar value and
			 vector element.  */
		      if (integer_zerop (op))
			op = build_int_cst (TREE_TYPE (vector_type), 0);
		      else if (integer_onep (op))
			op = build_all_ones_cst (TREE_TYPE (vector_type));
		      else
			gcc_unreachable ();
		    }
		  else
		    op = fold_unary (VIEW_CONVERT_EXPR,
				     TREE_TYPE (vector_type), op);
		  gcc_assert (op && CONSTANT_CLASS_P (op));
		}
	      else
		{
		  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
		  gimple *init_stmt;
		  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
		    {
		      tree true_val
			= build_all_ones_cst (TREE_TYPE (vector_type));
		      tree false_val
			= build_zero_cst (TREE_TYPE (vector_type));
		      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
		      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						       op, true_val,
						       false_val);
		    }
		  else
		    {
		      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
				   op);
		      init_stmt
			= gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
					       op);
		    }
		  gimple_seq_add_stmt (&ctor_seq, init_stmt);
		  op = new_temp;
		}
	    }
	  elts[number_of_places_left_in_vector] = op;
	  if (!CONSTANT_CLASS_P (op))
	    constant_p = false;
	  if (TREE_CODE (orig_op) == SSA_NAME
	      && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
	      && STMT_VINFO_BB_VINFO (stmt_vinfo)
	      && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
		  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
	    place_after_defs = true;

	  if (number_of_places_left_in_vector == 0)
	    {
	      if (constant_p
		  ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
		  : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
		vec_cst = gimple_build_vector (&ctor_seq, &elts);
	      else
		{
		  if (permute_results.is_empty ())
		    duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
					      elts, number_of_vectors,
					      permute_results);
		  vec_cst = permute_results[number_of_vectors - j - 1];
		}
	      tree init;
	      gimple_stmt_iterator gsi;
	      if (place_after_defs)
		{
		  stmt_vec_info last_stmt_info
		    = vect_find_last_scalar_stmt_in_slp (slp_node);
		  gsi = gsi_for_stmt (last_stmt_info->stmt);
		  init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
					   &gsi);
		}
	      else
		init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
					 NULL);
	      if (ctor_seq != NULL)
		{
		  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
		  gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
		  ctor_seq = NULL;
		}
	      voprnds.quick_push (init);
	      place_after_defs = false;
	      number_of_places_left_in_vector = nunits;
	      constant_p = true;
	      elts.new_vector (vector_type, nunits, 1);
	      elts.quick_grow (nunits);
	    }
	}
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  /* In case that VF is greater than the unrolling factor needed for the SLP
     group of stmts, NUMBER_OF_VECTORS to be created is greater than
     NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
     to replicate the vectors.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
	{
	  if (!neutral_vec)
	    neutral_vec = build_vector_from_val (vector_type, neutral_op);

	  vec_oprnds->quick_push (neutral_vec);
	}
      else
	{
	  for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
	    vec_oprnds->quick_push (vop);
	}
    }
}
/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  stmt_vec_info vec_def_stmt_info;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
    vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
}
/* Get N vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized
   stmts must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
{
  if (n == -1U)
    n = SLP_TREE_CHILDREN (slp_node).length ();

  for (unsigned i = 0; i < n; ++i)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];

      vec<tree> vec_defs = vNULL;

      /* For each operand we check if it has vectorized definitions in a
	 child node or we need to create them (for invariants and
	 constants).  */
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	{
	  vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
	  vect_get_slp_vect_defs (child, &vec_defs);
	}
      else
	vect_get_constant_vectors (slp_node, i, &vec_defs);

      vec_oprnds->quick_push (vec_defs);
    }
}
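
/* Illustrative usage sketch (not part of the original sources): a transform
   routine handling a two-operand SLP node might do roughly

     vec<vec<tree> > vec_defs;
     vec_defs.create (2);
     vect_get_slp_defs (slp_node, &vec_defs, 2);
     vec<tree> vec_oprnds0 = vec_defs[0];   (vectorized defs of operand 0)
     vec<tree> vec_oprnds1 = vec_defs[1];   (vectorized defs of operand 1)

   i.e. it receives one vector of vectorized defs per child, either looked
   up from already-vectorized internal children or freshly built
   constant/invariant vectors.  */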
/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
			      gimple_stmt_iterator *gsi, poly_uint64 vf,
			      slp_instance slp_node_instance, bool analyze_only,
			      unsigned *n_perms)
{
  stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  vec_info *vinfo = stmt_info->vinfo;
  int vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  unsigned int mask_element;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);

  mode = TYPE_MODE (vectype);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
	 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);
  /* Generate permutation masks for every NODE. Number of masks for each NODE
     is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4. I.e., we have a
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
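
  /* Worked example for illustration (not part of the original sources):
     in the comment's example the a's node has GROUP_SIZE == 3,
     DR_GROUP_SIZE == 3, a load permutation of {0,0,0} and V4SI vectors,
     so REPEATING_P below is false (4 is not a multiple of 3).  With
     VF == 4 the loop computes
       i = (j / 3) * 3 + 0  ->  0,0,0,3,3,3,6,6,6,9,9,9
     and splits each i into vector number i / 4 and element i % 4, adding
     4 whenever a second input vector is needed; that is exactly how the
     masks {0,0,0,3}, {3,3,6,6} and {2,5,5,5} above arise.  */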
  int vect_stmts_counter = 0;
  unsigned int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  vec_perm_builder mask;
  unsigned int nelts_to_build;
  unsigned int nvectors_per_build;
  bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
		      && multiple_p (nunits, group_size));
  if (repeating_p)
    {
      /* A single vector contains a whole number of copies of the node, so:
	 (a) all permutes can use the same mask; and
	 (b) the permutes only need a single vector input.  */
      mask.new_vector (nunits, group_size, 3);
      nelts_to_build = mask.encoded_nelts ();
      nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
    }
  else
    {
      /* We need to construct a separate mask for each vector statement.  */
      unsigned HOST_WIDE_INT const_nunits, const_vf;
      if (!nunits.is_constant (&const_nunits)
	  || !vf.is_constant (&const_vf))
	return false;
      mask.new_vector (const_nunits, const_nunits, 1);
      nelts_to_build = const_vf * group_size;
      nvectors_per_build = 1;
    }

  unsigned int count = mask.encoded_nelts ();
  mask.quick_grow (count);
  vec_perm_indices indices;
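
  /* Illustrative note (not part of the original sources): COUNT is the
     number of mask elements that must be filled in before one permute can
     be emitted.  When REPEATING_P, COUNT == NELTS_TO_BUILD and the loop
     below therefore builds a single (stepped) mask reused for all vector
     statements; otherwise COUNT == CONST_NUNITS and NELTS_TO_BUILD ==
     CONST_VF * GROUP_SIZE, so a fresh mask is completed and consumed every
     CONST_NUNITS iterations.  */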
  for (unsigned int j = 0; j < nelts_to_build; j++)
    {
      unsigned int iter_num = j / group_size;
      unsigned int stmt_num = j % group_size;
      unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
			+ SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
      if (repeating_p)
	{
	  first_vec_index = 0;
	  mask_element = i;
	}
      else
	{
	  /* Enforced before the loop when !repeating_p.  */
	  unsigned int const_nunits = nunits.to_constant ();
	  vec_index = i / const_nunits;
	  mask_element = i % const_nunits;
	  if (vec_index == first_vec_index
	      || first_vec_index == -1)
	    {
	      first_vec_index = vec_index;
	    }
	  else if (vec_index == second_vec_index
		   || second_vec_index == -1)
	    {
	      second_vec_index = vec_index;
	      mask_element += const_nunits;
	    }
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "permutation requires at "
				 "least three vectors %G",
				 stmt_info->stmt);
	      gcc_assert (analyze_only);
	      return false;
	    }

	  gcc_assert (mask_element < 2 * const_nunits);
	}
      if (mask_element != index)
	noop_p = false;
      mask[index++] = mask_element;

      if (index == count && !noop_p)
	{
	  indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
	  if (!can_vec_perm_const_p (mode, indices))
	    {
	      if (dump_enabled_p ())
		{
		  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				   vect_location,
				   "unsupported vect permute { ");
		  for (i = 0; i < count; ++i)
		    {
		      dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
		      dump_printf (MSG_MISSED_OPTIMIZATION, " ");
		    }
		  dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
		}
	      gcc_assert (analyze_only);
	      return false;
	    }

	  ++*n_perms;
	}

      if (index == count)
	{
	  if (!analyze_only)
	    {
	      tree mask_vec = NULL_TREE;

	      if (! noop_p)
		mask_vec = vect_gen_perm_mask_checked (vectype, indices);

	      if (second_vec_index == -1)
		second_vec_index = first_vec_index;

	      for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
		{
		  /* Generate the permute statement if necessary.  */
		  tree first_vec = dr_chain[first_vec_index + ri];
		  tree second_vec = dr_chain[second_vec_index + ri];
		  stmt_vec_info perm_stmt_info;
		  if (! noop_p)
		    {
		      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
		      tree perm_dest
			= vect_create_destination_var (gimple_assign_lhs (stmt),
						       vectype);
		      perm_dest = make_ssa_name (perm_dest);
		      gassign *perm_stmt
			= gimple_build_assign (perm_dest, VEC_PERM_EXPR,
					       first_vec, second_vec,
					       mask_vec);
		      perm_stmt_info
			= vect_finish_stmt_generation (stmt_info, perm_stmt,
						       gsi);
		    }
		  else
		    /* If mask was NULL_TREE generate the requested
		       identity transform.  */
		    perm_stmt_info = vinfo->lookup_def (first_vec);

		  /* Store the vector statement in NODE.  */
		  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
		    = perm_stmt_info;
		}
	    }

	  index = 0;
	  first_vec_index = -1;
	  second_vec_index = -1;
	  noop_p = true;
	}
    }

  return true;
}
/* Vectorize SLP instance tree in postorder.  */

static void
vect_schedule_slp_instance (slp_tree node, slp_instance instance)
{
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  /* See if we have already vectorized the node in the graph of the
     SLP instance.  */
  if (SLP_TREE_VEC_STMTS (node).exists ())
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance);
  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
	stmt_vec_info child_stmt_info;
	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
	  STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
      }

  stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
  SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "------>vectorizing SLP node starting from: %G",
		     stmt_info->stmt);

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
  si = gsi_for_stmt (last_stmt_info->stmt);
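
  /* Illustrative note (not part of the original sources): for a basic-block
     SLP group of two stores

       a[0] = x_1;
       a[1] = y_2;

     SI ends up just before "a[1] = y_2;", the last scalar statement of the
     group, where both x_1 and y_2 are already available, so the vector
     store built from {x_1, y_2} can be inserted there.  */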
  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  bool done_p = false;
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      gassign *stmt = as_a <gassign *> (stmt_info->stmt);
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      stmt_vec_info ostmt_info;
      vec_perm_builder mask (group_size, group_size, 1);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
	{
	  gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
	  if (gimple_assign_rhs_code (ostmt) != code0)
	    {
	      mask.quick_push (1);
	      ocode = gimple_assign_rhs_code (ostmt);
	    }
	  else
	    mask.quick_push (0);
	}
      if (ocode != ERROR_MARK)
	{
	  vec<stmt_vec_info> v0;
	  vec<stmt_vec_info> v1;
	  unsigned j;
	  tree tmask = NULL_TREE;
	  vect_transform_stmt (stmt_info, &si, node, instance);
	  v0 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  gimple_assign_set_rhs_code (stmt, ocode);
	  vect_transform_stmt (stmt_info, &si, node, instance);
	  gimple_assign_set_rhs_code (stmt, code0);
	  v1 = SLP_TREE_VEC_STMTS (node).copy ();
	  SLP_TREE_VEC_STMTS (node).truncate (0);
	  tree meltype = build_nonstandard_integer_type
	      (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
	  tree mvectype = get_same_sized_vectype (meltype, vectype);
	  unsigned k = 0, l;
	  for (j = 0; j < v0.length (); ++j)
	    {
	      /* Enforced by vect_build_slp_tree, which rejects variable-length
		 vectors for SLP_TREE_TWO_OPERATORS.  */
	      unsigned int const_nunits = nunits.to_constant ();
	      tree_vector_builder melts (mvectype, const_nunits, 1);
	      for (l = 0; l < const_nunits; ++l)
		{
		  if (k >= group_size)
		    k = 0;
		  tree t = build_int_cst (meltype,
					  mask[k++] * const_nunits + l);
		  melts.quick_push (t);
		}
	      tmask = melts.build ();
	      /* ??? Not all targets support a VEC_PERM_EXPR with a
		 constant mask that would translate to a vec_merge RTX
		 (with their vec_perm_const_ok).  We can either not
		 vectorize in that case or let veclower do its job.
		 Unfortunately that isn't too great and at least for
		 plus/minus we'd eventually like to match targets
		 vector addsub instructions.  */
	      gimple *vstmt;
	      vstmt = gimple_build_assign (make_ssa_name (vectype),
					   VEC_PERM_EXPR,
					   gimple_assign_lhs (v0[j]->stmt),
					   gimple_assign_lhs (v1[j]->stmt),
					   tmask);
	      SLP_TREE_VEC_STMTS (node).quick_push
		(vect_finish_stmt_generation (stmt_info, vstmt, &si));
	    }
	  v0.release ();
	  v1.release ();
	  done_p = true;
	}
    }
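
  /* Worked example for illustration (not part of the original sources):
     for a V4SF group alternating PLUS and MINUS, MASK is {0,1,0,1} and the
     merge selector built above is

       { 0*4+0, 1*4+1, 0*4+2, 1*4+3 } = { 0, 5, 2, 7 }

     so the VEC_PERM_EXPR takes lanes 0 and 2 from the all-PLUS vector and
     lanes 1 and 3 from the all-MINUS vector, yielding the desired addsub
     result.  */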
  if (!done_p)
    vect_transform_stmt (stmt_info, &si, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      {
	stmt_vec_info child_stmt_info;
	FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
	  STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
      }
}
/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
   For loop vectorization this is done in vectorizable_call, but for SLP
   it needs to be deferred until end of vect_schedule_slp, because multiple
   SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
{
  gimple *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  if (visited.add (node))
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child, visited);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
    {
      gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
      if (!stmt || gimple_bb (stmt) == NULL)
	continue;
      if (is_pattern_stmt_p (stmt_info)
	  || !PURE_SLP_STMT (stmt_info))
	continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      gsi = gsi_for_stmt (stmt);
      stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  hash_set<slp_tree> visited;
  vect_remove_slp_scalar_calls (node, visited);
}
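
/* Illustrative note (not part of the original sources): after this runs,
   a pure-SLP scalar call whose result has been vectorized, e.g.

     x_5 = some_fn (a_3);     (an integer-valued call, name hypothetical)

   is rewritten to the trivial assignment

     x_5 = 0;

   replacing the scalar call that the vectorized code has made redundant.  */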
/* Vectorize the instance root.  */

void
vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
{
  gassign *rstmt = NULL;

  if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
    {
      stmt_vec_info child_stmt_info;
      int j;

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
	{
	  tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
	  tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
	  if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
					  TREE_TYPE (vect_lhs)))
	    vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
			       vect_lhs);
	  rstmt = gimple_build_assign (root_lhs, vect_lhs);
	  break;
	}
    }
  else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
    {
      int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
      stmt_vec_info child_stmt_info;
      int j;
      vec<constructor_elt, va_gc> *v;
      vec_alloc (v, nelts);

      FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
	{
	  CONSTRUCTOR_APPEND_ELT (v,
				  NULL_TREE,
				  gimple_get_lhs (child_stmt_info->stmt));
	}
      tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
      tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
      tree r_constructor = build_constructor (rtype, v);
      rstmt = gimple_build_assign (lhs, r_constructor);
    }

  gcc_assert (rstmt);

  gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
  gsi_replace (&rgsi, rstmt, true);
}
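
/* Illustrative note (not part of the original sources): for a BB-SLP
   instance whose root is a constructor such as

     x_1 = {a_2, b_3, c_4, d_5};

   a single vector statement replaces the root with a plain copy of its
   lhs (possibly through a VIEW_CONVERT_EXPR), while several vector
   statements are re-packed as a constructor of the partial vector
   results, e.g. x_1 = {vect_lhs_0, vect_lhs_1}.  */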
/* Generate vector code for all SLP instances in the loop/basic block.  */

void
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i;

  slp_instances = vinfo->slp_instances;
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree node = SLP_INSTANCE_TREE (instance);
      /* Schedule the tree of INSTANCE.  */
      vect_schedule_slp_instance (node, instance);

      if (SLP_INSTANCE_ROOT_STMT (instance))
	vectorize_slp_instance_root_stmt (node, instance);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      stmt_vec_info store_info;
      unsigned int j;

      /* Remove scalar call stmts.  Do not do this for basic-block
	 vectorization as not all uses may be vectorized.
	 ???  Why should this be necessary?  DCE should be able to
	 remove the stmts itself.
	 ???  For BB vectorization we can as well remove scalar
	 stmts starting from the SLP tree root if they have no
	 uses.  */
      if (is_a <loop_vec_info> (vinfo))
	vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
		  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
	{
	  if (!STMT_VINFO_DATA_REF (store_info))
	    break;

	  if (SLP_INSTANCE_ROOT_STMT (instance))
	    continue;

	  store_info = vect_orig_stmt (store_info);
	  /* Free the attached stmt_vec_info and remove the stmt.  */
	  vinfo->remove_stmt (store_info);