1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
49 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
50 FINAL_P is true if we have vectorized the instance or if we have
51 made a final decision not to vectorize the statements in any way. */
54 vect_free_slp_tree (slp_tree node
, bool final_p
)
59 if (--node
->refcnt
!= 0)
62 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
63 vect_free_slp_tree (child
, final_p
);
65 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
66 Some statements might no longer exist, after having been
67 removed by vect_transform_stmt. Updating the remaining
68 statements would be redundant. */
71 stmt_vec_info stmt_info
;
72 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
74 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info
) > 0);
75 STMT_VINFO_NUM_SLP_USES (stmt_info
)--;
79 SLP_TREE_CHILDREN (node
).release ();
80 SLP_TREE_SCALAR_STMTS (node
).release ();
81 SLP_TREE_SCALAR_OPS (node
).release ();
82 SLP_TREE_VEC_STMTS (node
).release ();
83 SLP_TREE_LOAD_PERMUTATION (node
).release ();
88 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
89 have vectorized the instance or if we have made a final decision not
90 to vectorize the statements in any way. */
93 vect_free_slp_instance (slp_instance instance
, bool final_p
)
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
), final_p
);
96 SLP_INSTANCE_LOADS (instance
).release ();
101 /* Create an SLP node for SCALAR_STMTS. */
104 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
)
107 stmt_vec_info stmt_info
= scalar_stmts
[0];
110 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
111 nops
= gimple_call_num_args (stmt
);
112 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
114 nops
= gimple_num_ops (stmt
) - 1;
115 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
118 else if (is_a
<gphi
*> (stmt_info
->stmt
))
123 node
= XNEW (struct _slp_tree
);
124 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
125 SLP_TREE_SCALAR_OPS (node
) = vNULL
;
126 SLP_TREE_VEC_STMTS (node
).create (0);
127 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
128 SLP_TREE_CHILDREN (node
).create (nops
);
129 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
130 SLP_TREE_TWO_OPERATORS (node
) = false;
131 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
133 node
->max_nunits
= 1;
136 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt_info
)
137 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
142 /* Create an SLP node for OPS. */
145 vect_create_new_slp_node (vec
<tree
> ops
)
149 node
= XNEW (struct _slp_tree
);
150 SLP_TREE_SCALAR_STMTS (node
) = vNULL
;
151 SLP_TREE_SCALAR_OPS (node
) = ops
;
152 SLP_TREE_VEC_STMTS (node
).create (0);
153 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
154 SLP_TREE_CHILDREN (node
) = vNULL
;
155 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
156 SLP_TREE_TWO_OPERATORS (node
) = false;
157 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
159 node
->max_nunits
= 1;
165 /* This structure is used in creation of an SLP tree. Each instance
166 corresponds to the same operand in a group of scalar stmts in an SLP
168 typedef struct _slp_oprnd_info
170 /* Def-stmts for the operands. */
171 vec
<stmt_vec_info
> def_stmts
;
174 /* Information about the first statement, its vector def-type, type, the
175 operand itself in case it's constant, and an indication if it's a pattern
178 enum vect_def_type first_dt
;
183 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
185 static vec
<slp_oprnd_info
>
186 vect_create_oprnd_info (int nops
, int group_size
)
189 slp_oprnd_info oprnd_info
;
190 vec
<slp_oprnd_info
> oprnds_info
;
192 oprnds_info
.create (nops
);
193 for (i
= 0; i
< nops
; i
++)
195 oprnd_info
= XNEW (struct _slp_oprnd_info
);
196 oprnd_info
->def_stmts
.create (group_size
);
197 oprnd_info
->ops
.create (group_size
);
198 oprnd_info
->first_dt
= vect_uninitialized_def
;
199 oprnd_info
->first_op_type
= NULL_TREE
;
200 oprnd_info
->any_pattern
= false;
201 oprnds_info
.quick_push (oprnd_info
);
208 /* Free operands info. */
211 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
214 slp_oprnd_info oprnd_info
;
216 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
218 oprnd_info
->def_stmts
.release ();
219 oprnd_info
->ops
.release ();
220 XDELETE (oprnd_info
);
223 oprnds_info
.release ();
227 /* Return true if STMTS contains a pattern statement. */
230 vect_contains_pattern_stmt_p (vec
<stmt_vec_info
> stmts
)
232 stmt_vec_info stmt_info
;
234 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
235 if (is_pattern_stmt_p (stmt_info
))
240 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
241 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
245 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info
,
246 stmt_vec_info first_stmt_info
)
248 stmt_vec_info next_stmt_info
= first_stmt_info
;
251 if (first_stmt_info
!= DR_GROUP_FIRST_ELEMENT (stmt_info
))
256 if (next_stmt_info
== stmt_info
)
258 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
260 result
+= DR_GROUP_GAP (next_stmt_info
);
262 while (next_stmt_info
);
267 /* Check whether it is possible to load COUNT elements of type ELT_TYPE
268 using the method implemented by duplicate_and_interleave. Return true
269 if so, returning the number of intermediate vectors in *NVECTORS_OUT
270 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
274 can_duplicate_and_interleave_p (vec_info
*vinfo
, unsigned int count
,
275 tree elt_type
, unsigned int *nvectors_out
,
276 tree
*vector_type_out
,
279 tree base_vector_type
= get_vectype_for_scalar_type (vinfo
, elt_type
, count
);
280 if (!base_vector_type
|| !VECTOR_MODE_P (TYPE_MODE (base_vector_type
)))
283 machine_mode base_vector_mode
= TYPE_MODE (base_vector_type
);
284 poly_int64 elt_bytes
= count
* GET_MODE_UNIT_SIZE (base_vector_mode
);
285 unsigned int nvectors
= 1;
288 scalar_int_mode int_mode
;
289 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
290 if (int_mode_for_size (elt_bits
, 1).exists (&int_mode
))
292 /* Get the natural vector type for this SLP group size. */
293 tree int_type
= build_nonstandard_integer_type
294 (GET_MODE_BITSIZE (int_mode
), 1);
296 = get_vectype_for_scalar_type (vinfo
, int_type
, count
);
298 && VECTOR_MODE_P (TYPE_MODE (vector_type
))
299 && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
300 GET_MODE_SIZE (base_vector_mode
)))
302 /* Try fusing consecutive sequences of COUNT / NVECTORS elements
303 together into elements of type INT_TYPE and using the result
304 to build NVECTORS vectors. */
305 poly_uint64 nelts
= GET_MODE_NUNITS (TYPE_MODE (vector_type
));
306 vec_perm_builder
sel1 (nelts
, 2, 3);
307 vec_perm_builder
sel2 (nelts
, 2, 3);
308 poly_int64 half_nelts
= exact_div (nelts
, 2);
309 for (unsigned int i
= 0; i
< 3; ++i
)
312 sel1
.quick_push (i
+ nelts
);
313 sel2
.quick_push (half_nelts
+ i
);
314 sel2
.quick_push (half_nelts
+ i
+ nelts
);
316 vec_perm_indices
indices1 (sel1
, 2, nelts
);
317 vec_perm_indices
indices2 (sel2
, 2, nelts
);
318 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
319 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
322 *nvectors_out
= nvectors
;
324 *vector_type_out
= vector_type
;
327 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
329 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
336 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
342 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
343 they are of a valid type and that they match the defs of the first stmt of
344 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
345 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
346 indicates swap is required for cond_expr stmts. Specifically, *SWAP
347 is 1 if STMT is cond and operands of comparison need to be swapped;
348 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
349 If there is any operand swap in this function, *SWAP is set to non-zero
351 If there was a fatal error return -1; if the error could be corrected by
352 swapping operands of father node of this one, return 1; if everything is
355 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
356 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
357 vec
<slp_oprnd_info
> *oprnds_info
)
359 stmt_vec_info stmt_info
= stmts
[stmt_num
];
361 unsigned int i
, number_of_oprnds
;
362 enum vect_def_type dt
= vect_uninitialized_def
;
363 slp_oprnd_info oprnd_info
;
364 int first_op_idx
= 1;
365 unsigned int commutative_op
= -1U;
366 bool first_op_cond
= false;
367 bool first
= stmt_num
== 0;
369 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
371 number_of_oprnds
= gimple_call_num_args (stmt
);
373 if (gimple_call_internal_p (stmt
))
375 internal_fn ifn
= gimple_call_internal_fn (stmt
);
376 commutative_op
= first_commutative_argument (ifn
);
378 /* Masked load, only look at mask. */
379 if (ifn
== IFN_MASK_LOAD
)
381 number_of_oprnds
= 1;
382 /* Mask operand index. */
387 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
389 enum tree_code code
= gimple_assign_rhs_code (stmt
);
390 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
391 /* Swap can only be done for cond_expr if asked to, otherwise we
392 could result in different comparison code to the first stmt. */
393 if (code
== COND_EXPR
394 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
396 first_op_cond
= true;
400 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
405 bool swapped
= (*swap
!= 0);
406 gcc_assert (!swapped
|| first_op_cond
);
407 for (i
= 0; i
< number_of_oprnds
; i
++)
412 /* Map indicating how operands of cond_expr should be swapped. */
413 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
414 int *map
= maps
[*swap
];
417 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
418 first_op_idx
), map
[i
]);
420 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
423 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
424 if (TREE_CODE (oprnd
) == VIEW_CONVERT_EXPR
)
425 oprnd
= TREE_OPERAND (oprnd
, 0);
427 oprnd_info
= (*oprnds_info
)[i
];
429 stmt_vec_info def_stmt_info
;
430 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
432 if (dump_enabled_p ())
433 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
434 "Build SLP failed: can't analyze def for %T\n",
440 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
441 oprnd_info
->any_pattern
= true;
445 /* For the swapping logic below force vect_reduction_def
446 for the reduction op in a SLP reduction group. */
447 if (!STMT_VINFO_DATA_REF (stmt_info
)
448 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
449 && (int)i
== STMT_VINFO_REDUC_IDX (stmt_info
)
451 dt
= vect_reduction_def
;
452 oprnd_info
->first_dt
= dt
;
453 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
457 /* Not first stmt of the group, check that the def-stmt/s match
458 the def-stmt/s of the first stmt. Allow different definition
459 types for reduction chains: the first stmt must be a
460 vect_reduction_def (a phi node), and the rest
461 end in the reduction chain. */
462 tree type
= TREE_TYPE (oprnd
);
463 if ((oprnd_info
->first_dt
!= dt
464 && !(oprnd_info
->first_dt
== vect_reduction_def
465 && !STMT_VINFO_DATA_REF (stmt_info
)
466 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
468 && !STMT_VINFO_DATA_REF (def_stmt_info
)
469 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
470 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
471 && !((oprnd_info
->first_dt
== vect_external_def
472 || oprnd_info
->first_dt
== vect_constant_def
)
473 && (dt
== vect_external_def
474 || dt
== vect_constant_def
)))
475 || !types_compatible_p (oprnd_info
->first_op_type
, type
)
476 || (!STMT_VINFO_DATA_REF (stmt_info
)
477 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
479 || STMT_VINFO_DATA_REF (def_stmt_info
)
480 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
481 != REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
482 != (oprnd_info
->first_dt
!= vect_reduction_def
))))
484 /* Try swapping operands if we got a mismatch. */
485 if (i
== commutative_op
&& !swapped
)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE
, vect_location
,
489 "trying swapped operands\n");
494 if (dump_enabled_p ())
495 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
496 "Build SLP failed: different types\n");
500 if ((dt
== vect_constant_def
501 || dt
== vect_external_def
)
502 && !GET_MODE_SIZE (vinfo
->vector_mode
).is_constant ()
503 && (TREE_CODE (type
) == BOOLEAN_TYPE
504 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
509 "Build SLP failed: invalid type of def "
510 "for variable-length SLP %T\n", oprnd
);
515 /* Check the types of the definitions. */
518 case vect_external_def
:
519 /* Make sure to demote the overall operand to external. */
520 oprnd_info
->first_dt
= vect_external_def
;
522 case vect_constant_def
:
523 oprnd_info
->def_stmts
.quick_push (NULL
);
524 oprnd_info
->ops
.quick_push (oprnd
);
527 case vect_internal_def
:
528 case vect_reduction_def
:
529 if (oprnd_info
->first_dt
== vect_reduction_def
530 && !STMT_VINFO_DATA_REF (stmt_info
)
531 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
532 && !STMT_VINFO_DATA_REF (def_stmt_info
)
533 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
534 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
536 /* For a SLP reduction chain we want to duplicate the
537 reduction to each of the chain members. That gets
538 us a sane SLP graph (still the stmts are not 100%
539 correct wrt the initial values). */
541 oprnd_info
->def_stmts
.quick_push (oprnd_info
->def_stmts
[0]);
542 oprnd_info
->ops
.quick_push (oprnd_info
->ops
[0]);
546 case vect_induction_def
:
547 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
548 oprnd_info
->ops
.quick_push (oprnd
);
552 /* FORNOW: Not supported. */
553 if (dump_enabled_p ())
554 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
555 "Build SLP failed: illegal type of def %T\n",
565 if (dump_enabled_p ())
566 dump_printf_loc (MSG_NOTE
, vect_location
,
567 "swapped operands to match def types in %G",
575 /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
576 Return true if we can, meaning that this choice doesn't conflict with
577 existing SLP nodes that use STMT_INFO. */
580 vect_update_shared_vectype (stmt_vec_info stmt_info
, tree vectype
)
582 tree old_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
583 if (old_vectype
&& useless_type_conversion_p (vectype
, old_vectype
))
586 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
587 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
589 /* We maintain the invariant that if any statement in the group is
590 used, all other members of the group have the same vector type. */
591 stmt_vec_info first_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
592 stmt_vec_info member_info
= first_info
;
593 for (; member_info
; member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
594 if (STMT_VINFO_NUM_SLP_USES (member_info
) > 0
595 || is_pattern_stmt_p (member_info
))
600 for (member_info
= first_info
; member_info
;
601 member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
602 STMT_VINFO_VECTYPE (member_info
) = vectype
;
606 else if (STMT_VINFO_NUM_SLP_USES (stmt_info
) == 0
607 && !is_pattern_stmt_p (stmt_info
))
609 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
613 if (dump_enabled_p ())
615 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
616 "Build SLP failed: incompatible vector"
617 " types for: %G", stmt_info
->stmt
);
618 dump_printf_loc (MSG_NOTE
, vect_location
,
619 " old vector type: %T\n", old_vectype
);
620 dump_printf_loc (MSG_NOTE
, vect_location
,
621 " new vector type: %T\n", vectype
);
626 /* Try to infer and assign a vector type to all the statements in STMTS.
627 Used only for BB vectorization. */
630 vect_update_all_shared_vectypes (vec_info
*vinfo
, vec
<stmt_vec_info
> stmts
)
632 tree vectype
, nunits_vectype
;
633 if (!vect_get_vector_types_for_stmt (vinfo
, stmts
[0], &vectype
,
634 &nunits_vectype
, stmts
.length ()))
637 stmt_vec_info stmt_info
;
639 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
640 if (!vect_update_shared_vectype (stmt_info
, vectype
))
646 /* Return true if call statements CALL1 and CALL2 are similar enough
647 to be combined into the same SLP group. */
650 compatible_calls_p (gcall
*call1
, gcall
*call2
)
652 unsigned int nargs
= gimple_call_num_args (call1
);
653 if (nargs
!= gimple_call_num_args (call2
))
656 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
659 if (gimple_call_internal_p (call1
))
661 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
662 TREE_TYPE (gimple_call_lhs (call2
))))
664 for (unsigned int i
= 0; i
< nargs
; ++i
)
665 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
666 TREE_TYPE (gimple_call_arg (call2
, i
))))
671 if (!operand_equal_p (gimple_call_fn (call1
),
672 gimple_call_fn (call2
), 0))
675 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
681 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
682 caller's attempt to find the vector type in STMT_INFO with the narrowest
683 element type. Return true if VECTYPE is nonnull and if it is valid
684 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
685 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
686 vect_build_slp_tree. */
689 vect_record_max_nunits (vec_info
*vinfo
, stmt_vec_info stmt_info
,
690 unsigned int group_size
,
691 tree vectype
, poly_uint64
*max_nunits
)
695 if (dump_enabled_p ())
696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
697 "Build SLP failed: unsupported data-type in %G\n",
699 /* Fatal mismatch. */
703 /* If populating the vector type requires unrolling then fail
704 before adjusting *max_nunits for basic-block vectorization. */
705 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
706 unsigned HOST_WIDE_INT const_nunits
;
707 if (is_a
<bb_vec_info
> (vinfo
)
708 && (!nunits
.is_constant (&const_nunits
)
709 || const_nunits
> group_size
))
711 if (dump_enabled_p ())
712 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
713 "Build SLP failed: unrolling required "
714 "in basic block SLP\n");
715 /* Fatal mismatch. */
719 /* In case of multiple types we need to detect the smallest type. */
720 vect_update_max_nunits (max_nunits
, vectype
);
724 /* STMTS is a group of GROUP_SIZE SLP statements in which some
725 statements do the same operation as the first statement and in which
726 the others do ALT_STMT_CODE. Return true if we can take one vector
727 of the first operation and one vector of the second and permute them
728 to get the required result. VECTYPE is the type of the vector that
729 would be permuted. */
732 vect_two_operations_perm_ok_p (vec
<stmt_vec_info
> stmts
,
733 unsigned int group_size
, tree vectype
,
734 tree_code alt_stmt_code
)
736 unsigned HOST_WIDE_INT count
;
737 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&count
))
740 vec_perm_builder
sel (count
, count
, 1);
741 for (unsigned int i
= 0; i
< count
; ++i
)
743 unsigned int elt
= i
;
744 gassign
*stmt
= as_a
<gassign
*> (stmts
[i
% group_size
]->stmt
);
745 if (gimple_assign_rhs_code (stmt
) == alt_stmt_code
)
747 sel
.quick_push (elt
);
749 vec_perm_indices
indices (sel
, 2, count
);
750 return can_vec_perm_const_p (TYPE_MODE (vectype
), indices
);
753 /* Verify if the scalar stmts STMTS are isomorphic, require data
754 permutation or are of unsupported types of operation. Return
755 true if they are, otherwise return false and indicate in *MATCHES
756 which stmts are not isomorphic to the first one. If MATCHES[0]
757 is false then this indicates the comparison could not be
758 carried out or the stmts will never be vectorized by SLP.
760 Note COND_EXPR is possibly isomorphic to another one after swapping its
761 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
762 the first stmt by swapping the two operands of comparison; set SWAP[i]
763 to 2 if stmt I is isormorphic to the first stmt by inverting the code
764 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
765 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
768 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
769 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
770 poly_uint64
*max_nunits
, bool *matches
,
774 stmt_vec_info first_stmt_info
= stmts
[0];
775 enum tree_code first_stmt_code
= ERROR_MARK
;
776 enum tree_code alt_stmt_code
= ERROR_MARK
;
777 enum tree_code rhs_code
= ERROR_MARK
;
778 enum tree_code first_cond_code
= ERROR_MARK
;
780 bool need_same_oprnds
= false;
781 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
784 machine_mode optab_op2_mode
;
785 machine_mode vec_mode
;
786 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
789 /* For every stmt in NODE find its def stmt/s. */
790 stmt_vec_info stmt_info
;
791 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
793 gimple
*stmt
= stmt_info
->stmt
;
797 if (dump_enabled_p ())
798 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
800 /* Fail to vectorize statements marked as unvectorizable. */
801 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
803 if (dump_enabled_p ())
804 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
805 "Build SLP failed: unvectorizable statement %G",
807 /* Fatal mismatch. */
812 lhs
= gimple_get_lhs (stmt
);
813 if (lhs
== NULL_TREE
)
815 if (dump_enabled_p ())
816 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
817 "Build SLP failed: not GIMPLE_ASSIGN nor "
818 "GIMPLE_CALL %G", stmt
);
819 /* Fatal mismatch. */
825 if (!vect_get_vector_types_for_stmt (vinfo
, stmt_info
, &vectype
,
826 &nunits_vectype
, group_size
)
828 && !vect_record_max_nunits (vinfo
, stmt_info
, group_size
,
829 nunits_vectype
, max_nunits
)))
831 /* Fatal mismatch. */
836 gcc_assert (vectype
);
838 if (is_a
<bb_vec_info
> (vinfo
)
839 && !vect_update_shared_vectype (stmt_info
, vectype
))
842 gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
);
845 rhs_code
= CALL_EXPR
;
847 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
849 else if ((gimple_call_internal_p (call_stmt
)
850 && (!vectorizable_internal_fn_p
851 (gimple_call_internal_fn (call_stmt
))))
852 || gimple_call_tail_p (call_stmt
)
853 || gimple_call_noreturn_p (call_stmt
)
854 || !gimple_call_nothrow_p (call_stmt
)
855 || gimple_call_chain (call_stmt
))
857 if (dump_enabled_p ())
858 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
859 "Build SLP failed: unsupported call type %G",
861 /* Fatal mismatch. */
868 rhs_code
= gimple_assign_rhs_code (stmt
);
869 load_p
= TREE_CODE_CLASS (rhs_code
) == tcc_reference
;
872 /* Check the operation. */
875 first_stmt_code
= rhs_code
;
877 /* Shift arguments should be equal in all the packed stmts for a
878 vector shift with scalar shift operand. */
879 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
880 || rhs_code
== LROTATE_EXPR
881 || rhs_code
== RROTATE_EXPR
)
883 vec_mode
= TYPE_MODE (vectype
);
885 /* First see if we have a vector/vector shift. */
886 optab
= optab_for_tree_code (rhs_code
, vectype
,
890 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
892 /* No vector/vector shift, try for a vector/scalar shift. */
893 optab
= optab_for_tree_code (rhs_code
, vectype
,
898 if (dump_enabled_p ())
899 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
900 "Build SLP failed: no optab.\n");
901 /* Fatal mismatch. */
905 icode
= (int) optab_handler (optab
, vec_mode
);
906 if (icode
== CODE_FOR_nothing
)
908 if (dump_enabled_p ())
909 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
911 "op not supported by target.\n");
912 /* Fatal mismatch. */
916 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
917 if (!VECTOR_MODE_P (optab_op2_mode
))
919 need_same_oprnds
= true;
920 first_op1
= gimple_assign_rhs2 (stmt
);
924 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
926 need_same_oprnds
= true;
927 first_op1
= gimple_assign_rhs2 (stmt
);
930 && gimple_call_internal_p (call_stmt
, IFN_DIV_POW2
))
932 need_same_oprnds
= true;
933 first_op1
= gimple_call_arg (call_stmt
, 1);
938 if (first_stmt_code
!= rhs_code
939 && alt_stmt_code
== ERROR_MARK
)
940 alt_stmt_code
= rhs_code
;
941 if (first_stmt_code
!= rhs_code
942 && (first_stmt_code
!= IMAGPART_EXPR
943 || rhs_code
!= REALPART_EXPR
)
944 && (first_stmt_code
!= REALPART_EXPR
945 || rhs_code
!= IMAGPART_EXPR
)
946 /* Handle mismatches in plus/minus by computing both
947 and merging the results. */
948 && !((first_stmt_code
== PLUS_EXPR
949 || first_stmt_code
== MINUS_EXPR
)
950 && (alt_stmt_code
== PLUS_EXPR
951 || alt_stmt_code
== MINUS_EXPR
)
952 && rhs_code
== alt_stmt_code
)
953 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
954 && (first_stmt_code
== ARRAY_REF
955 || first_stmt_code
== BIT_FIELD_REF
956 || first_stmt_code
== INDIRECT_REF
957 || first_stmt_code
== COMPONENT_REF
958 || first_stmt_code
== MEM_REF
)))
960 if (dump_enabled_p ())
962 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
963 "Build SLP failed: different operation "
965 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
966 "original stmt %G", first_stmt_info
->stmt
);
972 if (need_same_oprnds
)
974 tree other_op1
= (call_stmt
975 ? gimple_call_arg (call_stmt
, 1)
976 : gimple_assign_rhs2 (stmt
));
977 if (!operand_equal_p (first_op1
, other_op1
, 0))
979 if (dump_enabled_p ())
980 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
981 "Build SLP failed: different shift "
982 "arguments in %G", stmt
);
988 if (!load_p
&& rhs_code
== CALL_EXPR
)
990 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
991 as_a
<gcall
*> (stmt
)))
993 if (dump_enabled_p ())
994 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
995 "Build SLP failed: different calls in %G",
1003 /* Grouped store or load. */
1004 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1006 if (REFERENCE_CLASS_P (lhs
))
1014 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1015 if (prev_first_load
)
1017 /* Check that there are no loads from different interleaving
1018 chains in the same node. */
1019 if (prev_first_load
!= first_load
)
1021 if (dump_enabled_p ())
1022 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1024 "Build SLP failed: different "
1025 "interleaving chains in one node %G",
1032 prev_first_load
= first_load
;
1034 } /* Grouped access. */
1039 /* Not grouped load. */
1040 if (dump_enabled_p ())
1041 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1042 "Build SLP failed: not grouped load %G", stmt
);
1044 /* FORNOW: Not grouped loads are not supported. */
1045 /* Fatal mismatch. */
1050 /* Not memory operation. */
1051 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
1052 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
1053 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
1054 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
1055 && rhs_code
!= VIEW_CONVERT_EXPR
1056 && rhs_code
!= CALL_EXPR
)
1058 if (dump_enabled_p ())
1059 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1060 "Build SLP failed: operation unsupported %G",
1062 /* Fatal mismatch. */
1067 if (rhs_code
== COND_EXPR
)
1069 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1070 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1071 enum tree_code swap_code
= ERROR_MARK
;
1072 enum tree_code invert_code
= ERROR_MARK
;
1075 first_cond_code
= TREE_CODE (cond_expr
);
1076 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1078 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1079 swap_code
= swap_tree_comparison (cond_code
);
1080 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1083 if (first_cond_code
== cond_code
)
1085 /* Isomorphic can be achieved by swapping. */
1086 else if (first_cond_code
== swap_code
)
1088 /* Isomorphic can be achieved by inverting. */
1089 else if (first_cond_code
== invert_code
)
1093 if (dump_enabled_p ())
1094 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1095 "Build SLP failed: different"
1096 " operation %G", stmt
);
1106 for (i
= 0; i
< group_size
; ++i
)
1110 /* If we allowed a two-operation SLP node verify the target can cope
1111 with the permute we are going to use. */
1112 if (alt_stmt_code
!= ERROR_MARK
1113 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1115 if (!vect_two_operations_perm_ok_p (stmts
, group_size
,
1116 vectype
, alt_stmt_code
))
1118 for (i
= 0; i
< group_size
; ++i
)
1119 if (gimple_assign_rhs_code (stmts
[i
]->stmt
) == alt_stmt_code
)
1122 if (dump_enabled_p ())
1124 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1125 "Build SLP failed: different operation "
1126 "in stmt %G", stmts
[i
]->stmt
);
1127 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1128 "original stmt %G", first_stmt_info
->stmt
);
1133 *two_operators
= true;
1139 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1140 Note we never remove apart from at destruction time so we do not
1141 need a special value for deleted that differs from empty. */
1144 typedef vec
<stmt_vec_info
> value_type
;
1145 typedef vec
<stmt_vec_info
> compare_type
;
1146 static inline hashval_t
hash (value_type
);
1147 static inline bool equal (value_type existing
, value_type candidate
);
1148 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1149 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1150 static const bool empty_zero_p
= true;
1151 static inline void mark_empty (value_type
&x
) { x
.release (); }
1152 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1153 static inline void remove (value_type
&x
) { x
.release (); }
1156 bst_traits::hash (value_type x
)
1159 for (unsigned i
= 0; i
< x
.length (); ++i
)
1160 h
.add_int (gimple_uid (x
[i
]->stmt
));
1164 bst_traits::equal (value_type existing
, value_type candidate
)
1166 if (existing
.length () != candidate
.length ())
1168 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1169 if (existing
[i
] != candidate
[i
])
1174 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1175 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1176 scalar_stmts_to_slp_tree_map_t
;
1179 vect_build_slp_tree_2 (vec_info
*vinfo
,
1180 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1181 poly_uint64
*max_nunits
,
1182 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1183 scalar_stmts_to_slp_tree_map_t
*bst_map
);
1186 vect_build_slp_tree (vec_info
*vinfo
,
1187 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1188 poly_uint64
*max_nunits
,
1189 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1190 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1192 if (slp_tree
*leader
= bst_map
->get (stmts
))
1194 if (dump_enabled_p ())
1195 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1196 *leader
? "" : "failed ", *leader
);
1199 (*leader
)->refcnt
++;
1200 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
1204 poly_uint64 this_max_nunits
= 1;
1205 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
,
1207 matches
, npermutes
, tree_size
, bst_map
);
1210 res
->max_nunits
= this_max_nunits
;
1211 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1212 /* Keep a reference for the bst_map use. */
1215 bst_map
->put (stmts
.copy (), res
);
1219 /* Recursively build an SLP tree starting from NODE.
1220 Fail (and return a value not equal to zero) if def-stmts are not
1221 isomorphic, require data permutation or are of unsupported types of
1222 operation. Otherwise, return 0.
1223 The value returned is the depth in the SLP tree where a mismatch
1227 vect_build_slp_tree_2 (vec_info
*vinfo
,
1228 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1229 poly_uint64
*max_nunits
,
1230 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1231 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1233 unsigned nops
, i
, this_tree_size
= 0;
1234 poly_uint64 this_max_nunits
= *max_nunits
;
1239 stmt_vec_info stmt_info
= stmts
[0];
1240 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1241 nops
= gimple_call_num_args (stmt
);
1242 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1244 nops
= gimple_num_ops (stmt
) - 1;
1245 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1248 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1253 /* If the SLP node is a PHI (induction or reduction), terminate
1255 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1257 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1258 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
1259 if (!vect_record_max_nunits (vinfo
, stmt_info
, group_size
, vectype
,
1263 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1264 /* Induction from different IVs is not supported. */
1265 if (def_type
== vect_induction_def
)
1267 stmt_vec_info other_info
;
1268 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1269 if (stmt_info
!= other_info
)
1272 else if (def_type
== vect_reduction_def
1273 || def_type
== vect_double_reduction_def
1274 || def_type
== vect_nested_cycle
)
1276 /* Else def types have to match. */
1277 stmt_vec_info other_info
;
1278 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1279 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1285 node
= vect_create_new_slp_node (stmts
);
1290 bool two_operators
= false;
1291 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1292 if (!vect_build_slp_tree_1 (vinfo
, swap
, stmts
, group_size
,
1293 &this_max_nunits
, matches
, &two_operators
))
1296 /* If the SLP node is a load, terminate the recursion unless masked. */
1297 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1298 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1300 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1303 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1308 *max_nunits
= this_max_nunits
;
1310 node
= vect_create_new_slp_node (stmts
);
1311 /* And compute the load permutation. Whether it is actually
1312 a permutation depends on the unrolling factor which is
1314 vec
<unsigned> load_permutation
;
1316 stmt_vec_info load_info
;
1317 load_permutation
.create (group_size
);
1318 stmt_vec_info first_stmt_info
1319 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
1320 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1322 int load_place
= vect_get_place_in_interleaving_chain
1323 (load_info
, first_stmt_info
);
1324 gcc_assert (load_place
!= -1);
1325 load_permutation
.safe_push (load_place
);
1327 SLP_TREE_LOAD_PERMUTATION (node
) = load_permutation
;
1332 /* Get at the operands, verifying they are compatible. */
1333 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1334 slp_oprnd_info oprnd_info
;
1335 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1337 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1338 stmts
, i
, &oprnds_info
);
1340 matches
[(res
== -1) ? 0 : i
] = false;
1344 for (i
= 0; i
< group_size
; ++i
)
1347 vect_free_oprnd_info (oprnds_info
);
1351 auto_vec
<slp_tree
, 4> children
;
1353 stmt_info
= stmts
[0];
1355 /* Create SLP_TREE nodes for the definition node/s. */
1356 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1359 unsigned old_tree_size
= this_tree_size
;
1362 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1364 /* COND_EXPR have one too many eventually if the condition
1366 gcc_assert (i
== 3 && nops
== 4);
1370 if (oprnd_info
->first_dt
!= vect_internal_def
1371 && oprnd_info
->first_dt
!= vect_reduction_def
1372 && oprnd_info
->first_dt
!= vect_induction_def
)
1374 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1375 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1376 oprnd_info
->ops
= vNULL
;
1377 children
.safe_push (invnode
);
1381 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1382 group_size
, &this_max_nunits
,
1384 &this_tree_size
, bst_map
)) != NULL
)
1386 /* If we have all children of a non-unary child built up from
1387 scalars then just throw that away and build it up this node
1389 if (is_a
<bb_vec_info
> (vinfo
)
1390 && SLP_TREE_CHILDREN (child
).length () > 1
1391 /* ??? Rejecting patterns this way doesn't work. We'd have to
1392 do extra work to cancel the pattern so the uses see the
1394 && !oprnd_info
->any_pattern
)
1396 slp_tree grandchild
;
1398 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1399 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1402 && vect_update_all_shared_vectypes (vinfo
,
1403 oprnd_info
->def_stmts
))
1406 this_tree_size
= old_tree_size
;
1407 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1408 vect_free_slp_tree (grandchild
, false);
1409 SLP_TREE_CHILDREN (child
).truncate (0);
1411 if (dump_enabled_p ())
1412 dump_printf_loc (MSG_NOTE
, vect_location
,
1413 "Building parent vector operands from "
1414 "scalars instead\n");
1415 oprnd_info
->def_stmts
= vNULL
;
1416 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1417 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1418 oprnd_info
->ops
= vNULL
;
1420 children
.safe_push (child
);
1425 oprnd_info
->def_stmts
= vNULL
;
1426 children
.safe_push (child
);
1430 /* If the SLP build failed fatally and we analyze a basic-block
1431 simply treat nodes we fail to build as externally defined
1432 (and thus build vectors from the scalar defs).
1433 The cost model will reject outright expensive cases.
1434 ??? This doesn't treat cases where permutation ultimatively
1435 fails (or we don't try permutation below). Ideally we'd
1436 even compute a permutation that will end up with the maximum
1438 if (is_a
<bb_vec_info
> (vinfo
)
1440 /* ??? Rejecting patterns this way doesn't work. We'd have to
1441 do extra work to cancel the pattern so the uses see the
1443 && !is_pattern_stmt_p (stmt_info
)
1444 && !oprnd_info
->any_pattern
1445 && vect_update_all_shared_vectypes (vinfo
, oprnd_info
->def_stmts
))
1447 if (dump_enabled_p ())
1448 dump_printf_loc (MSG_NOTE
, vect_location
,
1449 "Building vector operands from scalars\n");
1451 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1452 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1453 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1454 children
.safe_push (child
);
1455 oprnd_info
->ops
= vNULL
;
1456 oprnd_info
->def_stmts
= vNULL
;
1460 /* If the SLP build for operand zero failed and operand zero
1461 and one can be commutated try that for the scalar stmts
1462 that failed the match. */
1464 /* A first scalar stmt mismatch signals a fatal mismatch. */
1466 /* ??? For COND_EXPRs we can swap the comparison operands
1467 as well as the arms under some constraints. */
1469 && oprnds_info
[1]->first_dt
== vect_internal_def
1470 && is_gimple_assign (stmt_info
->stmt
)
1471 /* Swapping operands for reductions breaks assumptions later on. */
1472 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1473 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
1474 /* Do so only if the number of not successful permutes was nor more
1475 than a cut-ff as re-trying the recursive match on
1476 possibly each level of the tree would expose exponential
1480 /* See whether we can swap the matching or the non-matching
1482 bool swap_not_matching
= true;
1485 for (j
= 0; j
< group_size
; ++j
)
1487 if (matches
[j
] != !swap_not_matching
)
1489 stmt_vec_info stmt_info
= stmts
[j
];
1490 /* Verify if we can swap operands of this stmt. */
1491 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1493 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1495 if (!swap_not_matching
)
1497 swap_not_matching
= false;
1502 while (j
!= group_size
);
1504 /* Swap mismatched definition stmts. */
1505 if (dump_enabled_p ())
1506 dump_printf_loc (MSG_NOTE
, vect_location
,
1507 "Re-trying with swapped operands of stmts ");
1508 for (j
= 0; j
< group_size
; ++j
)
1509 if (matches
[j
] == !swap_not_matching
)
1511 std::swap (oprnds_info
[0]->def_stmts
[j
],
1512 oprnds_info
[1]->def_stmts
[j
]);
1513 std::swap (oprnds_info
[0]->ops
[j
],
1514 oprnds_info
[1]->ops
[j
]);
1515 if (dump_enabled_p ())
1516 dump_printf (MSG_NOTE
, "%d ", j
);
1518 if (dump_enabled_p ())
1519 dump_printf (MSG_NOTE
, "\n");
1520 /* And try again with scratch 'matches' ... */
1521 bool *tem
= XALLOCAVEC (bool, group_size
);
1522 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1523 group_size
, &this_max_nunits
,
1525 &this_tree_size
, bst_map
)) != NULL
)
1527 /* If we have all children of a non-unary child built up from
1528 scalars then just throw that away and build it up this node
1530 if (is_a
<bb_vec_info
> (vinfo
)
1531 && SLP_TREE_CHILDREN (child
).length () > 1
1532 /* ??? Rejecting patterns this way doesn't work. We'd have
1533 to do extra work to cancel the pattern so the uses see the
1535 && !oprnd_info
->any_pattern
)
1538 slp_tree grandchild
;
1540 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1541 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1544 && (vect_update_all_shared_vectypes
1545 (vinfo
, oprnd_info
->def_stmts
)))
1548 this_tree_size
= old_tree_size
;
1549 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1550 vect_free_slp_tree (grandchild
, false);
1551 SLP_TREE_CHILDREN (child
).truncate (0);
1553 if (dump_enabled_p ())
1554 dump_printf_loc (MSG_NOTE
, vect_location
,
1555 "Building parent vector operands from "
1556 "scalars instead\n");
1557 oprnd_info
->def_stmts
= vNULL
;
1558 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1559 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1560 oprnd_info
->ops
= vNULL
;
1562 children
.safe_push (child
);
1567 oprnd_info
->def_stmts
= vNULL
;
1568 children
.safe_push (child
);
1576 gcc_assert (child
== NULL
);
1577 FOR_EACH_VEC_ELT (children
, j
, child
)
1578 vect_free_slp_tree (child
, false);
1579 vect_free_oprnd_info (oprnds_info
);
1583 vect_free_oprnd_info (oprnds_info
);
1585 *tree_size
+= this_tree_size
+ 1;
1586 *max_nunits
= this_max_nunits
;
1588 node
= vect_create_new_slp_node (stmts
);
1589 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1590 SLP_TREE_CHILDREN (node
).splice (children
);
1594 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1597 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1598 slp_tree node
, hash_set
<slp_tree
> &visited
)
1601 stmt_vec_info stmt_info
;
1605 if (visited
.add (node
))
1608 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1609 dump_user_location_t user_loc
= loc
.get_user_location ();
1610 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u, refcnt=%u)\n",
1611 SLP_TREE_DEF_TYPE (node
) == vect_external_def
1613 : (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
1616 estimated_poly_value (node
->max_nunits
), node
->refcnt
);
1617 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1618 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1619 dump_printf_loc (metadata
, user_loc
, "\tstmt %u %G", i
, stmt_info
->stmt
);
1622 dump_printf_loc (metadata
, user_loc
, "\t{ ");
1623 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1624 dump_printf (metadata
, "%T%s ", op
,
1625 i
< SLP_TREE_SCALAR_OPS (node
).length () - 1 ? "," : "");
1626 dump_printf (metadata
, "}\n");
1628 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1630 dump_printf_loc (metadata
, user_loc
, "\tload permutation {");
1631 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), i
, j
)
1632 dump_printf (dump_kind
, " %u", j
);
1633 dump_printf (dump_kind
, " }\n");
1635 if (SLP_TREE_CHILDREN (node
).is_empty ())
1637 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1638 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1639 dump_printf (dump_kind
, " %p", (void *)child
);
1640 dump_printf (dump_kind
, "\n");
1641 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1642 vect_print_slp_tree (dump_kind
, loc
, child
, visited
);
1646 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1649 hash_set
<slp_tree
> visited
;
1650 vect_print_slp_tree (dump_kind
, loc
, node
, visited
);
1653 /* Mark the tree rooted at NODE with PURE_SLP. */
1656 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
1659 stmt_vec_info stmt_info
;
1662 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1665 if (visited
.add (node
))
1668 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1669 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
1671 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1672 vect_mark_slp_stmts (child
, visited
);
1676 vect_mark_slp_stmts (slp_tree node
)
1678 hash_set
<slp_tree
> visited
;
1679 vect_mark_slp_stmts (node
, visited
);
1682 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1685 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
1688 stmt_vec_info stmt_info
;
1691 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1694 if (visited
.add (node
))
1697 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1699 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1700 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1701 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1704 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1705 vect_mark_slp_stmts_relevant (child
, visited
);
1709 vect_mark_slp_stmts_relevant (slp_tree node
)
1711 hash_set
<slp_tree
> visited
;
1712 vect_mark_slp_stmts_relevant (node
, visited
);
1715 /* Copy the SLP subtree rooted at NODE. */
1718 slp_copy_subtree (slp_tree node
, hash_map
<slp_tree
, slp_tree
> &map
)
1723 slp_tree
©_ref
= map
.get_or_insert (node
, &existed_p
);
1727 copy_ref
= XNEW (_slp_tree
);
1728 slp_tree copy
= copy_ref
;
1729 memcpy (copy
, node
, sizeof (_slp_tree
));
1730 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1732 SLP_TREE_SCALAR_STMTS (copy
) = SLP_TREE_SCALAR_STMTS (node
).copy ();
1733 stmt_vec_info stmt_info
;
1734 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1735 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
1737 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1738 SLP_TREE_SCALAR_OPS (copy
) = SLP_TREE_SCALAR_OPS (node
).copy ();
1739 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1740 SLP_TREE_LOAD_PERMUTATION (copy
) = SLP_TREE_LOAD_PERMUTATION (node
).copy ();
1741 if (SLP_TREE_CHILDREN (node
).exists ())
1742 SLP_TREE_CHILDREN (copy
) = SLP_TREE_CHILDREN (node
).copy ();
1743 gcc_assert (!SLP_TREE_VEC_STMTS (node
).exists ());
1747 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (copy
), i
, child
)
1749 SLP_TREE_CHILDREN (copy
)[i
] = slp_copy_subtree (child
, map
);
1750 SLP_TREE_CHILDREN (copy
)[i
]->refcnt
++;
1755 /* Rearrange the statements of NODE according to PERMUTATION. */
1758 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1759 vec
<unsigned> permutation
,
1760 hash_set
<slp_tree
> &visited
)
1765 if (visited
.add (node
))
1768 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1769 vect_slp_rearrange_stmts (child
, group_size
, permutation
, visited
);
1771 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1773 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1774 /* ??? Computation nodes are isomorphic and need no rearrangement.
1775 This is a quick hack to cover those where rearrangement breaks
1776 semantics because only the first stmt is guaranteed to have the
1777 correct operation code due to others being swapped or inverted. */
1778 stmt_vec_info first
= SLP_TREE_SCALAR_STMTS (node
)[0];
1779 if (is_gimple_assign (first
->stmt
)
1780 && gimple_assign_rhs_code (first
->stmt
) == COND_EXPR
)
1782 vec
<stmt_vec_info
> tmp_stmts
;
1783 tmp_stmts
.create (group_size
);
1784 tmp_stmts
.quick_grow (group_size
);
1785 stmt_vec_info stmt_info
;
1786 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1787 tmp_stmts
[permutation
[i
]] = stmt_info
;
1788 SLP_TREE_SCALAR_STMTS (node
).release ();
1789 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1791 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1793 gcc_assert (group_size
== SLP_TREE_SCALAR_OPS (node
).length ());
1795 tmp_ops
.create (group_size
);
1796 tmp_ops
.quick_grow (group_size
);
1798 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1799 tmp_ops
[permutation
[i
]] = op
;
1800 SLP_TREE_SCALAR_OPS (node
).release ();
1801 SLP_TREE_SCALAR_OPS (node
) = tmp_ops
;
1806 /* Attempt to reorder stmts in a reduction chain so that we don't
1807 require any load permutation. Return true if that was possible,
1808 otherwise return false. */
1811 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1815 slp_tree node
, load
;
1817 if (SLP_INSTANCE_LOADS (slp_instn
).is_empty ())
1820 /* Compare all the permutation sequences to the first one. We know
1821 that at least one load is permuted. */
1822 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1823 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1825 unsigned int group_size
= SLP_TREE_LOAD_PERMUTATION (node
).length ();
1826 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1828 if (!SLP_TREE_LOAD_PERMUTATION (load
).exists ()
1829 || SLP_TREE_LOAD_PERMUTATION (load
).length () != group_size
)
1831 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (load
), j
, lidx
)
1832 if (lidx
!= SLP_TREE_LOAD_PERMUTATION (node
)[j
])
1836 /* Check that the loads in the first sequence are different and there
1837 are no gaps between them. */
1838 auto_sbitmap
load_index (group_size
);
1839 bitmap_clear (load_index
);
1840 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1842 if (lidx
>= group_size
)
1844 if (bitmap_bit_p (load_index
, lidx
))
1847 bitmap_set_bit (load_index
, lidx
);
1849 for (i
= 0; i
< group_size
; i
++)
1850 if (!bitmap_bit_p (load_index
, i
))
1853 /* This permutation is valid for reduction. Since the order of the
1854 statements in the nodes is not important unless they are memory
1855 accesses, we can rearrange the statements in all the nodes
1856 according to the order of the loads. */
1858 /* We have to unshare the SLP tree we modify. */
1859 hash_map
<slp_tree
, slp_tree
> map
;
1860 slp_tree unshared
= slp_copy_subtree (SLP_INSTANCE_TREE (slp_instn
), map
);
1861 vect_free_slp_tree (SLP_INSTANCE_TREE (slp_instn
), false);
1863 SLP_INSTANCE_TREE (slp_instn
) = unshared
;
1864 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1865 SLP_INSTANCE_LOADS (slp_instn
)[i
] = *map
.get (node
);
1866 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1868 /* Do the actual re-arrangement. */
1869 hash_set
<slp_tree
> visited
;
1870 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1871 node
->load_permutation
, visited
);
1873 /* We are done, no actual permutations need to be generated. */
1874 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1875 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1877 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1878 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1879 /* But we have to keep those permutations that are required because
1880 of handling of gaps. */
1881 if (known_eq (unrolling_factor
, 1U)
1882 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1883 && DR_GROUP_GAP (first_stmt_info
) == 0))
1884 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1886 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1887 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1893 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1896 vect_gather_slp_loads (vec
<slp_tree
> &loads
, slp_tree node
,
1897 hash_set
<slp_tree
> &visited
)
1899 if (visited
.add (node
))
1902 if (SLP_TREE_CHILDREN (node
).length () == 0)
1904 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1906 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1907 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1908 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1909 loads
.safe_push (node
);
1915 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1916 vect_gather_slp_loads (loads
, child
, visited
);
1921 vect_gather_slp_loads (slp_instance inst
, slp_tree node
)
1923 hash_set
<slp_tree
> visited
;
1924 vect_gather_slp_loads (SLP_INSTANCE_LOADS (inst
), node
, visited
);
1928 /* Find the last store in SLP INSTANCE. */
1931 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1933 stmt_vec_info last
= NULL
;
1934 stmt_vec_info stmt_vinfo
;
1936 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
1938 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
1939 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
1945 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1946 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1947 (also containing the first GROUP1_SIZE stmts, since stores are
1948 consecutive), the second containing the remainder.
1949 Return the first stmt in the second group. */
1951 static stmt_vec_info
1952 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
1954 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
1955 gcc_assert (group1_size
> 0);
1956 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
1957 gcc_assert (group2_size
> 0);
1958 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
1960 stmt_vec_info stmt_info
= first_vinfo
;
1961 for (unsigned i
= group1_size
; i
> 1; i
--)
1963 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1964 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1966 /* STMT is now the last element of the first group. */
1967 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1968 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
1970 DR_GROUP_SIZE (group2
) = group2_size
;
1971 for (stmt_info
= group2
; stmt_info
;
1972 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
1974 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
1975 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1978 /* For the second group, the DR_GROUP_GAP is that before the original group,
1979 plus skipping over the first vector. */
1980 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
1982 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1983 DR_GROUP_GAP (first_vinfo
) += group2_size
;
1985 if (dump_enabled_p ())
1986 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1987 group1_size
, group2_size
);
1992 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1993 statements and a vector of NUNITS elements. */
1996 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
1998 return exact_div (common_multiple (nunits
, group_size
), group_size
);
2001 /* Analyze an SLP instance starting from a group of grouped stores. Call
2002 vect_build_slp_tree to build a tree of packed stmts if possible.
2003 Return FALSE if it's impossible to SLP any stmt in the loop. */
2006 vect_analyze_slp_instance (vec_info
*vinfo
,
2007 scalar_stmts_to_slp_tree_map_t
*bst_map
,
2008 stmt_vec_info stmt_info
, unsigned max_tree_size
)
2010 slp_instance new_instance
;
2012 unsigned int group_size
;
2013 tree vectype
, scalar_type
= NULL_TREE
;
2015 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2016 vec
<stmt_vec_info
> scalar_stmts
;
2017 bool constructor
= false;
2019 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2021 scalar_type
= TREE_TYPE (DR_REF (dr
));
2022 group_size
= DR_GROUP_SIZE (stmt_info
);
2023 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
2025 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2027 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2028 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2029 group_size
= REDUC_GROUP_SIZE (stmt_info
);
2031 else if (is_gimple_assign (stmt_info
->stmt
)
2032 && gimple_assign_rhs_code (stmt_info
->stmt
) == CONSTRUCTOR
)
2034 vectype
= TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
));
2035 group_size
= CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info
->stmt
));
2040 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2041 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2042 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2047 if (dump_enabled_p ())
2048 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2049 "Build SLP failed: unsupported data-type %T\n",
2054 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2056 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2057 scalar_stmts
.create (group_size
);
2058 stmt_vec_info next_info
= stmt_info
;
2059 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2061 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2064 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2065 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
2068 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2070 /* Collect the reduction stmts and store them in
2071 SLP_TREE_SCALAR_STMTS. */
2074 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2075 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
2077 /* Mark the first element of the reduction chain as reduction to properly
2078 transform the node. In the reduction analysis phase only the last
2079 element of the chain is marked as reduction. */
2080 STMT_VINFO_DEF_TYPE (stmt_info
)
2081 = STMT_VINFO_DEF_TYPE (scalar_stmts
.last ());
2082 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
2083 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
2085 else if (constructor
)
2087 tree rhs
= gimple_assign_rhs1 (stmt_info
->stmt
);
2089 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs
), i
, val
)
2091 if (TREE_CODE (val
) == SSA_NAME
)
2093 gimple
* def
= SSA_NAME_DEF_STMT (val
);
2094 stmt_vec_info def_info
= vinfo
->lookup_stmt (def
);
2095 /* Value is defined in another basic block. */
2098 def_info
= vect_stmt_to_vectorize (def_info
);
2099 scalar_stmts
.safe_push (def_info
);
2104 if (dump_enabled_p ())
2105 dump_printf_loc (MSG_NOTE
, vect_location
,
2106 "Analyzing vectorizable constructor: %G\n",
2111 /* Collect reduction statements. */
2112 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2113 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2114 scalar_stmts
.safe_push (next_info
);
2117 /* Build the tree for the SLP instance. */
2118 bool *matches
= XALLOCAVEC (bool, group_size
);
2119 unsigned npermutes
= 0;
2120 poly_uint64 max_nunits
= nunits
;
2121 unsigned tree_size
= 0;
2122 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2123 &max_nunits
, matches
, &npermutes
,
2124 &tree_size
, bst_map
);
2127 /* Calculate the unrolling factor based on the smallest type. */
2128 poly_uint64 unrolling_factor
2129 = calculate_unrolling_factor (max_nunits
, group_size
);
2131 if (maybe_ne (unrolling_factor
, 1U)
2132 && is_a
<bb_vec_info
> (vinfo
))
2134 unsigned HOST_WIDE_INT const_max_nunits
;
2135 if (!max_nunits
.is_constant (&const_max_nunits
)
2136 || const_max_nunits
> group_size
)
2138 if (dump_enabled_p ())
2139 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2140 "Build SLP failed: store group "
2141 "size not a multiple of the vector size "
2142 "in basic block SLP\n");
2143 vect_free_slp_tree (node
, false);
2146 /* Fatal mismatch. */
2147 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2148 vect_free_slp_tree (node
, false);
2152 /* Create a new SLP instance. */
2153 new_instance
= XNEW (class _slp_instance
);
2154 SLP_INSTANCE_TREE (new_instance
) = node
;
2155 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2156 SLP_INSTANCE_LOADS (new_instance
) = vNULL
;
2157 SLP_INSTANCE_ROOT_STMT (new_instance
) = constructor
? stmt_info
: NULL
;
2159 vect_gather_slp_loads (new_instance
, node
);
2160 if (dump_enabled_p ())
2161 dump_printf_loc (MSG_NOTE
, vect_location
,
2162 "SLP size %u vs. limit %u.\n",
2163 tree_size
, max_tree_size
);
2165 /* Check whether any load is possibly permuted. */
2167 bool loads_permuted
= false;
2168 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2170 if (!SLP_TREE_LOAD_PERMUTATION (load_node
).exists ())
2173 stmt_vec_info load_info
;
2174 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2175 if (SLP_TREE_LOAD_PERMUTATION (load_node
)[j
] != j
)
2177 loads_permuted
= true;
2182 /* If the loads and stores can be handled with load/store-lane
2183 instructions do not generate this SLP instance. */
2184 if (is_a
<loop_vec_info
> (vinfo
)
2186 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2189 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2191 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2192 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2193 /* Use SLP for strided accesses (or if we can't load-lanes). */
2194 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2195 || ! vect_load_lanes_supported
2196 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2197 DR_GROUP_SIZE (stmt_vinfo
), false))
2200 if (i
== SLP_INSTANCE_LOADS (new_instance
).length ())
2202 if (dump_enabled_p ())
2203 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2204 "Built SLP cancelled: can use "
2205 "load/store-lanes\n");
2206 vect_free_slp_instance (new_instance
, false);
2211 /* If this is a reduction chain with a conversion in front
2212 amend the SLP tree with a node for that. */
2214 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
2215 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
2217 /* Get at the conversion stmt - we know it's the single use
2218 of the last stmt of the reduction chain. */
2219 gimple
*tem
= vect_orig_stmt (scalar_stmts
[group_size
- 1])->stmt
;
2220 use_operand_p use_p
;
2222 bool r
= single_imm_use (gimple_assign_lhs (tem
),
2225 next_info
= vinfo
->lookup_stmt (use_stmt
);
2226 next_info
= vect_stmt_to_vectorize (next_info
);
2227 scalar_stmts
= vNULL
;
2228 scalar_stmts
.create (group_size
);
2229 for (unsigned i
= 0; i
< group_size
; ++i
)
2230 scalar_stmts
.quick_push (next_info
);
2231 slp_tree conv
= vect_create_new_slp_node (scalar_stmts
);
2232 SLP_TREE_CHILDREN (conv
).quick_push (node
);
2233 SLP_INSTANCE_TREE (new_instance
) = conv
;
2234 /* We also have to fake this conversion stmt as SLP reduction
2235 group so we don't have to mess with too much code
2237 REDUC_GROUP_FIRST_ELEMENT (next_info
) = next_info
;
2238 REDUC_GROUP_NEXT_ELEMENT (next_info
) = NULL
;
2241 vinfo
->slp_instances
.safe_push (new_instance
);
2243 /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
2244 the number of scalar stmts in the root in a few places.
2245 Verify that assumption holds. */
2246 gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance
))
2247 .length () == group_size
);
2249 if (dump_enabled_p ())
2251 dump_printf_loc (MSG_NOTE
, vect_location
,
2252 "Final SLP tree for instance:\n");
2253 vect_print_slp_tree (MSG_NOTE
, vect_location
,
2254 SLP_INSTANCE_TREE (new_instance
));
2262 /* Failed to SLP. */
2263 /* Free the allocated memory. */
2264 scalar_stmts
.release ();
2267 /* For basic block SLP, try to break the group up into multiples of the
2269 unsigned HOST_WIDE_INT const_nunits
;
2270 if (is_a
<bb_vec_info
> (vinfo
)
2271 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2272 && DR_GROUP_FIRST_ELEMENT (stmt_info
)
2273 && nunits
.is_constant (&const_nunits
))
2275 /* We consider breaking the group only on VF boundaries from the existing
2277 for (i
= 0; i
< group_size
; i
++)
2278 if (!matches
[i
]) break;
2280 if (i
>= const_nunits
&& i
< group_size
)
2282 /* Split into two groups at the first vector boundary before i. */
2283 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2284 unsigned group1_size
= i
& ~(const_nunits
- 1);
2286 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2288 bool res
= vect_analyze_slp_instance (vinfo
, bst_map
, stmt_info
,
2290 /* If the first non-match was in the middle of a vector,
2291 skip the rest of that vector. */
2292 if (group1_size
< i
)
2294 i
= group1_size
+ const_nunits
;
2296 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2299 res
|= vect_analyze_slp_instance (vinfo
, bst_map
,
2300 rest
, max_tree_size
);
2303 /* Even though the first vector did not all match, we might be able to SLP
2304 (some) of the remainder. FORNOW ignore this possibility. */
/* NOTE(review): this chunk is a lossy extraction of GCC's tree-vect-slp.c.
   Statements are split across lines and the fused numeric prefixes (original
   line numbers) jump, so several original lines (return types, braces, some
   arguments) are missing.  Code below is kept byte-identical; only comments
   were added.  Restore from the upstream source before compiling.  */
2311 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2312 trees of packed scalar stmts if SLP is possible. */
2315 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2318 stmt_vec_info first_element
;
2320 DUMP_VECT_SCOPE ("vect_analyze_slp");
/* Map from scalar-stmt sets to already-built SLP subtrees, shared across
   instances so equal subtrees are built only once.  */
2322 scalar_stmts_to_slp_tree_map_t
*bst_map
2323 = new scalar_stmts_to_slp_tree_map_t ();
2325 /* Find SLP sequences starting from groups of grouped stores. */
2326 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2327 vect_analyze_slp_instance (vinfo
, bst_map
, first_element
, max_tree_size
);
2329 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2331 if (loop_vinfo
->reduction_chains
.length () > 0)
2333 /* Find SLP sequences starting from reduction chains. */
2334 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2335 if (! vect_analyze_slp_instance (vinfo
, bst_map
, first_element
,
2338 /* Dissolve reduction chain group. */
/* NOTE(review): this local deliberately shadows the outer VINFO while
   walking the chain elements — confirm against upstream source.  */
2339 stmt_vec_info vinfo
= first_element
;
2340 stmt_vec_info last
= NULL
;
2343 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2344 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2345 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2349 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2350 /* It can be still vectorized as part of an SLP reduction. */
2351 loop_vinfo
->reductions
.safe_push (last
);
2355 /* Find SLP sequences starting from groups of reductions. */
2356 if (loop_vinfo
->reductions
.length () > 1)
2357 vect_analyze_slp_instance (vinfo
, bst_map
, loop_vinfo
->reductions
[0],
2361 /* The map keeps a reference on SLP nodes built, release that. */
2362 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
2363 it
!= bst_map
->end (); ++it
)
2365 vect_free_slp_tree ((*it
).second
, false);
2368 /* Optimize permutations in SLP reductions. */
2369 slp_instance instance
;
2370 FOR_EACH_VEC_ELT (vinfo
->slp_instances
, i
, instance
)
2372 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2373 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2374 /* Reduction (there are no data-refs in the root).
2375 In reduction chain the order of the loads is not important. */
2376 if (!STMT_VINFO_DATA_REF (stmt_info
)
2377 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2378 vect_attempt_slp_rearrange_stmts (instance
);
2381 /* Gather all loads in the SLP graph. */
2382 hash_set
<slp_tree
> visited
;
2383 FOR_EACH_VEC_ELT (vinfo
->slp_instances
, i
, instance
)
2384 vect_gather_slp_loads (vinfo
->slp_loads
, SLP_INSTANCE_TREE (instance
),
2387 return opt_result::success ();
/* NOTE(review): lossy extraction — original lines are missing (fused line
   numbers jump); code kept byte-identical, comments only added.
   Walks all recorded SLP load nodes and drops load permutations that are
   not actually needed (BB subchains with gap 1; unpermuted loop loads that
   cover the whole DR group with no gap).  */
2391 vect_optimize_slp (vec_info
*vinfo
)
2395 FOR_EACH_VEC_ELT (vinfo
->slp_loads
, i
, node
)
2397 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
2400 /* In basic block vectorization we allow any subchain of an interleaving
2402 FORNOW: not in loop SLP because of realignment complications. */
2403 if (is_a
<bb_vec_info
> (vinfo
))
/* BB case: a contiguous subchain (each next element follows with gap 1)
   needs no explicit permutation.  */
2405 bool subchain_p
= true;
2406 stmt_vec_info next_load_info
= NULL
;
2407 stmt_vec_info load_info
;
2409 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
2412 && (next_load_info
!= load_info
2413 || DR_GROUP_GAP (load_info
) != 1))
2418 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
2422 SLP_TREE_LOAD_PERMUTATION (node
).release ();
/* Loop case: detect identity permutations.  */
2428 stmt_vec_info load_info
;
2429 bool this_load_permuted
= false;
2431 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
2432 if (SLP_TREE_LOAD_PERMUTATION (node
)[j
] != j
)
2434 this_load_permuted
= true;
2437 stmt_vec_info first_stmt_info
2438 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
2439 if (!this_load_permuted
2440 /* The load requires permutation when unrolling exposes
2441 a gap either because the group is larger than the SLP
2442 group-size or because there is a gap between the groups. */
2443 && (known_eq (LOOP_VINFO_VECT_FACTOR (as_a
<loop_vec_info
> (vinfo
)), 1U)
2444 || ((SLP_TREE_SCALAR_STMTS (node
).length ()
2445 == DR_GROUP_SIZE (first_stmt_info
))
2446 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2448 SLP_TREE_LOAD_PERMUTATION (node
).release ();
/* NOTE(review): lossy extraction — some original lines are missing; code
   kept byte-identical, comments only added.  */
2456 /* For each possible SLP instance decide whether to SLP it and calculate overall
2457 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2458 least one instance. */
2461 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2464 poly_uint64 unrolling_factor
= 1;
2465 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2466 slp_instance instance
;
2467 int decided_to_slp
= 0;
2469 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2471 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2473 /* FORNOW: SLP if you can. */
2474 /* All unroll factors have the form:
2476 GET_MODE_SIZE (vinfo->vector_mode) * X
2478 for some rational X, so they must have a common multiple. */
/* Accumulate the common multiple of all per-instance unroll factors.  */
2480 = force_common_multiple (unrolling_factor
,
2481 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2483 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2484 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2485 loop-based vectorization. Such stmts will be marked as HYBRID. */
2486 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
2490 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2492 if (decided_to_slp
&& dump_enabled_p ())
2494 dump_printf_loc (MSG_NOTE
, vect_location
,
2495 "Decided to SLP %d instances. Unrolling factor ",
2497 dump_dec (MSG_NOTE
, unrolling_factor
);
2498 dump_printf (MSG_NOTE
, "\n");
2501 return (decided_to_slp
> 0);
/* NOTE(review): lossy extraction — the struct keyword line and the walker's
   return-type line are missing; code kept byte-identical, comments added.  */
2504 /* Private data for vect_detect_hybrid_slp. */
/* Loop vinfo plus the worklist of non-SLP stmts still to be walked.  */
2507 loop_vec_info loop_vinfo
;
2508 vec
<stmt_vec_info
> *worklist
;
2511 /* Walker for walk_gimple_op. */
/* Marks the def of *TP hybrid when it is a pure-SLP stmt used by a non-SLP
   stmt, and queues it for further walking.  */
2514 vect_detect_hybrid_slp (tree
*tp
, int *, void *data
)
2516 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2517 vdhs_data
*dat
= (vdhs_data
*)wi
->info
;
2522 stmt_vec_info def_stmt_info
= dat
->loop_vinfo
->lookup_def (*tp
);
2525 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
2526 if (PURE_SLP_STMT (def_stmt_info
))
2528 if (dump_enabled_p ())
2529 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2530 def_stmt_info
->stmt
);
2531 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
2532 dat
->worklist
->safe_push (def_stmt_info
);
/* NOTE(review): lossy extraction — gsi_next calls, braces and the walk_stmt_info
   declaration are among the missing lines; code kept byte-identical.  */
2538 /* Find stmts that must be both vectorized and SLPed. */
2541 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2543 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2545 /* All stmts participating in SLP are marked pure_slp, all other
2546 stmts are loop_vect.
2547 First collect all loop_vect stmts into a worklist. */
2548 auto_vec
<stmt_vec_info
> worklist
;
2549 for (unsigned i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2551 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
/* PHIs first: queue relevant non-SLP PHIs.  */
2552 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
2555 gphi
*phi
= gsi
.phi ();
2556 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (phi
);
2557 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
2558 worklist
.safe_push (stmt_info
);
/* Then ordinary stmts, following pattern stmts and their def sequences.  */
2560 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2563 gimple
*stmt
= gsi_stmt (gsi
);
2564 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
2565 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2567 for (gimple_stmt_iterator gsi2
2568 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
2569 !gsi_end_p (gsi2
); gsi_next (&gsi2
))
2571 stmt_vec_info patt_info
2572 = loop_vinfo
->lookup_stmt (gsi_stmt (gsi2
));
2573 if (!STMT_SLP_TYPE (patt_info
)
2574 && STMT_VINFO_RELEVANT (patt_info
))
2575 worklist
.safe_push (patt_info
);
2577 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
2579 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
2580 worklist
.safe_push (stmt_info
);
2584 /* Now we have a worklist of non-SLP stmts, follow use->def chains and
2585 mark any SLP vectorized stmt as hybrid.
2586 ??? We're visiting def stmts N times (once for each non-SLP and
2587 once for each hybrid-SLP use). */
2590 dat
.worklist
= &worklist
;
2591 dat
.loop_vinfo
= loop_vinfo
;
2592 memset (&wi
, 0, sizeof (wi
));
2593 wi
.info
= (void *)&dat
;
2594 while (!worklist
.is_empty ())
2596 stmt_vec_info stmt_info
= worklist
.pop ();
2597 /* Since SSA operands are not set up for pattern stmts we need
2598 to use walk_gimple_op. */
2600 walk_gimple_op (stmt_info
->stmt
, vect_detect_hybrid_slp
, &wi
);
/* NOTE(review): lossy extraction — loop-increment line and lookup call are
   missing; code kept byte-identical, comments only added.  */
2605 /* Initialize a bb_vec_info struct for the statements between
2606 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2608 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2609 gimple_stmt_iterator region_end_in
,
2610 vec_info_shared
*shared
)
2611 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2612 bb (gsi_bb (region_begin_in
)),
2613 region_begin (region_begin_in
),
2614 region_end (region_end_in
)
2616 gimple_stmt_iterator gsi
;
/* UID 0 marks stmts as belonging to this vectorization region.  */
2618 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2621 gimple
*stmt
= gsi_stmt (gsi
);
2622 gimple_set_uid (stmt
, 0);
/* NOTE(review): lossy extraction — code kept byte-identical, comments only
   added.  */
2630 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2631 stmts in the basic block. */
2633 _bb_vec_info::~_bb_vec_info ()
/* UID -1 undoes the region marking done by the constructor above.  */
2635 for (gimple_stmt_iterator si
= region_begin
;
2636 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2637 /* Reset region marker. */
2638 gimple_set_uid (gsi_stmt (si
), -1);
/* NOTE(review): lossy extraction — the VF declaration and some braces are
   missing; code kept byte-identical, comments only added.  */
2643 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2644 given then that child nodes have already been processed, and that
2645 their def types currently match their SLP node's def type. */
2648 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2649 slp_instance node_instance
,
2650 stmt_vector_for_cost
*cost_vec
)
2652 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2653 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2655 /* Calculate the number of vector statements to be created for the
2656 scalar stmts in this node. For SLP reductions it is equal to the
2657 number of vector statements in the children (which has already been
2658 calculated by the recursive call). Otherwise it is the number of
2659 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2660 VF divided by the number of elements in a vector. */
2661 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2662 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
/* SLP reduction: copy the count from the first internal-def child.  */
2664 for (unsigned i
= 0; i
< SLP_TREE_CHILDREN (node
).length (); ++i
)
2665 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node
)[i
]) == vect_internal_def
)
2667 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2668 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[i
]);
/* Otherwise compute from VF (1 for BB vectorization) and group size.  */
2675 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2676 vf
= loop_vinfo
->vectorization_factor
;
2679 unsigned int group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
2680 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2681 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2682 = vect_get_num_vectors (vf
* group_size
, vectype
);
2686 return vect_analyze_stmt (vinfo
, stmt_info
, &dummy
,
2687 node
, node_instance
, cost_vec
);
/* NOTE(review): lossy extraction — return statements and braces among the
   missing lines; code kept byte-identical, comments only added.  */
2690 /* Try to build NODE from scalars, returning true on success.
2691 NODE_INSTANCE is the SLP instance that contains NODE. */
2694 vect_slp_convert_to_external (vec_info
*vinfo
, slp_tree node
,
2695 slp_instance node_instance
)
2697 stmt_vec_info stmt_info
;
/* Only BB vectorization, never the instance root, and never nodes that
   contain pattern stmts (their scalar lhs is not directly usable).  */
2700 if (!is_a
<bb_vec_info
> (vinfo
)
2701 || node
== SLP_INSTANCE_TREE (node_instance
)
2702 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node
)))
2705 if (dump_enabled_p ())
2706 dump_printf_loc (MSG_NOTE
, vect_location
,
2707 "Building vector operands from scalars instead\n");
2709 /* Don't remove and free the child nodes here, since they could be
2710 referenced by other structures. The analysis and scheduling phases
2711 (need to) ignore child nodes of anything that isn't vect_internal_def. */
2712 unsigned int group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
2713 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
2714 SLP_TREE_SCALAR_OPS (node
).safe_grow (group_size
);
2715 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2717 tree lhs
= gimple_get_lhs (vect_orig_stmt (stmt_info
)->stmt
);
2718 SLP_TREE_SCALAR_OPS (node
)[i
] = lhs
;
/* NOTE(review): lossy extraction — `bool res` declaration, returns and braces
   are missing; code kept byte-identical, comments only added.  */
2723 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2724 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2726 Return true if the operations are supported. */
2729 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2730 slp_instance node_instance
,
2731 hash_set
<slp_tree
> &visited
,
2732 hash_set
<slp_tree
> &lvisited
,
2733 stmt_vector_for_cost
*cost_vec
)
2738 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2741 /* If we already analyzed the exact same set of scalar stmts we're done.
2742 We share the generated vector stmts for those.
2743 The SLP graph is acyclic so not caching whether we failed or succeeded
2744 doesn't result in any issue since we throw away the lvisited set
2746 if (visited
.contains (node
)
2747 || lvisited
.add (node
))
/* Children first (post-order).  */
2750 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2751 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2752 visited
, lvisited
, cost_vec
))
2755 /* ??? We have to catch the case late where two first scalar stmts appear
2756 in multiple SLP children with different def type and fail. Remember
2757 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2758 match it when that is vect_internal_def. */
2759 auto_vec
<vect_def_type
, 4> dt
;
2760 dt
.safe_grow (SLP_TREE_CHILDREN (node
).length ());
2761 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2762 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2763 dt
[j
] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]);
2765 /* Push SLP node def-type to stmt operands. */
2766 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2767 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
2768 && SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2769 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2770 = SLP_TREE_DEF_TYPE (child
);
2772 /* Check everything worked out. */
2774 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2775 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2777 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2779 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2780 != SLP_TREE_DEF_TYPE (child
))
2783 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2787 if (!res
&& dump_enabled_p ())
2788 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2789 "not vectorized: same operand with different "
2790 "def type in stmt.\n");
2793 res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2796 /* Restore def-types. */
2797 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2798 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2799 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]) = dt
[j
];
2801 /* If this node can't be vectorized, try pruning the tree here rather
2802 than felling the whole thing. */
2803 if (!res
&& vect_slp_convert_to_external (vinfo
, node
, node_instance
))
/* NOTE(review): lossy extraction — the cost_vec argument line, visited.add
   calls in the lvisited merge loop and some braces are missing; code kept
   byte-identical, comments only added.  */
2810 /* Analyze statements in SLP instances of VINFO. Return true if the
2811 operations are supported. */
2814 vect_slp_analyze_operations (vec_info
*vinfo
)
2816 slp_instance instance
;
2819 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2821 hash_set
<slp_tree
> visited
;
/* No increment here: I is bumped only when the instance is kept; on
   failure ordered_remove shifts the next instance into slot I.  */
2822 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2824 hash_set
<slp_tree
> lvisited
;
2825 stmt_vector_for_cost cost_vec
;
2826 cost_vec
.create (2);
2827 if (!vect_slp_analyze_node_operations (vinfo
,
2828 SLP_INSTANCE_TREE (instance
),
2829 instance
, visited
, lvisited
,
2831 /* Instances with a root stmt require vectorized defs for the
2833 || (SLP_INSTANCE_ROOT_STMT (instance
)
2834 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance
))
2835 != vect_internal_def
)))
2837 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2838 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2839 if (dump_enabled_p ())
2840 dump_printf_loc (MSG_NOTE
, vect_location
,
2841 "removing SLP instance operations starting from: %G",
2843 vect_free_slp_instance (instance
, false);
2844 vinfo
->slp_instances
.ordered_remove (i
);
2845 cost_vec
.release ();
/* Success: merge this instance's locally-visited nodes and commit costs.  */
2849 for (hash_set
<slp_tree
>::iterator x
= lvisited
.begin();
2850 x
!= lvisited
.end(); ++x
)
2854 add_stmt_costs (vinfo
, vinfo
->target_cost_data
, &cost_vec
);
2855 cost_vec
.release ();
2859 return !vinfo
->slp_instances
.is_empty ();
/* NOTE(review): lossy extraction — the (*life)[i] guard, scalar_load branch
   and some braces are missing; code kept byte-identical, comments added.  */
2863 /* Compute the scalar cost of the SLP node NODE and its children
2864 and return it. Do not account defs that are marked in LIFE and
2865 update LIFE according to uses of NODE. */
2868 vect_bb_slp_scalar_cost (vec_info
*vinfo
,
2869 slp_tree node
, vec
<bool, va_heap
> *life
,
2870 stmt_vector_for_cost
*cost_vec
,
2871 hash_set
<slp_tree
> &visited
)
2874 stmt_vec_info stmt_info
;
2877 if (visited
.add (node
))
2880 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2882 gimple
*stmt
= stmt_info
->stmt
;
2883 ssa_op_iter op_iter
;
2884 def_operand_p def_p
;
2889 /* If there is a non-vectorized use of the defs then the scalar
2890 stmt is kept live in which case we do not account it or any
2891 required defs in the SLP children in the scalar cost. This
2892 way we make the vectorization more costly when compared to
2894 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2896 imm_use_iterator use_iter
;
2898 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2899 if (!is_gimple_debug (use_stmt
))
2901 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
2902 if (!use_stmt_info
|| !PURE_SLP_STMT (use_stmt_info
))
2905 BREAK_FROM_IMM_USE_STMT (use_iter
);
2912 /* Count scalar stmts only once. */
2913 if (gimple_visited_p (stmt
))
2915 gimple_set_visited (stmt
, true);
/* Classify the scalar stmt for the target cost model.  */
2917 vect_cost_for_stmt kind
;
2918 if (STMT_VINFO_DATA_REF (stmt_info
))
2920 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2923 kind
= scalar_store
;
2925 else if (vect_nop_conversion_p (stmt_info
))
2929 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
2932 auto_vec
<bool, 20> subtree_life
;
2933 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2935 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2937 /* Do not directly pass LIFE to the recursive call, copy it to
2938 confine changes in the callee to the current child/subtree. */
2939 subtree_life
.safe_splice (*life
);
2940 vect_bb_slp_scalar_cost (vinfo
, child
, &subtree_life
, cost_vec
,
2942 subtree_life
.truncate (0);
/* NOTE(review): lossy extraction — the `int dummy` declaration, return
   statements and part of the dump block are missing; code kept
   byte-identical, comments only added.  */
2947 /* Check if vectorization of the basic block is profitable. */
2950 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2952 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2953 slp_instance instance
;
2955 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2956 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2958 /* Calculate scalar cost. */
2959 stmt_vector_for_cost scalar_costs
;
2960 scalar_costs
.create (0);
2961 hash_set
<slp_tree
> visited
;
2962 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2964 auto_vec
<bool, 20> life
;
2965 life
.safe_grow_cleared
2966 (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance
)).length ());
2967 vect_bb_slp_scalar_cost (bb_vinfo
,
2968 SLP_INSTANCE_TREE (instance
),
2969 &life
, &scalar_costs
, visited
);
/* Feed the collected scalar costs through the target cost model.  */
2971 void *target_cost_data
= init_cost (NULL
);
2972 add_stmt_costs (bb_vinfo
, target_cost_data
, &scalar_costs
);
2973 scalar_costs
.release ();
2975 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
2976 destroy_cost_data (target_cost_data
);
2978 /* Unset visited flag. */
2979 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2980 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2981 gimple_set_visited (gsi_stmt (gsi
), false);
2983 /* Complete the target-specific cost calculation. */
2984 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2985 &vec_inside_cost
, &vec_epilogue_cost
);
2987 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2989 if (dump_enabled_p ())
2991 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2992 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2994 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2995 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2996 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2999 /* Vectorization is profitable if its cost is more than the cost of scalar
3000 version. Note that we err on the vector side for equal cost because
3001 the cost estimate is otherwise quite pessimistic (constant uses are
3002 free on the scalar side but cost a load on the vector side for
3004 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
/* NOTE(review): lossy extraction — `continue` statements and braces are
   missing; code kept byte-identical, comments only added.  */
3010 /* Find any vectorizable constructors and add them to the grouped_store
3014 vect_slp_check_for_constructors (bb_vec_info bb_vinfo
)
3016 gimple_stmt_iterator gsi
;
3018 for (gsi
= bb_vinfo
->region_begin
;
3019 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
3021 gassign
*stmt
= dyn_cast
<gassign
*> (gsi_stmt (gsi
));
3022 if (!stmt
|| gimple_assign_rhs_code (stmt
) != CONSTRUCTOR
)
/* Accept only a complete, non-uniform vector CONSTRUCTOR of scalar
   (non-vector) elements.  */
3025 tree rhs
= gimple_assign_rhs1 (stmt
);
3026 if (!VECTOR_TYPE_P (TREE_TYPE (rhs
))
3027 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs
)),
3028 CONSTRUCTOR_NELTS (rhs
))
3029 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs
, 0)->value
))
3030 || uniform_vector_p (rhs
))
3033 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (stmt
);
3034 BB_VINFO_GROUPED_STORES (bb_vinfo
).safe_push (stmt_info
);
/* NOTE(review): lossy extraction — `fatal = true/false` assignments, return
   statements and several dump-message tails are missing; code kept
   byte-identical, comments only added.  */
3038 /* Check if the region described by BB_VINFO can be vectorized, returning
3039 true if so. When returning false, set FATAL to true if the same failure
3040 would prevent vectorization at other vector sizes, false if it is still
3041 worth trying other sizes. N_STMTS is the number of statements in the
3045 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo
, int n_stmts
, bool &fatal
)
3047 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3049 slp_instance instance
;
3051 poly_uint64 min_vf
= 2;
3053 /* The first group of checks is independent of the vector size. */
3056 /* Analyze the data references. */
3058 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, NULL
))
3060 if (dump_enabled_p ())
3061 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3062 "not vectorized: unhandled data-ref in basic "
3067 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
3069 if (dump_enabled_p ())
3070 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3071 "not vectorized: not enough data-refs in "
3076 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
3078 if (dump_enabled_p ())
3079 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3080 "not vectorized: unhandled data access in "
3085 vect_slp_check_for_constructors (bb_vinfo
);
3087 /* If there are no grouped stores in the region there is no need
3088 to continue with pattern recog as vect_analyze_slp will fail
3090 if (bb_vinfo
->grouped_stores
.is_empty ())
3092 if (dump_enabled_p ())
3093 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3094 "not vectorized: no grouped stores in "
3099 /* While the rest of the analysis below depends on it in some way. */
3102 vect_pattern_recog (bb_vinfo
);
3104 /* Check the SLP opportunities in the basic block, analyze and build SLP
3106 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
3108 if (dump_enabled_p ())
3110 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3111 "Failed to SLP the basic block.\n");
3112 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3113 "not vectorized: failed to find SLP opportunities "
3114 "in basic block.\n");
3119 /* Optimize permutations. */
3120 vect_optimize_slp (bb_vinfo
);
3122 vect_record_base_alignments (bb_vinfo
);
3124 /* Analyze and verify the alignment of data references and the
3125 dependence in the SLP instances. */
/* I is bumped only when the instance is kept (removal shifts the vec).  */
3126 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
3128 if (! vect_slp_analyze_and_verify_instance_alignment (bb_vinfo
, instance
)
3129 || ! vect_slp_analyze_instance_dependence (bb_vinfo
, instance
))
3131 slp_tree node
= SLP_INSTANCE_TREE (instance
);
3132 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3133 if (dump_enabled_p ())
3134 dump_printf_loc (MSG_NOTE
, vect_location
,
3135 "removing SLP instance operations starting from: %G",
3137 vect_free_slp_instance (instance
, false);
3138 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
3142 /* Mark all the statements that we want to vectorize as pure SLP and
3144 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
3145 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3146 if (SLP_INSTANCE_ROOT_STMT (instance
))
3147 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance
)) = pure_slp
;
3151 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3154 if (!vect_slp_analyze_operations (bb_vinfo
))
3156 if (dump_enabled_p ())
3157 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3158 "not vectorized: bad operation in basic block.\n");
3162 /* Cost model: check if the vectorization is worthwhile. */
3163 if (!unlimited_cost_model (NULL
)
3164 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3166 if (dump_enabled_p ())
3167 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3168 "not vectorized: vectorization is not "
3173 if (dump_enabled_p ())
3174 dump_printf_loc (MSG_NOTE
, vect_location
,
3175 "Basic block will be vectorized using SLP\n");
/* NOTE(review): lossy extraction — the outer retry loop header, `bool fatal`,
   `delete bb_vinfo`, mode_i increments and several returns are among the
   missing lines; code kept byte-identical, comments only added.  */
3179 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3180 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3181 on success. The region has N_STMTS statements and has the datarefs
3182 given by DATAREFS. */
3185 vect_slp_bb_region (gimple_stmt_iterator region_begin
,
3186 gimple_stmt_iterator region_end
,
3187 vec
<data_reference_p
> datarefs
,
3188 unsigned int n_stmts
)
3190 bb_vec_info bb_vinfo
;
3191 auto_vector_modes vector_modes
;
3193 /* Autodetect first vector size we try. */
3194 machine_mode next_vector_mode
= VOIDmode
;
3195 targetm
.vectorize
.autovectorize_vector_modes (&vector_modes
, false);
3196 unsigned int mode_i
= 0;
3198 vec_info_shared shared
;
3200 machine_mode autodetected_vector_mode
= VOIDmode
;
/* Body below is retried once per candidate vector mode.  */
3203 bool vectorized
= false;
3205 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, &shared
);
3207 bool first_time_p
= shared
.datarefs
.is_empty ();
3208 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
3210 bb_vinfo
->shared
->save_datarefs ();
3212 bb_vinfo
->shared
->check_datarefs ();
3213 bb_vinfo
->vector_mode
= next_vector_mode
;
3215 if (vect_slp_analyze_bb_1 (bb_vinfo
, n_stmts
, fatal
)
3216 && dbg_cnt (vect_slp
))
3218 if (dump_enabled_p ())
3220 dump_printf_loc (MSG_NOTE
, vect_location
,
3221 "***** Analysis succeeded with vector mode"
3222 " %s\n", GET_MODE_NAME (bb_vinfo
->vector_mode
));
3223 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3226 bb_vinfo
->shared
->check_datarefs ();
3227 vect_schedule_slp (bb_vinfo
);
3229 unsigned HOST_WIDE_INT bytes
;
3230 if (dump_enabled_p ())
3232 if (GET_MODE_SIZE (bb_vinfo
->vector_mode
).is_constant (&bytes
))
3233 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3234 "basic block part vectorized using %wu byte "
3235 "vectors\n", bytes
);
3237 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3238 "basic block part vectorized using variable "
3239 "length vectors\n");
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_NOTE
, vect_location
,
3248 "***** Analysis failed with vector mode %s\n",
3249 GET_MODE_NAME (bb_vinfo
->vector_mode
));
3253 autodetected_vector_mode
= bb_vinfo
->vector_mode
;
/* Skip modes whose analysis would be identical to what we just did.  */
3256 while (mode_i
< vector_modes
.length ()
3257 && vect_chooses_same_modes_p (bb_vinfo
, vector_modes
[mode_i
]))
3259 if (dump_enabled_p ())
3260 dump_printf_loc (MSG_NOTE
, vect_location
,
3261 "***** The result for vector mode %s would"
3263 GET_MODE_NAME (vector_modes
[mode_i
]));
3269 if (mode_i
< vector_modes
.length ()
3270 && VECTOR_MODE_P (autodetected_vector_mode
)
3271 && (related_vector_mode (vector_modes
[mode_i
],
3272 GET_MODE_INNER (autodetected_vector_mode
))
3273 == autodetected_vector_mode
)
3274 && (related_vector_mode (autodetected_vector_mode
,
3275 GET_MODE_INNER (vector_modes
[mode_i
]))
3276 == vector_modes
[mode_i
]))
3278 if (dump_enabled_p ())
3279 dump_printf_loc (MSG_NOTE
, vect_location
,
3280 "***** Skipping vector mode %s, which would"
3281 " repeat the analysis for %s\n",
3282 GET_MODE_NAME (vector_modes
[mode_i
]),
3283 GET_MODE_NAME (autodetected_vector_mode
));
3288 || mode_i
== vector_modes
.length ()
3289 || autodetected_vector_mode
== VOIDmode
3290 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3291 vector sizes will fail do not bother iterating. */
3295 /* Try the next biggest vector size. */
3296 next_vector_mode
= vector_modes
[mode_i
++];
3297 if (dump_enabled_p ())
3298 dump_printf_loc (MSG_NOTE
, vect_location
,
3299 "***** Re-trying analysis with vector mode %s\n",
3300 GET_MODE_NAME (next_vector_mode
));
/* NOTE(review): lossy extraction — the `insns` counter declaration/increment,
   `break` statements and some braces are missing; code kept byte-identical,
   comments only added.  */
3304 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3305 true if anything in the basic-block was vectorized. */
3308 vect_slp_bb (basic_block bb
)
3310 gimple_stmt_iterator gsi
;
3311 bool any_vectorized
= false;
3313 gsi
= gsi_start_bb (bb
);
/* Split BB into maximal regions of stmts with analyzable data refs and
   try to vectorize each region separately.  */
3314 while (!gsi_end_p (gsi
))
3316 gimple_stmt_iterator region_begin
= gsi
;
3317 vec
<data_reference_p
> datarefs
= vNULL
;
3320 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3322 gimple
*stmt
= gsi_stmt (gsi
);
3323 if (is_gimple_debug (stmt
))
3327 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3328 vect_location
= stmt
;
3330 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3334 /* Skip leading unhandled stmts. */
3335 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3341 gimple_stmt_iterator region_end
= gsi
;
3343 if (insns
> param_slp_max_insns_in_bb
)
3345 if (dump_enabled_p ())
3346 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3347 "not vectorized: too many instructions in "
3350 else if (vect_slp_bb_region (region_begin
, region_end
, datarefs
, insns
))
3351 any_vectorized
= true;
3353 if (gsi_end_p (region_end
))
3356 /* Skip the unhandled stmt. */
3360 return any_vectorized
;
3364 /* Return 1 if vector type STMT_VINFO is a boolean vector. */
3367 vect_mask_constant_operand_p (vec_info
*vinfo
,
3368 stmt_vec_info stmt_vinfo
, unsigned op_num
)
3370 enum tree_code code
= gimple_expr_code (stmt_vinfo
->stmt
);
3372 enum vect_def_type dt
;
3374 /* For comparison and COND_EXPR type is chosen depending
3375 on the non-constant other comparison operand. */
3376 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3378 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3379 op
= gimple_assign_rhs1 (stmt
);
3381 if (!vect_is_simple_use (op
, vinfo
, &dt
, &vectype
))
3384 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3387 if (code
== COND_EXPR
)
3389 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3390 tree cond
= gimple_assign_rhs1 (stmt
);
3392 if (TREE_CODE (cond
) == SSA_NAME
)
3395 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3401 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3402 op
= TREE_OPERAND (cond
, 0);
3405 if (!vect_is_simple_use (op
, vinfo
, &dt
, &vectype
))
3408 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3411 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3414 /* Build a variable-length vector in which the elements in ELTS are repeated
3415 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3416 RESULTS and add any new instructions to SEQ.
3418 The approach we use is:
3420 (1) Find a vector mode VM with integer elements of mode IM.
3422 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3423 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3424 from small vectors to IM.
3426 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3428 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3429 correct byte contents.
3431 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3433 We try to find the largest IM for which this sequence works, in order
3434 to cut down on the number of interleaves. */
3437 duplicate_and_interleave (vec_info
*vinfo
, gimple_seq
*seq
, tree vector_type
,
3438 vec
<tree
> elts
, unsigned int nresults
,
3441 unsigned int nelts
= elts
.length ();
3442 tree element_type
= TREE_TYPE (vector_type
);
3444 /* (1) Find a vector mode VM with integer elements of mode IM. */
3445 unsigned int nvectors
= 1;
3446 tree new_vector_type
;
3448 if (!can_duplicate_and_interleave_p (vinfo
, nelts
, element_type
,
3449 &nvectors
, &new_vector_type
,
3453 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3454 unsigned int partial_nelts
= nelts
/ nvectors
;
3455 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3457 tree_vector_builder partial_elts
;
3458 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3459 pieces
.quick_grow (nvectors
* 2);
3460 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3462 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3463 ELTS' has mode IM. */
3464 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3465 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3466 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3467 tree t
= gimple_build_vector (seq
, &partial_elts
);
3468 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3469 TREE_TYPE (new_vector_type
), t
);
3471 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3472 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3475 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3476 correct byte contents.
3478 We need to repeat the following operation log2(nvectors) times:
3480 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3481 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3483 However, if each input repeats every N elements and the VF is
3484 a multiple of N * 2, the HI result is the same as the LO. */
3485 unsigned int in_start
= 0;
3486 unsigned int out_start
= nvectors
;
3487 unsigned int hi_start
= nvectors
/ 2;
3488 /* A bound on the number of outputs needed to produce NRESULTS results
3489 in the final iteration. */
3490 unsigned int noutputs_bound
= nvectors
* nresults
;
3491 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3493 noutputs_bound
/= 2;
3494 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3495 for (unsigned int i
= 0; i
< limit
; ++i
)
3498 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3501 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3505 tree output
= make_ssa_name (new_vector_type
);
3506 tree input1
= pieces
[in_start
+ (i
/ 2)];
3507 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3508 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3511 gimple_seq_add_stmt (seq
, stmt
);
3512 pieces
[out_start
+ i
] = output
;
3514 std::swap (in_start
, out_start
);
3517 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3518 results
.reserve (nresults
);
3519 for (unsigned int i
= 0; i
< nresults
; ++i
)
3521 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3522 pieces
[in_start
+ i
]));
3524 results
.quick_push (results
[i
- nvectors
]);
3528 /* For constant and loop invariant defs of SLP_NODE this function returns
3529 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3530 OP_NODE determines the node for the operand containing the scalar
3534 vect_get_constant_vectors (vec_info
*vinfo
,
3535 slp_tree slp_node
, unsigned op_num
,
3536 vec
<tree
> *vec_oprnds
)
3538 slp_tree op_node
= SLP_TREE_CHILDREN (slp_node
)[op_num
];
3539 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3540 unsigned HOST_WIDE_INT nunits
;
3542 unsigned j
, number_of_places_left_in_vector
;
3545 int group_size
= op_node
->ops
.length ();
3546 unsigned int vec_num
, i
;
3547 unsigned number_of_copies
= 1;
3549 tree neutral_op
= NULL
;
3550 gimple_seq ctor_seq
= NULL
;
3551 auto_vec
<tree
, 16> permute_results
;
3553 /* ??? SLP analysis should compute the vector type for the
3554 constant / invariant and store it in the SLP node. */
3555 tree op
= op_node
->ops
[0];
3556 /* Check if vector type is a boolean vector. */
3557 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3558 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3559 && vect_mask_constant_operand_p (vinfo
, stmt_vinfo
, op_num
))
3560 vector_type
= truth_type_for (stmt_vectype
);
3562 vector_type
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), op_node
);
3564 /* ??? For lane-reducing ops we should also have the required number
3565 of vector stmts initialized rather than second-guessing here. */
3566 unsigned int number_of_vectors
;
3567 if (is_gimple_assign (stmt_vinfo
->stmt
)
3568 && (gimple_assign_rhs_code (stmt_vinfo
->stmt
) == SAD_EXPR
3569 || gimple_assign_rhs_code (stmt_vinfo
->stmt
) == DOT_PROD_EXPR
3570 || gimple_assign_rhs_code (stmt_vinfo
->stmt
) == WIDEN_SUM_EXPR
))
3571 number_of_vectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3574 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3575 * TYPE_VECTOR_SUBPARTS (stmt_vectype
),
3577 vec_oprnds
->create (number_of_vectors
);
3578 auto_vec
<tree
> voprnds (number_of_vectors
);
3580 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3581 created vectors. It is greater than 1 if unrolling is performed.
3583 For example, we have two scalar operands, s1 and s2 (e.g., group of
3584 strided accesses of size two), while NUNITS is four (i.e., four scalars
3585 of this type can be packed in a vector). The output vector will contain
3586 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3589 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3590 containing the operands.
3592 For example, NUNITS is four as before, and the group size is 8
3593 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3594 {s5, s6, s7, s8}. */
3596 /* When using duplicate_and_interleave, we just need one element for
3597 each scalar statement. */
3598 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3599 nunits
= group_size
;
3601 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3603 number_of_places_left_in_vector
= nunits
;
3605 tree_vector_builder
elts (vector_type
, nunits
, 1);
3606 elts
.quick_grow (nunits
);
3607 bool place_after_defs
= false;
3608 for (j
= 0; j
< number_of_copies
; j
++)
3610 for (i
= group_size
- 1; op_node
->ops
.iterate (i
, &op
); i
--)
3612 /* Create 'vect_ = {op0,op1,...,opn}'. */
3613 number_of_places_left_in_vector
--;
3615 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3617 if (CONSTANT_CLASS_P (op
))
3619 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3621 /* Can't use VIEW_CONVERT_EXPR for booleans because
3622 of possibly different sizes of scalar value and
3624 if (integer_zerop (op
))
3625 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3626 else if (integer_onep (op
))
3627 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3632 op
= fold_unary (VIEW_CONVERT_EXPR
,
3633 TREE_TYPE (vector_type
), op
);
3634 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3638 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3640 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3643 = build_all_ones_cst (TREE_TYPE (vector_type
));
3645 = build_zero_cst (TREE_TYPE (vector_type
));
3646 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3647 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3653 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3656 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3659 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3663 elts
[number_of_places_left_in_vector
] = op
;
3664 if (!CONSTANT_CLASS_P (op
))
3666 if (TREE_CODE (orig_op
) == SSA_NAME
3667 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3668 && is_a
<bb_vec_info
> (vinfo
)
3669 && (as_a
<bb_vec_info
> (vinfo
)->bb
3670 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3671 place_after_defs
= true;
3673 if (number_of_places_left_in_vector
== 0)
3676 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3677 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3678 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3681 if (permute_results
.is_empty ())
3682 duplicate_and_interleave (vinfo
, &ctor_seq
, vector_type
,
3683 elts
, number_of_vectors
,
3685 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3688 gimple_stmt_iterator gsi
;
3689 if (place_after_defs
)
3691 stmt_vec_info last_stmt_info
3692 = vect_find_last_scalar_stmt_in_slp (slp_node
);
3693 gsi
= gsi_for_stmt (last_stmt_info
->stmt
);
3694 init
= vect_init_vector (vinfo
, stmt_vinfo
, vec_cst
,
3698 init
= vect_init_vector (vinfo
, stmt_vinfo
, vec_cst
,
3700 if (ctor_seq
!= NULL
)
3702 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3703 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3706 voprnds
.quick_push (init
);
3707 place_after_defs
= false;
3708 number_of_places_left_in_vector
= nunits
;
3710 elts
.new_vector (vector_type
, nunits
, 1);
3711 elts
.quick_grow (nunits
);
3716 /* Since the vectors are created in the reverse order, we should invert
3718 vec_num
= voprnds
.length ();
3719 for (j
= vec_num
; j
!= 0; j
--)
3721 vop
= voprnds
[j
- 1];
3722 vec_oprnds
->quick_push (vop
);
3725 /* In case that VF is greater than the unrolling factor needed for the SLP
3726 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3727 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3728 to replicate the vectors. */
3729 while (number_of_vectors
> vec_oprnds
->length ())
3731 tree neutral_vec
= NULL
;
3736 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3738 vec_oprnds
->quick_push (neutral_vec
);
3742 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3743 vec_oprnds
->quick_push (vop
);
3749 /* Get vectorized definitions from SLP_NODE that contains corresponding
3750 vectorized def-stmts. */
3753 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3755 stmt_vec_info vec_def_stmt_info
;
3758 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3760 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt_info
)
3761 vec_oprnds
->quick_push (gimple_get_lhs (vec_def_stmt_info
->stmt
));
3765 /* Get N vectorized definitions for SLP_NODE.
3766 If the scalar definitions are loop invariants or constants, collect them and
3767 call vect_get_constant_vectors() to create vector stmts.
3768 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3769 must be stored in the corresponding child of SLP_NODE, and we call
3770 vect_get_slp_vect_defs () to retrieve them. */
3773 vect_get_slp_defs (vec_info
*vinfo
,
3774 slp_tree slp_node
, vec
<vec
<tree
> > *vec_oprnds
, unsigned n
)
3777 n
= SLP_TREE_CHILDREN (slp_node
).length ();
3779 for (unsigned i
= 0; i
< n
; ++i
)
3781 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[i
];
3783 vec
<tree
> vec_defs
= vNULL
;
3785 /* For each operand we check if it has vectorized definitions in a child
3786 node or we need to create them (for invariants and constants). */
3787 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3789 vec_defs
.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child
));
3790 vect_get_slp_vect_defs (child
, &vec_defs
);
3793 vect_get_constant_vectors (vinfo
, slp_node
, i
, &vec_defs
);
3795 vec_oprnds
->quick_push (vec_defs
);
3799 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3800 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3801 permute statements for the SLP node NODE. */
3804 vect_transform_slp_perm_load (vec_info
*vinfo
,
3805 slp_tree node
, vec
<tree
> dr_chain
,
3806 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3807 bool analyze_only
, unsigned *n_perms
)
3809 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3811 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3812 unsigned int group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
3813 unsigned int mask_element
;
3816 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3819 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
3821 mode
= TYPE_MODE (vectype
);
3822 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3824 /* Initialize the vect stmts of NODE to properly insert the generated
3827 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3828 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3829 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3831 /* Generate permutation masks for every NODE. Number of masks for each NODE
3832 is equal to GROUP_SIZE.
3833 E.g., we have a group of three nodes with three loads from the same
3834 location in each node, and the vector size is 4. I.e., we have a
3835 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3836 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3837 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3840 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3841 The last mask is illegal since we assume two operands for permute
3842 operation, and the mask element values can't be outside that range.
3843 Hence, the last mask must be converted into {2,5,5,5}.
3844 For the first two permutations we need the first and the second input
3845 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3846 we need the second and the third vectors: {b1,c1,a2,b2} and
3849 int vect_stmts_counter
= 0;
3850 unsigned int index
= 0;
3851 int first_vec_index
= -1;
3852 int second_vec_index
= -1;
3856 vec_perm_builder mask
;
3857 unsigned int nelts_to_build
;
3858 unsigned int nvectors_per_build
;
3859 bool repeating_p
= (group_size
== DR_GROUP_SIZE (stmt_info
)
3860 && multiple_p (nunits
, group_size
));
3863 /* A single vector contains a whole number of copies of the node, so:
3864 (a) all permutes can use the same mask; and
3865 (b) the permutes only need a single vector input. */
3866 mask
.new_vector (nunits
, group_size
, 3);
3867 nelts_to_build
= mask
.encoded_nelts ();
3868 nvectors_per_build
= SLP_TREE_VEC_STMTS (node
).length ();
3872 /* We need to construct a separate mask for each vector statement. */
3873 unsigned HOST_WIDE_INT const_nunits
, const_vf
;
3874 if (!nunits
.is_constant (&const_nunits
)
3875 || !vf
.is_constant (&const_vf
))
3877 mask
.new_vector (const_nunits
, const_nunits
, 1);
3878 nelts_to_build
= const_vf
* group_size
;
3879 nvectors_per_build
= 1;
3882 unsigned int count
= mask
.encoded_nelts ();
3883 mask
.quick_grow (count
);
3884 vec_perm_indices indices
;
3886 for (unsigned int j
= 0; j
< nelts_to_build
; j
++)
3888 unsigned int iter_num
= j
/ group_size
;
3889 unsigned int stmt_num
= j
% group_size
;
3890 unsigned int i
= (iter_num
* DR_GROUP_SIZE (stmt_info
)
3891 + SLP_TREE_LOAD_PERMUTATION (node
)[stmt_num
]);
3894 first_vec_index
= 0;
3899 /* Enforced before the loop when !repeating_p. */
3900 unsigned int const_nunits
= nunits
.to_constant ();
3901 vec_index
= i
/ const_nunits
;
3902 mask_element
= i
% const_nunits
;
3903 if (vec_index
== first_vec_index
3904 || first_vec_index
== -1)
3906 first_vec_index
= vec_index
;
3908 else if (vec_index
== second_vec_index
3909 || second_vec_index
== -1)
3911 second_vec_index
= vec_index
;
3912 mask_element
+= const_nunits
;
3916 if (dump_enabled_p ())
3917 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3918 "permutation requires at "
3919 "least three vectors %G",
3921 gcc_assert (analyze_only
);
3925 gcc_assert (mask_element
< 2 * const_nunits
);
3928 if (mask_element
!= index
)
3930 mask
[index
++] = mask_element
;
3932 if (index
== count
&& !noop_p
)
3934 indices
.new_vector (mask
, second_vec_index
== -1 ? 1 : 2, nunits
);
3935 if (!can_vec_perm_const_p (mode
, indices
))
3937 if (dump_enabled_p ())
3939 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3941 "unsupported vect permute { ");
3942 for (i
= 0; i
< count
; ++i
)
3944 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3945 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3947 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3949 gcc_assert (analyze_only
);
3960 tree mask_vec
= NULL_TREE
;
3963 mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
3965 if (second_vec_index
== -1)
3966 second_vec_index
= first_vec_index
;
3968 for (unsigned int ri
= 0; ri
< nvectors_per_build
; ++ri
)
3970 /* Generate the permute statement if necessary. */
3971 tree first_vec
= dr_chain
[first_vec_index
+ ri
];
3972 tree second_vec
= dr_chain
[second_vec_index
+ ri
];
3973 stmt_vec_info perm_stmt_info
;
3976 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3978 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3980 perm_dest
= make_ssa_name (perm_dest
);
3982 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
3983 first_vec
, second_vec
,
3986 = vect_finish_stmt_generation (vinfo
,
3987 stmt_info
, perm_stmt
,
3991 /* If mask was NULL_TREE generate the requested
3992 identity transform. */
3993 perm_stmt_info
= vinfo
->lookup_def (first_vec
);
3995 /* Store the vector statement in NODE. */
3996 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++]
4002 first_vec_index
= -1;
4003 second_vec_index
= -1;
4011 /* Vectorize SLP instance tree in postorder. */
4014 vect_schedule_slp_instance (vec_info
*vinfo
,
4015 slp_tree node
, slp_instance instance
)
4017 gimple_stmt_iterator si
;
4018 stmt_vec_info stmt_info
;
4019 unsigned int group_size
;
4024 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4027 /* See if we have already vectorized the node in the graph of the
4029 if (SLP_TREE_VEC_STMTS (node
).exists ())
4032 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4033 vect_schedule_slp_instance (vinfo
, child
, instance
);
4035 /* Push SLP node def-type to stmts. */
4036 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4037 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4039 stmt_vec_info child_stmt_info
;
4040 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
4041 STMT_VINFO_DEF_TYPE (child_stmt_info
) = SLP_TREE_DEF_TYPE (child
);
4044 stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
4046 /* VECTYPE is the type of the destination. */
4047 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4048 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4049 group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
4051 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
4052 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
4054 if (dump_enabled_p ())
4055 dump_printf_loc (MSG_NOTE
, vect_location
,
4056 "------>vectorizing SLP node starting from: %G",
4059 /* Vectorized stmts go before the last scalar stmt which is where
4060 all uses are ready. */
4061 stmt_vec_info last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
4062 si
= gsi_for_stmt (last_stmt_info
->stmt
);
4064 /* Handle two-operation SLP nodes by vectorizing the group with
4065 both operations and then performing a merge. */
4066 bool done_p
= false;
4067 if (SLP_TREE_TWO_OPERATORS (node
))
4069 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
4070 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
4071 enum tree_code ocode
= ERROR_MARK
;
4072 stmt_vec_info ostmt_info
;
4073 vec_perm_builder
mask (group_size
, group_size
, 1);
4074 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt_info
)
4076 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
4077 if (gimple_assign_rhs_code (ostmt
) != code0
)
4079 mask
.quick_push (1);
4080 ocode
= gimple_assign_rhs_code (ostmt
);
4083 mask
.quick_push (0);
4085 if (ocode
!= ERROR_MARK
)
4087 vec
<stmt_vec_info
> v0
;
4088 vec
<stmt_vec_info
> v1
;
4090 tree tmask
= NULL_TREE
;
4091 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
4092 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
4093 SLP_TREE_VEC_STMTS (node
).truncate (0);
4094 gimple_assign_set_rhs_code (stmt
, ocode
);
4095 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
4096 gimple_assign_set_rhs_code (stmt
, code0
);
4097 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
4098 SLP_TREE_VEC_STMTS (node
).truncate (0);
4099 tree meltype
= build_nonstandard_integer_type
4100 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
4101 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
4103 for (j
= 0; j
< v0
.length (); ++j
)
4105 /* Enforced by vect_build_slp_tree, which rejects variable-length
4106 vectors for SLP_TREE_TWO_OPERATORS. */
4107 unsigned int const_nunits
= nunits
.to_constant ();
4108 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
4109 for (l
= 0; l
< const_nunits
; ++l
)
4111 if (k
>= group_size
)
4113 tree t
= build_int_cst (meltype
,
4114 mask
[k
++] * const_nunits
+ l
);
4115 melts
.quick_push (t
);
4117 tmask
= melts
.build ();
4119 /* ??? Not all targets support a VEC_PERM_EXPR with a
4120 constant mask that would translate to a vec_merge RTX
4121 (with their vec_perm_const_ok). We can either not
4122 vectorize in that case or let veclower do its job.
4123 Unfortunately that isn't too great and at least for
4124 plus/minus we'd eventually like to match targets
4125 vector addsub instructions. */
4127 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
4129 gimple_assign_lhs (v0
[j
]->stmt
),
4130 gimple_assign_lhs (v1
[j
]->stmt
),
4132 SLP_TREE_VEC_STMTS (node
).quick_push
4133 (vect_finish_stmt_generation (vinfo
, stmt_info
, vstmt
, &si
));
4141 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
4143 /* Restore stmt def-types. */
4144 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4145 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4147 stmt_vec_info child_stmt_info
;
4148 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
4149 STMT_VINFO_DEF_TYPE (child_stmt_info
) = vect_internal_def
;
4153 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4154 For loop vectorization this is done in vectorizable_call, but for SLP
4155 it needs to be deferred until end of vect_schedule_slp, because multiple
4156 SLP instances may refer to the same scalar stmt. */
4159 vect_remove_slp_scalar_calls (vec_info
*vinfo
,
4160 slp_tree node
, hash_set
<slp_tree
> &visited
)
4163 gimple_stmt_iterator gsi
;
4167 stmt_vec_info stmt_info
;
4169 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4172 if (visited
.add (node
))
4175 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4176 vect_remove_slp_scalar_calls (vinfo
, child
, visited
);
4178 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
4180 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
4181 if (!stmt
|| gimple_bb (stmt
) == NULL
)
4183 if (is_pattern_stmt_p (stmt_info
)
4184 || !PURE_SLP_STMT (stmt_info
))
4186 lhs
= gimple_call_lhs (stmt
);
4187 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
4188 gsi
= gsi_for_stmt (stmt
);
4189 vinfo
->replace_stmt (&gsi
, stmt_info
, new_stmt
);
4190 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
4195 vect_remove_slp_scalar_calls (vec_info
*vinfo
, slp_tree node
)
4197 hash_set
<slp_tree
> visited
;
4198 vect_remove_slp_scalar_calls (vinfo
, node
, visited
);
4201 /* Vectorize the instance root. */
4204 vectorize_slp_instance_root_stmt (slp_tree node
, slp_instance instance
)
4206 gassign
*rstmt
= NULL
;
4208 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) == 1)
4210 stmt_vec_info child_stmt_info
;
4213 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt_info
)
4215 tree vect_lhs
= gimple_get_lhs (child_stmt_info
->stmt
);
4216 tree root_lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4217 if (!useless_type_conversion_p (TREE_TYPE (root_lhs
),
4218 TREE_TYPE (vect_lhs
)))
4219 vect_lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (root_lhs
),
4221 rstmt
= gimple_build_assign (root_lhs
, vect_lhs
);
4225 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) > 1)
4227 int nelts
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
4228 stmt_vec_info child_stmt_info
;
4230 vec
<constructor_elt
, va_gc
> *v
;
4231 vec_alloc (v
, nelts
);
4233 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt_info
)
4235 CONSTRUCTOR_APPEND_ELT (v
,
4237 gimple_get_lhs (child_stmt_info
->stmt
));
4239 tree lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4240 tree rtype
= TREE_TYPE (gimple_assign_rhs1 (instance
->root_stmt
->stmt
));
4241 tree r_constructor
= build_constructor (rtype
, v
);
4242 rstmt
= gimple_build_assign (lhs
, r_constructor
);
4247 gimple_stmt_iterator rgsi
= gsi_for_stmt (instance
->root_stmt
->stmt
);
4248 gsi_replace (&rgsi
, rstmt
, true);
4251 /* Generate vector code for all SLP instances in the loop/basic block. */
4254 vect_schedule_slp (vec_info
*vinfo
)
4256 vec
<slp_instance
> slp_instances
;
4257 slp_instance instance
;
4260 slp_instances
= vinfo
->slp_instances
;
4261 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4263 slp_tree node
= SLP_INSTANCE_TREE (instance
);
4264 /* Schedule the tree of INSTANCE. */
4265 vect_schedule_slp_instance (vinfo
, node
, instance
);
4267 if (SLP_INSTANCE_ROOT_STMT (instance
))
4268 vectorize_slp_instance_root_stmt (node
, instance
);
4270 if (dump_enabled_p ())
4271 dump_printf_loc (MSG_NOTE
, vect_location
,
4272 "vectorizing stmts using SLP.\n");
4275 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4277 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4278 stmt_vec_info store_info
;
4281 /* Remove scalar call stmts. Do not do this for basic-block
4282 vectorization as not all uses may be vectorized.
4283 ??? Why should this be necessary? DCE should be able to
4284 remove the stmts itself.
4285 ??? For BB vectorization we can as well remove scalar
4286 stmts starting from the SLP tree root if they have no
4288 if (is_a
<loop_vec_info
> (vinfo
))
4289 vect_remove_slp_scalar_calls (vinfo
, root
);
4291 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
); j
++)
4293 if (!STMT_VINFO_DATA_REF (store_info
))
4296 if (SLP_INSTANCE_ROOT_STMT (instance
))
4299 store_info
= vect_orig_stmt (store_info
);
4300 /* Free the attached stmt_vec_info and remove the stmt. */
4301 vinfo
->remove_stmt (store_info
);