1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
46 #include "gimple-fold.h"
47 #include "internal-fn.h"
50 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
55 vect_free_slp_tree (slp_tree node
, bool final_p
)
60 if (--node
->refcnt
!= 0)
63 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
64 vect_free_slp_tree (child
, final_p
);
66 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
67 Some statements might no longer exist, after having been
68 removed by vect_transform_stmt. Updating the remaining
69 statements would be redundant. */
72 stmt_vec_info stmt_info
;
73 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
75 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info
) > 0);
76 STMT_VINFO_NUM_SLP_USES (stmt_info
)--;
80 SLP_TREE_CHILDREN (node
).release ();
81 SLP_TREE_SCALAR_STMTS (node
).release ();
82 SLP_TREE_SCALAR_OPS (node
).release ();
83 SLP_TREE_VEC_STMTS (node
).release ();
84 SLP_TREE_LOAD_PERMUTATION (node
).release ();
89 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
90 have vectorized the instance or if we have made a final decision not
91 to vectorize the statements in any way. */
94 vect_free_slp_instance (slp_instance instance
, bool final_p
)
96 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
), final_p
);
97 SLP_INSTANCE_LOADS (instance
).release ();
102 /* Create an SLP node for SCALAR_STMTS. */
105 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
)
108 stmt_vec_info stmt_info
= scalar_stmts
[0];
111 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
112 nops
= gimple_call_num_args (stmt
);
113 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
115 nops
= gimple_num_ops (stmt
) - 1;
116 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
119 else if (is_a
<gphi
*> (stmt_info
->stmt
))
124 node
= XNEW (struct _slp_tree
);
125 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
126 SLP_TREE_SCALAR_OPS (node
) = vNULL
;
127 SLP_TREE_VEC_STMTS (node
).create (0);
128 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
129 SLP_TREE_CHILDREN (node
).create (nops
);
130 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
131 SLP_TREE_TWO_OPERATORS (node
) = false;
132 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
134 node
->max_nunits
= 1;
137 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt_info
)
138 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
143 /* Create an SLP node for OPS. */
146 vect_create_new_slp_node (vec
<tree
> ops
)
150 node
= XNEW (struct _slp_tree
);
151 SLP_TREE_SCALAR_STMTS (node
) = vNULL
;
152 SLP_TREE_SCALAR_OPS (node
) = ops
;
153 SLP_TREE_VEC_STMTS (node
).create (0);
154 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
155 SLP_TREE_CHILDREN (node
) = vNULL
;
156 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
157 SLP_TREE_TWO_OPERATORS (node
) = false;
158 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
160 node
->max_nunits
= 1;
166 /* This structure is used in creation of an SLP tree. Each instance
167 corresponds to the same operand in a group of scalar stmts in an SLP
169 typedef struct _slp_oprnd_info
171 /* Def-stmts for the operands. */
172 vec
<stmt_vec_info
> def_stmts
;
175 /* Information about the first statement, its vector def-type, type, the
176 operand itself in case it's constant, and an indication if it's a pattern
179 enum vect_def_type first_dt
;
185 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
187 static vec
<slp_oprnd_info
>
188 vect_create_oprnd_info (int nops
, int group_size
)
191 slp_oprnd_info oprnd_info
;
192 vec
<slp_oprnd_info
> oprnds_info
;
194 oprnds_info
.create (nops
);
195 for (i
= 0; i
< nops
; i
++)
197 oprnd_info
= XNEW (struct _slp_oprnd_info
);
198 oprnd_info
->def_stmts
.create (group_size
);
199 oprnd_info
->ops
.create (group_size
);
200 oprnd_info
->first_dt
= vect_uninitialized_def
;
201 oprnd_info
->first_op_type
= NULL_TREE
;
202 oprnd_info
->first_pattern
= false;
203 oprnd_info
->second_pattern
= false;
204 oprnds_info
.quick_push (oprnd_info
);
211 /* Free operands info. */
214 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
217 slp_oprnd_info oprnd_info
;
219 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
221 oprnd_info
->def_stmts
.release ();
222 oprnd_info
->ops
.release ();
223 XDELETE (oprnd_info
);
226 oprnds_info
.release ();
230 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
231 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
235 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info
,
236 stmt_vec_info first_stmt_info
)
238 stmt_vec_info next_stmt_info
= first_stmt_info
;
241 if (first_stmt_info
!= DR_GROUP_FIRST_ELEMENT (stmt_info
))
246 if (next_stmt_info
== stmt_info
)
248 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
250 result
+= DR_GROUP_GAP (next_stmt_info
);
252 while (next_stmt_info
);
257 /* Check whether it is possible to load COUNT elements of type ELT_MODE
258 using the method implemented by duplicate_and_interleave. Return true
259 if so, returning the number of intermediate vectors in *NVECTORS_OUT
260 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
264 can_duplicate_and_interleave_p (vec_info
*vinfo
, unsigned int count
,
265 machine_mode elt_mode
,
266 unsigned int *nvectors_out
,
267 tree
*vector_type_out
,
270 poly_int64 elt_bytes
= count
* GET_MODE_SIZE (elt_mode
);
272 unsigned int nvectors
= 1;
275 scalar_int_mode int_mode
;
276 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
277 if (multiple_p (vinfo
->vector_size
, elt_bytes
, &nelts
)
278 && int_mode_for_size (elt_bits
, 0).exists (&int_mode
))
280 tree int_type
= build_nonstandard_integer_type
281 (GET_MODE_BITSIZE (int_mode
), 1);
282 tree vector_type
= build_vector_type (int_type
, nelts
);
283 if (VECTOR_MODE_P (TYPE_MODE (vector_type
)))
285 vec_perm_builder
sel1 (nelts
, 2, 3);
286 vec_perm_builder
sel2 (nelts
, 2, 3);
287 poly_int64 half_nelts
= exact_div (nelts
, 2);
288 for (unsigned int i
= 0; i
< 3; ++i
)
291 sel1
.quick_push (i
+ nelts
);
292 sel2
.quick_push (half_nelts
+ i
);
293 sel2
.quick_push (half_nelts
+ i
+ nelts
);
295 vec_perm_indices
indices1 (sel1
, 2, nelts
);
296 vec_perm_indices
indices2 (sel2
, 2, nelts
);
297 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
298 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
301 *nvectors_out
= nvectors
;
303 *vector_type_out
= vector_type
;
306 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
308 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
315 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
321 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
322 they are of a valid type and that they match the defs of the first stmt of
323 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
324 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
325 indicates swap is required for cond_expr stmts. Specifically, *SWAP
326 is 1 if STMT is cond and operands of comparison need to be swapped;
327 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
328 If there is any operand swap in this function, *SWAP is set to non-zero
330 If there was a fatal error return -1; if the error could be corrected by
331 swapping operands of father node of this one, return 1; if everything is
334 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
335 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
336 vec
<slp_oprnd_info
> *oprnds_info
)
338 stmt_vec_info stmt_info
= stmts
[stmt_num
];
340 unsigned int i
, number_of_oprnds
;
341 enum vect_def_type dt
= vect_uninitialized_def
;
342 bool pattern
= false;
343 slp_oprnd_info oprnd_info
;
344 int first_op_idx
= 1;
345 unsigned int commutative_op
= -1U;
346 bool first_op_cond
= false;
347 bool first
= stmt_num
== 0;
348 bool second
= stmt_num
== 1;
350 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
352 number_of_oprnds
= gimple_call_num_args (stmt
);
354 if (gimple_call_internal_p (stmt
))
356 internal_fn ifn
= gimple_call_internal_fn (stmt
);
357 commutative_op
= first_commutative_argument (ifn
);
359 /* Masked load, only look at mask. */
360 if (ifn
== IFN_MASK_LOAD
)
362 number_of_oprnds
= 1;
363 /* Mask operand index. */
368 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
370 enum tree_code code
= gimple_assign_rhs_code (stmt
);
371 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
372 /* Swap can only be done for cond_expr if asked to, otherwise we
373 could result in different comparison code to the first stmt. */
374 if (code
== COND_EXPR
375 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
377 first_op_cond
= true;
381 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
386 bool swapped
= (*swap
!= 0);
387 gcc_assert (!swapped
|| first_op_cond
);
388 for (i
= 0; i
< number_of_oprnds
; i
++)
393 /* Map indicating how operands of cond_expr should be swapped. */
394 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
395 int *map
= maps
[*swap
];
398 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
399 first_op_idx
), map
[i
]);
401 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
404 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
405 if (TREE_CODE (oprnd
) == VIEW_CONVERT_EXPR
)
406 oprnd
= TREE_OPERAND (oprnd
, 0);
408 oprnd_info
= (*oprnds_info
)[i
];
410 stmt_vec_info def_stmt_info
;
411 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
413 if (dump_enabled_p ())
414 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
415 "Build SLP failed: can't analyze def for %T\n",
422 oprnd_info
->second_pattern
= pattern
;
426 oprnd_info
->first_dt
= dt
;
427 oprnd_info
->first_pattern
= pattern
;
428 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
432 /* Not first stmt of the group, check that the def-stmt/s match
433 the def-stmt/s of the first stmt. Allow different definition
434 types for reduction chains: the first stmt must be a
435 vect_reduction_def (a phi node), and the rest
436 vect_internal_def. */
437 tree type
= TREE_TYPE (oprnd
);
438 if ((oprnd_info
->first_dt
!= dt
439 && !(oprnd_info
->first_dt
== vect_reduction_def
440 && dt
== vect_internal_def
)
441 && !((oprnd_info
->first_dt
== vect_external_def
442 || oprnd_info
->first_dt
== vect_constant_def
)
443 && (dt
== vect_external_def
444 || dt
== vect_constant_def
)))
445 || !types_compatible_p (oprnd_info
->first_op_type
, type
))
447 /* Try swapping operands if we got a mismatch. */
448 if (i
== commutative_op
&& !swapped
)
454 if (dump_enabled_p ())
455 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
456 "Build SLP failed: different types\n");
460 if ((dt
== vect_constant_def
461 || dt
== vect_external_def
)
462 && !vinfo
->vector_size
.is_constant ()
463 && (TREE_CODE (type
) == BOOLEAN_TYPE
464 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
467 if (dump_enabled_p ())
468 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
469 "Build SLP failed: invalid type of def "
470 "for variable-length SLP %T\n", oprnd
);
475 /* Check the types of the definitions. */
478 case vect_constant_def
:
479 case vect_external_def
:
480 oprnd_info
->def_stmts
.quick_push (NULL
);
481 oprnd_info
->ops
.quick_push (oprnd
);
484 case vect_reduction_def
:
485 case vect_induction_def
:
486 case vect_internal_def
:
487 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
488 oprnd_info
->ops
.quick_push (oprnd
);
492 /* FORNOW: Not supported. */
493 if (dump_enabled_p ())
494 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
495 "Build SLP failed: illegal type of def %T\n",
505 /* If there are already uses of this stmt in a SLP instance then
506 we've committed to the operand order and can't swap it. */
507 if (STMT_VINFO_NUM_SLP_USES (stmt_info
) != 0)
509 if (dump_enabled_p ())
510 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
511 "Build SLP failed: cannot swap operands of "
512 "shared stmt %G", stmt_info
->stmt
);
518 /* To get rid of this swapping we have to move the stmt code
519 to the SLP tree as well (and gather it here per stmt). */
520 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
521 tree cond
= gimple_assign_rhs1 (stmt
);
522 enum tree_code code
= TREE_CODE (cond
);
527 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
528 &TREE_OPERAND (cond
, 1));
529 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
534 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
535 gimple_assign_rhs3_ptr (stmt
));
536 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
537 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
538 gcc_assert (code
!= ERROR_MARK
);
539 TREE_SET_CODE (cond
, code
);
544 /* Commutative ops need not reflect swapping, ops are in
547 if (dump_enabled_p ())
548 dump_printf_loc (MSG_NOTE
, vect_location
,
549 "swapped operands to match def types in %G",
557 /* Return true if call statements CALL1 and CALL2 are similar enough
558 to be combined into the same SLP group. */
561 compatible_calls_p (gcall
*call1
, gcall
*call2
)
563 unsigned int nargs
= gimple_call_num_args (call1
);
564 if (nargs
!= gimple_call_num_args (call2
))
567 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
570 if (gimple_call_internal_p (call1
))
572 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
573 TREE_TYPE (gimple_call_lhs (call2
))))
575 for (unsigned int i
= 0; i
< nargs
; ++i
)
576 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
577 TREE_TYPE (gimple_call_arg (call2
, i
))))
582 if (!operand_equal_p (gimple_call_fn (call1
),
583 gimple_call_fn (call2
), 0))
586 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
592 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
593 caller's attempt to find the vector type in STMT_INFO with the narrowest
594 element type. Return true if VECTYPE is nonnull and if it is valid
595 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
596 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
597 vect_build_slp_tree. */
600 vect_record_max_nunits (stmt_vec_info stmt_info
, unsigned int group_size
,
601 tree vectype
, poly_uint64
*max_nunits
)
605 if (dump_enabled_p ())
606 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
607 "Build SLP failed: unsupported data-type in %G\n",
609 /* Fatal mismatch. */
613 /* If populating the vector type requires unrolling then fail
614 before adjusting *max_nunits for basic-block vectorization. */
615 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
616 unsigned HOST_WIDE_INT const_nunits
;
617 if (STMT_VINFO_BB_VINFO (stmt_info
)
618 && (!nunits
.is_constant (&const_nunits
)
619 || const_nunits
> group_size
))
621 if (dump_enabled_p ())
622 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
623 "Build SLP failed: unrolling required "
624 "in basic block SLP\n");
625 /* Fatal mismatch. */
629 /* In case of multiple types we need to detect the smallest type. */
630 vect_update_max_nunits (max_nunits
, vectype
);
634 /* STMTS is a group of GROUP_SIZE SLP statements in which some
635 statements do the same operation as the first statement and in which
636 the others do ALT_STMT_CODE. Return true if we can take one vector
637 of the first operation and one vector of the second and permute them
638 to get the required result. VECTYPE is the type of the vector that
639 would be permuted. */
642 vect_two_operations_perm_ok_p (vec
<stmt_vec_info
> stmts
,
643 unsigned int group_size
, tree vectype
,
644 tree_code alt_stmt_code
)
646 unsigned HOST_WIDE_INT count
;
647 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&count
))
650 vec_perm_builder
sel (count
, count
, 1);
651 for (unsigned int i
= 0; i
< count
; ++i
)
653 unsigned int elt
= i
;
654 gassign
*stmt
= as_a
<gassign
*> (stmts
[i
% group_size
]->stmt
);
655 if (gimple_assign_rhs_code (stmt
) == alt_stmt_code
)
657 sel
.quick_push (elt
);
659 vec_perm_indices
indices (sel
, 2, count
);
660 return can_vec_perm_const_p (TYPE_MODE (vectype
), indices
);
663 /* Verify if the scalar stmts STMTS are isomorphic, require data
664 permutation or are of unsupported types of operation. Return
665 true if they are, otherwise return false and indicate in *MATCHES
666 which stmts are not isomorphic to the first one. If MATCHES[0]
667 is false then this indicates the comparison could not be
668 carried out or the stmts will never be vectorized by SLP.
670 Note COND_EXPR is possibly isomorphic to another one after swapping its
671 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
672 the first stmt by swapping the two operands of comparison; set SWAP[i]
673 to 2 if stmt I is isormorphic to the first stmt by inverting the code
674 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
675 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
678 vect_build_slp_tree_1 (unsigned char *swap
,
679 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
680 poly_uint64
*max_nunits
, bool *matches
,
684 stmt_vec_info first_stmt_info
= stmts
[0];
685 enum tree_code first_stmt_code
= ERROR_MARK
;
686 enum tree_code alt_stmt_code
= ERROR_MARK
;
687 enum tree_code rhs_code
= ERROR_MARK
;
688 enum tree_code first_cond_code
= ERROR_MARK
;
690 bool need_same_oprnds
= false;
691 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
694 machine_mode optab_op2_mode
;
695 machine_mode vec_mode
;
696 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
699 /* For every stmt in NODE find its def stmt/s. */
700 stmt_vec_info stmt_info
;
701 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
703 gimple
*stmt
= stmt_info
->stmt
;
707 if (dump_enabled_p ())
708 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
710 /* Fail to vectorize statements marked as unvectorizable. */
711 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
713 if (dump_enabled_p ())
714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
715 "Build SLP failed: unvectorizable statement %G",
717 /* Fatal mismatch. */
722 lhs
= gimple_get_lhs (stmt
);
723 if (lhs
== NULL_TREE
)
725 if (dump_enabled_p ())
726 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
727 "Build SLP failed: not GIMPLE_ASSIGN nor "
728 "GIMPLE_CALL %G", stmt
);
729 /* Fatal mismatch. */
735 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
738 && !vect_record_max_nunits (stmt_info
, group_size
,
739 nunits_vectype
, max_nunits
)))
741 /* Fatal mismatch. */
746 gcc_assert (vectype
);
748 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
750 rhs_code
= CALL_EXPR
;
752 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
754 else if ((gimple_call_internal_p (call_stmt
)
755 && (!vectorizable_internal_fn_p
756 (gimple_call_internal_fn (call_stmt
))))
757 || gimple_call_tail_p (call_stmt
)
758 || gimple_call_noreturn_p (call_stmt
)
759 || !gimple_call_nothrow_p (call_stmt
)
760 || gimple_call_chain (call_stmt
))
762 if (dump_enabled_p ())
763 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
764 "Build SLP failed: unsupported call type %G",
766 /* Fatal mismatch. */
773 rhs_code
= gimple_assign_rhs_code (stmt
);
774 load_p
= TREE_CODE_CLASS (rhs_code
) == tcc_reference
;
777 /* Check the operation. */
780 first_stmt_code
= rhs_code
;
782 /* Shift arguments should be equal in all the packed stmts for a
783 vector shift with scalar shift operand. */
784 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
785 || rhs_code
== LROTATE_EXPR
786 || rhs_code
== RROTATE_EXPR
)
788 if (vectype
== boolean_type_node
)
790 if (dump_enabled_p ())
791 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
792 "Build SLP failed: shift of a"
794 /* Fatal mismatch. */
799 vec_mode
= TYPE_MODE (vectype
);
801 /* First see if we have a vector/vector shift. */
802 optab
= optab_for_tree_code (rhs_code
, vectype
,
806 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
808 /* No vector/vector shift, try for a vector/scalar shift. */
809 optab
= optab_for_tree_code (rhs_code
, vectype
,
814 if (dump_enabled_p ())
815 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
816 "Build SLP failed: no optab.\n");
817 /* Fatal mismatch. */
821 icode
= (int) optab_handler (optab
, vec_mode
);
822 if (icode
== CODE_FOR_nothing
)
824 if (dump_enabled_p ())
825 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
827 "op not supported by target.\n");
828 /* Fatal mismatch. */
832 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
833 if (!VECTOR_MODE_P (optab_op2_mode
))
835 need_same_oprnds
= true;
836 first_op1
= gimple_assign_rhs2 (stmt
);
840 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
842 need_same_oprnds
= true;
843 first_op1
= gimple_assign_rhs2 (stmt
);
848 if (first_stmt_code
!= rhs_code
849 && alt_stmt_code
== ERROR_MARK
)
850 alt_stmt_code
= rhs_code
;
851 if (first_stmt_code
!= rhs_code
852 && (first_stmt_code
!= IMAGPART_EXPR
853 || rhs_code
!= REALPART_EXPR
)
854 && (first_stmt_code
!= REALPART_EXPR
855 || rhs_code
!= IMAGPART_EXPR
)
856 /* Handle mismatches in plus/minus by computing both
857 and merging the results. */
858 && !((first_stmt_code
== PLUS_EXPR
859 || first_stmt_code
== MINUS_EXPR
)
860 && (alt_stmt_code
== PLUS_EXPR
861 || alt_stmt_code
== MINUS_EXPR
)
862 && rhs_code
== alt_stmt_code
)
863 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
864 && (first_stmt_code
== ARRAY_REF
865 || first_stmt_code
== BIT_FIELD_REF
866 || first_stmt_code
== INDIRECT_REF
867 || first_stmt_code
== COMPONENT_REF
868 || first_stmt_code
== MEM_REF
)))
870 if (dump_enabled_p ())
872 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
873 "Build SLP failed: different operation "
875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
876 "original stmt %G", first_stmt_info
->stmt
);
883 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
885 if (dump_enabled_p ())
886 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
887 "Build SLP failed: different shift "
888 "arguments in %G", stmt
);
893 if (!load_p
&& rhs_code
== CALL_EXPR
)
895 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
896 as_a
<gcall
*> (stmt
)))
898 if (dump_enabled_p ())
899 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
900 "Build SLP failed: different calls in %G",
908 /* Grouped store or load. */
909 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
911 if (REFERENCE_CLASS_P (lhs
))
919 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
922 /* Check that there are no loads from different interleaving
923 chains in the same node. */
924 if (prev_first_load
!= first_load
)
926 if (dump_enabled_p ())
927 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
929 "Build SLP failed: different "
930 "interleaving chains in one node %G",
937 prev_first_load
= first_load
;
939 } /* Grouped access. */
944 /* Not grouped load. */
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
947 "Build SLP failed: not grouped load %G", stmt
);
949 /* FORNOW: Not grouped loads are not supported. */
950 /* Fatal mismatch. */
955 /* Not memory operation. */
956 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
957 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
958 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
959 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
960 && rhs_code
!= VIEW_CONVERT_EXPR
961 && rhs_code
!= CALL_EXPR
)
963 if (dump_enabled_p ())
964 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
965 "Build SLP failed: operation unsupported %G",
967 /* Fatal mismatch. */
972 if (rhs_code
== COND_EXPR
)
974 tree cond_expr
= gimple_assign_rhs1 (stmt
);
975 enum tree_code cond_code
= TREE_CODE (cond_expr
);
976 enum tree_code swap_code
= ERROR_MARK
;
977 enum tree_code invert_code
= ERROR_MARK
;
980 first_cond_code
= TREE_CODE (cond_expr
);
981 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
983 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
984 swap_code
= swap_tree_comparison (cond_code
);
985 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
988 if (first_cond_code
== cond_code
)
990 /* Isomorphic can be achieved by swapping. */
991 else if (first_cond_code
== swap_code
)
993 /* Isomorphic can be achieved by inverting. */
994 else if (first_cond_code
== invert_code
)
998 if (dump_enabled_p ())
999 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1000 "Build SLP failed: different"
1001 " operation %G", stmt
);
1011 for (i
= 0; i
< group_size
; ++i
)
1015 /* If we allowed a two-operation SLP node verify the target can cope
1016 with the permute we are going to use. */
1017 if (alt_stmt_code
!= ERROR_MARK
1018 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1020 if (vectype
== boolean_type_node
1021 || !vect_two_operations_perm_ok_p (stmts
, group_size
,
1022 vectype
, alt_stmt_code
))
1024 for (i
= 0; i
< group_size
; ++i
)
1025 if (gimple_assign_rhs_code (stmts
[i
]->stmt
) == alt_stmt_code
)
1028 if (dump_enabled_p ())
1030 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1031 "Build SLP failed: different operation "
1032 "in stmt %G", stmts
[i
]->stmt
);
1033 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1034 "original stmt %G", first_stmt_info
->stmt
);
1039 *two_operators
= true;
1045 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1046 Note we never remove apart from at destruction time so we do not
1047 need a special value for deleted that differs from empty. */
1050 typedef vec
<stmt_vec_info
> value_type
;
1051 typedef vec
<stmt_vec_info
> compare_type
;
1052 static inline hashval_t
hash (value_type
);
1053 static inline bool equal (value_type existing
, value_type candidate
);
1054 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1055 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1056 static inline void mark_empty (value_type
&x
) { x
.release (); }
1057 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1058 static inline void remove (value_type
&x
) { x
.release (); }
1061 bst_traits::hash (value_type x
)
1064 for (unsigned i
= 0; i
< x
.length (); ++i
)
1065 h
.add_int (gimple_uid (x
[i
]->stmt
));
1069 bst_traits::equal (value_type existing
, value_type candidate
)
1071 if (existing
.length () != candidate
.length ())
1073 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1074 if (existing
[i
] != candidate
[i
])
1079 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1080 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1081 scalar_stmts_to_slp_tree_map_t
;
1084 vect_build_slp_tree_2 (vec_info
*vinfo
,
1085 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1086 poly_uint64
*max_nunits
,
1087 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1088 scalar_stmts_to_slp_tree_map_t
*bst_map
);
1091 vect_build_slp_tree (vec_info
*vinfo
,
1092 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1093 poly_uint64
*max_nunits
,
1094 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1095 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1097 if (slp_tree
*leader
= bst_map
->get (stmts
))
1099 if (dump_enabled_p ())
1100 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1101 *leader
? "" : "failed ", *leader
);
1104 (*leader
)->refcnt
++;
1105 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
1109 poly_uint64 this_max_nunits
= 1;
1110 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
1111 matches
, npermutes
, tree_size
, bst_map
);
1114 res
->max_nunits
= this_max_nunits
;
1115 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1116 /* Keep a reference for the bst_map use. */
1119 bst_map
->put (stmts
.copy (), res
);
1123 /* Recursively build an SLP tree starting from NODE.
1124 Fail (and return a value not equal to zero) if def-stmts are not
1125 isomorphic, require data permutation or are of unsupported types of
1126 operation. Otherwise, return 0.
1127 The value returned is the depth in the SLP tree where a mismatch
1131 vect_build_slp_tree_2 (vec_info
*vinfo
,
1132 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1133 poly_uint64
*max_nunits
,
1134 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1135 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1137 unsigned nops
, i
, this_tree_size
= 0;
1138 poly_uint64 this_max_nunits
= *max_nunits
;
1143 stmt_vec_info stmt_info
= stmts
[0];
1144 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1145 nops
= gimple_call_num_args (stmt
);
1146 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1148 nops
= gimple_num_ops (stmt
) - 1;
1149 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1152 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1157 /* If the SLP node is a PHI (induction or reduction), terminate
1159 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1161 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1162 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
1163 if (!vect_record_max_nunits (stmt_info
, group_size
, vectype
, max_nunits
))
1166 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1167 /* Induction from different IVs is not supported. */
1168 if (def_type
== vect_induction_def
)
1170 stmt_vec_info other_info
;
1171 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1172 if (stmt_info
!= other_info
)
1175 else if (def_type
== vect_reduction_def
1176 || def_type
== vect_double_reduction_def
1177 || def_type
== vect_nested_cycle
)
1179 /* Else def types have to match. */
1180 stmt_vec_info other_info
;
1181 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1183 /* But for reduction chains only check on the first stmt. */
1184 if (!STMT_VINFO_DATA_REF (other_info
)
1185 && REDUC_GROUP_FIRST_ELEMENT (other_info
)
1186 && REDUC_GROUP_FIRST_ELEMENT (other_info
) != stmt_info
)
1188 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1195 node
= vect_create_new_slp_node (stmts
);
1200 bool two_operators
= false;
1201 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1202 if (!vect_build_slp_tree_1 (swap
, stmts
, group_size
,
1203 &this_max_nunits
, matches
, &two_operators
))
1206 /* If the SLP node is a load, terminate the recursion unless masked. */
1207 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1208 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1210 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1213 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1218 *max_nunits
= this_max_nunits
;
1220 node
= vect_create_new_slp_node (stmts
);
1225 /* Get at the operands, verifying they are compatible. */
1226 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1227 slp_oprnd_info oprnd_info
;
1228 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1230 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1231 stmts
, i
, &oprnds_info
);
1233 matches
[(res
== -1) ? 0 : i
] = false;
1237 for (i
= 0; i
< group_size
; ++i
)
1240 vect_free_oprnd_info (oprnds_info
);
1244 auto_vec
<slp_tree
, 4> children
;
1246 stmt_info
= stmts
[0];
1248 /* Create SLP_TREE nodes for the definition node/s. */
1249 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1252 unsigned old_tree_size
= this_tree_size
;
1255 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1257 /* COND_EXPR have one too many eventually if the condition
1259 gcc_assert (i
== 3 && nops
== 4);
1263 if (oprnd_info
->first_dt
!= vect_internal_def
1264 && oprnd_info
->first_dt
!= vect_reduction_def
1265 && oprnd_info
->first_dt
!= vect_induction_def
)
1267 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1268 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1269 oprnd_info
->ops
= vNULL
;
1270 children
.safe_push (invnode
);
1274 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1275 group_size
, &this_max_nunits
,
1277 &this_tree_size
, bst_map
)) != NULL
)
1279 /* If we have all children of child built up from scalars then just
1280 throw that away and build it up this node from scalars. */
1281 if (is_a
<bb_vec_info
> (vinfo
)
1282 && !SLP_TREE_CHILDREN (child
).is_empty ()
1283 /* ??? Rejecting patterns this way doesn't work. We'd have to
1284 do extra work to cancel the pattern so the uses see the
1286 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child
)[0]))
1288 slp_tree grandchild
;
1290 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1291 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1296 this_tree_size
= old_tree_size
;
1297 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1298 vect_free_slp_tree (grandchild
, false);
1299 SLP_TREE_CHILDREN (child
).truncate (0);
1301 if (dump_enabled_p ())
1302 dump_printf_loc (MSG_NOTE
, vect_location
,
1303 "Building parent vector operands from "
1304 "scalars instead\n");
1305 oprnd_info
->def_stmts
= vNULL
;
1306 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1307 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1308 oprnd_info
->ops
= vNULL
;
1310 children
.safe_push (child
);
1315 oprnd_info
->def_stmts
= vNULL
;
1316 children
.safe_push (child
);
1320 /* If the SLP build failed fatally and we analyze a basic-block
1321 simply treat nodes we fail to build as externally defined
1322 (and thus build vectors from the scalar defs).
1323 The cost model will reject outright expensive cases.
1324 ??? This doesn't treat cases where permutation ultimatively
1325 fails (or we don't try permutation below). Ideally we'd
1326 even compute a permutation that will end up with the maximum
1328 if (is_a
<bb_vec_info
> (vinfo
)
1330 /* ??? Rejecting patterns this way doesn't work. We'd have to
1331 do extra work to cancel the pattern so the uses see the
1333 && !is_pattern_stmt_p (stmt_info
))
1335 if (dump_enabled_p ())
1336 dump_printf_loc (MSG_NOTE
, vect_location
,
1337 "Building vector operands from scalars\n");
1339 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1340 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1341 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1342 children
.safe_push (child
);
1343 oprnd_info
->ops
= vNULL
;
1344 oprnd_info
->def_stmts
= vNULL
;
1348 /* If the SLP build for operand zero failed and operand zero
1349 and one can be commutated try that for the scalar stmts
1350 that failed the match. */
1352 /* A first scalar stmt mismatch signals a fatal mismatch. */
1354 /* ??? For COND_EXPRs we can swap the comparison operands
1355 as well as the arms under some constraints. */
1357 && oprnds_info
[1]->first_dt
== vect_internal_def
1358 && is_gimple_assign (stmt_info
->stmt
)
1359 /* Swapping operands for reductions breaks assumptions later on. */
1360 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1361 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
1362 /* Do so only if the number of not successful permutes was nor more
1363 than a cut-ff as re-trying the recursive match on
1364 possibly each level of the tree would expose exponential
1368 /* See whether we can swap the matching or the non-matching
1370 bool swap_not_matching
= true;
1373 for (j
= 0; j
< group_size
; ++j
)
1375 if (matches
[j
] != !swap_not_matching
)
1377 stmt_vec_info stmt_info
= stmts
[j
];
1378 /* Verify if we can swap operands of this stmt. */
1379 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1381 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1383 if (!swap_not_matching
)
1385 swap_not_matching
= false;
1388 /* Verify if we can safely swap or if we committed to a
1389 specific operand order already.
1390 ??? Instead of modifying GIMPLE stmts here we could
1391 record whether we want to swap operands in the SLP
1392 node and temporarily do that when processing it
1393 (or wrap operand accessors in a helper). */
1394 else if (swap
[j
] != 0
1395 || STMT_VINFO_NUM_SLP_USES (stmt_info
))
1397 if (!swap_not_matching
)
1399 if (dump_enabled_p ())
1400 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1402 "Build SLP failed: cannot swap "
1403 "operands of shared stmt %G",
1407 swap_not_matching
= false;
1412 while (j
!= group_size
);
1414 /* Swap mismatched definition stmts. */
1415 if (dump_enabled_p ())
1416 dump_printf_loc (MSG_NOTE
, vect_location
,
1417 "Re-trying with swapped operands of stmts ");
1418 for (j
= 0; j
< group_size
; ++j
)
1419 if (matches
[j
] == !swap_not_matching
)
1421 std::swap (oprnds_info
[0]->def_stmts
[j
],
1422 oprnds_info
[1]->def_stmts
[j
]);
1423 std::swap (oprnds_info
[0]->ops
[j
],
1424 oprnds_info
[1]->ops
[j
]);
1425 if (dump_enabled_p ())
1426 dump_printf (MSG_NOTE
, "%d ", j
);
1428 if (dump_enabled_p ())
1429 dump_printf (MSG_NOTE
, "\n");
1430 /* And try again with scratch 'matches' ... */
1431 bool *tem
= XALLOCAVEC (bool, group_size
);
1432 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1433 group_size
, &this_max_nunits
,
1435 &this_tree_size
, bst_map
)) != NULL
)
1437 /* If we have all children of child built up from scalars then
1438 just throw that away and build it up this node from scalars. */
1439 if (is_a
<bb_vec_info
> (vinfo
)
1440 && !SLP_TREE_CHILDREN (child
).is_empty ()
1441 /* ??? Rejecting patterns this way doesn't work. We'd have
1442 to do extra work to cancel the pattern so the uses see the
1444 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child
)[0]))
1447 slp_tree grandchild
;
1449 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1450 if (SLP_TREE_DEF_TYPE (grandchild
) == vect_internal_def
)
1455 this_tree_size
= old_tree_size
;
1456 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1457 vect_free_slp_tree (grandchild
, false);
1458 SLP_TREE_CHILDREN (child
).truncate (0);
1460 if (dump_enabled_p ())
1461 dump_printf_loc (MSG_NOTE
, vect_location
,
1462 "Building parent vector operands from "
1463 "scalars instead\n");
1464 oprnd_info
->def_stmts
= vNULL
;
1465 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1466 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1467 oprnd_info
->ops
= vNULL
;
1469 children
.safe_push (child
);
1474 oprnd_info
->def_stmts
= vNULL
;
1475 children
.safe_push (child
);
1483 gcc_assert (child
== NULL
);
1484 FOR_EACH_VEC_ELT (children
, j
, child
)
1485 vect_free_slp_tree (child
, false);
1486 vect_free_oprnd_info (oprnds_info
);
1490 vect_free_oprnd_info (oprnds_info
);
1492 *tree_size
+= this_tree_size
+ 1;
1493 *max_nunits
= this_max_nunits
;
1495 node
= vect_create_new_slp_node (stmts
);
1496 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1497 SLP_TREE_CHILDREN (node
).splice (children
);
1501 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1504 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1505 slp_tree node
, hash_set
<slp_tree
> &visited
)
1508 stmt_vec_info stmt_info
;
1511 if (visited
.add (node
))
1514 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1515 dump_user_location_t user_loc
= loc
.get_user_location ();
1516 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u)\n",
1517 SLP_TREE_DEF_TYPE (node
) != vect_internal_def
1518 ? " (external)" : "", node
,
1519 estimated_poly_value (node
->max_nunits
));
1520 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1521 dump_printf_loc (metadata
, user_loc
, "\tstmt %d %G", i
, stmt_info
->stmt
);
1522 if (SLP_TREE_CHILDREN (node
).is_empty ())
1524 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1525 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1526 dump_printf (dump_kind
, " %p", (void *)child
);
1527 dump_printf (dump_kind
, "\n");
1528 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1529 vect_print_slp_tree (dump_kind
, loc
, child
, visited
);
1533 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1536 hash_set
<slp_tree
> visited
;
1537 vect_print_slp_tree (dump_kind
, loc
, node
, visited
);
1540 /* Mark the tree rooted at NODE with PURE_SLP. */
1543 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
1546 stmt_vec_info stmt_info
;
1549 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1552 if (visited
.add (node
))
1555 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1556 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
1558 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1559 vect_mark_slp_stmts (child
, visited
);
1563 vect_mark_slp_stmts (slp_tree node
)
1565 hash_set
<slp_tree
> visited
;
1566 vect_mark_slp_stmts (node
, visited
);
1569 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1572 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
1575 stmt_vec_info stmt_info
;
1578 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1581 if (visited
.add (node
))
1584 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1586 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1587 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1588 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1591 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1592 vect_mark_slp_stmts_relevant (child
, visited
);
1596 vect_mark_slp_stmts_relevant (slp_tree node
)
1598 hash_set
<slp_tree
> visited
;
1599 vect_mark_slp_stmts_relevant (node
, visited
);
1603 /* Rearrange the statements of NODE according to PERMUTATION. */
1606 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1607 vec
<unsigned> permutation
,
1608 hash_set
<slp_tree
> &visited
)
1613 if (visited
.add (node
))
1616 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1617 vect_slp_rearrange_stmts (child
, group_size
, permutation
, visited
);
1619 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1621 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1622 vec
<stmt_vec_info
> tmp_stmts
;
1623 tmp_stmts
.create (group_size
);
1624 tmp_stmts
.quick_grow (group_size
);
1625 stmt_vec_info stmt_info
;
1626 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1627 tmp_stmts
[permutation
[i
]] = stmt_info
;
1628 SLP_TREE_SCALAR_STMTS (node
).release ();
1629 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1631 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1633 gcc_assert (group_size
== SLP_TREE_SCALAR_OPS (node
).length ());
1635 tmp_ops
.create (group_size
);
1636 tmp_ops
.quick_grow (group_size
);
1638 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1639 tmp_ops
[permutation
[i
]] = op
;
1640 SLP_TREE_SCALAR_OPS (node
).release ();
1641 SLP_TREE_SCALAR_OPS (node
) = tmp_ops
;
1646 /* Attempt to reorder stmts in a reduction chain so that we don't
1647 require any load permutation. Return true if that was possible,
1648 otherwise return false. */
1651 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1653 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1656 slp_tree node
, load
;
1658 /* Compare all the permutation sequences to the first one. We know
1659 that at least one load is permuted. */
1660 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1661 if (!node
->load_permutation
.exists ())
1663 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1665 if (!load
->load_permutation
.exists ())
1667 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1668 if (lidx
!= node
->load_permutation
[j
])
1672 /* Check that the loads in the first sequence are different and there
1673 are no gaps between them. */
1674 auto_sbitmap
load_index (group_size
);
1675 bitmap_clear (load_index
);
1676 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1678 if (lidx
>= group_size
)
1680 if (bitmap_bit_p (load_index
, lidx
))
1683 bitmap_set_bit (load_index
, lidx
);
1685 for (i
= 0; i
< group_size
; i
++)
1686 if (!bitmap_bit_p (load_index
, i
))
1689 /* This permutation is valid for reduction. Since the order of the
1690 statements in the nodes is not important unless they are memory
1691 accesses, we can rearrange the statements in all the nodes
1692 according to the order of the loads. */
1693 hash_set
<slp_tree
> visited
;
1694 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1695 node
->load_permutation
, visited
);
1697 /* We are done, no actual permutations need to be generated. */
1698 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1699 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1701 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1702 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1703 /* But we have to keep those permutations that are required because
1704 of handling of gaps. */
1705 if (known_eq (unrolling_factor
, 1U)
1706 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1707 && DR_GROUP_GAP (first_stmt_info
) == 0))
1708 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1710 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1711 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1717 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1720 vect_gather_slp_loads (slp_instance inst
, slp_tree node
,
1721 hash_set
<slp_tree
> &visited
)
1723 if (visited
.add (node
))
1726 if (SLP_TREE_CHILDREN (node
).length () == 0)
1728 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1730 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1731 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1732 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1733 SLP_INSTANCE_LOADS (inst
).safe_push (node
);
1739 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1740 vect_gather_slp_loads (inst
, child
, visited
);
1745 vect_gather_slp_loads (slp_instance inst
, slp_tree node
)
1747 hash_set
<slp_tree
> visited
;
1748 vect_gather_slp_loads (inst
, node
, visited
);
1751 /* Check if the required load permutations in the SLP instance
1752 SLP_INSTN are supported. */
1755 vect_supported_load_permutation_p (slp_instance slp_instn
)
1757 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1758 unsigned int i
, j
, k
, next
;
1761 if (dump_enabled_p ())
1763 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1764 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1765 if (node
->load_permutation
.exists ())
1766 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1767 dump_printf (MSG_NOTE
, "%d ", next
);
1769 for (k
= 0; k
< group_size
; ++k
)
1770 dump_printf (MSG_NOTE
, "%d ", k
);
1771 dump_printf (MSG_NOTE
, "\n");
1774 /* In case of reduction every load permutation is allowed, since the order
1775 of the reduction statements is not important (as opposed to the case of
1776 grouped stores). The only condition we need to check is that all the
1777 load nodes are of the same size and have the same permutation (and then
1778 rearrange all the nodes of the SLP instance according to this
1781 /* Check that all the load nodes are of the same size. */
1782 /* ??? Can't we assert this? */
1783 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1784 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1787 node
= SLP_INSTANCE_TREE (slp_instn
);
1788 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1790 /* Reduction (there are no data-refs in the root).
1791 In reduction chain the order of the loads is not important. */
1792 if (!STMT_VINFO_DATA_REF (stmt_info
)
1793 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1794 vect_attempt_slp_rearrange_stmts (slp_instn
);
1796 /* In basic block vectorization we allow any subchain of an interleaving
1798 FORNOW: not supported in loop SLP because of realignment compications. */
1799 if (STMT_VINFO_BB_VINFO (stmt_info
))
1801 /* Check whether the loads in an instance form a subchain and thus
1802 no permutation is necessary. */
1803 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1805 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1807 bool subchain_p
= true;
1808 stmt_vec_info next_load_info
= NULL
;
1809 stmt_vec_info load_info
;
1810 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1813 && (next_load_info
!= load_info
1814 || DR_GROUP_GAP (load_info
) != 1))
1819 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
1822 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1825 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1826 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
1827 unsigned HOST_WIDE_INT nunits
;
1828 unsigned k
, maxk
= 0;
1829 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1832 /* In BB vectorization we may not actually use a loaded vector
1833 accessing elements in excess of DR_GROUP_SIZE. */
1834 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
1835 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
1836 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1838 if (dump_enabled_p ())
1839 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1840 "BB vectorization with gaps at the end of "
1841 "a load is not supported\n");
1845 /* Verify the permutation can be generated. */
1848 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1849 1, slp_instn
, true, &n_perms
))
1851 if (dump_enabled_p ())
1852 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1854 "unsupported load permutation\n");
1862 /* For loop vectorization verify we can generate the permutation. Be
1863 conservative about the vectorization factor, there are permutations
1864 that will use three vector inputs only starting from a specific factor
1865 and the vectorization factor is not yet final.
1866 ??? The SLP instance unrolling factor might not be the maximum one. */
1869 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1870 LOOP_VINFO_VECT_FACTOR
1871 (STMT_VINFO_LOOP_VINFO (stmt_info
)));
1872 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1873 if (node
->load_permutation
.exists ()
1874 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1875 slp_instn
, true, &n_perms
))
1882 /* Find the last store in SLP INSTANCE. */
1885 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1887 stmt_vec_info last
= NULL
;
1888 stmt_vec_info stmt_vinfo
;
1890 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
1892 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
1893 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
1899 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1900 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1901 (also containing the first GROUP1_SIZE stmts, since stores are
1902 consecutive), the second containing the remainder.
1903 Return the first stmt in the second group. */
1905 static stmt_vec_info
1906 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
1908 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
1909 gcc_assert (group1_size
> 0);
1910 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
1911 gcc_assert (group2_size
> 0);
1912 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
1914 stmt_vec_info stmt_info
= first_vinfo
;
1915 for (unsigned i
= group1_size
; i
> 1; i
--)
1917 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1918 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1920 /* STMT is now the last element of the first group. */
1921 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1922 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
1924 DR_GROUP_SIZE (group2
) = group2_size
;
1925 for (stmt_info
= group2
; stmt_info
;
1926 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
1928 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
1929 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1932 /* For the second group, the DR_GROUP_GAP is that before the original group,
1933 plus skipping over the first vector. */
1934 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
1936 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1937 DR_GROUP_GAP (first_vinfo
) += group2_size
;
1939 if (dump_enabled_p ())
1940 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1941 group1_size
, group2_size
);
1946 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1947 statements and a vector of NUNITS elements. */
1950 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
1952 return exact_div (common_multiple (nunits
, group_size
), group_size
);
1955 /* Analyze an SLP instance starting from a group of grouped stores. Call
1956 vect_build_slp_tree to build a tree of packed stmts if possible.
1957 Return FALSE if it's impossible to SLP any stmt in the loop. */
1960 vect_analyze_slp_instance (vec_info
*vinfo
,
1961 stmt_vec_info stmt_info
, unsigned max_tree_size
)
1963 slp_instance new_instance
;
1965 unsigned int group_size
;
1966 tree vectype
, scalar_type
= NULL_TREE
;
1968 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1969 vec
<stmt_vec_info
> scalar_stmts
;
1971 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1973 scalar_type
= TREE_TYPE (DR_REF (dr
));
1974 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
1975 group_size
= DR_GROUP_SIZE (stmt_info
);
1977 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1979 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1980 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1981 group_size
= REDUC_GROUP_SIZE (stmt_info
);
1985 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
1986 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1987 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
1992 if (dump_enabled_p ())
1993 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1994 "Build SLP failed: unsupported data-type %T\n",
1999 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2001 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2002 scalar_stmts
.create (group_size
);
2003 stmt_vec_info next_info
= stmt_info
;
2004 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2006 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2009 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2010 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
2013 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2015 /* Collect the reduction stmts and store them in
2016 SLP_TREE_SCALAR_STMTS. */
2019 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2020 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
2022 /* Mark the first element of the reduction chain as reduction to properly
2023 transform the node. In the reduction analysis phase only the last
2024 element of the chain is marked as reduction. */
2025 STMT_VINFO_DEF_TYPE (stmt_info
) = vect_reduction_def
;
2026 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
2027 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
2031 /* Collect reduction statements. */
2032 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2033 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2034 scalar_stmts
.safe_push (next_info
);
2037 /* Build the tree for the SLP instance. */
2038 bool *matches
= XALLOCAVEC (bool, group_size
);
2039 unsigned npermutes
= 0;
2040 scalar_stmts_to_slp_tree_map_t
*bst_map
2041 = new scalar_stmts_to_slp_tree_map_t ();
2042 poly_uint64 max_nunits
= nunits
;
2043 unsigned tree_size
= 0;
2044 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2045 &max_nunits
, matches
, &npermutes
,
2046 &tree_size
, bst_map
);
2047 /* The map keeps a reference on SLP nodes built, release that. */
2048 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
2049 it
!= bst_map
->end (); ++it
)
2051 vect_free_slp_tree ((*it
).second
, false);
2055 /* Calculate the unrolling factor based on the smallest type. */
2056 poly_uint64 unrolling_factor
2057 = calculate_unrolling_factor (max_nunits
, group_size
);
2059 if (maybe_ne (unrolling_factor
, 1U)
2060 && is_a
<bb_vec_info
> (vinfo
))
2062 unsigned HOST_WIDE_INT const_max_nunits
;
2063 if (!max_nunits
.is_constant (&const_max_nunits
)
2064 || const_max_nunits
> group_size
)
2066 if (dump_enabled_p ())
2067 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2068 "Build SLP failed: store group "
2069 "size not a multiple of the vector size "
2070 "in basic block SLP\n");
2071 vect_free_slp_tree (node
, false);
2074 /* Fatal mismatch. */
2075 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2076 vect_free_slp_tree (node
, false);
2080 /* Create a new SLP instance. */
2081 new_instance
= XNEW (class _slp_instance
);
2082 SLP_INSTANCE_TREE (new_instance
) = node
;
2083 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2084 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2085 SLP_INSTANCE_LOADS (new_instance
) = vNULL
;
2086 vect_gather_slp_loads (new_instance
, node
);
2087 if (dump_enabled_p ())
2088 dump_printf_loc (MSG_NOTE
, vect_location
,
2089 "SLP size %u vs. limit %u.\n",
2090 tree_size
, max_tree_size
);
2092 /* Compute the load permutation. */
2094 bool loads_permuted
= false;
2095 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2097 vec
<unsigned> load_permutation
;
2099 stmt_vec_info load_info
;
2100 bool this_load_permuted
= false;
2101 load_permutation
.create (group_size
);
2102 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT
2103 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2104 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2106 int load_place
= vect_get_place_in_interleaving_chain
2107 (load_info
, first_stmt_info
);
2108 gcc_assert (load_place
!= -1);
2109 if (load_place
!= j
)
2110 this_load_permuted
= true;
2111 load_permutation
.safe_push (load_place
);
2113 if (!this_load_permuted
2114 /* The load requires permutation when unrolling exposes
2115 a gap either because the group is larger than the SLP
2116 group-size or because there is a gap between the groups. */
2117 && (known_eq (unrolling_factor
, 1U)
2118 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
2119 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2121 load_permutation
.release ();
2124 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2125 loads_permuted
= true;
2130 if (!vect_supported_load_permutation_p (new_instance
))
2132 if (dump_enabled_p ())
2133 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2134 "Build SLP failed: unsupported load "
2135 "permutation %G", stmt_info
->stmt
);
2136 vect_free_slp_instance (new_instance
, false);
2141 /* If the loads and stores can be handled with load/store-lan
2142 instructions do not generate this SLP instance. */
2143 if (is_a
<loop_vec_info
> (vinfo
)
2145 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2148 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2150 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2151 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2152 /* Use SLP for strided accesses (or if we can't load-lanes). */
2153 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2154 || ! vect_load_lanes_supported
2155 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2156 DR_GROUP_SIZE (stmt_vinfo
), false))
2159 if (i
== SLP_INSTANCE_LOADS (new_instance
).length ())
2161 if (dump_enabled_p ())
2162 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2163 "Built SLP cancelled: can use "
2164 "load/store-lanes\n");
2165 vect_free_slp_instance (new_instance
, false);
2170 vinfo
->slp_instances
.safe_push (new_instance
);
2172 if (dump_enabled_p ())
2174 dump_printf_loc (MSG_NOTE
, vect_location
,
2175 "Final SLP tree for instance:\n");
2176 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2184 /* Failed to SLP. */
2185 /* Free the allocated memory. */
2186 scalar_stmts
.release ();
2189 /* For basic block SLP, try to break the group up into multiples of the
2191 unsigned HOST_WIDE_INT const_nunits
;
2192 if (is_a
<bb_vec_info
> (vinfo
)
2193 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2194 && DR_GROUP_FIRST_ELEMENT (stmt_info
)
2195 && nunits
.is_constant (&const_nunits
))
2197 /* We consider breaking the group only on VF boundaries from the existing
2199 for (i
= 0; i
< group_size
; i
++)
2200 if (!matches
[i
]) break;
2202 if (i
>= const_nunits
&& i
< group_size
)
2204 /* Split into two groups at the first vector boundary before i. */
2205 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2206 unsigned group1_size
= i
& ~(const_nunits
- 1);
2208 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2210 bool res
= vect_analyze_slp_instance (vinfo
, stmt_info
,
2212 /* If the first non-match was in the middle of a vector,
2213 skip the rest of that vector. */
2214 if (group1_size
< i
)
2216 i
= group1_size
+ const_nunits
;
2218 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2221 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2224 /* Even though the first vector did not all match, we might be able to SLP
2225 (some) of the remainder. FORNOW ignore this possibility. */
2232 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2233 trees of packed scalar stmts if SLP is possible. */
2236 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2239 stmt_vec_info first_element
;
2241 DUMP_VECT_SCOPE ("vect_analyze_slp");
2243 /* Find SLP sequences starting from groups of grouped stores. */
2244 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2245 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2247 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2249 if (loop_vinfo
->reduction_chains
.length () > 0)
2251 /* Find SLP sequences starting from reduction chains. */
2252 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2253 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2256 /* Dissolve reduction chain group. */
2257 stmt_vec_info vinfo
= first_element
;
2260 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2261 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2262 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2265 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2269 /* Find SLP sequences starting from groups of reductions. */
2270 if (loop_vinfo
->reductions
.length () > 1)
2271 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2275 return opt_result::success ();
2279 /* For each possible SLP instance decide whether to SLP it and calculate overall
2280 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2281 least one instance. */
2284 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2287 poly_uint64 unrolling_factor
= 1;
2288 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2289 slp_instance instance
;
2290 int decided_to_slp
= 0;
2292 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2294 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2296 /* FORNOW: SLP if you can. */
2297 /* All unroll factors have the form vinfo->vector_size * X for some
2298 rational X, so they must have a common multiple. */
2300 = force_common_multiple (unrolling_factor
,
2301 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2303 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2304 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2305 loop-based vectorization. Such stmts will be marked as HYBRID. */
2306 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
2310 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2312 if (decided_to_slp
&& dump_enabled_p ())
2314 dump_printf_loc (MSG_NOTE
, vect_location
,
2315 "Decided to SLP %d instances. Unrolling factor ",
2317 dump_dec (MSG_NOTE
, unrolling_factor
);
2318 dump_printf (MSG_NOTE
, "\n");
2321 return (decided_to_slp
> 0);
2325 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2326 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2329 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
,
2330 hash_map
<slp_tree
, unsigned> &visited
)
2332 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2333 imm_use_iterator imm_iter
;
2335 stmt_vec_info use_vinfo
;
2337 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2340 /* We need to union stype over the incoming graph edges but we still
2341 want to limit recursion to stay O(N+E). */
2342 bool only_edge
= (++visited
.get_or_insert (node
) < node
->refcnt
);
2344 /* Propagate hybrid down the SLP tree. */
2345 if (stype
== hybrid
)
2347 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2349 else if (!only_edge
)
2351 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2352 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2353 /* If we get a pattern stmt here we have to use the LHS of the
2354 original stmt for immediate uses. */
2355 gimple
*stmt
= vect_orig_stmt (stmt_vinfo
)->stmt
;
2357 if (gimple_code (stmt
) == GIMPLE_PHI
)
2358 def
= gimple_phi_result (stmt
);
2360 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2362 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2364 use_vinfo
= loop_vinfo
->lookup_stmt (use_stmt
);
2367 use_vinfo
= vect_stmt_to_vectorize (use_vinfo
);
2368 if (!STMT_SLP_TYPE (use_vinfo
)
2369 && (STMT_VINFO_RELEVANT (use_vinfo
)
2370 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2371 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2372 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2376 "def in non-SLP stmt: %G", use_stmt
);
2383 && !HYBRID_SLP_STMT (stmt_vinfo
))
2385 if (dump_enabled_p ())
2386 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2388 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2392 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2393 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
2394 && SLP_TREE_DEF_TYPE (child
) != vect_constant_def
)
2395 vect_detect_hybrid_slp_stmts (child
, i
, stype
, visited
);
2399 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2401 hash_map
<slp_tree
, unsigned> visited
;
2402 vect_detect_hybrid_slp_stmts (node
, i
, stype
, visited
);
2405 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2408 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2410 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2411 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2416 stmt_vec_info def_stmt_info
= loop_vinfo
->lookup_def (*tp
);
2417 if (def_stmt_info
&& PURE_SLP_STMT (def_stmt_info
))
2419 if (dump_enabled_p ())
2420 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2421 def_stmt_info
->stmt
);
2422 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
2429 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2432 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2433 stmt_vec_info use_vinfo
= loop_vinfo
->lookup_stmt (gsi_stmt (*gsi
));
2434 /* If the stmt is in a SLP instance then this isn't a reason
2435 to mark use definitions in other SLP instances as hybrid. */
2436 if (! STMT_SLP_TYPE (use_vinfo
)
2437 && (STMT_VINFO_RELEVANT (use_vinfo
)
2438 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2439 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2440 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2447 /* Find stmts that must be both vectorized and SLPed. */
2450 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2453 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2454 slp_instance instance
;
2456 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2458 /* First walk all pattern stmt in the loop and mark defs of uses as
2459 hybrid because immediate uses in them are not recorded. */
2460 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2462 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2463 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2466 gimple
*stmt
= gsi_stmt (gsi
);
2467 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
2468 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2471 memset (&wi
, 0, sizeof (wi
));
2472 wi
.info
= loop_vinfo
;
2473 gimple_stmt_iterator gsi2
2474 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
)->stmt
);
2475 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2476 vect_detect_hybrid_slp_1
, &wi
);
2477 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2478 vect_detect_hybrid_slp_2
,
2479 vect_detect_hybrid_slp_1
, &wi
);
2484 /* Then walk the SLP instance trees marking stmts with uses in
2485 non-SLP stmts as hybrid, also propagating hybrid down the
2486 SLP tree, collecting the above info on-the-fly. */
2487 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2489 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2490 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2496 /* Initialize a bb_vec_info struct for the statements between
2497 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2499 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2500 gimple_stmt_iterator region_end_in
,
2501 vec_info_shared
*shared
)
2502 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2503 bb (gsi_bb (region_begin_in
)),
2504 region_begin (region_begin_in
),
2505 region_end (region_end_in
)
2507 gimple_stmt_iterator gsi
;
2509 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2512 gimple
*stmt
= gsi_stmt (gsi
);
2513 gimple_set_uid (stmt
, 0);
2521 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2522 stmts in the basic block. */
2524 _bb_vec_info::~_bb_vec_info ()
2526 for (gimple_stmt_iterator si
= region_begin
;
2527 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2528 /* Reset region marker. */
2529 gimple_set_uid (gsi_stmt (si
), -1);
2534 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2535 given then that child nodes have already been processed, and that
2536 their def types currently match their SLP node's def type. */
2539 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2540 slp_instance node_instance
,
2541 stmt_vector_for_cost
*cost_vec
)
2543 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2544 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2546 /* For BB vectorization vector types are assigned here.
2547 Memory accesses already got their vector type assigned
2548 in vect_analyze_data_refs. */
2549 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2551 && ! STMT_VINFO_DATA_REF (stmt_info
))
2553 tree vectype
, nunits_vectype
;
2554 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
2556 /* We checked this when building the node. */
2558 if (vectype
== boolean_type_node
)
2560 vectype
= vect_get_mask_type_for_stmt (stmt_info
);
2562 /* vect_get_mask_type_for_stmt has already explained the
2567 stmt_vec_info sstmt_info
;
2569 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt_info
)
2570 STMT_VINFO_VECTYPE (sstmt_info
) = vectype
;
2573 /* Calculate the number of vector statements to be created for the
2574 scalar stmts in this node. For SLP reductions it is equal to the
2575 number of vector statements in the children (which has already been
2576 calculated by the recursive call). Otherwise it is the number of
2577 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2578 VF divided by the number of elements in a vector. */
2579 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2580 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2582 for (unsigned i
= 0; i
< SLP_TREE_CHILDREN (node
).length (); ++i
)
2583 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node
)[i
]) == vect_internal_def
)
2585 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2586 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[i
]);
2593 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2594 vf
= loop_vinfo
->vectorization_factor
;
2597 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2598 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2599 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2600 = vect_get_num_vectors (vf
* group_size
, vectype
);
2604 return vect_analyze_stmt (stmt_info
, &dummy
, node
, node_instance
, cost_vec
);
2607 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2608 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2610 Return true if the operations are supported. */
2613 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2614 slp_instance node_instance
,
2615 scalar_stmts_to_slp_tree_map_t
*visited
,
2616 scalar_stmts_to_slp_tree_map_t
*lvisited
,
2617 stmt_vector_for_cost
*cost_vec
)
2622 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2625 /* If we already analyzed the exact same set of scalar stmts we're done.
2626 We share the generated vector stmts for those. */
2628 if ((leader
= visited
->get (SLP_TREE_SCALAR_STMTS (node
)))
2629 || (leader
= lvisited
->get (SLP_TREE_SCALAR_STMTS (node
))))
2631 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2632 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader
);
2636 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2637 doesn't result in any issue since we throw away the lvisited set
2639 lvisited
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
2641 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2642 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2643 visited
, lvisited
, cost_vec
))
2646 /* ??? We have to catch the case late where two first scalar stmts appear
2647 in multiple SLP children with different def type and fail. Remember
2648 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2649 match it when that is vect_internal_def. */
2650 auto_vec
<vect_def_type
, 4> dt
;
2651 dt
.safe_grow (SLP_TREE_CHILDREN (node
).length ());
2652 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2653 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2654 dt
[j
] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]);
2656 /* Push SLP node def-type to stmt operands. */
2657 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2658 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
2659 && SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2660 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2661 = SLP_TREE_DEF_TYPE (child
);
2663 /* Check everything worked out. */
2665 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2666 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2668 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2670 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2671 != SLP_TREE_DEF_TYPE (child
))
2674 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2678 if (!res
&& dump_enabled_p ())
2679 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2680 "not vectorized: same operand with different "
2681 "def type in stmt.\n");
2684 res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2687 /* Restore def-types. */
2688 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2689 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2690 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]) = dt
[j
];
2696 /* Analyze statements in SLP instances of VINFO. Return true if the
2697 operations are supported. */
2700 vect_slp_analyze_operations (vec_info
*vinfo
)
2702 slp_instance instance
;
2705 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2707 scalar_stmts_to_slp_tree_map_t
*visited
2708 = new scalar_stmts_to_slp_tree_map_t ();
2709 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2711 scalar_stmts_to_slp_tree_map_t lvisited
;
2712 stmt_vector_for_cost cost_vec
;
2713 cost_vec
.create (2);
2714 if (!vect_slp_analyze_node_operations (vinfo
,
2715 SLP_INSTANCE_TREE (instance
),
2716 instance
, visited
, &lvisited
,
2719 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2720 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2721 if (dump_enabled_p ())
2722 dump_printf_loc (MSG_NOTE
, vect_location
,
2723 "removing SLP instance operations starting from: %G",
2725 vect_free_slp_instance (instance
, false);
2726 vinfo
->slp_instances
.ordered_remove (i
);
2727 cost_vec
.release ();
2731 for (scalar_stmts_to_slp_tree_map_t::iterator x
= lvisited
.begin();
2732 x
!= lvisited
.end(); ++x
)
2733 visited
->put ((*x
).first
.copy (), (*x
).second
);
2736 add_stmt_costs (vinfo
->target_cost_data
, &cost_vec
);
2737 cost_vec
.release ();
2742 return !vinfo
->slp_instances
.is_empty ();
2746 /* Compute the scalar cost of the SLP node NODE and its children
2747 and return it. Do not account defs that are marked in LIFE and
2748 update LIFE according to uses of NODE. */
2751 vect_bb_slp_scalar_cost (basic_block bb
,
2752 slp_tree node
, vec
<bool, va_heap
> *life
,
2753 stmt_vector_for_cost
*cost_vec
,
2754 hash_set
<slp_tree
> &visited
)
2757 stmt_vec_info stmt_info
;
2760 if (visited
.add (node
))
2763 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2765 gimple
*stmt
= stmt_info
->stmt
;
2766 vec_info
*vinfo
= stmt_info
->vinfo
;
2767 ssa_op_iter op_iter
;
2768 def_operand_p def_p
;
2773 /* If there is a non-vectorized use of the defs then the scalar
2774 stmt is kept live in which case we do not account it or any
2775 required defs in the SLP children in the scalar cost. This
2776 way we make the vectorization more costly when compared to
2778 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2780 imm_use_iterator use_iter
;
2782 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2783 if (!is_gimple_debug (use_stmt
))
2785 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
2786 if (!use_stmt_info
|| !PURE_SLP_STMT (use_stmt_info
))
2789 BREAK_FROM_IMM_USE_STMT (use_iter
);
2796 /* Count scalar stmts only once. */
2797 if (gimple_visited_p (stmt
))
2799 gimple_set_visited (stmt
, true);
2801 vect_cost_for_stmt kind
;
2802 if (STMT_VINFO_DATA_REF (stmt_info
))
2804 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2807 kind
= scalar_store
;
2811 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
2814 auto_vec
<bool, 20> subtree_life
;
2815 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2817 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2819 /* Do not directly pass LIFE to the recursive call, copy it to
2820 confine changes in the callee to the current child/subtree. */
2821 subtree_life
.safe_splice (*life
);
2822 vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
, cost_vec
,
2824 subtree_life
.truncate (0);
2830 vect_bb_slp_scalar_cost (basic_block bb
,
2831 slp_tree node
, vec
<bool, va_heap
> *life
,
2832 stmt_vector_for_cost
*cost_vec
)
2834 hash_set
<slp_tree
> visited
;
2835 vect_bb_slp_scalar_cost (bb
, node
, life
, cost_vec
, visited
);
2838 /* Check if vectorization of the basic block is profitable. */
2841 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2843 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2844 slp_instance instance
;
2846 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2847 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2849 /* Calculate scalar cost. */
2850 stmt_vector_for_cost scalar_costs
;
2851 scalar_costs
.create (0);
2852 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2854 auto_vec
<bool, 20> life
;
2855 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2856 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2857 SLP_INSTANCE_TREE (instance
),
2858 &life
, &scalar_costs
);
2860 void *target_cost_data
= init_cost (NULL
);
2861 add_stmt_costs (target_cost_data
, &scalar_costs
);
2862 scalar_costs
.release ();
2864 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
2865 destroy_cost_data (target_cost_data
);
2867 /* Unset visited flag. */
2868 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2869 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2870 gimple_set_visited (gsi_stmt (gsi
), false);
2872 /* Complete the target-specific cost calculation. */
2873 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2874 &vec_inside_cost
, &vec_epilogue_cost
);
2876 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2878 if (dump_enabled_p ())
2880 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2881 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2883 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2884 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2885 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2888 /* Vectorization is profitable if its cost is more than the cost of scalar
2889 version. Note that we err on the vector side for equal cost because
2890 the cost estimate is otherwise quite pessimistic (constant uses are
2891 free on the scalar side but cost a load on the vector side for
2893 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2899 /* Check if the region described by BB_VINFO can be vectorized, returning
2900 true if so. When returning false, set FATAL to true if the same failure
2901 would prevent vectorization at other vector sizes, false if it is still
2902 worth trying other sizes. N_STMTS is the number of statements in the
2906 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo
, int n_stmts
, bool &fatal
)
2908 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2910 slp_instance instance
;
2912 poly_uint64 min_vf
= 2;
2914 /* The first group of checks is independent of the vector size. */
2917 /* Analyze the data references. */
2919 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, NULL
))
2921 if (dump_enabled_p ())
2922 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2923 "not vectorized: unhandled data-ref in basic "
2928 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2930 if (dump_enabled_p ())
2931 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2932 "not vectorized: not enough data-refs in "
2937 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2939 if (dump_enabled_p ())
2940 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2941 "not vectorized: unhandled data access in "
2946 /* If there are no grouped stores in the region there is no need
2947 to continue with pattern recog as vect_analyze_slp will fail
2949 if (bb_vinfo
->grouped_stores
.is_empty ())
2951 if (dump_enabled_p ())
2952 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2953 "not vectorized: no grouped stores in "
2958 /* While the rest of the analysis below depends on it in some way. */
2961 vect_pattern_recog (bb_vinfo
);
2963 /* Check the SLP opportunities in the basic block, analyze and build SLP
2965 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
2967 if (dump_enabled_p ())
2969 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2970 "Failed to SLP the basic block.\n");
2971 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2972 "not vectorized: failed to find SLP opportunities "
2973 "in basic block.\n");
2978 vect_record_base_alignments (bb_vinfo
);
2980 /* Analyze and verify the alignment of data references and the
2981 dependence in the SLP instances. */
2982 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
2984 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
2985 || ! vect_slp_analyze_instance_dependence (instance
))
2987 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2988 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2989 if (dump_enabled_p ())
2990 dump_printf_loc (MSG_NOTE
, vect_location
,
2991 "removing SLP instance operations starting from: %G",
2993 vect_free_slp_instance (instance
, false);
2994 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
2998 /* Mark all the statements that we want to vectorize as pure SLP and
3000 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
3001 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3005 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3008 if (!vect_slp_analyze_operations (bb_vinfo
))
3010 if (dump_enabled_p ())
3011 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3012 "not vectorized: bad operation in basic block.\n");
3016 /* Cost model: check if the vectorization is worthwhile. */
3017 if (!unlimited_cost_model (NULL
)
3018 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3020 if (dump_enabled_p ())
3021 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3022 "not vectorized: vectorization is not "
3027 if (dump_enabled_p ())
3028 dump_printf_loc (MSG_NOTE
, vect_location
,
3029 "Basic block will be vectorized using SLP\n");
3033 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3034 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3035 on success. The region has N_STMTS statements and has the datarefs
3036 given by DATAREFS. */
3039 vect_slp_bb_region (gimple_stmt_iterator region_begin
,
3040 gimple_stmt_iterator region_end
,
3041 vec
<data_reference_p
> datarefs
,
3042 unsigned int n_stmts
)
3044 bb_vec_info bb_vinfo
;
3045 auto_vector_sizes vector_sizes
;
3047 /* Autodetect first vector size we try. */
3048 poly_uint64 next_vector_size
= 0;
3049 targetm
.vectorize
.autovectorize_vector_sizes (&vector_sizes
, false);
3050 unsigned int next_size
= 0;
3052 vec_info_shared shared
;
3054 poly_uint64 autodetected_vector_size
= 0;
3057 bool vectorized
= false;
3059 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, &shared
);
3061 bool first_time_p
= shared
.datarefs
.is_empty ();
3062 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
3064 bb_vinfo
->shared
->save_datarefs ();
3066 bb_vinfo
->shared
->check_datarefs ();
3067 bb_vinfo
->vector_size
= next_vector_size
;
3069 if (vect_slp_analyze_bb_1 (bb_vinfo
, n_stmts
, fatal
)
3070 && dbg_cnt (vect_slp
))
3072 if (dump_enabled_p ())
3073 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3075 bb_vinfo
->shared
->check_datarefs ();
3076 vect_schedule_slp (bb_vinfo
);
3078 unsigned HOST_WIDE_INT bytes
;
3079 if (dump_enabled_p ())
3081 if (bb_vinfo
->vector_size
.is_constant (&bytes
))
3082 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3083 "basic block part vectorized using %wu byte "
3084 "vectors\n", bytes
);
3086 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3087 "basic block part vectorized using variable "
3088 "length vectors\n");
3095 autodetected_vector_size
= bb_vinfo
->vector_size
;
3099 if (next_size
< vector_sizes
.length ()
3100 && known_eq (vector_sizes
[next_size
], autodetected_vector_size
))
3104 || next_size
== vector_sizes
.length ()
3105 || known_eq (bb_vinfo
->vector_size
, 0U)
3106 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3107 vector sizes will fail do not bother iterating. */
3111 /* Try the next biggest vector size. */
3112 next_vector_size
= vector_sizes
[next_size
++];
3113 if (dump_enabled_p ())
3115 dump_printf_loc (MSG_NOTE
, vect_location
,
3116 "***** Re-trying analysis with "
3118 dump_dec (MSG_NOTE
, next_vector_size
);
3119 dump_printf (MSG_NOTE
, "\n");
3124 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3125 true if anything in the basic-block was vectorized. */
3128 vect_slp_bb (basic_block bb
)
3130 gimple_stmt_iterator gsi
;
3131 bool any_vectorized
= false;
3133 gsi
= gsi_start_bb (bb
);
3134 while (!gsi_end_p (gsi
))
3136 gimple_stmt_iterator region_begin
= gsi
;
3137 vec
<data_reference_p
> datarefs
= vNULL
;
3140 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3142 gimple
*stmt
= gsi_stmt (gsi
);
3143 if (is_gimple_debug (stmt
))
3147 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3148 vect_location
= stmt
;
3150 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3154 /* Skip leading unhandled stmts. */
3155 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3161 gimple_stmt_iterator region_end
= gsi
;
3163 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
3165 if (dump_enabled_p ())
3166 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3167 "not vectorized: too many instructions in "
3170 else if (vect_slp_bb_region (region_begin
, region_end
, datarefs
, insns
))
3171 any_vectorized
= true;
3173 if (gsi_end_p (region_end
))
3176 /* Skip the unhandled stmt. */
3180 return any_vectorized
;
3184 /* Return 1 if vector type STMT_VINFO is a boolean vector. */
3187 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo
)
3189 enum tree_code code
= gimple_expr_code (stmt_vinfo
->stmt
);
3191 enum vect_def_type dt
;
3193 /* For comparison and COND_EXPR type is chosen depending
3194 on the non-constant other comparison operand. */
3195 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3197 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3198 op
= gimple_assign_rhs1 (stmt
);
3200 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3203 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3206 if (code
== COND_EXPR
)
3208 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3209 tree cond
= gimple_assign_rhs1 (stmt
);
3211 if (TREE_CODE (cond
) == SSA_NAME
)
3214 op
= TREE_OPERAND (cond
, 0);
3216 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3219 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3222 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3225 /* Build a variable-length vector in which the elements in ELTS are repeated
3226 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3227 RESULTS and add any new instructions to SEQ.
3229 The approach we use is:
3231 (1) Find a vector mode VM with integer elements of mode IM.
3233 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3234 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3235 from small vectors to IM.
3237 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3239 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3240 correct byte contents.
3242 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3244 We try to find the largest IM for which this sequence works, in order
3245 to cut down on the number of interleaves. */
3248 duplicate_and_interleave (vec_info
*vinfo
, gimple_seq
*seq
, tree vector_type
,
3249 vec
<tree
> elts
, unsigned int nresults
,
3252 unsigned int nelts
= elts
.length ();
3253 tree element_type
= TREE_TYPE (vector_type
);
3255 /* (1) Find a vector mode VM with integer elements of mode IM. */
3256 unsigned int nvectors
= 1;
3257 tree new_vector_type
;
3259 if (!can_duplicate_and_interleave_p (vinfo
, nelts
, TYPE_MODE (element_type
),
3260 &nvectors
, &new_vector_type
,
3264 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3265 unsigned int partial_nelts
= nelts
/ nvectors
;
3266 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3268 tree_vector_builder partial_elts
;
3269 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3270 pieces
.quick_grow (nvectors
* 2);
3271 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3273 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3274 ELTS' has mode IM. */
3275 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3276 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3277 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3278 tree t
= gimple_build_vector (seq
, &partial_elts
);
3279 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3280 TREE_TYPE (new_vector_type
), t
);
3282 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3283 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3286 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3287 correct byte contents.
3289 We need to repeat the following operation log2(nvectors) times:
3291 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3292 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3294 However, if each input repeats every N elements and the VF is
3295 a multiple of N * 2, the HI result is the same as the LO. */
3296 unsigned int in_start
= 0;
3297 unsigned int out_start
= nvectors
;
3298 unsigned int hi_start
= nvectors
/ 2;
3299 /* A bound on the number of outputs needed to produce NRESULTS results
3300 in the final iteration. */
3301 unsigned int noutputs_bound
= nvectors
* nresults
;
3302 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3304 noutputs_bound
/= 2;
3305 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3306 for (unsigned int i
= 0; i
< limit
; ++i
)
3309 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3312 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3316 tree output
= make_ssa_name (new_vector_type
);
3317 tree input1
= pieces
[in_start
+ (i
/ 2)];
3318 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3319 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3322 gimple_seq_add_stmt (seq
, stmt
);
3323 pieces
[out_start
+ i
] = output
;
3325 std::swap (in_start
, out_start
);
3328 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3329 results
.reserve (nresults
);
3330 for (unsigned int i
= 0; i
< nresults
; ++i
)
3332 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3333 pieces
[in_start
+ i
]));
3335 results
.quick_push (results
[i
- nvectors
]);
3339 /* For constant and loop invariant defs of SLP_NODE this function returns
3340 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3341 OP_NODE determines the node for the operand containing the scalar
/* NOTE(review): the text below is a line-mangled extraction — statements are
   split mid-token and brace/short lines (e.g. the `tree vector_type;`
   declaration, `else` keywords, `{`/`}`) were dropped.  Comments added here
   describe only what the surviving fragments show; confirm against the
   upstream source before compiling.  */
3345 vect_get_constant_vectors (slp_tree op_node
, slp_tree slp_node
,
3346 vec
<tree
> *vec_oprnds
)
/* First scalar stmt of the SLP node; its vec_info and vectype drive the
   choice of vector type for the invariant operands.  */
3348 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3349 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3350 unsigned HOST_WIDE_INT nunits
;
3352 unsigned j
, number_of_places_left_in_vector
;
3355 int group_size
= op_node
->ops
.length ();
3356 unsigned int vec_num
, i
;
3357 unsigned number_of_copies
= 1;
3359 tree neutral_op
= NULL
;
3360 gimple_seq ctor_seq
= NULL
;
3361 auto_vec
<tree
, 16> permute_results
;
3363 /* ??? SLP analysis should compute the vector type for the
3364 constant / invariant and store it in the SLP node. */
3365 tree op
= op_node
->ops
[0];
3366 /* Check if vector type is a boolean vector. */
3367 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
/* Scalar booleans feeding a mask operand get a same-sized truth vector
   type instead of the ordinary vectype for the scalar type.  */
3368 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3369 && vect_mask_constant_operand_p (stmt_vinfo
))
3371 = build_same_sized_truth_vector_type (stmt_vectype
);
3373 vector_type
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
));
3375 unsigned int number_of_vectors
3376 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3377 * TYPE_VECTOR_SUBPARTS (stmt_vectype
),
3379 vec_oprnds
->create (number_of_vectors
);
3380 auto_vec
<tree
> voprnds (number_of_vectors
);
3382 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3383 created vectors. It is greater than 1 if unrolling is performed.
3385 For example, we have two scalar operands, s1 and s2 (e.g., group of
3386 strided accesses of size two), while NUNITS is four (i.e., four scalars
3387 of this type can be packed in a vector). The output vector will contain
3388 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3391 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3392 containing the operands.
3394 For example, NUNITS is four as before, and the group size is 8
3395 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3396 {s5, s6, s7, s8}. */
3398 /* When using duplicate_and_interleave, we just need one element for
3399 each scalar statement. */
3400 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3401 nunits
= group_size
;
3403 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3405 number_of_places_left_in_vector
= nunits
;
3407 tree_vector_builder
elts (vector_type
, nunits
, 1);
3408 elts
.quick_grow (nunits
);
3409 bool place_after_defs
= false;
/* Fill vector element slots from the scalar operands, iterating the group
   backwards so the vector is built from the last element down; each time
   a vector is full it is materialized and pushed onto VOPRNDS.  */
3410 for (j
= 0; j
< number_of_copies
; j
++)
3412 for (i
= group_size
- 1; op_node
->ops
.iterate (i
, &op
); i
--)
3414 /* Create 'vect_ = {op0,op1,...,opn}'. */
3415 number_of_places_left_in_vector
--;
/* Operand type differs from the vector element type: convert the scalar.
   Constants are folded directly; SSA operands get an inserted conversion
   statement accumulated in CTOR_SEQ.  */
3417 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3419 if (CONSTANT_CLASS_P (op
))
3421 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3423 /* Can't use VIEW_CONVERT_EXPR for booleans because
3424 of possibly different sizes of scalar value and
3426 if (integer_zerop (op
))
3427 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3428 else if (integer_onep (op
))
3429 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3434 op
= fold_unary (VIEW_CONVERT_EXPR
,
3435 TREE_TYPE (vector_type
), op
);
3436 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3440 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3442 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3445 = build_all_ones_cst (TREE_TYPE (vector_type
));
3447 = build_zero_cst (TREE_TYPE (vector_type
));
3448 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3449 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3455 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3458 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3461 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3465 elts
[number_of_places_left_in_vector
] = op
;
/* For a BB-local SSA operand defined in the same basic block, the vector
   constructor must be emitted after that definition rather than at the
   default location.  */
3466 if (!CONSTANT_CLASS_P (op
))
3468 if (TREE_CODE (orig_op
) == SSA_NAME
3469 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3470 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3471 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3472 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3473 place_after_defs
= true;
/* Vector is full: build it either directly or, for variable-length
   vectors, via duplicate_and_interleave (results cached in
   PERMUTE_RESULTS across iterations).  */
3475 if (number_of_places_left_in_vector
== 0)
3478 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3479 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3480 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3483 if (permute_results
.is_empty ())
3484 duplicate_and_interleave (vinfo
, &ctor_seq
, vector_type
,
3485 elts
, number_of_vectors
,
3487 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3490 gimple_stmt_iterator gsi
;
3491 if (place_after_defs
)
3493 stmt_vec_info last_stmt_info
3494 = vect_find_last_scalar_stmt_in_slp (slp_node
);
3495 gsi
= gsi_for_stmt (last_stmt_info
->stmt
);
3496 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3500 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
/* Any conversion statements accumulated in CTOR_SEQ must dominate the
   vector initialization, so insert them just before INIT's def stmt.  */
3502 if (ctor_seq
!= NULL
)
3504 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3505 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3508 voprnds
.quick_push (init
);
3509 place_after_defs
= false;
3510 number_of_places_left_in_vector
= nunits
;
3512 elts
.new_vector (vector_type
, nunits
, 1);
3513 elts
.quick_grow (nunits
);
3518 /* Since the vectors are created in the reverse order, we should invert
3520 vec_num
= voprnds
.length ();
3521 for (j
= vec_num
; j
!= 0; j
--)
3523 vop
= voprnds
[j
- 1];
3524 vec_oprnds
->quick_push (vop
);
3527 /* In case that VF is greater than the unrolling factor needed for the SLP
3528 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3529 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3530 to replicate the vectors. */
3531 while (number_of_vectors
> vec_oprnds
->length ())
3533 tree neutral_vec
= NULL
;
3538 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3540 vec_oprnds
->quick_push (neutral_vec
);
/* No neutral op: pad by recycling the already-created vectors.  */
3544 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3545 vec_oprnds
->quick_push (vop
);
3551 /* Get vectorized definitions from SLP_NODE that contains corresponding
3552 vectorized def-stmts. */
/* NOTE(review): mangled extraction — the return type, braces and the loop
   index declaration were dropped; fragments below show the function simply
   collects the LHS of every already-created vector stmt of SLP_NODE into
   VEC_OPRNDS.  */
3555 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3557 stmt_vec_info vec_def_stmt_info
;
/* The node must already have been vectorized by the caller.  */
3560 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3562 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt_info
)
3563 vec_oprnds
->quick_push (gimple_get_lhs (vec_def_stmt_info
->stmt
));
3567 /* Get N vectorized definitions for SLP_NODE.
3568 If the scalar definitions are loop invariants or constants, collect them and
3569 call vect_get_constant_vectors() to create vector stmts.
3570 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3571 must be stored in the corresponding child of SLP_NODE, and we call
3572 vect_get_slp_vect_defs () to retrieve them. */
/* NOTE(review): mangled extraction — the guard that presumably sets N to the
   child count (likely only when N exceeds it) lost its condition line;
   confirm against upstream.  */
3575 vect_get_slp_defs (slp_tree slp_node
, vec
<vec
<tree
> > *vec_oprnds
, unsigned n
)
3578 n
= SLP_TREE_CHILDREN (slp_node
).length ();
/* One inner vec<tree> of operand defs is produced per child node.  */
3580 for (unsigned i
= 0; i
< n
; ++i
)
3582 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[i
];
3584 vec
<tree
> vec_defs
= vNULL
;
3586 /* For each operand we check if it has vectorized definitions in a child
3587 node or we need to create them (for invariants and constants). */
3588 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3590 vec_defs
.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child
));
3591 vect_get_slp_vect_defs (child
, &vec_defs
);
/* Non-internal defs (constants/invariants) get their vectors built here.  */
3594 vect_get_constant_vectors (child
, slp_node
, &vec_defs
);
3596 vec_oprnds
->quick_push (vec_defs
);
3600 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3601 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3602 permute statements for the SLP node NODE of the SLP instance
3603 SLP_NODE_INSTANCE. */
/* NOTE(review): mangled extraction — several declarations (machine_mode
   `mode`, `vec_index`, `noop_p`, `perm_dest`, `perm_stmt`), braces and some
   argument lines were dropped.  Comments describe only what the surviving
   fragments establish.  */
3606 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3607 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3608 slp_instance slp_node_instance
, bool analyze_only
,
3611 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3612 vec_info
*vinfo
= stmt_info
->vinfo
;
3614 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3615 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3616 unsigned int mask_element
;
/* Permuted SLP loads only make sense for grouped accesses; use the group's
   first element as the representative stmt.  */
3619 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3622 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
3624 mode
= TYPE_MODE (vectype
);
3625 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3627 /* Initialize the vect stmts of NODE to properly insert the generated
3630 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3631 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3632 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3634 /* Generate permutation masks for every NODE. Number of masks for each NODE
3635 is equal to GROUP_SIZE.
3636 E.g., we have a group of three nodes with three loads from the same
3637 location in each node, and the vector size is 4. I.e., we have a
3638 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3639 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3640 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3643 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3644 The last mask is illegal since we assume two operands for permute
3645 operation, and the mask element values can't be outside that range.
3646 Hence, the last mask must be converted into {2,5,5,5}.
3647 For the first two permutations we need the first and the second input
3648 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3649 we need the second and the third vectors: {b1,c1,a2,b2} and
3652 int vect_stmts_counter
= 0;
3653 unsigned int index
= 0;
3654 int first_vec_index
= -1;
3655 int second_vec_index
= -1;
3659 vec_perm_builder mask
;
3660 unsigned int nelts_to_build
;
3661 unsigned int nvectors_per_build
;
/* REPEATING_P: the load group tiles a vector exactly, so one encoded mask
   serves every generated permute and each permute reads one input.  */
3662 bool repeating_p
= (group_size
== DR_GROUP_SIZE (stmt_info
)
3663 && multiple_p (nunits
, group_size
));
3666 /* A single vector contains a whole number of copies of the node, so:
3667 (a) all permutes can use the same mask; and
3668 (b) the permutes only need a single vector input. */
3669 mask
.new_vector (nunits
, group_size
, 3);
3670 nelts_to_build
= mask
.encoded_nelts ();
3671 nvectors_per_build
= SLP_TREE_VEC_STMTS (node
).length ();
3675 /* We need to construct a separate mask for each vector statement. */
3676 unsigned HOST_WIDE_INT const_nunits
, const_vf
;
/* Non-repeating case requires constant NUNITS and VF (bails out
   otherwise — failure path lost in extraction).  */
3677 if (!nunits
.is_constant (&const_nunits
)
3678 || !vf
.is_constant (&const_vf
))
3680 mask
.new_vector (const_nunits
, const_nunits
, 1);
3681 nelts_to_build
= const_vf
* group_size
;
3682 nvectors_per_build
= 1;
3685 unsigned int count
= mask
.encoded_nelts ();
3686 mask
.quick_grow (count
);
3687 vec_perm_indices indices
;
/* Build mask elements one scalar lane at a time, tracking which one or two
   source vectors of DR_CHAIN the current mask selects from.  */
3689 for (unsigned int j
= 0; j
< nelts_to_build
; j
++)
3691 unsigned int iter_num
= j
/ group_size
;
3692 unsigned int stmt_num
= j
% group_size
;
3693 unsigned int i
= (iter_num
* DR_GROUP_SIZE (stmt_info
)
3694 + SLP_TREE_LOAD_PERMUTATION (node
)[stmt_num
]);
3697 first_vec_index
= 0;
3702 /* Enforced before the loop when !repeating_p. */
3703 unsigned int const_nunits
= nunits
.to_constant ();
3704 vec_index
= i
/ const_nunits
;
3705 mask_element
= i
% const_nunits
;
3706 if (vec_index
== first_vec_index
3707 || first_vec_index
== -1)
3709 first_vec_index
= vec_index
;
3711 else if (vec_index
== second_vec_index
3712 || second_vec_index
== -1)
3714 second_vec_index
= vec_index
;
/* Elements from the second input are addressed past the first input's
   lanes in a two-operand VEC_PERM_EXPR.  */
3715 mask_element
+= const_nunits
;
/* A mask touching three or more distinct vectors cannot be expressed as a
   single two-input permute; only legal during analysis.  */
3719 if (dump_enabled_p ())
3720 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3721 "permutation requires at "
3722 "least three vectors %G",
3724 gcc_assert (analyze_only
);
3728 gcc_assert (mask_element
< 2 * const_nunits
);
/* NOOP_P tracking (declaration lost): a mask equal to the identity needs
   no permute stmt.  */
3731 if (mask_element
!= index
)
3733 mask
[index
++] = mask_element
;
3735 if (index
== count
&& !noop_p
)
3737 indices
.new_vector (mask
, second_vec_index
== -1 ? 1 : 2, nunits
);
3738 if (!can_vec_perm_const_p (mode
, indices
))
3740 if (dump_enabled_p ())
3742 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3744 "unsupported vect permute { ");
3745 for (i
= 0; i
< count
; ++i
)
3747 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3748 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3750 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3752 gcc_assert (analyze_only
);
3763 tree mask_vec
= NULL_TREE
;
3766 mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
3768 if (second_vec_index
== -1)
3769 second_vec_index
= first_vec_index
;
3771 for (unsigned int ri
= 0; ri
< nvectors_per_build
; ++ri
)
3773 /* Generate the permute statement if necessary. */
3774 tree first_vec
= dr_chain
[first_vec_index
+ ri
];
3775 tree second_vec
= dr_chain
[second_vec_index
+ ri
];
3776 stmt_vec_info perm_stmt_info
;
3779 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3781 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3783 perm_dest
= make_ssa_name (perm_dest
);
3785 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
3786 first_vec
, second_vec
,
3789 = vect_finish_stmt_generation (stmt_info
, perm_stmt
,
3793 /* If mask was NULL_TREE generate the requested
3794 identity transform. */
3795 perm_stmt_info
= vinfo
->lookup_def (first_vec
);
3797 /* Store the vector statement in NODE. */
3798 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++]
/* Reset source-vector tracking for the next mask.  */
3804 first_vec_index
= -1;
3805 second_vec_index
= -1;
3813 /* Vectorize SLP instance tree in postorder. */
/* NOTE(review): mangled extraction — braces, early `return`s and several
   declarations (`i`, `j`, `k`, `l`, `child`, `vectype`, `vstmt`) were
   dropped; comments below describe only what the surviving fragments show.  */
3816 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3817 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3819 gimple_stmt_iterator si
;
3820 stmt_vec_info stmt_info
;
3821 unsigned int group_size
;
/* Only internal (real statement) nodes are scheduled.  */
3826 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3829 /* See if we have already vectorized the node in the graph of the
3831 if (SLP_TREE_VEC_STMTS (node
).exists ())
3834 /* See if we have already vectorized the same set of stmts and reuse their
3835 vectorized stmts across instances. */
3836 if (slp_tree
*leader
= bst_map
->get (SLP_TREE_SCALAR_STMTS (node
)))
3838 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (*leader
));
/* Record this node as the leader for its scalar-stmt set, then schedule
   all children first (postorder).  */
3842 bst_map
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
3843 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3844 vect_schedule_slp_instance (child
, instance
, bst_map
);
3846 /* Push SLP node def-type to stmts. */
3847 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3848 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3850 stmt_vec_info child_stmt_info
;
3851 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
3852 STMT_VINFO_DEF_TYPE (child_stmt_info
) = SLP_TREE_DEF_TYPE (child
);
3855 stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3857 /* VECTYPE is the type of the destination. */
3858 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3859 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3860 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3862 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
3863 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3865 if (dump_enabled_p ())
3866 dump_printf_loc (MSG_NOTE
, vect_location
,
3867 "------>vectorizing SLP node starting from: %G",
3870 /* Vectorized stmts go before the last scalar stmt which is where
3871 all uses are ready. */
3872 stmt_vec_info last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
3873 si
= gsi_for_stmt (last_stmt_info
->stmt
);
3875 /* Handle two-operation SLP nodes by vectorizing the group with
3876 both operations and then performing a merge. */
3877 if (SLP_TREE_TWO_OPERATORS (node
))
3879 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3880 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3881 enum tree_code ocode
= ERROR_MARK
;
3882 stmt_vec_info ostmt_info
;
/* MASK records, per group lane, which of the two rhs codes the scalar
   stmt uses (0 = code0, 1 = the other code).  */
3883 vec_perm_builder
mask (group_size
, group_size
, 1);
3884 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt_info
)
3886 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
3887 if (gimple_assign_rhs_code (ostmt
) != code0
)
3889 mask
.quick_push (1);
3890 ocode
= gimple_assign_rhs_code (ostmt
);
3893 mask
.quick_push (0);
/* Two distinct codes present: vectorize the node twice (once per code,
   temporarily rewriting the representative stmt's rhs code) and blend
   the two result sets with per-lane VEC_PERM_EXPRs.  */
3895 if (ocode
!= ERROR_MARK
)
3897 vec
<stmt_vec_info
> v0
;
3898 vec
<stmt_vec_info
> v1
;
3900 tree tmask
= NULL_TREE
;
3901 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
3902 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3903 SLP_TREE_VEC_STMTS (node
).truncate (0);
3904 gimple_assign_set_rhs_code (stmt
, ocode
);
3905 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
3906 gimple_assign_set_rhs_code (stmt
, code0
);
3907 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3908 SLP_TREE_VEC_STMTS (node
).truncate (0);
/* Integer element type of the same bit width as the vector element,
   used to build the constant select mask.  */
3909 tree meltype
= build_nonstandard_integer_type
3910 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
3911 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3913 for (j
= 0; j
< v0
.length (); ++j
)
3915 /* Enforced by vect_build_slp_tree, which rejects variable-length
3916 vectors for SLP_TREE_TWO_OPERATORS. */
3917 unsigned int const_nunits
= nunits
.to_constant ();
3918 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
3919 for (l
= 0; l
< const_nunits
; ++l
)
3921 if (k
>= group_size
)
3923 tree t
= build_int_cst (meltype
,
3924 mask
[k
++] * const_nunits
+ l
);
3925 melts
.quick_push (t
);
3927 tmask
= melts
.build ();
3929 /* ??? Not all targets support a VEC_PERM_EXPR with a
3930 constant mask that would translate to a vec_merge RTX
3931 (with their vec_perm_const_ok). We can either not
3932 vectorize in that case or let veclower do its job.
3933 Unfortunately that isn't too great and at least for
3934 plus/minus we'd eventually like to match targets
3935 vector addsub instructions. */
3937 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
3939 gimple_assign_lhs (v0
[j
]->stmt
),
3940 gimple_assign_lhs (v1
[j
]->stmt
),
3942 SLP_TREE_VEC_STMTS (node
).quick_push
3943 (vect_finish_stmt_generation (stmt_info
, vstmt
, &si
));
/* Ordinary (single-operator) node: one transform call does it all.  */
3950 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
3952 /* Restore stmt def-types. */
3953 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3954 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3956 stmt_vec_info child_stmt_info
;
3957 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
3958 STMT_VINFO_DEF_TYPE (child_stmt_info
) = vect_internal_def
;
3962 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3963 For loop vectorization this is done in vectorizable_call, but for SLP
3964 it needs to be deferred until end of vect_schedule_slp, because multiple
3965 SLP instances may refer to the same scalar stmt. */
/* NOTE(review): mangled extraction — return type, braces, `continue`s and
   declarations of `i`, `child`, `lhs`, `new_stmt` were dropped.  */
3968 vect_remove_slp_scalar_calls (slp_tree node
, hash_set
<slp_tree
> &visited
)
3971 gimple_stmt_iterator gsi
;
3975 stmt_vec_info stmt_info
;
/* Skip external/constant nodes; VISITED guards against processing a shared
   node twice in the SLP graph.  */
3977 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3980 if (visited
.add (node
))
3983 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3984 vect_remove_slp_scalar_calls (child
, visited
);
3986 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
/* Only live, non-pattern, pure-SLP call stmts are rewritten; each is
   replaced by `lhs = 0` so later DCE can drop dead uses.  */
3988 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3989 if (!stmt
|| gimple_bb (stmt
) == NULL
)
3991 if (is_pattern_stmt_p (stmt_info
)
3992 || !PURE_SLP_STMT (stmt_info
))
3994 lhs
= gimple_call_lhs (stmt
);
3995 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3996 gsi
= gsi_for_stmt (stmt
);
3997 stmt_info
->vinfo
->replace_stmt (&gsi
, stmt_info
, new_stmt
);
3998 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
/* Convenience overload: walk NODE with a fresh visited set.  (Return type
   and braces lost in extraction.)  */
4003 vect_remove_slp_scalar_calls (slp_tree node
)
4005 hash_set
<slp_tree
> visited
;
4006 vect_remove_slp_scalar_calls (node
, visited
);
4009 /* Generate vector code for all SLP instances in the loop/basic block. */
4012 vect_schedule_slp (vec_info
*vinfo
)
4014 vec
<slp_instance
> slp_instances
;
4015 slp_instance instance
;
4018 scalar_stmts_to_slp_tree_map_t
*bst_map
4019 = new scalar_stmts_to_slp_tree_map_t ();
4020 slp_instances
= vinfo
->slp_instances
;
4021 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4023 /* Schedule the tree of INSTANCE. */
4024 vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
4026 if (dump_enabled_p ())
4027 dump_printf_loc (MSG_NOTE
, vect_location
,
4028 "vectorizing stmts using SLP.\n");
4032 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4034 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4035 stmt_vec_info store_info
;
4038 /* Remove scalar call stmts. Do not do this for basic-block
4039 vectorization as not all uses may be vectorized.
4040 ??? Why should this be necessary? DCE should be able to
4041 remove the stmts itself.
4042 ??? For BB vectorization we can as well remove scalar
4043 stmts starting from the SLP tree root if they have no
4045 if (is_a
<loop_vec_info
> (vinfo
))
4046 vect_remove_slp_scalar_calls (root
);
4048 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
)
4049 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4051 if (!STMT_VINFO_DATA_REF (store_info
))
4054 store_info
= vect_orig_stmt (store_info
);
4055 /* Free the attached stmt_vec_info and remove the stmt. */
4056 vinfo
->remove_stmt (store_info
);