1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
49 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
50 FINAL_P is true if we have vectorized the instance or if we have
51 made a final decision not to vectorize the statements in any way. */
54 vect_free_slp_tree (slp_tree node
, bool final_p
)
59 if (--node
->refcnt
!= 0)
62 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
63 vect_free_slp_tree (child
, final_p
);
65 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
66 Some statements might no longer exist, after having been
67 removed by vect_transform_stmt. Updating the remaining
68 statements would be redundant. */
71 stmt_vec_info stmt_info
;
72 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
74 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info
) > 0);
75 STMT_VINFO_NUM_SLP_USES (stmt_info
)--;
79 SLP_TREE_CHILDREN (node
).release ();
80 SLP_TREE_SCALAR_STMTS (node
).release ();
81 SLP_TREE_SCALAR_OPS (node
).release ();
82 SLP_TREE_VEC_STMTS (node
).release ();
83 SLP_TREE_LOAD_PERMUTATION (node
).release ();
88 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
89 have vectorized the instance or if we have made a final decision not
90 to vectorize the statements in any way. */
93 vect_free_slp_instance (slp_instance instance
, bool final_p
)
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
), final_p
);
96 SLP_INSTANCE_LOADS (instance
).release ();
101 /* Create an SLP node for SCALAR_STMTS. */
104 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
)
107 stmt_vec_info stmt_info
= scalar_stmts
[0];
110 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
111 nops
= gimple_call_num_args (stmt
);
112 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
114 nops
= gimple_num_ops (stmt
) - 1;
115 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
118 else if (is_a
<gphi
*> (stmt_info
->stmt
))
123 node
= XNEW (struct _slp_tree
);
124 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
125 SLP_TREE_SCALAR_OPS (node
) = vNULL
;
126 SLP_TREE_VEC_STMTS (node
).create (0);
127 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
128 SLP_TREE_CHILDREN (node
).create (nops
);
129 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
130 SLP_TREE_TWO_OPERATORS (node
) = false;
131 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
133 node
->max_nunits
= 1;
136 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt_info
)
137 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
142 /* Create an SLP node for OPS. */
145 vect_create_new_slp_node (vec
<tree
> ops
)
149 node
= XNEW (struct _slp_tree
);
150 SLP_TREE_SCALAR_STMTS (node
) = vNULL
;
151 SLP_TREE_SCALAR_OPS (node
) = ops
;
152 SLP_TREE_VEC_STMTS (node
).create (0);
153 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
154 SLP_TREE_CHILDREN (node
) = vNULL
;
155 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
156 SLP_TREE_TWO_OPERATORS (node
) = false;
157 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
159 node
->max_nunits
= 1;
165 /* This structure is used in creation of an SLP tree. Each instance
166 corresponds to the same operand in a group of scalar stmts in an SLP
168 typedef struct _slp_oprnd_info
170 /* Def-stmts for the operands. */
171 vec
<stmt_vec_info
> def_stmts
;
174 /* Information about the first statement, its vector def-type, type, the
175 operand itself in case it's constant, and an indication if it's a pattern
178 enum vect_def_type first_dt
;
183 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
185 static vec
<slp_oprnd_info
>
186 vect_create_oprnd_info (int nops
, int group_size
)
189 slp_oprnd_info oprnd_info
;
190 vec
<slp_oprnd_info
> oprnds_info
;
192 oprnds_info
.create (nops
);
193 for (i
= 0; i
< nops
; i
++)
195 oprnd_info
= XNEW (struct _slp_oprnd_info
);
196 oprnd_info
->def_stmts
.create (group_size
);
197 oprnd_info
->ops
.create (group_size
);
198 oprnd_info
->first_dt
= vect_uninitialized_def
;
199 oprnd_info
->first_op_type
= NULL_TREE
;
200 oprnd_info
->any_pattern
= false;
201 oprnds_info
.quick_push (oprnd_info
);
208 /* Free operands info. */
211 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
214 slp_oprnd_info oprnd_info
;
216 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
218 oprnd_info
->def_stmts
.release ();
219 oprnd_info
->ops
.release ();
220 XDELETE (oprnd_info
);
223 oprnds_info
.release ();
227 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
228 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
232 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info
,
233 stmt_vec_info first_stmt_info
)
235 stmt_vec_info next_stmt_info
= first_stmt_info
;
238 if (first_stmt_info
!= DR_GROUP_FIRST_ELEMENT (stmt_info
))
243 if (next_stmt_info
== stmt_info
)
245 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
247 result
+= DR_GROUP_GAP (next_stmt_info
);
249 while (next_stmt_info
);
254 /* Check whether it is possible to load COUNT elements of type ELT_MODE
255 using the method implemented by duplicate_and_interleave. Return true
256 if so, returning the number of intermediate vectors in *NVECTORS_OUT
257 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
261 can_duplicate_and_interleave_p (vec_info
*vinfo
, unsigned int count
,
262 machine_mode elt_mode
,
263 unsigned int *nvectors_out
,
264 tree
*vector_type_out
,
267 poly_int64 elt_bytes
= count
* GET_MODE_SIZE (elt_mode
);
269 unsigned int nvectors
= 1;
272 scalar_int_mode int_mode
;
273 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
274 if (multiple_p (GET_MODE_SIZE (vinfo
->vector_mode
), elt_bytes
, &nelts
)
275 && int_mode_for_size (elt_bits
, 0).exists (&int_mode
))
277 tree int_type
= build_nonstandard_integer_type
278 (GET_MODE_BITSIZE (int_mode
), 1);
279 tree vector_type
= build_vector_type (int_type
, nelts
);
280 if (VECTOR_MODE_P (TYPE_MODE (vector_type
)))
282 vec_perm_builder
sel1 (nelts
, 2, 3);
283 vec_perm_builder
sel2 (nelts
, 2, 3);
284 poly_int64 half_nelts
= exact_div (nelts
, 2);
285 for (unsigned int i
= 0; i
< 3; ++i
)
288 sel1
.quick_push (i
+ nelts
);
289 sel2
.quick_push (half_nelts
+ i
);
290 sel2
.quick_push (half_nelts
+ i
+ nelts
);
292 vec_perm_indices
indices1 (sel1
, 2, nelts
);
293 vec_perm_indices
indices2 (sel2
, 2, nelts
);
294 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
295 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
298 *nvectors_out
= nvectors
;
300 *vector_type_out
= vector_type
;
303 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
305 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
312 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
318 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
319 they are of a valid type and that they match the defs of the first stmt of
320 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
321 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
322 indicates swap is required for cond_expr stmts. Specifically, *SWAP
323 is 1 if STMT is cond and operands of comparison need to be swapped;
324 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
325 If there is any operand swap in this function, *SWAP is set to non-zero
327 If there was a fatal error return -1; if the error could be corrected by
328 swapping operands of father node of this one, return 1; if everything is
331 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
332 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
333 vec
<slp_oprnd_info
> *oprnds_info
)
335 stmt_vec_info stmt_info
= stmts
[stmt_num
];
337 unsigned int i
, number_of_oprnds
;
338 enum vect_def_type dt
= vect_uninitialized_def
;
339 slp_oprnd_info oprnd_info
;
340 int first_op_idx
= 1;
341 unsigned int commutative_op
= -1U;
342 bool first_op_cond
= false;
343 bool first
= stmt_num
== 0;
345 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
347 number_of_oprnds
= gimple_call_num_args (stmt
);
349 if (gimple_call_internal_p (stmt
))
351 internal_fn ifn
= gimple_call_internal_fn (stmt
);
352 commutative_op
= first_commutative_argument (ifn
);
354 /* Masked load, only look at mask. */
355 if (ifn
== IFN_MASK_LOAD
)
357 number_of_oprnds
= 1;
358 /* Mask operand index. */
363 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
365 enum tree_code code
= gimple_assign_rhs_code (stmt
);
366 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
367 /* Swap can only be done for cond_expr if asked to, otherwise we
368 could result in different comparison code to the first stmt. */
369 if (code
== COND_EXPR
370 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
372 first_op_cond
= true;
376 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
381 bool swapped
= (*swap
!= 0);
382 gcc_assert (!swapped
|| first_op_cond
);
383 for (i
= 0; i
< number_of_oprnds
; i
++)
388 /* Map indicating how operands of cond_expr should be swapped. */
389 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
390 int *map
= maps
[*swap
];
393 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
394 first_op_idx
), map
[i
]);
396 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
399 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
400 if (TREE_CODE (oprnd
) == VIEW_CONVERT_EXPR
)
401 oprnd
= TREE_OPERAND (oprnd
, 0);
403 oprnd_info
= (*oprnds_info
)[i
];
405 stmt_vec_info def_stmt_info
;
406 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
408 if (dump_enabled_p ())
409 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
410 "Build SLP failed: can't analyze def for %T\n",
416 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
417 oprnd_info
->any_pattern
= true;
421 /* For the swapping logic below force vect_reduction_def
422 for the reduction op in a SLP reduction group. */
423 if (!STMT_VINFO_DATA_REF (stmt_info
)
424 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
425 && (int)i
== STMT_VINFO_REDUC_IDX (stmt_info
)
427 dt
= vect_reduction_def
;
428 oprnd_info
->first_dt
= dt
;
429 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
433 /* Not first stmt of the group, check that the def-stmt/s match
434 the def-stmt/s of the first stmt. Allow different definition
435 types for reduction chains: the first stmt must be a
436 vect_reduction_def (a phi node), and the rest
437 end in the reduction chain. */
438 tree type
= TREE_TYPE (oprnd
);
439 if ((oprnd_info
->first_dt
!= dt
440 && !(oprnd_info
->first_dt
== vect_reduction_def
441 && !STMT_VINFO_DATA_REF (stmt_info
)
442 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
444 && !STMT_VINFO_DATA_REF (def_stmt_info
)
445 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
446 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
447 && !((oprnd_info
->first_dt
== vect_external_def
448 || oprnd_info
->first_dt
== vect_constant_def
)
449 && (dt
== vect_external_def
450 || dt
== vect_constant_def
)))
451 || !types_compatible_p (oprnd_info
->first_op_type
, type
)
452 || (!STMT_VINFO_DATA_REF (stmt_info
)
453 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
455 || STMT_VINFO_DATA_REF (def_stmt_info
)
456 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
457 != REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
458 != (oprnd_info
->first_dt
!= vect_reduction_def
))))
460 /* Try swapping operands if we got a mismatch. */
461 if (i
== commutative_op
&& !swapped
)
463 if (dump_enabled_p ())
464 dump_printf_loc (MSG_NOTE
, vect_location
,
465 "trying swapped operands\n");
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
472 "Build SLP failed: different types\n");
476 if ((dt
== vect_constant_def
477 || dt
== vect_external_def
)
478 && !GET_MODE_SIZE (vinfo
->vector_mode
).is_constant ()
479 && (TREE_CODE (type
) == BOOLEAN_TYPE
480 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
483 if (dump_enabled_p ())
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
485 "Build SLP failed: invalid type of def "
486 "for variable-length SLP %T\n", oprnd
);
491 /* Check the types of the definitions. */
494 case vect_external_def
:
495 /* Make sure to demote the overall operand to external. */
496 oprnd_info
->first_dt
= vect_external_def
;
498 case vect_constant_def
:
499 oprnd_info
->def_stmts
.quick_push (NULL
);
500 oprnd_info
->ops
.quick_push (oprnd
);
503 case vect_internal_def
:
504 case vect_reduction_def
:
505 if (oprnd_info
->first_dt
== vect_reduction_def
506 && !STMT_VINFO_DATA_REF (stmt_info
)
507 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
508 && !STMT_VINFO_DATA_REF (def_stmt_info
)
509 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
510 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
512 /* For a SLP reduction chain we want to duplicate the
513 reduction to each of the chain members. That gets
514 us a sane SLP graph (still the stmts are not 100%
515 correct wrt the initial values). */
517 oprnd_info
->def_stmts
.quick_push (oprnd_info
->def_stmts
[0]);
518 oprnd_info
->ops
.quick_push (oprnd_info
->ops
[0]);
522 case vect_induction_def
:
523 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
524 oprnd_info
->ops
.quick_push (oprnd
);
528 /* FORNOW: Not supported. */
529 if (dump_enabled_p ())
530 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
531 "Build SLP failed: illegal type of def %T\n",
543 /* If there are already uses of this stmt in a SLP instance then
544 we've committed to the operand order and can't swap it. */
545 if (STMT_VINFO_NUM_SLP_USES (stmt_info
) != 0)
547 if (dump_enabled_p ())
548 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
549 "Build SLP failed: cannot swap operands of "
550 "shared stmt %G", stmt_info
->stmt
);
554 /* To get rid of this swapping we have to move the stmt code
555 to the SLP tree as well (and gather it here per stmt). */
556 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
557 tree cond
= gimple_assign_rhs1 (stmt
);
558 enum tree_code code
= TREE_CODE (cond
);
563 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
564 &TREE_OPERAND (cond
, 1));
565 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
570 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
571 gimple_assign_rhs3_ptr (stmt
));
572 if (STMT_VINFO_REDUC_IDX (stmt_info
) == 1)
573 STMT_VINFO_REDUC_IDX (stmt_info
) = 2;
574 else if (STMT_VINFO_REDUC_IDX (stmt_info
) == 2)
575 STMT_VINFO_REDUC_IDX (stmt_info
) = 1;
576 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
577 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
578 gcc_assert (code
!= ERROR_MARK
);
579 TREE_SET_CODE (cond
, code
);
584 /* Commutative ops need not reflect swapping, ops are in
587 if (dump_enabled_p ())
588 dump_printf_loc (MSG_NOTE
, vect_location
,
589 "swapped operands to match def types in %G",
597 /* Return true if call statements CALL1 and CALL2 are similar enough
598 to be combined into the same SLP group. */
601 compatible_calls_p (gcall
*call1
, gcall
*call2
)
603 unsigned int nargs
= gimple_call_num_args (call1
);
604 if (nargs
!= gimple_call_num_args (call2
))
607 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
610 if (gimple_call_internal_p (call1
))
612 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
613 TREE_TYPE (gimple_call_lhs (call2
))))
615 for (unsigned int i
= 0; i
< nargs
; ++i
)
616 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
617 TREE_TYPE (gimple_call_arg (call2
, i
))))
622 if (!operand_equal_p (gimple_call_fn (call1
),
623 gimple_call_fn (call2
), 0))
626 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
632 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
633 caller's attempt to find the vector type in STMT_INFO with the narrowest
634 element type. Return true if VECTYPE is nonnull and if it is valid
635 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
636 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
637 vect_build_slp_tree. */
640 vect_record_max_nunits (stmt_vec_info stmt_info
, unsigned int group_size
,
641 tree vectype
, poly_uint64
*max_nunits
)
645 if (dump_enabled_p ())
646 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
647 "Build SLP failed: unsupported data-type in %G\n",
649 /* Fatal mismatch. */
653 /* If populating the vector type requires unrolling then fail
654 before adjusting *max_nunits for basic-block vectorization. */
655 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
656 unsigned HOST_WIDE_INT const_nunits
;
657 if (STMT_VINFO_BB_VINFO (stmt_info
)
658 && (!nunits
.is_constant (&const_nunits
)
659 || const_nunits
> group_size
))
661 if (dump_enabled_p ())
662 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
663 "Build SLP failed: unrolling required "
664 "in basic block SLP\n");
665 /* Fatal mismatch. */
669 /* In case of multiple types we need to detect the smallest type. */
670 vect_update_max_nunits (max_nunits
, vectype
);
674 /* STMTS is a group of GROUP_SIZE SLP statements in which some
675 statements do the same operation as the first statement and in which
676 the others do ALT_STMT_CODE. Return true if we can take one vector
677 of the first operation and one vector of the second and permute them
678 to get the required result. VECTYPE is the type of the vector that
679 would be permuted. */
682 vect_two_operations_perm_ok_p (vec
<stmt_vec_info
> stmts
,
683 unsigned int group_size
, tree vectype
,
684 tree_code alt_stmt_code
)
686 unsigned HOST_WIDE_INT count
;
687 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&count
))
690 vec_perm_builder
sel (count
, count
, 1);
691 for (unsigned int i
= 0; i
< count
; ++i
)
693 unsigned int elt
= i
;
694 gassign
*stmt
= as_a
<gassign
*> (stmts
[i
% group_size
]->stmt
);
695 if (gimple_assign_rhs_code (stmt
) == alt_stmt_code
)
697 sel
.quick_push (elt
);
699 vec_perm_indices
indices (sel
, 2, count
);
700 return can_vec_perm_const_p (TYPE_MODE (vectype
), indices
);
703 /* Verify if the scalar stmts STMTS are isomorphic, require data
704 permutation or are of unsupported types of operation. Return
705 true if they are, otherwise return false and indicate in *MATCHES
706 which stmts are not isomorphic to the first one. If MATCHES[0]
707 is false then this indicates the comparison could not be
708 carried out or the stmts will never be vectorized by SLP.
710 Note COND_EXPR is possibly isomorphic to another one after swapping its
711 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
712 the first stmt by swapping the two operands of comparison; set SWAP[i]
713 to 2 if stmt I is isormorphic to the first stmt by inverting the code
714 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
715 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
718 vect_build_slp_tree_1 (unsigned char *swap
,
719 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
720 poly_uint64
*max_nunits
, bool *matches
,
724 stmt_vec_info first_stmt_info
= stmts
[0];
725 enum tree_code first_stmt_code
= ERROR_MARK
;
726 enum tree_code alt_stmt_code
= ERROR_MARK
;
727 enum tree_code rhs_code
= ERROR_MARK
;
728 enum tree_code first_cond_code
= ERROR_MARK
;
730 bool need_same_oprnds
= false;
731 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
734 machine_mode optab_op2_mode
;
735 machine_mode vec_mode
;
736 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
739 /* For every stmt in NODE find its def stmt/s. */
740 stmt_vec_info stmt_info
;
741 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
743 gimple
*stmt
= stmt_info
->stmt
;
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
750 /* Fail to vectorize statements marked as unvectorizable. */
751 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
753 if (dump_enabled_p ())
754 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
755 "Build SLP failed: unvectorizable statement %G",
757 /* Fatal mismatch. */
762 lhs
= gimple_get_lhs (stmt
);
763 if (lhs
== NULL_TREE
)
765 if (dump_enabled_p ())
766 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
767 "Build SLP failed: not GIMPLE_ASSIGN nor "
768 "GIMPLE_CALL %G", stmt
);
769 /* Fatal mismatch. */
775 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
778 && !vect_record_max_nunits (stmt_info
, group_size
,
779 nunits_vectype
, max_nunits
)))
781 /* Fatal mismatch. */
786 gcc_assert (vectype
);
788 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
790 rhs_code
= CALL_EXPR
;
792 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
794 else if ((gimple_call_internal_p (call_stmt
)
795 && (!vectorizable_internal_fn_p
796 (gimple_call_internal_fn (call_stmt
))))
797 || gimple_call_tail_p (call_stmt
)
798 || gimple_call_noreturn_p (call_stmt
)
799 || !gimple_call_nothrow_p (call_stmt
)
800 || gimple_call_chain (call_stmt
))
802 if (dump_enabled_p ())
803 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
804 "Build SLP failed: unsupported call type %G",
806 /* Fatal mismatch. */
813 rhs_code
= gimple_assign_rhs_code (stmt
);
814 load_p
= TREE_CODE_CLASS (rhs_code
) == tcc_reference
;
817 /* Check the operation. */
820 first_stmt_code
= rhs_code
;
822 /* Shift arguments should be equal in all the packed stmts for a
823 vector shift with scalar shift operand. */
824 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
825 || rhs_code
== LROTATE_EXPR
826 || rhs_code
== RROTATE_EXPR
)
828 if (vectype
== boolean_type_node
)
830 if (dump_enabled_p ())
831 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
832 "Build SLP failed: shift of a"
834 /* Fatal mismatch. */
839 vec_mode
= TYPE_MODE (vectype
);
841 /* First see if we have a vector/vector shift. */
842 optab
= optab_for_tree_code (rhs_code
, vectype
,
846 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
848 /* No vector/vector shift, try for a vector/scalar shift. */
849 optab
= optab_for_tree_code (rhs_code
, vectype
,
854 if (dump_enabled_p ())
855 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
856 "Build SLP failed: no optab.\n");
857 /* Fatal mismatch. */
861 icode
= (int) optab_handler (optab
, vec_mode
);
862 if (icode
== CODE_FOR_nothing
)
864 if (dump_enabled_p ())
865 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
867 "op not supported by target.\n");
868 /* Fatal mismatch. */
872 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
873 if (!VECTOR_MODE_P (optab_op2_mode
))
875 need_same_oprnds
= true;
876 first_op1
= gimple_assign_rhs2 (stmt
);
880 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
882 need_same_oprnds
= true;
883 first_op1
= gimple_assign_rhs2 (stmt
);
888 if (first_stmt_code
!= rhs_code
889 && alt_stmt_code
== ERROR_MARK
)
890 alt_stmt_code
= rhs_code
;
891 if (first_stmt_code
!= rhs_code
892 && (first_stmt_code
!= IMAGPART_EXPR
893 || rhs_code
!= REALPART_EXPR
)
894 && (first_stmt_code
!= REALPART_EXPR
895 || rhs_code
!= IMAGPART_EXPR
)
896 /* Handle mismatches in plus/minus by computing both
897 and merging the results. */
898 && !((first_stmt_code
== PLUS_EXPR
899 || first_stmt_code
== MINUS_EXPR
)
900 && (alt_stmt_code
== PLUS_EXPR
901 || alt_stmt_code
== MINUS_EXPR
)
902 && rhs_code
== alt_stmt_code
)
903 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
904 && (first_stmt_code
== ARRAY_REF
905 || first_stmt_code
== BIT_FIELD_REF
906 || first_stmt_code
== INDIRECT_REF
907 || first_stmt_code
== COMPONENT_REF
908 || first_stmt_code
== MEM_REF
)))
910 if (dump_enabled_p ())
912 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
913 "Build SLP failed: different operation "
915 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
916 "original stmt %G", first_stmt_info
->stmt
);
923 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
925 if (dump_enabled_p ())
926 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
927 "Build SLP failed: different shift "
928 "arguments in %G", stmt
);
933 if (!load_p
&& rhs_code
== CALL_EXPR
)
935 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
936 as_a
<gcall
*> (stmt
)))
938 if (dump_enabled_p ())
939 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
940 "Build SLP failed: different calls in %G",
948 /* Grouped store or load. */
949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
951 if (REFERENCE_CLASS_P (lhs
))
959 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
962 /* Check that there are no loads from different interleaving
963 chains in the same node. */
964 if (prev_first_load
!= first_load
)
966 if (dump_enabled_p ())
967 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
969 "Build SLP failed: different "
970 "interleaving chains in one node %G",
977 prev_first_load
= first_load
;
979 } /* Grouped access. */
984 /* Not grouped load. */
985 if (dump_enabled_p ())
986 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
987 "Build SLP failed: not grouped load %G", stmt
);
989 /* FORNOW: Not grouped loads are not supported. */
990 /* Fatal mismatch. */
995 /* Not memory operation. */
996 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
997 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
998 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
999 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
1000 && rhs_code
!= VIEW_CONVERT_EXPR
1001 && rhs_code
!= CALL_EXPR
)
1003 if (dump_enabled_p ())
1004 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1005 "Build SLP failed: operation unsupported %G",
1007 /* Fatal mismatch. */
1012 if (rhs_code
== COND_EXPR
)
1014 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1015 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1016 enum tree_code swap_code
= ERROR_MARK
;
1017 enum tree_code invert_code
= ERROR_MARK
;
1020 first_cond_code
= TREE_CODE (cond_expr
);
1021 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1023 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1024 swap_code
= swap_tree_comparison (cond_code
);
1025 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1028 if (first_cond_code
== cond_code
)
1030 /* Isomorphic can be achieved by swapping. */
1031 else if (first_cond_code
== swap_code
)
1033 /* Isomorphic can be achieved by inverting. */
1034 else if (first_cond_code
== invert_code
)
1038 if (dump_enabled_p ())
1039 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1040 "Build SLP failed: different"
1041 " operation %G", stmt
);
1051 for (i
= 0; i
< group_size
; ++i
)
1055 /* If we allowed a two-operation SLP node verify the target can cope
1056 with the permute we are going to use. */
1057 if (alt_stmt_code
!= ERROR_MARK
1058 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1060 if (vectype
== boolean_type_node
1061 || !vect_two_operations_perm_ok_p (stmts
, group_size
,
1062 vectype
, alt_stmt_code
))
1064 for (i
= 0; i
< group_size
; ++i
)
1065 if (gimple_assign_rhs_code (stmts
[i
]->stmt
) == alt_stmt_code
)
1068 if (dump_enabled_p ())
1070 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1071 "Build SLP failed: different operation "
1072 "in stmt %G", stmts
[i
]->stmt
);
1073 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1074 "original stmt %G", first_stmt_info
->stmt
);
1079 *two_operators
= true;
1085 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1086 Note we never remove apart from at destruction time so we do not
1087 need a special value for deleted that differs from empty. */
1090 typedef vec
<stmt_vec_info
> value_type
;
1091 typedef vec
<stmt_vec_info
> compare_type
;
1092 static inline hashval_t
hash (value_type
);
1093 static inline bool equal (value_type existing
, value_type candidate
);
1094 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1095 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1096 static inline void mark_empty (value_type
&x
) { x
.release (); }
1097 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1098 static inline void remove (value_type
&x
) { x
.release (); }
1101 bst_traits::hash (value_type x
)
1104 for (unsigned i
= 0; i
< x
.length (); ++i
)
1105 h
.add_int (gimple_uid (x
[i
]->stmt
));
1109 bst_traits::equal (value_type existing
, value_type candidate
)
1111 if (existing
.length () != candidate
.length ())
1113 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1114 if (existing
[i
] != candidate
[i
])
1119 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1120 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1121 scalar_stmts_to_slp_tree_map_t
;
1124 vect_build_slp_tree_2 (vec_info
*vinfo
,
1125 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1126 poly_uint64
*max_nunits
,
1127 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1128 scalar_stmts_to_slp_tree_map_t
*bst_map
);
1131 vect_build_slp_tree (vec_info
*vinfo
,
1132 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1133 poly_uint64
*max_nunits
,
1134 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1135 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1137 if (slp_tree
*leader
= bst_map
->get (stmts
))
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1141 *leader
? "" : "failed ", *leader
);
1144 (*leader
)->refcnt
++;
1145 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
1149 poly_uint64 this_max_nunits
= 1;
1150 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
1151 matches
, npermutes
, tree_size
, bst_map
);
1154 res
->max_nunits
= this_max_nunits
;
1155 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1156 /* Keep a reference for the bst_map use. */
1159 bst_map
->put (stmts
.copy (), res
);
1163 /* Recursively build an SLP tree starting from NODE.
1164 Fail (and return a value not equal to zero) if def-stmts are not
1165 isomorphic, require data permutation or are of unsupported types of
1166 operation. Otherwise, return 0.
1167 The value returned is the depth in the SLP tree where a mismatch
1171 vect_build_slp_tree_2 (vec_info
*vinfo
,
1172 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1173 poly_uint64
*max_nunits
,
1174 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1175 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1177 unsigned nops
, i
, this_tree_size
= 0;
1178 poly_uint64 this_max_nunits
= *max_nunits
;
1183 stmt_vec_info stmt_info
= stmts
[0];
1184 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1185 nops
= gimple_call_num_args (stmt
);
1186 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1188 nops
= gimple_num_ops (stmt
) - 1;
1189 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1192 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1197 /* If the SLP node is a PHI (induction or reduction), terminate
1199 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1201 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1202 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
1203 if (!vect_record_max_nunits (stmt_info
, group_size
, vectype
, max_nunits
))
1206 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1207 /* Induction from different IVs is not supported. */
1208 if (def_type
== vect_induction_def
)
1210 stmt_vec_info other_info
;
1211 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1212 if (stmt_info
!= other_info
)
1215 else if (def_type
== vect_reduction_def
1216 || def_type
== vect_double_reduction_def
1217 || def_type
== vect_nested_cycle
)
1219 /* Else def types have to match. */
1220 stmt_vec_info other_info
;
1221 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1222 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1228 node
= vect_create_new_slp_node (stmts
);
1233 bool two_operators
= false;
1234 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1235 if (!vect_build_slp_tree_1 (swap
, stmts
, group_size
,
1236 &this_max_nunits
, matches
, &two_operators
))
1239 /* If the SLP node is a load, terminate the recursion unless masked. */
1240 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1241 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1243 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1246 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1251 *max_nunits
= this_max_nunits
;
1253 node
= vect_create_new_slp_node (stmts
);
1258 /* Get at the operands, verifying they are compatible. */
1259 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1260 slp_oprnd_info oprnd_info
;
1261 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1263 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1264 stmts
, i
, &oprnds_info
);
1266 matches
[(res
== -1) ? 0 : i
] = false;
1270 for (i
= 0; i
< group_size
; ++i
)
1273 vect_free_oprnd_info (oprnds_info
);
1277 auto_vec
<slp_tree
, 4> children
;
1279 stmt_info
= stmts
[0];
1281 /* Create SLP_TREE nodes for the definition node/s. */
1282 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1285 unsigned old_tree_size
= this_tree_size
;
1288 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1290 /* COND_EXPR have one too many eventually if the condition
1292 gcc_assert (i
== 3 && nops
== 4);
1296 if (oprnd_info
->first_dt
!= vect_internal_def
1297 && oprnd_info
->first_dt
!= vect_reduction_def
1298 && oprnd_info
->first_dt
!= vect_induction_def
)
1300 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1301 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1302 oprnd_info
->ops
= vNULL
;
1303 children
.safe_push (invnode
);
1307 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1308 group_size
, &this_max_nunits
,
1310 &this_tree_size
, bst_map
)) != NULL
)
1312 /* If we have all children of child built up from scalars then just
1313 throw that away and build it up this node from scalars. */
1314 if (is_a
<bb_vec_info
> (vinfo
)
1315 && !SLP_TREE_CHILDREN (child
).is_empty ()
1316 /* ??? Rejecting patterns this way doesn't work. We'd have to
1317 do extra work to cancel the pattern so the uses see the
1319 && !oprnd_info
->any_pattern
)
1321 slp_tree grandchild
;
1323 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1324 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1329 this_tree_size
= old_tree_size
;
1330 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1331 vect_free_slp_tree (grandchild
, false);
1332 SLP_TREE_CHILDREN (child
).truncate (0);
1334 if (dump_enabled_p ())
1335 dump_printf_loc (MSG_NOTE
, vect_location
,
1336 "Building parent vector operands from "
1337 "scalars instead\n");
1338 oprnd_info
->def_stmts
= vNULL
;
1339 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1340 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1341 oprnd_info
->ops
= vNULL
;
1343 children
.safe_push (child
);
1348 oprnd_info
->def_stmts
= vNULL
;
1349 children
.safe_push (child
);
1353 /* If the SLP build failed fatally and we analyze a basic-block
1354 simply treat nodes we fail to build as externally defined
1355 (and thus build vectors from the scalar defs).
1356 The cost model will reject outright expensive cases.
1357 ??? This doesn't treat cases where permutation ultimatively
1358 fails (or we don't try permutation below). Ideally we'd
1359 even compute a permutation that will end up with the maximum
1361 if (is_a
<bb_vec_info
> (vinfo
)
1363 /* ??? Rejecting patterns this way doesn't work. We'd have to
1364 do extra work to cancel the pattern so the uses see the
1366 && !is_pattern_stmt_p (stmt_info
)
1367 && !oprnd_info
->any_pattern
)
1369 if (dump_enabled_p ())
1370 dump_printf_loc (MSG_NOTE
, vect_location
,
1371 "Building vector operands from scalars\n");
1373 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1374 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1375 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1376 children
.safe_push (child
);
1377 oprnd_info
->ops
= vNULL
;
1378 oprnd_info
->def_stmts
= vNULL
;
1382 /* If the SLP build for operand zero failed and operand zero
1383 and one can be commutated try that for the scalar stmts
1384 that failed the match. */
1386 /* A first scalar stmt mismatch signals a fatal mismatch. */
1388 /* ??? For COND_EXPRs we can swap the comparison operands
1389 as well as the arms under some constraints. */
1391 && oprnds_info
[1]->first_dt
== vect_internal_def
1392 && is_gimple_assign (stmt_info
->stmt
)
1393 /* Swapping operands for reductions breaks assumptions later on. */
1394 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1395 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
1396 /* Do so only if the number of not successful permutes was nor more
1397 than a cut-ff as re-trying the recursive match on
1398 possibly each level of the tree would expose exponential
1402 /* See whether we can swap the matching or the non-matching
1404 bool swap_not_matching
= true;
1407 for (j
= 0; j
< group_size
; ++j
)
1409 if (matches
[j
] != !swap_not_matching
)
1411 stmt_vec_info stmt_info
= stmts
[j
];
1412 /* Verify if we can swap operands of this stmt. */
1413 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1415 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1417 if (!swap_not_matching
)
1419 swap_not_matching
= false;
1424 while (j
!= group_size
);
1426 /* Swap mismatched definition stmts. */
1427 if (dump_enabled_p ())
1428 dump_printf_loc (MSG_NOTE
, vect_location
,
1429 "Re-trying with swapped operands of stmts ");
1430 for (j
= 0; j
< group_size
; ++j
)
1431 if (matches
[j
] == !swap_not_matching
)
1433 std::swap (oprnds_info
[0]->def_stmts
[j
],
1434 oprnds_info
[1]->def_stmts
[j
]);
1435 std::swap (oprnds_info
[0]->ops
[j
],
1436 oprnds_info
[1]->ops
[j
]);
1437 if (dump_enabled_p ())
1438 dump_printf (MSG_NOTE
, "%d ", j
);
1440 if (dump_enabled_p ())
1441 dump_printf (MSG_NOTE
, "\n");
1442 /* And try again with scratch 'matches' ... */
1443 bool *tem
= XALLOCAVEC (bool, group_size
);
1444 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1445 group_size
, &this_max_nunits
,
1447 &this_tree_size
, bst_map
)) != NULL
)
1449 /* If we have all children of child built up from scalars then
1450 just throw that away and build it up this node from scalars. */
1451 if (is_a
<bb_vec_info
> (vinfo
)
1452 && !SLP_TREE_CHILDREN (child
).is_empty ()
1453 /* ??? Rejecting patterns this way doesn't work. We'd have
1454 to do extra work to cancel the pattern so the uses see the
1456 && !oprnd_info
->any_pattern
)
1459 slp_tree grandchild
;
1461 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1462 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1467 this_tree_size
= old_tree_size
;
1468 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1469 vect_free_slp_tree (grandchild
, false);
1470 SLP_TREE_CHILDREN (child
).truncate (0);
1472 if (dump_enabled_p ())
1473 dump_printf_loc (MSG_NOTE
, vect_location
,
1474 "Building parent vector operands from "
1475 "scalars instead\n");
1476 oprnd_info
->def_stmts
= vNULL
;
1477 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1478 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1479 oprnd_info
->ops
= vNULL
;
1481 children
.safe_push (child
);
1486 oprnd_info
->def_stmts
= vNULL
;
1487 children
.safe_push (child
);
1495 gcc_assert (child
== NULL
);
1496 FOR_EACH_VEC_ELT (children
, j
, child
)
1497 vect_free_slp_tree (child
, false);
1498 vect_free_oprnd_info (oprnds_info
);
1502 vect_free_oprnd_info (oprnds_info
);
1504 *tree_size
+= this_tree_size
+ 1;
1505 *max_nunits
= this_max_nunits
;
1507 node
= vect_create_new_slp_node (stmts
);
1508 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1509 SLP_TREE_CHILDREN (node
).splice (children
);
1513 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1516 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1517 slp_tree node
, hash_set
<slp_tree
> &visited
)
1520 stmt_vec_info stmt_info
;
1524 if (visited
.add (node
))
1527 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1528 dump_user_location_t user_loc
= loc
.get_user_location ();
1529 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u)\n",
1530 SLP_TREE_DEF_TYPE (node
) == vect_external_def
1532 : (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
1535 estimated_poly_value (node
->max_nunits
));
1536 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1537 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1538 dump_printf_loc (metadata
, user_loc
, "\tstmt %u %G", i
, stmt_info
->stmt
);
1541 dump_printf_loc (metadata
, user_loc
, "\t{ ");
1542 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1543 dump_printf (metadata
, "%T%s ", op
,
1544 i
< SLP_TREE_SCALAR_OPS (node
).length () - 1 ? "," : "");
1545 dump_printf (metadata
, "}\n");
1547 if (SLP_TREE_CHILDREN (node
).is_empty ())
1549 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1550 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1551 dump_printf (dump_kind
, " %p", (void *)child
);
1552 dump_printf (dump_kind
, "\n");
1553 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1554 vect_print_slp_tree (dump_kind
, loc
, child
, visited
);
1558 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1561 hash_set
<slp_tree
> visited
;
1562 vect_print_slp_tree (dump_kind
, loc
, node
, visited
);
1565 /* Mark the tree rooted at NODE with PURE_SLP. */
1568 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
1571 stmt_vec_info stmt_info
;
1574 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1577 if (visited
.add (node
))
1580 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1581 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
1583 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1584 vect_mark_slp_stmts (child
, visited
);
1588 vect_mark_slp_stmts (slp_tree node
)
1590 hash_set
<slp_tree
> visited
;
1591 vect_mark_slp_stmts (node
, visited
);
1594 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1597 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
1600 stmt_vec_info stmt_info
;
1603 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1606 if (visited
.add (node
))
1609 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1611 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1612 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1613 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1616 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1617 vect_mark_slp_stmts_relevant (child
, visited
);
1621 vect_mark_slp_stmts_relevant (slp_tree node
)
1623 hash_set
<slp_tree
> visited
;
1624 vect_mark_slp_stmts_relevant (node
, visited
);
1628 /* Rearrange the statements of NODE according to PERMUTATION. */
1631 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1632 vec
<unsigned> permutation
,
1633 hash_set
<slp_tree
> &visited
)
1638 if (visited
.add (node
))
1641 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1642 vect_slp_rearrange_stmts (child
, group_size
, permutation
, visited
);
1644 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1646 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1647 vec
<stmt_vec_info
> tmp_stmts
;
1648 tmp_stmts
.create (group_size
);
1649 tmp_stmts
.quick_grow (group_size
);
1650 stmt_vec_info stmt_info
;
1651 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1652 tmp_stmts
[permutation
[i
]] = stmt_info
;
1653 SLP_TREE_SCALAR_STMTS (node
).release ();
1654 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1656 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1658 gcc_assert (group_size
== SLP_TREE_SCALAR_OPS (node
).length ());
1660 tmp_ops
.create (group_size
);
1661 tmp_ops
.quick_grow (group_size
);
1663 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1664 tmp_ops
[permutation
[i
]] = op
;
1665 SLP_TREE_SCALAR_OPS (node
).release ();
1666 SLP_TREE_SCALAR_OPS (node
) = tmp_ops
;
1671 /* Attempt to reorder stmts in a reduction chain so that we don't
1672 require any load permutation. Return true if that was possible,
1673 otherwise return false. */
1676 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1678 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1681 slp_tree node
, load
;
1683 /* Compare all the permutation sequences to the first one. We know
1684 that at least one load is permuted. */
1685 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1686 if (!node
->load_permutation
.exists ())
1688 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1690 if (!load
->load_permutation
.exists ())
1692 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1693 if (lidx
!= node
->load_permutation
[j
])
1697 /* Check that the loads in the first sequence are different and there
1698 are no gaps between them. */
1699 auto_sbitmap
load_index (group_size
);
1700 bitmap_clear (load_index
);
1701 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1703 if (lidx
>= group_size
)
1705 if (bitmap_bit_p (load_index
, lidx
))
1708 bitmap_set_bit (load_index
, lidx
);
1710 for (i
= 0; i
< group_size
; i
++)
1711 if (!bitmap_bit_p (load_index
, i
))
1714 /* This permutation is valid for reduction. Since the order of the
1715 statements in the nodes is not important unless they are memory
1716 accesses, we can rearrange the statements in all the nodes
1717 according to the order of the loads. */
1718 hash_set
<slp_tree
> visited
;
1719 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1720 node
->load_permutation
, visited
);
1722 /* We are done, no actual permutations need to be generated. */
1723 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1724 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1726 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1727 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1728 /* But we have to keep those permutations that are required because
1729 of handling of gaps. */
1730 if (known_eq (unrolling_factor
, 1U)
1731 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1732 && DR_GROUP_GAP (first_stmt_info
) == 0))
1733 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1735 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1736 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1742 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1745 vect_gather_slp_loads (slp_instance inst
, slp_tree node
,
1746 hash_set
<slp_tree
> &visited
)
1748 if (visited
.add (node
))
1751 if (SLP_TREE_CHILDREN (node
).length () == 0)
1753 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1755 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1756 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1757 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1758 SLP_INSTANCE_LOADS (inst
).safe_push (node
);
1764 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1765 vect_gather_slp_loads (inst
, child
, visited
);
1770 vect_gather_slp_loads (slp_instance inst
, slp_tree node
)
1772 hash_set
<slp_tree
> visited
;
1773 vect_gather_slp_loads (inst
, node
, visited
);
1776 /* Check if the required load permutations in the SLP instance
1777 SLP_INSTN are supported. */
1780 vect_supported_load_permutation_p (slp_instance slp_instn
)
1782 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1783 unsigned int i
, j
, k
, next
;
1786 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1789 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1790 if (node
->load_permutation
.exists ())
1791 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1792 dump_printf (MSG_NOTE
, "%d ", next
);
1794 for (k
= 0; k
< group_size
; ++k
)
1795 dump_printf (MSG_NOTE
, "%d ", k
);
1796 dump_printf (MSG_NOTE
, "\n");
1799 /* In case of reduction every load permutation is allowed, since the order
1800 of the reduction statements is not important (as opposed to the case of
1801 grouped stores). The only condition we need to check is that all the
1802 load nodes are of the same size and have the same permutation (and then
1803 rearrange all the nodes of the SLP instance according to this
1806 /* Check that all the load nodes are of the same size. */
1807 /* ??? Can't we assert this? */
1808 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1809 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1812 node
= SLP_INSTANCE_TREE (slp_instn
);
1813 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1815 /* Reduction (there are no data-refs in the root).
1816 In reduction chain the order of the loads is not important. */
1817 if (!STMT_VINFO_DATA_REF (stmt_info
)
1818 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1819 vect_attempt_slp_rearrange_stmts (slp_instn
);
1821 /* In basic block vectorization we allow any subchain of an interleaving
1823 FORNOW: not supported in loop SLP because of realignment compications. */
1824 if (STMT_VINFO_BB_VINFO (stmt_info
))
1826 /* Check whether the loads in an instance form a subchain and thus
1827 no permutation is necessary. */
1828 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1830 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1832 bool subchain_p
= true;
1833 stmt_vec_info next_load_info
= NULL
;
1834 stmt_vec_info load_info
;
1835 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1838 && (next_load_info
!= load_info
1839 || DR_GROUP_GAP (load_info
) != 1))
1844 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
1847 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1850 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1851 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
1852 unsigned HOST_WIDE_INT nunits
;
1853 unsigned k
, maxk
= 0;
1854 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1857 /* In BB vectorization we may not actually use a loaded vector
1858 accessing elements in excess of DR_GROUP_SIZE. */
1859 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
1860 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
1861 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1863 if (dump_enabled_p ())
1864 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1865 "BB vectorization with gaps at the end of "
1866 "a load is not supported\n");
1870 /* Verify the permutation can be generated. */
1873 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1874 1, slp_instn
, true, &n_perms
))
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1879 "unsupported load permutation\n");
1887 /* For loop vectorization verify we can generate the permutation. Be
1888 conservative about the vectorization factor, there are permutations
1889 that will use three vector inputs only starting from a specific factor
1890 and the vectorization factor is not yet final.
1891 ??? The SLP instance unrolling factor might not be the maximum one. */
1894 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1895 LOOP_VINFO_VECT_FACTOR
1896 (STMT_VINFO_LOOP_VINFO (stmt_info
)));
1897 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1898 if (node
->load_permutation
.exists ()
1899 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1900 slp_instn
, true, &n_perms
))
1907 /* Find the last store in SLP INSTANCE. */
1910 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1912 stmt_vec_info last
= NULL
;
1913 stmt_vec_info stmt_vinfo
;
1915 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
1917 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
1918 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
1924 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1925 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1926 (also containing the first GROUP1_SIZE stmts, since stores are
1927 consecutive), the second containing the remainder.
1928 Return the first stmt in the second group. */
1930 static stmt_vec_info
1931 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
1933 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
1934 gcc_assert (group1_size
> 0);
1935 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
1936 gcc_assert (group2_size
> 0);
1937 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
1939 stmt_vec_info stmt_info
= first_vinfo
;
1940 for (unsigned i
= group1_size
; i
> 1; i
--)
1942 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1943 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1945 /* STMT is now the last element of the first group. */
1946 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1947 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
1949 DR_GROUP_SIZE (group2
) = group2_size
;
1950 for (stmt_info
= group2
; stmt_info
;
1951 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
1953 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
1954 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1957 /* For the second group, the DR_GROUP_GAP is that before the original group,
1958 plus skipping over the first vector. */
1959 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
1961 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1962 DR_GROUP_GAP (first_vinfo
) += group2_size
;
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1966 group1_size
, group2_size
);
1971 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1972 statements and a vector of NUNITS elements. */
1975 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
1977 return exact_div (common_multiple (nunits
, group_size
), group_size
);
1980 /* Analyze an SLP instance starting from a group of grouped stores. Call
1981 vect_build_slp_tree to build a tree of packed stmts if possible.
1982 Return FALSE if it's impossible to SLP any stmt in the loop. */
1985 vect_analyze_slp_instance (vec_info
*vinfo
,
1986 stmt_vec_info stmt_info
, unsigned max_tree_size
)
1988 slp_instance new_instance
;
1990 unsigned int group_size
;
1991 tree vectype
, scalar_type
= NULL_TREE
;
1993 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1994 vec
<stmt_vec_info
> scalar_stmts
;
1995 bool constructor
= false;
1997 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1999 scalar_type
= TREE_TYPE (DR_REF (dr
));
2000 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
2001 group_size
= DR_GROUP_SIZE (stmt_info
);
2003 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2005 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2006 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2007 group_size
= REDUC_GROUP_SIZE (stmt_info
);
2009 else if (is_gimple_assign (stmt_info
->stmt
)
2010 && gimple_assign_rhs_code (stmt_info
->stmt
) == CONSTRUCTOR
)
2012 vectype
= TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
));
2013 group_size
= CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info
->stmt
));
2018 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2019 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2020 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2025 if (dump_enabled_p ())
2026 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2027 "Build SLP failed: unsupported data-type %T\n",
2032 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2034 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2035 scalar_stmts
.create (group_size
);
2036 stmt_vec_info next_info
= stmt_info
;
2037 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2039 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2042 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2043 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
2046 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2048 /* Collect the reduction stmts and store them in
2049 SLP_TREE_SCALAR_STMTS. */
2052 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2053 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
2055 /* Mark the first element of the reduction chain as reduction to properly
2056 transform the node. In the reduction analysis phase only the last
2057 element of the chain is marked as reduction. */
2058 STMT_VINFO_DEF_TYPE (stmt_info
)
2059 = STMT_VINFO_DEF_TYPE (scalar_stmts
.last ());
2060 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
2061 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
2063 else if (constructor
)
2065 tree rhs
= gimple_assign_rhs1 (stmt_info
->stmt
);
2067 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs
), i
, val
)
2069 if (TREE_CODE (val
) == SSA_NAME
)
2071 gimple
* def
= SSA_NAME_DEF_STMT (val
);
2072 stmt_vec_info def_info
= vinfo
->lookup_stmt (def
);
2073 /* Value is defined in another basic block. */
2076 scalar_stmts
.safe_push (def_info
);
2084 /* Collect reduction statements. */
2085 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2086 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2087 scalar_stmts
.safe_push (next_info
);
2090 /* Build the tree for the SLP instance. */
2091 bool *matches
= XALLOCAVEC (bool, group_size
);
2092 unsigned npermutes
= 0;
2093 scalar_stmts_to_slp_tree_map_t
*bst_map
2094 = new scalar_stmts_to_slp_tree_map_t ();
2095 poly_uint64 max_nunits
= nunits
;
2096 unsigned tree_size
= 0;
2097 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2098 &max_nunits
, matches
, &npermutes
,
2099 &tree_size
, bst_map
);
2100 /* The map keeps a reference on SLP nodes built, release that. */
2101 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
2102 it
!= bst_map
->end (); ++it
)
2104 vect_free_slp_tree ((*it
).second
, false);
2108 /* If this is a reduction chain with a conversion in front
2109 amend the SLP tree with a node for that. */
2111 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
2112 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
2114 /* Get at the conversion stmt - we know it's the single use
2115 of the last stmt of the reduction chain. */
2116 gimple
*tem
= vect_orig_stmt (scalar_stmts
[group_size
- 1])->stmt
;
2117 use_operand_p use_p
;
2119 bool r
= single_imm_use (gimple_assign_lhs (tem
), &use_p
, &use_stmt
);
2121 next_info
= vinfo
->lookup_stmt (use_stmt
);
2122 next_info
= vect_stmt_to_vectorize (next_info
);
2123 scalar_stmts
= vNULL
;
2124 scalar_stmts
.create (group_size
);
2125 for (unsigned i
= 0; i
< group_size
; ++i
)
2126 scalar_stmts
.quick_push (next_info
);
2127 slp_tree conv
= vect_create_new_slp_node (scalar_stmts
);
2128 SLP_TREE_CHILDREN (conv
).quick_push (node
);
2130 /* We also have to fake this conversion stmt as SLP reduction group
2131 so we don't have to mess with too much code elsewhere. */
2132 REDUC_GROUP_FIRST_ELEMENT (next_info
) = next_info
;
2133 REDUC_GROUP_NEXT_ELEMENT (next_info
) = NULL
;
2136 /* Calculate the unrolling factor based on the smallest type. */
2137 poly_uint64 unrolling_factor
2138 = calculate_unrolling_factor (max_nunits
, group_size
);
2140 if (maybe_ne (unrolling_factor
, 1U)
2141 && is_a
<bb_vec_info
> (vinfo
))
2143 unsigned HOST_WIDE_INT const_max_nunits
;
2144 if (!max_nunits
.is_constant (&const_max_nunits
)
2145 || const_max_nunits
> group_size
)
2147 if (dump_enabled_p ())
2148 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2149 "Build SLP failed: store group "
2150 "size not a multiple of the vector size "
2151 "in basic block SLP\n");
2152 vect_free_slp_tree (node
, false);
2155 /* Fatal mismatch. */
2156 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2157 vect_free_slp_tree (node
, false);
2161 /* Create a new SLP instance. */
2162 new_instance
= XNEW (class _slp_instance
);
2163 SLP_INSTANCE_TREE (new_instance
) = node
;
2164 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2165 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2166 SLP_INSTANCE_LOADS (new_instance
) = vNULL
;
2167 SLP_INSTANCE_ROOT_STMT (new_instance
) = constructor
? stmt_info
: NULL
;
2169 vect_gather_slp_loads (new_instance
, node
);
2170 if (dump_enabled_p ())
2171 dump_printf_loc (MSG_NOTE
, vect_location
,
2172 "SLP size %u vs. limit %u.\n",
2173 tree_size
, max_tree_size
);
2175 /* Compute the load permutation. */
2177 bool loads_permuted
= false;
2178 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2180 vec
<unsigned> load_permutation
;
2182 stmt_vec_info load_info
;
2183 bool this_load_permuted
= false;
2184 load_permutation
.create (group_size
);
2185 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT
2186 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2187 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2189 int load_place
= vect_get_place_in_interleaving_chain
2190 (load_info
, first_stmt_info
);
2191 gcc_assert (load_place
!= -1);
2192 if (load_place
!= j
)
2193 this_load_permuted
= true;
2194 load_permutation
.safe_push (load_place
);
2196 if (!this_load_permuted
2197 /* The load requires permutation when unrolling exposes
2198 a gap either because the group is larger than the SLP
2199 group-size or because there is a gap between the groups. */
2200 && (known_eq (unrolling_factor
, 1U)
2201 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
2202 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2204 load_permutation
.release ();
2207 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2208 loads_permuted
= true;
2213 if (!vect_supported_load_permutation_p (new_instance
))
2215 if (dump_enabled_p ())
2216 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2217 "Build SLP failed: unsupported load "
2218 "permutation %G", stmt_info
->stmt
);
2219 vect_free_slp_instance (new_instance
, false);
2224 /* If the loads and stores can be handled with load/store-lan
2225 instructions do not generate this SLP instance. */
2226 if (is_a
<loop_vec_info
> (vinfo
)
2228 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2231 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2233 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2234 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2235 /* Use SLP for strided accesses (or if we can't load-lanes). */
2236 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2237 || ! vect_load_lanes_supported
2238 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2239 DR_GROUP_SIZE (stmt_vinfo
), false))
2242 if (i
== SLP_INSTANCE_LOADS (new_instance
).length ())
2244 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2246 "Built SLP cancelled: can use "
2247 "load/store-lanes\n");
2248 vect_free_slp_instance (new_instance
, false);
2253 vinfo
->slp_instances
.safe_push (new_instance
);
2255 if (dump_enabled_p ())
2257 dump_printf_loc (MSG_NOTE
, vect_location
,
2258 "Final SLP tree for instance:\n");
2259 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2267 /* Failed to SLP. */
2268 /* Free the allocated memory. */
2269 scalar_stmts
.release ();
2272 /* For basic block SLP, try to break the group up into multiples of the
2274 unsigned HOST_WIDE_INT const_nunits
;
2275 if (is_a
<bb_vec_info
> (vinfo
)
2276 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2277 && DR_GROUP_FIRST_ELEMENT (stmt_info
)
2278 && nunits
.is_constant (&const_nunits
))
2280 /* We consider breaking the group only on VF boundaries from the existing
2282 for (i
= 0; i
< group_size
; i
++)
2283 if (!matches
[i
]) break;
2285 if (i
>= const_nunits
&& i
< group_size
)
2287 /* Split into two groups at the first vector boundary before i. */
2288 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2289 unsigned group1_size
= i
& ~(const_nunits
- 1);
2291 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2293 bool res
= vect_analyze_slp_instance (vinfo
, stmt_info
,
2295 /* If the first non-match was in the middle of a vector,
2296 skip the rest of that vector. */
2297 if (group1_size
< i
)
2299 i
= group1_size
+ const_nunits
;
2301 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2304 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2307 /* Even though the first vector did not all match, we might be able to SLP
2308 (some) of the remainder. FORNOW ignore this possibility. */
2315 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2316 trees of packed scalar stmts if SLP is possible. */
2319 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2322 stmt_vec_info first_element
;
2324 DUMP_VECT_SCOPE ("vect_analyze_slp");
2326 /* Find SLP sequences starting from groups of grouped stores. */
2327 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2328 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2330 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2332 if (loop_vinfo
->reduction_chains
.length () > 0)
2334 /* Find SLP sequences starting from reduction chains. */
2335 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2336 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2339 /* Dissolve reduction chain group. */
2340 stmt_vec_info vinfo
= first_element
;
2341 stmt_vec_info last
= NULL
;
2344 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2345 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2346 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2350 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2351 /* It can be still vectorized as part of an SLP reduction. */
2352 loop_vinfo
->reductions
.safe_push (last
);
2356 /* Find SLP sequences starting from groups of reductions. */
2357 if (loop_vinfo
->reductions
.length () > 1)
2358 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2362 return opt_result::success ();
2366 /* For each possible SLP instance decide whether to SLP it and calculate overall
2367 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2368 least one instance. */
2371 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2374 poly_uint64 unrolling_factor
= 1;
2375 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2376 slp_instance instance
;
2377 int decided_to_slp
= 0;
2379 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2381 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2383 /* FORNOW: SLP if you can. */
2384 /* All unroll factors have the form:
2386 GET_MODE_SIZE (vinfo->vector_mode) * X
2388 for some rational X, so they must have a common multiple. */
2390 = force_common_multiple (unrolling_factor
,
2391 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2393 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2394 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2395 loop-based vectorization. Such stmts will be marked as HYBRID. */
2396 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
2400 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2402 if (decided_to_slp
&& dump_enabled_p ())
2404 dump_printf_loc (MSG_NOTE
, vect_location
,
2405 "Decided to SLP %d instances. Unrolling factor ",
2407 dump_dec (MSG_NOTE
, unrolling_factor
);
2408 dump_printf (MSG_NOTE
, "\n");
2411 return (decided_to_slp
> 0);
2415 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2416 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2419 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
,
2420 hash_map
<slp_tree
, unsigned> &visited
)
2422 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2423 imm_use_iterator imm_iter
;
2425 stmt_vec_info use_vinfo
;
2427 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2430 /* We need to union stype over the incoming graph edges but we still
2431 want to limit recursion to stay O(N+E). */
2432 bool only_edge
= (++visited
.get_or_insert (node
) < node
->refcnt
);
2434 /* Propagate hybrid down the SLP tree. */
2435 if (stype
== hybrid
)
2437 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2439 else if (!only_edge
)
2441 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2442 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2443 /* If we get a pattern stmt here we have to use the LHS of the
2444 original stmt for immediate uses. */
2445 gimple
*stmt
= vect_orig_stmt (stmt_vinfo
)->stmt
;
2447 if (gimple_code (stmt
) == GIMPLE_PHI
)
2448 def
= gimple_phi_result (stmt
);
2450 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2452 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2454 use_vinfo
= loop_vinfo
->lookup_stmt (use_stmt
);
2457 use_vinfo
= vect_stmt_to_vectorize (use_vinfo
);
2458 if (!STMT_SLP_TYPE (use_vinfo
)
2459 && (STMT_VINFO_RELEVANT (use_vinfo
)
2460 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2461 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2462 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2464 if (dump_enabled_p ())
2465 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2466 "def in non-SLP stmt: %G", use_stmt
);
2473 && !HYBRID_SLP_STMT (stmt_vinfo
))
2475 if (dump_enabled_p ())
2476 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2478 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2482 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2483 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
2484 && SLP_TREE_DEF_TYPE (child
) != vect_constant_def
)
2485 vect_detect_hybrid_slp_stmts (child
, i
, stype
, visited
);
2489 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2491 hash_map
<slp_tree
, unsigned> visited
;
2492 vect_detect_hybrid_slp_stmts (node
, i
, stype
, visited
);
2495 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2498 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2500 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2501 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2506 stmt_vec_info def_stmt_info
= loop_vinfo
->lookup_def (*tp
);
2507 if (def_stmt_info
&& PURE_SLP_STMT (def_stmt_info
))
2509 if (dump_enabled_p ())
2510 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2511 def_stmt_info
->stmt
);
2512 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
2519 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2522 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2523 stmt_vec_info use_vinfo
= loop_vinfo
->lookup_stmt (gsi_stmt (*gsi
));
2524 /* If the stmt is in a SLP instance then this isn't a reason
2525 to mark use definitions in other SLP instances as hybrid. */
2526 if (! STMT_SLP_TYPE (use_vinfo
)
2527 && (STMT_VINFO_RELEVANT (use_vinfo
)
2528 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2529 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2530 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2537 /* Find stmts that must be both vectorized and SLPed. */
2540 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2543 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2544 slp_instance instance
;
2546 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2548 /* First walk all pattern stmt in the loop and mark defs of uses as
2549 hybrid because immediate uses in them are not recorded. */
2550 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2552 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2553 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2556 gimple
*stmt
= gsi_stmt (gsi
);
2557 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
2558 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2561 memset (&wi
, 0, sizeof (wi
));
2562 wi
.info
= loop_vinfo
;
2563 gimple_stmt_iterator gsi2
2564 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
)->stmt
);
2565 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2566 vect_detect_hybrid_slp_1
, &wi
);
2567 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2568 vect_detect_hybrid_slp_2
,
2569 vect_detect_hybrid_slp_1
, &wi
);
2574 /* Then walk the SLP instance trees marking stmts with uses in
2575 non-SLP stmts as hybrid, also propagating hybrid down the
2576 SLP tree, collecting the above info on-the-fly. */
2577 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2579 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2580 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2586 /* Initialize a bb_vec_info struct for the statements between
2587 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2589 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2590 gimple_stmt_iterator region_end_in
,
2591 vec_info_shared
*shared
)
2592 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2593 bb (gsi_bb (region_begin_in
)),
2594 region_begin (region_begin_in
),
2595 region_end (region_end_in
)
2597 gimple_stmt_iterator gsi
;
2599 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2602 gimple
*stmt
= gsi_stmt (gsi
);
2603 gimple_set_uid (stmt
, 0);
2611 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2612 stmts in the basic block. */
2614 _bb_vec_info::~_bb_vec_info ()
2616 for (gimple_stmt_iterator si
= region_begin
;
2617 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2618 /* Reset region marker. */
2619 gimple_set_uid (gsi_stmt (si
), -1);
2624 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2625 given then that child nodes have already been processed, and that
2626 their def types currently match their SLP node's def type. */
2629 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2630 slp_instance node_instance
,
2631 stmt_vector_for_cost
*cost_vec
)
2633 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2634 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2636 /* For BB vectorization vector types are assigned here.
2637 Memory accesses already got their vector type assigned
2638 in vect_analyze_data_refs. */
2639 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2641 && ! STMT_VINFO_DATA_REF (stmt_info
))
2643 tree vectype
, nunits_vectype
;
2644 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
2646 /* We checked this when building the node. */
2648 if (vectype
== boolean_type_node
)
2650 vectype
= vect_get_mask_type_for_stmt (stmt_info
);
2652 /* vect_get_mask_type_for_stmt has already explained the
2657 stmt_vec_info sstmt_info
;
2659 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt_info
)
2660 STMT_VINFO_VECTYPE (sstmt_info
) = vectype
;
2663 /* Calculate the number of vector statements to be created for the
2664 scalar stmts in this node. For SLP reductions it is equal to the
2665 number of vector statements in the children (which has already been
2666 calculated by the recursive call). Otherwise it is the number of
2667 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2668 VF divided by the number of elements in a vector. */
2669 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2670 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2672 for (unsigned i
= 0; i
< SLP_TREE_CHILDREN (node
).length (); ++i
)
2673 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node
)[i
]) == vect_internal_def
)
2675 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2676 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[i
]);
2683 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2684 vf
= loop_vinfo
->vectorization_factor
;
2687 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2688 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2689 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2690 = vect_get_num_vectors (vf
* group_size
, vectype
);
2694 return vect_analyze_stmt (stmt_info
, &dummy
, node
, node_instance
, cost_vec
);
2697 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2698 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2700 Return true if the operations are supported. */
2703 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2704 slp_instance node_instance
,
2705 scalar_stmts_to_slp_tree_map_t
*visited
,
2706 scalar_stmts_to_slp_tree_map_t
*lvisited
,
2707 stmt_vector_for_cost
*cost_vec
)
2712 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2715 /* If we already analyzed the exact same set of scalar stmts we're done.
2716 We share the generated vector stmts for those. */
2718 if ((leader
= visited
->get (SLP_TREE_SCALAR_STMTS (node
)))
2719 || (leader
= lvisited
->get (SLP_TREE_SCALAR_STMTS (node
))))
2721 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2722 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader
);
2726 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2727 doesn't result in any issue since we throw away the lvisited set
2729 lvisited
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
2731 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2732 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2733 visited
, lvisited
, cost_vec
))
2736 /* ??? We have to catch the case late where two first scalar stmts appear
2737 in multiple SLP children with different def type and fail. Remember
2738 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2739 match it when that is vect_internal_def. */
2740 auto_vec
<vect_def_type
, 4> dt
;
2741 dt
.safe_grow (SLP_TREE_CHILDREN (node
).length ());
2742 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2743 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2744 dt
[j
] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]);
2746 /* Push SLP node def-type to stmt operands. */
2747 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2748 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
2749 && SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2750 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2751 = SLP_TREE_DEF_TYPE (child
);
2753 /* Check everything worked out. */
2755 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2756 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2758 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2760 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2761 != SLP_TREE_DEF_TYPE (child
))
2764 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2768 if (!res
&& dump_enabled_p ())
2769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2770 "not vectorized: same operand with different "
2771 "def type in stmt.\n");
2774 res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2777 /* Restore def-types. */
2778 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2779 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2780 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]) = dt
[j
];
2786 /* Analyze statements in SLP instances of VINFO. Return true if the
2787 operations are supported. */
2790 vect_slp_analyze_operations (vec_info
*vinfo
)
2792 slp_instance instance
;
2795 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2797 scalar_stmts_to_slp_tree_map_t
*visited
2798 = new scalar_stmts_to_slp_tree_map_t ();
2799 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2801 scalar_stmts_to_slp_tree_map_t lvisited
;
2802 stmt_vector_for_cost cost_vec
;
2803 cost_vec
.create (2);
2804 if (!vect_slp_analyze_node_operations (vinfo
,
2805 SLP_INSTANCE_TREE (instance
),
2806 instance
, visited
, &lvisited
,
2809 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2810 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2811 if (dump_enabled_p ())
2812 dump_printf_loc (MSG_NOTE
, vect_location
,
2813 "removing SLP instance operations starting from: %G",
2815 vect_free_slp_instance (instance
, false);
2816 vinfo
->slp_instances
.ordered_remove (i
);
2817 cost_vec
.release ();
2821 for (scalar_stmts_to_slp_tree_map_t::iterator x
= lvisited
.begin();
2822 x
!= lvisited
.end(); ++x
)
2823 visited
->put ((*x
).first
.copy (), (*x
).second
);
2826 add_stmt_costs (vinfo
->target_cost_data
, &cost_vec
);
2827 cost_vec
.release ();
2832 return !vinfo
->slp_instances
.is_empty ();
2836 /* Compute the scalar cost of the SLP node NODE and its children
2837 and return it. Do not account defs that are marked in LIFE and
2838 update LIFE according to uses of NODE. */
2841 vect_bb_slp_scalar_cost (basic_block bb
,
2842 slp_tree node
, vec
<bool, va_heap
> *life
,
2843 stmt_vector_for_cost
*cost_vec
,
2844 hash_set
<slp_tree
> &visited
)
2847 stmt_vec_info stmt_info
;
2850 if (visited
.add (node
))
2853 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2855 gimple
*stmt
= stmt_info
->stmt
;
2856 vec_info
*vinfo
= stmt_info
->vinfo
;
2857 ssa_op_iter op_iter
;
2858 def_operand_p def_p
;
2863 /* If there is a non-vectorized use of the defs then the scalar
2864 stmt is kept live in which case we do not account it or any
2865 required defs in the SLP children in the scalar cost. This
2866 way we make the vectorization more costly when compared to
2868 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2870 imm_use_iterator use_iter
;
2872 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2873 if (!is_gimple_debug (use_stmt
))
2875 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
2876 if (!use_stmt_info
|| !PURE_SLP_STMT (use_stmt_info
))
2879 BREAK_FROM_IMM_USE_STMT (use_iter
);
2886 /* Count scalar stmts only once. */
2887 if (gimple_visited_p (stmt
))
2889 gimple_set_visited (stmt
, true);
2891 vect_cost_for_stmt kind
;
2892 if (STMT_VINFO_DATA_REF (stmt_info
))
2894 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2897 kind
= scalar_store
;
2899 else if (vect_nop_conversion_p (stmt_info
))
2903 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
2906 auto_vec
<bool, 20> subtree_life
;
2907 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2909 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2911 /* Do not directly pass LIFE to the recursive call, copy it to
2912 confine changes in the callee to the current child/subtree. */
2913 subtree_life
.safe_splice (*life
);
2914 vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
, cost_vec
,
2916 subtree_life
.truncate (0);
2922 vect_bb_slp_scalar_cost (basic_block bb
,
2923 slp_tree node
, vec
<bool, va_heap
> *life
,
2924 stmt_vector_for_cost
*cost_vec
)
2926 hash_set
<slp_tree
> visited
;
2927 vect_bb_slp_scalar_cost (bb
, node
, life
, cost_vec
, visited
);
2930 /* Check if vectorization of the basic block is profitable. */
2933 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2935 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2936 slp_instance instance
;
2938 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2939 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2941 /* Calculate scalar cost. */
2942 stmt_vector_for_cost scalar_costs
;
2943 scalar_costs
.create (0);
2944 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2946 auto_vec
<bool, 20> life
;
2947 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2948 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2949 SLP_INSTANCE_TREE (instance
),
2950 &life
, &scalar_costs
);
2952 void *target_cost_data
= init_cost (NULL
);
2953 add_stmt_costs (target_cost_data
, &scalar_costs
);
2954 scalar_costs
.release ();
2956 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
2957 destroy_cost_data (target_cost_data
);
2959 /* Unset visited flag. */
2960 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2961 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2962 gimple_set_visited (gsi_stmt (gsi
), false);
2964 /* Complete the target-specific cost calculation. */
2965 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2966 &vec_inside_cost
, &vec_epilogue_cost
);
2968 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2970 if (dump_enabled_p ())
2972 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2973 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2975 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2976 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2977 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2980 /* Vectorization is profitable if its cost is more than the cost of scalar
2981 version. Note that we err on the vector side for equal cost because
2982 the cost estimate is otherwise quite pessimistic (constant uses are
2983 free on the scalar side but cost a load on the vector side for
2985 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2991 /* Find any vectorizable constructors and add them to the grouped_store
2995 vect_slp_check_for_constructors (bb_vec_info bb_vinfo
)
2997 gimple_stmt_iterator gsi
;
2999 for (gsi
= bb_vinfo
->region_begin
;
3000 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
3002 gimple
*stmt
= gsi_stmt (gsi
);
3004 if (is_gimple_assign (stmt
)
3005 && gimple_assign_rhs_code (stmt
) == CONSTRUCTOR
3006 && TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
3007 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt
))) == VECTOR_TYPE
)
3009 tree rhs
= gimple_assign_rhs1 (stmt
);
3011 if (CONSTRUCTOR_NELTS (rhs
) == 0)
3014 poly_uint64 subparts
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs
));
3016 if (maybe_ne (subparts
, CONSTRUCTOR_NELTS (rhs
)))
3019 if (dump_enabled_p ())
3020 dump_printf_loc (MSG_NOTE
, vect_location
,
3021 "Found vectorizable constructor: %G\n", stmt
);
3022 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (stmt
);
3023 BB_VINFO_GROUPED_STORES (bb_vinfo
).safe_push (stmt_info
);
3028 /* Check if the region described by BB_VINFO can be vectorized, returning
3029 true if so. When returning false, set FATAL to true if the same failure
3030 would prevent vectorization at other vector sizes, false if it is still
3031 worth trying other sizes. N_STMTS is the number of statements in the
3035 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo
, int n_stmts
, bool &fatal
)
3037 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3039 slp_instance instance
;
3041 poly_uint64 min_vf
= 2;
3043 /* The first group of checks is independent of the vector size. */
3046 /* Analyze the data references. */
3048 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, NULL
))
3050 if (dump_enabled_p ())
3051 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3052 "not vectorized: unhandled data-ref in basic "
3057 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
3059 if (dump_enabled_p ())
3060 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3061 "not vectorized: not enough data-refs in "
3066 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
3068 if (dump_enabled_p ())
3069 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3070 "not vectorized: unhandled data access in "
3075 vect_slp_check_for_constructors (bb_vinfo
);
3077 /* If there are no grouped stores in the region there is no need
3078 to continue with pattern recog as vect_analyze_slp will fail
3080 if (bb_vinfo
->grouped_stores
.is_empty ())
3082 if (dump_enabled_p ())
3083 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3084 "not vectorized: no grouped stores in "
3089 /* While the rest of the analysis below depends on it in some way. */
3092 vect_pattern_recog (bb_vinfo
);
3094 /* Check the SLP opportunities in the basic block, analyze and build SLP
3096 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
3098 if (dump_enabled_p ())
3100 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3101 "Failed to SLP the basic block.\n");
3102 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3103 "not vectorized: failed to find SLP opportunities "
3104 "in basic block.\n");
3109 vect_record_base_alignments (bb_vinfo
);
3111 /* Analyze and verify the alignment of data references and the
3112 dependence in the SLP instances. */
3113 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
3115 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
3116 || ! vect_slp_analyze_instance_dependence (instance
))
3118 slp_tree node
= SLP_INSTANCE_TREE (instance
);
3119 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3120 if (dump_enabled_p ())
3121 dump_printf_loc (MSG_NOTE
, vect_location
,
3122 "removing SLP instance operations starting from: %G",
3124 vect_free_slp_instance (instance
, false);
3125 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
3129 /* Mark all the statements that we want to vectorize as pure SLP and
3131 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
3132 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3133 if (SLP_INSTANCE_ROOT_STMT (instance
))
3134 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance
)) = pure_slp
;
3138 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3141 if (!vect_slp_analyze_operations (bb_vinfo
))
3143 if (dump_enabled_p ())
3144 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3145 "not vectorized: bad operation in basic block.\n");
3149 /* Cost model: check if the vectorization is worthwhile. */
3150 if (!unlimited_cost_model (NULL
)
3151 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3153 if (dump_enabled_p ())
3154 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3155 "not vectorized: vectorization is not "
3160 if (dump_enabled_p ())
3161 dump_printf_loc (MSG_NOTE
, vect_location
,
3162 "Basic block will be vectorized using SLP\n");
3166 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3167 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3168 on success. The region has N_STMTS statements and has the datarefs
3169 given by DATAREFS. */
3172 vect_slp_bb_region (gimple_stmt_iterator region_begin
,
3173 gimple_stmt_iterator region_end
,
3174 vec
<data_reference_p
> datarefs
,
3175 unsigned int n_stmts
)
3177 bb_vec_info bb_vinfo
;
3178 auto_vector_modes vector_modes
;
3180 /* Autodetect first vector size we try. */
3181 machine_mode next_vector_mode
= VOIDmode
;
3182 targetm
.vectorize
.autovectorize_vector_modes (&vector_modes
, false);
3183 unsigned int mode_i
= 0;
3185 vec_info_shared shared
;
3187 machine_mode autodetected_vector_mode
= VOIDmode
;
3190 bool vectorized
= false;
3192 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, &shared
);
3194 bool first_time_p
= shared
.datarefs
.is_empty ();
3195 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
3197 bb_vinfo
->shared
->save_datarefs ();
3199 bb_vinfo
->shared
->check_datarefs ();
3200 bb_vinfo
->vector_mode
= next_vector_mode
;
3202 if (vect_slp_analyze_bb_1 (bb_vinfo
, n_stmts
, fatal
)
3203 && dbg_cnt (vect_slp
))
3205 if (dump_enabled_p ())
3207 dump_printf_loc (MSG_NOTE
, vect_location
,
3208 "***** Analysis succeeded with vector mode"
3209 " %s\n", GET_MODE_NAME (bb_vinfo
->vector_mode
));
3210 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3213 bb_vinfo
->shared
->check_datarefs ();
3214 vect_schedule_slp (bb_vinfo
);
3216 unsigned HOST_WIDE_INT bytes
;
3217 if (dump_enabled_p ())
3219 if (GET_MODE_SIZE (bb_vinfo
->vector_mode
).is_constant (&bytes
))
3220 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3221 "basic block part vectorized using %wu byte "
3222 "vectors\n", bytes
);
3224 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3225 "basic block part vectorized using variable "
3226 "length vectors\n");
3233 if (dump_enabled_p ())
3234 dump_printf_loc (MSG_NOTE
, vect_location
,
3235 "***** Analysis failed with vector mode %s\n",
3236 GET_MODE_NAME (bb_vinfo
->vector_mode
));
3240 autodetected_vector_mode
= bb_vinfo
->vector_mode
;
3243 while (mode_i
< vector_modes
.length ()
3244 && vect_chooses_same_modes_p (bb_vinfo
, vector_modes
[mode_i
]))
3246 if (dump_enabled_p ())
3247 dump_printf_loc (MSG_NOTE
, vect_location
,
3248 "***** The result for vector mode %s would"
3250 GET_MODE_NAME (vector_modes
[mode_i
]));
3256 if (mode_i
< vector_modes
.length ()
3257 && VECTOR_MODE_P (autodetected_vector_mode
)
3258 && (related_vector_mode (vector_modes
[mode_i
],
3259 GET_MODE_INNER (autodetected_vector_mode
))
3260 == autodetected_vector_mode
)
3261 && (related_vector_mode (autodetected_vector_mode
,
3262 GET_MODE_INNER (vector_modes
[mode_i
]))
3263 == vector_modes
[mode_i
]))
3265 if (dump_enabled_p ())
3266 dump_printf_loc (MSG_NOTE
, vect_location
,
3267 "***** Skipping vector mode %s, which would"
3268 " repeat the analysis for %s\n",
3269 GET_MODE_NAME (vector_modes
[mode_i
]),
3270 GET_MODE_NAME (autodetected_vector_mode
));
3275 || mode_i
== vector_modes
.length ()
3276 || autodetected_vector_mode
== VOIDmode
3277 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3278 vector sizes will fail do not bother iterating. */
3282 /* Try the next biggest vector size. */
3283 next_vector_mode
= vector_modes
[mode_i
++];
3284 if (dump_enabled_p ())
3285 dump_printf_loc (MSG_NOTE
, vect_location
,
3286 "***** Re-trying analysis with vector mode %s\n",
3287 GET_MODE_NAME (next_vector_mode
));
3291 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3292 true if anything in the basic-block was vectorized. */
3295 vect_slp_bb (basic_block bb
)
3297 gimple_stmt_iterator gsi
;
3298 bool any_vectorized
= false;
3300 gsi
= gsi_start_bb (bb
);
3301 while (!gsi_end_p (gsi
))
3303 gimple_stmt_iterator region_begin
= gsi
;
3304 vec
<data_reference_p
> datarefs
= vNULL
;
3307 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3309 gimple
*stmt
= gsi_stmt (gsi
);
3310 if (is_gimple_debug (stmt
))
3314 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3315 vect_location
= stmt
;
3317 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3321 /* Skip leading unhandled stmts. */
3322 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3328 gimple_stmt_iterator region_end
= gsi
;
3330 if (insns
> param_slp_max_insns_in_bb
)
3332 if (dump_enabled_p ())
3333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3334 "not vectorized: too many instructions in "
3337 else if (vect_slp_bb_region (region_begin
, region_end
, datarefs
, insns
))
3338 any_vectorized
= true;
3340 if (gsi_end_p (region_end
))
3343 /* Skip the unhandled stmt. */
3347 return any_vectorized
;
3351 /* Return 1 if vector type STMT_VINFO is a boolean vector. */
3354 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo
)
3356 enum tree_code code
= gimple_expr_code (stmt_vinfo
->stmt
);
3358 enum vect_def_type dt
;
3360 /* For comparison and COND_EXPR type is chosen depending
3361 on the non-constant other comparison operand. */
3362 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3364 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3365 op
= gimple_assign_rhs1 (stmt
);
3367 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3370 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3373 if (code
== COND_EXPR
)
3375 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3376 tree cond
= gimple_assign_rhs1 (stmt
);
3378 if (TREE_CODE (cond
) == SSA_NAME
)
3381 op
= TREE_OPERAND (cond
, 0);
3383 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3386 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3389 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3392 /* Build a variable-length vector in which the elements in ELTS are repeated
3393 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3394 RESULTS and add any new instructions to SEQ.
3396 The approach we use is:
3398 (1) Find a vector mode VM with integer elements of mode IM.
3400 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3401 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3402 from small vectors to IM.
3404 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3406 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3407 correct byte contents.
3409 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3411 We try to find the largest IM for which this sequence works, in order
3412 to cut down on the number of interleaves. */
3415 duplicate_and_interleave (vec_info
*vinfo
, gimple_seq
*seq
, tree vector_type
,
3416 vec
<tree
> elts
, unsigned int nresults
,
3419 unsigned int nelts
= elts
.length ();
3420 tree element_type
= TREE_TYPE (vector_type
);
3422 /* (1) Find a vector mode VM with integer elements of mode IM. */
3423 unsigned int nvectors
= 1;
3424 tree new_vector_type
;
3426 if (!can_duplicate_and_interleave_p (vinfo
, nelts
, TYPE_MODE (element_type
),
3427 &nvectors
, &new_vector_type
,
3431 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3432 unsigned int partial_nelts
= nelts
/ nvectors
;
3433 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3435 tree_vector_builder partial_elts
;
3436 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3437 pieces
.quick_grow (nvectors
* 2);
3438 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3440 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3441 ELTS' has mode IM. */
3442 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3443 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3444 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3445 tree t
= gimple_build_vector (seq
, &partial_elts
);
3446 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3447 TREE_TYPE (new_vector_type
), t
);
3449 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3450 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3453 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3454 correct byte contents.
3456 We need to repeat the following operation log2(nvectors) times:
3458 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3459 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3461 However, if each input repeats every N elements and the VF is
3462 a multiple of N * 2, the HI result is the same as the LO. */
3463 unsigned int in_start
= 0;
3464 unsigned int out_start
= nvectors
;
3465 unsigned int hi_start
= nvectors
/ 2;
3466 /* A bound on the number of outputs needed to produce NRESULTS results
3467 in the final iteration. */
3468 unsigned int noutputs_bound
= nvectors
* nresults
;
3469 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3471 noutputs_bound
/= 2;
3472 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3473 for (unsigned int i
= 0; i
< limit
; ++i
)
3476 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3479 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3483 tree output
= make_ssa_name (new_vector_type
);
3484 tree input1
= pieces
[in_start
+ (i
/ 2)];
3485 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3486 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3489 gimple_seq_add_stmt (seq
, stmt
);
3490 pieces
[out_start
+ i
] = output
;
3492 std::swap (in_start
, out_start
);
3495 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3496 results
.reserve (nresults
);
3497 for (unsigned int i
= 0; i
< nresults
; ++i
)
3499 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3500 pieces
[in_start
+ i
]));
3502 results
.quick_push (results
[i
- nvectors
]);
3506 /* For constant and loop invariant defs of SLP_NODE this function returns
3507 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3508 OP_NODE determines the node for the operand containing the scalar
3512 vect_get_constant_vectors (slp_tree op_node
, slp_tree slp_node
,
3513 vec
<tree
> *vec_oprnds
)
3515 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3516 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3517 unsigned HOST_WIDE_INT nunits
;
3519 unsigned j
, number_of_places_left_in_vector
;
3522 int group_size
= op_node
->ops
.length ();
3523 unsigned int vec_num
, i
;
3524 unsigned number_of_copies
= 1;
3526 tree neutral_op
= NULL
;
3527 gimple_seq ctor_seq
= NULL
;
3528 auto_vec
<tree
, 16> permute_results
;
3530 /* ??? SLP analysis should compute the vector type for the
3531 constant / invariant and store it in the SLP node. */
3532 tree op
= op_node
->ops
[0];
3533 /* Check if vector type is a boolean vector. */
3534 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3535 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3536 && vect_mask_constant_operand_p (stmt_vinfo
))
3537 vector_type
= truth_type_for (stmt_vectype
);
3539 vector_type
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
));
3541 /* ??? For lane-reducing ops we should also have the required number
3542 of vector stmts initialized rather than second-guessing here. */
3543 unsigned int number_of_vectors
;
3544 if (is_gimple_assign (stmt_vinfo
->stmt
)
3545 && (gimple_assign_rhs_code (stmt_vinfo
->stmt
) == SAD_EXPR
3546 || gimple_assign_rhs_code (stmt_vinfo
->stmt
) == DOT_PROD_EXPR
3547 || gimple_assign_rhs_code (stmt_vinfo
->stmt
) == WIDEN_SUM_EXPR
))
3548 number_of_vectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3551 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3552 * TYPE_VECTOR_SUBPARTS (stmt_vectype
),
3554 vec_oprnds
->create (number_of_vectors
);
3555 auto_vec
<tree
> voprnds (number_of_vectors
);
3557 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3558 created vectors. It is greater than 1 if unrolling is performed.
3560 For example, we have two scalar operands, s1 and s2 (e.g., group of
3561 strided accesses of size two), while NUNITS is four (i.e., four scalars
3562 of this type can be packed in a vector). The output vector will contain
3563 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3566 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3567 containing the operands.
3569 For example, NUNITS is four as before, and the group size is 8
3570 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3571 {s5, s6, s7, s8}. */
3573 /* When using duplicate_and_interleave, we just need one element for
3574 each scalar statement. */
3575 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3576 nunits
= group_size
;
3578 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3580 number_of_places_left_in_vector
= nunits
;
3582 tree_vector_builder
elts (vector_type
, nunits
, 1);
3583 elts
.quick_grow (nunits
);
3584 bool place_after_defs
= false;
3585 for (j
= 0; j
< number_of_copies
; j
++)
3587 for (i
= group_size
- 1; op_node
->ops
.iterate (i
, &op
); i
--)
3589 /* Create 'vect_ = {op0,op1,...,opn}'. */
3590 number_of_places_left_in_vector
--;
3592 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3594 if (CONSTANT_CLASS_P (op
))
3596 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3598 /* Can't use VIEW_CONVERT_EXPR for booleans because
3599 of possibly different sizes of scalar value and
3601 if (integer_zerop (op
))
3602 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3603 else if (integer_onep (op
))
3604 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3609 op
= fold_unary (VIEW_CONVERT_EXPR
,
3610 TREE_TYPE (vector_type
), op
);
3611 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3615 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3617 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3620 = build_all_ones_cst (TREE_TYPE (vector_type
));
3622 = build_zero_cst (TREE_TYPE (vector_type
));
3623 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3624 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3630 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3633 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3636 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3640 elts
[number_of_places_left_in_vector
] = op
;
3641 if (!CONSTANT_CLASS_P (op
))
3643 if (TREE_CODE (orig_op
) == SSA_NAME
3644 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3645 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3646 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3647 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3648 place_after_defs
= true;
3650 if (number_of_places_left_in_vector
== 0)
3653 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3654 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3655 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3658 if (permute_results
.is_empty ())
3659 duplicate_and_interleave (vinfo
, &ctor_seq
, vector_type
,
3660 elts
, number_of_vectors
,
3662 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3665 gimple_stmt_iterator gsi
;
3666 if (place_after_defs
)
3668 stmt_vec_info last_stmt_info
3669 = vect_find_last_scalar_stmt_in_slp (slp_node
);
3670 gsi
= gsi_for_stmt (last_stmt_info
->stmt
);
3671 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3675 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3677 if (ctor_seq
!= NULL
)
3679 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3680 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3683 voprnds
.quick_push (init
);
3684 place_after_defs
= false;
3685 number_of_places_left_in_vector
= nunits
;
3687 elts
.new_vector (vector_type
, nunits
, 1);
3688 elts
.quick_grow (nunits
);
3693 /* Since the vectors are created in the reverse order, we should invert
3695 vec_num
= voprnds
.length ();
3696 for (j
= vec_num
; j
!= 0; j
--)
3698 vop
= voprnds
[j
- 1];
3699 vec_oprnds
->quick_push (vop
);
3702 /* In case that VF is greater than the unrolling factor needed for the SLP
3703 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3704 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3705 to replicate the vectors. */
3706 while (number_of_vectors
> vec_oprnds
->length ())
3708 tree neutral_vec
= NULL
;
3713 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3715 vec_oprnds
->quick_push (neutral_vec
);
3719 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3720 vec_oprnds
->quick_push (vop
);
3726 /* Get vectorized definitions from SLP_NODE that contains corresponding
3727 vectorized def-stmts. */
3730 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3732 stmt_vec_info vec_def_stmt_info
;
3735 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3737 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt_info
)
3738 vec_oprnds
->quick_push (gimple_get_lhs (vec_def_stmt_info
->stmt
));
3742 /* Get N vectorized definitions for SLP_NODE.
3743 If the scalar definitions are loop invariants or constants, collect them and
3744 call vect_get_constant_vectors() to create vector stmts.
3745 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3746 must be stored in the corresponding child of SLP_NODE, and we call
3747 vect_get_slp_vect_defs () to retrieve them. */
3750 vect_get_slp_defs (slp_tree slp_node
, vec
<vec
<tree
> > *vec_oprnds
, unsigned n
)
3753 n
= SLP_TREE_CHILDREN (slp_node
).length ();
3755 for (unsigned i
= 0; i
< n
; ++i
)
3757 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[i
];
3759 vec
<tree
> vec_defs
= vNULL
;
3761 /* For each operand we check if it has vectorized definitions in a child
3762 node or we need to create them (for invariants and constants). */
3763 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3765 vec_defs
.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child
));
3766 vect_get_slp_vect_defs (child
, &vec_defs
);
3769 vect_get_constant_vectors (child
, slp_node
, &vec_defs
);
3771 vec_oprnds
->quick_push (vec_defs
);
3775 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3776 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3777 permute statements for the SLP node NODE of the SLP instance
3778 SLP_NODE_INSTANCE. */
3781 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3782 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3783 slp_instance slp_node_instance
, bool analyze_only
,
3786 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3787 vec_info
*vinfo
= stmt_info
->vinfo
;
3789 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3790 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3791 unsigned int mask_element
;
3794 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3797 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
3799 mode
= TYPE_MODE (vectype
);
3800 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3802 /* Initialize the vect stmts of NODE to properly insert the generated
3805 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3806 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3807 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3809 /* Generate permutation masks for every NODE. Number of masks for each NODE
3810 is equal to GROUP_SIZE.
3811 E.g., we have a group of three nodes with three loads from the same
3812 location in each node, and the vector size is 4. I.e., we have a
3813 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3814 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3815 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3818 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3819 The last mask is illegal since we assume two operands for permute
3820 operation, and the mask element values can't be outside that range.
3821 Hence, the last mask must be converted into {2,5,5,5}.
3822 For the first two permutations we need the first and the second input
3823 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3824 we need the second and the third vectors: {b1,c1,a2,b2} and
3827 int vect_stmts_counter
= 0;
3828 unsigned int index
= 0;
3829 int first_vec_index
= -1;
3830 int second_vec_index
= -1;
3834 vec_perm_builder mask
;
3835 unsigned int nelts_to_build
;
3836 unsigned int nvectors_per_build
;
3837 bool repeating_p
= (group_size
== DR_GROUP_SIZE (stmt_info
)
3838 && multiple_p (nunits
, group_size
));
3841 /* A single vector contains a whole number of copies of the node, so:
3842 (a) all permutes can use the same mask; and
3843 (b) the permutes only need a single vector input. */
3844 mask
.new_vector (nunits
, group_size
, 3);
3845 nelts_to_build
= mask
.encoded_nelts ();
3846 nvectors_per_build
= SLP_TREE_VEC_STMTS (node
).length ();
3850 /* We need to construct a separate mask for each vector statement. */
3851 unsigned HOST_WIDE_INT const_nunits
, const_vf
;
3852 if (!nunits
.is_constant (&const_nunits
)
3853 || !vf
.is_constant (&const_vf
))
3855 mask
.new_vector (const_nunits
, const_nunits
, 1);
3856 nelts_to_build
= const_vf
* group_size
;
3857 nvectors_per_build
= 1;
3860 unsigned int count
= mask
.encoded_nelts ();
3861 mask
.quick_grow (count
);
3862 vec_perm_indices indices
;
3864 for (unsigned int j
= 0; j
< nelts_to_build
; j
++)
3866 unsigned int iter_num
= j
/ group_size
;
3867 unsigned int stmt_num
= j
% group_size
;
3868 unsigned int i
= (iter_num
* DR_GROUP_SIZE (stmt_info
)
3869 + SLP_TREE_LOAD_PERMUTATION (node
)[stmt_num
]);
3872 first_vec_index
= 0;
3877 /* Enforced before the loop when !repeating_p. */
3878 unsigned int const_nunits
= nunits
.to_constant ();
3879 vec_index
= i
/ const_nunits
;
3880 mask_element
= i
% const_nunits
;
3881 if (vec_index
== first_vec_index
3882 || first_vec_index
== -1)
3884 first_vec_index
= vec_index
;
3886 else if (vec_index
== second_vec_index
3887 || second_vec_index
== -1)
3889 second_vec_index
= vec_index
;
3890 mask_element
+= const_nunits
;
3894 if (dump_enabled_p ())
3895 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3896 "permutation requires at "
3897 "least three vectors %G",
3899 gcc_assert (analyze_only
);
3903 gcc_assert (mask_element
< 2 * const_nunits
);
3906 if (mask_element
!= index
)
3908 mask
[index
++] = mask_element
;
3910 if (index
== count
&& !noop_p
)
3912 indices
.new_vector (mask
, second_vec_index
== -1 ? 1 : 2, nunits
);
3913 if (!can_vec_perm_const_p (mode
, indices
))
3915 if (dump_enabled_p ())
3917 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3919 "unsupported vect permute { ");
3920 for (i
= 0; i
< count
; ++i
)
3922 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3923 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3925 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3927 gcc_assert (analyze_only
);
3938 tree mask_vec
= NULL_TREE
;
3941 mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
3943 if (second_vec_index
== -1)
3944 second_vec_index
= first_vec_index
;
3946 for (unsigned int ri
= 0; ri
< nvectors_per_build
; ++ri
)
3948 /* Generate the permute statement if necessary. */
3949 tree first_vec
= dr_chain
[first_vec_index
+ ri
];
3950 tree second_vec
= dr_chain
[second_vec_index
+ ri
];
3951 stmt_vec_info perm_stmt_info
;
3954 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3956 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3958 perm_dest
= make_ssa_name (perm_dest
);
3960 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
3961 first_vec
, second_vec
,
3964 = vect_finish_stmt_generation (stmt_info
, perm_stmt
,
3968 /* If mask was NULL_TREE generate the requested
3969 identity transform. */
3970 perm_stmt_info
= vinfo
->lookup_def (first_vec
);
3972 /* Store the vector statement in NODE. */
3973 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++]
3979 first_vec_index
= -1;
3980 second_vec_index
= -1;
3988 /* Vectorize SLP instance tree in postorder. */
3991 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3992 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3994 gimple_stmt_iterator si
;
3995 stmt_vec_info stmt_info
;
3996 unsigned int group_size
;
4001 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4004 /* See if we have already vectorized the node in the graph of the
4006 if (SLP_TREE_VEC_STMTS (node
).exists ())
4009 /* See if we have already vectorized the same set of stmts and reuse their
4010 vectorized stmts across instances. */
4011 if (slp_tree
*leader
= bst_map
->get (SLP_TREE_SCALAR_STMTS (node
)))
4013 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (*leader
));
4017 bst_map
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
4018 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4019 vect_schedule_slp_instance (child
, instance
, bst_map
);
4021 /* Push SLP node def-type to stmts. */
4022 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4023 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4025 stmt_vec_info child_stmt_info
;
4026 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
4027 STMT_VINFO_DEF_TYPE (child_stmt_info
) = SLP_TREE_DEF_TYPE (child
);
4030 stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
4032 /* VECTYPE is the type of the destination. */
4033 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4034 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4035 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
4037 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
4038 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
4040 if (dump_enabled_p ())
4041 dump_printf_loc (MSG_NOTE
, vect_location
,
4042 "------>vectorizing SLP node starting from: %G",
4045 /* Vectorized stmts go before the last scalar stmt which is where
4046 all uses are ready. */
4047 stmt_vec_info last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
4048 si
= gsi_for_stmt (last_stmt_info
->stmt
);
4050 /* Handle two-operation SLP nodes by vectorizing the group with
4051 both operations and then performing a merge. */
4052 if (SLP_TREE_TWO_OPERATORS (node
))
4054 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
4055 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
4056 enum tree_code ocode
= ERROR_MARK
;
4057 stmt_vec_info ostmt_info
;
4058 vec_perm_builder
mask (group_size
, group_size
, 1);
4059 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt_info
)
4061 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
4062 if (gimple_assign_rhs_code (ostmt
) != code0
)
4064 mask
.quick_push (1);
4065 ocode
= gimple_assign_rhs_code (ostmt
);
4068 mask
.quick_push (0);
4070 if (ocode
!= ERROR_MARK
)
4072 vec
<stmt_vec_info
> v0
;
4073 vec
<stmt_vec_info
> v1
;
4075 tree tmask
= NULL_TREE
;
4076 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
4077 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
4078 SLP_TREE_VEC_STMTS (node
).truncate (0);
4079 gimple_assign_set_rhs_code (stmt
, ocode
);
4080 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
4081 gimple_assign_set_rhs_code (stmt
, code0
);
4082 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
4083 SLP_TREE_VEC_STMTS (node
).truncate (0);
4084 tree meltype
= build_nonstandard_integer_type
4085 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
4086 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
4088 for (j
= 0; j
< v0
.length (); ++j
)
4090 /* Enforced by vect_build_slp_tree, which rejects variable-length
4091 vectors for SLP_TREE_TWO_OPERATORS. */
4092 unsigned int const_nunits
= nunits
.to_constant ();
4093 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
4094 for (l
= 0; l
< const_nunits
; ++l
)
4096 if (k
>= group_size
)
4098 tree t
= build_int_cst (meltype
,
4099 mask
[k
++] * const_nunits
+ l
);
4100 melts
.quick_push (t
);
4102 tmask
= melts
.build ();
4104 /* ??? Not all targets support a VEC_PERM_EXPR with a
4105 constant mask that would translate to a vec_merge RTX
4106 (with their vec_perm_const_ok). We can either not
4107 vectorize in that case or let veclower do its job.
4108 Unfortunately that isn't too great and at least for
4109 plus/minus we'd eventually like to match targets
4110 vector addsub instructions. */
4112 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
4114 gimple_assign_lhs (v0
[j
]->stmt
),
4115 gimple_assign_lhs (v1
[j
]->stmt
),
4117 SLP_TREE_VEC_STMTS (node
).quick_push
4118 (vect_finish_stmt_generation (stmt_info
, vstmt
, &si
));
4125 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
4127 /* Restore stmt def-types. */
4128 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4129 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4131 stmt_vec_info child_stmt_info
;
4132 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
4133 STMT_VINFO_DEF_TYPE (child_stmt_info
) = vect_internal_def
;
4137 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4138 For loop vectorization this is done in vectorizable_call, but for SLP
4139 it needs to be deferred until end of vect_schedule_slp, because multiple
4140 SLP instances may refer to the same scalar stmt. */
4143 vect_remove_slp_scalar_calls (slp_tree node
, hash_set
<slp_tree
> &visited
)
4146 gimple_stmt_iterator gsi
;
4150 stmt_vec_info stmt_info
;
4152 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4155 if (visited
.add (node
))
4158 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4159 vect_remove_slp_scalar_calls (child
, visited
);
4161 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
4163 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
4164 if (!stmt
|| gimple_bb (stmt
) == NULL
)
4166 if (is_pattern_stmt_p (stmt_info
)
4167 || !PURE_SLP_STMT (stmt_info
))
4169 lhs
= gimple_call_lhs (stmt
);
4170 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
4171 gsi
= gsi_for_stmt (stmt
);
4172 stmt_info
->vinfo
->replace_stmt (&gsi
, stmt_info
, new_stmt
);
4173 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
4178 vect_remove_slp_scalar_calls (slp_tree node
)
4180 hash_set
<slp_tree
> visited
;
4181 vect_remove_slp_scalar_calls (node
, visited
);
4184 /* Vectorize the instance root. */
4187 vectorize_slp_instance_root_stmt (slp_tree node
, slp_instance instance
)
4189 gassign
*rstmt
= NULL
;
4191 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) == 1)
4193 stmt_vec_info child_stmt_info
;
4196 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt_info
)
4198 tree vect_lhs
= gimple_get_lhs (child_stmt_info
->stmt
);
4199 tree root_lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4200 rstmt
= gimple_build_assign (root_lhs
, vect_lhs
);
4204 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) > 1)
4206 int nelts
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
4207 stmt_vec_info child_stmt_info
;
4209 vec
<constructor_elt
, va_gc
> *v
;
4210 vec_alloc (v
, nelts
);
4212 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt_info
)
4214 CONSTRUCTOR_APPEND_ELT (v
,
4216 gimple_get_lhs (child_stmt_info
->stmt
));
4218 tree lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4219 tree rtype
= TREE_TYPE (gimple_assign_rhs1 (instance
->root_stmt
->stmt
));
4220 tree r_constructor
= build_constructor (rtype
, v
);
4221 rstmt
= gimple_build_assign (lhs
, r_constructor
);
4226 gimple_stmt_iterator rgsi
= gsi_for_stmt (instance
->root_stmt
->stmt
);
4227 gsi_replace (&rgsi
, rstmt
, true);
4230 /* Generate vector code for all SLP instances in the loop/basic block. */
4233 vect_schedule_slp (vec_info
*vinfo
)
4235 vec
<slp_instance
> slp_instances
;
4236 slp_instance instance
;
4239 scalar_stmts_to_slp_tree_map_t
*bst_map
4240 = new scalar_stmts_to_slp_tree_map_t ();
4241 slp_instances
= vinfo
->slp_instances
;
4242 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4244 slp_tree node
= SLP_INSTANCE_TREE (instance
);
4245 /* Schedule the tree of INSTANCE. */
4246 vect_schedule_slp_instance (node
, instance
, bst_map
);
4248 if (SLP_INSTANCE_ROOT_STMT (instance
))
4249 vectorize_slp_instance_root_stmt (node
, instance
);
4251 if (dump_enabled_p ())
4252 dump_printf_loc (MSG_NOTE
, vect_location
,
4253 "vectorizing stmts using SLP.\n");
4257 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4259 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4260 stmt_vec_info store_info
;
4263 /* Remove scalar call stmts. Do not do this for basic-block
4264 vectorization as not all uses may be vectorized.
4265 ??? Why should this be necessary? DCE should be able to
4266 remove the stmts itself.
4267 ??? For BB vectorization we can as well remove scalar
4268 stmts starting from the SLP tree root if they have no
4270 if (is_a
<loop_vec_info
> (vinfo
))
4271 vect_remove_slp_scalar_calls (root
);
4273 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
)
4274 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4276 if (!STMT_VINFO_DATA_REF (store_info
))
4279 if (SLP_INSTANCE_ROOT_STMT (instance
))
4282 store_info
= vect_orig_stmt (store_info
);
4283 /* Free the attached stmt_vec_info and remove the stmt. */
4284 vinfo
->remove_stmt (store_info
);