1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
49 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
50 FINAL_P is true if we have vectorized the instance or if we have
51 made a final decision not to vectorize the statements in any way. */
54 vect_free_slp_tree (slp_tree node
, bool final_p
)
59 if (--node
->refcnt
!= 0)
62 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
63 vect_free_slp_tree (child
, final_p
);
65 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
66 Some statements might no longer exist, after having been
67 removed by vect_transform_stmt. Updating the remaining
68 statements would be redundant. */
71 stmt_vec_info stmt_info
;
72 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
74 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info
) > 0);
75 STMT_VINFO_NUM_SLP_USES (stmt_info
)--;
79 SLP_TREE_CHILDREN (node
).release ();
80 SLP_TREE_SCALAR_STMTS (node
).release ();
81 SLP_TREE_SCALAR_OPS (node
).release ();
82 SLP_TREE_VEC_STMTS (node
).release ();
83 SLP_TREE_LOAD_PERMUTATION (node
).release ();
88 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
89 have vectorized the instance or if we have made a final decision not
90 to vectorize the statements in any way. */
93 vect_free_slp_instance (slp_instance instance
, bool final_p
)
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
), final_p
);
96 SLP_INSTANCE_LOADS (instance
).release ();
101 /* Create an SLP node for SCALAR_STMTS. */
104 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
)
107 stmt_vec_info stmt_info
= scalar_stmts
[0];
110 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
111 nops
= gimple_call_num_args (stmt
);
112 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
114 nops
= gimple_num_ops (stmt
) - 1;
115 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
118 else if (is_a
<gphi
*> (stmt_info
->stmt
))
123 node
= XNEW (struct _slp_tree
);
124 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
125 SLP_TREE_SCALAR_OPS (node
) = vNULL
;
126 SLP_TREE_VEC_STMTS (node
).create (0);
127 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
128 SLP_TREE_CHILDREN (node
).create (nops
);
129 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
130 SLP_TREE_TWO_OPERATORS (node
) = false;
131 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
133 node
->max_nunits
= 1;
136 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt_info
)
137 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
142 /* Create an SLP node for OPS. */
145 vect_create_new_slp_node (vec
<tree
> ops
)
149 node
= XNEW (struct _slp_tree
);
150 SLP_TREE_SCALAR_STMTS (node
) = vNULL
;
151 SLP_TREE_SCALAR_OPS (node
) = ops
;
152 SLP_TREE_VEC_STMTS (node
).create (0);
153 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
154 SLP_TREE_CHILDREN (node
) = vNULL
;
155 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
156 SLP_TREE_TWO_OPERATORS (node
) = false;
157 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
159 node
->max_nunits
= 1;
165 /* This structure is used in creation of an SLP tree. Each instance
166 corresponds to the same operand in a group of scalar stmts in an SLP
168 typedef struct _slp_oprnd_info
170 /* Def-stmts for the operands. */
171 vec
<stmt_vec_info
> def_stmts
;
174 /* Information about the first statement, its vector def-type, type, the
175 operand itself in case it's constant, and an indication if it's a pattern
178 enum vect_def_type first_dt
;
183 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
185 static vec
<slp_oprnd_info
>
186 vect_create_oprnd_info (int nops
, int group_size
)
189 slp_oprnd_info oprnd_info
;
190 vec
<slp_oprnd_info
> oprnds_info
;
192 oprnds_info
.create (nops
);
193 for (i
= 0; i
< nops
; i
++)
195 oprnd_info
= XNEW (struct _slp_oprnd_info
);
196 oprnd_info
->def_stmts
.create (group_size
);
197 oprnd_info
->ops
.create (group_size
);
198 oprnd_info
->first_dt
= vect_uninitialized_def
;
199 oprnd_info
->first_op_type
= NULL_TREE
;
200 oprnd_info
->any_pattern
= false;
201 oprnds_info
.quick_push (oprnd_info
);
208 /* Free operands info. */
211 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
214 slp_oprnd_info oprnd_info
;
216 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
218 oprnd_info
->def_stmts
.release ();
219 oprnd_info
->ops
.release ();
220 XDELETE (oprnd_info
);
223 oprnds_info
.release ();
227 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
228 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
232 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info
,
233 stmt_vec_info first_stmt_info
)
235 stmt_vec_info next_stmt_info
= first_stmt_info
;
238 if (first_stmt_info
!= DR_GROUP_FIRST_ELEMENT (stmt_info
))
243 if (next_stmt_info
== stmt_info
)
245 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
247 result
+= DR_GROUP_GAP (next_stmt_info
);
249 while (next_stmt_info
);
254 /* Check whether it is possible to load COUNT elements of type ELT_MODE
255 using the method implemented by duplicate_and_interleave. Return true
256 if so, returning the number of intermediate vectors in *NVECTORS_OUT
257 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
261 can_duplicate_and_interleave_p (vec_info
*vinfo
, unsigned int count
,
262 machine_mode elt_mode
,
263 unsigned int *nvectors_out
,
264 tree
*vector_type_out
,
267 poly_int64 elt_bytes
= count
* GET_MODE_SIZE (elt_mode
);
269 unsigned int nvectors
= 1;
272 scalar_int_mode int_mode
;
273 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
274 if (multiple_p (vinfo
->vector_size
, elt_bytes
, &nelts
)
275 && int_mode_for_size (elt_bits
, 0).exists (&int_mode
))
277 tree int_type
= build_nonstandard_integer_type
278 (GET_MODE_BITSIZE (int_mode
), 1);
279 tree vector_type
= build_vector_type (int_type
, nelts
);
280 if (VECTOR_MODE_P (TYPE_MODE (vector_type
)))
282 vec_perm_builder
sel1 (nelts
, 2, 3);
283 vec_perm_builder
sel2 (nelts
, 2, 3);
284 poly_int64 half_nelts
= exact_div (nelts
, 2);
285 for (unsigned int i
= 0; i
< 3; ++i
)
288 sel1
.quick_push (i
+ nelts
);
289 sel2
.quick_push (half_nelts
+ i
);
290 sel2
.quick_push (half_nelts
+ i
+ nelts
);
292 vec_perm_indices
indices1 (sel1
, 2, nelts
);
293 vec_perm_indices
indices2 (sel2
, 2, nelts
);
294 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
295 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
298 *nvectors_out
= nvectors
;
300 *vector_type_out
= vector_type
;
303 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
305 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
312 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
318 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
319 they are of a valid type and that they match the defs of the first stmt of
320 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
321 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
322 indicates swap is required for cond_expr stmts. Specifically, *SWAP
323 is 1 if STMT is cond and operands of comparison need to be swapped;
324 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
325 If there is any operand swap in this function, *SWAP is set to non-zero
327 If there was a fatal error return -1; if the error could be corrected by
328 swapping operands of father node of this one, return 1; if everything is
331 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
332 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
333 vec
<slp_oprnd_info
> *oprnds_info
)
335 stmt_vec_info stmt_info
= stmts
[stmt_num
];
337 unsigned int i
, number_of_oprnds
;
338 enum vect_def_type dt
= vect_uninitialized_def
;
339 slp_oprnd_info oprnd_info
;
340 int first_op_idx
= 1;
341 unsigned int commutative_op
= -1U;
342 bool first_op_cond
= false;
343 bool first
= stmt_num
== 0;
345 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
347 number_of_oprnds
= gimple_call_num_args (stmt
);
349 if (gimple_call_internal_p (stmt
))
351 internal_fn ifn
= gimple_call_internal_fn (stmt
);
352 commutative_op
= first_commutative_argument (ifn
);
354 /* Masked load, only look at mask. */
355 if (ifn
== IFN_MASK_LOAD
)
357 number_of_oprnds
= 1;
358 /* Mask operand index. */
363 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
365 enum tree_code code
= gimple_assign_rhs_code (stmt
);
366 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
367 /* Swap can only be done for cond_expr if asked to, otherwise we
368 could result in different comparison code to the first stmt. */
369 if (code
== COND_EXPR
370 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
372 first_op_cond
= true;
376 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
381 bool swapped
= (*swap
!= 0);
382 gcc_assert (!swapped
|| first_op_cond
);
383 for (i
= 0; i
< number_of_oprnds
; i
++)
388 /* Map indicating how operands of cond_expr should be swapped. */
389 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
390 int *map
= maps
[*swap
];
393 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
394 first_op_idx
), map
[i
]);
396 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
399 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
400 if (TREE_CODE (oprnd
) == VIEW_CONVERT_EXPR
)
401 oprnd
= TREE_OPERAND (oprnd
, 0);
403 oprnd_info
= (*oprnds_info
)[i
];
405 stmt_vec_info def_stmt_info
;
406 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
408 if (dump_enabled_p ())
409 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
410 "Build SLP failed: can't analyze def for %T\n",
416 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
417 oprnd_info
->any_pattern
= true;
421 /* For the swapping logic below force vect_reduction_def
422 for the reduction op in a SLP reduction group. */
423 if (!STMT_VINFO_DATA_REF (stmt_info
)
424 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
425 && (int)i
== STMT_VINFO_REDUC_IDX (stmt_info
)
427 dt
= vect_reduction_def
;
428 oprnd_info
->first_dt
= dt
;
429 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
433 /* Not first stmt of the group, check that the def-stmt/s match
434 the def-stmt/s of the first stmt. Allow different definition
435 types for reduction chains: the first stmt must be a
436 vect_reduction_def (a phi node), and the rest
437 end in the reduction chain. */
438 tree type
= TREE_TYPE (oprnd
);
439 if ((oprnd_info
->first_dt
!= dt
440 && !(oprnd_info
->first_dt
== vect_reduction_def
441 && !STMT_VINFO_DATA_REF (stmt_info
)
442 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
444 && !STMT_VINFO_DATA_REF (def_stmt_info
)
445 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
446 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
447 && !((oprnd_info
->first_dt
== vect_external_def
448 || oprnd_info
->first_dt
== vect_constant_def
)
449 && (dt
== vect_external_def
450 || dt
== vect_constant_def
)))
451 || !types_compatible_p (oprnd_info
->first_op_type
, type
)
452 || (!STMT_VINFO_DATA_REF (stmt_info
)
453 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
455 || STMT_VINFO_DATA_REF (def_stmt_info
)
456 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
457 != REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
458 != (oprnd_info
->first_dt
!= vect_reduction_def
))))
460 /* Try swapping operands if we got a mismatch. */
461 if (i
== commutative_op
&& !swapped
)
463 if (dump_enabled_p ())
464 dump_printf_loc (MSG_NOTE
, vect_location
,
465 "trying swapped operands\n");
470 if (dump_enabled_p ())
471 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
472 "Build SLP failed: different types\n");
476 if ((dt
== vect_constant_def
477 || dt
== vect_external_def
)
478 && !vinfo
->vector_size
.is_constant ()
479 && (TREE_CODE (type
) == BOOLEAN_TYPE
480 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
483 if (dump_enabled_p ())
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
485 "Build SLP failed: invalid type of def "
486 "for variable-length SLP %T\n", oprnd
);
491 /* Check the types of the definitions. */
494 case vect_external_def
:
495 /* Make sure to demote the overall operand to external. */
496 oprnd_info
->first_dt
= vect_external_def
;
498 case vect_constant_def
:
499 oprnd_info
->def_stmts
.quick_push (NULL
);
500 oprnd_info
->ops
.quick_push (oprnd
);
503 case vect_internal_def
:
504 case vect_reduction_def
:
505 if (oprnd_info
->first_dt
== vect_reduction_def
506 && !STMT_VINFO_DATA_REF (stmt_info
)
507 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
508 && !STMT_VINFO_DATA_REF (def_stmt_info
)
509 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
510 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
512 /* For a SLP reduction chain we want to duplicate the
513 reduction to each of the chain members. That gets
514 us a sane SLP graph (still the stmts are not 100%
515 correct wrt the initial values). */
517 oprnd_info
->def_stmts
.quick_push (oprnd_info
->def_stmts
[0]);
518 oprnd_info
->ops
.quick_push (oprnd_info
->ops
[0]);
522 case vect_induction_def
:
523 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
524 oprnd_info
->ops
.quick_push (oprnd
);
528 /* FORNOW: Not supported. */
529 if (dump_enabled_p ())
530 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
531 "Build SLP failed: illegal type of def %T\n",
543 /* If there are already uses of this stmt in a SLP instance then
544 we've committed to the operand order and can't swap it. */
545 if (STMT_VINFO_NUM_SLP_USES (stmt_info
) != 0)
547 if (dump_enabled_p ())
548 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
549 "Build SLP failed: cannot swap operands of "
550 "shared stmt %G", stmt_info
->stmt
);
554 /* To get rid of this swapping we have to move the stmt code
555 to the SLP tree as well (and gather it here per stmt). */
556 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
557 tree cond
= gimple_assign_rhs1 (stmt
);
558 enum tree_code code
= TREE_CODE (cond
);
563 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
564 &TREE_OPERAND (cond
, 1));
565 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
570 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
571 gimple_assign_rhs3_ptr (stmt
));
572 if (STMT_VINFO_REDUC_IDX (stmt_info
) == 1)
573 STMT_VINFO_REDUC_IDX (stmt_info
) = 2;
574 else if (STMT_VINFO_REDUC_IDX (stmt_info
) == 2)
575 STMT_VINFO_REDUC_IDX (stmt_info
) = 1;
576 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
577 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
578 gcc_assert (code
!= ERROR_MARK
);
579 TREE_SET_CODE (cond
, code
);
584 /* Commutative ops need not reflect swapping, ops are in
587 if (dump_enabled_p ())
588 dump_printf_loc (MSG_NOTE
, vect_location
,
589 "swapped operands to match def types in %G",
597 /* Return true if call statements CALL1 and CALL2 are similar enough
598 to be combined into the same SLP group. */
601 compatible_calls_p (gcall
*call1
, gcall
*call2
)
603 unsigned int nargs
= gimple_call_num_args (call1
);
604 if (nargs
!= gimple_call_num_args (call2
))
607 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
610 if (gimple_call_internal_p (call1
))
612 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
613 TREE_TYPE (gimple_call_lhs (call2
))))
615 for (unsigned int i
= 0; i
< nargs
; ++i
)
616 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
617 TREE_TYPE (gimple_call_arg (call2
, i
))))
622 if (!operand_equal_p (gimple_call_fn (call1
),
623 gimple_call_fn (call2
), 0))
626 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
632 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
633 caller's attempt to find the vector type in STMT_INFO with the narrowest
634 element type. Return true if VECTYPE is nonnull and if it is valid
635 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
636 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
637 vect_build_slp_tree. */
640 vect_record_max_nunits (stmt_vec_info stmt_info
, unsigned int group_size
,
641 tree vectype
, poly_uint64
*max_nunits
)
645 if (dump_enabled_p ())
646 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
647 "Build SLP failed: unsupported data-type in %G\n",
649 /* Fatal mismatch. */
653 /* If populating the vector type requires unrolling then fail
654 before adjusting *max_nunits for basic-block vectorization. */
655 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
656 unsigned HOST_WIDE_INT const_nunits
;
657 if (STMT_VINFO_BB_VINFO (stmt_info
)
658 && (!nunits
.is_constant (&const_nunits
)
659 || const_nunits
> group_size
))
661 if (dump_enabled_p ())
662 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
663 "Build SLP failed: unrolling required "
664 "in basic block SLP\n");
665 /* Fatal mismatch. */
669 /* In case of multiple types we need to detect the smallest type. */
670 vect_update_max_nunits (max_nunits
, vectype
);
674 /* STMTS is a group of GROUP_SIZE SLP statements in which some
675 statements do the same operation as the first statement and in which
676 the others do ALT_STMT_CODE. Return true if we can take one vector
677 of the first operation and one vector of the second and permute them
678 to get the required result. VECTYPE is the type of the vector that
679 would be permuted. */
682 vect_two_operations_perm_ok_p (vec
<stmt_vec_info
> stmts
,
683 unsigned int group_size
, tree vectype
,
684 tree_code alt_stmt_code
)
686 unsigned HOST_WIDE_INT count
;
687 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&count
))
690 vec_perm_builder
sel (count
, count
, 1);
691 for (unsigned int i
= 0; i
< count
; ++i
)
693 unsigned int elt
= i
;
694 gassign
*stmt
= as_a
<gassign
*> (stmts
[i
% group_size
]->stmt
);
695 if (gimple_assign_rhs_code (stmt
) == alt_stmt_code
)
697 sel
.quick_push (elt
);
699 vec_perm_indices
indices (sel
, 2, count
);
700 return can_vec_perm_const_p (TYPE_MODE (vectype
), indices
);
703 /* Verify if the scalar stmts STMTS are isomorphic, require data
704 permutation or are of unsupported types of operation. Return
705 true if they are, otherwise return false and indicate in *MATCHES
706 which stmts are not isomorphic to the first one. If MATCHES[0]
707 is false then this indicates the comparison could not be
708 carried out or the stmts will never be vectorized by SLP.
710 Note COND_EXPR is possibly isomorphic to another one after swapping its
711 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
712 the first stmt by swapping the two operands of comparison; set SWAP[i]
713 to 2 if stmt I is isormorphic to the first stmt by inverting the code
714 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
715 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
718 vect_build_slp_tree_1 (unsigned char *swap
,
719 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
720 poly_uint64
*max_nunits
, bool *matches
,
724 stmt_vec_info first_stmt_info
= stmts
[0];
725 enum tree_code first_stmt_code
= ERROR_MARK
;
726 enum tree_code alt_stmt_code
= ERROR_MARK
;
727 enum tree_code rhs_code
= ERROR_MARK
;
728 enum tree_code first_cond_code
= ERROR_MARK
;
730 bool need_same_oprnds
= false;
731 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
734 machine_mode optab_op2_mode
;
735 machine_mode vec_mode
;
736 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
739 /* For every stmt in NODE find its def stmt/s. */
740 stmt_vec_info stmt_info
;
741 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
743 gimple
*stmt
= stmt_info
->stmt
;
747 if (dump_enabled_p ())
748 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
750 /* Fail to vectorize statements marked as unvectorizable. */
751 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
753 if (dump_enabled_p ())
754 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
755 "Build SLP failed: unvectorizable statement %G",
757 /* Fatal mismatch. */
762 lhs
= gimple_get_lhs (stmt
);
763 if (lhs
== NULL_TREE
)
765 if (dump_enabled_p ())
766 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
767 "Build SLP failed: not GIMPLE_ASSIGN nor "
768 "GIMPLE_CALL %G", stmt
);
769 /* Fatal mismatch. */
775 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
778 && !vect_record_max_nunits (stmt_info
, group_size
,
779 nunits_vectype
, max_nunits
)))
781 /* Fatal mismatch. */
786 gcc_assert (vectype
);
788 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
790 rhs_code
= CALL_EXPR
;
792 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
794 else if ((gimple_call_internal_p (call_stmt
)
795 && (!vectorizable_internal_fn_p
796 (gimple_call_internal_fn (call_stmt
))))
797 || gimple_call_tail_p (call_stmt
)
798 || gimple_call_noreturn_p (call_stmt
)
799 || !gimple_call_nothrow_p (call_stmt
)
800 || gimple_call_chain (call_stmt
))
802 if (dump_enabled_p ())
803 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
804 "Build SLP failed: unsupported call type %G",
806 /* Fatal mismatch. */
813 rhs_code
= gimple_assign_rhs_code (stmt
);
814 load_p
= TREE_CODE_CLASS (rhs_code
) == tcc_reference
;
817 /* Check the operation. */
820 first_stmt_code
= rhs_code
;
822 /* Shift arguments should be equal in all the packed stmts for a
823 vector shift with scalar shift operand. */
824 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
825 || rhs_code
== LROTATE_EXPR
826 || rhs_code
== RROTATE_EXPR
)
828 if (vectype
== boolean_type_node
)
830 if (dump_enabled_p ())
831 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
832 "Build SLP failed: shift of a"
834 /* Fatal mismatch. */
839 vec_mode
= TYPE_MODE (vectype
);
841 /* First see if we have a vector/vector shift. */
842 optab
= optab_for_tree_code (rhs_code
, vectype
,
846 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
848 /* No vector/vector shift, try for a vector/scalar shift. */
849 optab
= optab_for_tree_code (rhs_code
, vectype
,
854 if (dump_enabled_p ())
855 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
856 "Build SLP failed: no optab.\n");
857 /* Fatal mismatch. */
861 icode
= (int) optab_handler (optab
, vec_mode
);
862 if (icode
== CODE_FOR_nothing
)
864 if (dump_enabled_p ())
865 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
867 "op not supported by target.\n");
868 /* Fatal mismatch. */
872 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
873 if (!VECTOR_MODE_P (optab_op2_mode
))
875 need_same_oprnds
= true;
876 first_op1
= gimple_assign_rhs2 (stmt
);
880 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
882 need_same_oprnds
= true;
883 first_op1
= gimple_assign_rhs2 (stmt
);
888 if (first_stmt_code
!= rhs_code
889 && alt_stmt_code
== ERROR_MARK
)
890 alt_stmt_code
= rhs_code
;
891 if (first_stmt_code
!= rhs_code
892 && (first_stmt_code
!= IMAGPART_EXPR
893 || rhs_code
!= REALPART_EXPR
)
894 && (first_stmt_code
!= REALPART_EXPR
895 || rhs_code
!= IMAGPART_EXPR
)
896 /* Handle mismatches in plus/minus by computing both
897 and merging the results. */
898 && !((first_stmt_code
== PLUS_EXPR
899 || first_stmt_code
== MINUS_EXPR
)
900 && (alt_stmt_code
== PLUS_EXPR
901 || alt_stmt_code
== MINUS_EXPR
)
902 && rhs_code
== alt_stmt_code
)
903 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
904 && (first_stmt_code
== ARRAY_REF
905 || first_stmt_code
== BIT_FIELD_REF
906 || first_stmt_code
== INDIRECT_REF
907 || first_stmt_code
== COMPONENT_REF
908 || first_stmt_code
== MEM_REF
)))
910 if (dump_enabled_p ())
912 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
913 "Build SLP failed: different operation "
915 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
916 "original stmt %G", first_stmt_info
->stmt
);
923 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
925 if (dump_enabled_p ())
926 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
927 "Build SLP failed: different shift "
928 "arguments in %G", stmt
);
933 if (!load_p
&& rhs_code
== CALL_EXPR
)
935 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
936 as_a
<gcall
*> (stmt
)))
938 if (dump_enabled_p ())
939 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
940 "Build SLP failed: different calls in %G",
948 /* Grouped store or load. */
949 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
951 if (REFERENCE_CLASS_P (lhs
))
959 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
962 /* Check that there are no loads from different interleaving
963 chains in the same node. */
964 if (prev_first_load
!= first_load
)
966 if (dump_enabled_p ())
967 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
969 "Build SLP failed: different "
970 "interleaving chains in one node %G",
977 prev_first_load
= first_load
;
979 } /* Grouped access. */
984 /* Not grouped load. */
985 if (dump_enabled_p ())
986 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
987 "Build SLP failed: not grouped load %G", stmt
);
989 /* FORNOW: Not grouped loads are not supported. */
990 /* Fatal mismatch. */
995 /* Not memory operation. */
996 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
997 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
998 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
999 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
1000 && rhs_code
!= VIEW_CONVERT_EXPR
1001 && rhs_code
!= CALL_EXPR
)
1003 if (dump_enabled_p ())
1004 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1005 "Build SLP failed: operation unsupported %G",
1007 /* Fatal mismatch. */
1012 if (rhs_code
== COND_EXPR
)
1014 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1015 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1016 enum tree_code swap_code
= ERROR_MARK
;
1017 enum tree_code invert_code
= ERROR_MARK
;
1020 first_cond_code
= TREE_CODE (cond_expr
);
1021 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1023 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1024 swap_code
= swap_tree_comparison (cond_code
);
1025 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1028 if (first_cond_code
== cond_code
)
1030 /* Isomorphic can be achieved by swapping. */
1031 else if (first_cond_code
== swap_code
)
1033 /* Isomorphic can be achieved by inverting. */
1034 else if (first_cond_code
== invert_code
)
1038 if (dump_enabled_p ())
1039 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1040 "Build SLP failed: different"
1041 " operation %G", stmt
);
1051 for (i
= 0; i
< group_size
; ++i
)
1055 /* If we allowed a two-operation SLP node verify the target can cope
1056 with the permute we are going to use. */
1057 if (alt_stmt_code
!= ERROR_MARK
1058 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1060 if (vectype
== boolean_type_node
1061 || !vect_two_operations_perm_ok_p (stmts
, group_size
,
1062 vectype
, alt_stmt_code
))
1064 for (i
= 0; i
< group_size
; ++i
)
1065 if (gimple_assign_rhs_code (stmts
[i
]->stmt
) == alt_stmt_code
)
1068 if (dump_enabled_p ())
1070 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1071 "Build SLP failed: different operation "
1072 "in stmt %G", stmts
[i
]->stmt
);
1073 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1074 "original stmt %G", first_stmt_info
->stmt
);
1079 *two_operators
= true;
1085 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1086 Note we never remove apart from at destruction time so we do not
1087 need a special value for deleted that differs from empty. */
1090 typedef vec
<stmt_vec_info
> value_type
;
1091 typedef vec
<stmt_vec_info
> compare_type
;
1092 static inline hashval_t
hash (value_type
);
1093 static inline bool equal (value_type existing
, value_type candidate
);
1094 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1095 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1096 static inline void mark_empty (value_type
&x
) { x
.release (); }
1097 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1098 static inline void remove (value_type
&x
) { x
.release (); }
1101 bst_traits::hash (value_type x
)
1104 for (unsigned i
= 0; i
< x
.length (); ++i
)
1105 h
.add_int (gimple_uid (x
[i
]->stmt
));
1109 bst_traits::equal (value_type existing
, value_type candidate
)
1111 if (existing
.length () != candidate
.length ())
1113 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1114 if (existing
[i
] != candidate
[i
])
1119 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1120 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1121 scalar_stmts_to_slp_tree_map_t
;
1124 vect_build_slp_tree_2 (vec_info
*vinfo
,
1125 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1126 poly_uint64
*max_nunits
,
1127 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1128 scalar_stmts_to_slp_tree_map_t
*bst_map
);
1131 vect_build_slp_tree (vec_info
*vinfo
,
1132 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1133 poly_uint64
*max_nunits
,
1134 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1135 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1137 if (slp_tree
*leader
= bst_map
->get (stmts
))
1139 if (dump_enabled_p ())
1140 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1141 *leader
? "" : "failed ", *leader
);
1144 (*leader
)->refcnt
++;
1145 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
1149 poly_uint64 this_max_nunits
= 1;
1150 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
1151 matches
, npermutes
, tree_size
, bst_map
);
1154 res
->max_nunits
= this_max_nunits
;
1155 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1156 /* Keep a reference for the bst_map use. */
1159 bst_map
->put (stmts
.copy (), res
);
1163 /* Recursively build an SLP tree starting from NODE.
1164 Fail (and return a value not equal to zero) if def-stmts are not
1165 isomorphic, require data permutation or are of unsupported types of
1166 operation. Otherwise, return 0.
1167 The value returned is the depth in the SLP tree where a mismatch
1171 vect_build_slp_tree_2 (vec_info
*vinfo
,
1172 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1173 poly_uint64
*max_nunits
,
1174 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1175 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1177 unsigned nops
, i
, this_tree_size
= 0;
1178 poly_uint64 this_max_nunits
= *max_nunits
;
1183 stmt_vec_info stmt_info
= stmts
[0];
1184 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1185 nops
= gimple_call_num_args (stmt
);
1186 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1188 nops
= gimple_num_ops (stmt
) - 1;
1189 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1192 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1197 /* If the SLP node is a PHI (induction or reduction), terminate
1199 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1201 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1202 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
1203 if (!vect_record_max_nunits (stmt_info
, group_size
, vectype
, max_nunits
))
1206 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1207 /* Induction from different IVs is not supported. */
1208 if (def_type
== vect_induction_def
)
1210 stmt_vec_info other_info
;
1211 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1212 if (stmt_info
!= other_info
)
1215 else if (def_type
== vect_reduction_def
1216 || def_type
== vect_double_reduction_def
1217 || def_type
== vect_nested_cycle
)
1219 /* Else def types have to match. */
1220 stmt_vec_info other_info
;
1221 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1222 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1228 node
= vect_create_new_slp_node (stmts
);
1233 bool two_operators
= false;
1234 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1235 if (!vect_build_slp_tree_1 (swap
, stmts
, group_size
,
1236 &this_max_nunits
, matches
, &two_operators
))
1239 /* If the SLP node is a load, terminate the recursion unless masked. */
1240 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1241 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1243 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1246 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1251 *max_nunits
= this_max_nunits
;
1253 node
= vect_create_new_slp_node (stmts
);
1258 /* Get at the operands, verifying they are compatible. */
1259 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1260 slp_oprnd_info oprnd_info
;
1261 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1263 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1264 stmts
, i
, &oprnds_info
);
1266 matches
[(res
== -1) ? 0 : i
] = false;
1270 for (i
= 0; i
< group_size
; ++i
)
1273 vect_free_oprnd_info (oprnds_info
);
1277 auto_vec
<slp_tree
, 4> children
;
1279 stmt_info
= stmts
[0];
1281 /* Create SLP_TREE nodes for the definition node/s. */
1282 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1285 unsigned old_tree_size
= this_tree_size
;
1288 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1290 /* COND_EXPR have one too many eventually if the condition
1292 gcc_assert (i
== 3 && nops
== 4);
1296 if (oprnd_info
->first_dt
!= vect_internal_def
1297 && oprnd_info
->first_dt
!= vect_reduction_def
1298 && oprnd_info
->first_dt
!= vect_induction_def
)
1300 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1301 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1302 oprnd_info
->ops
= vNULL
;
1303 children
.safe_push (invnode
);
1307 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1308 group_size
, &this_max_nunits
,
1310 &this_tree_size
, bst_map
)) != NULL
)
1312 /* If we have all children of child built up from scalars then just
1313 throw that away and build it up this node from scalars. */
1314 if (is_a
<bb_vec_info
> (vinfo
)
1315 && !SLP_TREE_CHILDREN (child
).is_empty ()
1316 /* ??? Rejecting patterns this way doesn't work. We'd have to
1317 do extra work to cancel the pattern so the uses see the
1319 && !oprnd_info
->any_pattern
)
1321 slp_tree grandchild
;
1323 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1324 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1329 this_tree_size
= old_tree_size
;
1330 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1331 vect_free_slp_tree (grandchild
, false);
1332 SLP_TREE_CHILDREN (child
).truncate (0);
1334 if (dump_enabled_p ())
1335 dump_printf_loc (MSG_NOTE
, vect_location
,
1336 "Building parent vector operands from "
1337 "scalars instead\n");
1338 oprnd_info
->def_stmts
= vNULL
;
1339 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1340 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1341 oprnd_info
->ops
= vNULL
;
1343 children
.safe_push (child
);
1348 oprnd_info
->def_stmts
= vNULL
;
1349 children
.safe_push (child
);
1353 /* If the SLP build failed fatally and we analyze a basic-block
1354 simply treat nodes we fail to build as externally defined
1355 (and thus build vectors from the scalar defs).
1356 The cost model will reject outright expensive cases.
1357 ??? This doesn't treat cases where permutation ultimatively
1358 fails (or we don't try permutation below). Ideally we'd
1359 even compute a permutation that will end up with the maximum
1361 if (is_a
<bb_vec_info
> (vinfo
)
1363 /* ??? Rejecting patterns this way doesn't work. We'd have to
1364 do extra work to cancel the pattern so the uses see the
1366 && !is_pattern_stmt_p (stmt_info
)
1367 && !oprnd_info
->any_pattern
)
1369 if (dump_enabled_p ())
1370 dump_printf_loc (MSG_NOTE
, vect_location
,
1371 "Building vector operands from scalars\n");
1373 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1374 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1375 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1376 children
.safe_push (child
);
1377 oprnd_info
->ops
= vNULL
;
1378 oprnd_info
->def_stmts
= vNULL
;
1382 /* If the SLP build for operand zero failed and operand zero
1383 and one can be commutated try that for the scalar stmts
1384 that failed the match. */
1386 /* A first scalar stmt mismatch signals a fatal mismatch. */
1388 /* ??? For COND_EXPRs we can swap the comparison operands
1389 as well as the arms under some constraints. */
1391 && oprnds_info
[1]->first_dt
== vect_internal_def
1392 && is_gimple_assign (stmt_info
->stmt
)
1393 /* Swapping operands for reductions breaks assumptions later on. */
1394 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1395 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
1396 /* Do so only if the number of not successful permutes was nor more
1397 than a cut-ff as re-trying the recursive match on
1398 possibly each level of the tree would expose exponential
1402 /* See whether we can swap the matching or the non-matching
1404 bool swap_not_matching
= true;
1407 for (j
= 0; j
< group_size
; ++j
)
1409 if (matches
[j
] != !swap_not_matching
)
1411 stmt_vec_info stmt_info
= stmts
[j
];
1412 /* Verify if we can swap operands of this stmt. */
1413 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1415 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1417 if (!swap_not_matching
)
1419 swap_not_matching
= false;
1424 while (j
!= group_size
);
1426 /* Swap mismatched definition stmts. */
1427 if (dump_enabled_p ())
1428 dump_printf_loc (MSG_NOTE
, vect_location
,
1429 "Re-trying with swapped operands of stmts ");
1430 for (j
= 0; j
< group_size
; ++j
)
1431 if (matches
[j
] == !swap_not_matching
)
1433 std::swap (oprnds_info
[0]->def_stmts
[j
],
1434 oprnds_info
[1]->def_stmts
[j
]);
1435 std::swap (oprnds_info
[0]->ops
[j
],
1436 oprnds_info
[1]->ops
[j
]);
1437 if (dump_enabled_p ())
1438 dump_printf (MSG_NOTE
, "%d ", j
);
1440 if (dump_enabled_p ())
1441 dump_printf (MSG_NOTE
, "\n");
1442 /* And try again with scratch 'matches' ... */
1443 bool *tem
= XALLOCAVEC (bool, group_size
);
1444 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1445 group_size
, &this_max_nunits
,
1447 &this_tree_size
, bst_map
)) != NULL
)
1449 /* If we have all children of child built up from scalars then
1450 just throw that away and build it up this node from scalars. */
1451 if (is_a
<bb_vec_info
> (vinfo
)
1452 && !SLP_TREE_CHILDREN (child
).is_empty ()
1453 /* ??? Rejecting patterns this way doesn't work. We'd have
1454 to do extra work to cancel the pattern so the uses see the
1456 && !oprnd_info
->any_pattern
)
1459 slp_tree grandchild
;
1461 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1462 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1467 this_tree_size
= old_tree_size
;
1468 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1469 vect_free_slp_tree (grandchild
, false);
1470 SLP_TREE_CHILDREN (child
).truncate (0);
1472 if (dump_enabled_p ())
1473 dump_printf_loc (MSG_NOTE
, vect_location
,
1474 "Building parent vector operands from "
1475 "scalars instead\n");
1476 oprnd_info
->def_stmts
= vNULL
;
1477 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1478 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1479 oprnd_info
->ops
= vNULL
;
1481 children
.safe_push (child
);
1486 oprnd_info
->def_stmts
= vNULL
;
1487 children
.safe_push (child
);
1495 gcc_assert (child
== NULL
);
1496 FOR_EACH_VEC_ELT (children
, j
, child
)
1497 vect_free_slp_tree (child
, false);
1498 vect_free_oprnd_info (oprnds_info
);
1502 vect_free_oprnd_info (oprnds_info
);
1504 *tree_size
+= this_tree_size
+ 1;
1505 *max_nunits
= this_max_nunits
;
1507 node
= vect_create_new_slp_node (stmts
);
1508 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1509 SLP_TREE_CHILDREN (node
).splice (children
);
1513 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1516 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1517 slp_tree node
, hash_set
<slp_tree
> &visited
)
1520 stmt_vec_info stmt_info
;
1524 if (visited
.add (node
))
1527 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1528 dump_user_location_t user_loc
= loc
.get_user_location ();
1529 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u)\n",
1530 SLP_TREE_DEF_TYPE (node
) == vect_external_def
1532 : (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
1535 estimated_poly_value (node
->max_nunits
));
1536 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1537 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1538 dump_printf_loc (metadata
, user_loc
, "\tstmt %u %G", i
, stmt_info
->stmt
);
1541 dump_printf_loc (metadata
, user_loc
, "\t{ ");
1542 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1543 dump_printf (metadata
, "%T%s ", op
,
1544 i
< SLP_TREE_SCALAR_OPS (node
).length () - 1 ? "," : "");
1545 dump_printf (metadata
, "}\n");
1547 if (SLP_TREE_CHILDREN (node
).is_empty ())
1549 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1550 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1551 dump_printf (dump_kind
, " %p", (void *)child
);
1552 dump_printf (dump_kind
, "\n");
1553 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1554 vect_print_slp_tree (dump_kind
, loc
, child
, visited
);
1558 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1561 hash_set
<slp_tree
> visited
;
1562 vect_print_slp_tree (dump_kind
, loc
, node
, visited
);
1565 /* Mark the tree rooted at NODE with PURE_SLP. */
1568 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
1571 stmt_vec_info stmt_info
;
1574 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1577 if (visited
.add (node
))
1580 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1581 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
1583 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1584 vect_mark_slp_stmts (child
, visited
);
1588 vect_mark_slp_stmts (slp_tree node
)
1590 hash_set
<slp_tree
> visited
;
1591 vect_mark_slp_stmts (node
, visited
);
1594 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1597 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
1600 stmt_vec_info stmt_info
;
1603 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1606 if (visited
.add (node
))
1609 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1611 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1612 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1613 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1616 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1617 vect_mark_slp_stmts_relevant (child
, visited
);
1621 vect_mark_slp_stmts_relevant (slp_tree node
)
1623 hash_set
<slp_tree
> visited
;
1624 vect_mark_slp_stmts_relevant (node
, visited
);
1628 /* Rearrange the statements of NODE according to PERMUTATION. */
1631 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1632 vec
<unsigned> permutation
,
1633 hash_set
<slp_tree
> &visited
)
1638 if (visited
.add (node
))
1641 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1642 vect_slp_rearrange_stmts (child
, group_size
, permutation
, visited
);
1644 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1646 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1647 vec
<stmt_vec_info
> tmp_stmts
;
1648 tmp_stmts
.create (group_size
);
1649 tmp_stmts
.quick_grow (group_size
);
1650 stmt_vec_info stmt_info
;
1651 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1652 tmp_stmts
[permutation
[i
]] = stmt_info
;
1653 SLP_TREE_SCALAR_STMTS (node
).release ();
1654 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1656 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1658 gcc_assert (group_size
== SLP_TREE_SCALAR_OPS (node
).length ());
1660 tmp_ops
.create (group_size
);
1661 tmp_ops
.quick_grow (group_size
);
1663 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1664 tmp_ops
[permutation
[i
]] = op
;
1665 SLP_TREE_SCALAR_OPS (node
).release ();
1666 SLP_TREE_SCALAR_OPS (node
) = tmp_ops
;
1671 /* Attempt to reorder stmts in a reduction chain so that we don't
1672 require any load permutation. Return true if that was possible,
1673 otherwise return false. */
1676 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1678 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1681 slp_tree node
, load
;
1683 /* Compare all the permutation sequences to the first one. We know
1684 that at least one load is permuted. */
1685 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1686 if (!node
->load_permutation
.exists ())
1688 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1690 if (!load
->load_permutation
.exists ())
1692 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1693 if (lidx
!= node
->load_permutation
[j
])
1697 /* Check that the loads in the first sequence are different and there
1698 are no gaps between them. */
1699 auto_sbitmap
load_index (group_size
);
1700 bitmap_clear (load_index
);
1701 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1703 if (lidx
>= group_size
)
1705 if (bitmap_bit_p (load_index
, lidx
))
1708 bitmap_set_bit (load_index
, lidx
);
1710 for (i
= 0; i
< group_size
; i
++)
1711 if (!bitmap_bit_p (load_index
, i
))
1714 /* This permutation is valid for reduction. Since the order of the
1715 statements in the nodes is not important unless they are memory
1716 accesses, we can rearrange the statements in all the nodes
1717 according to the order of the loads. */
1718 hash_set
<slp_tree
> visited
;
1719 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1720 node
->load_permutation
, visited
);
1722 /* We are done, no actual permutations need to be generated. */
1723 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1724 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1726 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1727 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1728 /* But we have to keep those permutations that are required because
1729 of handling of gaps. */
1730 if (known_eq (unrolling_factor
, 1U)
1731 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1732 && DR_GROUP_GAP (first_stmt_info
) == 0))
1733 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1735 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1736 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1742 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1745 vect_gather_slp_loads (slp_instance inst
, slp_tree node
,
1746 hash_set
<slp_tree
> &visited
)
1748 if (visited
.add (node
))
1751 if (SLP_TREE_CHILDREN (node
).length () == 0)
1753 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1755 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1756 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1757 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1758 SLP_INSTANCE_LOADS (inst
).safe_push (node
);
1764 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1765 vect_gather_slp_loads (inst
, child
, visited
);
1770 vect_gather_slp_loads (slp_instance inst
, slp_tree node
)
1772 hash_set
<slp_tree
> visited
;
1773 vect_gather_slp_loads (inst
, node
, visited
);
1776 /* Check if the required load permutations in the SLP instance
1777 SLP_INSTN are supported. */
1780 vect_supported_load_permutation_p (slp_instance slp_instn
)
1782 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1783 unsigned int i
, j
, k
, next
;
1786 if (dump_enabled_p ())
1788 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1789 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1790 if (node
->load_permutation
.exists ())
1791 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1792 dump_printf (MSG_NOTE
, "%d ", next
);
1794 for (k
= 0; k
< group_size
; ++k
)
1795 dump_printf (MSG_NOTE
, "%d ", k
);
1796 dump_printf (MSG_NOTE
, "\n");
1799 /* In case of reduction every load permutation is allowed, since the order
1800 of the reduction statements is not important (as opposed to the case of
1801 grouped stores). The only condition we need to check is that all the
1802 load nodes are of the same size and have the same permutation (and then
1803 rearrange all the nodes of the SLP instance according to this
1806 /* Check that all the load nodes are of the same size. */
1807 /* ??? Can't we assert this? */
1808 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1809 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1812 node
= SLP_INSTANCE_TREE (slp_instn
);
1813 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1815 /* Reduction (there are no data-refs in the root).
1816 In reduction chain the order of the loads is not important. */
1817 if (!STMT_VINFO_DATA_REF (stmt_info
)
1818 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1819 vect_attempt_slp_rearrange_stmts (slp_instn
);
1821 /* In basic block vectorization we allow any subchain of an interleaving
1823 FORNOW: not supported in loop SLP because of realignment compications. */
1824 if (STMT_VINFO_BB_VINFO (stmt_info
))
1826 /* Check whether the loads in an instance form a subchain and thus
1827 no permutation is necessary. */
1828 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1830 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1832 bool subchain_p
= true;
1833 stmt_vec_info next_load_info
= NULL
;
1834 stmt_vec_info load_info
;
1835 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1838 && (next_load_info
!= load_info
1839 || DR_GROUP_GAP (load_info
) != 1))
1844 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
1847 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1850 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1851 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
1852 unsigned HOST_WIDE_INT nunits
;
1853 unsigned k
, maxk
= 0;
1854 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1857 /* In BB vectorization we may not actually use a loaded vector
1858 accessing elements in excess of DR_GROUP_SIZE. */
1859 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
1860 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
1861 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1863 if (dump_enabled_p ())
1864 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1865 "BB vectorization with gaps at the end of "
1866 "a load is not supported\n");
1870 /* Verify the permutation can be generated. */
1873 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1874 1, slp_instn
, true, &n_perms
))
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1879 "unsupported load permutation\n");
1887 /* For loop vectorization verify we can generate the permutation. Be
1888 conservative about the vectorization factor, there are permutations
1889 that will use three vector inputs only starting from a specific factor
1890 and the vectorization factor is not yet final.
1891 ??? The SLP instance unrolling factor might not be the maximum one. */
1894 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1895 LOOP_VINFO_VECT_FACTOR
1896 (STMT_VINFO_LOOP_VINFO (stmt_info
)));
1897 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1898 if (node
->load_permutation
.exists ()
1899 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1900 slp_instn
, true, &n_perms
))
1907 /* Find the last store in SLP INSTANCE. */
1910 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1912 stmt_vec_info last
= NULL
;
1913 stmt_vec_info stmt_vinfo
;
1915 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
1917 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
1918 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
1924 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1925 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1926 (also containing the first GROUP1_SIZE stmts, since stores are
1927 consecutive), the second containing the remainder.
1928 Return the first stmt in the second group. */
1930 static stmt_vec_info
1931 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
1933 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
1934 gcc_assert (group1_size
> 0);
1935 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
1936 gcc_assert (group2_size
> 0);
1937 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
1939 stmt_vec_info stmt_info
= first_vinfo
;
1940 for (unsigned i
= group1_size
; i
> 1; i
--)
1942 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1943 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1945 /* STMT is now the last element of the first group. */
1946 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1947 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
1949 DR_GROUP_SIZE (group2
) = group2_size
;
1950 for (stmt_info
= group2
; stmt_info
;
1951 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
1953 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
1954 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1957 /* For the second group, the DR_GROUP_GAP is that before the original group,
1958 plus skipping over the first vector. */
1959 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
1961 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1962 DR_GROUP_GAP (first_vinfo
) += group2_size
;
1964 if (dump_enabled_p ())
1965 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1966 group1_size
, group2_size
);
1971 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1972 statements and a vector of NUNITS elements. */
1975 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
1977 return exact_div (common_multiple (nunits
, group_size
), group_size
);
1980 /* Analyze an SLP instance starting from a group of grouped stores. Call
1981 vect_build_slp_tree to build a tree of packed stmts if possible.
1982 Return FALSE if it's impossible to SLP any stmt in the loop. */
1985 vect_analyze_slp_instance (vec_info
*vinfo
,
1986 stmt_vec_info stmt_info
, unsigned max_tree_size
)
1988 slp_instance new_instance
;
1990 unsigned int group_size
;
1991 tree vectype
, scalar_type
= NULL_TREE
;
1993 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1994 vec
<stmt_vec_info
> scalar_stmts
;
1995 bool constructor
= false;
1997 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1999 scalar_type
= TREE_TYPE (DR_REF (dr
));
2000 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
2001 group_size
= DR_GROUP_SIZE (stmt_info
);
2003 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2005 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2006 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2007 group_size
= REDUC_GROUP_SIZE (stmt_info
);
2009 else if (is_gimple_assign (stmt_info
->stmt
)
2010 && gimple_assign_rhs_code (stmt_info
->stmt
) == CONSTRUCTOR
)
2012 vectype
= TREE_TYPE (gimple_assign_rhs1 (stmt_info
->stmt
));
2013 group_size
= CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info
->stmt
));
2018 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2019 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2020 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2025 if (dump_enabled_p ())
2026 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2027 "Build SLP failed: unsupported data-type %T\n",
2032 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2034 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2035 scalar_stmts
.create (group_size
);
2036 stmt_vec_info next_info
= stmt_info
;
2037 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2039 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2042 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2043 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
2046 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2048 /* Collect the reduction stmts and store them in
2049 SLP_TREE_SCALAR_STMTS. */
2052 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2053 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
2055 /* Mark the first element of the reduction chain as reduction to properly
2056 transform the node. In the reduction analysis phase only the last
2057 element of the chain is marked as reduction. */
2058 STMT_VINFO_DEF_TYPE (stmt_info
)
2059 = STMT_VINFO_DEF_TYPE (scalar_stmts
.last ());
2060 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
2061 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
2063 else if (constructor
)
2065 tree rhs
= gimple_assign_rhs1 (stmt_info
->stmt
);
2067 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs
), i
, val
)
2069 if (TREE_CODE (val
) == SSA_NAME
)
2071 gimple
* def
= SSA_NAME_DEF_STMT (val
);
2072 stmt_vec_info def_info
= vinfo
->lookup_stmt (def
);
2073 /* Value is defined in another basic block. */
2076 scalar_stmts
.safe_push (def_info
);
2084 /* Collect reduction statements. */
2085 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2086 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2087 scalar_stmts
.safe_push (next_info
);
2090 /* Build the tree for the SLP instance. */
2091 bool *matches
= XALLOCAVEC (bool, group_size
);
2092 unsigned npermutes
= 0;
2093 scalar_stmts_to_slp_tree_map_t
*bst_map
2094 = new scalar_stmts_to_slp_tree_map_t ();
2095 poly_uint64 max_nunits
= nunits
;
2096 unsigned tree_size
= 0;
2097 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2098 &max_nunits
, matches
, &npermutes
,
2099 &tree_size
, bst_map
);
2100 /* The map keeps a reference on SLP nodes built, release that. */
2101 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
2102 it
!= bst_map
->end (); ++it
)
2104 vect_free_slp_tree ((*it
).second
, false);
2108 /* If this is a reduction chain with a conversion in front
2109 amend the SLP tree with a node for that. */
2111 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
2112 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
2114 /* Get at the conversion stmt - we know it's the single use
2115 of the last stmt of the reduction chain. */
2116 gimple
*tem
= vect_orig_stmt (scalar_stmts
[group_size
- 1])->stmt
;
2117 use_operand_p use_p
;
2119 bool r
= single_imm_use (gimple_assign_lhs (tem
), &use_p
, &use_stmt
);
2121 next_info
= vinfo
->lookup_stmt (use_stmt
);
2122 next_info
= vect_stmt_to_vectorize (next_info
);
2123 scalar_stmts
= vNULL
;
2124 scalar_stmts
.create (group_size
);
2125 for (unsigned i
= 0; i
< group_size
; ++i
)
2126 scalar_stmts
.quick_push (next_info
);
2127 slp_tree conv
= vect_create_new_slp_node (scalar_stmts
);
2128 SLP_TREE_CHILDREN (conv
).quick_push (node
);
2130 /* We also have to fake this conversion stmt as SLP reduction group
2131 so we don't have to mess with too much code elsewhere. */
2132 REDUC_GROUP_FIRST_ELEMENT (next_info
) = next_info
;
2133 REDUC_GROUP_NEXT_ELEMENT (next_info
) = NULL
;
2136 /* Calculate the unrolling factor based on the smallest type. */
2137 poly_uint64 unrolling_factor
2138 = calculate_unrolling_factor (max_nunits
, group_size
);
2140 if (maybe_ne (unrolling_factor
, 1U)
2141 && is_a
<bb_vec_info
> (vinfo
))
2143 unsigned HOST_WIDE_INT const_max_nunits
;
2144 if (!max_nunits
.is_constant (&const_max_nunits
)
2145 || const_max_nunits
> group_size
)
2147 if (dump_enabled_p ())
2148 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2149 "Build SLP failed: store group "
2150 "size not a multiple of the vector size "
2151 "in basic block SLP\n");
2152 vect_free_slp_tree (node
, false);
2155 /* Fatal mismatch. */
2156 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2157 vect_free_slp_tree (node
, false);
2161 /* Create a new SLP instance. */
2162 new_instance
= XNEW (class _slp_instance
);
2163 SLP_INSTANCE_TREE (new_instance
) = node
;
2164 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2165 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2166 SLP_INSTANCE_LOADS (new_instance
) = vNULL
;
2167 SLP_INSTANCE_ROOT_STMT (new_instance
) = constructor
? stmt_info
: NULL
;
2169 vect_gather_slp_loads (new_instance
, node
);
2170 if (dump_enabled_p ())
2171 dump_printf_loc (MSG_NOTE
, vect_location
,
2172 "SLP size %u vs. limit %u.\n",
2173 tree_size
, max_tree_size
);
2175 /* Compute the load permutation. */
2177 bool loads_permuted
= false;
2178 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2180 vec
<unsigned> load_permutation
;
2182 stmt_vec_info load_info
;
2183 bool this_load_permuted
= false;
2184 load_permutation
.create (group_size
);
2185 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT
2186 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2187 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2189 int load_place
= vect_get_place_in_interleaving_chain
2190 (load_info
, first_stmt_info
);
2191 gcc_assert (load_place
!= -1);
2192 if (load_place
!= j
)
2193 this_load_permuted
= true;
2194 load_permutation
.safe_push (load_place
);
2196 if (!this_load_permuted
2197 /* The load requires permutation when unrolling exposes
2198 a gap either because the group is larger than the SLP
2199 group-size or because there is a gap between the groups. */
2200 && (known_eq (unrolling_factor
, 1U)
2201 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
2202 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2204 load_permutation
.release ();
2207 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2208 loads_permuted
= true;
2213 if (!vect_supported_load_permutation_p (new_instance
))
2215 if (dump_enabled_p ())
2216 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2217 "Build SLP failed: unsupported load "
2218 "permutation %G", stmt_info
->stmt
);
2219 vect_free_slp_instance (new_instance
, false);
2224 /* If the loads and stores can be handled with load/store-lan
2225 instructions do not generate this SLP instance. */
2226 if (is_a
<loop_vec_info
> (vinfo
)
2228 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2231 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2233 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2234 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2235 /* Use SLP for strided accesses (or if we can't load-lanes). */
2236 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2237 || ! vect_load_lanes_supported
2238 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2239 DR_GROUP_SIZE (stmt_vinfo
), false))
2242 if (i
== SLP_INSTANCE_LOADS (new_instance
).length ())
2244 if (dump_enabled_p ())
2245 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2246 "Built SLP cancelled: can use "
2247 "load/store-lanes\n");
2248 vect_free_slp_instance (new_instance
, false);
2253 vinfo
->slp_instances
.safe_push (new_instance
);
2255 if (dump_enabled_p ())
2257 dump_printf_loc (MSG_NOTE
, vect_location
,
2258 "Final SLP tree for instance:\n");
2259 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2267 /* Failed to SLP. */
2268 /* Free the allocated memory. */
2269 scalar_stmts
.release ();
2272 /* For basic block SLP, try to break the group up into multiples of the
2274 unsigned HOST_WIDE_INT const_nunits
;
2275 if (is_a
<bb_vec_info
> (vinfo
)
2276 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2277 && DR_GROUP_FIRST_ELEMENT (stmt_info
)
2278 && nunits
.is_constant (&const_nunits
))
2280 /* We consider breaking the group only on VF boundaries from the existing
2282 for (i
= 0; i
< group_size
; i
++)
2283 if (!matches
[i
]) break;
2285 if (i
>= const_nunits
&& i
< group_size
)
2287 /* Split into two groups at the first vector boundary before i. */
2288 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2289 unsigned group1_size
= i
& ~(const_nunits
- 1);
2291 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2293 bool res
= vect_analyze_slp_instance (vinfo
, stmt_info
,
2295 /* If the first non-match was in the middle of a vector,
2296 skip the rest of that vector. */
2297 if (group1_size
< i
)
2299 i
= group1_size
+ const_nunits
;
2301 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2304 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2307 /* Even though the first vector did not all match, we might be able to SLP
2308 (some) of the remainder. FORNOW ignore this possibility. */
2315 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2316 trees of packed scalar stmts if SLP is possible. */
2319 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2322 stmt_vec_info first_element
;
2324 DUMP_VECT_SCOPE ("vect_analyze_slp");
2326 /* Find SLP sequences starting from groups of grouped stores. */
2327 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2328 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2330 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2332 if (loop_vinfo
->reduction_chains
.length () > 0)
2334 /* Find SLP sequences starting from reduction chains. */
2335 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2336 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2339 /* Dissolve reduction chain group. */
2340 stmt_vec_info vinfo
= first_element
;
2341 stmt_vec_info last
= NULL
;
2344 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2345 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2346 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2350 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2351 /* It can be still vectorized as part of an SLP reduction. */
2352 loop_vinfo
->reductions
.safe_push (last
);
2356 /* Find SLP sequences starting from groups of reductions. */
2357 if (loop_vinfo
->reductions
.length () > 1)
2358 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2362 return opt_result::success ();
2366 /* For each possible SLP instance decide whether to SLP it and calculate overall
2367 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2368 least one instance. */
2371 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2374 poly_uint64 unrolling_factor
= 1;
2375 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2376 slp_instance instance
;
2377 int decided_to_slp
= 0;
2379 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2381 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2383 /* FORNOW: SLP if you can. */
2384 /* All unroll factors have the form vinfo->vector_size * X for some
2385 rational X, so they must have a common multiple. */
2387 = force_common_multiple (unrolling_factor
,
2388 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2390 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2391 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2392 loop-based vectorization. Such stmts will be marked as HYBRID. */
2393 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
2397 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2399 if (decided_to_slp
&& dump_enabled_p ())
2401 dump_printf_loc (MSG_NOTE
, vect_location
,
2402 "Decided to SLP %d instances. Unrolling factor ",
2404 dump_dec (MSG_NOTE
, unrolling_factor
);
2405 dump_printf (MSG_NOTE
, "\n");
2408 return (decided_to_slp
> 0);
2412 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2413 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2416 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
,
2417 hash_map
<slp_tree
, unsigned> &visited
)
2419 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2420 imm_use_iterator imm_iter
;
2422 stmt_vec_info use_vinfo
;
2424 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2427 /* We need to union stype over the incoming graph edges but we still
2428 want to limit recursion to stay O(N+E). */
2429 bool only_edge
= (++visited
.get_or_insert (node
) < node
->refcnt
);
2431 /* Propagate hybrid down the SLP tree. */
2432 if (stype
== hybrid
)
2434 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2436 else if (!only_edge
)
2438 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2439 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2440 /* If we get a pattern stmt here we have to use the LHS of the
2441 original stmt for immediate uses. */
2442 gimple
*stmt
= vect_orig_stmt (stmt_vinfo
)->stmt
;
2444 if (gimple_code (stmt
) == GIMPLE_PHI
)
2445 def
= gimple_phi_result (stmt
);
2447 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2449 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2451 use_vinfo
= loop_vinfo
->lookup_stmt (use_stmt
);
2454 use_vinfo
= vect_stmt_to_vectorize (use_vinfo
);
2455 if (!STMT_SLP_TYPE (use_vinfo
)
2456 && (STMT_VINFO_RELEVANT (use_vinfo
)
2457 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2458 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2459 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2461 if (dump_enabled_p ())
2462 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2463 "def in non-SLP stmt: %G", use_stmt
);
2470 && !HYBRID_SLP_STMT (stmt_vinfo
))
2472 if (dump_enabled_p ())
2473 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2475 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2479 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2480 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
2481 && SLP_TREE_DEF_TYPE (child
) != vect_constant_def
)
2482 vect_detect_hybrid_slp_stmts (child
, i
, stype
, visited
);
2486 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2488 hash_map
<slp_tree
, unsigned> visited
;
2489 vect_detect_hybrid_slp_stmts (node
, i
, stype
, visited
);
2492 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2495 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2497 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2498 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2503 stmt_vec_info def_stmt_info
= loop_vinfo
->lookup_def (*tp
);
2504 if (def_stmt_info
&& PURE_SLP_STMT (def_stmt_info
))
2506 if (dump_enabled_p ())
2507 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2508 def_stmt_info
->stmt
);
2509 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
2516 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2519 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2520 stmt_vec_info use_vinfo
= loop_vinfo
->lookup_stmt (gsi_stmt (*gsi
));
2521 /* If the stmt is in a SLP instance then this isn't a reason
2522 to mark use definitions in other SLP instances as hybrid. */
2523 if (! STMT_SLP_TYPE (use_vinfo
)
2524 && (STMT_VINFO_RELEVANT (use_vinfo
)
2525 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2526 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2527 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2534 /* Find stmts that must be both vectorized and SLPed. */
2537 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2540 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2541 slp_instance instance
;
2543 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2545 /* First walk all pattern stmt in the loop and mark defs of uses as
2546 hybrid because immediate uses in them are not recorded. */
2547 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2549 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2550 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2553 gimple
*stmt
= gsi_stmt (gsi
);
2554 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
2555 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2558 memset (&wi
, 0, sizeof (wi
));
2559 wi
.info
= loop_vinfo
;
2560 gimple_stmt_iterator gsi2
2561 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
)->stmt
);
2562 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2563 vect_detect_hybrid_slp_1
, &wi
);
2564 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2565 vect_detect_hybrid_slp_2
,
2566 vect_detect_hybrid_slp_1
, &wi
);
2571 /* Then walk the SLP instance trees marking stmts with uses in
2572 non-SLP stmts as hybrid, also propagating hybrid down the
2573 SLP tree, collecting the above info on-the-fly. */
2574 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2576 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2577 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2583 /* Initialize a bb_vec_info struct for the statements between
2584 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2586 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2587 gimple_stmt_iterator region_end_in
,
2588 vec_info_shared
*shared
)
2589 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2590 bb (gsi_bb (region_begin_in
)),
2591 region_begin (region_begin_in
),
2592 region_end (region_end_in
)
2594 gimple_stmt_iterator gsi
;
2596 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2599 gimple
*stmt
= gsi_stmt (gsi
);
2600 gimple_set_uid (stmt
, 0);
2608 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2609 stmts in the basic block. */
2611 _bb_vec_info::~_bb_vec_info ()
2613 for (gimple_stmt_iterator si
= region_begin
;
2614 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2615 /* Reset region marker. */
2616 gimple_set_uid (gsi_stmt (si
), -1);
2621 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2622 given then that child nodes have already been processed, and that
2623 their def types currently match their SLP node's def type. */
2626 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2627 slp_instance node_instance
,
2628 stmt_vector_for_cost
*cost_vec
)
2630 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2631 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2633 /* For BB vectorization vector types are assigned here.
2634 Memory accesses already got their vector type assigned
2635 in vect_analyze_data_refs. */
2636 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2638 && ! STMT_VINFO_DATA_REF (stmt_info
))
2640 tree vectype
, nunits_vectype
;
2641 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
2643 /* We checked this when building the node. */
2645 if (vectype
== boolean_type_node
)
2647 vectype
= vect_get_mask_type_for_stmt (stmt_info
);
2649 /* vect_get_mask_type_for_stmt has already explained the
2654 stmt_vec_info sstmt_info
;
2656 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt_info
)
2657 STMT_VINFO_VECTYPE (sstmt_info
) = vectype
;
2660 /* Calculate the number of vector statements to be created for the
2661 scalar stmts in this node. For SLP reductions it is equal to the
2662 number of vector statements in the children (which has already been
2663 calculated by the recursive call). Otherwise it is the number of
2664 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2665 VF divided by the number of elements in a vector. */
2666 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2667 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2669 for (unsigned i
= 0; i
< SLP_TREE_CHILDREN (node
).length (); ++i
)
2670 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node
)[i
]) == vect_internal_def
)
2672 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2673 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[i
]);
2680 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2681 vf
= loop_vinfo
->vectorization_factor
;
2684 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2685 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2686 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2687 = vect_get_num_vectors (vf
* group_size
, vectype
);
2691 return vect_analyze_stmt (stmt_info
, &dummy
, node
, node_instance
, cost_vec
);
2694 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2695 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2697 Return true if the operations are supported. */
2700 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2701 slp_instance node_instance
,
2702 scalar_stmts_to_slp_tree_map_t
*visited
,
2703 scalar_stmts_to_slp_tree_map_t
*lvisited
,
2704 stmt_vector_for_cost
*cost_vec
)
2709 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2712 /* If we already analyzed the exact same set of scalar stmts we're done.
2713 We share the generated vector stmts for those. */
2715 if ((leader
= visited
->get (SLP_TREE_SCALAR_STMTS (node
)))
2716 || (leader
= lvisited
->get (SLP_TREE_SCALAR_STMTS (node
))))
2718 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2719 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader
);
2723 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2724 doesn't result in any issue since we throw away the lvisited set
2726 lvisited
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
2728 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2729 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2730 visited
, lvisited
, cost_vec
))
2733 /* ??? We have to catch the case late where two first scalar stmts appear
2734 in multiple SLP children with different def type and fail. Remember
2735 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2736 match it when that is vect_internal_def. */
2737 auto_vec
<vect_def_type
, 4> dt
;
2738 dt
.safe_grow (SLP_TREE_CHILDREN (node
).length ());
2739 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2740 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2741 dt
[j
] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]);
2743 /* Push SLP node def-type to stmt operands. */
2744 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2745 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
2746 && SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2747 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2748 = SLP_TREE_DEF_TYPE (child
);
2750 /* Check everything worked out. */
2752 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2753 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2755 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2757 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2758 != SLP_TREE_DEF_TYPE (child
))
2761 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2765 if (!res
&& dump_enabled_p ())
2766 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2767 "not vectorized: same operand with different "
2768 "def type in stmt.\n");
2771 res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2774 /* Restore def-types. */
2775 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2776 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2777 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]) = dt
[j
];
2783 /* Analyze statements in SLP instances of VINFO. Return true if the
2784 operations are supported. */
2787 vect_slp_analyze_operations (vec_info
*vinfo
)
2789 slp_instance instance
;
2792 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2794 scalar_stmts_to_slp_tree_map_t
*visited
2795 = new scalar_stmts_to_slp_tree_map_t ();
2796 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2798 scalar_stmts_to_slp_tree_map_t lvisited
;
2799 stmt_vector_for_cost cost_vec
;
2800 cost_vec
.create (2);
2801 if (!vect_slp_analyze_node_operations (vinfo
,
2802 SLP_INSTANCE_TREE (instance
),
2803 instance
, visited
, &lvisited
,
2806 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2807 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2808 if (dump_enabled_p ())
2809 dump_printf_loc (MSG_NOTE
, vect_location
,
2810 "removing SLP instance operations starting from: %G",
2812 vect_free_slp_instance (instance
, false);
2813 vinfo
->slp_instances
.ordered_remove (i
);
2814 cost_vec
.release ();
2818 for (scalar_stmts_to_slp_tree_map_t::iterator x
= lvisited
.begin();
2819 x
!= lvisited
.end(); ++x
)
2820 visited
->put ((*x
).first
.copy (), (*x
).second
);
2823 add_stmt_costs (vinfo
->target_cost_data
, &cost_vec
);
2824 cost_vec
.release ();
2829 return !vinfo
->slp_instances
.is_empty ();
2833 /* Compute the scalar cost of the SLP node NODE and its children
2834 and return it. Do not account defs that are marked in LIFE and
2835 update LIFE according to uses of NODE. */
2838 vect_bb_slp_scalar_cost (basic_block bb
,
2839 slp_tree node
, vec
<bool, va_heap
> *life
,
2840 stmt_vector_for_cost
*cost_vec
,
2841 hash_set
<slp_tree
> &visited
)
2844 stmt_vec_info stmt_info
;
2847 if (visited
.add (node
))
2850 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2852 gimple
*stmt
= stmt_info
->stmt
;
2853 vec_info
*vinfo
= stmt_info
->vinfo
;
2854 ssa_op_iter op_iter
;
2855 def_operand_p def_p
;
2860 /* If there is a non-vectorized use of the defs then the scalar
2861 stmt is kept live in which case we do not account it or any
2862 required defs in the SLP children in the scalar cost. This
2863 way we make the vectorization more costly when compared to
2865 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2867 imm_use_iterator use_iter
;
2869 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2870 if (!is_gimple_debug (use_stmt
))
2872 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
2873 if (!use_stmt_info
|| !PURE_SLP_STMT (use_stmt_info
))
2876 BREAK_FROM_IMM_USE_STMT (use_iter
);
2883 /* Count scalar stmts only once. */
2884 if (gimple_visited_p (stmt
))
2886 gimple_set_visited (stmt
, true);
2888 vect_cost_for_stmt kind
;
2889 if (STMT_VINFO_DATA_REF (stmt_info
))
2891 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2894 kind
= scalar_store
;
2896 else if (vect_nop_conversion_p (stmt_info
))
2900 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
2903 auto_vec
<bool, 20> subtree_life
;
2904 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2906 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2908 /* Do not directly pass LIFE to the recursive call, copy it to
2909 confine changes in the callee to the current child/subtree. */
2910 subtree_life
.safe_splice (*life
);
2911 vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
, cost_vec
,
2913 subtree_life
.truncate (0);
2919 vect_bb_slp_scalar_cost (basic_block bb
,
2920 slp_tree node
, vec
<bool, va_heap
> *life
,
2921 stmt_vector_for_cost
*cost_vec
)
2923 hash_set
<slp_tree
> visited
;
2924 vect_bb_slp_scalar_cost (bb
, node
, life
, cost_vec
, visited
);
2927 /* Check if vectorization of the basic block is profitable. */
2930 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2932 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2933 slp_instance instance
;
2935 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2936 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2938 /* Calculate scalar cost. */
2939 stmt_vector_for_cost scalar_costs
;
2940 scalar_costs
.create (0);
2941 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2943 auto_vec
<bool, 20> life
;
2944 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2945 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2946 SLP_INSTANCE_TREE (instance
),
2947 &life
, &scalar_costs
);
2949 void *target_cost_data
= init_cost (NULL
);
2950 add_stmt_costs (target_cost_data
, &scalar_costs
);
2951 scalar_costs
.release ();
2953 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
2954 destroy_cost_data (target_cost_data
);
2956 /* Unset visited flag. */
2957 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2958 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2959 gimple_set_visited (gsi_stmt (gsi
), false);
2961 /* Complete the target-specific cost calculation. */
2962 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2963 &vec_inside_cost
, &vec_epilogue_cost
);
2965 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2967 if (dump_enabled_p ())
2969 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2970 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2972 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2973 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2974 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2977 /* Vectorization is profitable if its cost is more than the cost of scalar
2978 version. Note that we err on the vector side for equal cost because
2979 the cost estimate is otherwise quite pessimistic (constant uses are
2980 free on the scalar side but cost a load on the vector side for
2982 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2988 /* Find any vectorizable constructors and add them to the grouped_store
2992 vect_slp_check_for_constructors (bb_vec_info bb_vinfo
)
2994 gimple_stmt_iterator gsi
;
2996 for (gsi
= bb_vinfo
->region_begin
;
2997 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2999 gimple
*stmt
= gsi_stmt (gsi
);
3001 if (is_gimple_assign (stmt
)
3002 && gimple_assign_rhs_code (stmt
) == CONSTRUCTOR
3003 && TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
3004 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt
))) == VECTOR_TYPE
)
3006 tree rhs
= gimple_assign_rhs1 (stmt
);
3008 if (CONSTRUCTOR_NELTS (rhs
) == 0)
3011 poly_uint64 subparts
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs
));
3013 if (maybe_ne (subparts
, CONSTRUCTOR_NELTS (rhs
)))
3016 if (dump_enabled_p ())
3017 dump_printf_loc (MSG_NOTE
, vect_location
,
3018 "Found vectorizable constructor: %G\n", stmt
);
3019 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (stmt
);
3020 BB_VINFO_GROUPED_STORES (bb_vinfo
).safe_push (stmt_info
);
3025 /* Check if the region described by BB_VINFO can be vectorized, returning
3026 true if so. When returning false, set FATAL to true if the same failure
3027 would prevent vectorization at other vector sizes, false if it is still
3028 worth trying other sizes. N_STMTS is the number of statements in the
3032 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo
, int n_stmts
, bool &fatal
)
3034 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3036 slp_instance instance
;
3038 poly_uint64 min_vf
= 2;
3040 /* The first group of checks is independent of the vector size. */
3043 /* Analyze the data references. */
3045 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, NULL
))
3047 if (dump_enabled_p ())
3048 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3049 "not vectorized: unhandled data-ref in basic "
3054 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
3056 if (dump_enabled_p ())
3057 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3058 "not vectorized: not enough data-refs in "
3063 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
3065 if (dump_enabled_p ())
3066 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3067 "not vectorized: unhandled data access in "
3072 vect_slp_check_for_constructors (bb_vinfo
);
3074 /* If there are no grouped stores in the region there is no need
3075 to continue with pattern recog as vect_analyze_slp will fail
3077 if (bb_vinfo
->grouped_stores
.is_empty ())
3079 if (dump_enabled_p ())
3080 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3081 "not vectorized: no grouped stores in "
3086 /* While the rest of the analysis below depends on it in some way. */
3089 vect_pattern_recog (bb_vinfo
);
3091 /* Check the SLP opportunities in the basic block, analyze and build SLP
3093 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
3095 if (dump_enabled_p ())
3097 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3098 "Failed to SLP the basic block.\n");
3099 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3100 "not vectorized: failed to find SLP opportunities "
3101 "in basic block.\n");
3106 vect_record_base_alignments (bb_vinfo
);
3108 /* Analyze and verify the alignment of data references and the
3109 dependence in the SLP instances. */
3110 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
3112 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
3113 || ! vect_slp_analyze_instance_dependence (instance
))
3115 slp_tree node
= SLP_INSTANCE_TREE (instance
);
3116 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3117 if (dump_enabled_p ())
3118 dump_printf_loc (MSG_NOTE
, vect_location
,
3119 "removing SLP instance operations starting from: %G",
3121 vect_free_slp_instance (instance
, false);
3122 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
3126 /* Mark all the statements that we want to vectorize as pure SLP and
3128 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
3129 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3130 if (SLP_INSTANCE_ROOT_STMT (instance
))
3131 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance
)) = pure_slp
;
3135 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3138 if (!vect_slp_analyze_operations (bb_vinfo
))
3140 if (dump_enabled_p ())
3141 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3142 "not vectorized: bad operation in basic block.\n");
3146 /* Cost model: check if the vectorization is worthwhile. */
3147 if (!unlimited_cost_model (NULL
)
3148 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3150 if (dump_enabled_p ())
3151 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3152 "not vectorized: vectorization is not "
3157 if (dump_enabled_p ())
3158 dump_printf_loc (MSG_NOTE
, vect_location
,
3159 "Basic block will be vectorized using SLP\n");
3163 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3164 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3165 on success. The region has N_STMTS statements and has the datarefs
3166 given by DATAREFS. */
3169 vect_slp_bb_region (gimple_stmt_iterator region_begin
,
3170 gimple_stmt_iterator region_end
,
3171 vec
<data_reference_p
> datarefs
,
3172 unsigned int n_stmts
)
3174 bb_vec_info bb_vinfo
;
3175 auto_vector_modes vector_modes
;
3177 /* Autodetect first vector size we try. */
3178 machine_mode next_vector_mode
= VOIDmode
;
3179 targetm
.vectorize
.autovectorize_vector_modes (&vector_modes
, false);
3180 unsigned int mode_i
= 0;
3182 vec_info_shared shared
;
3184 poly_uint64 autodetected_vector_size
= 0;
3187 bool vectorized
= false;
3189 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, &shared
);
3191 bool first_time_p
= shared
.datarefs
.is_empty ();
3192 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
3194 bb_vinfo
->shared
->save_datarefs ();
3196 bb_vinfo
->shared
->check_datarefs ();
3197 bb_vinfo
->vector_size
= GET_MODE_SIZE (next_vector_mode
);
3199 if (vect_slp_analyze_bb_1 (bb_vinfo
, n_stmts
, fatal
)
3200 && dbg_cnt (vect_slp
))
3202 if (dump_enabled_p ())
3203 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3205 bb_vinfo
->shared
->check_datarefs ();
3206 vect_schedule_slp (bb_vinfo
);
3208 unsigned HOST_WIDE_INT bytes
;
3209 if (dump_enabled_p ())
3211 if (bb_vinfo
->vector_size
.is_constant (&bytes
))
3212 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3213 "basic block part vectorized using %wu byte "
3214 "vectors\n", bytes
);
3216 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3217 "basic block part vectorized using variable "
3218 "length vectors\n");
3225 autodetected_vector_size
= bb_vinfo
->vector_size
;
3229 if (mode_i
< vector_modes
.length ()
3230 && known_eq (GET_MODE_SIZE (vector_modes
[mode_i
]),
3231 autodetected_vector_size
))
3235 || mode_i
== vector_modes
.length ()
3236 || known_eq (autodetected_vector_size
, 0U)
3237 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3238 vector sizes will fail do not bother iterating. */
3242 /* Try the next biggest vector size. */
3243 next_vector_mode
= vector_modes
[mode_i
++];
3244 if (dump_enabled_p ())
3245 dump_printf_loc (MSG_NOTE
, vect_location
,
3246 "***** Re-trying analysis with vector mode %s\n",
3247 GET_MODE_NAME (next_vector_mode
));
3251 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3252 true if anything in the basic-block was vectorized. */
3255 vect_slp_bb (basic_block bb
)
3257 gimple_stmt_iterator gsi
;
3258 bool any_vectorized
= false;
3260 gsi
= gsi_start_bb (bb
);
3261 while (!gsi_end_p (gsi
))
3263 gimple_stmt_iterator region_begin
= gsi
;
3264 vec
<data_reference_p
> datarefs
= vNULL
;
3267 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3269 gimple
*stmt
= gsi_stmt (gsi
);
3270 if (is_gimple_debug (stmt
))
3274 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3275 vect_location
= stmt
;
3277 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3281 /* Skip leading unhandled stmts. */
3282 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3288 gimple_stmt_iterator region_end
= gsi
;
3290 if (insns
> param_slp_max_insns_in_bb
)
3292 if (dump_enabled_p ())
3293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3294 "not vectorized: too many instructions in "
3297 else if (vect_slp_bb_region (region_begin
, region_end
, datarefs
, insns
))
3298 any_vectorized
= true;
3300 if (gsi_end_p (region_end
))
3303 /* Skip the unhandled stmt. */
3307 return any_vectorized
;
3311 /* Return 1 if vector type STMT_VINFO is a boolean vector. */
3314 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo
)
3316 enum tree_code code
= gimple_expr_code (stmt_vinfo
->stmt
);
3318 enum vect_def_type dt
;
3320 /* For comparison and COND_EXPR type is chosen depending
3321 on the non-constant other comparison operand. */
3322 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3324 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3325 op
= gimple_assign_rhs1 (stmt
);
3327 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3330 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3333 if (code
== COND_EXPR
)
3335 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3336 tree cond
= gimple_assign_rhs1 (stmt
);
3338 if (TREE_CODE (cond
) == SSA_NAME
)
3341 op
= TREE_OPERAND (cond
, 0);
3343 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3346 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3349 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3352 /* Build a variable-length vector in which the elements in ELTS are repeated
3353 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3354 RESULTS and add any new instructions to SEQ.
3356 The approach we use is:
3358 (1) Find a vector mode VM with integer elements of mode IM.
3360 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3361 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3362 from small vectors to IM.
3364 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3366 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3367 correct byte contents.
3369 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3371 We try to find the largest IM for which this sequence works, in order
3372 to cut down on the number of interleaves. */
3375 duplicate_and_interleave (vec_info
*vinfo
, gimple_seq
*seq
, tree vector_type
,
3376 vec
<tree
> elts
, unsigned int nresults
,
3379 unsigned int nelts
= elts
.length ();
3380 tree element_type
= TREE_TYPE (vector_type
);
3382 /* (1) Find a vector mode VM with integer elements of mode IM. */
3383 unsigned int nvectors
= 1;
3384 tree new_vector_type
;
3386 if (!can_duplicate_and_interleave_p (vinfo
, nelts
, TYPE_MODE (element_type
),
3387 &nvectors
, &new_vector_type
,
3391 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3392 unsigned int partial_nelts
= nelts
/ nvectors
;
3393 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3395 tree_vector_builder partial_elts
;
3396 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3397 pieces
.quick_grow (nvectors
* 2);
3398 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3400 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3401 ELTS' has mode IM. */
3402 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3403 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3404 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3405 tree t
= gimple_build_vector (seq
, &partial_elts
);
3406 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3407 TREE_TYPE (new_vector_type
), t
);
3409 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3410 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3413 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3414 correct byte contents.
3416 We need to repeat the following operation log2(nvectors) times:
3418 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3419 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3421 However, if each input repeats every N elements and the VF is
3422 a multiple of N * 2, the HI result is the same as the LO. */
3423 unsigned int in_start
= 0;
3424 unsigned int out_start
= nvectors
;
3425 unsigned int hi_start
= nvectors
/ 2;
3426 /* A bound on the number of outputs needed to produce NRESULTS results
3427 in the final iteration. */
3428 unsigned int noutputs_bound
= nvectors
* nresults
;
3429 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3431 noutputs_bound
/= 2;
3432 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3433 for (unsigned int i
= 0; i
< limit
; ++i
)
3436 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3439 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3443 tree output
= make_ssa_name (new_vector_type
);
3444 tree input1
= pieces
[in_start
+ (i
/ 2)];
3445 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3446 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3449 gimple_seq_add_stmt (seq
, stmt
);
3450 pieces
[out_start
+ i
] = output
;
3452 std::swap (in_start
, out_start
);
3455 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3456 results
.reserve (nresults
);
3457 for (unsigned int i
= 0; i
< nresults
; ++i
)
3459 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3460 pieces
[in_start
+ i
]));
3462 results
.quick_push (results
[i
- nvectors
]);
3466 /* For constant and loop invariant defs of SLP_NODE this function returns
3467 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3468 OP_NODE determines the node for the operand containing the scalar
3472 vect_get_constant_vectors (slp_tree op_node
, slp_tree slp_node
,
3473 vec
<tree
> *vec_oprnds
)
3475 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3476 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3477 unsigned HOST_WIDE_INT nunits
;
3479 unsigned j
, number_of_places_left_in_vector
;
3482 int group_size
= op_node
->ops
.length ();
3483 unsigned int vec_num
, i
;
3484 unsigned number_of_copies
= 1;
3486 tree neutral_op
= NULL
;
3487 gimple_seq ctor_seq
= NULL
;
3488 auto_vec
<tree
, 16> permute_results
;
3490 /* ??? SLP analysis should compute the vector type for the
3491 constant / invariant and store it in the SLP node. */
3492 tree op
= op_node
->ops
[0];
3493 /* Check if vector type is a boolean vector. */
3494 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3495 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3496 && vect_mask_constant_operand_p (stmt_vinfo
))
3497 vector_type
= truth_type_for (stmt_vectype
);
3499 vector_type
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
));
3501 /* ??? For lane-reducing ops we should also have the required number
3502 of vector stmts initialized rather than second-guessing here. */
3503 unsigned int number_of_vectors
;
3504 if (is_gimple_assign (stmt_vinfo
->stmt
)
3505 && (gimple_assign_rhs_code (stmt_vinfo
->stmt
) == SAD_EXPR
3506 || gimple_assign_rhs_code (stmt_vinfo
->stmt
) == DOT_PROD_EXPR
3507 || gimple_assign_rhs_code (stmt_vinfo
->stmt
) == WIDEN_SUM_EXPR
))
3508 number_of_vectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3511 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3512 * TYPE_VECTOR_SUBPARTS (stmt_vectype
),
3514 vec_oprnds
->create (number_of_vectors
);
3515 auto_vec
<tree
> voprnds (number_of_vectors
);
3517 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3518 created vectors. It is greater than 1 if unrolling is performed.
3520 For example, we have two scalar operands, s1 and s2 (e.g., group of
3521 strided accesses of size two), while NUNITS is four (i.e., four scalars
3522 of this type can be packed in a vector). The output vector will contain
3523 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3526 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3527 containing the operands.
3529 For example, NUNITS is four as before, and the group size is 8
3530 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3531 {s5, s6, s7, s8}. */
3533 /* When using duplicate_and_interleave, we just need one element for
3534 each scalar statement. */
3535 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3536 nunits
= group_size
;
3538 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3540 number_of_places_left_in_vector
= nunits
;
3542 tree_vector_builder
elts (vector_type
, nunits
, 1);
3543 elts
.quick_grow (nunits
);
3544 bool place_after_defs
= false;
3545 for (j
= 0; j
< number_of_copies
; j
++)
3547 for (i
= group_size
- 1; op_node
->ops
.iterate (i
, &op
); i
--)
3549 /* Create 'vect_ = {op0,op1,...,opn}'. */
3550 number_of_places_left_in_vector
--;
3552 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3554 if (CONSTANT_CLASS_P (op
))
3556 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3558 /* Can't use VIEW_CONVERT_EXPR for booleans because
3559 of possibly different sizes of scalar value and
3561 if (integer_zerop (op
))
3562 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3563 else if (integer_onep (op
))
3564 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3569 op
= fold_unary (VIEW_CONVERT_EXPR
,
3570 TREE_TYPE (vector_type
), op
);
3571 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3575 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3577 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3580 = build_all_ones_cst (TREE_TYPE (vector_type
));
3582 = build_zero_cst (TREE_TYPE (vector_type
));
3583 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3584 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3590 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3593 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3596 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3600 elts
[number_of_places_left_in_vector
] = op
;
3601 if (!CONSTANT_CLASS_P (op
))
3603 if (TREE_CODE (orig_op
) == SSA_NAME
3604 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3605 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3606 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3607 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3608 place_after_defs
= true;
3610 if (number_of_places_left_in_vector
== 0)
3613 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3614 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3615 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3618 if (permute_results
.is_empty ())
3619 duplicate_and_interleave (vinfo
, &ctor_seq
, vector_type
,
3620 elts
, number_of_vectors
,
3622 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3625 gimple_stmt_iterator gsi
;
3626 if (place_after_defs
)
3628 stmt_vec_info last_stmt_info
3629 = vect_find_last_scalar_stmt_in_slp (slp_node
);
3630 gsi
= gsi_for_stmt (last_stmt_info
->stmt
);
3631 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3635 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3637 if (ctor_seq
!= NULL
)
3639 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3640 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3643 voprnds
.quick_push (init
);
3644 place_after_defs
= false;
3645 number_of_places_left_in_vector
= nunits
;
3647 elts
.new_vector (vector_type
, nunits
, 1);
3648 elts
.quick_grow (nunits
);
3653 /* Since the vectors are created in the reverse order, we should invert
3655 vec_num
= voprnds
.length ();
3656 for (j
= vec_num
; j
!= 0; j
--)
3658 vop
= voprnds
[j
- 1];
3659 vec_oprnds
->quick_push (vop
);
3662 /* In case that VF is greater than the unrolling factor needed for the SLP
3663 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3664 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3665 to replicate the vectors. */
3666 while (number_of_vectors
> vec_oprnds
->length ())
3668 tree neutral_vec
= NULL
;
3673 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3675 vec_oprnds
->quick_push (neutral_vec
);
3679 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3680 vec_oprnds
->quick_push (vop
);
3686 /* Get vectorized definitions from SLP_NODE that contains corresponding
3687 vectorized def-stmts. */
3690 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3692 stmt_vec_info vec_def_stmt_info
;
3695 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3697 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt_info
)
3698 vec_oprnds
->quick_push (gimple_get_lhs (vec_def_stmt_info
->stmt
));
3702 /* Get N vectorized definitions for SLP_NODE.
3703 If the scalar definitions are loop invariants or constants, collect them and
3704 call vect_get_constant_vectors() to create vector stmts.
3705 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3706 must be stored in the corresponding child of SLP_NODE, and we call
3707 vect_get_slp_vect_defs () to retrieve them. */
3710 vect_get_slp_defs (slp_tree slp_node
, vec
<vec
<tree
> > *vec_oprnds
, unsigned n
)
3713 n
= SLP_TREE_CHILDREN (slp_node
).length ();
3715 for (unsigned i
= 0; i
< n
; ++i
)
3717 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[i
];
3719 vec
<tree
> vec_defs
= vNULL
;
3721 /* For each operand we check if it has vectorized definitions in a child
3722 node or we need to create them (for invariants and constants). */
3723 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3725 vec_defs
.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child
));
3726 vect_get_slp_vect_defs (child
, &vec_defs
);
3729 vect_get_constant_vectors (child
, slp_node
, &vec_defs
);
3731 vec_oprnds
->quick_push (vec_defs
);
3735 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3736 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3737 permute statements for the SLP node NODE of the SLP instance
3738 SLP_NODE_INSTANCE. */
3741 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3742 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3743 slp_instance slp_node_instance
, bool analyze_only
,
3746 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3747 vec_info
*vinfo
= stmt_info
->vinfo
;
3749 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3750 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3751 unsigned int mask_element
;
3754 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3757 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
3759 mode
= TYPE_MODE (vectype
);
3760 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3762 /* Initialize the vect stmts of NODE to properly insert the generated
3765 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3766 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3767 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3769 /* Generate permutation masks for every NODE. Number of masks for each NODE
3770 is equal to GROUP_SIZE.
3771 E.g., we have a group of three nodes with three loads from the same
3772 location in each node, and the vector size is 4. I.e., we have a
3773 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3774 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3775 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3778 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3779 The last mask is illegal since we assume two operands for permute
3780 operation, and the mask element values can't be outside that range.
3781 Hence, the last mask must be converted into {2,5,5,5}.
3782 For the first two permutations we need the first and the second input
3783 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3784 we need the second and the third vectors: {b1,c1,a2,b2} and
3787 int vect_stmts_counter
= 0;
3788 unsigned int index
= 0;
3789 int first_vec_index
= -1;
3790 int second_vec_index
= -1;
3794 vec_perm_builder mask
;
3795 unsigned int nelts_to_build
;
3796 unsigned int nvectors_per_build
;
3797 bool repeating_p
= (group_size
== DR_GROUP_SIZE (stmt_info
)
3798 && multiple_p (nunits
, group_size
));
3801 /* A single vector contains a whole number of copies of the node, so:
3802 (a) all permutes can use the same mask; and
3803 (b) the permutes only need a single vector input. */
3804 mask
.new_vector (nunits
, group_size
, 3);
3805 nelts_to_build
= mask
.encoded_nelts ();
3806 nvectors_per_build
= SLP_TREE_VEC_STMTS (node
).length ();
3810 /* We need to construct a separate mask for each vector statement. */
3811 unsigned HOST_WIDE_INT const_nunits
, const_vf
;
3812 if (!nunits
.is_constant (&const_nunits
)
3813 || !vf
.is_constant (&const_vf
))
3815 mask
.new_vector (const_nunits
, const_nunits
, 1);
3816 nelts_to_build
= const_vf
* group_size
;
3817 nvectors_per_build
= 1;
3820 unsigned int count
= mask
.encoded_nelts ();
3821 mask
.quick_grow (count
);
3822 vec_perm_indices indices
;
3824 for (unsigned int j
= 0; j
< nelts_to_build
; j
++)
3826 unsigned int iter_num
= j
/ group_size
;
3827 unsigned int stmt_num
= j
% group_size
;
3828 unsigned int i
= (iter_num
* DR_GROUP_SIZE (stmt_info
)
3829 + SLP_TREE_LOAD_PERMUTATION (node
)[stmt_num
]);
3832 first_vec_index
= 0;
3837 /* Enforced before the loop when !repeating_p. */
3838 unsigned int const_nunits
= nunits
.to_constant ();
3839 vec_index
= i
/ const_nunits
;
3840 mask_element
= i
% const_nunits
;
3841 if (vec_index
== first_vec_index
3842 || first_vec_index
== -1)
3844 first_vec_index
= vec_index
;
3846 else if (vec_index
== second_vec_index
3847 || second_vec_index
== -1)
3849 second_vec_index
= vec_index
;
3850 mask_element
+= const_nunits
;
3854 if (dump_enabled_p ())
3855 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3856 "permutation requires at "
3857 "least three vectors %G",
3859 gcc_assert (analyze_only
);
3863 gcc_assert (mask_element
< 2 * const_nunits
);
3866 if (mask_element
!= index
)
3868 mask
[index
++] = mask_element
;
3870 if (index
== count
&& !noop_p
)
3872 indices
.new_vector (mask
, second_vec_index
== -1 ? 1 : 2, nunits
);
3873 if (!can_vec_perm_const_p (mode
, indices
))
3875 if (dump_enabled_p ())
3877 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3879 "unsupported vect permute { ");
3880 for (i
= 0; i
< count
; ++i
)
3882 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3883 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3885 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3887 gcc_assert (analyze_only
);
3898 tree mask_vec
= NULL_TREE
;
3901 mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
3903 if (second_vec_index
== -1)
3904 second_vec_index
= first_vec_index
;
3906 for (unsigned int ri
= 0; ri
< nvectors_per_build
; ++ri
)
3908 /* Generate the permute statement if necessary. */
3909 tree first_vec
= dr_chain
[first_vec_index
+ ri
];
3910 tree second_vec
= dr_chain
[second_vec_index
+ ri
];
3911 stmt_vec_info perm_stmt_info
;
3914 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3916 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3918 perm_dest
= make_ssa_name (perm_dest
);
3920 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
3921 first_vec
, second_vec
,
3924 = vect_finish_stmt_generation (stmt_info
, perm_stmt
,
3928 /* If mask was NULL_TREE generate the requested
3929 identity transform. */
3930 perm_stmt_info
= vinfo
->lookup_def (first_vec
);
3932 /* Store the vector statement in NODE. */
3933 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++]
3939 first_vec_index
= -1;
3940 second_vec_index
= -1;
3948 /* Vectorize SLP instance tree in postorder. */
3951 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3952 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3954 gimple_stmt_iterator si
;
3955 stmt_vec_info stmt_info
;
3956 unsigned int group_size
;
3961 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3964 /* See if we have already vectorized the node in the graph of the
3966 if (SLP_TREE_VEC_STMTS (node
).exists ())
3969 /* See if we have already vectorized the same set of stmts and reuse their
3970 vectorized stmts across instances. */
3971 if (slp_tree
*leader
= bst_map
->get (SLP_TREE_SCALAR_STMTS (node
)))
3973 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (*leader
));
3977 bst_map
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
3978 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3979 vect_schedule_slp_instance (child
, instance
, bst_map
);
3981 /* Push SLP node def-type to stmts. */
3982 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3983 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3985 stmt_vec_info child_stmt_info
;
3986 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
3987 STMT_VINFO_DEF_TYPE (child_stmt_info
) = SLP_TREE_DEF_TYPE (child
);
3990 stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3992 /* VECTYPE is the type of the destination. */
3993 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3994 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3995 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3997 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
3998 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
4000 if (dump_enabled_p ())
4001 dump_printf_loc (MSG_NOTE
, vect_location
,
4002 "------>vectorizing SLP node starting from: %G",
4005 /* Vectorized stmts go before the last scalar stmt which is where
4006 all uses are ready. */
4007 stmt_vec_info last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
4008 si
= gsi_for_stmt (last_stmt_info
->stmt
);
4010 /* Handle two-operation SLP nodes by vectorizing the group with
4011 both operations and then performing a merge. */
4012 if (SLP_TREE_TWO_OPERATORS (node
))
4014 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
4015 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
4016 enum tree_code ocode
= ERROR_MARK
;
4017 stmt_vec_info ostmt_info
;
4018 vec_perm_builder
mask (group_size
, group_size
, 1);
4019 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt_info
)
4021 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
4022 if (gimple_assign_rhs_code (ostmt
) != code0
)
4024 mask
.quick_push (1);
4025 ocode
= gimple_assign_rhs_code (ostmt
);
4028 mask
.quick_push (0);
4030 if (ocode
!= ERROR_MARK
)
4032 vec
<stmt_vec_info
> v0
;
4033 vec
<stmt_vec_info
> v1
;
4035 tree tmask
= NULL_TREE
;
4036 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
4037 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
4038 SLP_TREE_VEC_STMTS (node
).truncate (0);
4039 gimple_assign_set_rhs_code (stmt
, ocode
);
4040 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
4041 gimple_assign_set_rhs_code (stmt
, code0
);
4042 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
4043 SLP_TREE_VEC_STMTS (node
).truncate (0);
4044 tree meltype
= build_nonstandard_integer_type
4045 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
4046 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
4048 for (j
= 0; j
< v0
.length (); ++j
)
4050 /* Enforced by vect_build_slp_tree, which rejects variable-length
4051 vectors for SLP_TREE_TWO_OPERATORS. */
4052 unsigned int const_nunits
= nunits
.to_constant ();
4053 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
4054 for (l
= 0; l
< const_nunits
; ++l
)
4056 if (k
>= group_size
)
4058 tree t
= build_int_cst (meltype
,
4059 mask
[k
++] * const_nunits
+ l
);
4060 melts
.quick_push (t
);
4062 tmask
= melts
.build ();
4064 /* ??? Not all targets support a VEC_PERM_EXPR with a
4065 constant mask that would translate to a vec_merge RTX
4066 (with their vec_perm_const_ok). We can either not
4067 vectorize in that case or let veclower do its job.
4068 Unfortunately that isn't too great and at least for
4069 plus/minus we'd eventually like to match targets
4070 vector addsub instructions. */
4072 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
4074 gimple_assign_lhs (v0
[j
]->stmt
),
4075 gimple_assign_lhs (v1
[j
]->stmt
),
4077 SLP_TREE_VEC_STMTS (node
).quick_push
4078 (vect_finish_stmt_generation (stmt_info
, vstmt
, &si
));
4085 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
4087 /* Restore stmt def-types. */
4088 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4089 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4091 stmt_vec_info child_stmt_info
;
4092 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
4093 STMT_VINFO_DEF_TYPE (child_stmt_info
) = vect_internal_def
;
4097 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4098 For loop vectorization this is done in vectorizable_call, but for SLP
4099 it needs to be deferred until end of vect_schedule_slp, because multiple
4100 SLP instances may refer to the same scalar stmt. */
4103 vect_remove_slp_scalar_calls (slp_tree node
, hash_set
<slp_tree
> &visited
)
4106 gimple_stmt_iterator gsi
;
4110 stmt_vec_info stmt_info
;
4112 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4115 if (visited
.add (node
))
4118 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4119 vect_remove_slp_scalar_calls (child
, visited
);
4121 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
4123 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
4124 if (!stmt
|| gimple_bb (stmt
) == NULL
)
4126 if (is_pattern_stmt_p (stmt_info
)
4127 || !PURE_SLP_STMT (stmt_info
))
4129 lhs
= gimple_call_lhs (stmt
);
4130 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
4131 gsi
= gsi_for_stmt (stmt
);
4132 stmt_info
->vinfo
->replace_stmt (&gsi
, stmt_info
, new_stmt
);
4133 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
4138 vect_remove_slp_scalar_calls (slp_tree node
)
4140 hash_set
<slp_tree
> visited
;
4141 vect_remove_slp_scalar_calls (node
, visited
);
4144 /* Vectorize the instance root. */
4147 vectorize_slp_instance_root_stmt (slp_tree node
, slp_instance instance
)
4149 gassign
*rstmt
= NULL
;
4151 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) == 1)
4153 stmt_vec_info child_stmt_info
;
4156 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt_info
)
4158 tree vect_lhs
= gimple_get_lhs (child_stmt_info
->stmt
);
4159 tree root_lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4160 rstmt
= gimple_build_assign (root_lhs
, vect_lhs
);
4164 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) > 1)
4166 int nelts
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
4167 stmt_vec_info child_stmt_info
;
4169 vec
<constructor_elt
, va_gc
> *v
;
4170 vec_alloc (v
, nelts
);
4172 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt_info
)
4174 CONSTRUCTOR_APPEND_ELT (v
,
4176 gimple_get_lhs (child_stmt_info
->stmt
));
4178 tree lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
4179 tree rtype
= TREE_TYPE (gimple_assign_rhs1 (instance
->root_stmt
->stmt
));
4180 tree r_constructor
= build_constructor (rtype
, v
);
4181 rstmt
= gimple_build_assign (lhs
, r_constructor
);
4186 gimple_stmt_iterator rgsi
= gsi_for_stmt (instance
->root_stmt
->stmt
);
4187 gsi_replace (&rgsi
, rstmt
, true);
4190 /* Generate vector code for all SLP instances in the loop/basic block. */
4193 vect_schedule_slp (vec_info
*vinfo
)
4195 vec
<slp_instance
> slp_instances
;
4196 slp_instance instance
;
4199 scalar_stmts_to_slp_tree_map_t
*bst_map
4200 = new scalar_stmts_to_slp_tree_map_t ();
4201 slp_instances
= vinfo
->slp_instances
;
4202 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4204 slp_tree node
= SLP_INSTANCE_TREE (instance
);
4205 /* Schedule the tree of INSTANCE. */
4206 vect_schedule_slp_instance (node
, instance
, bst_map
);
4208 if (SLP_INSTANCE_ROOT_STMT (instance
))
4209 vectorize_slp_instance_root_stmt (node
, instance
);
4211 if (dump_enabled_p ())
4212 dump_printf_loc (MSG_NOTE
, vect_location
,
4213 "vectorizing stmts using SLP.\n");
4217 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4219 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4220 stmt_vec_info store_info
;
4223 /* Remove scalar call stmts. Do not do this for basic-block
4224 vectorization as not all uses may be vectorized.
4225 ??? Why should this be necessary? DCE should be able to
4226 remove the stmts itself.
4227 ??? For BB vectorization we can as well remove scalar
4228 stmts starting from the SLP tree root if they have no
4230 if (is_a
<loop_vec_info
> (vinfo
))
4231 vect_remove_slp_scalar_calls (root
);
4233 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
)
4234 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4236 if (!STMT_VINFO_DATA_REF (store_info
))
4239 if (SLP_INSTANCE_ROOT_STMT (instance
))
4242 store_info
= vect_orig_stmt (store_info
);
4243 /* Free the attached stmt_vec_info and remove the stmt. */
4244 vinfo
->remove_stmt (store_info
);