1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
36 #include "fold-const.h"
37 #include "stor-layout.h"
38 #include "gimple-iterator.h"
40 #include "tree-vectorizer.h"
41 #include "langhooks.h"
42 #include "gimple-walk.h"
44 #include "tree-vector-builder.h"
45 #include "vec-perm-indices.h"
46 #include "gimple-fold.h"
47 #include "internal-fn.h"
50 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
55 vect_free_slp_tree (slp_tree node
, bool final_p
)
60 if (--node
->refcnt
!= 0)
63 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
64 vect_free_slp_tree (child
, final_p
);
66 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
67 Some statements might no longer exist, after having been
68 removed by vect_transform_stmt. Updating the remaining
69 statements would be redundant. */
72 stmt_vec_info stmt_info
;
73 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
75 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info
) > 0);
76 STMT_VINFO_NUM_SLP_USES (stmt_info
)--;
80 SLP_TREE_CHILDREN (node
).release ();
81 SLP_TREE_SCALAR_STMTS (node
).release ();
82 SLP_TREE_SCALAR_OPS (node
).release ();
83 SLP_TREE_VEC_STMTS (node
).release ();
84 SLP_TREE_LOAD_PERMUTATION (node
).release ();
89 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
90 have vectorized the instance or if we have made a final decision not
91 to vectorize the statements in any way. */
94 vect_free_slp_instance (slp_instance instance
, bool final_p
)
96 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
), final_p
);
97 SLP_INSTANCE_LOADS (instance
).release ();
102 /* Create an SLP node for SCALAR_STMTS. */
105 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
)
108 stmt_vec_info stmt_info
= scalar_stmts
[0];
111 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
112 nops
= gimple_call_num_args (stmt
);
113 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
115 nops
= gimple_num_ops (stmt
) - 1;
116 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
119 else if (is_a
<gphi
*> (stmt_info
->stmt
))
124 node
= XNEW (struct _slp_tree
);
125 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
126 SLP_TREE_SCALAR_OPS (node
) = vNULL
;
127 SLP_TREE_VEC_STMTS (node
).create (0);
128 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
129 SLP_TREE_CHILDREN (node
).create (nops
);
130 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
131 SLP_TREE_TWO_OPERATORS (node
) = false;
132 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
134 node
->max_nunits
= 1;
137 FOR_EACH_VEC_ELT (scalar_stmts
, i
, stmt_info
)
138 STMT_VINFO_NUM_SLP_USES (stmt_info
)++;
143 /* Create an SLP node for OPS. */
146 vect_create_new_slp_node (vec
<tree
> ops
)
150 node
= XNEW (struct _slp_tree
);
151 SLP_TREE_SCALAR_STMTS (node
) = vNULL
;
152 SLP_TREE_SCALAR_OPS (node
) = ops
;
153 SLP_TREE_VEC_STMTS (node
).create (0);
154 SLP_TREE_NUMBER_OF_VEC_STMTS (node
) = 0;
155 SLP_TREE_CHILDREN (node
) = vNULL
;
156 SLP_TREE_LOAD_PERMUTATION (node
) = vNULL
;
157 SLP_TREE_TWO_OPERATORS (node
) = false;
158 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
160 node
->max_nunits
= 1;
166 /* This structure is used in creation of an SLP tree. Each instance
167 corresponds to the same operand in a group of scalar stmts in an SLP
169 typedef struct _slp_oprnd_info
171 /* Def-stmts for the operands. */
172 vec
<stmt_vec_info
> def_stmts
;
175 /* Information about the first statement, its vector def-type, type, the
176 operand itself in case it's constant, and an indication if it's a pattern
179 enum vect_def_type first_dt
;
184 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
186 static vec
<slp_oprnd_info
>
187 vect_create_oprnd_info (int nops
, int group_size
)
190 slp_oprnd_info oprnd_info
;
191 vec
<slp_oprnd_info
> oprnds_info
;
193 oprnds_info
.create (nops
);
194 for (i
= 0; i
< nops
; i
++)
196 oprnd_info
= XNEW (struct _slp_oprnd_info
);
197 oprnd_info
->def_stmts
.create (group_size
);
198 oprnd_info
->ops
.create (group_size
);
199 oprnd_info
->first_dt
= vect_uninitialized_def
;
200 oprnd_info
->first_op_type
= NULL_TREE
;
201 oprnd_info
->any_pattern
= false;
202 oprnds_info
.quick_push (oprnd_info
);
209 /* Free operands info. */
212 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
215 slp_oprnd_info oprnd_info
;
217 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
219 oprnd_info
->def_stmts
.release ();
220 oprnd_info
->ops
.release ();
221 XDELETE (oprnd_info
);
224 oprnds_info
.release ();
228 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
229 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
233 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info
,
234 stmt_vec_info first_stmt_info
)
236 stmt_vec_info next_stmt_info
= first_stmt_info
;
239 if (first_stmt_info
!= DR_GROUP_FIRST_ELEMENT (stmt_info
))
244 if (next_stmt_info
== stmt_info
)
246 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
248 result
+= DR_GROUP_GAP (next_stmt_info
);
250 while (next_stmt_info
);
255 /* Check whether it is possible to load COUNT elements of type ELT_MODE
256 using the method implemented by duplicate_and_interleave. Return true
257 if so, returning the number of intermediate vectors in *NVECTORS_OUT
258 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
262 can_duplicate_and_interleave_p (vec_info
*vinfo
, unsigned int count
,
263 machine_mode elt_mode
,
264 unsigned int *nvectors_out
,
265 tree
*vector_type_out
,
268 poly_int64 elt_bytes
= count
* GET_MODE_SIZE (elt_mode
);
270 unsigned int nvectors
= 1;
273 scalar_int_mode int_mode
;
274 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
275 if (multiple_p (vinfo
->vector_size
, elt_bytes
, &nelts
)
276 && int_mode_for_size (elt_bits
, 0).exists (&int_mode
))
278 tree int_type
= build_nonstandard_integer_type
279 (GET_MODE_BITSIZE (int_mode
), 1);
280 tree vector_type
= build_vector_type (int_type
, nelts
);
281 if (VECTOR_MODE_P (TYPE_MODE (vector_type
)))
283 vec_perm_builder
sel1 (nelts
, 2, 3);
284 vec_perm_builder
sel2 (nelts
, 2, 3);
285 poly_int64 half_nelts
= exact_div (nelts
, 2);
286 for (unsigned int i
= 0; i
< 3; ++i
)
289 sel1
.quick_push (i
+ nelts
);
290 sel2
.quick_push (half_nelts
+ i
);
291 sel2
.quick_push (half_nelts
+ i
+ nelts
);
293 vec_perm_indices
indices1 (sel1
, 2, nelts
);
294 vec_perm_indices
indices2 (sel2
, 2, nelts
);
295 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
296 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
299 *nvectors_out
= nvectors
;
301 *vector_type_out
= vector_type
;
304 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
306 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
313 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
319 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
320 they are of a valid type and that they match the defs of the first stmt of
321 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
322 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
323 indicates swap is required for cond_expr stmts. Specifically, *SWAP
324 is 1 if STMT is cond and operands of comparison need to be swapped;
325 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
326 If there is any operand swap in this function, *SWAP is set to non-zero
328 If there was a fatal error return -1; if the error could be corrected by
329 swapping operands of father node of this one, return 1; if everything is
332 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char *swap
,
333 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
334 vec
<slp_oprnd_info
> *oprnds_info
)
336 stmt_vec_info stmt_info
= stmts
[stmt_num
];
338 unsigned int i
, number_of_oprnds
;
339 enum vect_def_type dt
= vect_uninitialized_def
;
340 slp_oprnd_info oprnd_info
;
341 int first_op_idx
= 1;
342 unsigned int commutative_op
= -1U;
343 bool first_op_cond
= false;
344 bool first
= stmt_num
== 0;
346 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
348 number_of_oprnds
= gimple_call_num_args (stmt
);
350 if (gimple_call_internal_p (stmt
))
352 internal_fn ifn
= gimple_call_internal_fn (stmt
);
353 commutative_op
= first_commutative_argument (ifn
);
355 /* Masked load, only look at mask. */
356 if (ifn
== IFN_MASK_LOAD
)
358 number_of_oprnds
= 1;
359 /* Mask operand index. */
364 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
366 enum tree_code code
= gimple_assign_rhs_code (stmt
);
367 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
368 /* Swap can only be done for cond_expr if asked to, otherwise we
369 could result in different comparison code to the first stmt. */
370 if (code
== COND_EXPR
371 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
373 first_op_cond
= true;
377 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
382 bool swapped
= (*swap
!= 0);
383 gcc_assert (!swapped
|| first_op_cond
);
384 for (i
= 0; i
< number_of_oprnds
; i
++)
389 /* Map indicating how operands of cond_expr should be swapped. */
390 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
391 int *map
= maps
[*swap
];
394 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
395 first_op_idx
), map
[i
]);
397 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
400 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
401 if (TREE_CODE (oprnd
) == VIEW_CONVERT_EXPR
)
402 oprnd
= TREE_OPERAND (oprnd
, 0);
404 oprnd_info
= (*oprnds_info
)[i
];
406 stmt_vec_info def_stmt_info
;
407 if (!vect_is_simple_use (oprnd
, vinfo
, &dt
, &def_stmt_info
))
409 if (dump_enabled_p ())
410 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
411 "Build SLP failed: can't analyze def for %T\n",
417 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
418 oprnd_info
->any_pattern
= true;
422 /* For the swapping logic below force vect_reduction_def
423 for the reduction op in a SLP reduction group. */
424 if (!STMT_VINFO_DATA_REF (stmt_info
)
425 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
426 && (int)i
== STMT_VINFO_REDUC_IDX (stmt_info
)
428 dt
= vect_reduction_def
;
429 oprnd_info
->first_dt
= dt
;
430 oprnd_info
->first_op_type
= TREE_TYPE (oprnd
);
434 /* Not first stmt of the group, check that the def-stmt/s match
435 the def-stmt/s of the first stmt. Allow different definition
436 types for reduction chains: the first stmt must be a
437 vect_reduction_def (a phi node), and the rest
438 end in the reduction chain. */
439 tree type
= TREE_TYPE (oprnd
);
440 if ((oprnd_info
->first_dt
!= dt
441 && !(oprnd_info
->first_dt
== vect_reduction_def
442 && !STMT_VINFO_DATA_REF (stmt_info
)
443 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
445 && !STMT_VINFO_DATA_REF (def_stmt_info
)
446 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
447 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
448 && !((oprnd_info
->first_dt
== vect_external_def
449 || oprnd_info
->first_dt
== vect_constant_def
)
450 && (dt
== vect_external_def
451 || dt
== vect_constant_def
)))
452 || !types_compatible_p (oprnd_info
->first_op_type
, type
)
453 || (!STMT_VINFO_DATA_REF (stmt_info
)
454 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
456 || STMT_VINFO_DATA_REF (def_stmt_info
)
457 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
458 != REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
459 != (oprnd_info
->first_dt
!= vect_reduction_def
))))
461 /* Try swapping operands if we got a mismatch. */
462 if (i
== commutative_op
&& !swapped
)
464 if (dump_enabled_p ())
465 dump_printf_loc (MSG_NOTE
, vect_location
,
466 "trying swapped operands\n");
471 if (dump_enabled_p ())
472 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
473 "Build SLP failed: different types\n");
477 if ((dt
== vect_constant_def
478 || dt
== vect_external_def
)
479 && !vinfo
->vector_size
.is_constant ()
480 && (TREE_CODE (type
) == BOOLEAN_TYPE
481 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
484 if (dump_enabled_p ())
485 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
486 "Build SLP failed: invalid type of def "
487 "for variable-length SLP %T\n", oprnd
);
492 /* Check the types of the definitions. */
495 case vect_external_def
:
496 /* Make sure to demote the overall operand to external. */
497 oprnd_info
->first_dt
= vect_external_def
;
499 case vect_constant_def
:
500 oprnd_info
->def_stmts
.quick_push (NULL
);
501 oprnd_info
->ops
.quick_push (oprnd
);
504 case vect_internal_def
:
505 case vect_reduction_def
:
506 if (oprnd_info
->first_dt
== vect_reduction_def
507 && !STMT_VINFO_DATA_REF (stmt_info
)
508 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
509 && !STMT_VINFO_DATA_REF (def_stmt_info
)
510 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
511 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
513 /* For a SLP reduction chain we want to duplicate the
514 reduction to each of the chain members. That gets
515 us a sane SLP graph (still the stmts are not 100%
516 correct wrt the initial values). */
518 oprnd_info
->def_stmts
.quick_push (oprnd_info
->def_stmts
[0]);
519 oprnd_info
->ops
.quick_push (oprnd_info
->ops
[0]);
523 case vect_induction_def
:
524 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
525 oprnd_info
->ops
.quick_push (oprnd
);
529 /* FORNOW: Not supported. */
530 if (dump_enabled_p ())
531 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
532 "Build SLP failed: illegal type of def %T\n",
544 /* If there are already uses of this stmt in a SLP instance then
545 we've committed to the operand order and can't swap it. */
546 if (STMT_VINFO_NUM_SLP_USES (stmt_info
) != 0)
548 if (dump_enabled_p ())
549 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
550 "Build SLP failed: cannot swap operands of "
551 "shared stmt %G", stmt_info
->stmt
);
555 /* To get rid of this swapping we have to move the stmt code
556 to the SLP tree as well (and gather it here per stmt). */
557 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
558 tree cond
= gimple_assign_rhs1 (stmt
);
559 enum tree_code code
= TREE_CODE (cond
);
564 swap_ssa_operands (stmt
, &TREE_OPERAND (cond
, 0),
565 &TREE_OPERAND (cond
, 1));
566 TREE_SET_CODE (cond
, swap_tree_comparison (code
));
571 swap_ssa_operands (stmt
, gimple_assign_rhs2_ptr (stmt
),
572 gimple_assign_rhs3_ptr (stmt
));
573 if (STMT_VINFO_REDUC_IDX (stmt_info
) == 1)
574 STMT_VINFO_REDUC_IDX (stmt_info
) = 2;
575 else if (STMT_VINFO_REDUC_IDX (stmt_info
) == 2)
576 STMT_VINFO_REDUC_IDX (stmt_info
) = 1;
577 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond
, 0));
578 code
= invert_tree_comparison (TREE_CODE (cond
), honor_nans
);
579 gcc_assert (code
!= ERROR_MARK
);
580 TREE_SET_CODE (cond
, code
);
585 /* Commutative ops need not reflect swapping, ops are in
588 if (dump_enabled_p ())
589 dump_printf_loc (MSG_NOTE
, vect_location
,
590 "swapped operands to match def types in %G",
598 /* Return true if call statements CALL1 and CALL2 are similar enough
599 to be combined into the same SLP group. */
602 compatible_calls_p (gcall
*call1
, gcall
*call2
)
604 unsigned int nargs
= gimple_call_num_args (call1
);
605 if (nargs
!= gimple_call_num_args (call2
))
608 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
611 if (gimple_call_internal_p (call1
))
613 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
614 TREE_TYPE (gimple_call_lhs (call2
))))
616 for (unsigned int i
= 0; i
< nargs
; ++i
)
617 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
618 TREE_TYPE (gimple_call_arg (call2
, i
))))
623 if (!operand_equal_p (gimple_call_fn (call1
),
624 gimple_call_fn (call2
), 0))
627 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
633 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
634 caller's attempt to find the vector type in STMT_INFO with the narrowest
635 element type. Return true if VECTYPE is nonnull and if it is valid
636 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
637 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
638 vect_build_slp_tree. */
641 vect_record_max_nunits (stmt_vec_info stmt_info
, unsigned int group_size
,
642 tree vectype
, poly_uint64
*max_nunits
)
646 if (dump_enabled_p ())
647 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
648 "Build SLP failed: unsupported data-type in %G\n",
650 /* Fatal mismatch. */
654 /* If populating the vector type requires unrolling then fail
655 before adjusting *max_nunits for basic-block vectorization. */
656 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
657 unsigned HOST_WIDE_INT const_nunits
;
658 if (STMT_VINFO_BB_VINFO (stmt_info
)
659 && (!nunits
.is_constant (&const_nunits
)
660 || const_nunits
> group_size
))
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
664 "Build SLP failed: unrolling required "
665 "in basic block SLP\n");
666 /* Fatal mismatch. */
670 /* In case of multiple types we need to detect the smallest type. */
671 vect_update_max_nunits (max_nunits
, vectype
);
675 /* STMTS is a group of GROUP_SIZE SLP statements in which some
676 statements do the same operation as the first statement and in which
677 the others do ALT_STMT_CODE. Return true if we can take one vector
678 of the first operation and one vector of the second and permute them
679 to get the required result. VECTYPE is the type of the vector that
680 would be permuted. */
683 vect_two_operations_perm_ok_p (vec
<stmt_vec_info
> stmts
,
684 unsigned int group_size
, tree vectype
,
685 tree_code alt_stmt_code
)
687 unsigned HOST_WIDE_INT count
;
688 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&count
))
691 vec_perm_builder
sel (count
, count
, 1);
692 for (unsigned int i
= 0; i
< count
; ++i
)
694 unsigned int elt
= i
;
695 gassign
*stmt
= as_a
<gassign
*> (stmts
[i
% group_size
]->stmt
);
696 if (gimple_assign_rhs_code (stmt
) == alt_stmt_code
)
698 sel
.quick_push (elt
);
700 vec_perm_indices
indices (sel
, 2, count
);
701 return can_vec_perm_const_p (TYPE_MODE (vectype
), indices
);
704 /* Verify if the scalar stmts STMTS are isomorphic, require data
705 permutation or are of unsupported types of operation. Return
706 true if they are, otherwise return false and indicate in *MATCHES
707 which stmts are not isomorphic to the first one. If MATCHES[0]
708 is false then this indicates the comparison could not be
709 carried out or the stmts will never be vectorized by SLP.
711 Note COND_EXPR is possibly isomorphic to another one after swapping its
712 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
713 the first stmt by swapping the two operands of comparison; set SWAP[i]
714 to 2 if stmt I is isormorphic to the first stmt by inverting the code
715 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
716 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
719 vect_build_slp_tree_1 (unsigned char *swap
,
720 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
721 poly_uint64
*max_nunits
, bool *matches
,
725 stmt_vec_info first_stmt_info
= stmts
[0];
726 enum tree_code first_stmt_code
= ERROR_MARK
;
727 enum tree_code alt_stmt_code
= ERROR_MARK
;
728 enum tree_code rhs_code
= ERROR_MARK
;
729 enum tree_code first_cond_code
= ERROR_MARK
;
731 bool need_same_oprnds
= false;
732 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
735 machine_mode optab_op2_mode
;
736 machine_mode vec_mode
;
737 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
740 /* For every stmt in NODE find its def stmt/s. */
741 stmt_vec_info stmt_info
;
742 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
744 gimple
*stmt
= stmt_info
->stmt
;
748 if (dump_enabled_p ())
749 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
751 /* Fail to vectorize statements marked as unvectorizable. */
752 if (!STMT_VINFO_VECTORIZABLE (stmt_info
))
754 if (dump_enabled_p ())
755 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
756 "Build SLP failed: unvectorizable statement %G",
758 /* Fatal mismatch. */
763 lhs
= gimple_get_lhs (stmt
);
764 if (lhs
== NULL_TREE
)
766 if (dump_enabled_p ())
767 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
768 "Build SLP failed: not GIMPLE_ASSIGN nor "
769 "GIMPLE_CALL %G", stmt
);
770 /* Fatal mismatch. */
776 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
779 && !vect_record_max_nunits (stmt_info
, group_size
,
780 nunits_vectype
, max_nunits
)))
782 /* Fatal mismatch. */
787 gcc_assert (vectype
);
789 if (gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
))
791 rhs_code
= CALL_EXPR
;
793 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
795 else if ((gimple_call_internal_p (call_stmt
)
796 && (!vectorizable_internal_fn_p
797 (gimple_call_internal_fn (call_stmt
))))
798 || gimple_call_tail_p (call_stmt
)
799 || gimple_call_noreturn_p (call_stmt
)
800 || !gimple_call_nothrow_p (call_stmt
)
801 || gimple_call_chain (call_stmt
))
803 if (dump_enabled_p ())
804 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
805 "Build SLP failed: unsupported call type %G",
807 /* Fatal mismatch. */
814 rhs_code
= gimple_assign_rhs_code (stmt
);
815 load_p
= TREE_CODE_CLASS (rhs_code
) == tcc_reference
;
818 /* Check the operation. */
821 first_stmt_code
= rhs_code
;
823 /* Shift arguments should be equal in all the packed stmts for a
824 vector shift with scalar shift operand. */
825 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
826 || rhs_code
== LROTATE_EXPR
827 || rhs_code
== RROTATE_EXPR
)
829 if (vectype
== boolean_type_node
)
831 if (dump_enabled_p ())
832 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
833 "Build SLP failed: shift of a"
835 /* Fatal mismatch. */
840 vec_mode
= TYPE_MODE (vectype
);
842 /* First see if we have a vector/vector shift. */
843 optab
= optab_for_tree_code (rhs_code
, vectype
,
847 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
849 /* No vector/vector shift, try for a vector/scalar shift. */
850 optab
= optab_for_tree_code (rhs_code
, vectype
,
855 if (dump_enabled_p ())
856 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
857 "Build SLP failed: no optab.\n");
858 /* Fatal mismatch. */
862 icode
= (int) optab_handler (optab
, vec_mode
);
863 if (icode
== CODE_FOR_nothing
)
865 if (dump_enabled_p ())
866 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
868 "op not supported by target.\n");
869 /* Fatal mismatch. */
873 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
874 if (!VECTOR_MODE_P (optab_op2_mode
))
876 need_same_oprnds
= true;
877 first_op1
= gimple_assign_rhs2 (stmt
);
881 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
883 need_same_oprnds
= true;
884 first_op1
= gimple_assign_rhs2 (stmt
);
889 if (first_stmt_code
!= rhs_code
890 && alt_stmt_code
== ERROR_MARK
)
891 alt_stmt_code
= rhs_code
;
892 if (first_stmt_code
!= rhs_code
893 && (first_stmt_code
!= IMAGPART_EXPR
894 || rhs_code
!= REALPART_EXPR
)
895 && (first_stmt_code
!= REALPART_EXPR
896 || rhs_code
!= IMAGPART_EXPR
)
897 /* Handle mismatches in plus/minus by computing both
898 and merging the results. */
899 && !((first_stmt_code
== PLUS_EXPR
900 || first_stmt_code
== MINUS_EXPR
)
901 && (alt_stmt_code
== PLUS_EXPR
902 || alt_stmt_code
== MINUS_EXPR
)
903 && rhs_code
== alt_stmt_code
)
904 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
905 && (first_stmt_code
== ARRAY_REF
906 || first_stmt_code
== BIT_FIELD_REF
907 || first_stmt_code
== INDIRECT_REF
908 || first_stmt_code
== COMPONENT_REF
909 || first_stmt_code
== MEM_REF
)))
911 if (dump_enabled_p ())
913 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
914 "Build SLP failed: different operation "
916 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
917 "original stmt %G", first_stmt_info
->stmt
);
924 && !operand_equal_p (first_op1
, gimple_assign_rhs2 (stmt
), 0))
926 if (dump_enabled_p ())
927 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
928 "Build SLP failed: different shift "
929 "arguments in %G", stmt
);
934 if (!load_p
&& rhs_code
== CALL_EXPR
)
936 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
937 as_a
<gcall
*> (stmt
)))
939 if (dump_enabled_p ())
940 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
941 "Build SLP failed: different calls in %G",
949 /* Grouped store or load. */
950 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
952 if (REFERENCE_CLASS_P (lhs
))
960 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
963 /* Check that there are no loads from different interleaving
964 chains in the same node. */
965 if (prev_first_load
!= first_load
)
967 if (dump_enabled_p ())
968 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
970 "Build SLP failed: different "
971 "interleaving chains in one node %G",
978 prev_first_load
= first_load
;
980 } /* Grouped access. */
985 /* Not grouped load. */
986 if (dump_enabled_p ())
987 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
988 "Build SLP failed: not grouped load %G", stmt
);
990 /* FORNOW: Not grouped loads are not supported. */
991 /* Fatal mismatch. */
996 /* Not memory operation. */
997 if (TREE_CODE_CLASS (rhs_code
) != tcc_binary
998 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
999 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
1000 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
1001 && rhs_code
!= VIEW_CONVERT_EXPR
1002 && rhs_code
!= CALL_EXPR
)
1004 if (dump_enabled_p ())
1005 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1006 "Build SLP failed: operation unsupported %G",
1008 /* Fatal mismatch. */
1013 if (rhs_code
== COND_EXPR
)
1015 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1016 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1017 enum tree_code swap_code
= ERROR_MARK
;
1018 enum tree_code invert_code
= ERROR_MARK
;
1021 first_cond_code
= TREE_CODE (cond_expr
);
1022 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1024 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1025 swap_code
= swap_tree_comparison (cond_code
);
1026 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1029 if (first_cond_code
== cond_code
)
1031 /* Isomorphic can be achieved by swapping. */
1032 else if (first_cond_code
== swap_code
)
1034 /* Isomorphic can be achieved by inverting. */
1035 else if (first_cond_code
== invert_code
)
1039 if (dump_enabled_p ())
1040 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1041 "Build SLP failed: different"
1042 " operation %G", stmt
);
1052 for (i
= 0; i
< group_size
; ++i
)
1056 /* If we allowed a two-operation SLP node verify the target can cope
1057 with the permute we are going to use. */
1058 if (alt_stmt_code
!= ERROR_MARK
1059 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1061 if (vectype
== boolean_type_node
1062 || !vect_two_operations_perm_ok_p (stmts
, group_size
,
1063 vectype
, alt_stmt_code
))
1065 for (i
= 0; i
< group_size
; ++i
)
1066 if (gimple_assign_rhs_code (stmts
[i
]->stmt
) == alt_stmt_code
)
1069 if (dump_enabled_p ())
1071 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1072 "Build SLP failed: different operation "
1073 "in stmt %G", stmts
[i
]->stmt
);
1074 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1075 "original stmt %G", first_stmt_info
->stmt
);
1080 *two_operators
= true;
1086 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1087 Note we never remove apart from at destruction time so we do not
1088 need a special value for deleted that differs from empty. */
1091 typedef vec
<stmt_vec_info
> value_type
;
1092 typedef vec
<stmt_vec_info
> compare_type
;
1093 static inline hashval_t
hash (value_type
);
1094 static inline bool equal (value_type existing
, value_type candidate
);
1095 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1096 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1097 static inline void mark_empty (value_type
&x
) { x
.release (); }
1098 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1099 static inline void remove (value_type
&x
) { x
.release (); }
1102 bst_traits::hash (value_type x
)
1105 for (unsigned i
= 0; i
< x
.length (); ++i
)
1106 h
.add_int (gimple_uid (x
[i
]->stmt
));
1110 bst_traits::equal (value_type existing
, value_type candidate
)
1112 if (existing
.length () != candidate
.length ())
1114 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1115 if (existing
[i
] != candidate
[i
])
1120 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1121 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1122 scalar_stmts_to_slp_tree_map_t
;
1125 vect_build_slp_tree_2 (vec_info
*vinfo
,
1126 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1127 poly_uint64
*max_nunits
,
1128 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1129 scalar_stmts_to_slp_tree_map_t
*bst_map
);
1132 vect_build_slp_tree (vec_info
*vinfo
,
1133 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1134 poly_uint64
*max_nunits
,
1135 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1136 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1138 if (slp_tree
*leader
= bst_map
->get (stmts
))
1140 if (dump_enabled_p ())
1141 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1142 *leader
? "" : "failed ", *leader
);
1145 (*leader
)->refcnt
++;
1146 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
1150 poly_uint64 this_max_nunits
= 1;
1151 slp_tree res
= vect_build_slp_tree_2 (vinfo
, stmts
, group_size
, max_nunits
,
1152 matches
, npermutes
, tree_size
, bst_map
);
1155 res
->max_nunits
= this_max_nunits
;
1156 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1157 /* Keep a reference for the bst_map use. */
1160 bst_map
->put (stmts
.copy (), res
);
1164 /* Recursively build an SLP tree starting from NODE.
1165 Fail (and return a value not equal to zero) if def-stmts are not
1166 isomorphic, require data permutation or are of unsupported types of
1167 operation. Otherwise, return 0.
1168 The value returned is the depth in the SLP tree where a mismatch
1172 vect_build_slp_tree_2 (vec_info
*vinfo
,
1173 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1174 poly_uint64
*max_nunits
,
1175 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1176 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1178 unsigned nops
, i
, this_tree_size
= 0;
1179 poly_uint64 this_max_nunits
= *max_nunits
;
1184 stmt_vec_info stmt_info
= stmts
[0];
1185 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1186 nops
= gimple_call_num_args (stmt
);
1187 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1189 nops
= gimple_num_ops (stmt
) - 1;
1190 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1193 else if (is_a
<gphi
*> (stmt_info
->stmt
))
1198 /* If the SLP node is a PHI (induction or reduction), terminate
1200 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1202 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1203 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
1204 if (!vect_record_max_nunits (stmt_info
, group_size
, vectype
, max_nunits
))
1207 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1208 /* Induction from different IVs is not supported. */
1209 if (def_type
== vect_induction_def
)
1211 stmt_vec_info other_info
;
1212 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1213 if (stmt_info
!= other_info
)
1216 else if (def_type
== vect_reduction_def
1217 || def_type
== vect_double_reduction_def
1218 || def_type
== vect_nested_cycle
)
1220 /* Else def types have to match. */
1221 stmt_vec_info other_info
;
1222 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1223 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1229 node
= vect_create_new_slp_node (stmts
);
1234 bool two_operators
= false;
1235 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1236 if (!vect_build_slp_tree_1 (swap
, stmts
, group_size
,
1237 &this_max_nunits
, matches
, &two_operators
))
1240 /* If the SLP node is a load, terminate the recursion unless masked. */
1241 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1242 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1244 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1247 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1252 *max_nunits
= this_max_nunits
;
1254 node
= vect_create_new_slp_node (stmts
);
1259 /* Get at the operands, verifying they are compatible. */
1260 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1261 slp_oprnd_info oprnd_info
;
1262 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1264 int res
= vect_get_and_check_slp_defs (vinfo
, &swap
[i
],
1265 stmts
, i
, &oprnds_info
);
1267 matches
[(res
== -1) ? 0 : i
] = false;
1271 for (i
= 0; i
< group_size
; ++i
)
1274 vect_free_oprnd_info (oprnds_info
);
1278 auto_vec
<slp_tree
, 4> children
;
1280 stmt_info
= stmts
[0];
1282 /* Create SLP_TREE nodes for the definition node/s. */
1283 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1286 unsigned old_tree_size
= this_tree_size
;
1289 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1291 /* COND_EXPR have one too many eventually if the condition
1293 gcc_assert (i
== 3 && nops
== 4);
1297 if (oprnd_info
->first_dt
!= vect_internal_def
1298 && oprnd_info
->first_dt
!= vect_reduction_def
1299 && oprnd_info
->first_dt
!= vect_induction_def
)
1301 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1302 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1303 oprnd_info
->ops
= vNULL
;
1304 children
.safe_push (invnode
);
1308 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1309 group_size
, &this_max_nunits
,
1311 &this_tree_size
, bst_map
)) != NULL
)
1313 /* If we have all children of child built up from scalars then just
1314 throw that away and build it up this node from scalars. */
1315 if (is_a
<bb_vec_info
> (vinfo
)
1316 && !SLP_TREE_CHILDREN (child
).is_empty ()
1317 /* ??? Rejecting patterns this way doesn't work. We'd have to
1318 do extra work to cancel the pattern so the uses see the
1320 && !oprnd_info
->any_pattern
)
1322 slp_tree grandchild
;
1324 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1325 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1330 this_tree_size
= old_tree_size
;
1331 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1332 vect_free_slp_tree (grandchild
, false);
1333 SLP_TREE_CHILDREN (child
).truncate (0);
1335 if (dump_enabled_p ())
1336 dump_printf_loc (MSG_NOTE
, vect_location
,
1337 "Building parent vector operands from "
1338 "scalars instead\n");
1339 oprnd_info
->def_stmts
= vNULL
;
1340 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1341 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1342 oprnd_info
->ops
= vNULL
;
1344 children
.safe_push (child
);
1349 oprnd_info
->def_stmts
= vNULL
;
1350 children
.safe_push (child
);
1354 /* If the SLP build failed fatally and we analyze a basic-block
1355 simply treat nodes we fail to build as externally defined
1356 (and thus build vectors from the scalar defs).
1357 The cost model will reject outright expensive cases.
1358 ??? This doesn't treat cases where permutation ultimatively
1359 fails (or we don't try permutation below). Ideally we'd
1360 even compute a permutation that will end up with the maximum
1362 if (is_a
<bb_vec_info
> (vinfo
)
1364 /* ??? Rejecting patterns this way doesn't work. We'd have to
1365 do extra work to cancel the pattern so the uses see the
1367 && !is_pattern_stmt_p (stmt_info
)
1368 && !oprnd_info
->any_pattern
)
1370 if (dump_enabled_p ())
1371 dump_printf_loc (MSG_NOTE
, vect_location
,
1372 "Building vector operands from scalars\n");
1374 child
= vect_create_new_slp_node (oprnd_info
->def_stmts
);
1375 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1376 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1377 children
.safe_push (child
);
1378 oprnd_info
->ops
= vNULL
;
1379 oprnd_info
->def_stmts
= vNULL
;
1383 /* If the SLP build for operand zero failed and operand zero
1384 and one can be commutated try that for the scalar stmts
1385 that failed the match. */
1387 /* A first scalar stmt mismatch signals a fatal mismatch. */
1389 /* ??? For COND_EXPRs we can swap the comparison operands
1390 as well as the arms under some constraints. */
1392 && oprnds_info
[1]->first_dt
== vect_internal_def
1393 && is_gimple_assign (stmt_info
->stmt
)
1394 /* Swapping operands for reductions breaks assumptions later on. */
1395 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1396 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
1397 /* Do so only if the number of not successful permutes was nor more
1398 than a cut-ff as re-trying the recursive match on
1399 possibly each level of the tree would expose exponential
1403 /* See whether we can swap the matching or the non-matching
1405 bool swap_not_matching
= true;
1408 for (j
= 0; j
< group_size
; ++j
)
1410 if (matches
[j
] != !swap_not_matching
)
1412 stmt_vec_info stmt_info
= stmts
[j
];
1413 /* Verify if we can swap operands of this stmt. */
1414 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1416 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1418 if (!swap_not_matching
)
1420 swap_not_matching
= false;
1425 while (j
!= group_size
);
1427 /* Swap mismatched definition stmts. */
1428 if (dump_enabled_p ())
1429 dump_printf_loc (MSG_NOTE
, vect_location
,
1430 "Re-trying with swapped operands of stmts ");
1431 for (j
= 0; j
< group_size
; ++j
)
1432 if (matches
[j
] == !swap_not_matching
)
1434 std::swap (oprnds_info
[0]->def_stmts
[j
],
1435 oprnds_info
[1]->def_stmts
[j
]);
1436 std::swap (oprnds_info
[0]->ops
[j
],
1437 oprnds_info
[1]->ops
[j
]);
1438 if (dump_enabled_p ())
1439 dump_printf (MSG_NOTE
, "%d ", j
);
1441 if (dump_enabled_p ())
1442 dump_printf (MSG_NOTE
, "\n");
1443 /* And try again with scratch 'matches' ... */
1444 bool *tem
= XALLOCAVEC (bool, group_size
);
1445 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1446 group_size
, &this_max_nunits
,
1448 &this_tree_size
, bst_map
)) != NULL
)
1450 /* If we have all children of child built up from scalars then
1451 just throw that away and build it up this node from scalars. */
1452 if (is_a
<bb_vec_info
> (vinfo
)
1453 && !SLP_TREE_CHILDREN (child
).is_empty ()
1454 /* ??? Rejecting patterns this way doesn't work. We'd have
1455 to do extra work to cancel the pattern so the uses see the
1457 && !oprnd_info
->any_pattern
)
1460 slp_tree grandchild
;
1462 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1463 if (SLP_TREE_DEF_TYPE (grandchild
) != vect_external_def
)
1468 this_tree_size
= old_tree_size
;
1469 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child
), j
, grandchild
)
1470 vect_free_slp_tree (grandchild
, false);
1471 SLP_TREE_CHILDREN (child
).truncate (0);
1473 if (dump_enabled_p ())
1474 dump_printf_loc (MSG_NOTE
, vect_location
,
1475 "Building parent vector operands from "
1476 "scalars instead\n");
1477 oprnd_info
->def_stmts
= vNULL
;
1478 SLP_TREE_DEF_TYPE (child
) = vect_external_def
;
1479 SLP_TREE_SCALAR_OPS (child
) = oprnd_info
->ops
;
1480 oprnd_info
->ops
= vNULL
;
1482 children
.safe_push (child
);
1487 oprnd_info
->def_stmts
= vNULL
;
1488 children
.safe_push (child
);
1496 gcc_assert (child
== NULL
);
1497 FOR_EACH_VEC_ELT (children
, j
, child
)
1498 vect_free_slp_tree (child
, false);
1499 vect_free_oprnd_info (oprnds_info
);
1503 vect_free_oprnd_info (oprnds_info
);
1505 *tree_size
+= this_tree_size
+ 1;
1506 *max_nunits
= this_max_nunits
;
1508 node
= vect_create_new_slp_node (stmts
);
1509 SLP_TREE_TWO_OPERATORS (node
) = two_operators
;
1510 SLP_TREE_CHILDREN (node
).splice (children
);
1514 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1517 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1518 slp_tree node
, hash_set
<slp_tree
> &visited
)
1521 stmt_vec_info stmt_info
;
1525 if (visited
.add (node
))
1528 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1529 dump_user_location_t user_loc
= loc
.get_user_location ();
1530 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u)\n",
1531 SLP_TREE_DEF_TYPE (node
) == vect_external_def
1533 : (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
1536 estimated_poly_value (node
->max_nunits
));
1537 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1538 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1539 dump_printf_loc (metadata
, user_loc
, "\tstmt %u %G", i
, stmt_info
->stmt
);
1542 dump_printf_loc (metadata
, user_loc
, "\t{ ");
1543 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1544 dump_printf (metadata
, "%T%s ", op
,
1545 i
< SLP_TREE_SCALAR_OPS (node
).length () - 1 ? "," : "");
1546 dump_printf (metadata
, "}\n");
1548 if (SLP_TREE_CHILDREN (node
).is_empty ())
1550 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1551 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1552 dump_printf (dump_kind
, " %p", (void *)child
);
1553 dump_printf (dump_kind
, "\n");
1554 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1555 vect_print_slp_tree (dump_kind
, loc
, child
, visited
);
1559 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1562 hash_set
<slp_tree
> visited
;
1563 vect_print_slp_tree (dump_kind
, loc
, node
, visited
);
1566 /* Mark the tree rooted at NODE with PURE_SLP. */
1569 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
1572 stmt_vec_info stmt_info
;
1575 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1578 if (visited
.add (node
))
1581 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1582 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
1584 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1585 vect_mark_slp_stmts (child
, visited
);
1589 vect_mark_slp_stmts (slp_tree node
)
1591 hash_set
<slp_tree
> visited
;
1592 vect_mark_slp_stmts (node
, visited
);
1595 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1598 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
1601 stmt_vec_info stmt_info
;
1604 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1607 if (visited
.add (node
))
1610 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1612 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
1613 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
1614 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
1617 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1618 vect_mark_slp_stmts_relevant (child
, visited
);
1622 vect_mark_slp_stmts_relevant (slp_tree node
)
1624 hash_set
<slp_tree
> visited
;
1625 vect_mark_slp_stmts_relevant (node
, visited
);
1629 /* Rearrange the statements of NODE according to PERMUTATION. */
1632 vect_slp_rearrange_stmts (slp_tree node
, unsigned int group_size
,
1633 vec
<unsigned> permutation
,
1634 hash_set
<slp_tree
> &visited
)
1639 if (visited
.add (node
))
1642 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1643 vect_slp_rearrange_stmts (child
, group_size
, permutation
, visited
);
1645 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1647 gcc_assert (group_size
== SLP_TREE_SCALAR_STMTS (node
).length ());
1648 vec
<stmt_vec_info
> tmp_stmts
;
1649 tmp_stmts
.create (group_size
);
1650 tmp_stmts
.quick_grow (group_size
);
1651 stmt_vec_info stmt_info
;
1652 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1653 tmp_stmts
[permutation
[i
]] = stmt_info
;
1654 SLP_TREE_SCALAR_STMTS (node
).release ();
1655 SLP_TREE_SCALAR_STMTS (node
) = tmp_stmts
;
1657 if (SLP_TREE_SCALAR_OPS (node
).exists ())
1659 gcc_assert (group_size
== SLP_TREE_SCALAR_OPS (node
).length ());
1661 tmp_ops
.create (group_size
);
1662 tmp_ops
.quick_grow (group_size
);
1664 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1665 tmp_ops
[permutation
[i
]] = op
;
1666 SLP_TREE_SCALAR_OPS (node
).release ();
1667 SLP_TREE_SCALAR_OPS (node
) = tmp_ops
;
1672 /* Attempt to reorder stmts in a reduction chain so that we don't
1673 require any load permutation. Return true if that was possible,
1674 otherwise return false. */
1677 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn
)
1679 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1682 slp_tree node
, load
;
1684 /* Compare all the permutation sequences to the first one. We know
1685 that at least one load is permuted. */
1686 node
= SLP_INSTANCE_LOADS (slp_instn
)[0];
1687 if (!node
->load_permutation
.exists ())
1689 for (i
= 1; SLP_INSTANCE_LOADS (slp_instn
).iterate (i
, &load
); ++i
)
1691 if (!load
->load_permutation
.exists ())
1693 FOR_EACH_VEC_ELT (load
->load_permutation
, j
, lidx
)
1694 if (lidx
!= node
->load_permutation
[j
])
1698 /* Check that the loads in the first sequence are different and there
1699 are no gaps between them. */
1700 auto_sbitmap
load_index (group_size
);
1701 bitmap_clear (load_index
);
1702 FOR_EACH_VEC_ELT (node
->load_permutation
, i
, lidx
)
1704 if (lidx
>= group_size
)
1706 if (bitmap_bit_p (load_index
, lidx
))
1709 bitmap_set_bit (load_index
, lidx
);
1711 for (i
= 0; i
< group_size
; i
++)
1712 if (!bitmap_bit_p (load_index
, i
))
1715 /* This permutation is valid for reduction. Since the order of the
1716 statements in the nodes is not important unless they are memory
1717 accesses, we can rearrange the statements in all the nodes
1718 according to the order of the loads. */
1719 hash_set
<slp_tree
> visited
;
1720 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn
), group_size
,
1721 node
->load_permutation
, visited
);
1723 /* We are done, no actual permutations need to be generated. */
1724 poly_uint64 unrolling_factor
= SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
);
1725 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1727 stmt_vec_info first_stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1728 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (first_stmt_info
);
1729 /* But we have to keep those permutations that are required because
1730 of handling of gaps. */
1731 if (known_eq (unrolling_factor
, 1U)
1732 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
1733 && DR_GROUP_GAP (first_stmt_info
) == 0))
1734 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1736 for (j
= 0; j
< SLP_TREE_LOAD_PERMUTATION (node
).length (); ++j
)
1737 SLP_TREE_LOAD_PERMUTATION (node
)[j
] = j
;
1743 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
1746 vect_gather_slp_loads (slp_instance inst
, slp_tree node
,
1747 hash_set
<slp_tree
> &visited
)
1749 if (visited
.add (node
))
1752 if (SLP_TREE_CHILDREN (node
).length () == 0)
1754 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
1756 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1757 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1758 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1759 SLP_INSTANCE_LOADS (inst
).safe_push (node
);
1765 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1766 vect_gather_slp_loads (inst
, child
, visited
);
1771 vect_gather_slp_loads (slp_instance inst
, slp_tree node
)
1773 hash_set
<slp_tree
> visited
;
1774 vect_gather_slp_loads (inst
, node
, visited
);
1777 /* Check if the required load permutations in the SLP instance
1778 SLP_INSTN are supported. */
1781 vect_supported_load_permutation_p (slp_instance slp_instn
)
1783 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_instn
);
1784 unsigned int i
, j
, k
, next
;
1787 if (dump_enabled_p ())
1789 dump_printf_loc (MSG_NOTE
, vect_location
, "Load permutation ");
1790 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1791 if (node
->load_permutation
.exists ())
1792 FOR_EACH_VEC_ELT (node
->load_permutation
, j
, next
)
1793 dump_printf (MSG_NOTE
, "%d ", next
);
1795 for (k
= 0; k
< group_size
; ++k
)
1796 dump_printf (MSG_NOTE
, "%d ", k
);
1797 dump_printf (MSG_NOTE
, "\n");
1800 /* In case of reduction every load permutation is allowed, since the order
1801 of the reduction statements is not important (as opposed to the case of
1802 grouped stores). The only condition we need to check is that all the
1803 load nodes are of the same size and have the same permutation (and then
1804 rearrange all the nodes of the SLP instance according to this
1807 /* Check that all the load nodes are of the same size. */
1808 /* ??? Can't we assert this? */
1809 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1810 if (SLP_TREE_SCALAR_STMTS (node
).length () != (unsigned) group_size
)
1813 node
= SLP_INSTANCE_TREE (slp_instn
);
1814 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1816 /* Reduction (there are no data-refs in the root).
1817 In reduction chain the order of the loads is not important. */
1818 if (!STMT_VINFO_DATA_REF (stmt_info
)
1819 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
1820 vect_attempt_slp_rearrange_stmts (slp_instn
);
1822 /* In basic block vectorization we allow any subchain of an interleaving
1824 FORNOW: not supported in loop SLP because of realignment compications. */
1825 if (STMT_VINFO_BB_VINFO (stmt_info
))
1827 /* Check whether the loads in an instance form a subchain and thus
1828 no permutation is necessary. */
1829 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1831 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1833 bool subchain_p
= true;
1834 stmt_vec_info next_load_info
= NULL
;
1835 stmt_vec_info load_info
;
1836 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1839 && (next_load_info
!= load_info
1840 || DR_GROUP_GAP (load_info
) != 1))
1845 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
1848 SLP_TREE_LOAD_PERMUTATION (node
).release ();
1851 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
1852 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
1853 unsigned HOST_WIDE_INT nunits
;
1854 unsigned k
, maxk
= 0;
1855 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), j
, k
)
1858 /* In BB vectorization we may not actually use a loaded vector
1859 accessing elements in excess of DR_GROUP_SIZE. */
1860 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
1861 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
1862 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
1864 if (dump_enabled_p ())
1865 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1866 "BB vectorization with gaps at the end of "
1867 "a load is not supported\n");
1871 /* Verify the permutation can be generated. */
1874 if (!vect_transform_slp_perm_load (node
, tem
, NULL
,
1875 1, slp_instn
, true, &n_perms
))
1877 if (dump_enabled_p ())
1878 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1880 "unsupported load permutation\n");
1888 /* For loop vectorization verify we can generate the permutation. Be
1889 conservative about the vectorization factor, there are permutations
1890 that will use three vector inputs only starting from a specific factor
1891 and the vectorization factor is not yet final.
1892 ??? The SLP instance unrolling factor might not be the maximum one. */
1895 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn
),
1896 LOOP_VINFO_VECT_FACTOR
1897 (STMT_VINFO_LOOP_VINFO (stmt_info
)));
1898 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn
), i
, node
)
1899 if (node
->load_permutation
.exists ()
1900 && !vect_transform_slp_perm_load (node
, vNULL
, NULL
, test_vf
,
1901 slp_instn
, true, &n_perms
))
1908 /* Find the last store in SLP INSTANCE. */
1911 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
1913 stmt_vec_info last
= NULL
;
1914 stmt_vec_info stmt_vinfo
;
1916 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
1918 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
1919 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
1925 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1926 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1927 (also containing the first GROUP1_SIZE stmts, since stores are
1928 consecutive), the second containing the remainder.
1929 Return the first stmt in the second group. */
1931 static stmt_vec_info
1932 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
1934 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
1935 gcc_assert (group1_size
> 0);
1936 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
1937 gcc_assert (group2_size
> 0);
1938 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
1940 stmt_vec_info stmt_info
= first_vinfo
;
1941 for (unsigned i
= group1_size
; i
> 1; i
--)
1943 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1944 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1946 /* STMT is now the last element of the first group. */
1947 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
1948 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
1950 DR_GROUP_SIZE (group2
) = group2_size
;
1951 for (stmt_info
= group2
; stmt_info
;
1952 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
1954 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
1955 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
1958 /* For the second group, the DR_GROUP_GAP is that before the original group,
1959 plus skipping over the first vector. */
1960 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
1962 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1963 DR_GROUP_GAP (first_vinfo
) += group2_size
;
1965 if (dump_enabled_p ())
1966 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
1967 group1_size
, group2_size
);
1972 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1973 statements and a vector of NUNITS elements. */
1976 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
1978 return exact_div (common_multiple (nunits
, group_size
), group_size
);
1981 /* Analyze an SLP instance starting from a group of grouped stores. Call
1982 vect_build_slp_tree to build a tree of packed stmts if possible.
1983 Return FALSE if it's impossible to SLP any stmt in the loop. */
1986 vect_analyze_slp_instance (vec_info
*vinfo
,
1987 stmt_vec_info stmt_info
, unsigned max_tree_size
)
1989 slp_instance new_instance
;
1991 unsigned int group_size
;
1992 tree vectype
, scalar_type
= NULL_TREE
;
1994 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
1995 vec
<stmt_vec_info
> scalar_stmts
;
1997 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1999 scalar_type
= TREE_TYPE (DR_REF (dr
));
2000 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
2001 group_size
= DR_GROUP_SIZE (stmt_info
);
2003 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2005 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2006 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2007 group_size
= REDUC_GROUP_SIZE (stmt_info
);
2011 gcc_assert (is_a
<loop_vec_info
> (vinfo
));
2012 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2013 group_size
= as_a
<loop_vec_info
> (vinfo
)->reductions
.length ();
2018 if (dump_enabled_p ())
2019 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2020 "Build SLP failed: unsupported data-type %T\n",
2025 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2027 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2028 scalar_stmts
.create (group_size
);
2029 stmt_vec_info next_info
= stmt_info
;
2030 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2032 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2035 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2036 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
2039 else if (!dr
&& REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2041 /* Collect the reduction stmts and store them in
2042 SLP_TREE_SCALAR_STMTS. */
2045 scalar_stmts
.safe_push (vect_stmt_to_vectorize (next_info
));
2046 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
2048 /* Mark the first element of the reduction chain as reduction to properly
2049 transform the node. In the reduction analysis phase only the last
2050 element of the chain is marked as reduction. */
2051 STMT_VINFO_DEF_TYPE (stmt_info
)
2052 = STMT_VINFO_DEF_TYPE (scalar_stmts
.last ());
2053 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
2054 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
2058 /* Collect reduction statements. */
2059 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2060 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2061 scalar_stmts
.safe_push (next_info
);
2064 /* Build the tree for the SLP instance. */
2065 bool *matches
= XALLOCAVEC (bool, group_size
);
2066 unsigned npermutes
= 0;
2067 scalar_stmts_to_slp_tree_map_t
*bst_map
2068 = new scalar_stmts_to_slp_tree_map_t ();
2069 poly_uint64 max_nunits
= nunits
;
2070 unsigned tree_size
= 0;
2071 node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2072 &max_nunits
, matches
, &npermutes
,
2073 &tree_size
, bst_map
);
2074 /* The map keeps a reference on SLP nodes built, release that. */
2075 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
2076 it
!= bst_map
->end (); ++it
)
2078 vect_free_slp_tree ((*it
).second
, false);
2082 /* If this is a reduction chain with a conversion in front
2083 amend the SLP tree with a node for that. */
2085 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
2086 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
)
2088 /* Get at the conversion stmt - we know it's the single use
2089 of the last stmt of the reduction chain. */
2090 gimple
*tem
= vect_orig_stmt (scalar_stmts
[group_size
- 1])->stmt
;
2091 use_operand_p use_p
;
2093 bool r
= single_imm_use (gimple_assign_lhs (tem
), &use_p
, &use_stmt
);
2095 next_info
= vinfo
->lookup_stmt (use_stmt
);
2096 next_info
= vect_stmt_to_vectorize (next_info
);
2097 scalar_stmts
= vNULL
;
2098 scalar_stmts
.create (group_size
);
2099 for (unsigned i
= 0; i
< group_size
; ++i
)
2100 scalar_stmts
.quick_push (next_info
);
2101 slp_tree conv
= vect_create_new_slp_node (scalar_stmts
);
2102 SLP_TREE_CHILDREN (conv
).quick_push (node
);
2104 /* We also have to fake this conversion stmt as SLP reduction group
2105 so we don't have to mess with too much code elsewhere. */
2106 REDUC_GROUP_FIRST_ELEMENT (next_info
) = next_info
;
2107 REDUC_GROUP_NEXT_ELEMENT (next_info
) = NULL
;
2110 /* Calculate the unrolling factor based on the smallest type. */
2111 poly_uint64 unrolling_factor
2112 = calculate_unrolling_factor (max_nunits
, group_size
);
2114 if (maybe_ne (unrolling_factor
, 1U)
2115 && is_a
<bb_vec_info
> (vinfo
))
2117 unsigned HOST_WIDE_INT const_max_nunits
;
2118 if (!max_nunits
.is_constant (&const_max_nunits
)
2119 || const_max_nunits
> group_size
)
2121 if (dump_enabled_p ())
2122 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2123 "Build SLP failed: store group "
2124 "size not a multiple of the vector size "
2125 "in basic block SLP\n");
2126 vect_free_slp_tree (node
, false);
2129 /* Fatal mismatch. */
2130 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2131 vect_free_slp_tree (node
, false);
2135 /* Create a new SLP instance. */
2136 new_instance
= XNEW (class _slp_instance
);
2137 SLP_INSTANCE_TREE (new_instance
) = node
;
2138 SLP_INSTANCE_GROUP_SIZE (new_instance
) = group_size
;
2139 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2140 SLP_INSTANCE_LOADS (new_instance
) = vNULL
;
2141 vect_gather_slp_loads (new_instance
, node
);
2142 if (dump_enabled_p ())
2143 dump_printf_loc (MSG_NOTE
, vect_location
,
2144 "SLP size %u vs. limit %u.\n",
2145 tree_size
, max_tree_size
);
2147 /* Compute the load permutation. */
2149 bool loads_permuted
= false;
2150 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2152 vec
<unsigned> load_permutation
;
2154 stmt_vec_info load_info
;
2155 bool this_load_permuted
= false;
2156 load_permutation
.create (group_size
);
2157 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT
2158 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2159 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node
), j
, load_info
)
2161 int load_place
= vect_get_place_in_interleaving_chain
2162 (load_info
, first_stmt_info
);
2163 gcc_assert (load_place
!= -1);
2164 if (load_place
!= j
)
2165 this_load_permuted
= true;
2166 load_permutation
.safe_push (load_place
);
2168 if (!this_load_permuted
2169 /* The load requires permutation when unrolling exposes
2170 a gap either because the group is larger than the SLP
2171 group-size or because there is a gap between the groups. */
2172 && (known_eq (unrolling_factor
, 1U)
2173 || (group_size
== DR_GROUP_SIZE (first_stmt_info
)
2174 && DR_GROUP_GAP (first_stmt_info
) == 0)))
2176 load_permutation
.release ();
2179 SLP_TREE_LOAD_PERMUTATION (load_node
) = load_permutation
;
2180 loads_permuted
= true;
2185 if (!vect_supported_load_permutation_p (new_instance
))
2187 if (dump_enabled_p ())
2188 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2189 "Build SLP failed: unsupported load "
2190 "permutation %G", stmt_info
->stmt
);
2191 vect_free_slp_instance (new_instance
, false);
2196 /* If the loads and stores can be handled with load/store-lan
2197 instructions do not generate this SLP instance. */
2198 if (is_a
<loop_vec_info
> (vinfo
)
2200 && dr
&& vect_store_lanes_supported (vectype
, group_size
, false))
2203 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance
), i
, load_node
)
2205 stmt_vec_info stmt_vinfo
= DR_GROUP_FIRST_ELEMENT
2206 (SLP_TREE_SCALAR_STMTS (load_node
)[0]);
2207 /* Use SLP for strided accesses (or if we can't load-lanes). */
2208 if (STMT_VINFO_STRIDED_P (stmt_vinfo
)
2209 || ! vect_load_lanes_supported
2210 (STMT_VINFO_VECTYPE (stmt_vinfo
),
2211 DR_GROUP_SIZE (stmt_vinfo
), false))
2214 if (i
== SLP_INSTANCE_LOADS (new_instance
).length ())
2216 if (dump_enabled_p ())
2217 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2218 "Built SLP cancelled: can use "
2219 "load/store-lanes\n");
2220 vect_free_slp_instance (new_instance
, false);
2225 vinfo
->slp_instances
.safe_push (new_instance
);
2227 if (dump_enabled_p ())
2229 dump_printf_loc (MSG_NOTE
, vect_location
,
2230 "Final SLP tree for instance:\n");
2231 vect_print_slp_tree (MSG_NOTE
, vect_location
, node
);
2239 /* Failed to SLP. */
2240 /* Free the allocated memory. */
2241 scalar_stmts
.release ();
2244 /* For basic block SLP, try to break the group up into multiples of the
2246 unsigned HOST_WIDE_INT const_nunits
;
2247 if (is_a
<bb_vec_info
> (vinfo
)
2248 && STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2249 && DR_GROUP_FIRST_ELEMENT (stmt_info
)
2250 && nunits
.is_constant (&const_nunits
))
2252 /* We consider breaking the group only on VF boundaries from the existing
2254 for (i
= 0; i
< group_size
; i
++)
2255 if (!matches
[i
]) break;
2257 if (i
>= const_nunits
&& i
< group_size
)
2259 /* Split into two groups at the first vector boundary before i. */
2260 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2261 unsigned group1_size
= i
& ~(const_nunits
- 1);
2263 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2265 bool res
= vect_analyze_slp_instance (vinfo
, stmt_info
,
2267 /* If the first non-match was in the middle of a vector,
2268 skip the rest of that vector. */
2269 if (group1_size
< i
)
2271 i
= group1_size
+ const_nunits
;
2273 rest
= vect_split_slp_store_group (rest
, const_nunits
);
2276 res
|= vect_analyze_slp_instance (vinfo
, rest
, max_tree_size
);
2279 /* Even though the first vector did not all match, we might be able to SLP
2280 (some) of the remainder. FORNOW ignore this possibility. */
2287 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2288 trees of packed scalar stmts if SLP is possible. */
2291 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2294 stmt_vec_info first_element
;
2296 DUMP_VECT_SCOPE ("vect_analyze_slp");
2298 /* Find SLP sequences starting from groups of grouped stores. */
2299 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2300 vect_analyze_slp_instance (vinfo
, first_element
, max_tree_size
);
2302 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2304 if (loop_vinfo
->reduction_chains
.length () > 0)
2306 /* Find SLP sequences starting from reduction chains. */
2307 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2308 if (! vect_analyze_slp_instance (vinfo
, first_element
,
2311 /* Dissolve reduction chain group. */
2312 stmt_vec_info vinfo
= first_element
;
2313 stmt_vec_info last
= NULL
;
2316 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2317 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2318 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2322 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2323 /* It can be still vectorized as part of an SLP reduction. */
2324 loop_vinfo
->reductions
.safe_push (last
);
2328 /* Find SLP sequences starting from groups of reductions. */
2329 if (loop_vinfo
->reductions
.length () > 1)
2330 vect_analyze_slp_instance (vinfo
, loop_vinfo
->reductions
[0],
2334 return opt_result::success ();
2338 /* For each possible SLP instance decide whether to SLP it and calculate overall
2339 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2340 least one instance. */
2343 vect_make_slp_decision (loop_vec_info loop_vinfo
)
2346 poly_uint64 unrolling_factor
= 1;
2347 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2348 slp_instance instance
;
2349 int decided_to_slp
= 0;
2351 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2353 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2355 /* FORNOW: SLP if you can. */
2356 /* All unroll factors have the form vinfo->vector_size * X for some
2357 rational X, so they must have a common multiple. */
2359 = force_common_multiple (unrolling_factor
,
2360 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
2362 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2363 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2364 loop-based vectorization. Such stmts will be marked as HYBRID. */
2365 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
2369 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
2371 if (decided_to_slp
&& dump_enabled_p ())
2373 dump_printf_loc (MSG_NOTE
, vect_location
,
2374 "Decided to SLP %d instances. Unrolling factor ",
2376 dump_dec (MSG_NOTE
, unrolling_factor
);
2377 dump_printf (MSG_NOTE
, "\n");
2380 return (decided_to_slp
> 0);
2384 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2385 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2388 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
,
2389 hash_map
<slp_tree
, unsigned> &visited
)
2391 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (node
)[i
];
2392 imm_use_iterator imm_iter
;
2394 stmt_vec_info use_vinfo
;
2396 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
2399 /* We need to union stype over the incoming graph edges but we still
2400 want to limit recursion to stay O(N+E). */
2401 bool only_edge
= (++visited
.get_or_insert (node
) < node
->refcnt
);
2403 /* Propagate hybrid down the SLP tree. */
2404 if (stype
== hybrid
)
2406 else if (HYBRID_SLP_STMT (stmt_vinfo
))
2408 else if (!only_edge
)
2410 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2411 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo
));
2412 /* If we get a pattern stmt here we have to use the LHS of the
2413 original stmt for immediate uses. */
2414 gimple
*stmt
= vect_orig_stmt (stmt_vinfo
)->stmt
;
2416 if (gimple_code (stmt
) == GIMPLE_PHI
)
2417 def
= gimple_phi_result (stmt
);
2419 def
= SINGLE_SSA_TREE_OPERAND (stmt
, SSA_OP_DEF
);
2421 FOR_EACH_IMM_USE_STMT (use_stmt
, imm_iter
, def
)
2423 use_vinfo
= loop_vinfo
->lookup_stmt (use_stmt
);
2426 use_vinfo
= vect_stmt_to_vectorize (use_vinfo
);
2427 if (!STMT_SLP_TYPE (use_vinfo
)
2428 && (STMT_VINFO_RELEVANT (use_vinfo
)
2429 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2430 && !(gimple_code (use_stmt
) == GIMPLE_PHI
2431 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2433 if (dump_enabled_p ())
2434 dump_printf_loc (MSG_NOTE
, vect_location
, "use of SLP "
2435 "def in non-SLP stmt: %G", use_stmt
);
2442 && !HYBRID_SLP_STMT (stmt_vinfo
))
2444 if (dump_enabled_p ())
2445 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2447 STMT_SLP_TYPE (stmt_vinfo
) = hybrid
;
2451 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2452 if (SLP_TREE_DEF_TYPE (child
) != vect_external_def
2453 && SLP_TREE_DEF_TYPE (child
) != vect_constant_def
)
2454 vect_detect_hybrid_slp_stmts (child
, i
, stype
, visited
);
2458 vect_detect_hybrid_slp_stmts (slp_tree node
, unsigned i
, slp_vect_type stype
)
2460 hash_map
<slp_tree
, unsigned> visited
;
2461 vect_detect_hybrid_slp_stmts (node
, i
, stype
, visited
);
2464 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2467 vect_detect_hybrid_slp_1 (tree
*tp
, int *, void *data
)
2469 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
2470 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2475 stmt_vec_info def_stmt_info
= loop_vinfo
->lookup_def (*tp
);
2476 if (def_stmt_info
&& PURE_SLP_STMT (def_stmt_info
))
2478 if (dump_enabled_p ())
2479 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
2480 def_stmt_info
->stmt
);
2481 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
2488 vect_detect_hybrid_slp_2 (gimple_stmt_iterator
*gsi
, bool *handled
,
2491 loop_vec_info loop_vinfo
= (loop_vec_info
) wi
->info
;
2492 stmt_vec_info use_vinfo
= loop_vinfo
->lookup_stmt (gsi_stmt (*gsi
));
2493 /* If the stmt is in a SLP instance then this isn't a reason
2494 to mark use definitions in other SLP instances as hybrid. */
2495 if (! STMT_SLP_TYPE (use_vinfo
)
2496 && (STMT_VINFO_RELEVANT (use_vinfo
)
2497 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo
)))
2498 && ! (gimple_code (gsi_stmt (*gsi
)) == GIMPLE_PHI
2499 && STMT_VINFO_DEF_TYPE (use_vinfo
) == vect_reduction_def
))
2506 /* Find stmts that must be both vectorized and SLPed. */
2509 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
2512 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
2513 slp_instance instance
;
2515 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2517 /* First walk all pattern stmt in the loop and mark defs of uses as
2518 hybrid because immediate uses in them are not recorded. */
2519 for (i
= 0; i
< LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
; ++i
)
2521 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
2522 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
);
2525 gimple
*stmt
= gsi_stmt (gsi
);
2526 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
2527 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
2530 memset (&wi
, 0, sizeof (wi
));
2531 wi
.info
= loop_vinfo
;
2532 gimple_stmt_iterator gsi2
2533 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
)->stmt
);
2534 walk_gimple_stmt (&gsi2
, vect_detect_hybrid_slp_2
,
2535 vect_detect_hybrid_slp_1
, &wi
);
2536 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
2537 vect_detect_hybrid_slp_2
,
2538 vect_detect_hybrid_slp_1
, &wi
);
2543 /* Then walk the SLP instance trees marking stmts with uses in
2544 non-SLP stmts as hybrid, also propagating hybrid down the
2545 SLP tree, collecting the above info on-the-fly. */
2546 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2548 for (unsigned i
= 0; i
< SLP_INSTANCE_GROUP_SIZE (instance
); ++i
)
2549 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance
),
2555 /* Initialize a bb_vec_info struct for the statements between
2556 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2558 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in
,
2559 gimple_stmt_iterator region_end_in
,
2560 vec_info_shared
*shared
)
2561 : vec_info (vec_info::bb
, init_cost (NULL
), shared
),
2562 bb (gsi_bb (region_begin_in
)),
2563 region_begin (region_begin_in
),
2564 region_end (region_end_in
)
2566 gimple_stmt_iterator gsi
;
2568 for (gsi
= region_begin
; gsi_stmt (gsi
) != gsi_stmt (region_end
);
2571 gimple
*stmt
= gsi_stmt (gsi
);
2572 gimple_set_uid (stmt
, 0);
2580 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2581 stmts in the basic block. */
2583 _bb_vec_info::~_bb_vec_info ()
2585 for (gimple_stmt_iterator si
= region_begin
;
2586 gsi_stmt (si
) != gsi_stmt (region_end
); gsi_next (&si
))
2587 /* Reset region marker. */
2588 gimple_set_uid (gsi_stmt (si
), -1);
2593 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2594 given that child nodes have already been processed, and that
2595 their def types currently match their SLP node's def type. */
2598 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
2599 slp_instance node_instance
,
2600 stmt_vector_for_cost
*cost_vec
)
2602 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2603 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
2605 /* For BB vectorization vector types are assigned here.
2606 Memory accesses already got their vector type assigned
2607 in vect_analyze_data_refs. */
2608 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2610 && ! STMT_VINFO_DATA_REF (stmt_info
))
2612 tree vectype
, nunits_vectype
;
2613 if (!vect_get_vector_types_for_stmt (stmt_info
, &vectype
,
2615 /* We checked this when building the node. */
2617 if (vectype
== boolean_type_node
)
2619 vectype
= vect_get_mask_type_for_stmt (stmt_info
);
2621 /* vect_get_mask_type_for_stmt has already explained the
2626 stmt_vec_info sstmt_info
;
2628 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, sstmt_info
)
2629 STMT_VINFO_VECTYPE (sstmt_info
) = vectype
;
2632 /* Calculate the number of vector statements to be created for the
2633 scalar stmts in this node. For SLP reductions it is equal to the
2634 number of vector statements in the children (which has already been
2635 calculated by the recursive call). Otherwise it is the number of
2636 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2637 VF divided by the number of elements in a vector. */
2638 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2639 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
2641 for (unsigned i
= 0; i
< SLP_TREE_CHILDREN (node
).length (); ++i
)
2642 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node
)[i
]) == vect_internal_def
)
2644 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2645 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[i
]);
2652 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2653 vf
= loop_vinfo
->vectorization_factor
;
2656 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (node_instance
);
2657 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2658 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2659 = vect_get_num_vectors (vf
* group_size
, vectype
);
2663 return vect_analyze_stmt (stmt_info
, &dummy
, node
, node_instance
, cost_vec
);
2666 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2667 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2669 Return true if the operations are supported. */
2672 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
2673 slp_instance node_instance
,
2674 scalar_stmts_to_slp_tree_map_t
*visited
,
2675 scalar_stmts_to_slp_tree_map_t
*lvisited
,
2676 stmt_vector_for_cost
*cost_vec
)
2681 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2684 /* If we already analyzed the exact same set of scalar stmts we're done.
2685 We share the generated vector stmts for those. */
2687 if ((leader
= visited
->get (SLP_TREE_SCALAR_STMTS (node
)))
2688 || (leader
= lvisited
->get (SLP_TREE_SCALAR_STMTS (node
))))
2690 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
2691 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader
);
2695 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2696 doesn't result in any issue since we throw away the lvisited set
2698 lvisited
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
2700 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2701 if (!vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
2702 visited
, lvisited
, cost_vec
))
2705 /* ??? We have to catch the case late where two first scalar stmts appear
2706 in multiple SLP children with different def type and fail. Remember
2707 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2708 match it when that is vect_internal_def. */
2709 auto_vec
<vect_def_type
, 4> dt
;
2710 dt
.safe_grow (SLP_TREE_CHILDREN (node
).length ());
2711 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2712 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2713 dt
[j
] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]);
2715 /* Push SLP node def-type to stmt operands. */
2716 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2717 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
2718 && SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2719 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2720 = SLP_TREE_DEF_TYPE (child
);
2722 /* Check everything worked out. */
2724 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2725 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2727 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
2729 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2730 != SLP_TREE_DEF_TYPE (child
))
2733 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0])
2737 if (!res
&& dump_enabled_p ())
2738 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2739 "not vectorized: same operand with different "
2740 "def type in stmt.\n");
2743 res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
2746 /* Restore def-types. */
2747 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2748 if (SLP_TREE_SCALAR_STMTS (child
).length () != 0)
2749 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child
)[0]) = dt
[j
];
2755 /* Analyze statements in SLP instances of VINFO. Return true if the
2756 operations are supported. */
2759 vect_slp_analyze_operations (vec_info
*vinfo
)
2761 slp_instance instance
;
2764 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2766 scalar_stmts_to_slp_tree_map_t
*visited
2767 = new scalar_stmts_to_slp_tree_map_t ();
2768 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
2770 scalar_stmts_to_slp_tree_map_t lvisited
;
2771 stmt_vector_for_cost cost_vec
;
2772 cost_vec
.create (2);
2773 if (!vect_slp_analyze_node_operations (vinfo
,
2774 SLP_INSTANCE_TREE (instance
),
2775 instance
, visited
, &lvisited
,
2778 slp_tree node
= SLP_INSTANCE_TREE (instance
);
2779 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2780 if (dump_enabled_p ())
2781 dump_printf_loc (MSG_NOTE
, vect_location
,
2782 "removing SLP instance operations starting from: %G",
2784 vect_free_slp_instance (instance
, false);
2785 vinfo
->slp_instances
.ordered_remove (i
);
2786 cost_vec
.release ();
2790 for (scalar_stmts_to_slp_tree_map_t::iterator x
= lvisited
.begin();
2791 x
!= lvisited
.end(); ++x
)
2792 visited
->put ((*x
).first
.copy (), (*x
).second
);
2795 add_stmt_costs (vinfo
->target_cost_data
, &cost_vec
);
2796 cost_vec
.release ();
2801 return !vinfo
->slp_instances
.is_empty ();
2805 /* Compute the scalar cost of the SLP node NODE and its children
2806 and return it. Do not account defs that are marked in LIFE and
2807 update LIFE according to uses of NODE. */
2810 vect_bb_slp_scalar_cost (basic_block bb
,
2811 slp_tree node
, vec
<bool, va_heap
> *life
,
2812 stmt_vector_for_cost
*cost_vec
,
2813 hash_set
<slp_tree
> &visited
)
2816 stmt_vec_info stmt_info
;
2819 if (visited
.add (node
))
2822 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2824 gimple
*stmt
= stmt_info
->stmt
;
2825 vec_info
*vinfo
= stmt_info
->vinfo
;
2826 ssa_op_iter op_iter
;
2827 def_operand_p def_p
;
2832 /* If there is a non-vectorized use of the defs then the scalar
2833 stmt is kept live in which case we do not account it or any
2834 required defs in the SLP children in the scalar cost. This
2835 way we make the vectorization more costly when compared to
2837 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
2839 imm_use_iterator use_iter
;
2841 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
2842 if (!is_gimple_debug (use_stmt
))
2844 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
2845 if (!use_stmt_info
|| !PURE_SLP_STMT (use_stmt_info
))
2848 BREAK_FROM_IMM_USE_STMT (use_iter
);
2855 /* Count scalar stmts only once. */
2856 if (gimple_visited_p (stmt
))
2858 gimple_set_visited (stmt
, true);
2860 vect_cost_for_stmt kind
;
2861 if (STMT_VINFO_DATA_REF (stmt_info
))
2863 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2866 kind
= scalar_store
;
2870 record_stmt_cost (cost_vec
, 1, kind
, stmt_info
, 0, vect_body
);
2873 auto_vec
<bool, 20> subtree_life
;
2874 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2876 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2878 /* Do not directly pass LIFE to the recursive call, copy it to
2879 confine changes in the callee to the current child/subtree. */
2880 subtree_life
.safe_splice (*life
);
2881 vect_bb_slp_scalar_cost (bb
, child
, &subtree_life
, cost_vec
,
2883 subtree_life
.truncate (0);
2889 vect_bb_slp_scalar_cost (basic_block bb
,
2890 slp_tree node
, vec
<bool, va_heap
> *life
,
2891 stmt_vector_for_cost
*cost_vec
)
2893 hash_set
<slp_tree
> visited
;
2894 vect_bb_slp_scalar_cost (bb
, node
, life
, cost_vec
, visited
);
2897 /* Check if vectorization of the basic block is profitable. */
2900 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
)
2902 vec
<slp_instance
> slp_instances
= BB_VINFO_SLP_INSTANCES (bb_vinfo
);
2903 slp_instance instance
;
2905 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
2906 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
2908 /* Calculate scalar cost. */
2909 stmt_vector_for_cost scalar_costs
;
2910 scalar_costs
.create (0);
2911 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
2913 auto_vec
<bool, 20> life
;
2914 life
.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance
));
2915 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo
),
2916 SLP_INSTANCE_TREE (instance
),
2917 &life
, &scalar_costs
);
2919 void *target_cost_data
= init_cost (NULL
);
2920 add_stmt_costs (target_cost_data
, &scalar_costs
);
2921 scalar_costs
.release ();
2923 finish_cost (target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
2924 destroy_cost_data (target_cost_data
);
2926 /* Unset visited flag. */
2927 for (gimple_stmt_iterator gsi
= bb_vinfo
->region_begin
;
2928 gsi_stmt (gsi
) != gsi_stmt (bb_vinfo
->region_end
); gsi_next (&gsi
))
2929 gimple_set_visited (gsi_stmt (gsi
), false);
2931 /* Complete the target-specific cost calculation. */
2932 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo
), &vec_prologue_cost
,
2933 &vec_inside_cost
, &vec_epilogue_cost
);
2935 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
2937 if (dump_enabled_p ())
2939 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
2940 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
2942 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
2943 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
2944 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
2947 /* Vectorization is profitable if its cost is no more than the cost of the
2948 scalar version. Note that we err on the vector side for equal cost because
2949 the cost estimate is otherwise quite pessimistic (constant uses are
2950 free on the scalar side but cost a load on the vector side for
2952 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
2958 /* Check if the region described by BB_VINFO can be vectorized, returning
2959 true if so. When returning false, set FATAL to true if the same failure
2960 would prevent vectorization at other vector sizes, false if it is still
2961 worth trying other sizes. N_STMTS is the number of statements in the
2965 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo
, int n_stmts
, bool &fatal
)
2967 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2969 slp_instance instance
;
2971 poly_uint64 min_vf
= 2;
2973 /* The first group of checks is independent of the vector size. */
2976 /* Analyze the data references. */
2978 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, NULL
))
2980 if (dump_enabled_p ())
2981 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2982 "not vectorized: unhandled data-ref in basic "
2987 if (BB_VINFO_DATAREFS (bb_vinfo
).length () < 2)
2989 if (dump_enabled_p ())
2990 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2991 "not vectorized: not enough data-refs in "
2996 if (!vect_analyze_data_ref_accesses (bb_vinfo
))
2998 if (dump_enabled_p ())
2999 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3000 "not vectorized: unhandled data access in "
3005 /* If there are no grouped stores in the region there is no need
3006 to continue with pattern recog as vect_analyze_slp will fail
3008 if (bb_vinfo
->grouped_stores
.is_empty ())
3010 if (dump_enabled_p ())
3011 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3012 "not vectorized: no grouped stores in "
3017 /* While the rest of the analysis below depends on it in some way. */
3020 vect_pattern_recog (bb_vinfo
);
3022 /* Check the SLP opportunities in the basic block, analyze and build SLP
3024 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
3026 if (dump_enabled_p ())
3028 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3029 "Failed to SLP the basic block.\n");
3030 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3031 "not vectorized: failed to find SLP opportunities "
3032 "in basic block.\n");
3037 vect_record_base_alignments (bb_vinfo
);
3039 /* Analyze and verify the alignment of data references and the
3040 dependence in the SLP instances. */
3041 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
3043 if (! vect_slp_analyze_and_verify_instance_alignment (instance
)
3044 || ! vect_slp_analyze_instance_dependence (instance
))
3046 slp_tree node
= SLP_INSTANCE_TREE (instance
);
3047 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3048 if (dump_enabled_p ())
3049 dump_printf_loc (MSG_NOTE
, vect_location
,
3050 "removing SLP instance operations starting from: %G",
3052 vect_free_slp_instance (instance
, false);
3053 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
3057 /* Mark all the statements that we want to vectorize as pure SLP and
3059 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
3060 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
3064 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
3067 if (!vect_slp_analyze_operations (bb_vinfo
))
3069 if (dump_enabled_p ())
3070 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3071 "not vectorized: bad operation in basic block.\n");
3075 /* Cost model: check if the vectorization is worthwhile. */
3076 if (!unlimited_cost_model (NULL
)
3077 && !vect_bb_vectorization_profitable_p (bb_vinfo
))
3079 if (dump_enabled_p ())
3080 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3081 "not vectorized: vectorization is not "
3086 if (dump_enabled_p ())
3087 dump_printf_loc (MSG_NOTE
, vect_location
,
3088 "Basic block will be vectorized using SLP\n");
3092 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3093 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3094 on success. The region has N_STMTS statements and has the datarefs
3095 given by DATAREFS. */
3098 vect_slp_bb_region (gimple_stmt_iterator region_begin
,
3099 gimple_stmt_iterator region_end
,
3100 vec
<data_reference_p
> datarefs
,
3101 unsigned int n_stmts
)
3103 bb_vec_info bb_vinfo
;
3104 auto_vector_sizes vector_sizes
;
3106 /* Autodetect first vector size we try. */
3107 poly_uint64 next_vector_size
= 0;
3108 targetm
.vectorize
.autovectorize_vector_sizes (&vector_sizes
, false);
3109 unsigned int next_size
= 0;
3111 vec_info_shared shared
;
3113 poly_uint64 autodetected_vector_size
= 0;
3116 bool vectorized
= false;
3118 bb_vinfo
= new _bb_vec_info (region_begin
, region_end
, &shared
);
3120 bool first_time_p
= shared
.datarefs
.is_empty ();
3121 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
3123 bb_vinfo
->shared
->save_datarefs ();
3125 bb_vinfo
->shared
->check_datarefs ();
3126 bb_vinfo
->vector_size
= next_vector_size
;
3128 if (vect_slp_analyze_bb_1 (bb_vinfo
, n_stmts
, fatal
)
3129 && dbg_cnt (vect_slp
))
3131 if (dump_enabled_p ())
3132 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
3134 bb_vinfo
->shared
->check_datarefs ();
3135 vect_schedule_slp (bb_vinfo
);
3137 unsigned HOST_WIDE_INT bytes
;
3138 if (dump_enabled_p ())
3140 if (bb_vinfo
->vector_size
.is_constant (&bytes
))
3141 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3142 "basic block part vectorized using %wu byte "
3143 "vectors\n", bytes
);
3145 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
3146 "basic block part vectorized using variable "
3147 "length vectors\n");
3154 autodetected_vector_size
= bb_vinfo
->vector_size
;
3158 if (next_size
< vector_sizes
.length ()
3159 && known_eq (vector_sizes
[next_size
], autodetected_vector_size
))
3163 || next_size
== vector_sizes
.length ()
3164 || known_eq (autodetected_vector_size
, 0U)
3165 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3166 vector sizes will fail do not bother iterating. */
3170 /* Try the next biggest vector size. */
3171 next_vector_size
= vector_sizes
[next_size
++];
3172 if (dump_enabled_p ())
3174 dump_printf_loc (MSG_NOTE
, vect_location
,
3175 "***** Re-trying analysis with "
3177 dump_dec (MSG_NOTE
, next_vector_size
);
3178 dump_printf (MSG_NOTE
, "\n");
3183 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
3184 true if anything in the basic-block was vectorized. */
3187 vect_slp_bb (basic_block bb
)
3189 gimple_stmt_iterator gsi
;
3190 bool any_vectorized
= false;
3192 gsi
= gsi_start_bb (bb
);
3193 while (!gsi_end_p (gsi
))
3195 gimple_stmt_iterator region_begin
= gsi
;
3196 vec
<data_reference_p
> datarefs
= vNULL
;
3199 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3201 gimple
*stmt
= gsi_stmt (gsi
);
3202 if (is_gimple_debug (stmt
))
3206 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
3207 vect_location
= stmt
;
3209 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
))
3213 /* Skip leading unhandled stmts. */
3214 if (gsi_stmt (region_begin
) == gsi_stmt (gsi
))
3220 gimple_stmt_iterator region_end
= gsi
;
3222 if (insns
> PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB
))
3224 if (dump_enabled_p ())
3225 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3226 "not vectorized: too many instructions in "
3229 else if (vect_slp_bb_region (region_begin
, region_end
, datarefs
, insns
))
3230 any_vectorized
= true;
3232 if (gsi_end_p (region_end
))
3235 /* Skip the unhandled stmt. */
3239 return any_vectorized
;
3243 /* Return 1 if the vector type of STMT_VINFO's constant operand is a boolean vector. */
3246 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo
)
3248 enum tree_code code
= gimple_expr_code (stmt_vinfo
->stmt
);
3250 enum vect_def_type dt
;
3252 /* For comparison and COND_EXPR type is chosen depending
3253 on the non-constant other comparison operand. */
3254 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
3256 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3257 op
= gimple_assign_rhs1 (stmt
);
3259 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3262 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3265 if (code
== COND_EXPR
)
3267 gassign
*stmt
= as_a
<gassign
*> (stmt_vinfo
->stmt
);
3268 tree cond
= gimple_assign_rhs1 (stmt
);
3270 if (TREE_CODE (cond
) == SSA_NAME
)
3273 op
= TREE_OPERAND (cond
, 0);
3275 if (!vect_is_simple_use (op
, stmt_vinfo
->vinfo
, &dt
, &vectype
))
3278 return !vectype
|| VECTOR_BOOLEAN_TYPE_P (vectype
);
3281 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo
));
3284 /* Build a variable-length vector in which the elements in ELTS are repeated
3285 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3286 RESULTS and add any new instructions to SEQ.
3288 The approach we use is:
3290 (1) Find a vector mode VM with integer elements of mode IM.
3292 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3293 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3294 from small vectors to IM.
3296 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3298 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3299 correct byte contents.
3301 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3303 We try to find the largest IM for which this sequence works, in order
3304 to cut down on the number of interleaves. */
3307 duplicate_and_interleave (vec_info
*vinfo
, gimple_seq
*seq
, tree vector_type
,
3308 vec
<tree
> elts
, unsigned int nresults
,
3311 unsigned int nelts
= elts
.length ();
3312 tree element_type
= TREE_TYPE (vector_type
);
3314 /* (1) Find a vector mode VM with integer elements of mode IM. */
3315 unsigned int nvectors
= 1;
3316 tree new_vector_type
;
3318 if (!can_duplicate_and_interleave_p (vinfo
, nelts
, TYPE_MODE (element_type
),
3319 &nvectors
, &new_vector_type
,
3323 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3324 unsigned int partial_nelts
= nelts
/ nvectors
;
3325 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
3327 tree_vector_builder partial_elts
;
3328 auto_vec
<tree
, 32> pieces (nvectors
* 2);
3329 pieces
.quick_grow (nvectors
* 2);
3330 for (unsigned int i
= 0; i
< nvectors
; ++i
)
3332 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3333 ELTS' has mode IM. */
3334 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
3335 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
3336 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
3337 tree t
= gimple_build_vector (seq
, &partial_elts
);
3338 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
3339 TREE_TYPE (new_vector_type
), t
);
3341 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3342 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
3345 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3346 correct byte contents.
3348 We need to repeat the following operation log2(nvectors) times:
3350 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3351 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3353 However, if each input repeats every N elements and the VF is
3354 a multiple of N * 2, the HI result is the same as the LO. */
3355 unsigned int in_start
= 0;
3356 unsigned int out_start
= nvectors
;
3357 unsigned int hi_start
= nvectors
/ 2;
3358 /* A bound on the number of outputs needed to produce NRESULTS results
3359 in the final iteration. */
3360 unsigned int noutputs_bound
= nvectors
* nresults
;
3361 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
3363 noutputs_bound
/= 2;
3364 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
3365 for (unsigned int i
= 0; i
< limit
; ++i
)
3368 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
3371 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
3375 tree output
= make_ssa_name (new_vector_type
);
3376 tree input1
= pieces
[in_start
+ (i
/ 2)];
3377 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
3378 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
3381 gimple_seq_add_stmt (seq
, stmt
);
3382 pieces
[out_start
+ i
] = output
;
3384 std::swap (in_start
, out_start
);
3387 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3388 results
.reserve (nresults
);
3389 for (unsigned int i
= 0; i
< nresults
; ++i
)
3391 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
3392 pieces
[in_start
+ i
]));
3394 results
.quick_push (results
[i
- nvectors
]);
3398 /* For constant and loop invariant defs of SLP_NODE this function returns
3399 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3400 OP_NODE determines the node for the operand containing the scalar
3404 vect_get_constant_vectors (slp_tree op_node
, slp_tree slp_node
,
3405 vec
<tree
> *vec_oprnds
)
3407 stmt_vec_info stmt_vinfo
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
3408 vec_info
*vinfo
= stmt_vinfo
->vinfo
;
3409 unsigned HOST_WIDE_INT nunits
;
3411 unsigned j
, number_of_places_left_in_vector
;
3414 int group_size
= op_node
->ops
.length ();
3415 unsigned int vec_num
, i
;
3416 unsigned number_of_copies
= 1;
3418 tree neutral_op
= NULL
;
3419 gimple_seq ctor_seq
= NULL
;
3420 auto_vec
<tree
, 16> permute_results
;
3422 /* ??? SLP analysis should compute the vector type for the
3423 constant / invariant and store it in the SLP node. */
3424 tree op
= op_node
->ops
[0];
3425 /* Check if vector type is a boolean vector. */
3426 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
3427 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
3428 && vect_mask_constant_operand_p (stmt_vinfo
))
3430 = build_same_sized_truth_vector_type (stmt_vectype
);
3432 vector_type
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
));
3434 /* ??? For lane-reducing ops we should also have the required number
3435 of vector stmts initialized rather than second-guessing here. */
3436 unsigned int number_of_vectors
;
3437 if (is_gimple_assign (stmt_vinfo
->stmt
)
3438 && (gimple_assign_rhs_code (stmt_vinfo
->stmt
) == SAD_EXPR
3439 || gimple_assign_rhs_code (stmt_vinfo
->stmt
) == DOT_PROD_EXPR
3440 || gimple_assign_rhs_code (stmt_vinfo
->stmt
) == WIDEN_SUM_EXPR
))
3441 number_of_vectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
3444 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3445 * TYPE_VECTOR_SUBPARTS (stmt_vectype
),
3447 vec_oprnds
->create (number_of_vectors
);
3448 auto_vec
<tree
> voprnds (number_of_vectors
);
3450 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3451 created vectors. It is greater than 1 if unrolling is performed.
3453 For example, we have two scalar operands, s1 and s2 (e.g., group of
3454 strided accesses of size two), while NUNITS is four (i.e., four scalars
3455 of this type can be packed in a vector). The output vector will contain
3456 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3459 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3460 containing the operands.
3462 For example, NUNITS is four as before, and the group size is 8
3463 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3464 {s5, s6, s7, s8}. */
3466 /* When using duplicate_and_interleave, we just need one element for
3467 each scalar statement. */
3468 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
3469 nunits
= group_size
;
3471 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
3473 number_of_places_left_in_vector
= nunits
;
3475 tree_vector_builder
elts (vector_type
, nunits
, 1);
3476 elts
.quick_grow (nunits
);
3477 bool place_after_defs
= false;
3478 for (j
= 0; j
< number_of_copies
; j
++)
3480 for (i
= group_size
- 1; op_node
->ops
.iterate (i
, &op
); i
--)
3482 /* Create 'vect_ = {op0,op1,...,opn}'. */
3483 number_of_places_left_in_vector
--;
3485 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
3487 if (CONSTANT_CLASS_P (op
))
3489 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3491 /* Can't use VIEW_CONVERT_EXPR for booleans because
3492 of possibly different sizes of scalar value and
3494 if (integer_zerop (op
))
3495 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
3496 else if (integer_onep (op
))
3497 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
3502 op
= fold_unary (VIEW_CONVERT_EXPR
,
3503 TREE_TYPE (vector_type
), op
);
3504 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
3508 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
3510 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
3513 = build_all_ones_cst (TREE_TYPE (vector_type
));
3515 = build_zero_cst (TREE_TYPE (vector_type
));
3516 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
3517 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
3523 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
3526 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
3529 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
3533 elts
[number_of_places_left_in_vector
] = op
;
3534 if (!CONSTANT_CLASS_P (op
))
3536 if (TREE_CODE (orig_op
) == SSA_NAME
3537 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
3538 && STMT_VINFO_BB_VINFO (stmt_vinfo
)
3539 && (STMT_VINFO_BB_VINFO (stmt_vinfo
)->bb
3540 == gimple_bb (SSA_NAME_DEF_STMT (orig_op
))))
3541 place_after_defs
= true;
3543 if (number_of_places_left_in_vector
== 0)
3546 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
3547 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
3548 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
3551 if (permute_results
.is_empty ())
3552 duplicate_and_interleave (vinfo
, &ctor_seq
, vector_type
,
3553 elts
, number_of_vectors
,
3555 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
3558 gimple_stmt_iterator gsi
;
3559 if (place_after_defs
)
3561 stmt_vec_info last_stmt_info
3562 = vect_find_last_scalar_stmt_in_slp (slp_node
);
3563 gsi
= gsi_for_stmt (last_stmt_info
->stmt
);
3564 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3568 init
= vect_init_vector (stmt_vinfo
, vec_cst
, vector_type
,
3570 if (ctor_seq
!= NULL
)
3572 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (init
));
3573 gsi_insert_seq_before (&gsi
, ctor_seq
, GSI_SAME_STMT
);
3576 voprnds
.quick_push (init
);
3577 place_after_defs
= false;
3578 number_of_places_left_in_vector
= nunits
;
3580 elts
.new_vector (vector_type
, nunits
, 1);
3581 elts
.quick_grow (nunits
);
3586 /* Since the vectors are created in the reverse order, we should invert
3588 vec_num
= voprnds
.length ();
3589 for (j
= vec_num
; j
!= 0; j
--)
3591 vop
= voprnds
[j
- 1];
3592 vec_oprnds
->quick_push (vop
);
3595 /* In case that VF is greater than the unrolling factor needed for the SLP
3596 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3597 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
3598 to replicate the vectors. */
3599 while (number_of_vectors
> vec_oprnds
->length ())
3601 tree neutral_vec
= NULL
;
3606 neutral_vec
= build_vector_from_val (vector_type
, neutral_op
);
3608 vec_oprnds
->quick_push (neutral_vec
);
3612 for (i
= 0; vec_oprnds
->iterate (i
, &vop
) && i
< vec_num
; i
++)
3613 vec_oprnds
->quick_push (vop
);
3619 /* Get vectorized definitions from SLP_NODE that contains corresponding
3620 vectorized def-stmts. */
3623 vect_get_slp_vect_defs (slp_tree slp_node
, vec
<tree
> *vec_oprnds
)
3625 stmt_vec_info vec_def_stmt_info
;
3628 gcc_assert (SLP_TREE_VEC_STMTS (slp_node
).exists ());
3630 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), i
, vec_def_stmt_info
)
3631 vec_oprnds
->quick_push (gimple_get_lhs (vec_def_stmt_info
->stmt
));
3635 /* Get N vectorized definitions for SLP_NODE.
3636 If the scalar definitions are loop invariants or constants, collect them and
3637 call vect_get_constant_vectors() to create vector stmts.
3638 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
3639 must be stored in the corresponding child of SLP_NODE, and we call
3640 vect_get_slp_vect_defs () to retrieve them. */
3643 vect_get_slp_defs (slp_tree slp_node
, vec
<vec
<tree
> > *vec_oprnds
, unsigned n
)
3646 n
= SLP_TREE_CHILDREN (slp_node
).length ();
3648 for (unsigned i
= 0; i
< n
; ++i
)
3650 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[i
];
3652 vec
<tree
> vec_defs
= vNULL
;
3654 /* For each operand we check if it has vectorized definitions in a child
3655 node or we need to create them (for invariants and constants). */
3656 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3658 vec_defs
.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child
));
3659 vect_get_slp_vect_defs (child
, &vec_defs
);
3662 vect_get_constant_vectors (child
, slp_node
, &vec_defs
);
3664 vec_oprnds
->quick_push (vec_defs
);
3668 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3669 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3670 permute statements for the SLP node NODE of the SLP instance
3671 SLP_NODE_INSTANCE. */
3674 vect_transform_slp_perm_load (slp_tree node
, vec
<tree
> dr_chain
,
3675 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
3676 slp_instance slp_node_instance
, bool analyze_only
,
3679 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3680 vec_info
*vinfo
= stmt_info
->vinfo
;
3682 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3683 unsigned int group_size
= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
3684 unsigned int mask_element
;
3687 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3690 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
3692 mode
= TYPE_MODE (vectype
);
3693 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3695 /* Initialize the vect stmts of NODE to properly insert the generated
3698 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
3699 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
3700 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
3702 /* Generate permutation masks for every NODE. Number of masks for each NODE
3703 is equal to GROUP_SIZE.
3704 E.g., we have a group of three nodes with three loads from the same
3705 location in each node, and the vector size is 4. I.e., we have a
3706 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3707 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3708 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3711 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3712 The last mask is illegal since we assume two operands for permute
3713 operation, and the mask element values can't be outside that range.
3714 Hence, the last mask must be converted into {2,5,5,5}.
3715 For the first two permutations we need the first and the second input
3716 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3717 we need the second and the third vectors: {b1,c1,a2,b2} and
3720 int vect_stmts_counter
= 0;
3721 unsigned int index
= 0;
3722 int first_vec_index
= -1;
3723 int second_vec_index
= -1;
3727 vec_perm_builder mask
;
3728 unsigned int nelts_to_build
;
3729 unsigned int nvectors_per_build
;
3730 bool repeating_p
= (group_size
== DR_GROUP_SIZE (stmt_info
)
3731 && multiple_p (nunits
, group_size
));
3734 /* A single vector contains a whole number of copies of the node, so:
3735 (a) all permutes can use the same mask; and
3736 (b) the permutes only need a single vector input. */
3737 mask
.new_vector (nunits
, group_size
, 3);
3738 nelts_to_build
= mask
.encoded_nelts ();
3739 nvectors_per_build
= SLP_TREE_VEC_STMTS (node
).length ();
3743 /* We need to construct a separate mask for each vector statement. */
3744 unsigned HOST_WIDE_INT const_nunits
, const_vf
;
3745 if (!nunits
.is_constant (&const_nunits
)
3746 || !vf
.is_constant (&const_vf
))
3748 mask
.new_vector (const_nunits
, const_nunits
, 1);
3749 nelts_to_build
= const_vf
* group_size
;
3750 nvectors_per_build
= 1;
3753 unsigned int count
= mask
.encoded_nelts ();
3754 mask
.quick_grow (count
);
3755 vec_perm_indices indices
;
3757 for (unsigned int j
= 0; j
< nelts_to_build
; j
++)
3759 unsigned int iter_num
= j
/ group_size
;
3760 unsigned int stmt_num
= j
% group_size
;
3761 unsigned int i
= (iter_num
* DR_GROUP_SIZE (stmt_info
)
3762 + SLP_TREE_LOAD_PERMUTATION (node
)[stmt_num
]);
3765 first_vec_index
= 0;
3770 /* Enforced before the loop when !repeating_p. */
3771 unsigned int const_nunits
= nunits
.to_constant ();
3772 vec_index
= i
/ const_nunits
;
3773 mask_element
= i
% const_nunits
;
3774 if (vec_index
== first_vec_index
3775 || first_vec_index
== -1)
3777 first_vec_index
= vec_index
;
3779 else if (vec_index
== second_vec_index
3780 || second_vec_index
== -1)
3782 second_vec_index
= vec_index
;
3783 mask_element
+= const_nunits
;
3787 if (dump_enabled_p ())
3788 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3789 "permutation requires at "
3790 "least three vectors %G",
3792 gcc_assert (analyze_only
);
3796 gcc_assert (mask_element
< 2 * const_nunits
);
3799 if (mask_element
!= index
)
3801 mask
[index
++] = mask_element
;
3803 if (index
== count
&& !noop_p
)
3805 indices
.new_vector (mask
, second_vec_index
== -1 ? 1 : 2, nunits
);
3806 if (!can_vec_perm_const_p (mode
, indices
))
3808 if (dump_enabled_p ())
3810 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
3812 "unsupported vect permute { ");
3813 for (i
= 0; i
< count
; ++i
)
3815 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
3816 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
3818 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
3820 gcc_assert (analyze_only
);
3831 tree mask_vec
= NULL_TREE
;
3834 mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
3836 if (second_vec_index
== -1)
3837 second_vec_index
= first_vec_index
;
3839 for (unsigned int ri
= 0; ri
< nvectors_per_build
; ++ri
)
3841 /* Generate the permute statement if necessary. */
3842 tree first_vec
= dr_chain
[first_vec_index
+ ri
];
3843 tree second_vec
= dr_chain
[second_vec_index
+ ri
];
3844 stmt_vec_info perm_stmt_info
;
3847 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3849 = vect_create_destination_var (gimple_assign_lhs (stmt
),
3851 perm_dest
= make_ssa_name (perm_dest
);
3853 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
3854 first_vec
, second_vec
,
3857 = vect_finish_stmt_generation (stmt_info
, perm_stmt
,
3861 /* If mask was NULL_TREE generate the requested
3862 identity transform. */
3863 perm_stmt_info
= vinfo
->lookup_def (first_vec
);
3865 /* Store the vector statement in NODE. */
3866 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++]
3872 first_vec_index
= -1;
3873 second_vec_index
= -1;
3881 /* Vectorize SLP instance tree in postorder. */
3884 vect_schedule_slp_instance (slp_tree node
, slp_instance instance
,
3885 scalar_stmts_to_slp_tree_map_t
*bst_map
)
3887 gimple_stmt_iterator si
;
3888 stmt_vec_info stmt_info
;
3889 unsigned int group_size
;
3894 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
3897 /* See if we have already vectorized the node in the graph of the
3899 if (SLP_TREE_VEC_STMTS (node
).exists ())
3902 /* See if we have already vectorized the same set of stmts and reuse their
3903 vectorized stmts across instances. */
3904 if (slp_tree
*leader
= bst_map
->get (SLP_TREE_SCALAR_STMTS (node
)))
3906 SLP_TREE_VEC_STMTS (node
).safe_splice (SLP_TREE_VEC_STMTS (*leader
));
3910 bst_map
->put (SLP_TREE_SCALAR_STMTS (node
).copy (), node
);
3911 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3912 vect_schedule_slp_instance (child
, instance
, bst_map
);
3914 /* Push SLP node def-type to stmts. */
3915 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3916 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
3918 stmt_vec_info child_stmt_info
;
3919 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
3920 STMT_VINFO_DEF_TYPE (child_stmt_info
) = SLP_TREE_DEF_TYPE (child
);
3923 stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3925 /* VECTYPE is the type of the destination. */
3926 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3927 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3928 group_size
= SLP_INSTANCE_GROUP_SIZE (instance
);
3930 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
3931 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
3933 if (dump_enabled_p ())
3934 dump_printf_loc (MSG_NOTE
, vect_location
,
3935 "------>vectorizing SLP node starting from: %G",
3938 /* Vectorized stmts go before the last scalar stmt which is where
3939 all uses are ready. */
3940 stmt_vec_info last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
3941 si
= gsi_for_stmt (last_stmt_info
->stmt
);
3943 /* Handle two-operation SLP nodes by vectorizing the group with
3944 both operations and then performing a merge. */
3945 if (SLP_TREE_TWO_OPERATORS (node
))
3947 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
3948 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
3949 enum tree_code ocode
= ERROR_MARK
;
3950 stmt_vec_info ostmt_info
;
3951 vec_perm_builder
mask (group_size
, group_size
, 1);
3952 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, ostmt_info
)
3954 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
3955 if (gimple_assign_rhs_code (ostmt
) != code0
)
3957 mask
.quick_push (1);
3958 ocode
= gimple_assign_rhs_code (ostmt
);
3961 mask
.quick_push (0);
3963 if (ocode
!= ERROR_MARK
)
3965 vec
<stmt_vec_info
> v0
;
3966 vec
<stmt_vec_info
> v1
;
3968 tree tmask
= NULL_TREE
;
3969 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
3970 v0
= SLP_TREE_VEC_STMTS (node
).copy ();
3971 SLP_TREE_VEC_STMTS (node
).truncate (0);
3972 gimple_assign_set_rhs_code (stmt
, ocode
);
3973 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
3974 gimple_assign_set_rhs_code (stmt
, code0
);
3975 v1
= SLP_TREE_VEC_STMTS (node
).copy ();
3976 SLP_TREE_VEC_STMTS (node
).truncate (0);
3977 tree meltype
= build_nonstandard_integer_type
3978 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype
))), 1);
3979 tree mvectype
= get_same_sized_vectype (meltype
, vectype
);
3981 for (j
= 0; j
< v0
.length (); ++j
)
3983 /* Enforced by vect_build_slp_tree, which rejects variable-length
3984 vectors for SLP_TREE_TWO_OPERATORS. */
3985 unsigned int const_nunits
= nunits
.to_constant ();
3986 tree_vector_builder
melts (mvectype
, const_nunits
, 1);
3987 for (l
= 0; l
< const_nunits
; ++l
)
3989 if (k
>= group_size
)
3991 tree t
= build_int_cst (meltype
,
3992 mask
[k
++] * const_nunits
+ l
);
3993 melts
.quick_push (t
);
3995 tmask
= melts
.build ();
3997 /* ??? Not all targets support a VEC_PERM_EXPR with a
3998 constant mask that would translate to a vec_merge RTX
3999 (with their vec_perm_const_ok). We can either not
4000 vectorize in that case or let veclower do its job.
4001 Unfortunately that isn't too great and at least for
4002 plus/minus we'd eventually like to match targets
4003 vector addsub instructions. */
4005 vstmt
= gimple_build_assign (make_ssa_name (vectype
),
4007 gimple_assign_lhs (v0
[j
]->stmt
),
4008 gimple_assign_lhs (v1
[j
]->stmt
),
4010 SLP_TREE_VEC_STMTS (node
).quick_push
4011 (vect_finish_stmt_generation (stmt_info
, vstmt
, &si
));
4018 vect_transform_stmt (stmt_info
, &si
, node
, instance
);
4020 /* Restore stmt def-types. */
4021 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4022 if (SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
4024 stmt_vec_info child_stmt_info
;
4025 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child
), j
, child_stmt_info
)
4026 STMT_VINFO_DEF_TYPE (child_stmt_info
) = vect_internal_def
;
4030 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
4031 For loop vectorization this is done in vectorizable_call, but for SLP
4032 it needs to be deferred until end of vect_schedule_slp, because multiple
4033 SLP instances may refer to the same scalar stmt. */
4036 vect_remove_slp_scalar_calls (slp_tree node
, hash_set
<slp_tree
> &visited
)
4039 gimple_stmt_iterator gsi
;
4043 stmt_vec_info stmt_info
;
4045 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
4048 if (visited
.add (node
))
4051 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4052 vect_remove_slp_scalar_calls (child
, visited
);
4054 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
4056 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
4057 if (!stmt
|| gimple_bb (stmt
) == NULL
)
4059 if (is_pattern_stmt_p (stmt_info
)
4060 || !PURE_SLP_STMT (stmt_info
))
4062 lhs
= gimple_call_lhs (stmt
);
4063 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
4064 gsi
= gsi_for_stmt (stmt
);
4065 stmt_info
->vinfo
->replace_stmt (&gsi
, stmt_info
, new_stmt
);
4066 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
4071 vect_remove_slp_scalar_calls (slp_tree node
)
4073 hash_set
<slp_tree
> visited
;
4074 vect_remove_slp_scalar_calls (node
, visited
);
4077 /* Generate vector code for all SLP instances in the loop/basic block. */
4080 vect_schedule_slp (vec_info
*vinfo
)
4082 vec
<slp_instance
> slp_instances
;
4083 slp_instance instance
;
4086 scalar_stmts_to_slp_tree_map_t
*bst_map
4087 = new scalar_stmts_to_slp_tree_map_t ();
4088 slp_instances
= vinfo
->slp_instances
;
4089 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4091 /* Schedule the tree of INSTANCE. */
4092 vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance
),
4094 if (dump_enabled_p ())
4095 dump_printf_loc (MSG_NOTE
, vect_location
,
4096 "vectorizing stmts using SLP.\n");
4100 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4102 slp_tree root
= SLP_INSTANCE_TREE (instance
);
4103 stmt_vec_info store_info
;
4106 /* Remove scalar call stmts. Do not do this for basic-block
4107 vectorization as not all uses may be vectorized.
4108 ??? Why should this be necessary? DCE should be able to
4109 remove the stmts itself.
4110 ??? For BB vectorization we can as well remove scalar
4111 stmts starting from the SLP tree root if they have no
4113 if (is_a
<loop_vec_info
> (vinfo
))
4114 vect_remove_slp_scalar_calls (root
);
4116 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
)
4117 && j
< SLP_INSTANCE_GROUP_SIZE (instance
); j
++)
4119 if (!STMT_VINFO_DATA_REF (store_info
))
4122 store_info
= vect_orig_stmt (store_info
);
4123 /* Free the attached stmt_vec_info and remove the stmt. */
4124 vinfo
->remove_stmt (store_info
);