1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "tree-pass.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
47 #include "dump-context.h"
51 #include "alloc-pool.h"
53 static bool vectorizable_slp_permutation (vec_info
*, gimple_stmt_iterator
*,
54 slp_tree
, stmt_vector_for_cost
*);
56 static object_allocator
<_slp_tree
> *slp_tree_pool
;
57 static slp_tree slp_first_node
;
62 slp_tree_pool
= new object_allocator
<_slp_tree
> ("SLP nodes");
68 while (slp_first_node
)
69 delete slp_first_node
;
75 _slp_tree::operator new (size_t n
)
77 gcc_assert (n
== sizeof (_slp_tree
));
78 return slp_tree_pool
->allocate_raw ();
82 _slp_tree::operator delete (void *node
, size_t n
)
84 gcc_assert (n
== sizeof (_slp_tree
));
85 slp_tree_pool
->remove_raw (node
);
89 /* Initialize a SLP node. */
91 _slp_tree::_slp_tree ()
93 this->prev_node
= NULL
;
95 slp_first_node
->prev_node
= this;
96 this->next_node
= slp_first_node
;
97 slp_first_node
= this;
98 SLP_TREE_SCALAR_STMTS (this) = vNULL
;
99 SLP_TREE_SCALAR_OPS (this) = vNULL
;
100 SLP_TREE_VEC_STMTS (this) = vNULL
;
101 SLP_TREE_VEC_DEFS (this) = vNULL
;
102 SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0;
103 SLP_TREE_CHILDREN (this) = vNULL
;
104 SLP_TREE_LOAD_PERMUTATION (this) = vNULL
;
105 SLP_TREE_LANE_PERMUTATION (this) = vNULL
;
106 SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def
;
107 SLP_TREE_CODE (this) = ERROR_MARK
;
108 SLP_TREE_VECTYPE (this) = NULL_TREE
;
109 SLP_TREE_REPRESENTATIVE (this) = NULL
;
110 SLP_TREE_REF_COUNT (this) = 1;
111 this->max_nunits
= 1;
115 /* Tear down a SLP node. */
117 _slp_tree::~_slp_tree ()
120 this->prev_node
->next_node
= this->next_node
;
122 slp_first_node
= this->next_node
;
124 this->next_node
->prev_node
= this->prev_node
;
125 SLP_TREE_CHILDREN (this).release ();
126 SLP_TREE_SCALAR_STMTS (this).release ();
127 SLP_TREE_SCALAR_OPS (this).release ();
128 SLP_TREE_VEC_STMTS (this).release ();
129 SLP_TREE_VEC_DEFS (this).release ();
130 SLP_TREE_LOAD_PERMUTATION (this).release ();
131 SLP_TREE_LANE_PERMUTATION (this).release ();
134 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
137 vect_free_slp_tree (slp_tree node
)
142 if (--SLP_TREE_REF_COUNT (node
) != 0)
145 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
147 vect_free_slp_tree (child
);
152 /* Return a location suitable for dumpings related to the SLP instance. */
155 _slp_instance::location () const
158 return root_stmt
->stmt
;
160 return SLP_TREE_SCALAR_STMTS (root
)[0]->stmt
;
164 /* Free the memory allocated for the SLP instance. */
167 vect_free_slp_instance (slp_instance instance
)
169 vect_free_slp_tree (SLP_INSTANCE_TREE (instance
));
170 SLP_INSTANCE_LOADS (instance
).release ();
171 instance
->subgraph_entries
.release ();
172 instance
->cost_vec
.release ();
177 /* Create an SLP node for SCALAR_STMTS. */
180 vect_create_new_slp_node (slp_tree node
,
181 vec
<stmt_vec_info
> scalar_stmts
, unsigned nops
)
183 SLP_TREE_SCALAR_STMTS (node
) = scalar_stmts
;
184 SLP_TREE_CHILDREN (node
).create (nops
);
185 SLP_TREE_DEF_TYPE (node
) = vect_internal_def
;
186 SLP_TREE_REPRESENTATIVE (node
) = scalar_stmts
[0];
187 SLP_TREE_LANES (node
) = scalar_stmts
.length ();
191 /* Create an SLP node for SCALAR_STMTS. */
194 vect_create_new_slp_node (vec
<stmt_vec_info
> scalar_stmts
, unsigned nops
)
196 return vect_create_new_slp_node (new _slp_tree
, scalar_stmts
, nops
);
199 /* Create an SLP node for OPS. */
202 vect_create_new_slp_node (slp_tree node
, vec
<tree
> ops
)
204 SLP_TREE_SCALAR_OPS (node
) = ops
;
205 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
206 SLP_TREE_LANES (node
) = ops
.length ();
210 /* Create an SLP node for OPS. */
213 vect_create_new_slp_node (vec
<tree
> ops
)
215 return vect_create_new_slp_node (new _slp_tree
, ops
);
219 /* This structure is used in creation of an SLP tree. Each instance
220 corresponds to the same operand in a group of scalar stmts in an SLP
222 typedef struct _slp_oprnd_info
224 /* Def-stmts for the operands. */
225 vec
<stmt_vec_info
> def_stmts
;
228 /* Information about the first statement, its vector def-type, type, the
229 operand itself in case it's constant, and an indication if it's a pattern
232 enum vect_def_type first_dt
;
237 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
239 static vec
<slp_oprnd_info
>
240 vect_create_oprnd_info (int nops
, int group_size
)
243 slp_oprnd_info oprnd_info
;
244 vec
<slp_oprnd_info
> oprnds_info
;
246 oprnds_info
.create (nops
);
247 for (i
= 0; i
< nops
; i
++)
249 oprnd_info
= XNEW (struct _slp_oprnd_info
);
250 oprnd_info
->def_stmts
.create (group_size
);
251 oprnd_info
->ops
.create (group_size
);
252 oprnd_info
->first_dt
= vect_uninitialized_def
;
253 oprnd_info
->first_op_type
= NULL_TREE
;
254 oprnd_info
->any_pattern
= false;
255 oprnds_info
.quick_push (oprnd_info
);
262 /* Free operands info. */
265 vect_free_oprnd_info (vec
<slp_oprnd_info
> &oprnds_info
)
268 slp_oprnd_info oprnd_info
;
270 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
272 oprnd_info
->def_stmts
.release ();
273 oprnd_info
->ops
.release ();
274 XDELETE (oprnd_info
);
277 oprnds_info
.release ();
281 /* Return true if STMTS contains a pattern statement. */
284 vect_contains_pattern_stmt_p (vec
<stmt_vec_info
> stmts
)
286 stmt_vec_info stmt_info
;
288 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
289 if (is_pattern_stmt_p (stmt_info
))
294 /* Return true when all lanes in the external or constant NODE have
298 vect_slp_tree_uniform_p (slp_tree node
)
300 gcc_assert (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
301 || SLP_TREE_DEF_TYPE (node
) == vect_external_def
);
303 /* Pre-exsting vectors. */
304 if (SLP_TREE_SCALAR_OPS (node
).is_empty ())
308 tree op
, first
= NULL_TREE
;
309 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
312 else if (!operand_equal_p (first
, op
, 0))
318 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
319 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
323 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info
,
324 stmt_vec_info first_stmt_info
)
326 stmt_vec_info next_stmt_info
= first_stmt_info
;
329 if (first_stmt_info
!= DR_GROUP_FIRST_ELEMENT (stmt_info
))
334 if (next_stmt_info
== stmt_info
)
336 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
338 result
+= DR_GROUP_GAP (next_stmt_info
);
340 while (next_stmt_info
);
345 /* Check whether it is possible to load COUNT elements of type ELT_TYPE
346 using the method implemented by duplicate_and_interleave. Return true
347 if so, returning the number of intermediate vectors in *NVECTORS_OUT
348 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
352 can_duplicate_and_interleave_p (vec_info
*vinfo
, unsigned int count
,
353 tree elt_type
, unsigned int *nvectors_out
,
354 tree
*vector_type_out
,
357 tree base_vector_type
= get_vectype_for_scalar_type (vinfo
, elt_type
, count
);
358 if (!base_vector_type
|| !VECTOR_MODE_P (TYPE_MODE (base_vector_type
)))
361 machine_mode base_vector_mode
= TYPE_MODE (base_vector_type
);
362 poly_int64 elt_bytes
= count
* GET_MODE_UNIT_SIZE (base_vector_mode
);
363 unsigned int nvectors
= 1;
366 scalar_int_mode int_mode
;
367 poly_int64 elt_bits
= elt_bytes
* BITS_PER_UNIT
;
368 if (int_mode_for_size (elt_bits
, 1).exists (&int_mode
))
370 /* Get the natural vector type for this SLP group size. */
371 tree int_type
= build_nonstandard_integer_type
372 (GET_MODE_BITSIZE (int_mode
), 1);
374 = get_vectype_for_scalar_type (vinfo
, int_type
, count
);
376 && VECTOR_MODE_P (TYPE_MODE (vector_type
))
377 && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
378 GET_MODE_SIZE (base_vector_mode
)))
380 /* Try fusing consecutive sequences of COUNT / NVECTORS elements
381 together into elements of type INT_TYPE and using the result
382 to build NVECTORS vectors. */
383 poly_uint64 nelts
= GET_MODE_NUNITS (TYPE_MODE (vector_type
));
384 vec_perm_builder
sel1 (nelts
, 2, 3);
385 vec_perm_builder
sel2 (nelts
, 2, 3);
386 poly_int64 half_nelts
= exact_div (nelts
, 2);
387 for (unsigned int i
= 0; i
< 3; ++i
)
390 sel1
.quick_push (i
+ nelts
);
391 sel2
.quick_push (half_nelts
+ i
);
392 sel2
.quick_push (half_nelts
+ i
+ nelts
);
394 vec_perm_indices
indices1 (sel1
, 2, nelts
);
395 vec_perm_indices
indices2 (sel2
, 2, nelts
);
396 if (can_vec_perm_const_p (TYPE_MODE (vector_type
), indices1
)
397 && can_vec_perm_const_p (TYPE_MODE (vector_type
), indices2
))
400 *nvectors_out
= nvectors
;
402 *vector_type_out
= vector_type
;
405 permutes
[0] = vect_gen_perm_mask_checked (vector_type
,
407 permutes
[1] = vect_gen_perm_mask_checked (vector_type
,
414 if (!multiple_p (elt_bytes
, 2, &elt_bytes
))
420 /* Return true if DTA and DTB match. */
423 vect_def_types_match (enum vect_def_type dta
, enum vect_def_type dtb
)
426 || ((dta
== vect_external_def
|| dta
== vect_constant_def
)
427 && (dtb
== vect_external_def
|| dtb
== vect_constant_def
)));
430 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
431 they are of a valid type and that they match the defs of the first stmt of
432 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
433 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
434 indicates swap is required for cond_expr stmts. Specifically, *SWAP
435 is 1 if STMT is cond and operands of comparison need to be swapped;
436 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
437 If there is any operand swap in this function, *SWAP is set to non-zero
439 If there was a fatal error return -1; if the error could be corrected by
440 swapping operands of father node of this one, return 1; if everything is
443 vect_get_and_check_slp_defs (vec_info
*vinfo
, unsigned char swap
,
445 vec
<stmt_vec_info
> stmts
, unsigned stmt_num
,
446 vec
<slp_oprnd_info
> *oprnds_info
)
448 stmt_vec_info stmt_info
= stmts
[stmt_num
];
450 unsigned int i
, number_of_oprnds
;
451 enum vect_def_type dt
= vect_uninitialized_def
;
452 slp_oprnd_info oprnd_info
;
453 int first_op_idx
= 1;
454 unsigned int commutative_op
= -1U;
455 bool first_op_cond
= false;
456 bool first
= stmt_num
== 0;
458 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
460 number_of_oprnds
= gimple_call_num_args (stmt
);
462 if (gimple_call_internal_p (stmt
))
464 internal_fn ifn
= gimple_call_internal_fn (stmt
);
465 commutative_op
= first_commutative_argument (ifn
);
467 /* Masked load, only look at mask. */
468 if (ifn
== IFN_MASK_LOAD
)
470 number_of_oprnds
= 1;
471 /* Mask operand index. */
476 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
478 enum tree_code code
= gimple_assign_rhs_code (stmt
);
479 number_of_oprnds
= gimple_num_ops (stmt
) - 1;
480 /* Swap can only be done for cond_expr if asked to, otherwise we
481 could result in different comparison code to the first stmt. */
482 if (code
== COND_EXPR
483 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt
)))
485 first_op_cond
= true;
489 commutative_op
= commutative_tree_code (code
) ? 0U : -1U;
491 else if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
492 number_of_oprnds
= gimple_phi_num_args (stmt
);
496 bool swapped
= (swap
!= 0);
497 bool backedge
= false;
498 gcc_assert (!swapped
|| first_op_cond
);
499 enum vect_def_type
*dts
= XALLOCAVEC (enum vect_def_type
, number_of_oprnds
);
500 for (i
= 0; i
< number_of_oprnds
; i
++)
504 /* Map indicating how operands of cond_expr should be swapped. */
505 int maps
[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
506 int *map
= maps
[swap
];
509 oprnd
= TREE_OPERAND (gimple_op (stmt_info
->stmt
,
510 first_op_idx
), map
[i
]);
512 oprnd
= gimple_op (stmt_info
->stmt
, map
[i
]);
514 else if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
516 oprnd
= gimple_phi_arg_def (stmt
, i
);
517 backedge
= dominated_by_p (CDI_DOMINATORS
,
518 gimple_phi_arg_edge (stmt
, i
)->src
,
519 gimple_bb (stmt_info
->stmt
));
522 oprnd
= gimple_op (stmt_info
->stmt
, first_op_idx
+ (swapped
? !i
: i
));
523 if (TREE_CODE (oprnd
) == VIEW_CONVERT_EXPR
)
524 oprnd
= TREE_OPERAND (oprnd
, 0);
526 oprnd_info
= (*oprnds_info
)[i
];
528 stmt_vec_info def_stmt_info
;
529 if (!vect_is_simple_use (oprnd
, vinfo
, &dts
[i
], &def_stmt_info
))
531 if (dump_enabled_p ())
532 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
533 "Build SLP failed: can't analyze def for %T\n",
541 oprnd_info
->def_stmts
.quick_push (NULL
);
542 oprnd_info
->ops
.quick_push (NULL_TREE
);
543 oprnd_info
->first_dt
= vect_uninitialized_def
;
547 if (def_stmt_info
&& is_pattern_stmt_p (def_stmt_info
))
548 oprnd_info
->any_pattern
= true;
550 oprnd_info
->def_stmts
.quick_push (def_stmt_info
);
551 oprnd_info
->ops
.quick_push (oprnd
);
553 /* If there's a extern def on a backedge make sure we can
554 code-generate at the region start.
555 ??? This is another case that could be fixed by adjusting
556 how we split the function but at the moment we'd have conflicting
559 && dts
[i
] == vect_external_def
560 && is_a
<bb_vec_info
> (vinfo
)
561 && TREE_CODE (oprnd
) == SSA_NAME
562 && !SSA_NAME_IS_DEFAULT_DEF (oprnd
)
563 && !dominated_by_p (CDI_DOMINATORS
,
564 as_a
<bb_vec_info
> (vinfo
)->bbs
[0],
565 gimple_bb (SSA_NAME_DEF_STMT (oprnd
))))
567 if (dump_enabled_p ())
568 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
569 "Build SLP failed: extern def %T only defined "
570 "on backedge\n", oprnd
);
576 tree type
= TREE_TYPE (oprnd
);
578 if ((dt
== vect_constant_def
579 || dt
== vect_external_def
)
580 && !GET_MODE_SIZE (vinfo
->vector_mode
).is_constant ()
581 && (TREE_CODE (type
) == BOOLEAN_TYPE
582 || !can_duplicate_and_interleave_p (vinfo
, stmts
.length (),
585 if (dump_enabled_p ())
586 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
587 "Build SLP failed: invalid type of def "
588 "for variable-length SLP %T\n", oprnd
);
592 /* For the swapping logic below force vect_reduction_def
593 for the reduction op in a SLP reduction group. */
594 if (!STMT_VINFO_DATA_REF (stmt_info
)
595 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
596 && (int)i
== STMT_VINFO_REDUC_IDX (stmt_info
)
598 dts
[i
] = dt
= vect_reduction_def
;
600 /* Check the types of the definition. */
603 case vect_external_def
:
604 case vect_constant_def
:
605 case vect_internal_def
:
606 case vect_reduction_def
:
607 case vect_induction_def
:
608 case vect_nested_cycle
:
612 /* FORNOW: Not supported. */
613 if (dump_enabled_p ())
614 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
615 "Build SLP failed: illegal type of def %T\n",
620 oprnd_info
->first_dt
= dt
;
621 oprnd_info
->first_op_type
= type
;
627 /* Now match the operand definition types to that of the first stmt. */
628 for (i
= 0; i
< number_of_oprnds
;)
636 oprnd_info
= (*oprnds_info
)[i
];
638 stmt_vec_info def_stmt_info
= oprnd_info
->def_stmts
[stmt_num
];
639 oprnd
= oprnd_info
->ops
[stmt_num
];
640 tree type
= TREE_TYPE (oprnd
);
642 if (!types_compatible_p (oprnd_info
->first_op_type
, type
))
644 if (dump_enabled_p ())
645 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
646 "Build SLP failed: different operand types\n");
650 /* Not first stmt of the group, check that the def-stmt/s match
651 the def-stmt/s of the first stmt. Allow different definition
652 types for reduction chains: the first stmt must be a
653 vect_reduction_def (a phi node), and the rest
654 end in the reduction chain. */
655 if ((!vect_def_types_match (oprnd_info
->first_dt
, dt
)
656 && !(oprnd_info
->first_dt
== vect_reduction_def
657 && !STMT_VINFO_DATA_REF (stmt_info
)
658 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
660 && !STMT_VINFO_DATA_REF (def_stmt_info
)
661 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
662 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
))))
663 || (!STMT_VINFO_DATA_REF (stmt_info
)
664 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
666 || STMT_VINFO_DATA_REF (def_stmt_info
)
667 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
668 != REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
669 != (oprnd_info
->first_dt
!= vect_reduction_def
))))
671 /* Try swapping operands if we got a mismatch. For BB
672 vectorization only in case it will clearly improve things. */
673 if (i
== commutative_op
&& !swapped
674 && (!is_a
<bb_vec_info
> (vinfo
)
675 || (!vect_def_types_match ((*oprnds_info
)[i
+1]->first_dt
,
677 && (vect_def_types_match (oprnd_info
->first_dt
, dts
[i
+1])
678 || vect_def_types_match
679 ((*oprnds_info
)[i
+1]->first_dt
, dts
[i
])))))
681 if (dump_enabled_p ())
682 dump_printf_loc (MSG_NOTE
, vect_location
,
683 "trying swapped operands\n");
684 std::swap (dts
[i
], dts
[i
+1]);
685 std::swap ((*oprnds_info
)[i
]->def_stmts
[stmt_num
],
686 (*oprnds_info
)[i
+1]->def_stmts
[stmt_num
]);
687 std::swap ((*oprnds_info
)[i
]->ops
[stmt_num
],
688 (*oprnds_info
)[i
+1]->ops
[stmt_num
]);
693 if (is_a
<bb_vec_info
> (vinfo
)
694 && !oprnd_info
->any_pattern
)
696 /* Now for commutative ops we should see whether we can
697 make the other operand matching. */
698 if (dump_enabled_p ())
699 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
700 "treating operand as external\n");
701 oprnd_info
->first_dt
= dt
= vect_external_def
;
705 if (dump_enabled_p ())
706 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
707 "Build SLP failed: different types\n");
712 /* Make sure to demote the overall operand to external. */
713 if (dt
== vect_external_def
)
714 oprnd_info
->first_dt
= vect_external_def
;
715 /* For a SLP reduction chain we want to duplicate the reduction to
716 each of the chain members. That gets us a sane SLP graph (still
717 the stmts are not 100% correct wrt the initial values). */
718 else if ((dt
== vect_internal_def
719 || dt
== vect_reduction_def
)
720 && oprnd_info
->first_dt
== vect_reduction_def
721 && !STMT_VINFO_DATA_REF (stmt_info
)
722 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
)
723 && !STMT_VINFO_DATA_REF (def_stmt_info
)
724 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info
)
725 == REDUC_GROUP_FIRST_ELEMENT (stmt_info
)))
727 oprnd_info
->def_stmts
[stmt_num
] = oprnd_info
->def_stmts
[0];
728 oprnd_info
->ops
[stmt_num
] = oprnd_info
->ops
[0];
737 if (dump_enabled_p ())
738 dump_printf_loc (MSG_NOTE
, vect_location
,
739 "swapped operands to match def types in %G",
746 /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
747 Return true if we can, meaning that this choice doesn't conflict with
748 existing SLP nodes that use STMT_INFO. */
751 vect_update_shared_vectype (stmt_vec_info stmt_info
, tree vectype
)
753 tree old_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
755 return useless_type_conversion_p (vectype
, old_vectype
);
757 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
759 /* We maintain the invariant that if any statement in the group is
760 used, all other members of the group have the same vector type. */
761 stmt_vec_info first_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
762 stmt_vec_info member_info
= first_info
;
763 for (; member_info
; member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
764 if (is_pattern_stmt_p (member_info
)
765 && !useless_type_conversion_p (vectype
,
766 STMT_VINFO_VECTYPE (member_info
)))
771 for (member_info
= first_info
; member_info
;
772 member_info
= DR_GROUP_NEXT_ELEMENT (member_info
))
773 STMT_VINFO_VECTYPE (member_info
) = vectype
;
777 else if (!is_pattern_stmt_p (stmt_info
))
779 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
783 if (dump_enabled_p ())
785 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
786 "Build SLP failed: incompatible vector"
787 " types for: %G", stmt_info
->stmt
);
788 dump_printf_loc (MSG_NOTE
, vect_location
,
789 " old vector type: %T\n", old_vectype
);
790 dump_printf_loc (MSG_NOTE
, vect_location
,
791 " new vector type: %T\n", vectype
);
796 /* Return true if call statements CALL1 and CALL2 are similar enough
797 to be combined into the same SLP group. */
800 compatible_calls_p (gcall
*call1
, gcall
*call2
)
802 unsigned int nargs
= gimple_call_num_args (call1
);
803 if (nargs
!= gimple_call_num_args (call2
))
806 if (gimple_call_combined_fn (call1
) != gimple_call_combined_fn (call2
))
809 if (gimple_call_internal_p (call1
))
811 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1
)),
812 TREE_TYPE (gimple_call_lhs (call2
))))
814 for (unsigned int i
= 0; i
< nargs
; ++i
)
815 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1
, i
)),
816 TREE_TYPE (gimple_call_arg (call2
, i
))))
821 if (!operand_equal_p (gimple_call_fn (call1
),
822 gimple_call_fn (call2
), 0))
825 if (gimple_call_fntype (call1
) != gimple_call_fntype (call2
))
831 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
832 caller's attempt to find the vector type in STMT_INFO with the narrowest
833 element type. Return true if VECTYPE is nonnull and if it is valid
834 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
835 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
836 vect_build_slp_tree. */
839 vect_record_max_nunits (vec_info
*vinfo
, stmt_vec_info stmt_info
,
840 unsigned int group_size
,
841 tree vectype
, poly_uint64
*max_nunits
)
845 if (dump_enabled_p ())
846 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
847 "Build SLP failed: unsupported data-type in %G\n",
849 /* Fatal mismatch. */
853 /* If populating the vector type requires unrolling then fail
854 before adjusting *max_nunits for basic-block vectorization. */
855 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
856 unsigned HOST_WIDE_INT const_nunits
;
857 if (is_a
<bb_vec_info
> (vinfo
)
858 && (!nunits
.is_constant (&const_nunits
)
859 || const_nunits
> group_size
))
861 if (dump_enabled_p ())
862 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
863 "Build SLP failed: unrolling required "
864 "in basic block SLP\n");
865 /* Fatal mismatch. */
869 /* In case of multiple types we need to detect the smallest type. */
870 vect_update_max_nunits (max_nunits
, vectype
);
874 /* Verify if the scalar stmts STMTS are isomorphic, require data
875 permutation or are of unsupported types of operation. Return
876 true if they are, otherwise return false and indicate in *MATCHES
877 which stmts are not isomorphic to the first one. If MATCHES[0]
878 is false then this indicates the comparison could not be
879 carried out or the stmts will never be vectorized by SLP.
881 Note COND_EXPR is possibly isomorphic to another one after swapping its
882 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
883 the first stmt by swapping the two operands of comparison; set SWAP[i]
884 to 2 if stmt I is isomorphic to the first stmt by inverting the code
885 of comparison. Take A1 >= B1 ? X1 : Y1 as an example, it can be swapped
886 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
889 vect_build_slp_tree_1 (vec_info
*vinfo
, unsigned char *swap
,
890 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
891 poly_uint64
*max_nunits
, bool *matches
,
892 bool *two_operators
, tree
*node_vectype
)
895 stmt_vec_info first_stmt_info
= stmts
[0];
896 enum tree_code first_stmt_code
= ERROR_MARK
;
897 enum tree_code alt_stmt_code
= ERROR_MARK
;
898 enum tree_code rhs_code
= ERROR_MARK
;
899 enum tree_code first_cond_code
= ERROR_MARK
;
901 bool need_same_oprnds
= false;
902 tree vectype
= NULL_TREE
, first_op1
= NULL_TREE
;
905 machine_mode optab_op2_mode
;
906 machine_mode vec_mode
;
907 stmt_vec_info first_load
= NULL
, prev_first_load
= NULL
;
908 bool first_stmt_load_p
= false, load_p
= false;
909 bool first_stmt_phi_p
= false, phi_p
= false;
911 /* For every stmt in NODE find its def stmt/s. */
912 stmt_vec_info stmt_info
;
913 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
915 gimple
*stmt
= stmt_info
->stmt
;
919 if (dump_enabled_p ())
920 dump_printf_loc (MSG_NOTE
, vect_location
, "Build SLP for %G", stmt
);
922 /* Fail to vectorize statements marked as unvectorizable, throw
924 if (!STMT_VINFO_VECTORIZABLE (stmt_info
)
925 || stmt_can_throw_internal (cfun
, stmt
)
926 || gimple_has_volatile_ops (stmt
))
928 if (dump_enabled_p ())
929 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
930 "Build SLP failed: unvectorizable statement %G",
932 /* ??? For BB vectorization we want to commutate operands in a way
933 to shuffle all unvectorizable defs into one operand and have
934 the other still vectorized. The following doesn't reliably
935 work for this though but it's the easiest we can do here. */
936 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
938 /* Fatal mismatch. */
943 lhs
= gimple_get_lhs (stmt
);
944 if (lhs
== NULL_TREE
)
946 if (dump_enabled_p ())
947 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
948 "Build SLP failed: not GIMPLE_ASSIGN nor "
949 "GIMPLE_CALL %G", stmt
);
950 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
952 /* Fatal mismatch. */
958 if (!vect_get_vector_types_for_stmt (vinfo
, stmt_info
, &vectype
,
959 &nunits_vectype
, group_size
)
961 && !vect_record_max_nunits (vinfo
, stmt_info
, group_size
,
962 nunits_vectype
, max_nunits
)))
964 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
966 /* Fatal mismatch. */
971 gcc_assert (vectype
);
973 gcall
*call_stmt
= dyn_cast
<gcall
*> (stmt
);
976 rhs_code
= CALL_EXPR
;
978 if (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
))
980 else if ((gimple_call_internal_p (call_stmt
)
981 && (!vectorizable_internal_fn_p
982 (gimple_call_internal_fn (call_stmt
))))
983 || gimple_call_tail_p (call_stmt
)
984 || gimple_call_noreturn_p (call_stmt
)
985 || !gimple_call_nothrow_p (call_stmt
)
986 || gimple_call_chain (call_stmt
))
988 if (dump_enabled_p ())
989 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
990 "Build SLP failed: unsupported call type %G",
992 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
994 /* Fatal mismatch. */
999 else if (gimple_code (stmt
) == GIMPLE_PHI
)
1001 rhs_code
= ERROR_MARK
;
1006 rhs_code
= gimple_assign_rhs_code (stmt
);
1007 load_p
= gimple_vuse (stmt
);
1010 /* Check the operation. */
1013 *node_vectype
= vectype
;
1014 first_stmt_code
= rhs_code
;
1015 first_stmt_load_p
= load_p
;
1016 first_stmt_phi_p
= phi_p
;
1018 /* Shift arguments should be equal in all the packed stmts for a
1019 vector shift with scalar shift operand. */
1020 if (rhs_code
== LSHIFT_EXPR
|| rhs_code
== RSHIFT_EXPR
1021 || rhs_code
== LROTATE_EXPR
1022 || rhs_code
== RROTATE_EXPR
)
1024 vec_mode
= TYPE_MODE (vectype
);
1026 /* First see if we have a vector/vector shift. */
1027 optab
= optab_for_tree_code (rhs_code
, vectype
,
1031 || optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
1033 /* No vector/vector shift, try for a vector/scalar shift. */
1034 optab
= optab_for_tree_code (rhs_code
, vectype
,
1039 if (dump_enabled_p ())
1040 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1041 "Build SLP failed: no optab.\n");
1042 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
1044 /* Fatal mismatch. */
1048 icode
= (int) optab_handler (optab
, vec_mode
);
1049 if (icode
== CODE_FOR_nothing
)
1051 if (dump_enabled_p ())
1052 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1053 "Build SLP failed: "
1054 "op not supported by target.\n");
1055 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
1057 /* Fatal mismatch. */
1061 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
1062 if (!VECTOR_MODE_P (optab_op2_mode
))
1064 need_same_oprnds
= true;
1065 first_op1
= gimple_assign_rhs2 (stmt
);
1069 else if (rhs_code
== WIDEN_LSHIFT_EXPR
)
1071 need_same_oprnds
= true;
1072 first_op1
= gimple_assign_rhs2 (stmt
);
1075 && rhs_code
== BIT_FIELD_REF
)
1077 tree vec
= TREE_OPERAND (gimple_assign_rhs1 (stmt
), 0);
1078 if (TREE_CODE (vec
) != SSA_NAME
1079 || !types_compatible_p (vectype
, TREE_TYPE (vec
)))
1081 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
1083 if (dump_enabled_p ())
1084 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1085 "Build SLP failed: "
1086 "BIT_FIELD_REF not supported\n");
1087 /* Fatal mismatch. */
1093 && gimple_call_internal_p (call_stmt
, IFN_DIV_POW2
))
1095 need_same_oprnds
= true;
1096 first_op1
= gimple_call_arg (call_stmt
, 1);
1101 if (first_stmt_code
!= rhs_code
1102 && alt_stmt_code
== ERROR_MARK
)
1103 alt_stmt_code
= rhs_code
;
1104 if ((first_stmt_code
!= rhs_code
1105 && (first_stmt_code
!= IMAGPART_EXPR
1106 || rhs_code
!= REALPART_EXPR
)
1107 && (first_stmt_code
!= REALPART_EXPR
1108 || rhs_code
!= IMAGPART_EXPR
)
1109 /* Handle mismatches in plus/minus by computing both
1110 and merging the results. */
1111 && !((first_stmt_code
== PLUS_EXPR
1112 || first_stmt_code
== MINUS_EXPR
)
1113 && (alt_stmt_code
== PLUS_EXPR
1114 || alt_stmt_code
== MINUS_EXPR
)
1115 && rhs_code
== alt_stmt_code
)
1116 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1117 && (first_stmt_code
== ARRAY_REF
1118 || first_stmt_code
== BIT_FIELD_REF
1119 || first_stmt_code
== INDIRECT_REF
1120 || first_stmt_code
== COMPONENT_REF
1121 || first_stmt_code
== MEM_REF
)))
1122 || first_stmt_load_p
!= load_p
1123 || first_stmt_phi_p
!= phi_p
)
1125 if (dump_enabled_p ())
1127 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1128 "Build SLP failed: different operation "
1129 "in stmt %G", stmt
);
1130 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1131 "original stmt %G", first_stmt_info
->stmt
);
1137 if (need_same_oprnds
)
1139 tree other_op1
= (call_stmt
1140 ? gimple_call_arg (call_stmt
, 1)
1141 : gimple_assign_rhs2 (stmt
));
1142 if (!operand_equal_p (first_op1
, other_op1
, 0))
1144 if (dump_enabled_p ())
1145 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1146 "Build SLP failed: different shift "
1147 "arguments in %G", stmt
);
1153 && first_stmt_code
== BIT_FIELD_REF
1154 && (TREE_OPERAND (gimple_assign_rhs1 (first_stmt_info
->stmt
), 0)
1155 != TREE_OPERAND (gimple_assign_rhs1 (stmt_info
->stmt
), 0)))
1157 if (dump_enabled_p ())
1158 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1159 "Build SLP failed: different BIT_FIELD_REF "
1160 "arguments in %G", stmt
);
1165 if (!load_p
&& rhs_code
== CALL_EXPR
)
1167 if (!compatible_calls_p (as_a
<gcall
*> (stmts
[0]->stmt
),
1168 as_a
<gcall
*> (stmt
)))
1170 if (dump_enabled_p ())
1171 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1172 "Build SLP failed: different calls in %G",
1180 && (gimple_bb (first_stmt_info
->stmt
)
1181 != gimple_bb (stmt_info
->stmt
)))
1183 if (dump_enabled_p ())
1184 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1185 "Build SLP failed: different BB for PHI "
1191 if (!types_compatible_p (vectype
, *node_vectype
))
1193 if (dump_enabled_p ())
1194 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1195 "Build SLP failed: different vector type "
1202 /* Grouped store or load. */
1203 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1205 if (REFERENCE_CLASS_P (lhs
))
1213 first_load
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1214 if (prev_first_load
)
1216 /* Check that there are no loads from different interleaving
1217 chains in the same node. */
1218 if (prev_first_load
!= first_load
)
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
1223 "Build SLP failed: different "
1224 "interleaving chains in one node %G",
1231 prev_first_load
= first_load
;
1233 } /* Grouped access. */
1238 /* Not grouped load. */
1239 if (dump_enabled_p ())
1240 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1241 "Build SLP failed: not grouped load %G", stmt
);
1243 /* FORNOW: Not grouped loads are not supported. */
1244 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
1246 /* Fatal mismatch. */
1251 /* Not memory operation. */
1253 && TREE_CODE_CLASS (rhs_code
) != tcc_binary
1254 && TREE_CODE_CLASS (rhs_code
) != tcc_unary
1255 && TREE_CODE_CLASS (rhs_code
) != tcc_expression
1256 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
1257 && rhs_code
!= VIEW_CONVERT_EXPR
1258 && rhs_code
!= CALL_EXPR
1259 && rhs_code
!= BIT_FIELD_REF
)
1261 if (dump_enabled_p ())
1262 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1263 "Build SLP failed: operation unsupported %G",
1265 if (is_a
<bb_vec_info
> (vinfo
) && i
!= 0)
1267 /* Fatal mismatch. */
1272 if (rhs_code
== COND_EXPR
)
1274 tree cond_expr
= gimple_assign_rhs1 (stmt
);
1275 enum tree_code cond_code
= TREE_CODE (cond_expr
);
1276 enum tree_code swap_code
= ERROR_MARK
;
1277 enum tree_code invert_code
= ERROR_MARK
;
1280 first_cond_code
= TREE_CODE (cond_expr
);
1281 else if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
)
1283 bool honor_nans
= HONOR_NANS (TREE_OPERAND (cond_expr
, 0));
1284 swap_code
= swap_tree_comparison (cond_code
);
1285 invert_code
= invert_tree_comparison (cond_code
, honor_nans
);
1288 if (first_cond_code
== cond_code
)
1290 /* Isomorphic can be achieved by swapping. */
1291 else if (first_cond_code
== swap_code
)
1293 /* Isomorphic can be achieved by inverting. */
1294 else if (first_cond_code
== invert_code
)
1298 if (dump_enabled_p ())
1299 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1300 "Build SLP failed: different"
1301 " operation %G", stmt
);
1311 for (i
= 0; i
< group_size
; ++i
)
1315 /* If we allowed a two-operation SLP node verify the target can cope
1316 with the permute we are going to use. */
1317 if (alt_stmt_code
!= ERROR_MARK
1318 && TREE_CODE_CLASS (alt_stmt_code
) != tcc_reference
)
1320 *two_operators
= true;
1326 /* Traits for the hash_set to record failed SLP builds for a stmt set.
1327 Note we never remove apart from at destruction time so we do not
1328 need a special value for deleted that differs from empty. */
/* NOTE(review): this region is a lossy extraction -- the enclosing
   "struct bst_traits" declaration line and several interior lines are
   missing, and original line numbers are interleaved with the code.
   Comments below describe only what the visible lines establish.  */
/* The hashed key and the compare key are both vectors of stmt_vec_info.  */
1331 typedef vec
<stmt_vec_info
> value_type
;
1332 typedef vec
<stmt_vec_info
> compare_type
;
1333 static inline hashval_t
hash (value_type
);
1334 static inline bool equal (value_type existing
, value_type candidate
);
/* A slot is empty (and, per the note above, also "deleted") when the
   vec does not exist; the two states are deliberately identical.  */
1335 static inline bool is_empty (value_type x
) { return !x
.exists (); }
1336 static inline bool is_deleted (value_type x
) { return !x
.exists (); }
1337 static const bool empty_zero_p
= true;
/* Emptying, deleting or removing a slot releases the vec's storage.  */
1338 static inline void mark_empty (value_type
&x
) { x
.release (); }
1339 static inline void mark_deleted (value_type
&x
) { x
.release (); }
1340 static inline void remove (value_type
&x
) { x
.release (); }
/* Hash a stmt vector by folding in the gimple UID of each element.
   NOTE(review): the return-type line and the closing "return" of this
   member appear to be missing from the extraction -- confirm against
   the original source.  */
1343 bst_traits::hash (value_type x
)
1346 for (unsigned i
= 0; i
< x
.length (); ++i
)
1347 h
.add_int (gimple_uid (x
[i
]->stmt
));
/* Two stmt vectors compare equal when they have the same length and
   pairwise-identical stmt_vec_info elements.  */
1351 bst_traits::equal (value_type existing
, value_type candidate
)
1353 if (existing
.length () != candidate
.length ())
1355 for (unsigned i
= 0; i
< existing
.length (); ++i
)
1356 if (existing
[i
] != candidate
[i
])
/* Map from a vector of scalar stmts to the SLP tree built for them,
   using bst_traits for hashing/equality; used to cache (and re-use or
   reject) SLP discovery results.  */
1361 typedef hash_map
<vec
<gimple
*>, slp_tree
,
1362 simple_hashmap_traits
<bst_traits
, slp_tree
> >
1363 scalar_stmts_to_slp_tree_map_t
;
/* Forward declaration of the recursive discovery worker; defined below.
   NOTE(review): the "static slp_tree" return-type line appears to be
   missing from this extraction.  */
1366 vect_build_slp_tree_2 (vec_info
*vinfo
, slp_tree node
,
1367 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1368 poly_uint64
*max_nunits
,
1369 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1370 scalar_stmts_to_slp_tree_map_t
*bst_map
);
/* Caching wrapper around vect_build_slp_tree_2: look up STMTS in BST_MAP
   and re-use a previously built (or previously failed) tree; otherwise
   seed the map with a stub node, run discovery, and on failure mark the
   stub invalid so dangling backedge references can be detected.
   NOTE(review): several original lines (return statements, braces) are
   missing from this extraction; comments reflect only the visible code.  */
1373 vect_build_slp_tree (vec_info
*vinfo
,
1374 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1375 poly_uint64
*max_nunits
,
1376 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1377 scalar_stmts_to_slp_tree_map_t
*bst_map
)
/* Cache hit: a NULL leader records an earlier failed build.  */
1379 if (slp_tree
*leader
= bst_map
->get (stmts
))
1381 if (dump_enabled_p ())
1382 dump_printf_loc (MSG_NOTE
, vect_location
, "re-using %sSLP tree %p\n",
1383 *leader
? "" : "failed ", *leader
);
/* Re-using the cached tree adds a reference and folds in its nunits.  */
1386 SLP_TREE_REF_COUNT (*leader
)++;
1387 vect_update_max_nunits (max_nunits
, (*leader
)->max_nunits
);
1392 /* Seed the bst_map with a stub node to be filled by vect_build_slp_tree_2
1393 so we can pick up backedge destinations during discovery. */
1394 slp_tree res
= new _slp_tree
;
1395 SLP_TREE_DEF_TYPE (res
) = vect_internal_def
;
1396 SLP_TREE_SCALAR_STMTS (res
) = stmts
;
/* The map takes its own copy of the stmt vector as key.  */
1397 bst_map
->put (stmts
.copy (), res
);
1399 poly_uint64 this_max_nunits
= 1;
1400 slp_tree res_
= vect_build_slp_tree_2 (vinfo
, res
, stmts
, group_size
,
1402 matches
, npermutes
, tree_size
, bst_map
);
/* Failure path: overwrite the cached entry with NULL ...  */
1405 bool existed_p
= bst_map
->put (stmts
, NULL
);
1406 gcc_assert (existed_p
);
1407 /* Mark the node invalid so we can detect those when still in use
1408 as backedge destinations. */
1409 SLP_TREE_SCALAR_STMTS (res
) = vNULL
;
1410 SLP_TREE_DEF_TYPE (res
) = vect_uninitialized_def
;
1411 vect_free_slp_tree (res
);
/* Success path: discovery filled in the stub node itself.  */
1415 gcc_assert (res_
== res
);
1416 res
->max_nunits
= this_max_nunits
;
1417 vect_update_max_nunits (max_nunits
, this_max_nunits
);
1418 /* Keep a reference for the bst_map use. */
1419 SLP_TREE_REF_COUNT (res
)++;
1424 /* Recursively build an SLP tree starting from NODE.
1425 Fail (and return a value not equal to zero) if def-stmts are not
1426 isomorphic, require data permutation or are of unsupported types of
1427 operation. Otherwise, return 0.
1428 The value returned is the depth in the SLP tree where a mismatch
/* NOTE(review): this function is a lossy extraction -- many original
   lines (braces, returns, some sub-expressions) are missing and
   original line numbers are interleaved with the code.  The comments
   added below annotate only the visible statements.  */
1432 vect_build_slp_tree_2 (vec_info
*vinfo
, slp_tree node
,
1433 vec
<stmt_vec_info
> stmts
, unsigned int group_size
,
1434 poly_uint64
*max_nunits
,
1435 bool *matches
, unsigned *npermutes
, unsigned *tree_size
,
1436 scalar_stmts_to_slp_tree_map_t
*bst_map
)
1438 unsigned nops
, i
, this_tree_size
= 0;
1439 poly_uint64 this_max_nunits
= *max_nunits
;
/* Determine the operand count NOPS from the representative stmt's kind
   (call, assignment, or PHI).  */
1443 stmt_vec_info stmt_info
= stmts
[0];
1444 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1445 nops
= gimple_call_num_args (stmt
);
1446 else if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
1448 nops
= gimple_num_ops (stmt
) - 1;
1449 if (gimple_assign_rhs_code (stmt
) == COND_EXPR
)
1452 else if (gphi
*phi
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1453 nops
= gimple_phi_num_args (phi
);
1457 /* If the SLP node is a PHI (induction or reduction), terminate
/* skip_args[] flags PHI arguments (by edge dest index) that discovery
   must not recurse into.  */
1459 bool *skip_args
= XALLOCAVEC (bool, nops
);
1460 memset (skip_args
, 0, sizeof (bool) * nops
);
1461 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
1462 if (gphi
*stmt
= dyn_cast
<gphi
*> (stmt_info
->stmt
))
1464 tree scalar_type
= TREE_TYPE (PHI_RESULT (stmt
));
1465 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
1467 if (!vect_record_max_nunits (vinfo
, stmt_info
, group_size
, vectype
,
1471 vect_def_type def_type
= STMT_VINFO_DEF_TYPE (stmt_info
);
1472 if (def_type
== vect_induction_def
)
1474 /* Induction PHIs are not cycles but walk the initial
1475 value. Only for inner loops through, for outer loops
1476 we need to pick up the value from the actual PHIs
1477 to more easily support peeling and epilogue vectorization. */
1478 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1479 if (!nested_in_vect_loop_p (loop
, stmt_info
))
1480 skip_args
[loop_preheader_edge (loop
)->dest_idx
] = true;
1483 skip_args
[loop_latch_edge (loop
)->dest_idx
] = true;
1485 else if (def_type
== vect_reduction_def
1486 || def_type
== vect_double_reduction_def
1487 || def_type
== vect_nested_cycle
)
1489 /* Else def types have to match. */
1490 stmt_vec_info other_info
;
1491 bool all_same
= true;
1492 FOR_EACH_VEC_ELT (stmts
, i
, other_info
)
1494 if (STMT_VINFO_DEF_TYPE (other_info
) != def_type
)
1496 if (other_info
!= stmt_info
)
1499 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1500 /* Reduction initial values are not explicitely represented. */
1501 if (!nested_in_vect_loop_p (loop
, stmt_info
))
1502 skip_args
[loop_preheader_edge (loop
)->dest_idx
] = true;
1503 /* Reduction chain backedge defs are filled manually.
1504 ??? Need a better way to identify a SLP reduction chain PHI.
1505 Or a better overall way to SLP match those. */
1506 if (all_same
&& def_type
== vect_reduction_def
)
1507 skip_args
[loop_latch_edge (loop
)->dest_idx
] = true;
1509 else if (def_type
!= vect_internal_def
)
/* Check the whole group for isomorphism (vect_build_slp_tree_1) before
   recursing into operands.  */
1514 bool two_operators
= false;
1515 unsigned char *swap
= XALLOCAVEC (unsigned char, group_size
);
1516 tree vectype
= NULL_TREE
;
1517 if (!vect_build_slp_tree_1 (vinfo
, swap
, stmts
, group_size
,
1518 &this_max_nunits
, matches
, &two_operators
,
1522 /* If the SLP node is a load, terminate the recursion unless masked. */
1523 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1524 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
1526 if (gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
1529 gcc_assert (gimple_call_internal_p (stmt
, IFN_MASK_LOAD
));
1534 *max_nunits
= this_max_nunits
;
/* Leaf load node: record the load permutation relative to the first
   element of the interleaving chain.  */
1536 node
= vect_create_new_slp_node (node
, stmts
, 0);
1537 SLP_TREE_VECTYPE (node
) = vectype
;
1538 /* And compute the load permutation. Whether it is actually
1539 a permutation depends on the unrolling factor which is
1541 vec
<unsigned> load_permutation
;
1543 stmt_vec_info load_info
;
1544 load_permutation
.create (group_size
);
1545 stmt_vec_info first_stmt_info
1546 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
1547 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
1549 int load_place
= vect_get_place_in_interleaving_chain
1550 (load_info
, first_stmt_info
);
1551 gcc_assert (load_place
!= -1);
1552 load_permutation
.safe_push (load_place
);
1554 SLP_TREE_LOAD_PERMUTATION (node
) = load_permutation
;
/* BIT_FIELD_REF extracts from a vector: model as a VEC_PERM of a
   vector-defs child node.  */
1558 else if (gimple_assign_single_p (stmt_info
->stmt
)
1559 && !gimple_vuse (stmt_info
->stmt
)
1560 && gimple_assign_rhs_code (stmt_info
->stmt
) == BIT_FIELD_REF
)
1562 /* vect_build_slp_tree_2 determined all BIT_FIELD_REFs reference
1563 the same SSA name vector of a compatible type to vectype. */
1564 vec
<std::pair
<unsigned, unsigned> > lperm
= vNULL
;
1565 tree vec
= TREE_OPERAND (gimple_assign_rhs1 (stmt_info
->stmt
), 0);
1566 stmt_vec_info estmt_info
;
1567 FOR_EACH_VEC_ELT (stmts
, i
, estmt_info
)
1569 gassign
*estmt
= as_a
<gassign
*> (estmt_info
->stmt
);
1570 tree bfref
= gimple_assign_rhs1 (estmt
);
1572 if (!known_eq (bit_field_size (bfref
),
1573 tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (vectype
))))
1574 || !constant_multiple_p (bit_field_offset (bfref
),
1575 bit_field_size (bfref
), &lane
))
1580 lperm
.safe_push (std::make_pair (0, (unsigned)lane
));
1582 slp_tree vnode
= vect_create_new_slp_node (vNULL
);
1583 SLP_TREE_VECTYPE (vnode
) = TREE_TYPE (vec
);
1584 SLP_TREE_VEC_DEFS (vnode
).safe_push (vec
);
1585 /* We are always building a permutation node even if it is an identity
1586 permute to shield the rest of the vectorizer from the odd node
1587 representing an actual vector without any scalar ops.
1588 ??? We could hide it completely with making the permute node
1590 node
= vect_create_new_slp_node (node
, stmts
, 1);
1591 SLP_TREE_CODE (node
) = VEC_PERM_EXPR
;
1592 SLP_TREE_LANE_PERMUTATION (node
) = lperm
;
1593 SLP_TREE_VECTYPE (node
) = vectype
;
1594 SLP_TREE_CHILDREN (node
).quick_push (vnode
);
1598 /* Get at the operands, verifying they are compatible. */
1599 vec
<slp_oprnd_info
> oprnds_info
= vect_create_oprnd_info (nops
, group_size
);
1600 slp_oprnd_info oprnd_info
;
1601 FOR_EACH_VEC_ELT (stmts
, i
, stmt_info
)
1603 int res
= vect_get_and_check_slp_defs (vinfo
, swap
[i
], skip_args
,
1604 stmts
, i
, &oprnds_info
);
/* res == -1 signals a fatal mismatch recorded on lane 0.  */
1606 matches
[(res
== -1) ? 0 : i
] = false;
1610 for (i
= 0; i
< group_size
; ++i
)
1613 vect_free_oprnd_info (oprnds_info
);
1618 auto_vec
<slp_tree
, 4> children
;
1620 stmt_info
= stmts
[0];
1622 /* Create SLP_TREE nodes for the definition node/s. */
1623 FOR_EACH_VEC_ELT (oprnds_info
, i
, oprnd_info
)
1628 /* We're skipping certain operands from processing, for example
1629 outer loop reduction initial defs. */
1632 children
.safe_push (NULL
);
1636 if (oprnd_info
->first_dt
== vect_uninitialized_def
)
1638 /* COND_EXPR have one too many eventually if the condition
1640 gcc_assert (i
== 3 && nops
== 4);
1644 if (is_a
<bb_vec_info
> (vinfo
)
1645 && oprnd_info
->first_dt
== vect_internal_def
1646 && !oprnd_info
->any_pattern
)
1648 /* For BB vectorization, if all defs are the same do not
1649 bother to continue the build along the single-lane
1650 graph but use a splat of the scalar value. */
1651 stmt_vec_info first_def
= oprnd_info
->def_stmts
[0];
1652 for (j
= 1; j
< group_size
; ++j
)
1653 if (oprnd_info
->def_stmts
[j
] != first_def
)
1656 /* But avoid doing this for loads where we may be
1657 able to CSE things, unless the stmt is not
1659 && (!STMT_VINFO_VECTORIZABLE (first_def
)
1660 || !gimple_vuse (first_def
->stmt
)))
1662 if (dump_enabled_p ())
1663 dump_printf_loc (MSG_NOTE
, vect_location
,
1664 "Using a splat of the uniform operand\n");
1665 oprnd_info
->first_dt
= vect_external_def
;
/* External/constant operands become invariant leaf nodes.  */
1669 if (oprnd_info
->first_dt
== vect_external_def
1670 || oprnd_info
->first_dt
== vect_constant_def
)
1672 slp_tree invnode
= vect_create_new_slp_node (oprnd_info
->ops
);
1673 SLP_TREE_DEF_TYPE (invnode
) = oprnd_info
->first_dt
;
1674 oprnd_info
->ops
= vNULL
;
1675 children
.safe_push (invnode
);
/* Internal defs: recurse through the caching wrapper.  */
1679 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1680 group_size
, &this_max_nunits
,
1682 &this_tree_size
, bst_map
)) != NULL
)
1684 oprnd_info
->def_stmts
= vNULL
;
1685 children
.safe_push (child
);
1689 /* If the SLP build for operand zero failed and operand zero
1690 and one can be commutated try that for the scalar stmts
1691 that failed the match. */
1693 /* A first scalar stmt mismatch signals a fatal mismatch. */
1695 /* ??? For COND_EXPRs we can swap the comparison operands
1696 as well as the arms under some constraints. */
1698 && oprnds_info
[1]->first_dt
== vect_internal_def
1699 && is_gimple_assign (stmt_info
->stmt
)
1700 /* Swapping operands for reductions breaks assumptions later on. */
1701 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_reduction_def
1702 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_double_reduction_def
1703 /* Do so only if the number of not successful permutes was nor more
1704 than a cut-ff as re-trying the recursive match on
1705 possibly each level of the tree would expose exponential
1709 /* See whether we can swap the matching or the non-matching
1711 bool swap_not_matching
= true;
1714 for (j
= 0; j
< group_size
; ++j
)
1716 if (matches
[j
] != !swap_not_matching
)
1718 stmt_vec_info stmt_info
= stmts
[j
];
1719 /* Verify if we can swap operands of this stmt. */
1720 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1722 || !commutative_tree_code (gimple_assign_rhs_code (stmt
)))
1724 if (!swap_not_matching
)
1726 swap_not_matching
= false;
1731 while (j
!= group_size
);
1733 /* Swap mismatched definition stmts. */
1734 if (dump_enabled_p ())
1735 dump_printf_loc (MSG_NOTE
, vect_location
,
1736 "Re-trying with swapped operands of stmts ");
1737 for (j
= 0; j
< group_size
; ++j
)
1738 if (matches
[j
] == !swap_not_matching
)
1740 std::swap (oprnds_info
[0]->def_stmts
[j
],
1741 oprnds_info
[1]->def_stmts
[j
]);
1742 std::swap (oprnds_info
[0]->ops
[j
],
1743 oprnds_info
[1]->ops
[j
]);
1744 if (dump_enabled_p ())
1745 dump_printf (MSG_NOTE
, "%d ", j
);
1747 if (dump_enabled_p ())
1748 dump_printf (MSG_NOTE
, "\n");
1749 /* And try again with scratch 'matches' ... */
1750 bool *tem
= XALLOCAVEC (bool, group_size
);
1751 if ((child
= vect_build_slp_tree (vinfo
, oprnd_info
->def_stmts
,
1752 group_size
, &this_max_nunits
,
1754 &this_tree_size
, bst_map
)) != NULL
)
1756 oprnd_info
->def_stmts
= vNULL
;
1757 children
.safe_push (child
);
1760 /* We do not undo the swapping here since it might still be
1761 the better order for the second operand in case we build
1762 the first one from scalars below. */
1767 /* If the SLP build failed and we analyze a basic-block
1768 simply treat nodes we fail to build as externally defined
1769 (and thus build vectors from the scalar defs).
1770 The cost model will reject outright expensive cases.
1771 ??? This doesn't treat cases where permutation ultimatively
1772 fails (or we don't try permutation below). Ideally we'd
1773 even compute a permutation that will end up with the maximum
1775 if (is_a
<bb_vec_info
> (vinfo
)
1776 /* ??? Rejecting patterns this way doesn't work. We'd have to
1777 do extra work to cancel the pattern so the uses see the
1779 && !is_pattern_stmt_p (stmt_info
)
1780 && !oprnd_info
->any_pattern
)
1782 /* But if there's a leading vector sized set of matching stmts
1783 fail here so we can split the group. This matches the condition
1784 vect_analyze_slp_instance uses. */
1785 /* ??? We might want to split here and combine the results to support
1786 multiple vector sizes better. */
1787 for (j
= 0; j
< group_size
; ++j
)
1790 if (!known_ge (j
, TYPE_VECTOR_SUBPARTS (vectype
)))
1792 if (dump_enabled_p ())
1793 dump_printf_loc (MSG_NOTE
, vect_location
,
1794 "Building vector operands from scalars\n");
1796 child
= vect_create_new_slp_node (oprnd_info
->ops
);
1797 children
.safe_push (child
);
1798 oprnd_info
->ops
= vNULL
;
/* Full failure: release any children built so far.  */
1803 gcc_assert (child
== NULL
);
1804 FOR_EACH_VEC_ELT (children
, j
, child
)
1806 vect_free_slp_tree (child
);
1807 vect_free_oprnd_info (oprnds_info
);
1811 vect_free_oprnd_info (oprnds_info
);
1813 /* If we have all children of a child built up from uniform scalars
1814 or does more than one possibly expensive vector construction then
1815 just throw that away, causing it built up from scalars.
1816 The exception is the SLP node for the vector store. */
1817 if (is_a
<bb_vec_info
> (vinfo
)
1818 && !STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1819 /* ??? Rejecting patterns this way doesn't work. We'd have to
1820 do extra work to cancel the pattern so the uses see the
1822 && !is_pattern_stmt_p (stmt_info
))
1826 bool all_uniform_p
= true;
1827 unsigned n_vector_builds
= 0;
1828 FOR_EACH_VEC_ELT (children
, j
, child
)
1832 else if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
1833 all_uniform_p
= false;
1834 else if (!vect_slp_tree_uniform_p (child
))
1836 all_uniform_p
= false;
1837 if (SLP_TREE_DEF_TYPE (child
) == vect_external_def
)
1841 if (all_uniform_p
|| n_vector_builds
> 1)
1845 FOR_EACH_VEC_ELT (children
, j
, child
)
1847 vect_free_slp_tree (child
);
1849 if (dump_enabled_p ())
1850 dump_printf_loc (MSG_NOTE
, vect_location
,
1851 "Building parent vector operands from "
1852 "scalars instead\n");
1857 *tree_size
+= this_tree_size
+ 1;
1858 *max_nunits
= this_max_nunits
;
/* Two-operator node: split lanes into 'one' and 'two' children and
   blend them with a VEC_PERM_EXPR parent.  */
1862 /* ??? We'd likely want to either cache in bst_map sth like
1863 { a+b, NULL, a+b, NULL } and { NULL, a-b, NULL, a-b } or
1864 the true { a+b, a+b, a+b, a+b } ... but there we don't have
1865 explicit stmts to put in so the keying on 'stmts' doesn't
1866 work (but we have the same issue with nodes that use 'ops'). */
1867 slp_tree one
= new _slp_tree
;
1868 slp_tree two
= new _slp_tree
;
1869 SLP_TREE_DEF_TYPE (one
) = vect_internal_def
;
1870 SLP_TREE_DEF_TYPE (two
) = vect_internal_def
;
1871 SLP_TREE_VECTYPE (one
) = vectype
;
1872 SLP_TREE_VECTYPE (two
) = vectype
;
1873 SLP_TREE_CHILDREN (one
).safe_splice (children
);
1874 SLP_TREE_CHILDREN (two
).safe_splice (children
);
1876 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (two
), i
, child
)
1877 SLP_TREE_REF_COUNT (child
)++;
1879 /* Here we record the original defs since this
1880 node represents the final lane configuration. */
1881 node
= vect_create_new_slp_node (node
, stmts
, 2);
1882 SLP_TREE_VECTYPE (node
) = vectype
;
1883 SLP_TREE_CODE (node
) = VEC_PERM_EXPR
;
1884 SLP_TREE_CHILDREN (node
).quick_push (one
);
1885 SLP_TREE_CHILDREN (node
).quick_push (two
);
1886 gassign
*stmt
= as_a
<gassign
*> (stmts
[0]->stmt
);
1887 enum tree_code code0
= gimple_assign_rhs_code (stmt
);
1888 enum tree_code ocode
= ERROR_MARK
;
1889 stmt_vec_info ostmt_info
;
/* Lanes whose rhs code differs from lane 0 select from child 'two'.  */
1891 FOR_EACH_VEC_ELT (stmts
, i
, ostmt_info
)
1893 gassign
*ostmt
= as_a
<gassign
*> (ostmt_info
->stmt
);
1894 if (gimple_assign_rhs_code (ostmt
) != code0
)
1896 SLP_TREE_LANE_PERMUTATION (node
).safe_push (std::make_pair (1, i
));
1897 ocode
= gimple_assign_rhs_code (ostmt
);
1901 SLP_TREE_LANE_PERMUTATION (node
).safe_push (std::make_pair (0, i
));
1903 SLP_TREE_CODE (one
) = code0
;
1904 SLP_TREE_CODE (two
) = ocode
;
1905 SLP_TREE_LANES (one
) = stmts
.length ();
1906 SLP_TREE_LANES (two
) = stmts
.length ();
1907 SLP_TREE_REPRESENTATIVE (one
) = stmts
[0];
1908 SLP_TREE_REPRESENTATIVE (two
) = stmts
[j
];
/* Plain case: a regular internal node with the built children.  */
1912 node
= vect_create_new_slp_node (node
, stmts
, nops
);
1913 SLP_TREE_VECTYPE (node
) = vectype
;
1914 SLP_TREE_CHILDREN (node
).splice (children
);
1918 /* Dump a single SLP tree NODE. */
/* Prints the node header (def type, max_nunits, refcount), its op or
   representative stmt, scalar stmts or ops, load/lane permutations and
   child pointers.  NOTE(review): some original lines are missing from
   this extraction.  */
1921 vect_print_slp_tree (dump_flags_t dump_kind
, dump_location_t loc
,
1926 stmt_vec_info stmt_info
;
1929 dump_metadata_t
metadata (dump_kind
, loc
.get_impl_location ());
1930 dump_user_location_t user_loc
= loc
.get_user_location ();
1931 dump_printf_loc (metadata
, user_loc
, "node%s %p (max_nunits=%u, refcnt=%u)\n",
1932 SLP_TREE_DEF_TYPE (node
) == vect_external_def
1934 : (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
1937 estimated_poly_value (node
->max_nunits
),
1938 SLP_TREE_REF_COUNT (node
));
1939 if (SLP_TREE_DEF_TYPE (node
) == vect_internal_def
)
1941 if (SLP_TREE_CODE (node
) == VEC_PERM_EXPR
)
1942 dump_printf_loc (metadata
, user_loc
, "op: VEC_PERM_EXPR\n");
1944 dump_printf_loc (metadata
, user_loc
, "op template: %G",
1945 SLP_TREE_REPRESENTATIVE (node
)->stmt
);
1947 if (SLP_TREE_SCALAR_STMTS (node
).exists ())
1948 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
1949 dump_printf_loc (metadata
, user_loc
, "\tstmt %u %G", i
, stmt_info
->stmt
);
1952 dump_printf_loc (metadata
, user_loc
, "\t{ ");
1953 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node
), i
, op
)
1954 dump_printf (metadata
, "%T%s ", op
,
1955 i
< SLP_TREE_SCALAR_OPS (node
).length () - 1 ? "," : "");
1956 dump_printf (metadata
, "}\n");
1958 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
1960 dump_printf_loc (metadata
, user_loc
, "\tload permutation {");
1961 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node
), i
, j
)
1962 dump_printf (dump_kind
, " %u", j
);
1963 dump_printf (dump_kind
, " }\n");
1965 if (SLP_TREE_LANE_PERMUTATION (node
).exists ())
1967 dump_printf_loc (metadata
, user_loc
, "\tlane permutation {");
1968 for (i
= 0; i
< SLP_TREE_LANE_PERMUTATION (node
).length (); ++i
)
1969 dump_printf (dump_kind
, " %u[%u]",
1970 SLP_TREE_LANE_PERMUTATION (node
)[i
].first
,
1971 SLP_TREE_LANE_PERMUTATION (node
)[i
].second
);
1972 dump_printf (dump_kind
, " }\n");
1974 if (SLP_TREE_CHILDREN (node
).is_empty ())
1976 dump_printf_loc (metadata
, user_loc
, "\tchildren");
1977 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
1978 dump_printf (dump_kind
, " %p", (void *)child
);
1979 dump_printf (dump_kind
, "\n");
/* Debugger entry point: dump NODE via vect_print_slp_tree with MSG_NOTE
   and no source location, routed through a debug_dump_context.  */
1983 debug (slp_tree node
)
1985 debug_dump_context ctx
;
1986 vect_print_slp_tree (MSG_NOTE
,
1987 dump_location_t::from_location_t (UNKNOWN_LOCATION
),
1991 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
/* Recursive worker: VISITED guards against printing shared nodes twice
   (and against cycles in the SLP graph).  */
1994 vect_print_slp_graph (dump_flags_t dump_kind
, dump_location_t loc
,
1995 slp_tree node
, hash_set
<slp_tree
> &visited
)
2000 if (visited
.add (node
))
2003 vect_print_slp_tree (dump_kind
, loc
, node
)
;
2005 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2007 vect_print_slp_graph (dump_kind
, loc
, child
, visited
);
/* Entry overload: creates the visited set and starts from ENTRY.  */
2011 vect_print_slp_graph (dump_flags_t dump_kind
, dump_location_t loc
,
2014 hash_set
<slp_tree
> visited
;
2015 vect_print_slp_graph (dump_kind
, loc
, entry
, visited
);
2018 /* Mark the tree rooted at NODE with PURE_SLP. */
/* Recursive worker: skips non-internal nodes and already-visited nodes,
   sets every scalar stmt's SLP type to pure_slp, then recurses.  */
2021 vect_mark_slp_stmts (slp_tree node
, hash_set
<slp_tree
> &visited
)
2024 stmt_vec_info stmt_info
;
2027 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2030 if (visited
.add (node
))
2033 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2034 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
2036 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2038 vect_mark_slp_stmts (child
, visited
);
/* Entry overload: creates the visited set.  */
2042 vect_mark_slp_stmts (slp_tree node
)
2044 hash_set
<slp_tree
> visited
;
2045 vect_mark_slp_stmts (node
, visited
);
2048 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
/* Recursive worker: like vect_mark_slp_stmts but sets relevance; asserts
   a stmt is either not yet relevant or already vect_used_in_scope.  */
2051 vect_mark_slp_stmts_relevant (slp_tree node
, hash_set
<slp_tree
> &visited
)
2054 stmt_vec_info stmt_info
;
2057 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2060 if (visited
.add (node
))
2063 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
2065 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info
)
2066 || STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_scope
);
2067 STMT_VINFO_RELEVANT (stmt_info
) = vect_used_in_scope
;
2070 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2072 vect_mark_slp_stmts_relevant (child
, visited
);
/* Entry overload: creates the visited set.  */
2076 vect_mark_slp_stmts_relevant (slp_tree node
)
2078 hash_set
<slp_tree
> visited
;
2079 vect_mark_slp_stmts_relevant (node
, visited
);
2083 /* Gather loads in the SLP graph NODE and populate the INST loads array. */
/* Leaf internal nodes whose representative stmt is a grouped read are
   pushed onto LOADS; VISITED prevents revisiting shared subtrees.  */
2086 vect_gather_slp_loads (vec
<slp_tree
> &loads
, slp_tree node
,
2087 hash_set
<slp_tree
> &visited
)
2089 if (!node
|| visited
.add (node
))
2092 if (SLP_TREE_CHILDREN (node
).length () == 0)
2094 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2096 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
2097 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
2098 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
2099 loads
.safe_push (node
);
2105 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2106 vect_gather_slp_loads (loads
, child
, visited
);
2111 /* Find the last store in SLP INSTANCE. */
/* Walks NODE's scalar stmts (mapped back to their original stmts) and
   keeps the latest one according to get_later_stmt.  */
2114 vect_find_last_scalar_stmt_in_slp (slp_tree node
)
2116 stmt_vec_info last
= NULL
;
2117 stmt_vec_info stmt_vinfo
;
2119 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
2121 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
2122 last
= last
? get_later_stmt (stmt_vinfo
, last
) : stmt_vinfo
;
2128 /* Find the first stmt in NODE. */
/* Counterpart of vect_find_last_scalar_stmt_in_slp: keeps the earliest
   original stmt.  NOTE(review): part of the comparison's condition
   appears to be missing from this extraction.  */
2131 vect_find_first_scalar_stmt_in_slp (slp_tree node
)
2133 stmt_vec_info first
= NULL
;
2134 stmt_vec_info stmt_vinfo
;
2136 for (int i
= 0; SLP_TREE_SCALAR_STMTS (node
).iterate (i
, &stmt_vinfo
); i
++)
2138 stmt_vinfo
= vect_orig_stmt (stmt_vinfo
);
2140 || get_later_stmt (stmt_vinfo
, first
) == first
)
2147 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
2148 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
2149 (also containing the first GROUP1_SIZE stmts, since stores are
2150 consecutive), the second containing the remainder.
2151 Return the first stmt in the second group. */
2153 static stmt_vec_info
2154 vect_split_slp_store_group (stmt_vec_info first_vinfo
, unsigned group1_size
)
/* FIRST_VINFO must be the group leader and the split must leave both
   halves non-empty.  */
2156 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo
) == first_vinfo
);
2157 gcc_assert (group1_size
> 0);
2158 int group2_size
= DR_GROUP_SIZE (first_vinfo
) - group1_size
;
2159 gcc_assert (group2_size
> 0);
2160 DR_GROUP_SIZE (first_vinfo
) = group1_size
;
/* Walk to the last element of the first group, then cut the chain.  */
2162 stmt_vec_info stmt_info
= first_vinfo
;
2163 for (unsigned i
= group1_size
; i
> 1; i
--)
2165 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2166 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
2168 /* STMT is now the last element of the first group. */
2169 stmt_vec_info group2
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2170 DR_GROUP_NEXT_ELEMENT (stmt_info
) = 0;
/* Re-head the remainder as its own group.  */
2172 DR_GROUP_SIZE (group2
) = group2_size
;
2173 for (stmt_info
= group2
; stmt_info
;
2174 stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
))
2176 DR_GROUP_FIRST_ELEMENT (stmt_info
) = group2
;
2177 gcc_assert (DR_GROUP_GAP (stmt_info
) == 1);
2180 /* For the second group, the DR_GROUP_GAP is that before the original group,
2181 plus skipping over the first vector. */
2182 DR_GROUP_GAP (group2
) = DR_GROUP_GAP (first_vinfo
) + group1_size
;
2184 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
2185 DR_GROUP_GAP (first_vinfo
) += group2_size
;
2187 if (dump_enabled_p ())
2188 dump_printf_loc (MSG_NOTE
, vect_location
, "Split group into %d and %d\n",
2189 group1_size
, group2_size
);
2194 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
2195 statements and a vector of NUNITS elements. */
/* I.e. lcm (nunits, group_size) / group_size; the division is exact by
   construction of the least common multiple.  */
2198 calculate_unrolling_factor (poly_uint64 nunits
, unsigned int group_size
)
2200 return exact_div (common_multiple (nunits
, group_size
), group_size
);
/* Forward declaration; the definition follows later in the file.
   NOTE(review): the return-type line appears to be missing from this
   extraction.  */
2204 vect_analyze_slp_instance (vec_info
*vinfo
,
2205 scalar_stmts_to_slp_tree_map_t
*bst_map
,
2206 stmt_vec_info stmt_info
, slp_instance_kind kind
,
2207 unsigned max_tree_size
);
2209 /* Analyze an SLP instance starting from SCALAR_STMTS which are a group
2210 of KIND. Return true if successful. */
2213 vect_build_slp_instance (vec_info
*vinfo
,
2214 slp_instance_kind kind
,
2215 vec
<stmt_vec_info
> scalar_stmts
,
2216 stmt_vec_info root_stmt_info
,
2217 unsigned max_tree_size
,
2218 scalar_stmts_to_slp_tree_map_t
*bst_map
,
2219 /* ??? We need stmt_info for group splitting. */
2220 stmt_vec_info stmt_info_
)
2222 if (dump_enabled_p ())
2224 dump_printf_loc (MSG_NOTE
, vect_location
,
2225 "Starting SLP discovery for\n");
2226 for (unsigned i
= 0; i
< scalar_stmts
.length (); ++i
)
2227 dump_printf_loc (MSG_NOTE
, vect_location
,
2228 " %G", scalar_stmts
[i
]->stmt
);
2231 /* Build the tree for the SLP instance. */
2232 unsigned int group_size
= scalar_stmts
.length ();
2233 bool *matches
= XALLOCAVEC (bool, group_size
);
2234 unsigned npermutes
= 0;
2235 poly_uint64 max_nunits
= 1;
2236 unsigned tree_size
= 0;
2238 slp_tree node
= vect_build_slp_tree (vinfo
, scalar_stmts
, group_size
,
2239 &max_nunits
, matches
, &npermutes
,
2240 &tree_size
, bst_map
);
2243 /* Calculate the unrolling factor based on the smallest type. */
2244 poly_uint64 unrolling_factor
2245 = calculate_unrolling_factor (max_nunits
, group_size
);
2247 if (maybe_ne (unrolling_factor
, 1U)
2248 && is_a
<bb_vec_info
> (vinfo
))
2250 unsigned HOST_WIDE_INT const_max_nunits
;
2251 if (!max_nunits
.is_constant (&const_max_nunits
)
2252 || const_max_nunits
> group_size
)
2254 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2256 "Build SLP failed: store group "
2257 "size not a multiple of the vector size "
2258 "in basic block SLP\n");
2259 vect_free_slp_tree (node
);
2262 /* Fatal mismatch. */
2263 if (dump_enabled_p ())
2264 dump_printf_loc (MSG_NOTE
, vect_location
,
2265 "SLP discovery succeeded but node needs "
2267 memset (matches
, true, group_size
);
2268 matches
[group_size
/ const_max_nunits
* const_max_nunits
] = false;
2269 vect_free_slp_tree (node
);
2273 /* Create a new SLP instance. */
2274 slp_instance new_instance
= XNEW (class _slp_instance
);
2275 SLP_INSTANCE_TREE (new_instance
) = node
;
2276 SLP_INSTANCE_UNROLLING_FACTOR (new_instance
) = unrolling_factor
;
2277 SLP_INSTANCE_LOADS (new_instance
) = vNULL
;
2278 SLP_INSTANCE_ROOT_STMT (new_instance
) = root_stmt_info
;
2279 SLP_INSTANCE_KIND (new_instance
) = kind
;
2280 new_instance
->reduc_phis
= NULL
;
2281 new_instance
->cost_vec
= vNULL
;
2282 new_instance
->subgraph_entries
= vNULL
;
2284 if (dump_enabled_p ())
2285 dump_printf_loc (MSG_NOTE
, vect_location
,
2286 "SLP size %u vs. limit %u.\n",
2287 tree_size
, max_tree_size
);
2289 /* Fixup SLP reduction chains. */
2290 if (kind
== slp_inst_kind_reduc_chain
)
2292 /* If this is a reduction chain with a conversion in front
2293 amend the SLP tree with a node for that. */
2295 = vect_orig_stmt (scalar_stmts
[group_size
- 1])->stmt
;
2296 if (STMT_VINFO_DEF_TYPE (scalar_stmts
[0]) != vect_reduction_def
)
2298 /* Get at the conversion stmt - we know it's the single use
2299 of the last stmt of the reduction chain. */
2300 use_operand_p use_p
;
2301 bool r
= single_imm_use (gimple_assign_lhs (scalar_def
),
2302 &use_p
, &scalar_def
);
2304 stmt_vec_info next_info
= vinfo
->lookup_stmt (scalar_def
);
2305 next_info
= vect_stmt_to_vectorize (next_info
);
2306 scalar_stmts
= vNULL
;
2307 scalar_stmts
.create (group_size
);
2308 for (unsigned i
= 0; i
< group_size
; ++i
)
2309 scalar_stmts
.quick_push (next_info
);
2310 slp_tree conv
= vect_create_new_slp_node (scalar_stmts
, 1);
2311 SLP_TREE_VECTYPE (conv
) = STMT_VINFO_VECTYPE (next_info
);
2312 SLP_TREE_CHILDREN (conv
).quick_push (node
);
2313 SLP_INSTANCE_TREE (new_instance
) = conv
;
2314 /* We also have to fake this conversion stmt as SLP reduction
2315 group so we don't have to mess with too much code
2317 REDUC_GROUP_FIRST_ELEMENT (next_info
) = next_info
;
2318 REDUC_GROUP_NEXT_ELEMENT (next_info
) = NULL
;
2320 /* Fill the backedge child of the PHI SLP node. The
2321 general matching code cannot find it because the
2322 scalar code does not reflect how we vectorize the
2324 use_operand_p use_p
;
2325 imm_use_iterator imm_iter
;
2326 class loop
*loop
= LOOP_VINFO_LOOP (as_a
<loop_vec_info
> (vinfo
));
2327 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
,
2328 gimple_get_lhs (scalar_def
))
2329 /* There are exactly two non-debug uses, the reduction
2330 PHI and the loop-closed PHI node. */
2331 if (!is_gimple_debug (USE_STMT (use_p
))
2332 && gimple_bb (USE_STMT (use_p
)) == loop
->header
)
2334 auto_vec
<stmt_vec_info
, 64> phis (group_size
);
2335 stmt_vec_info phi_info
2336 = vinfo
->lookup_stmt (USE_STMT (use_p
));
2337 for (unsigned i
= 0; i
< group_size
; ++i
)
2338 phis
.quick_push (phi_info
);
2339 slp_tree
*phi_node
= bst_map
->get (phis
);
2340 unsigned dest_idx
= loop_latch_edge (loop
)->dest_idx
;
2341 SLP_TREE_CHILDREN (*phi_node
)[dest_idx
]
2342 = SLP_INSTANCE_TREE (new_instance
);
2343 SLP_INSTANCE_TREE (new_instance
)->refcnt
++;
2347 vinfo
->slp_instances
.safe_push (new_instance
);
2349 /* ??? We've replaced the old SLP_INSTANCE_GROUP_SIZE with
2350 the number of scalar stmts in the root in a few places.
2351 Verify that assumption holds. */
2352 gcc_assert (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (new_instance
))
2353 .length () == group_size
);
2355 if (dump_enabled_p ())
2357 dump_printf_loc (MSG_NOTE
, vect_location
,
2358 "Final SLP tree for instance %p:\n", new_instance
);
2359 vect_print_slp_graph (MSG_NOTE
, vect_location
,
2360 SLP_INSTANCE_TREE (new_instance
));
2368 /* Failed to SLP. */
2369 /* Free the allocated memory. */
2370 scalar_stmts
.release ();
2373 stmt_vec_info stmt_info
= stmt_info_
;
2374 /* Try to break the group up into pieces. */
2375 if (kind
== slp_inst_kind_store
)
2377 /* ??? We could delay all the actual splitting of store-groups
2378 until after SLP discovery of the original group completed.
2379 Then we can recurse to vect_build_slp_instance directly. */
2380 for (i
= 0; i
< group_size
; i
++)
2384 /* For basic block SLP, try to break the group up into multiples of
2386 if (is_a
<bb_vec_info
> (vinfo
)
2387 && (i
> 1 && i
< group_size
))
2390 = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info
)));
2391 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
2392 1 << floor_log2 (i
));
2393 unsigned HOST_WIDE_INT const_nunits
;
2395 && TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
))
2397 /* Split into two groups at the first vector boundary. */
2398 gcc_assert ((const_nunits
& (const_nunits
- 1)) == 0);
2399 unsigned group1_size
= i
& ~(const_nunits
- 1);
2401 if (dump_enabled_p ())
2402 dump_printf_loc (MSG_NOTE
, vect_location
,
2403 "Splitting SLP group at stmt %u\n", i
);
2404 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2406 bool res
= vect_analyze_slp_instance (vinfo
, bst_map
, stmt_info
,
2407 kind
, max_tree_size
);
2408 /* Split the rest at the failure point and possibly
2409 re-analyze the remaining matching part if it has
2410 at least two lanes. */
2412 && (i
+ 1 < group_size
2413 || i
- group1_size
> 1))
2415 stmt_vec_info rest2
= rest
;
2416 rest
= vect_split_slp_store_group (rest
, i
- group1_size
);
2417 if (i
- group1_size
> 1)
2418 res
|= vect_analyze_slp_instance (vinfo
, bst_map
, rest2
,
2419 kind
, max_tree_size
);
2421 /* Re-analyze the non-matching tail if it has at least
2423 if (i
+ 1 < group_size
)
2424 res
|= vect_analyze_slp_instance (vinfo
, bst_map
,
2425 rest
, kind
, max_tree_size
);
2430 /* For loop vectorization split into arbitrary pieces of size > 1. */
2431 if (is_a
<loop_vec_info
> (vinfo
)
2432 && (i
> 1 && i
< group_size
))
2434 unsigned group1_size
= i
;
2436 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_NOTE
, vect_location
,
2438 "Splitting SLP group at stmt %u\n", i
);
2440 stmt_vec_info rest
= vect_split_slp_store_group (stmt_info
,
2442 /* Loop vectorization cannot handle gaps in stores, make sure
2443 the split group appears as strided. */
2444 STMT_VINFO_STRIDED_P (rest
) = 1;
2445 DR_GROUP_GAP (rest
) = 0;
2446 STMT_VINFO_STRIDED_P (stmt_info
) = 1;
2447 DR_GROUP_GAP (stmt_info
) = 0;
2449 bool res
= vect_analyze_slp_instance (vinfo
, bst_map
, stmt_info
,
2450 kind
, max_tree_size
);
2451 if (i
+ 1 < group_size
)
2452 res
|= vect_analyze_slp_instance (vinfo
, bst_map
,
2453 rest
, kind
, max_tree_size
);
2458 /* Even though the first vector did not all match, we might be able to SLP
2459 (some) of the remainder. FORNOW ignore this possibility. */
2462 /* Failed to SLP. */
2463 if (dump_enabled_p ())
2464 dump_printf_loc (MSG_NOTE
, vect_location
, "SLP discovery failed\n");
2469 /* Analyze an SLP instance starting from a group of grouped stores. Call
2470 vect_build_slp_tree to build a tree of packed stmts if possible.
2471 Return FALSE if it's impossible to SLP any stmt in the loop. */
2474 vect_analyze_slp_instance (vec_info
*vinfo
,
2475 scalar_stmts_to_slp_tree_map_t
*bst_map
,
2476 stmt_vec_info stmt_info
,
2477 slp_instance_kind kind
,
2478 unsigned max_tree_size
)
2481 vec
<stmt_vec_info
> scalar_stmts
;
2483 if (is_a
<bb_vec_info
> (vinfo
))
2484 vect_location
= stmt_info
->stmt
;
2486 stmt_vec_info next_info
= stmt_info
;
2487 if (kind
== slp_inst_kind_store
)
2489 /* Collect the stores and store them in scalar_stmts. */
2490 scalar_stmts
.create (DR_GROUP_SIZE (stmt_info
));
2493 scalar_stmts
.quick_push (vect_stmt_to_vectorize (next_info
));
2494 next_info
= DR_GROUP_NEXT_ELEMENT (next_info
);
2497 else if (kind
== slp_inst_kind_reduc_chain
)
2499 /* Collect the reduction stmts and store them in scalar_stmts. */
2500 scalar_stmts
.create (REDUC_GROUP_SIZE (stmt_info
));
2503 scalar_stmts
.quick_push (vect_stmt_to_vectorize (next_info
));
2504 next_info
= REDUC_GROUP_NEXT_ELEMENT (next_info
);
2506 /* Mark the first element of the reduction chain as reduction to properly
2507 transform the node. In the reduction analysis phase only the last
2508 element of the chain is marked as reduction. */
2509 STMT_VINFO_DEF_TYPE (stmt_info
)
2510 = STMT_VINFO_DEF_TYPE (scalar_stmts
.last ());
2511 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
))
2512 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts
.last ()));
2514 else if (kind
== slp_inst_kind_ctor
)
2516 tree rhs
= gimple_assign_rhs1 (stmt_info
->stmt
);
2518 scalar_stmts
.create (CONSTRUCTOR_NELTS (rhs
));
2519 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs
), i
, val
)
2521 stmt_vec_info def_info
= vinfo
->lookup_def (val
);
2522 def_info
= vect_stmt_to_vectorize (def_info
);
2523 scalar_stmts
.quick_push (def_info
);
2525 if (dump_enabled_p ())
2526 dump_printf_loc (MSG_NOTE
, vect_location
,
2527 "Analyzing vectorizable constructor: %G\n",
2530 else if (kind
== slp_inst_kind_reduc_group
)
2532 /* Collect reduction statements. */
2533 vec
<stmt_vec_info
> reductions
= as_a
<loop_vec_info
> (vinfo
)->reductions
;
2534 scalar_stmts
.create (reductions
.length ());
2535 for (i
= 0; reductions
.iterate (i
, &next_info
); i
++)
2536 if (STMT_VINFO_RELEVANT_P (next_info
)
2537 || STMT_VINFO_LIVE_P (next_info
))
2538 scalar_stmts
.quick_push (next_info
);
2539 /* If less than two were relevant/live there's nothing to SLP. */
2540 if (scalar_stmts
.length () < 2)
2546 /* Build the tree for the SLP instance. */
2547 bool res
= vect_build_slp_instance (vinfo
, kind
, scalar_stmts
,
2548 kind
== slp_inst_kind_ctor
2550 max_tree_size
, bst_map
,
2551 kind
== slp_inst_kind_store
2552 ? stmt_info
: NULL
);
2554 /* ??? If this is slp_inst_kind_store and the above succeeded here's
2555 where we should do store group splitting. */
2560 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2561 trees of packed scalar stmts if SLP is possible. */
2564 vect_analyze_slp (vec_info
*vinfo
, unsigned max_tree_size
)
2567 stmt_vec_info first_element
;
2569 DUMP_VECT_SCOPE ("vect_analyze_slp");
2571 scalar_stmts_to_slp_tree_map_t
*bst_map
2572 = new scalar_stmts_to_slp_tree_map_t ();
2574 /* Find SLP sequences starting from groups of grouped stores. */
2575 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
2576 vect_analyze_slp_instance (vinfo
, bst_map
, first_element
,
2577 STMT_VINFO_GROUPED_ACCESS (first_element
)
2578 ? slp_inst_kind_store
: slp_inst_kind_ctor
,
2581 if (bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
))
2583 for (unsigned i
= 0; i
< bb_vinfo
->roots
.length (); ++i
)
2585 vect_location
= bb_vinfo
->roots
[i
].root
->stmt
;
2586 if (vect_build_slp_instance (bb_vinfo
, bb_vinfo
->roots
[i
].kind
,
2587 bb_vinfo
->roots
[i
].stmts
,
2588 bb_vinfo
->roots
[i
].root
,
2589 max_tree_size
, bst_map
, NULL
))
2590 bb_vinfo
->roots
[i
].stmts
= vNULL
;
2594 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
2596 /* Find SLP sequences starting from reduction chains. */
2597 FOR_EACH_VEC_ELT (loop_vinfo
->reduction_chains
, i
, first_element
)
2598 if (! STMT_VINFO_RELEVANT_P (first_element
)
2599 && ! STMT_VINFO_LIVE_P (first_element
))
2601 else if (! vect_analyze_slp_instance (vinfo
, bst_map
, first_element
,
2602 slp_inst_kind_reduc_chain
,
2605 /* Dissolve reduction chain group. */
2606 stmt_vec_info vinfo
= first_element
;
2607 stmt_vec_info last
= NULL
;
2610 stmt_vec_info next
= REDUC_GROUP_NEXT_ELEMENT (vinfo
);
2611 REDUC_GROUP_FIRST_ELEMENT (vinfo
) = NULL
;
2612 REDUC_GROUP_NEXT_ELEMENT (vinfo
) = NULL
;
2616 STMT_VINFO_DEF_TYPE (first_element
) = vect_internal_def
;
2617 /* It can be still vectorized as part of an SLP reduction. */
2618 loop_vinfo
->reductions
.safe_push (last
);
2621 /* Find SLP sequences starting from groups of reductions. */
2622 if (loop_vinfo
->reductions
.length () > 1)
2623 vect_analyze_slp_instance (vinfo
, bst_map
, loop_vinfo
->reductions
[0],
2624 slp_inst_kind_reduc_group
, max_tree_size
);
2627 /* The map keeps a reference on SLP nodes built, release that. */
2628 for (scalar_stmts_to_slp_tree_map_t::iterator it
= bst_map
->begin ();
2629 it
!= bst_map
->end (); ++it
)
2631 vect_free_slp_tree ((*it
).second
);
2634 return opt_result::success ();
2637 /* Fill the vertices and leafs vector with all nodes in the SLP graph. */
2640 vect_slp_build_vertices (hash_set
<slp_tree
> &visited
, slp_tree node
,
2641 vec
<slp_tree
> &vertices
, vec
<int> &leafs
)
2646 if (visited
.add (node
))
2649 node
->vertex
= vertices
.length ();
2650 vertices
.safe_push (node
);
2653 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
2657 vect_slp_build_vertices (visited
, child
, vertices
, leafs
);
2660 leafs
.safe_push (node
->vertex
);
2663 /* Fill the vertices and leafs vector with all nodes in the SLP graph. */
2666 vect_slp_build_vertices (vec_info
*info
, vec
<slp_tree
> &vertices
,
2669 hash_set
<slp_tree
> visited
;
2671 slp_instance instance
;
2672 FOR_EACH_VEC_ELT (info
->slp_instances
, i
, instance
)
2674 unsigned n_v
= vertices
.length ();
2675 unsigned n_l
= leafs
.length ();
2676 vect_slp_build_vertices (visited
, SLP_INSTANCE_TREE (instance
), vertices
,
2678 /* If we added vertices but no entries to the reverse graph we've
2679 added a cycle that is not backwards-reachable. Push the entry
2680 to mimic as leaf then. */
2681 if (vertices
.length () > n_v
2682 && leafs
.length () == n_l
)
2683 leafs
.safe_push (SLP_INSTANCE_TREE (instance
)->vertex
);
2687 /* Apply (reverse) bijectite PERM to VEC. */
2691 vect_slp_permute (vec
<unsigned> perm
,
2692 vec
<T
> &vec
, bool reverse
)
2694 auto_vec
<T
, 64> saved
;
2695 saved
.create (vec
.length ());
2696 for (unsigned i
= 0; i
< vec
.length (); ++i
)
2697 saved
.quick_push (vec
[i
]);
2701 for (unsigned i
= 0; i
< vec
.length (); ++i
)
2702 vec
[perm
[i
]] = saved
[i
];
2703 for (unsigned i
= 0; i
< vec
.length (); ++i
)
2704 gcc_assert (vec
[perm
[i
]] == saved
[i
]);
2708 for (unsigned i
= 0; i
< vec
.length (); ++i
)
2709 vec
[i
] = saved
[perm
[i
]];
2710 for (unsigned i
= 0; i
< vec
.length (); ++i
)
2711 gcc_assert (vec
[i
] == saved
[perm
[i
]]);
2715 /* Return whether permutations PERM_A and PERM_B as recorded in the
2716 PERMS vector are equal. */
2719 vect_slp_perms_eq (const vec
<vec
<unsigned> > &perms
,
2720 int perm_a
, int perm_b
)
2722 return (perm_a
== perm_b
2723 || (perms
[perm_a
].length () == perms
[perm_b
].length ()
2724 && memcmp (&perms
[perm_a
][0], &perms
[perm_b
][0],
2725 sizeof (unsigned) * perms
[perm_a
].length ()) == 0));
2728 /* Optimize the SLP graph of VINFO. */
2731 vect_optimize_slp (vec_info
*vinfo
)
2733 if (vinfo
->slp_instances
.is_empty ())
2738 auto_vec
<slp_tree
> vertices
;
2739 auto_vec
<int> leafs
;
2740 vect_slp_build_vertices (vinfo
, vertices
, leafs
);
2742 struct graph
*slpg
= new_graph (vertices
.length ());
2743 FOR_EACH_VEC_ELT (vertices
, i
, node
)
2747 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2749 add_edge (slpg
, i
, child
->vertex
);
2752 /* Compute (reverse) postorder on the inverted graph. */
2754 graphds_dfs (slpg
, &leafs
[0], leafs
.length (), &ipo
, false, NULL
, NULL
);
2756 auto_sbitmap
n_visited (vertices
.length ());
2757 auto_sbitmap
n_materialize (vertices
.length ());
2758 auto_vec
<int> n_perm (vertices
.length ());
2759 auto_vec
<vec
<unsigned> > perms
;
2761 bitmap_clear (n_visited
);
2762 bitmap_clear (n_materialize
);
2763 n_perm
.quick_grow_cleared (vertices
.length ());
2764 perms
.safe_push (vNULL
); /* zero is no permute */
2766 /* Produce initial permutations. */
2767 for (i
= 0; i
< leafs
.length (); ++i
)
2770 slp_tree node
= vertices
[idx
];
2772 /* Handle externals and constants optimistically throughout the
2774 if (SLP_TREE_DEF_TYPE (node
) == vect_external_def
2775 || SLP_TREE_DEF_TYPE (node
) == vect_constant_def
)
2778 /* Leafs do not change across iterations. Note leafs also double
2779 as entries to the reverse graph. */
2780 if (!slpg
->vertices
[idx
].succ
)
2781 bitmap_set_bit (n_visited
, idx
);
2782 /* Loads are the only thing generating permutes. */
2783 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
2786 /* If splitting out a SLP_TREE_LANE_PERMUTATION can make the
2787 node unpermuted, record this permute. */
2788 stmt_vec_info dr_stmt
= SLP_TREE_REPRESENTATIVE (node
);
2789 if (!STMT_VINFO_GROUPED_ACCESS (dr_stmt
))
2791 dr_stmt
= DR_GROUP_FIRST_ELEMENT (dr_stmt
);
2792 unsigned imin
= DR_GROUP_SIZE (dr_stmt
) + 1, imax
= 0;
2793 bool any_permute
= false;
2794 for (unsigned j
= 0; j
< SLP_TREE_LANES (node
); ++j
)
2796 unsigned idx
= SLP_TREE_LOAD_PERMUTATION (node
)[j
];
2797 imin
= MIN (imin
, idx
);
2798 imax
= MAX (imax
, idx
);
2799 if (idx
- SLP_TREE_LOAD_PERMUTATION (node
)[0] != j
)
2802 /* If there's no permute no need to split one out. */
2805 /* If the span doesn't match we'd disrupt VF computation, avoid
2807 if (imax
- imin
+ 1 != SLP_TREE_LANES (node
))
2810 /* For now only handle true permutes, like
2811 vect_attempt_slp_rearrange_stmts did. This allows us to be lazy
2812 when permuting constants and invariants keeping the permute
2814 auto_sbitmap
load_index (SLP_TREE_LANES (node
));
2815 bitmap_clear (load_index
);
2816 for (unsigned j
= 0; j
< SLP_TREE_LANES (node
); ++j
)
2817 bitmap_set_bit (load_index
, SLP_TREE_LOAD_PERMUTATION (node
)[j
] - imin
);
2819 for (j
= 0; j
< SLP_TREE_LANES (node
); ++j
)
2820 if (!bitmap_bit_p (load_index
, j
))
2822 if (j
!= SLP_TREE_LANES (node
))
2825 vec
<unsigned> perm
= vNULL
;
2826 perm
.safe_grow (SLP_TREE_LANES (node
), true);
2827 for (unsigned j
= 0; j
< SLP_TREE_LANES (node
); ++j
)
2828 perm
[j
] = SLP_TREE_LOAD_PERMUTATION (node
)[j
] - imin
;
2829 perms
.safe_push (perm
);
2830 n_perm
[idx
] = perms
.length () - 1;
2833 /* Propagate permutes along the graph and compute materialization points. */
2835 unsigned iteration
= 0;
2841 for (i
= vertices
.length (); i
> 0 ; --i
)
2844 slp_tree node
= vertices
[idx
];
2845 /* For leafs there's nothing to do - we've seeded permutes
2847 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
2850 bitmap_set_bit (n_visited
, idx
);
2852 /* We cannot move a permute across a store. */
2853 if (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node
))
2855 (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node
))))
2859 for (graph_edge
*succ
= slpg
->vertices
[idx
].succ
;
2860 succ
; succ
= succ
->succ_next
)
2862 int succ_idx
= succ
->dest
;
2863 /* Handle unvisited nodes optimistically. */
2864 /* ??? But for constants once we want to handle non-bijective
2865 permutes we have to verify the permute, when unifying lanes,
2866 will not unify different constants. For example see
2867 gcc.dg/vect/bb-slp-14.c for a case that would break. */
2868 if (!bitmap_bit_p (n_visited
, succ_idx
))
2870 int succ_perm
= n_perm
[succ_idx
];
2871 /* Once we materialize succs permutation its output lanes
2872 appear unpermuted to us. */
2873 if (bitmap_bit_p (n_materialize
, succ_idx
))
2877 else if (succ_perm
== 0)
2882 else if (!vect_slp_perms_eq (perms
, perm
, succ_perm
))
2890 /* Pick up pre-computed leaf values. */
2892 else if (!vect_slp_perms_eq (perms
, perm
, n_perm
[idx
]))
2895 /* Make sure we eventually converge. */
2896 gcc_checking_assert (perm
== 0);
2899 bitmap_clear_bit (n_materialize
, idx
);
2906 /* Elide pruning at materialization points in the first
2907 iteration so every node was visited once at least. */
2911 /* Decide on permute materialization. Look whether there's
2912 a use (pred) edge that is permuted differently than us.
2913 In that case mark ourselves so the permutation is applied. */
2914 bool all_preds_permuted
= slpg
->vertices
[idx
].pred
!= NULL
;
2915 for (graph_edge
*pred
= slpg
->vertices
[idx
].pred
;
2916 pred
; pred
= pred
->pred_next
)
2918 gcc_checking_assert (bitmap_bit_p (n_visited
, pred
->src
));
2919 int pred_perm
= n_perm
[pred
->src
];
2920 if (!vect_slp_perms_eq (perms
, perm
, pred_perm
))
2922 all_preds_permuted
= false;
2926 if (!all_preds_permuted
)
2928 if (!bitmap_bit_p (n_materialize
, idx
))
2930 bitmap_set_bit (n_materialize
, idx
);
2934 while (changed
|| iteration
== 1);
2937 for (i
= 0; i
< vertices
.length (); ++i
)
2939 int perm
= n_perm
[i
];
2943 slp_tree node
= vertices
[i
];
2945 /* First permute invariant/external original successors. */
2948 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
2950 if (!child
|| SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
2953 /* If the vector is uniform there's nothing to do. */
2954 if (vect_slp_tree_uniform_p (child
))
2957 /* We can end up sharing some externals via two_operator
2958 handling. Be prepared to unshare those. */
2959 if (child
->refcnt
!= 1)
2961 gcc_assert (slpg
->vertices
[child
->vertex
].pred
->pred_next
);
2962 SLP_TREE_CHILDREN (node
)[j
] = child
2963 = vect_create_new_slp_node
2964 (SLP_TREE_SCALAR_OPS (child
).copy ());
2966 vect_slp_permute (perms
[perm
],
2967 SLP_TREE_SCALAR_OPS (child
), true);
2970 if (bitmap_bit_p (n_materialize
, i
))
2972 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
2973 /* For loads simply drop the permutation, the load permutation
2974 already performs the desired permutation. */
2976 else if (SLP_TREE_LANE_PERMUTATION (node
).exists ())
2978 /* If the node if already a permute node we just need to apply
2979 the permutation to the permute node itself. */
2980 if (dump_enabled_p ())
2981 dump_printf_loc (MSG_NOTE
, vect_location
,
2982 "simplifying permute node %p\n",
2985 vect_slp_permute (perms
[perm
], SLP_TREE_LANE_PERMUTATION (node
),
2990 if (dump_enabled_p ())
2991 dump_printf_loc (MSG_NOTE
, vect_location
,
2992 "inserting permute node in place of %p\n",
2995 /* Make a copy of NODE and in-place change it to a
2996 VEC_PERM node to permute the lanes of the copy. */
2997 slp_tree copy
= new _slp_tree
;
2998 SLP_TREE_CHILDREN (copy
) = SLP_TREE_CHILDREN (node
);
2999 SLP_TREE_CHILDREN (node
) = vNULL
;
3000 SLP_TREE_SCALAR_STMTS (copy
)
3001 = SLP_TREE_SCALAR_STMTS (node
).copy ();
3002 vect_slp_permute (perms
[perm
],
3003 SLP_TREE_SCALAR_STMTS (copy
), true);
3004 gcc_assert (!SLP_TREE_SCALAR_OPS (node
).exists ());
3005 SLP_TREE_REPRESENTATIVE (copy
) = SLP_TREE_REPRESENTATIVE (node
);
3006 gcc_assert (!SLP_TREE_LOAD_PERMUTATION (node
).exists ());
3007 SLP_TREE_LANE_PERMUTATION (copy
)
3008 = SLP_TREE_LANE_PERMUTATION (node
);
3009 SLP_TREE_LANE_PERMUTATION (node
) = vNULL
;
3010 SLP_TREE_VECTYPE (copy
) = SLP_TREE_VECTYPE (node
);
3012 copy
->max_nunits
= node
->max_nunits
;
3013 SLP_TREE_DEF_TYPE (copy
) = SLP_TREE_DEF_TYPE (node
);
3014 SLP_TREE_LANES (copy
) = SLP_TREE_LANES (node
);
3015 SLP_TREE_CODE (copy
) = SLP_TREE_CODE (node
);
3017 /* Now turn NODE into a VEC_PERM. */
3018 SLP_TREE_CHILDREN (node
).safe_push (copy
);
3019 SLP_TREE_LANE_PERMUTATION (node
).create (SLP_TREE_LANES (node
));
3020 for (unsigned j
= 0; j
< SLP_TREE_LANES (node
); ++j
)
3021 SLP_TREE_LANE_PERMUTATION (node
)
3022 .quick_push (std::make_pair (0, perms
[perm
][j
]));
3023 SLP_TREE_CODE (node
) = VEC_PERM_EXPR
;
3028 /* Apply the reverse permutation to our stmts. */
3029 vect_slp_permute (perms
[perm
],
3030 SLP_TREE_SCALAR_STMTS (node
), true);
3031 /* And to the load permutation, which we can simply
3032 make regular by design. */
3033 if (SLP_TREE_LOAD_PERMUTATION (node
).exists ())
3035 /* ??? When we handle non-bijective permutes the idea
3036 is that we can force the load-permutation to be
3037 { min, min + 1, min + 2, ... max }. But then the
3038 scalar defs might no longer match the lane content
3039 which means wrong-code with live lane vectorization.
3040 So we possibly have to have NULL entries for those. */
3041 vect_slp_permute (perms
[perm
],
3042 SLP_TREE_LOAD_PERMUTATION (node
), true);
3047 /* Free the perms vector used for propagation. */
3048 while (!perms
.is_empty ())
3049 perms
.pop ().release ();
3053 /* Now elide load permutations that are not necessary. */
3054 for (i
= 0; i
< leafs
.length (); ++i
)
3056 node
= vertices
[leafs
[i
]];
3057 if (!SLP_TREE_LOAD_PERMUTATION (node
).exists ())
3060 /* In basic block vectorization we allow any subchain of an interleaving
3062 FORNOW: not in loop SLP because of realignment complications. */
3063 if (is_a
<bb_vec_info
> (vinfo
))
3065 bool subchain_p
= true;
3066 stmt_vec_info next_load_info
= NULL
;
3067 stmt_vec_info load_info
;
3069 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
3072 && (next_load_info
!= load_info
3073 || DR_GROUP_GAP (load_info
) != 1))
3078 next_load_info
= DR_GROUP_NEXT_ELEMENT (load_info
);
3082 SLP_TREE_LOAD_PERMUTATION (node
).release ();
3088 stmt_vec_info load_info
;
3089 bool this_load_permuted
= false;
3091 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), j
, load_info
)
3092 if (SLP_TREE_LOAD_PERMUTATION (node
)[j
] != j
)
3094 this_load_permuted
= true;
3097 stmt_vec_info first_stmt_info
3098 = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node
)[0]);
3099 if (!this_load_permuted
3100 /* The load requires permutation when unrolling exposes
3101 a gap either because the group is larger than the SLP
3102 group-size or because there is a gap between the groups. */
3103 && (known_eq (LOOP_VINFO_VECT_FACTOR
3104 (as_a
<loop_vec_info
> (vinfo
)), 1U)
3105 || ((SLP_TREE_LANES (node
) == DR_GROUP_SIZE (first_stmt_info
))
3106 && DR_GROUP_GAP (first_stmt_info
) == 0)))
3108 SLP_TREE_LOAD_PERMUTATION (node
).release ();
3115 /* Gather loads reachable from the individual SLP graph entries. */
3118 vect_gather_slp_loads (vec_info
*vinfo
)
3121 slp_instance instance
;
3122 FOR_EACH_VEC_ELT (vinfo
->slp_instances
, i
, instance
)
3124 hash_set
<slp_tree
> visited
;
3125 vect_gather_slp_loads (SLP_INSTANCE_LOADS (instance
),
3126 SLP_INSTANCE_TREE (instance
), visited
);
3131 /* For each possible SLP instance decide whether to SLP it and calculate overall
3132 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
3133 least one instance. */
3136 vect_make_slp_decision (loop_vec_info loop_vinfo
)
3139 poly_uint64 unrolling_factor
= 1;
3140 vec
<slp_instance
> slp_instances
= LOOP_VINFO_SLP_INSTANCES (loop_vinfo
);
3141 slp_instance instance
;
3142 int decided_to_slp
= 0;
3144 DUMP_VECT_SCOPE ("vect_make_slp_decision");
3146 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
3148 /* FORNOW: SLP if you can. */
3149 /* All unroll factors have the form:
3151 GET_MODE_SIZE (vinfo->vector_mode) * X
3153 for some rational X, so they must have a common multiple. */
3155 = force_common_multiple (unrolling_factor
,
3156 SLP_INSTANCE_UNROLLING_FACTOR (instance
));
3158 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3159 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3160 loop-based vectorization. Such stmts will be marked as HYBRID. */
3161 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
3165 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo
) = unrolling_factor
;
3167 if (decided_to_slp
&& dump_enabled_p ())
3169 dump_printf_loc (MSG_NOTE
, vect_location
,
3170 "Decided to SLP %d instances. Unrolling factor ",
3172 dump_dec (MSG_NOTE
, unrolling_factor
);
3173 dump_printf (MSG_NOTE
, "\n");
3176 return (decided_to_slp
> 0);
3179 /* Private data for vect_detect_hybrid_slp. */
3182 loop_vec_info loop_vinfo
;
3183 vec
<stmt_vec_info
> *worklist
;
3186 /* Walker for walk_gimple_op. */
3189 vect_detect_hybrid_slp (tree
*tp
, int *, void *data
)
3191 walk_stmt_info
*wi
= (walk_stmt_info
*)data
;
3192 vdhs_data
*dat
= (vdhs_data
*)wi
->info
;
3197 stmt_vec_info def_stmt_info
= dat
->loop_vinfo
->lookup_def (*tp
);
3200 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
3201 if (PURE_SLP_STMT (def_stmt_info
))
3203 if (dump_enabled_p ())
3204 dump_printf_loc (MSG_NOTE
, vect_location
, "marking hybrid: %G",
3205 def_stmt_info
->stmt
);
3206 STMT_SLP_TYPE (def_stmt_info
) = hybrid
;
3207 dat
->worklist
->safe_push (def_stmt_info
);
3213 /* Look if STMT_INFO is consumed by SLP indirectly and mark it pure_slp
3214 if so, otherwise pushing it to WORKLIST. */
3217 maybe_push_to_hybrid_worklist (vec_info
*vinfo
,
3218 vec
<stmt_vec_info
> &worklist
,
3219 stmt_vec_info stmt_info
)
3221 if (dump_enabled_p ())
3222 dump_printf_loc (MSG_NOTE
, vect_location
,
3223 "Processing hybrid candidate : %G", stmt_info
->stmt
);
3224 stmt_vec_info orig_info
= vect_orig_stmt (stmt_info
);
3225 imm_use_iterator iter2
;
3227 use_operand_p use_p
;
3228 def_operand_p def_p
;
3229 bool any_def
= false;
3230 FOR_EACH_PHI_OR_STMT_DEF (def_p
, orig_info
->stmt
, iter1
, SSA_OP_DEF
)
3233 FOR_EACH_IMM_USE_FAST (use_p
, iter2
, DEF_FROM_PTR (def_p
))
3235 if (is_gimple_debug (USE_STMT (use_p
)))
3237 stmt_vec_info use_info
= vinfo
->lookup_stmt (USE_STMT (use_p
));
3238 /* An out-of loop use means this is a loop_vect sink. */
3241 if (dump_enabled_p ())
3242 dump_printf_loc (MSG_NOTE
, vect_location
,
3243 "Found loop_vect sink: %G", stmt_info
->stmt
);
3244 worklist
.safe_push (stmt_info
);
3247 else if (!STMT_SLP_TYPE (vect_stmt_to_vectorize (use_info
)))
3249 if (dump_enabled_p ())
3250 dump_printf_loc (MSG_NOTE
, vect_location
,
3251 "Found loop_vect use: %G", use_info
->stmt
);
3252 worklist
.safe_push (stmt_info
);
3257 /* No def means this is a loo_vect sink. */
3260 if (dump_enabled_p ())
3261 dump_printf_loc (MSG_NOTE
, vect_location
,
3262 "Found loop_vect sink: %G", stmt_info
->stmt
);
3263 worklist
.safe_push (stmt_info
);
3266 if (dump_enabled_p ())
3267 dump_printf_loc (MSG_NOTE
, vect_location
,
3268 "Marked SLP consumed stmt pure: %G", stmt_info
->stmt
);
3269 STMT_SLP_TYPE (stmt_info
) = pure_slp
;
3272 /* Find stmts that must be both vectorized and SLPed. */
3275 vect_detect_hybrid_slp (loop_vec_info loop_vinfo
)
3277 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
3279 /* All stmts participating in SLP are marked pure_slp, all other
3280 stmts are loop_vect.
3281 First collect all loop_vect stmts into a worklist.
3282 SLP patterns cause not all original scalar stmts to appear in
3283 SLP_TREE_SCALAR_STMTS and thus not all of them are marked pure_slp.
3284 Rectify this here and do a backward walk over the IL only considering
3285 stmts as loop_vect when they are used by a loop_vect stmt and otherwise
3286 mark them as pure_slp. */
3287 auto_vec
<stmt_vec_info
> worklist
;
3288 for (int i
= LOOP_VINFO_LOOP (loop_vinfo
)->num_nodes
- 1; i
>= 0; --i
)
3290 basic_block bb
= LOOP_VINFO_BBS (loop_vinfo
)[i
];
3291 for (gphi_iterator gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
);
3294 gphi
*phi
= gsi
.phi ();
3295 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (phi
);
3296 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
3297 maybe_push_to_hybrid_worklist (loop_vinfo
,
3298 worklist
, stmt_info
);
3300 for (gimple_stmt_iterator gsi
= gsi_last_bb (bb
); !gsi_end_p (gsi
);
3303 gimple
*stmt
= gsi_stmt (gsi
);
3304 if (is_gimple_debug (stmt
))
3306 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (stmt
);
3307 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
3309 for (gimple_stmt_iterator gsi2
3310 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
3311 !gsi_end_p (gsi2
); gsi_next (&gsi2
))
3313 stmt_vec_info patt_info
3314 = loop_vinfo
->lookup_stmt (gsi_stmt (gsi2
));
3315 if (!STMT_SLP_TYPE (patt_info
)
3316 && STMT_VINFO_RELEVANT (patt_info
))
3317 maybe_push_to_hybrid_worklist (loop_vinfo
,
3318 worklist
, patt_info
);
3320 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
3322 if (!STMT_SLP_TYPE (stmt_info
) && STMT_VINFO_RELEVANT (stmt_info
))
3323 maybe_push_to_hybrid_worklist (loop_vinfo
,
3324 worklist
, stmt_info
);
3328 /* Now we have a worklist of non-SLP stmts, follow use->def chains and
3329 mark any SLP vectorized stmt as hybrid.
3330 ??? We're visiting def stmts N times (once for each non-SLP and
3331 once for each hybrid-SLP use). */
3334 dat
.worklist
= &worklist
;
3335 dat
.loop_vinfo
= loop_vinfo
;
3336 memset (&wi
, 0, sizeof (wi
));
3337 wi
.info
= (void *)&dat
;
3338 while (!worklist
.is_empty ())
3340 stmt_vec_info stmt_info
= worklist
.pop ();
3341 /* Since SSA operands are not set up for pattern stmts we need
3342 to use walk_gimple_op. */
3344 walk_gimple_op (stmt_info
->stmt
, vect_detect_hybrid_slp
, &wi
);
3349 /* Initialize a bb_vec_info struct for the statements in BBS basic blocks. */
3351 _bb_vec_info::_bb_vec_info (vec
<basic_block
> _bbs
, vec_info_shared
*shared
)
3352 : vec_info (vec_info::bb
, init_cost (NULL
), shared
), bbs (_bbs
), roots (vNULL
)
3354 for (unsigned i
= 0; i
< bbs
.length (); ++i
)
3357 for (gphi_iterator si
= gsi_start_phis (bbs
[i
]); !gsi_end_p (si
);
3360 gphi
*phi
= si
.phi ();
3361 gimple_set_uid (phi
, 0);
3364 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]);
3365 !gsi_end_p (gsi
); gsi_next (&gsi
))
3367 gimple
*stmt
= gsi_stmt (gsi
);
3368 gimple_set_uid (stmt
, 0);
3369 if (is_gimple_debug (stmt
))
3377 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
3378 stmts in the basic block. */
/* Destructor: undo the constructor's region marking by setting every phi
   and stmt UID in the region back to -1, then release the scalar stmt
   vectors held by the recorded SLP roots.
   NOTE(review): damaged extraction -- braces/statements missing; code left
   byte-identical to the extracted text.  */
3380 _bb_vec_info::~_bb_vec_info ()
3382 /* Reset region marker. */
3383 for (unsigned i
= 0; i
< bbs
.length (); ++i
)
3386 for (gphi_iterator si
= gsi_start_phis (bbs
[i
]); !gsi_end_p (si
);
3389 gphi
*phi
= si
.phi ();
3390 gimple_set_uid (phi
, -1);
3392 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]);
3393 !gsi_end_p (gsi
); gsi_next (&gsi
))
3395 gimple
*stmt
= gsi_stmt (gsi
);
3396 gimple_set_uid (stmt
, -1);
/* Free per-root scalar stmt vectors recorded during analysis.  */
3400 for (unsigned i
= 0; i
< roots
.length (); ++i
)
3401 roots
[i
].stmts
.release ();
3405 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
3406 given then that child nodes have already been processed, and that
3407 their def types currently match their SLP node's def type. */
/* Analyze the operation at the root of NODE (children already processed).
   Computes SLP_TREE_NUMBER_OF_VEC_STMTS for the node, dispatches
   VEC_PERM_EXPR nodes to vectorizable_slp_permutation, verifies vector
   type compatibility for BB vectorization, and finally defers to
   vect_analyze_stmt.  Costs are accumulated into COST_VEC.
   NOTE(review): damaged extraction -- code left byte-identical.  */
3410 vect_slp_analyze_node_operations_1 (vec_info
*vinfo
, slp_tree node
,
3411 slp_instance node_instance
,
3412 stmt_vector_for_cost
*cost_vec
)
3414 stmt_vec_info stmt_info
= SLP_TREE_REPRESENTATIVE (node
);
3415 gcc_assert (STMT_SLP_TYPE (stmt_info
) != loop_vect
);
3417 /* Calculate the number of vector statements to be created for the
3418 scalar stmts in this node. For SLP reductions it is equal to the
3419 number of vector statements in the children (which has already been
3420 calculated by the recursive call). Otherwise it is the number of
3421 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
3422 VF divided by the number of elements in a vector. */
3423 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
)
3424 && REDUC_GROUP_FIRST_ELEMENT (stmt_info
))
/* SLP reduction: copy the vec-stmt count from the first internal child.  */
3426 for (unsigned i
= 0; i
< SLP_TREE_CHILDREN (node
).length (); ++i
)
3427 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node
)[i
]) == vect_internal_def
)
3429 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
3430 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node
)[i
]);
/* Otherwise derive it from VF (1 for BB vectorization), lanes and the
   vector type.  */
3437 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
3438 vf
= loop_vinfo
->vectorization_factor
;
3441 unsigned int group_size
= SLP_TREE_LANES (node
);
3442 tree vectype
= SLP_TREE_VECTYPE (node
);
3443 SLP_TREE_NUMBER_OF_VEC_STMTS (node
)
3444 = vect_get_num_vectors (vf
* group_size
, vectype
);
3447 /* Handle purely internal nodes. */
3448 if (SLP_TREE_CODE (node
) == VEC_PERM_EXPR
)
3449 return vectorizable_slp_permutation (vinfo
, NULL
, node
, cost_vec
);
3451 if (is_a
<bb_vec_info
> (vinfo
)
3452 && !vect_update_shared_vectype (stmt_info
, SLP_TREE_VECTYPE (node
)))
3454 if (dump_enabled_p ())
3455 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3456 "desired vector type conflicts with earlier one "
3457 "for %G", stmt_info
->stmt
);
3462 return vect_analyze_stmt (vinfo
, stmt_info
, &dummy
,
3463 node
, node_instance
, cost_vec
);
3466 /* Try to build NODE from scalars, returning true on success.
3467 NODE_INSTANCE is the SLP instance that contains NODE. */
/* Try to demote NODE to an external def built from its scalar stmts'
   LHS values, so the rest of the instance can still be vectorized.
   Bails out for loop vectorization, for the instance root, for nodes
   without scalar stmts and for pattern stmts (whose scalar defs won't
   be available).  Returns whether the conversion happened.
   NOTE(review): damaged extraction -- code left byte-identical.  */
3470 vect_slp_convert_to_external (vec_info
*vinfo
, slp_tree node
,
3471 slp_instance node_instance
)
3473 stmt_vec_info stmt_info
;
3476 if (!is_a
<bb_vec_info
> (vinfo
)
3477 || node
== SLP_INSTANCE_TREE (node_instance
)
3478 || !SLP_TREE_SCALAR_STMTS (node
).exists ()
3479 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node
)))
3482 if (dump_enabled_p ())
3483 dump_printf_loc (MSG_NOTE
, vect_location
,
3484 "Building vector operands of %p from scalars instead\n", node
);
3486 /* Don't remove and free the child nodes here, since they could be
3487 referenced by other structures. The analysis and scheduling phases
3488 (need to) ignore child nodes of anything that isn't vect_internal_def. */
3489 unsigned int group_size
= SLP_TREE_LANES (node
);
3490 SLP_TREE_DEF_TYPE (node
) = vect_external_def
;
3491 SLP_TREE_SCALAR_OPS (node
).safe_grow (group_size
, true);
3492 SLP_TREE_LOAD_PERMUTATION (node
).release ();
/* Record each scalar stmt's (original, pre-pattern) LHS as the external
   scalar operand for the corresponding lane.  */
3493 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
3495 tree lhs
= gimple_get_lhs (vect_orig_stmt (stmt_info
)->stmt
);
3496 SLP_TREE_SCALAR_OPS (node
)[i
] = lhs
;
3501 /* Compute the prologue cost for invariant or constant operands represented
/* Record the prologue cost of materializing the invariant/constant
   operands of NODE into COST_VEC.  A node that already has vector defs
   costs nothing.  Otherwise each vector build is costed either as a
   splat (scalar_to_vec, when all elements of one vector are equal) or
   as a full construction (vec_construct).
   NOTE(review): damaged extraction -- code left byte-identical.  */
3505 vect_prologue_cost_for_slp (slp_tree node
,
3506 stmt_vector_for_cost
*cost_vec
)
3508 /* There's a special case of an existing vector, that costs nothing. */
3509 if (SLP_TREE_SCALAR_OPS (node
).length () == 0
3510 && !SLP_TREE_VEC_DEFS (node
).is_empty ())
3512 /* Without looking at the actual initializer a vector of
3513 constants can be implemented as load from the constant pool.
3514 When all elements are the same we can use a splat. */
3515 tree vectype
= SLP_TREE_VECTYPE (node
);
3516 unsigned group_size
= SLP_TREE_SCALAR_OPS (node
).length ();
3517 unsigned num_vects_to_check
;
3518 unsigned HOST_WIDE_INT const_nunits
;
3519 unsigned nelt_limit
;
/* Constant-length vectors that do not evenly tile the group need each
   generated vector inspected; otherwise one representative suffices.  */
3520 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
3521 && ! multiple_p (const_nunits
, group_size
))
3523 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
3524 nelt_limit
= const_nunits
;
3528 /* If either the vector has variable length or the vectors
3529 are composed of repeated whole groups we only need to
3530 cost construction once. All vectors will be the same. */
3531 num_vects_to_check
= 1;
3532 nelt_limit
= group_size
;
3534 tree elt
= NULL_TREE
;
/* ELT tracks whether all operands of the vector being built are equal;
   it is reset when elements differ (the resetting line is missing from
   this extraction).  */
3536 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
3538 unsigned si
= j
% group_size
;
3540 elt
= SLP_TREE_SCALAR_OPS (node
)[si
];
3541 /* ??? We're just tracking whether all operands of a single
3542 vector initializer are the same, ideally we'd check if
3543 we emitted the same one already. */
3544 else if (elt
!= SLP_TREE_SCALAR_OPS (node
)[si
])
3547 if (nelt
== nelt_limit
)
3549 record_stmt_cost (cost_vec
, 1,
3550 SLP_TREE_DEF_TYPE (node
) == vect_external_def
3551 ? (elt
? scalar_to_vec
: vec_construct
)
3553 NULL
, vectype
, 0, vect_prologue
);
3559 /* Analyze statements contained in SLP tree NODE after recursively analyzing
3560 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
3562 Return true if the operations are supported. */
/* Recursively analyze NODE and its children; on success also cost the
   invariant (constant/external) child nodes.  VISITED_SET/VISITED_VEC
   implement sharing of already-analyzed nodes and allow unwinding them
   on failure.  On failure, attempts to prune the node to an external
   def via vect_slp_convert_to_external instead of failing the instance.
   NOTE(review): damaged extraction -- code left byte-identical.  */
3565 vect_slp_analyze_node_operations (vec_info
*vinfo
, slp_tree node
,
3566 slp_instance node_instance
,
3567 hash_set
<slp_tree
> &visited_set
,
3568 vec
<slp_tree
> &visited_vec
,
3569 stmt_vector_for_cost
*cost_vec
)
3574 /* Assume we can code-generate all invariants. */
3576 || SLP_TREE_DEF_TYPE (node
) == vect_constant_def
3577 || SLP_TREE_DEF_TYPE (node
) == vect_external_def
)
/* An uninitialized def type means we ran into a cycle while building
   the SLP graph -- fail this node.  */
3580 if (SLP_TREE_DEF_TYPE (node
) == vect_uninitialized_def
)
3582 if (dump_enabled_p ())
3583 dump_printf_loc (MSG_NOTE
, vect_location
,
3584 "Failed cyclic SLP reference in %p", node
);
3587 gcc_assert (SLP_TREE_DEF_TYPE (node
) == vect_internal_def
);
3589 /* If we already analyzed the exact same set of scalar stmts we're done.
3590 We share the generated vector stmts for those. */
3591 if (visited_set
.add (node
))
3593 visited_vec
.safe_push (node
);
/* Remember how much we pushed so failure below can unwind precisely.  */
3596 unsigned visited_rec_start
= visited_vec
.length ();
3597 unsigned cost_vec_rec_start
= cost_vec
->length ();
3598 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3600 res
= vect_slp_analyze_node_operations (vinfo
, child
, node_instance
,
3601 visited_set
, visited_vec
,
3608 res
= vect_slp_analyze_node_operations_1 (vinfo
, node
, node_instance
,
3610 /* If analysis failed we have to pop all recursive visited nodes
3614 while (visited_vec
.length () >= visited_rec_start
)
3615 visited_set
.remove (visited_vec
.pop ());
3616 cost_vec
->truncate (cost_vec_rec_start
);
3619 /* When the node can be vectorized cost invariant nodes it references.
3620 This is not done in DFS order to allow the refering node
3621 vectorizable_* calls to nail down the invariant nodes vector type
3622 and possibly unshare it if it needs a different vector type than
3625 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), j
, child
)
3627 && (SLP_TREE_DEF_TYPE (child
) == vect_constant_def
3628 || SLP_TREE_DEF_TYPE (child
) == vect_external_def
)
3629 /* Perform usual caching, note code-generation still
3630 code-gens these nodes multiple times but we expect
3631 to CSE them later. */
3632 && !visited_set
.add (child
))
3634 visited_vec
.safe_push (child
);
3635 /* ??? After auditing more code paths make a "default"
3636 and push the vector type from NODE to all children
3637 if it is not already set. */
3638 /* Compute the number of vectors to be generated. */
3639 tree vector_type
= SLP_TREE_VECTYPE (child
);
3642 /* For shifts with a scalar argument we don't need
3643 to cost or code-generate anything.
3644 ??? Represent this more explicitely. */
3645 gcc_assert ((STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (node
))
3646 == shift_vec_info_type
)
3650 unsigned group_size
= SLP_TREE_LANES (child
);
3652 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
3653 vf
= loop_vinfo
->vectorization_factor
;
3654 SLP_TREE_NUMBER_OF_VEC_STMTS (child
)
3655 = vect_get_num_vectors (vf
* group_size
, vector_type
);
3656 /* And cost them. */
3657 vect_prologue_cost_for_slp (child
, cost_vec
);
3660 /* If this node or any of its children can't be vectorized, try pruning
3661 the tree here rather than felling the whole thing. */
3662 if (!res
&& vect_slp_convert_to_external (vinfo
, node
, node_instance
))
3664 /* We'll need to revisit this for invariant costing and number
3665 of vectorized stmt setting. */
3673 /* Mark lanes of NODE that are live outside of the basic-block vectorized
3674 region and that can be vectorized using vectorizable_live_operation
3675 with STMT_VINFO_LIVE_P. Not handled live operations will cause the
3676 scalar code computing it to be retained. */
/* For each scalar stmt of NODE, decide whether its def is used outside
   the vectorized region ("live") and whether a lane extract can be
   code-generated and correctly placed for it; set STMT_VINFO_LIVE_P
   accordingly and recurse into internal children.  SVISITED/VISITED
   prevent reprocessing of shared stmts/nodes.
   NOTE(review): damaged extraction -- code left byte-identical.  */
3679 vect_bb_slp_mark_live_stmts (bb_vec_info bb_vinfo
, slp_tree node
,
3680 slp_instance instance
,
3681 stmt_vector_for_cost
*cost_vec
,
3682 hash_set
<stmt_vec_info
> &svisited
,
3683 hash_set
<slp_tree
> &visited
)
3685 if (visited
.add (node
))
3689 stmt_vec_info stmt_info
;
3690 stmt_vec_info last_stmt
= vect_find_last_scalar_stmt_in_slp (node
);
3691 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
3693 if (svisited
.contains (stmt_info
))
3695 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
3696 if (STMT_VINFO_IN_PATTERN_P (orig_stmt_info
)
3697 && STMT_VINFO_RELATED_STMT (orig_stmt_info
) != stmt_info
)
3698 /* Only the pattern root stmt computes the original scalar value. */
3700 bool mark_visited
= true;
3701 gimple
*orig_stmt
= orig_stmt_info
->stmt
;
3702 ssa_op_iter op_iter
;
3703 def_operand_p def_p
;
/* A def is live when some use stmt is outside the region or not a pure
   SLP stmt.  */
3704 FOR_EACH_PHI_OR_STMT_DEF (def_p
, orig_stmt
, op_iter
, SSA_OP_DEF
)
3706 imm_use_iterator use_iter
;
3708 stmt_vec_info use_stmt_info
;
3709 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
3710 if (!is_gimple_debug (use_stmt
))
3712 use_stmt_info
= bb_vinfo
->lookup_stmt (use_stmt
);
3714 || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info
)))
3716 STMT_VINFO_LIVE_P (stmt_info
) = true;
3717 if (vectorizable_live_operation (bb_vinfo
, stmt_info
,
3718 NULL
, node
, instance
, i
,
3720 /* ??? So we know we can vectorize the live stmt
3721 from one SLP node. If we cannot do so from all
3722 or none consistently we'd have to record which
3723 SLP node (and lane) we want to use for the live
3724 operation. So make sure we can code-generate
3726 mark_visited
= false;
3728 STMT_VINFO_LIVE_P (stmt_info
) = false;
3729 BREAK_FROM_IMM_USE_STMT (use_iter
);
3732 /* We have to verify whether we can insert the lane extract
3733 before all uses. The following is a conservative approximation.
3734 We cannot put this into vectorizable_live_operation because
3735 iterating over all use stmts from inside a FOR_EACH_IMM_USE_STMT
3737 Note that while the fact that we emit code for loads at the
3738 first load should make this a non-problem leafs we construct
3739 from scalars are vectorized after the last scalar def.
3740 ??? If we'd actually compute the insert location during
3741 analysis we could use sth less conservative than the last
3742 scalar stmt in the node for the dominance check. */
3743 /* ??? What remains is "live" uses in vector CTORs in the same
3744 SLP graph which is where those uses can end up code-generated
3745 right after their definition instead of close to their original
3746 use. But that would restrict us to code-generate lane-extracts
3747 from the latest stmt in a node. So we compensate for this
3748 during code-generation, simply not replacing uses for those
3749 hopefully rare cases. */
3750 if (STMT_VINFO_LIVE_P (stmt_info
))
3751 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
3752 if (!is_gimple_debug (use_stmt
)
3753 && (!(use_stmt_info
= bb_vinfo
->lookup_stmt (use_stmt
))
3754 || !PURE_SLP_STMT (vect_stmt_to_vectorize (use_stmt_info
)))
3755 && !vect_stmt_dominates_stmt_p (last_stmt
->stmt
, use_stmt
))
3757 if (dump_enabled_p ())
3758 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3759 "Cannot determine insertion place for "
3761 STMT_VINFO_LIVE_P (stmt_info
) = false;
3762 mark_visited
= true;
3766 svisited
.add (stmt_info
);
/* Recurse into internal-def children only.  */
3770 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3771 if (child
&& SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3772 vect_bb_slp_mark_live_stmts (bb_vinfo
, child
, instance
,
3773 cost_vec
, svisited
, visited
);
3776 /* Analyze statements in SLP instances of VINFO. Return true if the
3777 operations are supported. */
/* Analyze the operations of all SLP instances in VINFO, removing the
   instances that fail.  For BB vectorization the per-instance cost
   vector is kept for later subgraph costing and live lanes are marked;
   for loop vectorization costs are added immediately.  Returns whether
   any instance survived.
   NOTE(review): damaged extraction -- code left byte-identical.  */
3780 vect_slp_analyze_operations (vec_info
*vinfo
)
3782 slp_instance instance
;
3785 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
3787 hash_set
<slp_tree
> visited
;
/* Note: no ++i in the loop header -- the index only advances when the
   instance is kept; ordered_remove compacts the vector on failure.  */
3788 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); )
3790 auto_vec
<slp_tree
> visited_vec
;
3791 stmt_vector_for_cost cost_vec
;
3792 cost_vec
.create (2);
3793 if (is_a
<bb_vec_info
> (vinfo
))
3794 vect_location
= instance
->location ();
3795 if (!vect_slp_analyze_node_operations (vinfo
,
3796 SLP_INSTANCE_TREE (instance
),
3797 instance
, visited
, visited_vec
,
3799 /* Instances with a root stmt require vectorized defs for the
3801 || (SLP_INSTANCE_ROOT_STMT (instance
)
3802 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance
))
3803 != vect_internal_def
)))
3805 slp_tree node
= SLP_INSTANCE_TREE (instance
);
3806 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
3807 if (dump_enabled_p ())
3808 dump_printf_loc (MSG_NOTE
, vect_location
,
3809 "removing SLP instance operations starting from: %G",
3811 vect_free_slp_instance (instance
);
3812 vinfo
->slp_instances
.ordered_remove (i
);
3813 cost_vec
.release ();
/* Un-share nodes that were only visited on behalf of this instance so
   other instances re-analyze them.  */
3814 while (!visited_vec
.is_empty ())
3815 visited
.remove (visited_vec
.pop ());
3821 /* For BB vectorization remember the SLP graph entry
3823 if (is_a
<bb_vec_info
> (vinfo
))
3824 instance
->cost_vec
= cost_vec
;
3827 add_stmt_costs (vinfo
, vinfo
->target_cost_data
, &cost_vec
);
3828 cost_vec
.release ();
3833 /* Compute vectorizable live stmts. */
3834 if (bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
))
3836 hash_set
<stmt_vec_info
> svisited
;
3837 hash_set
<slp_tree
> visited
;
3838 for (i
= 0; vinfo
->slp_instances
.iterate (i
, &instance
); ++i
)
3840 vect_location
= instance
->location ();
3841 vect_bb_slp_mark_live_stmts (bb_vinfo
, SLP_INSTANCE_TREE (instance
),
3842 instance
, &instance
->cost_vec
, svisited
,
3847 return !vinfo
->slp_instances
.is_empty ();
3850 /* Get the SLP instance leader from INSTANCE_LEADER thereby transitively
3851 closing the eventual chain. */
/* Follow the INSTANCE_LEADER map from INSTANCE to its ultimate leader
   (the fixed point where an instance maps to itself) and path-compress:
   every map slot visited on the way is rewritten to point directly at
   the leader, keeping later lookups short.
   NOTE(review): damaged extraction -- the loop-advance and return
   statements are missing from this text; code left byte-identical.  */
3854 get_ultimate_leader (slp_instance instance
,
3855 hash_map
<slp_instance
, slp_instance
> &instance_leader
)
3857 auto_vec
<slp_instance
*, 8> chain
;
3859 while (*(tem
= instance_leader
.get (instance
)) != instance
)
3861 chain
.safe_push (tem
);
/* Path compression: point every chained slot at the found leader.  */
3864 while (!chain
.is_empty ())
3865 *chain
.pop () = instance
;
3869 /* Worker of vect_bb_partition_graph, recurse on NODE. */
/* Recursive worker of vect_bb_partition_graph: assign each scalar stmt
   reachable from NODE to INSTANCE via STMT_TO_INSTANCE; when a stmt is
   already owned by a different instance, union the two subgraphs by
   making INSTANCE the leader of the other instance's ultimate leader.
   VISITED stops repeated traversal of shared nodes.
   NOTE(review): damaged extraction -- code left byte-identical.  */
3872 vect_bb_partition_graph_r (bb_vec_info bb_vinfo
,
3873 slp_instance instance
, slp_tree node
,
3874 hash_map
<stmt_vec_info
, slp_instance
> &stmt_to_instance
,
3875 hash_map
<slp_instance
, slp_instance
> &instance_leader
,
3876 hash_set
<slp_tree
> &visited
)
3878 stmt_vec_info stmt_info
;
3881 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
3884 slp_instance
&stmt_instance
3885 = stmt_to_instance
.get_or_insert (stmt_info
, &existed_p
)
3888 else if (stmt_instance
!= instance
)
3890 /* If we're running into a previously marked stmt make us the
3891 leader of the current ultimate leader. This keeps the
3892 leader chain acyclic and works even when the current instance
3893 connects two previously independent graph parts. */
3894 slp_instance stmt_leader
3895 = get_ultimate_leader (stmt_instance
, instance_leader
);
3896 if (stmt_leader
!= instance
)
3897 instance_leader
.put (stmt_leader
, instance
);
3899 stmt_instance
= instance
;
/* Only descend into a node once; shared subtrees already carry their
   owner assignment.  */
3902 if (visited
.add (node
))
3906 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
3907 if (child
&& SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
3908 vect_bb_partition_graph_r (bb_vinfo
, instance
, child
, stmt_to_instance
,
3909 instance_leader
, visited
);
3912 /* Partition the SLP graph into pieces that can be costed independently. */
/* Partition the SLP graph of BB_VINFO into independently costable
   subgraphs.  Pass 1 walks every instance assigning scalar stmts and
   union-ing overlapping instances via leader chains; pass 2 collects
   each instance under its ultimate leader's subgraph_entries.
   NOTE(review): damaged extraction -- code left byte-identical.  */
3915 vect_bb_partition_graph (bb_vec_info bb_vinfo
)
3917 DUMP_VECT_SCOPE ("vect_bb_partition_graph");
3919 /* First walk the SLP graph assigning each involved scalar stmt a
3920 corresponding SLP graph entry and upon visiting a previously
3921 marked stmt, make the stmts leader the current SLP graph entry. */
3922 hash_map
<stmt_vec_info
, slp_instance
> stmt_to_instance
;
3923 hash_map
<slp_instance
, slp_instance
> instance_leader
;
3924 hash_set
<slp_tree
> visited
;
3925 slp_instance instance
;
3926 for (unsigned i
= 0; bb_vinfo
->slp_instances
.iterate (i
, &instance
); ++i
)
/* Every instance starts as its own leader.  */
3928 instance_leader
.put (instance
, instance
);
3929 vect_bb_partition_graph_r (bb_vinfo
,
3930 instance
, SLP_INSTANCE_TREE (instance
),
3931 stmt_to_instance
, instance_leader
,
3935 /* Then collect entries to each independent subgraph. */
3936 for (unsigned i
= 0; bb_vinfo
->slp_instances
.iterate (i
, &instance
); ++i
)
3938 slp_instance leader
= get_ultimate_leader (instance
, instance_leader
);
3939 leader
->subgraph_entries
.safe_push (instance
);
3940 if (dump_enabled_p ()
3941 && leader
!= instance
)
3942 dump_printf_loc (MSG_NOTE
, vect_location
,
3943 "instance %p is leader of %p\n",
3948 /* Compute the scalar cost of the SLP node NODE and its children
3949 and return it. Do not account defs that are marked in LIFE and
3950 update LIFE according to uses of NODE. */
/* Accumulate into COST_VEC the scalar cost of the stmts in NODE and its
   internal children.  Stmts whose defs escape to non-vectorized code
   (tracked in LIFE, and detected here by scanning immediate uses) are
   not accounted -- their scalar form is kept anyway.  gimple_visited_p
   is used to count each scalar stmt only once; VISITED caches nodes.
   NOTE(review): damaged extraction -- code left byte-identical.  */
3953 vect_bb_slp_scalar_cost (vec_info
*vinfo
,
3954 slp_tree node
, vec
<bool, va_heap
> *life
,
3955 stmt_vector_for_cost
*cost_vec
,
3956 hash_set
<slp_tree
> &visited
)
3959 stmt_vec_info stmt_info
;
3962 if (visited
.add (node
))
3965 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
3967 ssa_op_iter op_iter
;
3968 def_operand_p def_p
;
3973 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
3974 gimple
*orig_stmt
= orig_stmt_info
->stmt
;
3976 /* If there is a non-vectorized use of the defs then the scalar
3977 stmt is kept live in which case we do not account it or any
3978 required defs in the SLP children in the scalar cost. This
3979 way we make the vectorization more costly when compared to
3981 if (!STMT_VINFO_LIVE_P (stmt_info
))
3983 FOR_EACH_PHI_OR_STMT_DEF (def_p
, orig_stmt
, op_iter
, SSA_OP_DEF
)
3985 imm_use_iterator use_iter
;
3987 FOR_EACH_IMM_USE_STMT (use_stmt
, use_iter
, DEF_FROM_PTR (def_p
))
3988 if (!is_gimple_debug (use_stmt
))
3990 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
3993 (vect_stmt_to_vectorize (use_stmt_info
)))
3996 BREAK_FROM_IMM_USE_STMT (use_iter
);
4004 /* Count scalar stmts only once. */
4005 if (gimple_visited_p (orig_stmt
))
4007 gimple_set_visited (orig_stmt
, true);
/* Classify the stmt for the cost model: load/store for data refs,
   (classification lines for loads/nop-conversions are partly missing
   from this extraction).  */
4009 vect_cost_for_stmt kind
;
4010 if (STMT_VINFO_DATA_REF (orig_stmt_info
))
4012 if (DR_IS_READ (STMT_VINFO_DATA_REF (orig_stmt_info
)))
4015 kind
= scalar_store
;
4017 else if (vect_nop_conversion_p (orig_stmt_info
))
4021 record_stmt_cost (cost_vec
, 1, kind
, orig_stmt_info
,
4022 SLP_TREE_VECTYPE (node
), 0, vect_body
);
4025 auto_vec
<bool, 20> subtree_life
;
4026 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
4028 if (child
&& SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
4030 /* Do not directly pass LIFE to the recursive call, copy it to
4031 confine changes in the callee to the current child/subtree. */
4032 if (SLP_TREE_CODE (node
) == VEC_PERM_EXPR
)
/* Permute nodes: map the LIFE bits through the lane permutation.  */
4034 subtree_life
.safe_grow_cleared (SLP_TREE_LANES (child
), true);
4035 for (unsigned j
= 0;
4036 j
< SLP_TREE_LANE_PERMUTATION (node
).length (); ++j
)
4038 auto perm
= SLP_TREE_LANE_PERMUTATION (node
)[j
];
4039 if (perm
.first
== i
)
4040 subtree_life
[perm
.second
] = (*life
)[j
];
4045 gcc_assert (SLP_TREE_LANES (node
) == SLP_TREE_LANES (child
));
4046 subtree_life
.safe_splice (*life
);
4048 vect_bb_slp_scalar_cost (vinfo
, child
, &subtree_life
, cost_vec
,
4050 subtree_life
.truncate (0);
4055 /* Check if vectorization of the basic block is profitable for the
4056 subgraph denoted by SLP_INSTANCES. */
/* Decide whether vectorizing the subgraph SLP_INSTANCES is profitable:
   compute the scalar cost of the covered stmts and compare it with the
   vector inside + prologue + epilogue costs collected during analysis.
   Returns false when the vector cost is not strictly cheaper.
   NOTE(review): damaged extraction -- code left byte-identical.  */
4059 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo
,
4060 vec
<slp_instance
> slp_instances
)
4062 slp_instance instance
;
4064 unsigned int vec_inside_cost
= 0, vec_outside_cost
= 0, scalar_cost
= 0;
4065 unsigned int vec_prologue_cost
= 0, vec_epilogue_cost
= 0;
4067 void *vect_target_cost_data
= init_cost (NULL
);
4069 /* Calculate scalar cost and sum the cost for the vector stmts
4070 previously collected. */
4071 stmt_vector_for_cost scalar_costs
;
4072 scalar_costs
.create (0);
4073 hash_set
<slp_tree
> visited
;
4074 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
4076 auto_vec
<bool, 20> life
;
4077 life
.safe_grow_cleared (SLP_TREE_LANES (SLP_INSTANCE_TREE (instance
)),
4079 vect_bb_slp_scalar_cost (bb_vinfo
,
4080 SLP_INSTANCE_TREE (instance
),
4081 &life
, &scalar_costs
, visited
);
4082 add_stmt_costs (bb_vinfo
, vect_target_cost_data
, &instance
->cost_vec
);
4083 instance
->cost_vec
.release ();
4085 /* Unset visited flag. */
4086 stmt_info_for_cost
*si
;
4087 FOR_EACH_VEC_ELT (scalar_costs
, i
, si
)
4088 gimple_set_visited (si
->stmt_info
->stmt
, false);
4090 void *scalar_target_cost_data
= init_cost (NULL
);
4091 add_stmt_costs (bb_vinfo
, scalar_target_cost_data
, &scalar_costs
);
4092 scalar_costs
.release ();
4094 finish_cost (scalar_target_cost_data
, &dummy
, &scalar_cost
, &dummy
);
4095 destroy_cost_data (scalar_target_cost_data
);
4097 /* Complete the target-specific vector cost calculation. */
4098 finish_cost (vect_target_cost_data
, &vec_prologue_cost
,
4099 &vec_inside_cost
, &vec_epilogue_cost
);
4100 destroy_cost_data (vect_target_cost_data
);
4102 vec_outside_cost
= vec_prologue_cost
+ vec_epilogue_cost
;
4104 if (dump_enabled_p ())
4106 dump_printf_loc (MSG_NOTE
, vect_location
, "Cost model analysis: \n");
4107 dump_printf (MSG_NOTE
, " Vector inside of basic block cost: %d\n",
4109 dump_printf (MSG_NOTE
, " Vector prologue cost: %d\n", vec_prologue_cost
);
4110 dump_printf (MSG_NOTE
, " Vector epilogue cost: %d\n", vec_epilogue_cost
);
4111 dump_printf (MSG_NOTE
, " Scalar cost of basic block: %d\n", scalar_cost
);
4114 /* Vectorization is profitable if its cost is more than the cost of scalar
4115 version. Note that we err on the vector side for equal cost because
4116 the cost estimate is otherwise quite pessimistic (constant uses are
4117 free on the scalar side but cost a load on the vector side for
4119 if (vec_outside_cost
+ vec_inside_cost
> scalar_cost
)
4125 /* qsort comparator for lane defs. */
4128 vld_cmp (const void *a_
, const void *b_
)
4130 auto *a
= (const std::pair
<unsigned, tree
> *)a_
;
4131 auto *b
= (const std::pair
<unsigned, tree
> *)b_
;
4132 return a
->first
- b
->first
;
4135 /* Return true if USE_STMT is a vector lane insert into VEC and set
4136 *THIS_LANE to the lane number that is set. */
/* Return true if USE_STMT is a BIT_INSERT_EXPR inserting a scalar into
   vector VEC (when VEC is non-NULL; otherwise VEC is taken from the
   stmt's rhs1), with matching element type and a constant bit position
   that is a multiple of the element size; on success *THIS_LANE is the
   lane index.
   NOTE(review): damaged extraction -- the early-bailout/return lines are
   missing from this text; code left byte-identical.  */
4139 vect_slp_is_lane_insert (gimple
*use_stmt
, tree vec
, unsigned *this_lane
)
4141 gassign
*use_ass
= dyn_cast
<gassign
*> (use_stmt
);
4143 || gimple_assign_rhs_code (use_ass
) != BIT_INSERT_EXPR
/* When VEC was passed in, rhs1 must be that vector; otherwise capture
   rhs1 as the vector being inserted into.  */
4145 ? gimple_assign_rhs1 (use_ass
) != vec
4146 : ((vec
= gimple_assign_rhs1 (use_ass
)), false))
4147 || !useless_type_conversion_p (TREE_TYPE (TREE_TYPE (vec
)),
4148 TREE_TYPE (gimple_assign_rhs2 (use_ass
)))
/* The inserted bit position divided by the element size gives the lane.  */
4149 || !constant_multiple_p
4150 (tree_to_poly_uint64 (gimple_assign_rhs3 (use_ass
)),
4151 tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (TREE_TYPE (vec
)))),
4157 /* Find any vectorizable constructors and add them to the grouped_store
/* Scan the region's stmts for vectorizable vector CONSTRUCTORs (pushed
   to the grouped-stores array as SLP seeds) and for chains of
   BIT_INSERT_EXPRs that together fill every lane of a vector (recorded
   as slp_inst_kind_ctor roots).  The insert-chain search starts from an
   insert into lane 0, first following single uses forward, then the
   rhs1 def chain backward, until all lanes are covered or the chain
   breaks.
   NOTE(review): damaged extraction -- braces, `continue`/`break` lines
   and some declarations are missing; code left byte-identical.  */
4161 vect_slp_check_for_constructors (bb_vec_info bb_vinfo
)
4163 for (unsigned i
= 0; i
< bb_vinfo
->bbs
.length (); ++i
)
4164 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb_vinfo
->bbs
[i
]);
4165 !gsi_end_p (gsi
); gsi_next (&gsi
))
4167 gassign
*assign
= dyn_cast
<gassign
*> (gsi_stmt (gsi
));
4171 tree rhs
= gimple_assign_rhs1 (assign
);
4172 if (gimple_assign_rhs_code (assign
) == CONSTRUCTOR
)
/* Only complete, non-uniform vector CTORs of scalar elements whose
   elements are all SSA defs inside the region qualify.  */
4174 if (!VECTOR_TYPE_P (TREE_TYPE (rhs
))
4175 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs
)),
4176 CONSTRUCTOR_NELTS (rhs
))
4177 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs
, 0)->value
))
4178 || uniform_vector_p (rhs
))
4183 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs
), j
, val
)
4184 if (TREE_CODE (val
) != SSA_NAME
4185 || !bb_vinfo
->lookup_def (val
))
4187 if (j
!= CONSTRUCTOR_NELTS (rhs
))
4190 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (assign
);
4191 BB_VINFO_GROUPED_STORES (bb_vinfo
).safe_push (stmt_info
);
/* Otherwise look for an insert into lane 0 of a constant-length,
   multi-lane vector as the potential start of an insert chain.  */
4193 else if (gimple_assign_rhs_code (assign
) == BIT_INSERT_EXPR
4194 && VECTOR_TYPE_P (TREE_TYPE (rhs
))
4195 && TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs
)).is_constant ()
4196 && TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs
)).to_constant () > 1
4197 && integer_zerop (gimple_assign_rhs3 (assign
))
4198 && useless_type_conversion_p
4199 (TREE_TYPE (TREE_TYPE (rhs
)),
4200 TREE_TYPE (gimple_assign_rhs2 (assign
)))
4201 && bb_vinfo
->lookup_def (gimple_assign_rhs2 (assign
)))
4203 /* We start to match on insert to lane zero but since the
4204 inserts need not be ordered we'd have to search both
4205 the def and the use chains. */
4206 tree vectype
= TREE_TYPE (rhs
);
4207 unsigned nlanes
= TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
4208 auto_vec
<std::pair
<unsigned, tree
> > lane_defs (nlanes
);
4209 auto_sbitmap
lanes (nlanes
);
4210 bitmap_clear (lanes
);
4211 bitmap_set_bit (lanes
, 0);
4212 tree def
= gimple_assign_lhs (assign
);
4213 lane_defs
.quick_push
4214 (std::make_pair (0, gimple_assign_rhs2 (assign
)));
4215 unsigned lanes_found
= 1;
4216 /* Start with the use chains, the last stmt will be the root. */
4217 stmt_vec_info last
= bb_vinfo
->lookup_stmt (assign
);
4220 use_operand_p use_p
;
4222 if (!single_imm_use (def
, &use_p
, &use_stmt
))
4225 if (!bb_vinfo
->lookup_stmt (use_stmt
)
4226 || !vect_slp_is_lane_insert (use_stmt
, def
, &this_lane
)
4227 || !bb_vinfo
->lookup_def (gimple_assign_rhs2 (use_stmt
)))
/* A lane inserted twice means the earlier value is dead -- stop.  */
4229 if (bitmap_bit_p (lanes
, this_lane
))
4232 bitmap_set_bit (lanes
, this_lane
);
4233 gassign
*use_ass
= as_a
<gassign
*> (use_stmt
);
4234 lane_defs
.quick_push (std::make_pair
4235 (this_lane
, gimple_assign_rhs2 (use_ass
)));
4236 last
= bb_vinfo
->lookup_stmt (use_ass
);
4237 def
= gimple_assign_lhs (use_ass
);
4239 while (lanes_found
< nlanes
);
4240 if (lanes_found
< nlanes
)
4242 /* Now search the def chain. */
4243 def
= gimple_assign_rhs1 (assign
);
4246 if (TREE_CODE (def
) != SSA_NAME
4247 || !has_single_use (def
))
4249 gimple
*def_stmt
= SSA_NAME_DEF_STMT (def
);
4251 if (!bb_vinfo
->lookup_stmt (def_stmt
)
4252 || !vect_slp_is_lane_insert (def_stmt
,
4253 NULL_TREE
, &this_lane
)
4254 || !bb_vinfo
->lookup_def (gimple_assign_rhs2 (def_stmt
)))
4256 if (bitmap_bit_p (lanes
, this_lane
))
4259 bitmap_set_bit (lanes
, this_lane
);
4260 lane_defs
.quick_push (std::make_pair
4262 gimple_assign_rhs2 (def_stmt
)));
4263 def
= gimple_assign_rhs1 (def_stmt
);
4265 while (lanes_found
< nlanes
);
4267 if (lanes_found
== nlanes
)
4269 /* Sort lane_defs after the lane index and register the root. */
4270 lane_defs
.qsort (vld_cmp
);
4271 vec
<stmt_vec_info
> stmts
;
4272 stmts
.create (nlanes
);
4273 for (unsigned i
= 0; i
< nlanes
; ++i
)
4274 stmts
.quick_push (bb_vinfo
->lookup_def (lane_defs
[i
].second
));
4275 bb_vinfo
->roots
.safe_push (slp_root (slp_inst_kind_ctor
,
4282 /* Walk the grouped store chains and replace entries with their
4283 pattern variant if any. */
/* After pattern recognition, rewrite grouped-store chains so that every
   element that was replaced by a pattern stmt is represented by its
   pattern variant: fix the group head (copying first/size/gap/next
   fields to the pattern stmt) and then each chained element, keeping
   DR_GROUP_FIRST_ELEMENT consistent along the chain.
   NOTE(review): damaged extraction -- code left byte-identical.  */
4286 vect_fixup_store_groups_with_patterns (vec_info
*vinfo
)
4288 stmt_vec_info first_element
;
4291 FOR_EACH_VEC_ELT (vinfo
->grouped_stores
, i
, first_element
)
4293 /* We also have CTORs in this array. */
4294 if (!STMT_VINFO_GROUPED_ACCESS (first_element
))
4296 if (STMT_VINFO_IN_PATTERN_P (first_element
))
/* Replace the group head with its pattern stmt, migrating the group
   bookkeeping from the original stmt.  */
4298 stmt_vec_info orig
= first_element
;
4299 first_element
= STMT_VINFO_RELATED_STMT (first_element
);
4300 DR_GROUP_FIRST_ELEMENT (first_element
) = first_element
;
4301 DR_GROUP_SIZE (first_element
) = DR_GROUP_SIZE (orig
);
4302 DR_GROUP_GAP (first_element
) = DR_GROUP_GAP (orig
);
4303 DR_GROUP_NEXT_ELEMENT (first_element
) = DR_GROUP_NEXT_ELEMENT (orig
);
4304 vinfo
->grouped_stores
[i
] = first_element
;
/* Walk the rest of the chain, splicing in pattern stmts in place of
   their original stmts.  */
4306 stmt_vec_info prev
= first_element
;
4307 while (DR_GROUP_NEXT_ELEMENT (prev
))
4309 stmt_vec_info elt
= DR_GROUP_NEXT_ELEMENT (prev
);
4310 if (STMT_VINFO_IN_PATTERN_P (elt
))
4312 stmt_vec_info orig
= elt
;
4313 elt
= STMT_VINFO_RELATED_STMT (elt
);
4314 DR_GROUP_NEXT_ELEMENT (prev
) = elt
;
4315 DR_GROUP_GAP (elt
) = DR_GROUP_GAP (orig
);
4316 DR_GROUP_NEXT_ELEMENT (elt
) = DR_GROUP_NEXT_ELEMENT (orig
);
4318 DR_GROUP_FIRST_ELEMENT (elt
) = first_element
;
4324 /* Check if the region described by BB_VINFO can be vectorized, returning
4325 true if so. When returning false, set FATAL to true if the same failure
4326 would prevent vectorization at other vector sizes, false if it is still
4327 worth trying other sizes. N_STMTS is the number of statements in the
/* Run the full BB-vectorization analysis pipeline on BB_VINFO: data-ref
   analysis, constructor detection, pattern recognition, SLP discovery
   and optimization, alignment/dependence checks per instance, pure-SLP
   marking, operation analysis and subgraph partitioning.  Returns true
   when the region is vectorizable; on failure FATAL distinguishes
   size-independent failures.  N_STMTS is the region's stmt count.
   NOTE(review): damaged extraction -- `return` statements and braces are
   missing from this text; code left byte-identical.  */
4331 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo
, int n_stmts
, bool &fatal
,
4332 vec
<int> *dataref_groups
)
4334 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
4336 slp_instance instance
;
4338 poly_uint64 min_vf
= 2;
4340 /* The first group of checks is independent of the vector size. */
4343 /* Analyze the data references. */
4345 if (!vect_analyze_data_refs (bb_vinfo
, &min_vf
, NULL
))
4347 if (dump_enabled_p ())
4348 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4349 "not vectorized: unhandled data-ref in basic "
4354 if (!vect_analyze_data_ref_accesses (bb_vinfo
, dataref_groups
))
4356 if (dump_enabled_p ())
4357 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4358 "not vectorized: unhandled data access in "
4363 vect_slp_check_for_constructors (bb_vinfo
);
4365 /* If there are no grouped stores and no constructors in the region
4366 there is no need to continue with pattern recog as vect_analyze_slp
4367 will fail anyway. */
4368 if (bb_vinfo
->grouped_stores
.is_empty ()
4369 && bb_vinfo
->roots
.is_empty ())
4371 if (dump_enabled_p ())
4372 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4373 "not vectorized: no grouped stores in "
4378 /* While the rest of the analysis below depends on it in some way. */
4381 vect_pattern_recog (bb_vinfo
);
4383 /* Update store groups from pattern processing. */
4384 vect_fixup_store_groups_with_patterns (bb_vinfo
);
4386 /* Check the SLP opportunities in the basic block, analyze and build SLP
4388 if (!vect_analyze_slp (bb_vinfo
, n_stmts
))
4390 if (dump_enabled_p ())
4392 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4393 "Failed to SLP the basic block.\n");
4394 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4395 "not vectorized: failed to find SLP opportunities "
4396 "in basic block.\n");
4401 /* Optimize permutations. */
4402 vect_optimize_slp (bb_vinfo
);
4404 /* Gather the loads reachable from the SLP graph entries. */
4405 vect_gather_slp_loads (bb_vinfo
);
4407 vect_record_base_alignments (bb_vinfo
);
4409 /* Analyze and verify the alignment of data references and the
4410 dependence in the SLP instances. */
/* Note: index only advances when the instance survives; failed
   instances are ordered_remove'd.  */
4411 for (i
= 0; BB_VINFO_SLP_INSTANCES (bb_vinfo
).iterate (i
, &instance
); )
4413 vect_location
= instance
->location ();
4414 if (! vect_slp_analyze_instance_alignment (bb_vinfo
, instance
)
4415 || ! vect_slp_analyze_instance_dependence (bb_vinfo
, instance
))
4417 slp_tree node
= SLP_INSTANCE_TREE (instance
);
4418 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
4419 if (dump_enabled_p ())
4420 dump_printf_loc (MSG_NOTE
, vect_location
,
4421 "removing SLP instance operations starting from: %G",
4423 vect_free_slp_instance (instance
);
4424 BB_VINFO_SLP_INSTANCES (bb_vinfo
).ordered_remove (i
);
4428 /* Mark all the statements that we want to vectorize as pure SLP and
4430 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance
));
4431 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance
));
4432 if (stmt_vec_info root
= SLP_INSTANCE_ROOT_STMT (instance
))
4434 STMT_SLP_TYPE (root
) = pure_slp
;
/* BIT_INSERT chains have one root stmt per lane -- walk rhs1 back
   through the chain and mark each insert as pure SLP too.  */
4435 if (is_gimple_assign (root
->stmt
)
4436 && gimple_assign_rhs_code (root
->stmt
) == BIT_INSERT_EXPR
)
4438 /* ??? We should probably record the whole vector of
4439 root stmts so we do not have to back-track here... */
4440 for (unsigned n
= SLP_TREE_LANES (SLP_INSTANCE_TREE (instance
));
4443 root
= bb_vinfo
->lookup_def (gimple_assign_rhs1 (root
->stmt
));
4444 STMT_SLP_TYPE (root
) = pure_slp
;
4451 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo
).length ())
4454 if (!vect_slp_analyze_operations (bb_vinfo
))
4456 if (dump_enabled_p ())
4457 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4458 "not vectorized: bad operation in basic block.\n");
4462 vect_bb_partition_graph (bb_vinfo
);
4467 /* Subroutine of vect_slp_bb. Try to vectorize the statements for all
4468 basic blocks in BBS, returning true on success.
4469 The region has N_STMTS statements and has the datarefs given by DATAREFS. */
4472 vect_slp_region (vec
<basic_block
> bbs
, vec
<data_reference_p
> datarefs
,
4473 vec
<int> *dataref_groups
, unsigned int n_stmts
)
4475 bb_vec_info bb_vinfo
;
4476 auto_vector_modes vector_modes
;
4478 /* Autodetect first vector size we try. */
4479 machine_mode next_vector_mode
= VOIDmode
;
4480 targetm
.vectorize
.autovectorize_vector_modes (&vector_modes
, false);
4481 unsigned int mode_i
= 0;
4483 vec_info_shared shared
;
4485 machine_mode autodetected_vector_mode
= VOIDmode
;
4488 bool vectorized
= false;
4490 bb_vinfo
= new _bb_vec_info (bbs
, &shared
);
4492 bool first_time_p
= shared
.datarefs
.is_empty ();
4493 BB_VINFO_DATAREFS (bb_vinfo
) = datarefs
;
4495 bb_vinfo
->shared
->save_datarefs ();
4497 bb_vinfo
->shared
->check_datarefs ();
4498 bb_vinfo
->vector_mode
= next_vector_mode
;
4500 if (vect_slp_analyze_bb_1 (bb_vinfo
, n_stmts
, fatal
, dataref_groups
)
4501 && dbg_cnt (vect_slp
))
4503 if (dump_enabled_p ())
4505 dump_printf_loc (MSG_NOTE
, vect_location
,
4506 "***** Analysis succeeded with vector mode"
4507 " %s\n", GET_MODE_NAME (bb_vinfo
->vector_mode
));
4508 dump_printf_loc (MSG_NOTE
, vect_location
, "SLPing BB part\n");
4511 bb_vinfo
->shared
->check_datarefs ();
4514 slp_instance instance
;
4515 FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo
), i
, instance
)
4517 if (instance
->subgraph_entries
.is_empty ())
4520 vect_location
= instance
->location ();
4521 if (!unlimited_cost_model (NULL
)
4522 && !vect_bb_vectorization_profitable_p
4523 (bb_vinfo
, instance
->subgraph_entries
))
4525 if (dump_enabled_p ())
4526 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4527 "not vectorized: vectorization is not "
4532 if (!vectorized
&& dump_enabled_p ())
4533 dump_printf_loc (MSG_NOTE
, vect_location
,
4534 "Basic block will be vectorized "
4538 vect_schedule_slp (bb_vinfo
, instance
->subgraph_entries
);
4540 unsigned HOST_WIDE_INT bytes
;
4541 if (dump_enabled_p ())
4544 (bb_vinfo
->vector_mode
).is_constant (&bytes
))
4545 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
4546 "basic block part vectorized using %wu "
4547 "byte vectors\n", bytes
);
4549 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
, vect_location
,
4550 "basic block part vectorized using "
4551 "variable length vectors\n");
4557 if (dump_enabled_p ())
4558 dump_printf_loc (MSG_NOTE
, vect_location
,
4559 "***** Analysis failed with vector mode %s\n",
4560 GET_MODE_NAME (bb_vinfo
->vector_mode
));
4564 autodetected_vector_mode
= bb_vinfo
->vector_mode
;
4567 while (mode_i
< vector_modes
.length ()
4568 && vect_chooses_same_modes_p (bb_vinfo
, vector_modes
[mode_i
]))
4570 if (dump_enabled_p ())
4571 dump_printf_loc (MSG_NOTE
, vect_location
,
4572 "***** The result for vector mode %s would"
4574 GET_MODE_NAME (vector_modes
[mode_i
]));
4580 if (mode_i
< vector_modes
.length ()
4581 && VECTOR_MODE_P (autodetected_vector_mode
)
4582 && (related_vector_mode (vector_modes
[mode_i
],
4583 GET_MODE_INNER (autodetected_vector_mode
))
4584 == autodetected_vector_mode
)
4585 && (related_vector_mode (autodetected_vector_mode
,
4586 GET_MODE_INNER (vector_modes
[mode_i
]))
4587 == vector_modes
[mode_i
]))
4589 if (dump_enabled_p ())
4590 dump_printf_loc (MSG_NOTE
, vect_location
,
4591 "***** Skipping vector mode %s, which would"
4592 " repeat the analysis for %s\n",
4593 GET_MODE_NAME (vector_modes
[mode_i
]),
4594 GET_MODE_NAME (autodetected_vector_mode
));
4599 || mode_i
== vector_modes
.length ()
4600 || autodetected_vector_mode
== VOIDmode
4601 /* If vect_slp_analyze_bb_1 signaled that analysis for all
4602 vector sizes will fail do not bother iterating. */
4606 /* Try the next biggest vector size. */
4607 next_vector_mode
= vector_modes
[mode_i
++];
4608 if (dump_enabled_p ())
4609 dump_printf_loc (MSG_NOTE
, vect_location
,
4610 "***** Re-trying analysis with vector mode %s\n",
4611 GET_MODE_NAME (next_vector_mode
));
4616 /* Main entry for the BB vectorizer. Analyze and transform BBS, returns
4617 true if anything in the basic-block was vectorized. */
4620 vect_slp_bbs (vec
<basic_block
> bbs
)
4622 vec
<data_reference_p
> datarefs
= vNULL
;
4623 auto_vec
<int> dataref_groups
;
4625 int current_group
= 0;
4627 for (unsigned i
= 0; i
< bbs
.length (); i
++)
4629 basic_block bb
= bbs
[i
];
4630 for (gimple_stmt_iterator gsi
= gsi_after_labels (bb
); !gsi_end_p (gsi
);
4633 gimple
*stmt
= gsi_stmt (gsi
);
4634 if (is_gimple_debug (stmt
))
4639 if (gimple_location (stmt
) != UNKNOWN_LOCATION
)
4640 vect_location
= stmt
;
4642 if (!vect_find_stmt_data_reference (NULL
, stmt
, &datarefs
,
4643 &dataref_groups
, current_group
))
4648 return vect_slp_region (bbs
, datarefs
, &dataref_groups
, insns
);
4651 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
4652 true if anything in the basic-block was vectorized. */
4655 vect_slp_bb (basic_block bb
)
4657 auto_vec
<basic_block
> bbs
;
4659 return vect_slp_bbs (bbs
);
4662 /* Main entry for the BB vectorizer. Analyze and transform BB, returns
4663 true if anything in the basic-block was vectorized. */
4666 vect_slp_function (function
*fun
)
4669 int *rpo
= XNEWVEC (int, n_basic_blocks_for_fn (fun
));
4670 unsigned n
= pre_and_rev_post_order_compute_fn (fun
, NULL
, rpo
, false);
4672 /* For the moment split the function into pieces to avoid making
4673 the iteration on the vector mode moot. Split at points we know
4674 to not handle well which is CFG merges (SLP discovery doesn't
4675 handle non-loop-header PHIs) and loop exits. Since pattern
4676 recog requires reverse iteration to visit uses before defs
4677 simply chop RPO into pieces. */
4678 auto_vec
<basic_block
> bbs
;
4679 for (unsigned i
= 0; i
< n
; i
++)
4681 basic_block bb
= BASIC_BLOCK_FOR_FN (fun
, rpo
[i
]);
4684 /* Split when a BB is not dominated by the first block. */
4685 if (!bbs
.is_empty ()
4686 && !dominated_by_p (CDI_DOMINATORS
, bb
, bbs
[0]))
4688 if (dump_enabled_p ())
4689 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4690 "splitting region at dominance boundary bb%d\n",
4694 /* Split when the loop determined by the first block
4695 is exited. This is because we eventually insert
4696 invariants at region begin. */
4697 else if (!bbs
.is_empty ()
4698 && bbs
[0]->loop_father
!= bb
->loop_father
4699 && !flow_loop_nested_p (bbs
[0]->loop_father
, bb
->loop_father
))
4701 if (dump_enabled_p ())
4702 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4703 "splitting region at loop %d exit at bb%d\n",
4704 bbs
[0]->loop_father
->num
, bb
->index
);
4708 if (split
&& !bbs
.is_empty ())
4710 r
|= vect_slp_bbs (bbs
);
4712 bbs
.quick_push (bb
);
4717 /* When we have a stmt ending this block and defining a
4718 value we have to insert on edges when inserting after it for
4719 a vector containing its definition. Avoid this for now. */
4720 if (gimple
*last
= last_stmt (bb
))
4721 if (gimple_get_lhs (last
)
4722 && is_ctrl_altering_stmt (last
))
4724 if (dump_enabled_p ())
4725 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4726 "splitting region at control altering "
4727 "definition %G", last
);
4728 r
|= vect_slp_bbs (bbs
);
4733 if (!bbs
.is_empty ())
4734 r
|= vect_slp_bbs (bbs
);
4741 /* Build a variable-length vector in which the elements in ELTS are repeated
4742 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
4743 RESULTS and add any new instructions to SEQ.
4745 The approach we use is:
4747 (1) Find a vector mode VM with integer elements of mode IM.
4749 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
4750 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
4751 from small vectors to IM.
4753 (3) Duplicate each ELTS'[I] into a vector of mode VM.
4755 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
4756 correct byte contents.
4758 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
4760 We try to find the largest IM for which this sequence works, in order
4761 to cut down on the number of interleaves. */
4764 duplicate_and_interleave (vec_info
*vinfo
, gimple_seq
*seq
, tree vector_type
,
4765 vec
<tree
> elts
, unsigned int nresults
,
4768 unsigned int nelts
= elts
.length ();
4769 tree element_type
= TREE_TYPE (vector_type
);
4771 /* (1) Find a vector mode VM with integer elements of mode IM. */
4772 unsigned int nvectors
= 1;
4773 tree new_vector_type
;
4775 if (!can_duplicate_and_interleave_p (vinfo
, nelts
, element_type
,
4776 &nvectors
, &new_vector_type
,
4780 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
4781 unsigned int partial_nelts
= nelts
/ nvectors
;
4782 tree partial_vector_type
= build_vector_type (element_type
, partial_nelts
);
4784 tree_vector_builder partial_elts
;
4785 auto_vec
<tree
, 32> pieces (nvectors
* 2);
4786 pieces
.quick_grow (nvectors
* 2);
4787 for (unsigned int i
= 0; i
< nvectors
; ++i
)
4789 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
4790 ELTS' has mode IM. */
4791 partial_elts
.new_vector (partial_vector_type
, partial_nelts
, 1);
4792 for (unsigned int j
= 0; j
< partial_nelts
; ++j
)
4793 partial_elts
.quick_push (elts
[i
* partial_nelts
+ j
]);
4794 tree t
= gimple_build_vector (seq
, &partial_elts
);
4795 t
= gimple_build (seq
, VIEW_CONVERT_EXPR
,
4796 TREE_TYPE (new_vector_type
), t
);
4798 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
4799 pieces
[i
] = gimple_build_vector_from_val (seq
, new_vector_type
, t
);
4802 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
4803 correct byte contents.
4805 We need to repeat the following operation log2(nvectors) times:
4807 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
4808 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
4810 However, if each input repeats every N elements and the VF is
4811 a multiple of N * 2, the HI result is the same as the LO. */
4812 unsigned int in_start
= 0;
4813 unsigned int out_start
= nvectors
;
4814 unsigned int hi_start
= nvectors
/ 2;
4815 /* A bound on the number of outputs needed to produce NRESULTS results
4816 in the final iteration. */
4817 unsigned int noutputs_bound
= nvectors
* nresults
;
4818 for (unsigned int in_repeat
= 1; in_repeat
< nvectors
; in_repeat
*= 2)
4820 noutputs_bound
/= 2;
4821 unsigned int limit
= MIN (noutputs_bound
, nvectors
);
4822 for (unsigned int i
= 0; i
< limit
; ++i
)
4825 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type
),
4828 pieces
[out_start
+ i
] = pieces
[out_start
+ i
- 1];
4832 tree output
= make_ssa_name (new_vector_type
);
4833 tree input1
= pieces
[in_start
+ (i
/ 2)];
4834 tree input2
= pieces
[in_start
+ (i
/ 2) + hi_start
];
4835 gassign
*stmt
= gimple_build_assign (output
, VEC_PERM_EXPR
,
4838 gimple_seq_add_stmt (seq
, stmt
);
4839 pieces
[out_start
+ i
] = output
;
4841 std::swap (in_start
, out_start
);
4844 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
4845 results
.reserve (nresults
);
4846 for (unsigned int i
= 0; i
< nresults
; ++i
)
4848 results
.quick_push (gimple_build (seq
, VIEW_CONVERT_EXPR
, vector_type
,
4849 pieces
[in_start
+ i
]));
4851 results
.quick_push (results
[i
- nvectors
]);
4855 /* For constant and loop invariant defs in OP_NODE this function creates
4856 vector defs that will be used in the vectorized stmts and stores them
4857 to SLP_TREE_VEC_DEFS of OP_NODE. */
4860 vect_create_constant_vectors (vec_info
*vinfo
, slp_tree op_node
)
4862 unsigned HOST_WIDE_INT nunits
;
4864 unsigned j
, number_of_places_left_in_vector
;
4867 int group_size
= op_node
->ops
.length ();
4868 unsigned int vec_num
, i
;
4869 unsigned number_of_copies
= 1;
4871 gimple_seq ctor_seq
= NULL
;
4872 auto_vec
<tree
, 16> permute_results
;
4874 /* We always want SLP_TREE_VECTYPE (op_node) here correctly set. */
4875 vector_type
= SLP_TREE_VECTYPE (op_node
);
4877 unsigned int number_of_vectors
= SLP_TREE_NUMBER_OF_VEC_STMTS (op_node
);
4878 SLP_TREE_VEC_DEFS (op_node
).create (number_of_vectors
);
4879 auto_vec
<tree
> voprnds (number_of_vectors
);
4881 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
4882 created vectors. It is greater than 1 if unrolling is performed.
4884 For example, we have two scalar operands, s1 and s2 (e.g., group of
4885 strided accesses of size two), while NUNITS is four (i.e., four scalars
4886 of this type can be packed in a vector). The output vector will contain
4887 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
4890 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
4891 containing the operands.
4893 For example, NUNITS is four as before, and the group size is 8
4894 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
4895 {s5, s6, s7, s8}. */
4897 /* When using duplicate_and_interleave, we just need one element for
4898 each scalar statement. */
4899 if (!TYPE_VECTOR_SUBPARTS (vector_type
).is_constant (&nunits
))
4900 nunits
= group_size
;
4902 number_of_copies
= nunits
* number_of_vectors
/ group_size
;
4904 number_of_places_left_in_vector
= nunits
;
4906 tree_vector_builder
elts (vector_type
, nunits
, 1);
4907 elts
.quick_grow (nunits
);
4908 stmt_vec_info insert_after
= NULL
;
4909 for (j
= 0; j
< number_of_copies
; j
++)
4912 for (i
= group_size
- 1; op_node
->ops
.iterate (i
, &op
); i
--)
4914 /* Create 'vect_ = {op0,op1,...,opn}'. */
4915 number_of_places_left_in_vector
--;
4917 if (!types_compatible_p (TREE_TYPE (vector_type
), TREE_TYPE (op
)))
4919 if (CONSTANT_CLASS_P (op
))
4921 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
4923 /* Can't use VIEW_CONVERT_EXPR for booleans because
4924 of possibly different sizes of scalar value and
4926 if (integer_zerop (op
))
4927 op
= build_int_cst (TREE_TYPE (vector_type
), 0);
4928 else if (integer_onep (op
))
4929 op
= build_all_ones_cst (TREE_TYPE (vector_type
));
4934 op
= fold_unary (VIEW_CONVERT_EXPR
,
4935 TREE_TYPE (vector_type
), op
);
4936 gcc_assert (op
&& CONSTANT_CLASS_P (op
));
4940 tree new_temp
= make_ssa_name (TREE_TYPE (vector_type
));
4942 if (VECTOR_BOOLEAN_TYPE_P (vector_type
))
4945 = build_all_ones_cst (TREE_TYPE (vector_type
));
4947 = build_zero_cst (TREE_TYPE (vector_type
));
4948 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op
)));
4949 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
4955 op
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vector_type
),
4958 = gimple_build_assign (new_temp
, VIEW_CONVERT_EXPR
,
4961 gimple_seq_add_stmt (&ctor_seq
, init_stmt
);
4965 elts
[number_of_places_left_in_vector
] = op
;
4966 if (!CONSTANT_CLASS_P (op
))
4968 /* For BB vectorization we have to compute an insert location
4969 when a def is inside the analyzed region since we cannot
4970 simply insert at the BB start in this case. */
4971 stmt_vec_info opdef
;
4972 if (TREE_CODE (orig_op
) == SSA_NAME
4973 && !SSA_NAME_IS_DEFAULT_DEF (orig_op
)
4974 && is_a
<bb_vec_info
> (vinfo
)
4975 && (opdef
= vinfo
->lookup_def (orig_op
)))
4978 insert_after
= opdef
;
4980 insert_after
= get_later_stmt (insert_after
, opdef
);
4983 if (number_of_places_left_in_vector
== 0)
4986 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
)
4987 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type
), nunits
))
4988 vec_cst
= gimple_build_vector (&ctor_seq
, &elts
);
4991 if (permute_results
.is_empty ())
4992 duplicate_and_interleave (vinfo
, &ctor_seq
, vector_type
,
4993 elts
, number_of_vectors
,
4995 vec_cst
= permute_results
[number_of_vectors
- j
- 1];
4997 if (!gimple_seq_empty_p (ctor_seq
))
5001 gimple_stmt_iterator gsi
;
5002 if (gimple_code (insert_after
->stmt
) == GIMPLE_PHI
)
5004 gsi
= gsi_after_labels (gimple_bb (insert_after
->stmt
));
5005 gsi_insert_seq_before (&gsi
, ctor_seq
,
5006 GSI_CONTINUE_LINKING
);
5008 else if (!stmt_ends_bb_p (insert_after
->stmt
))
5010 gsi
= gsi_for_stmt (insert_after
->stmt
);
5011 gsi_insert_seq_after (&gsi
, ctor_seq
,
5012 GSI_CONTINUE_LINKING
);
5016 /* When we want to insert after a def where the
5017 defining stmt throws then insert on the fallthru
5019 edge e
= find_fallthru_edge
5020 (gimple_bb (insert_after
->stmt
)->succs
);
5022 = gsi_insert_seq_on_edge_immediate (e
, ctor_seq
);
5023 gcc_assert (!new_bb
);
5027 vinfo
->insert_seq_on_entry (NULL
, ctor_seq
);
5030 voprnds
.quick_push (vec_cst
);
5031 insert_after
= NULL
;
5032 number_of_places_left_in_vector
= nunits
;
5034 elts
.new_vector (vector_type
, nunits
, 1);
5035 elts
.quick_grow (nunits
);
5040 /* Since the vectors are created in the reverse order, we should invert
5042 vec_num
= voprnds
.length ();
5043 for (j
= vec_num
; j
!= 0; j
--)
5045 vop
= voprnds
[j
- 1];
5046 SLP_TREE_VEC_DEFS (op_node
).quick_push (vop
);
5049 /* In case that VF is greater than the unrolling factor needed for the SLP
5050 group of stmts, NUMBER_OF_VECTORS to be created is greater than
5051 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
5052 to replicate the vectors. */
5053 while (number_of_vectors
> SLP_TREE_VEC_DEFS (op_node
).length ())
5054 for (i
= 0; SLP_TREE_VEC_DEFS (op_node
).iterate (i
, &vop
) && i
< vec_num
;
5056 SLP_TREE_VEC_DEFS (op_node
).quick_push (vop
);
5059 /* Get the Ith vectorized definition from SLP_NODE. */
5062 vect_get_slp_vect_def (slp_tree slp_node
, unsigned i
)
5064 if (SLP_TREE_VEC_STMTS (slp_node
).exists ())
5065 return gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node
)[i
]);
5067 return SLP_TREE_VEC_DEFS (slp_node
)[i
];
5070 /* Get the vectorized definitions of SLP_NODE in *VEC_DEFS. */
5073 vect_get_slp_defs (slp_tree slp_node
, vec
<tree
> *vec_defs
)
5075 vec_defs
->create (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
));
5076 if (SLP_TREE_DEF_TYPE (slp_node
) == vect_internal_def
)
5079 gimple
*vec_def_stmt
;
5080 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node
), j
, vec_def_stmt
)
5081 vec_defs
->quick_push (gimple_get_lhs (vec_def_stmt
));
5084 vec_defs
->splice (SLP_TREE_VEC_DEFS (slp_node
));
5087 /* Get N vectorized definitions for SLP_NODE. */
5090 vect_get_slp_defs (vec_info
*,
5091 slp_tree slp_node
, vec
<vec
<tree
> > *vec_oprnds
, unsigned n
)
5094 n
= SLP_TREE_CHILDREN (slp_node
).length ();
5096 for (unsigned i
= 0; i
< n
; ++i
)
5098 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[i
];
5099 vec
<tree
> vec_defs
= vNULL
;
5100 vect_get_slp_defs (child
, &vec_defs
);
5101 vec_oprnds
->quick_push (vec_defs
);
5105 /* Generate vector permute statements from a list of loads in DR_CHAIN.
5106 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
5107 permute statements for the SLP node NODE. Store the number of vector
5108 permute instructions in *N_PERMS and the number of vector load
5109 instructions in *N_LOADS. */
5112 vect_transform_slp_perm_load (vec_info
*vinfo
,
5113 slp_tree node
, vec
<tree
> dr_chain
,
5114 gimple_stmt_iterator
*gsi
, poly_uint64 vf
,
5115 bool analyze_only
, unsigned *n_perms
,
5116 unsigned int *n_loads
)
5118 stmt_vec_info stmt_info
= SLP_TREE_SCALAR_STMTS (node
)[0];
5120 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5121 unsigned int group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
5122 unsigned int mask_element
;
5125 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
5128 stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
5130 mode
= TYPE_MODE (vectype
);
5131 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5133 /* Initialize the vect stmts of NODE to properly insert the generated
5136 for (unsigned i
= SLP_TREE_VEC_STMTS (node
).length ();
5137 i
< SLP_TREE_NUMBER_OF_VEC_STMTS (node
); i
++)
5138 SLP_TREE_VEC_STMTS (node
).quick_push (NULL
);
5140 /* Generate permutation masks for every NODE. Number of masks for each NODE
5141 is equal to GROUP_SIZE.
5142 E.g., we have a group of three nodes with three loads from the same
5143 location in each node, and the vector size is 4. I.e., we have a
5144 a0b0c0a1b1c1... sequence and we need to create the following vectors:
5145 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
5146 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
5149 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
5150 The last mask is illegal since we assume two operands for permute
5151 operation, and the mask element values can't be outside that range.
5152 Hence, the last mask must be converted into {2,5,5,5}.
5153 For the first two permutations we need the first and the second input
5154 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
5155 we need the second and the third vectors: {b1,c1,a2,b2} and
5158 int vect_stmts_counter
= 0;
5159 unsigned int index
= 0;
5160 int first_vec_index
= -1;
5161 int second_vec_index
= -1;
5165 vec_perm_builder mask
;
5166 unsigned int nelts_to_build
;
5167 unsigned int nvectors_per_build
;
5168 unsigned int in_nlanes
;
5169 bool repeating_p
= (group_size
== DR_GROUP_SIZE (stmt_info
)
5170 && multiple_p (nunits
, group_size
));
5173 /* A single vector contains a whole number of copies of the node, so:
5174 (a) all permutes can use the same mask; and
5175 (b) the permutes only need a single vector input. */
5176 mask
.new_vector (nunits
, group_size
, 3);
5177 nelts_to_build
= mask
.encoded_nelts ();
5178 nvectors_per_build
= SLP_TREE_VEC_STMTS (node
).length ();
5179 in_nlanes
= DR_GROUP_SIZE (stmt_info
) * 3;
5183 /* We need to construct a separate mask for each vector statement. */
5184 unsigned HOST_WIDE_INT const_nunits
, const_vf
;
5185 if (!nunits
.is_constant (&const_nunits
)
5186 || !vf
.is_constant (&const_vf
))
5188 mask
.new_vector (const_nunits
, const_nunits
, 1);
5189 nelts_to_build
= const_vf
* group_size
;
5190 nvectors_per_build
= 1;
5191 in_nlanes
= const_vf
* DR_GROUP_SIZE (stmt_info
);
5193 auto_sbitmap
used_in_lanes (in_nlanes
);
5194 bitmap_clear (used_in_lanes
);
5196 unsigned int count
= mask
.encoded_nelts ();
5197 mask
.quick_grow (count
);
5198 vec_perm_indices indices
;
5200 for (unsigned int j
= 0; j
< nelts_to_build
; j
++)
5202 unsigned int iter_num
= j
/ group_size
;
5203 unsigned int stmt_num
= j
% group_size
;
5204 unsigned int i
= (iter_num
* DR_GROUP_SIZE (stmt_info
)
5205 + SLP_TREE_LOAD_PERMUTATION (node
)[stmt_num
]);
5206 bitmap_set_bit (used_in_lanes
, i
);
5209 first_vec_index
= 0;
5214 /* Enforced before the loop when !repeating_p. */
5215 unsigned int const_nunits
= nunits
.to_constant ();
5216 vec_index
= i
/ const_nunits
;
5217 mask_element
= i
% const_nunits
;
5218 if (vec_index
== first_vec_index
5219 || first_vec_index
== -1)
5221 first_vec_index
= vec_index
;
5223 else if (vec_index
== second_vec_index
5224 || second_vec_index
== -1)
5226 second_vec_index
= vec_index
;
5227 mask_element
+= const_nunits
;
5231 if (dump_enabled_p ())
5232 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5233 "permutation requires at "
5234 "least three vectors %G",
5236 gcc_assert (analyze_only
);
5240 gcc_assert (mask_element
< 2 * const_nunits
);
5243 if (mask_element
!= index
)
5245 mask
[index
++] = mask_element
;
5247 if (index
== count
&& !noop_p
)
5249 indices
.new_vector (mask
, second_vec_index
== -1 ? 1 : 2, nunits
);
5250 if (!can_vec_perm_const_p (mode
, indices
))
5252 if (dump_enabled_p ())
5254 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
5256 "unsupported vect permute { ");
5257 for (i
= 0; i
< count
; ++i
)
5259 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
5260 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
5262 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
5264 gcc_assert (analyze_only
);
5275 tree mask_vec
= NULL_TREE
;
5278 mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
5280 if (second_vec_index
== -1)
5281 second_vec_index
= first_vec_index
;
5283 for (unsigned int ri
= 0; ri
< nvectors_per_build
; ++ri
)
5285 /* Generate the permute statement if necessary. */
5286 tree first_vec
= dr_chain
[first_vec_index
+ ri
];
5287 tree second_vec
= dr_chain
[second_vec_index
+ ri
];
5291 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
5293 = vect_create_destination_var (gimple_assign_lhs (stmt
),
5295 perm_dest
= make_ssa_name (perm_dest
);
5297 = gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
5298 first_vec
, second_vec
,
5300 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
,
5304 /* If mask was NULL_TREE generate the requested
5305 identity transform. */
5306 perm_stmt
= SSA_NAME_DEF_STMT (first_vec
);
5308 /* Store the vector statement in NODE. */
5309 SLP_TREE_VEC_STMTS (node
)[vect_stmts_counter
++] = perm_stmt
;
5314 first_vec_index
= -1;
5315 second_vec_index
= -1;
5323 *n_loads
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
5326 /* Enforced above when !repeating_p. */
5327 unsigned int const_nunits
= nunits
.to_constant ();
5329 bool load_seen
= false;
5330 for (unsigned i
= 0; i
< in_nlanes
; ++i
)
5332 if (i
% const_nunits
== 0)
5338 if (bitmap_bit_p (used_in_lanes
, i
))
5350 /* Vectorize the SLP permutations in NODE as specified
5351 in SLP_TREE_LANE_PERMUTATION which is a vector of pairs of SLP
5352 child number and lane number.
5353 Interleaving of two two-lane two-child SLP subtrees (not supported):
5354 [ { 0, 0 }, { 1, 0 }, { 0, 1 }, { 1, 1 } ]
5355 A blend of two four-lane two-child SLP subtrees:
5356 [ { 0, 0 }, { 1, 1 }, { 0, 2 }, { 1, 3 } ]
5357 Highpart of a four-lane one-child SLP subtree (not supported):
5358 [ { 0, 2 }, { 0, 3 } ]
5359 Where currently only a subset is supported by code generating below. */
5362 vectorizable_slp_permutation (vec_info
*vinfo
, gimple_stmt_iterator
*gsi
,
5363 slp_tree node
, stmt_vector_for_cost
*cost_vec
)
5365 tree vectype
= SLP_TREE_VECTYPE (node
);
5367 /* ??? We currently only support all same vector input and output types
5368 while the SLP IL should really do a concat + select and thus accept
5369 arbitrary mismatches. */
5372 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
5373 if (!vect_maybe_update_slp_op_vectype (child
, vectype
)
5374 || !types_compatible_p (SLP_TREE_VECTYPE (child
), vectype
))
5376 if (dump_enabled_p ())
5377 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5378 "Unsupported lane permutation\n");
5382 vec
<std::pair
<unsigned, unsigned> > &perm
= SLP_TREE_LANE_PERMUTATION (node
);
5383 gcc_assert (perm
.length () == SLP_TREE_LANES (node
));
5384 if (dump_enabled_p ())
5386 dump_printf_loc (MSG_NOTE
, vect_location
,
5387 "vectorizing permutation");
5388 for (unsigned i
= 0; i
< perm
.length (); ++i
)
5389 dump_printf (MSG_NOTE
, " op%u[%u]", perm
[i
].first
, perm
[i
].second
);
5390 dump_printf (MSG_NOTE
, "\n");
5393 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5394 if (!nunits
.is_constant ())
5396 unsigned HOST_WIDE_INT vf
= 1;
5397 if (loop_vec_info linfo
= dyn_cast
<loop_vec_info
> (vinfo
))
5398 if (!LOOP_VINFO_VECT_FACTOR (linfo
).is_constant (&vf
))
5400 unsigned olanes
= vf
* SLP_TREE_LANES (node
);
5401 gcc_assert (multiple_p (olanes
, nunits
));
5403 /* Compute the { { SLP operand, vector index}, lane } permutation sequence
5404 from the { SLP operand, scalar lane } permutation as recorded in the
5405 SLP node as intermediate step. This part should already work
5406 with SLP children with arbitrary number of lanes. */
5407 auto_vec
<std::pair
<std::pair
<unsigned, unsigned>, unsigned> > vperm
;
5408 auto_vec
<unsigned> active_lane
;
5409 vperm
.create (olanes
);
5410 active_lane
.safe_grow_cleared (SLP_TREE_CHILDREN (node
).length (), true);
5411 for (unsigned i
= 0; i
< vf
; ++i
)
5413 for (unsigned pi
= 0; pi
< perm
.length (); ++pi
)
5415 std::pair
<unsigned, unsigned> p
= perm
[pi
];
5416 tree vtype
= SLP_TREE_VECTYPE (SLP_TREE_CHILDREN (node
)[p
.first
]);
5417 unsigned vnunits
= TYPE_VECTOR_SUBPARTS (vtype
).to_constant ();
5418 unsigned vi
= (active_lane
[p
.first
] + p
.second
) / vnunits
;
5419 unsigned vl
= (active_lane
[p
.first
] + p
.second
) % vnunits
;
5420 vperm
.quick_push (std::make_pair (std::make_pair (p
.first
, vi
), vl
));
5422 /* Advance to the next group. */
5423 for (unsigned j
= 0; j
< SLP_TREE_CHILDREN (node
).length (); ++j
)
5424 active_lane
[j
] += SLP_TREE_LANES (SLP_TREE_CHILDREN (node
)[j
]);
5427 if (dump_enabled_p ())
5429 dump_printf_loc (MSG_NOTE
, vect_location
, "as");
5430 for (unsigned i
= 0; i
< vperm
.length (); ++i
)
5432 if (i
!= 0 && multiple_p (i
, TYPE_VECTOR_SUBPARTS (vectype
)))
5433 dump_printf (MSG_NOTE
, ",");
5434 dump_printf (MSG_NOTE
, " vops%u[%u][%u]",
5435 vperm
[i
].first
.first
, vperm
[i
].first
.second
,
5436 vperm
[i
].first
.second
);
5438 dump_printf (MSG_NOTE
, "\n");
5441 /* We can only handle two-vector permutes, everything else should
5442 be lowered on the SLP level. The following is closely inspired
5443 by vect_transform_slp_perm_load and is supposed to eventually
5445 ??? As intermediate step do code-gen in the SLP tree representation
5447 std::pair
<unsigned, unsigned> first_vec
= std::make_pair (-1U, -1U);
5448 std::pair
<unsigned, unsigned> second_vec
= std::make_pair (-1U, -1U);
5449 unsigned int const_nunits
= nunits
.to_constant ();
5450 unsigned int index
= 0;
5451 unsigned int mask_element
;
5452 vec_perm_builder mask
;
5453 mask
.new_vector (const_nunits
, const_nunits
, 1);
5454 unsigned int count
= mask
.encoded_nelts ();
5455 mask
.quick_grow (count
);
5456 vec_perm_indices indices
;
5457 unsigned nperms
= 0;
5458 for (unsigned i
= 0; i
< vperm
.length (); ++i
)
5460 mask_element
= vperm
[i
].second
;
5461 if (first_vec
.first
== -1U
5462 || first_vec
== vperm
[i
].first
)
5463 first_vec
= vperm
[i
].first
;
5464 else if (second_vec
.first
== -1U
5465 || second_vec
== vperm
[i
].first
)
5467 second_vec
= vperm
[i
].first
;
5468 mask_element
+= const_nunits
;
5472 if (dump_enabled_p ())
5473 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5474 "permutation requires at "
5475 "least three vectors\n");
5480 mask
[index
++] = mask_element
;
5484 indices
.new_vector (mask
, second_vec
.first
== -1U ? 1 : 2,
5486 bool identity_p
= indices
.series_p (0, 1, 0, 1);
5488 && !can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
5490 if (dump_enabled_p ())
5492 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
5494 "unsupported vect permute { ");
5495 for (i
= 0; i
< count
; ++i
)
5497 dump_dec (MSG_MISSED_OPTIMIZATION
, mask
[i
]);
5498 dump_printf (MSG_MISSED_OPTIMIZATION
, " ");
5500 dump_printf (MSG_MISSED_OPTIMIZATION
, "}\n");
5510 if (second_vec
.first
== -1U)
5511 second_vec
= first_vec
;
5513 /* Generate the permute statement if necessary. */
5514 slp_tree first_node
= SLP_TREE_CHILDREN (node
)[first_vec
.first
];
5516 = vect_get_slp_vect_def (first_node
, first_vec
.second
);
5518 tree perm_dest
= make_ssa_name (vectype
);
5521 slp_tree second_node
5522 = SLP_TREE_CHILDREN (node
)[second_vec
.first
];
5524 = vect_get_slp_vect_def (second_node
, second_vec
.second
);
5525 tree mask_vec
= vect_gen_perm_mask_checked (vectype
, indices
);
5526 perm_stmt
= gimple_build_assign (perm_dest
, VEC_PERM_EXPR
,
5527 first_def
, second_def
,
5531 /* We need a copy here in case the def was external. */
5532 perm_stmt
= gimple_build_assign (perm_dest
, first_def
);
5533 vect_finish_stmt_generation (vinfo
, NULL
, perm_stmt
, gsi
);
5534 /* Store the vector statement in NODE. */
5535 SLP_TREE_VEC_STMTS (node
).quick_push (perm_stmt
);
5539 first_vec
= std::make_pair (-1U, -1U);
5540 second_vec
= std::make_pair (-1U, -1U);
5545 record_stmt_cost (cost_vec
, nperms
, vec_perm
, NULL
, vectype
, 0, vect_body
);
5550 /* Vectorize SLP NODE. */
/* Code-generate one SLP node: choose a GIMPLE insertion point SI and emit
   the node's vector statements there.  Pre-existing vector defs are left
   alone; constant/external nodes get their vector constants built; data
   refs insert relative to the first (load) / last (store) scalar stmt;
   all other internal nodes insert after the latest vectorized def of any
   child.  NOTE(review): this chunk's extraction dropped brace-only and
   some statement lines (the embedded line numbers jump), so the control
   flow shown here is incomplete -- verify edits against the upstream
   tree-vect-slp.c before changing anything.  */
5553 vect_schedule_slp_node (vec_info
*vinfo
,
5554 slp_tree node
, slp_instance instance
)
5556 gimple_stmt_iterator si
;
5560 /* For existing vectors there's nothing to do. */
5561 if (SLP_TREE_VEC_DEFS (node
).exists ())
5564 gcc_assert (SLP_TREE_VEC_STMTS (node
).is_empty ());
5566 /* Vectorize externals and constants. */
5567 if (SLP_TREE_DEF_TYPE (node
) == vect_constant_def
5568 || SLP_TREE_DEF_TYPE (node
) == vect_external_def
)
5570 /* ??? vectorizable_shift can end up using a scalar operand which is
5571 currently denoted as !SLP_TREE_VECTYPE. No need to vectorize the
5572 node in this case. */
5573 if (!SLP_TREE_VECTYPE (node
))
5576 vect_create_constant_vectors (vinfo
, node
);
/* Internal node from here on: allocate the vector-stmt array up front so
   the paths below can push into it.  */
5580 stmt_vec_info stmt_info
= SLP_TREE_REPRESENTATIVE (node
);
5582 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) != 0);
5583 SLP_TREE_VEC_STMTS (node
).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node
));
5585 if (dump_enabled_p ())
5586 dump_printf_loc (MSG_NOTE
, vect_location
,
5587 "------>vectorizing SLP node starting from: %G",
/* Insertion-point selection: three cases follow -- data refs, PHI-style
   nodes (which do not use the iterator), and everything else.  */
5590 if (STMT_VINFO_DATA_REF (stmt_info
)
5591 && SLP_TREE_CODE (node
) != VEC_PERM_EXPR
)
5593 /* Vectorized loads go before the first scalar load to make it
5594 ready early, vectorized stores go before the last scalar
5595 stmt which is where all uses are ready. */
5596 stmt_vec_info last_stmt_info
= NULL
;
5597 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info
)))
5598 last_stmt_info
= vect_find_first_scalar_stmt_in_slp (node
);
5599 else /* DR_IS_WRITE */
5600 last_stmt_info
= vect_find_last_scalar_stmt_in_slp (node
);
5601 si
= gsi_for_stmt (last_stmt_info
->stmt
);
5603 else if ((STMT_VINFO_TYPE (stmt_info
) == cycle_phi_info_type
5604 || STMT_VINFO_TYPE (stmt_info
) == induc_vec_info_type
5605 || STMT_VINFO_TYPE (stmt_info
) == phi_info_type
)
5606 && SLP_TREE_CODE (node
) != VEC_PERM_EXPR
)
5608 /* For PHI node vectorization we do not use the insertion iterator. */
5613 /* Emit other stmts after the children vectorized defs which is
5614 earliest possible. */
/* last_stmt tracks the dominance-latest def among all children; the
   dropped lines presumably update it via the dominates checks below --
   confirm against upstream.  */
5615 gimple
*last_stmt
= NULL
;
5616 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
5617 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
5619 /* For fold-left reductions we are retaining the scalar
5620 reduction PHI but we still have SLP_TREE_NUM_VEC_STMTS
5621 set so the representation isn't perfect. Resort to the
5622 last scalar def here. */
5623 if (SLP_TREE_VEC_STMTS (child
).is_empty ())
5625 gcc_assert (STMT_VINFO_TYPE (SLP_TREE_REPRESENTATIVE (child
))
5626 == cycle_phi_info_type
);
5627 gphi
*phi
= as_a
<gphi
*>
5628 (vect_find_last_scalar_stmt_in_slp (child
)->stmt
);
5630 || vect_stmt_dominates_stmt_p (last_stmt
, phi
))
5633 /* We are emitting all vectorized stmts in the same place and
5634 the last one is the last.
5635 ??? Unless we have a load permutation applied and that
5636 figures to re-use an earlier generated load. */
5639 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (child
), j
, vstmt
)
5641 || vect_stmt_dominates_stmt_p (last_stmt
, vstmt
))
5644 else if (!SLP_TREE_VECTYPE (child
))
5646 /* For externals we use unvectorized at all scalar defs. */
5649 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (child
), j
, def
)
5650 if (TREE_CODE (def
) == SSA_NAME
5651 && !SSA_NAME_IS_DEFAULT_DEF (def
))
5653 gimple
*stmt
= SSA_NAME_DEF_STMT (def
);
5655 || vect_stmt_dominates_stmt_p (last_stmt
, stmt
))
5661 /* For externals we have to look at all defs since their
5662 insertion place is decided per vector. But beware
5663 of pre-existing vectors where we need to make sure
5664 we do not insert before the region boundary. */
5665 if (SLP_TREE_SCALAR_OPS (child
).is_empty ()
5666 && !vinfo
->lookup_def (SLP_TREE_VEC_DEFS (child
)[0]))
5667 last_stmt
= gsi_stmt (gsi_after_labels
5668 (as_a
<bb_vec_info
> (vinfo
)->bbs
[0]))
;
5673 FOR_EACH_VEC_ELT (SLP_TREE_VEC_DEFS (child
), j
, vdef
)
5674 if (TREE_CODE (vdef
) == SSA_NAME
5675 && !SSA_NAME_IS_DEFAULT_DEF (vdef
))
5677 gimple
*vstmt
= SSA_NAME_DEF_STMT (vdef
);
5679 || vect_stmt_dominates_stmt_p (last_stmt
, vstmt
))
5684 /* This can happen when all children are pre-existing vectors or
5687 last_stmt
= vect_find_first_scalar_stmt_in_slp (node
)->stmt
;
/* A PHI cannot be an insertion point itself; step past the labels of its
   block instead.  */
5688 if (is_a
<gphi
*> (last_stmt
))
5689 si
= gsi_after_labels (gimple_bb (last_stmt
));
5692 si
= gsi_for_stmt (last_stmt
);
/* done_p's consumer is in the dropped lines -- presumably it gates the
   vect_transform_stmt fallback below; confirm upstream.  */
5697 bool done_p
= false;
5699 /* Handle purely internal nodes. */
5700 if (SLP_TREE_CODE (node
) == VEC_PERM_EXPR
)
5702 /* ??? the transform kind is stored to STMT_VINFO_TYPE which might
5703 be shared with different SLP nodes (but usually it's the same
5704 operation apart from the case the stmt is only there for denoting
5705 the actual scalar lane defs ...). So do not call vect_transform_stmt
5706 but open-code it here (partly). */
5707 bool done
= vectorizable_slp_permutation (vinfo
, &si
, node
, NULL
);
5712 vect_transform_stmt (vinfo
, stmt_info
, &si
, node
, instance
);
5715 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
5716 For loop vectorization this is done in vectorizable_call, but for SLP
5717 it needs to be deferred until end of vect_schedule_slp, because multiple
5718 SLP instances may refer to the same scalar stmt. */
/* Recursive worker: VISITED guards against processing a shared SLP node
   twice (the graph is a DAG, nodes may have multiple parents).  */
5721 vect_remove_slp_scalar_calls (vec_info
*vinfo
,
5722 slp_tree node
, hash_set
<slp_tree
> &visited
)
5725 gimple_stmt_iterator gsi
;
5729 stmt_vec_info stmt_info
;
/* Only internal nodes carry scalar stmts worth rewriting; the dropped
   lines here were presumably early returns -- confirm upstream.  */
5731 if (!node
|| SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
5734 if (visited
.add (node
))
/* Recurse into children first, then rewrite this node's own calls.  */
5737 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
5738 vect_remove_slp_scalar_calls (vinfo
, child
, visited
);
5740 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node
), i
, stmt_info
)
/* Skip anything that is not a call still present in the IL, is a
   pattern stmt, or is not pure-SLP (scalar uses remain).  */
5742 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
5743 if (!stmt
|| gimple_bb (stmt
) == NULL
)
5745 if (is_pattern_stmt_p (stmt_info
)
5746 || !PURE_SLP_STMT (stmt_info
))
/* Replace the call with "lhs = 0" and re-point the SSA def so later
   DCE can clean it up.  */
5748 lhs
= gimple_call_lhs (stmt
);
5749 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
5750 gsi
= gsi_for_stmt (stmt
);
5751 vinfo
->replace_stmt (&gsi
, stmt_info
, new_stmt
);
5752 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt
)) = new_stmt
;
/* Public entry point: seed the recursive worker above with a fresh,
   empty visited set for one walk over NODE's subgraph.  */
5757 vect_remove_slp_scalar_calls (vec_info
*vinfo
, slp_tree node
)
5759 hash_set
<slp_tree
> visited
;
5760 vect_remove_slp_scalar_calls (vinfo
, node
, visited
);
5763 /* Vectorize the instance root. */
/* Replace INSTANCE's scalar root stmt with an assignment from the
   vectorized result: a single vector def is assigned directly (with a
   VIEW_CONVERT_EXPR if the types differ only in representation); multiple
   vector defs are glued together with a CONSTRUCTOR.  NOTE(review): the
   extraction dropped brace/return lines here (embedded numbering jumps),
   so verify against upstream tree-vect-slp.c before editing.  */
5766 vectorize_slp_instance_root_stmt (slp_tree node
, slp_instance instance
)
5768 gassign
*rstmt
= NULL
;
/* One vector statement: forward its lhs straight to the root lhs.  */
5770 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) == 1)
5775 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt
)
5777 tree vect_lhs
= gimple_get_lhs (child_stmt
);
5778 tree root_lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
5779 if (!useless_type_conversion_p (TREE_TYPE (root_lhs
),
5780 TREE_TYPE (vect_lhs
)))
5781 vect_lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (root_lhs
),
5783 rstmt
= gimple_build_assign (root_lhs
, vect_lhs
);
/* Several vector statements: collect their lhs values into a
   CONSTRUCTOR of the root rhs type and assign that.  */
5787 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node
) > 1)
5789 int nelts
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
5792 vec
<constructor_elt
, va_gc
> *v
;
5793 vec_alloc (v
, nelts
);
5795 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node
), j
, child_stmt
)
5797 CONSTRUCTOR_APPEND_ELT (v
,
5799 gimple_get_lhs (child_stmt
));
5801 tree lhs
= gimple_get_lhs (instance
->root_stmt
->stmt
);
5802 tree rtype
= TREE_TYPE (gimple_assign_rhs1 (instance
->root_stmt
->stmt
));
5803 tree r_constructor
= build_constructor (rtype
, v
);
5804 rstmt
= gimple_build_assign (lhs
, r_constructor
);
/* Swap the new assignment in for the original scalar root stmt.  */
5809 gimple_stmt_iterator rgsi
= gsi_for_stmt (instance
->root_stmt
->stmt
);
5810 gsi_replace (&rgsi
, rstmt
, true);
5820 /* Schedule the SLP INSTANCE doing a DFS walk and collecting SCCs. */
/* Tarjan-style DFS over the SLP graph rooted at NODE.  SCC_INFO records
   per-node dfs/lowlink/on_stack state, MAXDFS is the running DFS number,
   STACK holds the nodes of the SCC currently being built.  Singleton
   SCCs are scheduled directly; multi-node SCCs (reduction/induction
   cycles) are scheduled iteratively, breaking the cycle at PHIs whose
   backedge arguments are filled in afterwards.  NOTE(review): the
   extraction dropped brace-only and some statement lines here (embedded
   numbering jumps), so the loop bodies below are incomplete as shown --
   verify against upstream tree-vect-slp.c before editing.  */
5823 vect_schedule_scc (vec_info
*vinfo
, slp_tree node
, slp_instance instance
,
5824 hash_map
<slp_tree
, slp_scc_info
> &scc_info
,
5825 int &maxdfs
, vec
<slp_tree
> &stack
)
/* Each node must be entered exactly once; its dfs/lowlink start equal.  */
5828 slp_scc_info
*info
= &scc_info
.get_or_insert (node
, &existed_p
);
5829 gcc_assert (!existed_p
);
5831 info
->lowlink
= maxdfs
;
/* Non-internal nodes (constants/externals) form trivial SCCs and can be
   scheduled immediately without going on the stack.  */
5835 if (SLP_TREE_DEF_TYPE (node
) != vect_internal_def
)
5837 info
->on_stack
= false;
5838 vect_schedule_slp_node (vinfo
, node
, instance
);
5842 info
->on_stack
= true;
5843 stack
.safe_push (node
);
/* DFS into the children, folding their lowlinks into ours.  */
5848 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node
), i
, child
)
5852 slp_scc_info
*child_info
= scc_info
.get (child
);
5855 vect_schedule_scc (vinfo
, child
, instance
, scc_info
, maxdfs
, stack
);
5856 /* Recursion might have re-allocated the node. */
5857 info
= scc_info
.get (node
);
5858 child_info
= scc_info
.get (child
);
5859 info
->lowlink
= MIN (info
->lowlink
, child_info
->lowlink
);
5861 else if (child_info
->on_stack
)
5862 info
->lowlink
= MIN (info
->lowlink
, child_info
->dfs
);
/* If lowlink != dfs this node is not an SCC root; leave it on the stack
   for the root to collect.  */
5864 if (info
->lowlink
!= info
->dfs
)
5867 auto_vec
<slp_tree
, 4> phis_to_fixup
;
/* Singleton SCC: NODE itself is the top of stack; schedule it and, if it
   is a PHI, remember it for backedge-argument fixup below.  */
5870 if (stack
.last () == node
)
5873 info
->on_stack
= false;
5874 vect_schedule_slp_node (vinfo
, node
, instance
);
5875 if (SLP_TREE_CODE (node
) != VEC_PERM_EXPR
5876 && is_a
<gphi
*> (SLP_TREE_REPRESENTATIVE (node
)->stmt
))
5877 phis_to_fixup
.quick_push (node
);
/* Multi-node SCC: find where this SCC starts on the stack.  */
5882 int last_idx
= stack
.length () - 1;
5883 while (stack
[last_idx
] != node
)
5885 /* We can break the cycle at PHIs who have at least one child
5886 code generated. Then we could re-start the DFS walk until
5887 all nodes in the SCC are covered (we might have new entries
5888 for only back-reachable nodes). But it's simpler to just
5889 iterate and schedule those that are ready. */
5890 unsigned todo
= stack
.length () - last_idx
;
/* Repeatedly sweep the SCC's stack slice, scheduling every node whose
   non-PHI dependencies are already generated.  */
5893 for (int idx
= stack
.length () - 1; idx
>= last_idx
; --idx
)
5895 slp_tree entry
= stack
[idx
];
5898 bool phi
= (SLP_TREE_CODE (entry
) != VEC_PERM_EXPR
5899 && is_a
<gphi
*> (SLP_TREE_REPRESENTATIVE (entry
)->stmt
));
5901 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (entry
), i
, child
)
5908 else if (scc_info
.get (child
)->on_stack
)
5926 vect_schedule_slp_node (vinfo
, entry
, instance
);
5927 scc_info
.get (entry
)->on_stack
= false;
5931 phis_to_fixup
.safe_push (entry
);
/* Pop the whole SCC off the stack in one go.  */
5938 stack
.truncate (last_idx
);
5941 /* Now fixup the backedge def of the vectorized PHIs in this SCC. */
5943 FOR_EACH_VEC_ELT (phis_to_fixup
, i
, phi_node
)
5945 gphi
*phi
= as_a
<gphi
*> (SLP_TREE_REPRESENTATIVE (phi_node
)->stmt
);
/* Walk the scalar PHI's incoming edges; each edge index selects the
   matching SLP child supplying the vectorized argument.  */
5948 FOR_EACH_EDGE (e
, ei
, gimple_bb (phi
)->preds
)
5950 unsigned dest_idx
= e
->dest_idx
;
5951 child
= SLP_TREE_CHILDREN (phi_node
)[dest_idx
];
5952 if (!child
|| SLP_TREE_DEF_TYPE (child
) != vect_internal_def
)
5954 /* Simply fill all args. */
5955 for (unsigned i
= 0; i
< SLP_TREE_VEC_STMTS (phi_node
).length (); ++i
)
5956 add_phi_arg (as_a
<gphi
*> (SLP_TREE_VEC_STMTS (phi_node
)[i
]),
5957 vect_get_slp_vect_def (child
, i
),
5958 e
, gimple_phi_arg_location (phi
, dest_idx
));
5963 /* Generate vector code for SLP_INSTANCES in the loop/basic block. */
5966 vect_schedule_slp (vec_info
*vinfo
, vec
<slp_instance
> slp_instances
)
5968 slp_instance instance
;
5971 hash_map
<slp_tree
, slp_scc_info
> scc_info
;
5973 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
5975 slp_tree node
= SLP_INSTANCE_TREE (instance
);
5976 if (dump_enabled_p ())
5978 dump_printf_loc (MSG_NOTE
, vect_location
,
5979 "Vectorizing SLP tree:\n");
5980 if (SLP_INSTANCE_ROOT_STMT (instance
))
5981 dump_printf_loc (MSG_NOTE
, vect_location
, "Root stmt: %G",
5982 SLP_INSTANCE_ROOT_STMT (instance
)->stmt
);
5983 vect_print_slp_graph (MSG_NOTE
, vect_location
,
5984 SLP_INSTANCE_TREE (instance
));
5986 /* Schedule the tree of INSTANCE, scheduling SCCs in a way to
5987 have a PHI be the node breaking the cycle. */
5988 auto_vec
<slp_tree
> stack
;
5989 if (!scc_info
.get (node
))
5990 vect_schedule_scc (vinfo
, node
, instance
, scc_info
, maxdfs
, stack
);
5992 if (SLP_INSTANCE_ROOT_STMT (instance
))
5993 vectorize_slp_instance_root_stmt (node
, instance
);
5995 if (dump_enabled_p ())
5996 dump_printf_loc (MSG_NOTE
, vect_location
,
5997 "vectorizing stmts using SLP.\n");
6000 FOR_EACH_VEC_ELT (slp_instances
, i
, instance
)
6002 slp_tree root
= SLP_INSTANCE_TREE (instance
);
6003 stmt_vec_info store_info
;
6006 /* Remove scalar call stmts. Do not do this for basic-block
6007 vectorization as not all uses may be vectorized.
6008 ??? Why should this be necessary? DCE should be able to
6009 remove the stmts itself.
6010 ??? For BB vectorization we can as well remove scalar
6011 stmts starting from the SLP tree root if they have no
6013 if (is_a
<loop_vec_info
> (vinfo
))
6014 vect_remove_slp_scalar_calls (vinfo
, root
);
6016 /* Remove vectorized stores original scalar stmts. */
6017 for (j
= 0; SLP_TREE_SCALAR_STMTS (root
).iterate (j
, &store_info
); j
++)
6019 if (!STMT_VINFO_DATA_REF (store_info
)
6020 || !DR_IS_WRITE (STMT_VINFO_DATA_REF (store_info
)))
6023 store_info
= vect_orig_stmt (store_info
);
6024 /* Free the attached stmt_vec_info and remove the stmt. */
6025 vinfo
->remove_stmt (store_info
);