/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2024 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "internal-fn.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"
#include "gimple-range.h"
#include "tree-ssa-loop-niter.h"
#include "gimple-fold.h"
#include "optabs-libfuncs.h"

/* For lang_hooks.types.type_for_mode.  */
#include "langhooks.h"
/* Return the vectorized type for the given statement.  */

tree
stmt_vectype (class _stmt_vec_info *stmt_info)
{
  return STMT_VINFO_VECTYPE (stmt_info);
}
/* Return TRUE iff the given statement is in an inner loop relative to
   the loop being vectorized.  */

bool
stmt_in_inner_loop_p (vec_info *vinfo, class _stmt_vec_info *stmt_info)
{
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  basic_block bb = gimple_bb (stmt);
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop;

  if (!loop_vinfo)
    return false;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  return (bb->loop_father == loop->inner);
}
/* Record the cost of a statement, either by directly informing the
   target model or by saving it in a vector for later processing.
   Return a preliminary estimate of the statement's cost.  */

static unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  stmt_vec_info stmt_info, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  if ((kind == vector_load || kind == unaligned_load)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_gather_load;
  if ((kind == vector_store || kind == unaligned_store)
      && (stmt_info && STMT_VINFO_GATHER_SCATTER_P (stmt_info)))
    kind = vector_scatter_store;

  stmt_info_for_cost si
    = { count, kind, where, stmt_info, node, vectype, misalign };
  body_cost_vec->safe_push (si);

  return (unsigned)
    (builtin_vectorization_cost (kind, vectype, misalign) * count);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, stmt_vec_info stmt_info,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, stmt_info, NULL,
			   vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind, slp_tree node,
		  tree vectype, int misalign,
		  enum vect_cost_model_location where)
{
  return record_stmt_cost (body_cost_vec, count, kind, NULL, node,
			   vectype, misalign, where);
}

unsigned
record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
		  enum vect_cost_for_stmt kind,
		  enum vect_cost_model_location where)
{
  gcc_assert (kind == cond_branch_taken || kind == cond_branch_not_taken
	      || kind == scalar_stmt);
  return record_stmt_cost (body_cost_vec, count, kind, NULL, NULL,
			   NULL_TREE, 0, where);
}
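
/* Illustrative use only: to cost a single unaligned vector load in the
   loop body a caller might do

     record_stmt_cost (cost_vec, 1, unaligned_load, stmt_info, vectype,
		       misalign, vect_body);

   which dispatches to the main overload above with NODE == NULL.  */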
/* Return a variable of type ELEM_TYPE[NELEMS].  */

static tree
create_vector_array (tree elem_type, unsigned HOST_WIDE_INT nelems)
{
  return create_tmp_var (build_array_type_nelts (elem_type, nelems),
			 "vect_array");
}
/* ARRAY is an array of vectors created by create_vector_array.
   Return an SSA_NAME for the vector in index N.  The reference
   is part of the vectorization of STMT_INFO and the vector is associated
   with scalar destination SCALAR_DEST.  */

static tree
read_vector_array (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   tree scalar_dest, tree array, unsigned HOST_WIDE_INT n)
{
  tree vect_type, vect, vect_name, array_ref;
  gimple *new_stmt;

  gcc_assert (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE);
  vect_type = TREE_TYPE (TREE_TYPE (array));
  vect = vect_create_destination_var (scalar_dest, vect_type);
  array_ref = build4 (ARRAY_REF, vect_type, array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (vect, array_ref);
  vect_name = make_ssa_name (vect, new_stmt);
  gimple_assign_set_lhs (new_stmt, vect_name);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return vect_name;
}
/* ARRAY is an array of vectors created by create_vector_array.
   Emit code to store SSA_NAME VECT in index N of the array.
   The store is part of the vectorization of STMT_INFO.  */

static void
write_vector_array (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    tree vect, tree array, unsigned HOST_WIDE_INT n)
{
  tree array_ref;
  gimple *new_stmt;

  array_ref = build4 (ARRAY_REF, TREE_TYPE (vect), array,
		      build_int_cst (size_type_node, n),
		      NULL_TREE, NULL_TREE);

  new_stmt = gimple_build_assign (array_ref, vect);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* PTR is a pointer to an array of type TYPE.  Return a representation
   of *PTR.  The memory reference replaces those in FIRST_DR
   (and its group).  */

static tree
create_array_ref (tree type, tree ptr, tree alias_ptr_type)
{
  tree mem_ref;

  mem_ref = build2 (MEM_REF, type, ptr, build_int_cst (alias_ptr_type, 0));
  /* Arrays have the same alignment as their type.  */
  set_ptr_info_alignment (get_ptr_info (ptr), TYPE_ALIGN_UNIT (type), 0);
  return mem_ref;
}
/* Add a clobber of variable VAR to the vectorization of STMT_INFO.
   Emit the clobber before *GSI.  */

static void
vect_clobber_variable (vec_info *vinfo, stmt_vec_info stmt_info,
		       gimple_stmt_iterator *gsi, tree var)
{
  tree clobber = build_clobber (TREE_TYPE (var));
  gimple *new_stmt = gimple_build_assign (var, clobber);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (vec<stmt_vec_info> *worklist, stmt_vec_info stmt_info,
		    enum vect_relevant relevant, bool live_p)
{
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "mark relevant %d, live %d: %G", relevant, live_p,
		     stmt_info->stmt);

  /* If this stmt is an original stmt in a pattern, we might need to mark its
     related pattern stmt instead of the original stmt.  However, such stmts
     may have their own uses that are not in any pattern, in such cases the
     stmt itself should be marked.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      /* This is the last stmt in a sequence that was detected as a
	 pattern that can potentially be vectorized.  Don't mark the stmt
	 as relevant/live because it's not going to be vectorized.
	 Instead mark the pattern-stmt that replaces it.  */

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "last stmt in pattern. don't mark"
			 " relevant/live.\n");

      stmt_vec_info old_stmt_info = stmt_info;
      stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == old_stmt_info);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);

      if (live_p && relevant == vect_unused_in_scope)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vec_stmt_relevant_p: forcing live pattern stmt "
			     "relevant.\n");
	  relevant = vect_used_only_live;
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "mark relevant %d, live %d: %G", relevant, live_p,
			 stmt_info->stmt);
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "already marked relevant/live.\n");
      return;
    }

  worklist->safe_push (stmt_info);
}
/* Function is_simple_and_all_uses_invariant

   Return true if STMT_INFO is simple and all uses of it are invariant.  */

bool
is_simple_and_all_uses_invariant (stmt_vec_info stmt_info,
				  loop_vec_info loop_vinfo)
{
  tree op;
  ssa_op_iter iter;

  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
    {
      enum vect_def_type dt = vect_uninitialized_def;

      if (!vect_is_simple_use (op, loop_vinfo, &dt))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}

      if (dt != vect_external_def && dt != vect_constant_def)
	return false;
    }
  return true;
}
/* Function vect_stmt_relevant_p.

   Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
   is "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - control stmts in the loop (except for the exit condition).
   - it is an induction and we have multiple exits.

   CHECKME: what other side effects would the vectorizer allow?  */

static bool
vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
		      enum vect_relevant *relevant, bool *live_p)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_scope;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  if (is_ctrl_stmt (stmt)
      && LOOP_VINFO_LOOP_IV_COND (loop_vinfo) != stmt
      && (!loop->inner || gimple_bb (stmt)->loop_father == loop))
    *relevant = vect_used_in_scope;

  /* changing memory.  */
  if (gimple_code (stmt_info->stmt) != GIMPLE_PHI)
    if (gimple_vdef (stmt_info->stmt)
	&& !gimple_clobber_p (stmt_info->stmt))
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vec_stmt_relevant_p: stmt has vdefs.\n");
	*relevant = vect_used_in_scope;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
	{
	  basic_block bb = gimple_bb (USE_STMT (use_p));
	  if (!flow_bb_inside_loop_p (loop, bb))
	    {
	      if (is_gimple_debug (USE_STMT (use_p)))
		continue;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vec_stmt_relevant_p: used out of loop.\n");

	      /* We expect all such uses to be in the loop exit phis
		 (because of loop closed form)   */
	      gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);

	      *live_p = true;
	    }
	}
    }

  /* Check if it's an induction and multiple exits.  In this case there will be
     a usage later on after peeling which is needed for the alternate exit.  */
  if (LOOP_VINFO_EARLY_BREAKS (loop_vinfo)
      && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: induction forced for "
			 "early break.\n");
      *live_p = true;
    }

  if (*live_p && *relevant == vect_unused_in_scope
      && !is_simple_and_all_uses_invariant (stmt_info, loop_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vec_stmt_relevant_p: stmt live but not relevant.\n");
      *relevant = vect_used_only_live;
    }

  return (*live_p || *relevant);
}
/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT_INFO.  Check if USE is
   used in STMT_INFO for anything other than indexing an array.  */

static bool
exist_non_indexing_operands_for_use_p (tree use, stmt_vec_info stmt_info)
{
  tree operand;

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the following forms:
     -1- STMT_VINFO = LOAD (MEM_REF)
     -2- STMT_VINFO = STORE (MEM_REF)
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || !gimple_assign_copy_p (assign))
    {
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (call && gimple_call_internal_p (call))
	{
	  internal_fn ifn = gimple_call_internal_fn (call);
	  int mask_index = internal_fn_mask_index (ifn);
	  if (mask_index >= 0
	      && use == gimple_call_arg (call, mask_index))
	    return true;
	  int stored_value_index = internal_fn_stored_value_index (ifn);
	  if (stored_value_index >= 0
	      && use == gimple_call_arg (call, stored_value_index))
	    return true;
	  if (internal_gather_scatter_fn_p (ifn)
	      && use == gimple_call_arg (call, 1))
	    return true;
	}
      return false;
    }

  if (TREE_CODE (gimple_assign_lhs (assign)) == SSA_NAME)
    return false;
  operand = gimple_assign_rhs1 (assign);
  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}
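
/* For instance (illustrative only): for a store "*p_1 = y_2" the use of the
   stored value y_2 is a non-indexing operand, so the function returns true
   for it, while the use of p_1 only forms the address and the function
   returns false for that use.  */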
/* Function process_use.

   Inputs:
   - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
   - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
   - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
     be performed.

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
       STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
       STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array indexing),
   which does not need to be directly vectorized, then the liveness/relevance
   of the respective DEF_STMT is left unchanged.
   - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
   we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
   "relevant" will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */

static opt_result
process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo,
	     enum vect_relevant relevant, vec<stmt_vec_info> *worklist,
	     bool force)
{
  stmt_vec_info dstmt_vinfo;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.  Uses
     that are used for address computation are not considered relevant.  */
  if (!force && !exist_non_indexing_operands_for_use_p (use, stmt_vinfo))
    return opt_result::success ();

  if (!vect_is_simple_use (use, loop_vinfo, &dt, &dstmt_vinfo))
    return opt_result::failure_at (stmt_vinfo->stmt,
				   "not vectorized:"
				   " unsupported use in stmt.\n");

  if (!dstmt_vinfo)
    return opt_result::success ();

  basic_block def_bb = gimple_bb (dstmt_vinfo->stmt);
  basic_block bb = gimple_bb (stmt_vinfo->stmt);

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
     We have to force the stmt live since the epilogue loop needs it to
     continue computing the reduction.  */
  if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "reduc-stmt defining reduc-phi in the same nest.\n");
      vect_mark_relevant (worklist, dstmt_vinfo, relevant, true);
      return opt_result::success ();
    }

  /* case 3a: outer-loop stmt defining an inner-loop stmt:
	outer-loop-header-bb:
		d = dstmt_vinfo
	inner-loop:
		stmt # use (d)
	outer-loop-tail-bb:
		...		  */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "outer-loop def-stmt defining inner-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_nested_cycle) ?
		      vect_used_in_scope : vect_unused_in_scope;
	  break;

	case vect_used_in_outer_by_reduction:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_by_reduction;
	  break;

	case vect_used_in_outer:
	  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def);
	  relevant = vect_used_in_scope;
	  break;

	case vect_used_in_scope:
	  break;

	default:
	  gcc_unreachable ();
	}
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
	outer-loop-header-bb:
		...
	inner-loop:
		d = dstmt_vinfo
	outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
		stmt # use (d)		*/
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "inner-loop def-stmt defining outer-loop stmt.\n");

      switch (relevant)
	{
	case vect_unused_in_scope:
	  relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	    || STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_double_reduction_def) ?
		      vect_used_in_outer_by_reduction : vect_unused_in_scope;
	  break;

	case vect_used_by_reduction:
	case vect_used_only_live:
	  relevant = vect_used_in_outer_by_reduction;
	  break;

	case vect_used_in_scope:
	  relevant = vect_used_in_outer;
	  break;

	default:
	  gcc_unreachable ();
	}
    }
  /* We are also not interested in uses on loop PHI backedges that are
     inductions.  Otherwise we'll needlessly vectorize the IV increment
     and cause hybrid SLP for SLP inductions.  Unless the PHI is live
     of course.  */
  else if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI
	   && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_induction_def
	   && ! STMT_VINFO_LIVE_P (stmt_vinfo)
	   && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
				      loop_latch_edge (bb->loop_father))
	       == use))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "induction value on backedge.\n");
      return opt_result::success ();
    }

  vect_mark_relevant (worklist, dstmt_vinfo, relevant, false);
  return opt_result::success ();
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */

opt_result
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal)
{
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i;
  basic_block bb;
  bool live_p;
  enum vect_relevant relevant;

  DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");

  auto_vec<stmt_vec_info, 64> worklist;

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  stmt_vec_info phi_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "init: phi relevant? %G",
			     phi_info->stmt);

	  if (vect_stmt_relevant_p (phi_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, phi_info, relevant, live_p);
	}
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  if (is_gimple_debug (gsi_stmt (si)))
	    continue;
	  stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "init: stmt relevant? %G", stmt_info->stmt);

	  if (vect_stmt_relevant_p (stmt_info, loop_vinfo, &relevant, &live_p))
	    vect_mark_relevant (&worklist, stmt_info, relevant, live_p);
	}
    }

  /* 2. Process_worklist */
  while (worklist.length () > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt_vec_info stmt_vinfo = worklist.pop ();
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "worklist: examine stmt: %G", stmt_vinfo->stmt);

      /* Examine the USEs of STMT.  For each USE, mark the stmt that defines it
	 (DEF_STMT) as relevant/irrelevant according to the relevance property
	 of STMT.  */
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);

      /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
	 propagated as is to the DEF_STMTs of its USEs.

	 One exception is when STMT has been identified as defining a reduction
	 variable; in this case we set the relevance to vect_used_by_reduction.
	 This is because we distinguish between two kinds of relevant stmts -
	 those that are used by a reduction computation, and those that are
	 (also) used by a regular computation.  This allows us later on to
	 identify stmts that are used solely by a reduction, and therefore the
	 order of the results that they produce does not have to be kept.  */

      switch (STMT_VINFO_DEF_TYPE (stmt_vinfo))
	{
	case vect_reduction_def:
	  gcc_assert (relevant != vect_unused_in_scope);
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of reduction.\n");
	  break;

	case vect_nested_cycle:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_in_outer_by_reduction
	      && relevant != vect_used_in_outer)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of nested cycle.\n");
	  break;

	case vect_double_reduction_def:
	  if (relevant != vect_unused_in_scope
	      && relevant != vect_used_by_reduction
	      && relevant != vect_used_only_live)
	    return opt_result::failure_at
	      (stmt_vinfo->stmt, "unsupported use of double reduction.\n");
	  break;

	default:
	  break;
	}

      if (is_pattern_stmt_p (stmt_vinfo))
	{
	  /* Pattern statements are not inserted into the code, so
	     FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
	     have to scan the RHS or function arguments instead.  */
	  if (gassign *assign = dyn_cast <gassign *> (stmt_vinfo->stmt))
	    {
	      enum tree_code rhs_code = gimple_assign_rhs_code (assign);
	      tree op = gimple_assign_rhs1 (assign);

	      i = 1;
	      if (rhs_code == COND_EXPR && COMPARISON_CLASS_P (op))
		{
		  opt_result res
		    = process_use (stmt_vinfo, TREE_OPERAND (op, 0),
				   loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  res = process_use (stmt_vinfo, TREE_OPERAND (op, 1),
				     loop_vinfo, relevant, &worklist, false);
		  if (!res)
		    return res;
		  i = 2;
		}
	      for (; i < gimple_num_ops (assign); i++)
		{
		  op = gimple_op (assign, i);
		  if (TREE_CODE (op) == SSA_NAME)
		    {
		      opt_result res
			= process_use (stmt_vinfo, op, loop_vinfo, relevant,
				       &worklist, false);
		      if (!res)
			return res;
		    }
		}
	    }
	  else if (gcond *cond = dyn_cast <gcond *> (stmt_vinfo->stmt))
	    {
	      tree_code rhs_code = gimple_cond_code (cond);
	      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
	      opt_result res
		= process_use (stmt_vinfo, gimple_cond_lhs (cond),
			       loop_vinfo, relevant, &worklist, false);
	      if (!res)
		return res;
	      res = process_use (stmt_vinfo, gimple_cond_rhs (cond),
				 loop_vinfo, relevant, &worklist, false);
	      if (!res)
		return res;
	    }
	  else if (gcall *call = dyn_cast <gcall *> (stmt_vinfo->stmt))
	    {
	      for (i = 0; i < gimple_call_num_args (call); i++)
		{
		  tree arg = gimple_call_arg (call, i);
		  opt_result res
		    = process_use (stmt_vinfo, arg, loop_vinfo, relevant,
				   &worklist, false);
		  if (!res)
		    return res;
		}
	    }
	  else
	    gcc_unreachable ();
	}
      else
	FOR_EACH_PHI_OR_STMT_USE (use_p, stmt_vinfo->stmt, iter, SSA_OP_USE)
	  {
	    tree op = USE_FROM_PTR (use_p);
	    opt_result res
	      = process_use (stmt_vinfo, op, loop_vinfo, relevant,
			     &worklist, false);
	    if (!res)
	      return res;
	  }

      if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo))
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_vinfo, loop_vinfo, &gs_info))
	    gcc_unreachable ();
	  opt_result res
	    = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant,
			   &worklist, true);
	  if (!res)
	    {
	      if (fatal)
		*fatal = false;
	      return res;
	    }
	}
    } /* while worklist */

  return opt_result::success ();
}
/* Function vect_model_simple_cost.

   Models cost for simple operations, i.e. those that only emit ncopies of a
   single op.  Right now, this does not account for multiple insns that could
   be generated for the single vector op.  We will handle that shortly.  */

static void
vect_model_simple_cost (vec_info *,
			stmt_vec_info stmt_info, int ncopies,
			enum vect_def_type *dt,
			int ndts,
			slp_tree node,
			stmt_vector_for_cost *cost_vec,
			vect_cost_for_stmt kind = vector_stmt)
{
  int inside_cost = 0, prologue_cost = 0;

  gcc_assert (cost_vec != NULL);

  /* ??? Somehow we need to fix this at the callers.  */
  if (node)
    ncopies = SLP_TREE_NUMBER_OF_VEC_STMTS (node);

  if (!node)
    /* Cost the "broadcast" of a scalar operand into a vector operand.
       Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
       cost model.  */
    for (int i = 0; i < ndts; i++)
      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
	prologue_cost += record_stmt_cost (cost_vec, 1, scalar_to_vec,
					   stmt_info, 0, vect_prologue);

  /* Pass the inside-of-loop statements to the target-specific cost model.  */
  inside_cost += record_stmt_cost (cost_vec, ncopies, kind,
				   stmt_info, 0, vect_body);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_simple_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
/* Model cost for type demotion and promotion operations.  PWR is
   normally zero for single-step promotions and demotions.  It will be
   one if two-step promotion/demotion is required, and so on.  NCOPIES
   is the number of vector results (and thus number of instructions)
   for the narrowest end of the operation chain.  Each additional
   step doubles the number of instructions required.  If WIDEN_ARITH
   is true the stmt is doing widening arithmetic.  */

static void
vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
				    enum vect_def_type *dt,
				    unsigned int ncopies, int pwr,
				    stmt_vector_for_cost *cost_vec,
				    bool widen_arith)
{
  int i;
  int inside_cost = 0, prologue_cost = 0;

  for (i = 0; i < pwr + 1; i++)
    {
      inside_cost += record_stmt_cost (cost_vec, ncopies,
				       widen_arith
				       ? vector_stmt : vec_promote_demote,
				       stmt_info, 0, vect_body);
      ncopies *= 2;
    }

  /* FORNOW: Assuming maximum 2 args per stmts.  */
  for (i = 0; i < 2; i++)
    if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
      prologue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
					 stmt_info, 0, vect_prologue);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_model_promotion_demotion_cost: inside_cost = %d, "
		     "prologue_cost = %d .\n", inside_cost, prologue_cost);
}
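
/* For example, a two-step promotion (PWR == 1) starting from NCOPIES == 1 is
   costed as one stmt at NCOPIES == 1 plus two more at NCOPIES == 2, i.e.
   three vec_promote_demote (or vector_stmt) operations in the loop body.  */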
/* Returns true if the current function returns DECL.  */

static bool
cfun_returns (tree decl)
{
  edge_iterator ei;
  edge e;
  FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
    {
      greturn *ret = safe_dyn_cast <greturn *> (*gsi_last_bb (e->src));
      if (!ret)
	continue;
      if (gimple_return_retval (ret) == decl)
	return true;
      /* We often end up with an aggregate copy to the result decl,
	 handle that case as well.  First skip intermediate clobbers
	 though.  */
      gimple *def = ret;
      do
	{
	  def = SSA_NAME_DEF_STMT (gimple_vuse (def));
	}
      while (gimple_clobber_p (def));
      if (is_a <gassign *> (def)
	  && gimple_assign_lhs (def) == gimple_return_retval (ret)
	  && gimple_assign_rhs1 (def) == decl)
	return true;
    }
  return false;
}
/* Calculate cost of DR's memory access.  */
void
vect_get_store_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		     dr_alignment_support alignment_support_scheme,
		     int misalignment,
		     unsigned int *inside_cost,
		     stmt_vector_for_cost *body_cost_vec)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vector_store, stmt_info, 0,
					  vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: aligned.\n");
	break;
      }

    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned store.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_store, stmt_info,
					  misalignment, vect_body);
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_store_cost: unaligned supported by "
			   "hardware.\n");
	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_store_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Calculate cost of DR's memory access.  */
void
vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies,
		    dr_alignment_support alignment_support_scheme,
		    int misalignment,
		    bool add_realign_cost, unsigned int *inside_cost,
		    unsigned int *prologue_cost,
		    stmt_vector_for_cost *prologue_cost_vec,
		    stmt_vector_for_cost *body_cost_vec,
		    bool record_prologue_costs)
{
  switch (alignment_support_scheme)
    {
    case dr_aligned:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: aligned.\n");

	break;
      }
    case dr_unaligned_supported:
      {
	/* Here, we assign an additional cost for the unaligned load.  */
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  unaligned_load, stmt_info,
					  misalignment, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned supported by "
			   "hardware.\n");

	break;
      }
    case dr_explicit_realign:
      {
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies * 2,
					  vector_load, stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies,
					  vec_perm, stmt_info, 0, vect_body);

	/* FIXME: If the misalignment remains fixed across the iterations of
	   the containing loop, the following cost should be added to the
	   prologue costs.  */
	if (targetm.vectorize.builtin_mask_for_load)
	  *inside_cost += record_stmt_cost (body_cost_vec, 1, vector_stmt,
					    stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign\n");

	break;
      }
    case dr_explicit_realign_optimized:
      {
	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: unaligned software "
			   "pipelined.\n");

	/* Unaligned software pipeline has a load of an address, an initial
	   load, and possibly a mask operation to "prime" the loop.  However,
	   if this is an access in a group of loads, which provide grouped
	   access, then the above cost should only be considered for one
	   access in the group.  Inside the loop, there is a load op
	   and a realignment op.  */

	if (add_realign_cost && record_prologue_costs)
	  {
	    *prologue_cost += record_stmt_cost (prologue_cost_vec, 2,
						vector_stmt, stmt_info,
						0, vect_prologue);
	    if (targetm.vectorize.builtin_mask_for_load)
	      *prologue_cost += record_stmt_cost (prologue_cost_vec, 1,
						  vector_stmt, stmt_info,
						  0, vect_prologue);
	  }

	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vector_load,
					  stmt_info, 0, vect_body);
	*inside_cost += record_stmt_cost (body_cost_vec, ncopies, vec_perm,
					  stmt_info, 0, vect_body);

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "vect_model_load_cost: explicit realign optimized"
			   "\n");

	break;
      }

    case dr_unaligned_unsupported:
      {
	*inside_cost = VECT_MAX_COST;

	if (dump_enabled_p ())
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "vect_model_load_cost: unsupported access.\n");
	break;
      }

    default:
      gcc_unreachable ();
    }
}
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
   the loop preheader for the vectorized stmt STMT_VINFO.  */

static void
vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo,
		    gimple *new_stmt, gimple_stmt_iterator *gsi)
{
  if (gsi)
    vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi);
  else
    vinfo->insert_on_entry (stmt_vinfo, new_stmt);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "created new init_stmt: %G", new_stmt);
}
/* Function vect_init_vector.

   Insert a new stmt (INIT_STMT) that initializes a new variable of type
   TYPE with the value VAL.  If TYPE is a vector type and VAL does not have
   vector type a vector with all elements equal to VAL is created first.
   Place the initialization at GSI if it is not NULL.  Otherwise, place the
   initialization at the loop preheader.
   Return the DEF of INIT_STMT.
   It will be used in the vectorization of STMT_INFO.  */

tree
vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val,
		  tree type, gimple_stmt_iterator *gsi)
{
  gimple *init_stmt;
  tree new_temp;

  /* We abuse this function to push sth to a SSA name with initial 'val'.  */
  if (! useless_type_conversion_p (type, TREE_TYPE (val)))
    {
      gcc_assert (VECTOR_TYPE_P (type));
      if (! types_compatible_p (TREE_TYPE (type), TREE_TYPE (val)))
	{
	  /* Scalar boolean value should be transformed into
	     all zeros or all ones value before building a vector.  */
	  if (VECTOR_BOOLEAN_TYPE_P (type))
	    {
	      tree true_val = build_all_ones_cst (TREE_TYPE (type));
	      tree false_val = build_zero_cst (TREE_TYPE (type));

	      if (CONSTANT_CLASS_P (val))
		val = integer_zerop (val) ? false_val : true_val;
	      else
		{
		  new_temp = make_ssa_name (TREE_TYPE (type));
		  init_stmt = gimple_build_assign (new_temp, COND_EXPR,
						   val, true_val, false_val);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		  val = new_temp;
		}
	    }
	  else
	    {
	      gimple_seq stmts = NULL;
	      if (! INTEGRAL_TYPE_P (TREE_TYPE (val)))
		val = gimple_build (&stmts, VIEW_CONVERT_EXPR,
				    TREE_TYPE (type), val);
	      else
		/* ??? Condition vectorization expects us to do
		   promotion of invariant/external defs.  */
		val = gimple_convert (&stmts, TREE_TYPE (type), val);
	      for (gimple_stmt_iterator gsi2 = gsi_start (stmts);
		   !gsi_end_p (gsi2); )
		{
		  init_stmt = gsi_stmt (gsi2);
		  gsi_remove (&gsi2, false);
		  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
		}
	    }
	}
      val = build_vector_from_val (type, val);
    }

  new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_");
  init_stmt = gimple_build_assign (new_temp, val);
  vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi);
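  /* For instance, creating a V4SI invariant for the scalar value 3 yields an
     init stmt of the form "cst_1 = { 3, 3, 3, 3 };" and the returned DEF
     (cst_1) is then used as the vector operand.  */
  return new_temp;
}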
/* Function vect_get_vec_defs_for_operand.

   OP is an operand in STMT_VINFO.  This function returns a vector of
   NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.

   In the case that OP is an SSA_NAME which is defined in the loop, then
   STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.

   In case OP is an invariant or constant, a new stmt that creates a vector def
   needs to be introduced.  VECTYPE may be used to specify a required type for
   vector invariant.  */

void
vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info stmt_vinfo,
			       unsigned ncopies,
			       tree op, vec<tree> *vec_oprnds, tree vectype)
{
  gimple *def_stmt;
  enum vect_def_type dt;
  bool is_simple_use;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_vec_defs_for_operand: %T\n", op);

  stmt_vec_info def_stmt_info;
  is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt,
				      &def_stmt_info, &def_stmt);
  gcc_assert (is_simple_use);
  if (def_stmt && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "  def_stmt =  %G", def_stmt);

  vec_oprnds->create (ncopies);
  if (dt == vect_constant_def || dt == vect_external_def)
    {
      tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      tree vector_type;

      if (vectype)
	vector_type = vectype;
      else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
	       && VECTOR_BOOLEAN_TYPE_P (stmt_vectype))
	vector_type = truth_type_for (stmt_vectype);
      else
	vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op));

      gcc_assert (vector_type);
      tree vop = vect_init_vector (vinfo, stmt_vinfo, op, vector_type, NULL);
      while (ncopies--)
	vec_oprnds->quick_push (vop);
    }
  else
    {
      def_stmt_info = vect_stmt_to_vectorize (def_stmt_info);
      gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info).length () == ncopies);
      for (unsigned i = 0; i < ncopies; ++i)
	vec_oprnds->quick_push (gimple_get_lhs
				  (STMT_VINFO_VEC_STMTS (def_stmt_info)[i]));
    }
}
/* Get vectorized definitions for OP0 and OP1.  */

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, tree vectype0, vec<tree> *vec_oprnds0,
		   tree op1, tree vectype1, vec<tree> *vec_oprnds1,
		   tree op2, tree vectype2, vec<tree> *vec_oprnds2,
		   tree op3, tree vectype3, vec<tree> *vec_oprnds3)
{
  if (slp_node)
    {
      if (op0)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[0], vec_oprnds0);
      if (op1)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[1], vec_oprnds1);
      if (op2)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[2], vec_oprnds2);
      if (op3)
	vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node)[3], vec_oprnds3);
    }
  else
    {
      if (op0)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op0, vec_oprnds0, vectype0);
      if (op1)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op1, vec_oprnds1, vectype1);
      if (op2)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op2, vec_oprnds2, vectype2);
      if (op3)
	vect_get_vec_defs_for_operand (vinfo, stmt_info, ncopies,
				       op3, vec_oprnds3, vectype3);
    }
}

void
vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node,
		   unsigned ncopies,
		   tree op0, vec<tree> *vec_oprnds0,
		   tree op1, vec<tree> *vec_oprnds1,
		   tree op2, vec<tree> *vec_oprnds2,
		   tree op3, vec<tree> *vec_oprnds3)
{
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, NULL_TREE, vec_oprnds0,
		     op1, NULL_TREE, vec_oprnds1,
		     op2, NULL_TREE, vec_oprnds2,
		     op3, NULL_TREE, vec_oprnds3);
}
/* Helper function called by vect_finish_replace_stmt and
   vect_finish_stmt_generation.  Set the location of the new
   statement and create and return a stmt_vec_info for it.  */

static void
vect_finish_stmt_generation_1 (vec_info *,
			       stmt_vec_info stmt_info, gimple *vec_stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt);

  if (stmt_info)
    {
      gimple_set_location (vec_stmt, gimple_location (stmt_info->stmt));

      /* While EH edges will generally prevent vectorization, stmt might
	 e.g. be in a must-not-throw region.  Ensure newly created stmts
	 that could throw are part of the same region.  */
      int lp_nr = lookup_stmt_eh_lp (stmt_info->stmt);
      if (lp_nr != 0 && stmt_could_throw_p (cfun, vec_stmt))
	add_stmt_to_eh_lp (vec_stmt, lp_nr);
    }
  else
    gcc_assert (!stmt_could_throw_p (cfun, vec_stmt));
}
/* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
   which sets the same scalar result as STMT_INFO did.  Create and return a
   stmt_vec_info for VEC_STMT.  */

void
vect_finish_replace_stmt (vec_info *vinfo,
			  stmt_vec_info stmt_info, gimple *vec_stmt)
{
  gimple *scalar_stmt = vect_orig_stmt (stmt_info)->stmt;
  gcc_assert (gimple_get_lhs (scalar_stmt) == gimple_get_lhs (vec_stmt));

  gimple_stmt_iterator gsi = gsi_for_stmt (scalar_stmt);
  gsi_replace (&gsi, vec_stmt, true);

  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
   before *GSI.  Create and return a stmt_vec_info for VEC_STMT.  */

void
vect_finish_stmt_generation (vec_info *vinfo,
			     stmt_vec_info stmt_info, gimple *vec_stmt,
			     gimple_stmt_iterator *gsi)
{
  gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL);

  if (!gsi_end_p (*gsi)
      && gimple_has_mem_ops (vec_stmt))
    {
      gimple *at_stmt = gsi_stmt (*gsi);
      tree vuse = gimple_vuse (at_stmt);
      if (vuse && TREE_CODE (vuse) == SSA_NAME)
	{
	  tree vdef = gimple_vdef (at_stmt);
	  gimple_set_vuse (vec_stmt, gimple_vuse (at_stmt));
	  gimple_set_modified (vec_stmt, true);
	  /* If we have an SSA vuse and insert a store, update virtual
	     SSA form to avoid triggering the renamer.  Do so only
	     if we can easily see all uses - which is what almost always
	     happens with the way vectorized stmts are inserted.  */
	  if ((vdef && TREE_CODE (vdef) == SSA_NAME)
	      && ((is_gimple_assign (vec_stmt)
		   && !is_gimple_reg (gimple_assign_lhs (vec_stmt)))
		  || (is_gimple_call (vec_stmt)
		      && (!(gimple_call_flags (vec_stmt)
			    & (ECF_CONST|ECF_PURE|ECF_NOVOPS))
			  || (gimple_call_lhs (vec_stmt)
			      && !is_gimple_reg
				    (gimple_call_lhs (vec_stmt)))))))
	    {
	      tree new_vdef = copy_ssa_name (vuse, vec_stmt);
	      gimple_set_vdef (vec_stmt, new_vdef);
	      SET_USE (gimple_vuse_op (at_stmt), new_vdef);
	    }
	}
    }
  gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
  vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt);
}
/* We want to vectorize a call to combined function CFN with function
   decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
   as the types of all inputs.  Check whether this is possible using
   an internal function, returning its code if so or IFN_LAST if not.  */

static internal_fn
vectorizable_internal_function (combined_fn cfn, tree fndecl,
				tree vectype_out, tree vectype_in)
{
  internal_fn ifn;
  if (internal_fn_p (cfn))
    ifn = as_internal_fn (cfn);
  else
    ifn = associated_internal_fn (fndecl);
  if (ifn != IFN_LAST && direct_internal_fn_p (ifn))
    {
      const direct_internal_fn_info &info = direct_internal_fn (ifn);
      if (info.vectorizable)
	{
	  bool same_size_p = TYPE_SIZE (vectype_in) == TYPE_SIZE (vectype_out);
	  tree type0 = (info.type0 < 0 ? vectype_out : vectype_in);
	  tree type1 = (info.type1 < 0 ? vectype_out : vectype_in);

	  /* The type size of both the vectype_in and vectype_out should be
	     exactly the same when vectype_out isn't participating the optab.
	     While there is no restriction for type size when vectype_out
	     is part of the optab query.  */
	  if (type0 != vectype_out && type1 != vectype_out && !same_size_p)
	    return IFN_LAST;

	  if (direct_internal_fn_supported_p (ifn, tree_pair (type0, type1),
					      OPTIMIZE_FOR_SPEED))
	    return ifn;
	}
    }
  return IFN_LAST;
}


static tree permute_vec_elements (vec_info *, tree, tree, tree, stmt_vec_info,
				  gimple_stmt_iterator *);
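
/* As an illustration, a call to the sqrt builtin is associated with IFN_SQRT;
   with VECTYPE_OUT == VECTYPE_IN == V4SF the function above returns IFN_SQRT
   when direct_internal_fn_supported_p reports a matching optab for V4SF, and
   IFN_LAST otherwise.  */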
/* Check whether a load or store statement in the loop described by
   LOOP_VINFO is possible in a loop using partial vectors.  This is
   testing whether the vectorizer pass has the appropriate support,
   as well as whether the target does.

   VLS_TYPE says whether the statement is a load or store and VECTYPE
   is the type of the vector being loaded or stored.  SLP_NODE is the SLP
   node that contains the statement, or null if none.  MEMORY_ACCESS_TYPE
   says how the load or store is going to be implemented and GROUP_SIZE
   is the number of load or store statements in the containing group.
   If the access is a gather load or scatter store, GS_INFO describes
   its arguments.  If the load or store is conditional, SCALAR_MASK is the
   condition under which it occurs.

   Clear LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P if a loop using partial
   vectors is not supported, otherwise record the required rgroup control
   types.  */

static void
check_load_store_for_partial_vectors (loop_vec_info loop_vinfo, tree vectype,
				      slp_tree slp_node,
				      vec_load_store_type vls_type,
				      int group_size,
				      vect_memory_access_type
				      memory_access_type,
				      gather_scatter_info *gs_info,
				      tree scalar_mask)
{
  /* Invariant loads need no special support.  */
  if (memory_access_type == VMAT_INVARIANT)
    return;

  unsigned int nvectors;
  if (slp_node)
    nvectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
  else
    nvectors = vect_get_num_copies (loop_vinfo, vectype);

  vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
  vec_loop_lens *lens = &LOOP_VINFO_LENS (loop_vinfo);
  machine_mode vecmode = TYPE_MODE (vectype);
  bool is_load = (vls_type == VLS_LOAD);
  if (memory_access_type == VMAT_LOAD_STORE_LANES)
    {
      internal_fn ifn
	= (is_load ? vect_load_lanes_supported (vectype, group_size, true)
		   : vect_store_lanes_supported (vectype, group_size, true));
      if (ifn == IFN_MASK_LEN_LOAD_LANES || ifn == IFN_MASK_LEN_STORE_LANES)
	vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
      else if (ifn == IFN_MASK_LOAD_LANES || ifn == IFN_MASK_STORE_LANES)
	vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			       scalar_mask);
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " load/store-lanes instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	}
      return;
    }

  if (memory_access_type == VMAT_GATHER_SCATTER)
    {
      internal_fn ifn = (is_load
			 ? IFN_MASK_GATHER_LOAD
			 : IFN_MASK_SCATTER_STORE);
      internal_fn len_ifn = (is_load
			     ? IFN_MASK_LEN_GATHER_LOAD
			     : IFN_MASK_LEN_SCATTER_STORE);
      if (internal_gather_scatter_fn_supported_p (len_ifn, vectype,
						  gs_info->memory_type,
						  gs_info->offset_vectype,
						  gs_info->scale))
	vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, 1);
      else if (internal_gather_scatter_fn_supported_p (ifn, vectype,
						       gs_info->memory_type,
						       gs_info->offset_vectype,
						       gs_info->scale))
	vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			       scalar_mask);
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "can't operate on partial vectors because"
			     " the target doesn't have an appropriate"
			     " gather load or scatter store instruction.\n");
	  LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	}
      return;
    }

  if (memory_access_type != VMAT_CONTIGUOUS
      && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
    {
      /* Element X of the data must come from iteration i * VF + X of the
	 scalar loop.  We need more work to support other mappings.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because an"
			 " access isn't contiguous.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  if (!VECTOR_MODE_P (vecmode))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors when emulating"
			 " vector operations.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
      return;
    }

  /* We might load more scalars than we need for permuting SLP loads.
     We checked in get_group_load_store_type that the extra elements
     don't leak into a new vector.  */
  auto group_memory_nvectors = [](poly_uint64 size, poly_uint64 nunits)
  {
    unsigned int nvectors;
    if (can_div_away_from_zero_p (size, nunits, &nvectors))
      return nvectors;
    gcc_unreachable ();
  };

  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  machine_mode mask_mode;
  machine_mode vmode;
  bool using_partial_vectors_p = false;
  if (get_len_load_store_mode (vecmode, is_load).exists (&vmode))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      unsigned factor = (vecmode == vmode) ? 1 : GET_MODE_UNIT_SIZE (vecmode);
      vect_record_loop_len (loop_vinfo, lens, nvectors, vectype, factor);
      using_partial_vectors_p = true;
    }
  else if (targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode)
	   && can_vec_mask_load_store_p (vecmode, mask_mode, is_load))
    {
      nvectors = group_memory_nvectors (group_size * vf, nunits);
      vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype,
			     scalar_mask);
      using_partial_vectors_p = true;
    }

  if (!using_partial_vectors_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't operate on partial vectors because the"
			 " target doesn't have the appropriate partial"
			 " vectorization load or store.\n");
      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
    }
}
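
/* Illustrative example: with GROUP_SIZE == 3, VF == 2 and V4SI vectors
   (NUNITS == 4), group_memory_nvectors (3 * 2, 4) rounds 6 / 4 away from
   zero and records two loop masks (or lens) for the group.  */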
/* Return the mask input to a masked load or store.  VEC_MASK is the vectorized
   form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
   that needs to be applied to all loads and stores in a vectorized loop.
   Return VEC_MASK if LOOP_MASK is null or if VEC_MASK is already masked,
   otherwise return VEC_MASK & LOOP_MASK.

   MASK_TYPE is the type of both masks.  If new statements are needed,
   insert them before GSI.  */

static tree
prepare_vec_mask (loop_vec_info loop_vinfo, tree mask_type, tree loop_mask,
		  tree vec_mask, gimple_stmt_iterator *gsi)
{
  gcc_assert (useless_type_conversion_p (mask_type, TREE_TYPE (vec_mask)));
  if (!loop_mask)
    return vec_mask;

  gcc_assert (TREE_TYPE (loop_mask) == mask_type);

  if (loop_vinfo->vec_cond_masked_set.contains ({ vec_mask, loop_mask }))
    return vec_mask;

  tree and_res = make_temp_ssa_name (mask_type, NULL, "vec_mask_and");
  gimple *and_stmt = gimple_build_assign (and_res, BIT_AND_EXPR,
					  vec_mask, loop_mask);
  gsi_insert_before (gsi, and_stmt, GSI_SAME_STMT);
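  /* When both masks are needed the emitted statement has the form
     "vec_mask_and_1 = vec_mask & loop_mask;" and that combined mask is what
     the caller passes to the masked load or store.  */
  return and_res;
}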
/* Determine whether we can use a gather load or scatter store to vectorize
   strided load or store STMT_INFO by truncating the current offset to a
   smaller width.  We need to be able to construct an offset vector:

     { 0, X, X*2, X*3, ... }

   without loss of precision, where X is STMT_INFO's DR_STEP.

   Return true if this is possible, describing the gather load or scatter
   store in GS_INFO.  MASKED_P is true if the load or store is conditional.  */

static bool
vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info,
				     loop_vec_info loop_vinfo, bool masked_p,
				     gather_scatter_info *gs_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  if (TREE_CODE (step) != INTEGER_CST)
    {
      /* ??? Perhaps we could use range information here?  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "cannot truncate variable step.\n");
      return false;
    }

  /* Get the number of bits in an element.  */
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  scalar_mode element_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype));
  unsigned int element_bits = GET_MODE_BITSIZE (element_mode);

  /* Set COUNT to the upper limit on the number of elements - 1.
     Start with the maximum vectorization factor.  */
  unsigned HOST_WIDE_INT count = vect_max_vf (loop_vinfo) - 1;

  /* Try lowering COUNT to the number of scalar latch iterations.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  widest_int max_iters;
  if (max_loop_iterations (loop, &max_iters)
      && max_iters < count)
    count = max_iters.to_shwi ();

  /* Try scales of 1 and the element size.  */
  int scales[] = { 1, vect_get_scalar_dr_size (dr_info) };
  wi::overflow_type overflow = wi::OVF_NONE;
  for (int i = 0; i < 2; ++i)
    {
      int scale = scales[i];
      widest_int factor;
      if (!wi::multiple_of_p (wi::to_widest (step), scale, SIGNED, &factor))
	continue;

      /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE.  */
      widest_int range = wi::mul (count, factor, SIGNED, &overflow);
      if (overflow)
	continue;
      signop sign = range >= 0 ? UNSIGNED : SIGNED;
      unsigned int min_offset_bits = wi::min_precision (range, sign);

      /* Find the narrowest viable offset type.  */
      unsigned int offset_bits = 1U << ceil_log2 (min_offset_bits);
      tree offset_type = build_nonstandard_integer_type (offset_bits,
							  sign == UNSIGNED);

      /* See whether the target supports the operation with an offset
	 no narrower than OFFSET_TYPE.  */
      tree memory_type = TREE_TYPE (DR_REF (dr));
      if (!vect_gather_scatter_fn_p (loop_vinfo, DR_IS_READ (dr), masked_p,
				     vectype, memory_type, offset_type, scale,
				     &gs_info->ifn, &gs_info->offset_vectype)
	  || gs_info->ifn == IFN_LAST)
	continue;

      gs_info->decl = NULL_TREE;
      /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
	 but we don't need to store that here.  */
      gs_info->base = NULL_TREE;
      gs_info->element_type = TREE_TYPE (vectype);
      gs_info->offset = fold_convert (offset_type, step);
      gs_info->offset_dt = vect_constant_def;
      gs_info->scale = scale;
      gs_info->memory_type = memory_type;
      return true;
    }

  if (overflow && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "truncating gather/scatter offset to %d bits"
		     " might change its value.\n", element_bits);

  return false;
}
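
/* Worked example of the precision computation above: with DR_STEP == 4,
   COUNT == 255 and SCALE == 4, FACTOR is 1, the required range is 255, so
   min_precision gives 8 and the narrowest candidate offset type is an
   unsigned 8-bit integer.  */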
/* Return true if we can use gather/scatter internal functions to
   vectorize STMT_INFO, which is a grouped or strided load or store.
   MASKED_P is true if load or store is conditional.  When returning
   true, fill in GS_INFO with the information required to perform the
   operation.  */

static bool
vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info,
				    loop_vec_info loop_vinfo, bool masked_p,
				    gather_scatter_info *gs_info)
{
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, gs_info)
      || gs_info->ifn == IFN_LAST)
    return vect_truncate_gather_scatter_offset (stmt_info, loop_vinfo,
						masked_p, gs_info);

  tree old_offset_type = TREE_TYPE (gs_info->offset);
  tree new_offset_type = TREE_TYPE (gs_info->offset_vectype);

  gcc_assert (TYPE_PRECISION (new_offset_type)
	      >= TYPE_PRECISION (old_offset_type));
  gs_info->offset = fold_convert (new_offset_type, gs_info->offset);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "using gather/scatter for strided/grouped access,"
		     " scale = %d\n", gs_info->scale);

  return true;
}
/* STMT_INFO is a non-strided load or store, meaning that it accesses
   elements with a known constant step.  Return -1 if that step
   is negative, 0 if it is zero, and 1 if it is greater than zero.  */

static int
compare_step_with_zero (vec_info *vinfo, stmt_vec_info stmt_info)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  return tree_int_cst_compare (vect_dr_behavior (vinfo, dr_info)->step,
			       size_zero_node);
}
/* If the target supports a permute mask that reverses the elements in
   a vector of type VECTYPE, return that mask, otherwise return null.  */

static tree
perm_mask_for_reverse (tree vectype)
{
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* The encoding has a single stepped pattern.  */
  vec_perm_builder sel (nunits, 1, 3);
  for (int i = 0; i < 3; ++i)
    sel.quick_push (nunits - 1 - i);

  vec_perm_indices indices (sel, 1, nunits);
  if (!can_vec_perm_const_p (TYPE_MODE (vectype), TYPE_MODE (vectype),
			     indices))
    return NULL_TREE;
  return vect_gen_perm_mask_checked (vectype, indices);
}
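
/* For a fixed-width V4SI vector the selected permutation is simply
   { 3, 2, 1, 0 }; the three-element stepped encoding above also describes
   the reversal for variable-length vectors.  */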
1825 /* A subroutine of get_load_store_type, with a subset of the same
1826 arguments. Handle the case where STMT_INFO is a load or store that
1827 accesses consecutive elements with a negative step. Sets *POFFSET
1828 to the offset to be applied to the DR for the first access. */
1830 static vect_memory_access_type
1831 get_negative_load_store_type (vec_info
*vinfo
,
1832 stmt_vec_info stmt_info
, tree vectype
,
1833 vec_load_store_type vls_type
,
1834 unsigned int ncopies
, poly_int64
*poffset
)
1836 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1837 dr_alignment_support alignment_support_scheme
;
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1843 "multiple types with negative step.\n");
1844 return VMAT_ELEMENTWISE
;
1847 /* For backward running DRs the first access in vectype actually is
1848 N-1 elements before the address of the DR. */
1849 *poffset
= ((-TYPE_VECTOR_SUBPARTS (vectype
) + 1)
1850 * TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (vectype
))));
1852 int misalignment
= dr_misalignment (dr_info
, vectype
, *poffset
);
1853 alignment_support_scheme
1854 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
, misalignment
);
1855 if (alignment_support_scheme
!= dr_aligned
1856 && alignment_support_scheme
!= dr_unaligned_supported
)
1858 if (dump_enabled_p ())
1859 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1860 "negative step but alignment required.\n");
1862 return VMAT_ELEMENTWISE
;
1865 if (vls_type
== VLS_STORE_INVARIANT
)
1867 if (dump_enabled_p ())
1868 dump_printf_loc (MSG_NOTE
, vect_location
,
1869 "negative step with invariant source;"
1870 " no permute needed.\n");
1871 return VMAT_CONTIGUOUS_DOWN
;
1874 if (!perm_mask_for_reverse (vectype
))
1876 if (dump_enabled_p ())
1877 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1878 "negative step and reversing not supported.\n");
1880 return VMAT_ELEMENTWISE
;
1883 return VMAT_CONTIGUOUS_REVERSE
;
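/* For instance (illustrative numbers only): for a four-element vector of
   4-byte scalars, *POFFSET above is (-4 + 1) * 4 = -12 bytes, i.e. the
   first vector access starts three elements before the DR address and the
   reverse permute then restores the original element order.  */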
/* STMT_INFO is either a masked or unconditional store.  Return the value
   being stored.  */

tree
vect_get_store_rhs (stmt_vec_info stmt_info)
{
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (assign));
      return gimple_assign_rhs1 (assign);
    }
  if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (call);
      int index = internal_fn_stored_value_index (ifn);
      gcc_assert (index >= 0);
      return gimple_call_arg (call, index);
    }
  gcc_unreachable ();
}
/* Function VECTOR_VECTOR_COMPOSITION_TYPE

   This function returns a vector type which can be composed of NELTS
   pieces, whose type is recorded in PTYPE.  VTYPE should be a vector type,
   and has the same vector size as the returned type.  It first checks
   whether the target supports a vector mode of the piece size for the
   construction; if not, it further checks whether a scalar mode of the
   piece size can be used.  It returns NULL_TREE if no usable composition
   can be found.

   For example, for (vtype=V16QI, nelts=4), we can probably get:
   - V16QI with PTYPE V4QI.
   - V4SI with PTYPE SI.  */

static tree
vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype)
{
1924 gcc_assert (VECTOR_TYPE_P (vtype
));
1925 gcc_assert (known_gt (nelts
, 0U));
1927 machine_mode vmode
= TYPE_MODE (vtype
);
1928 if (!VECTOR_MODE_P (vmode
))
1931 /* When we are asked to compose the vector from its components let
1932 that happen directly. */
1933 if (known_eq (TYPE_VECTOR_SUBPARTS (vtype
), nelts
))
1935 *ptype
= TREE_TYPE (vtype
);
1939 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
1940 unsigned int pbsize
;
1941 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
1943 /* First check if vec_init optab supports construction from
1944 vector pieces directly. */
1945 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
1946 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
1948 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
1949 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
1950 != CODE_FOR_nothing
))
1952 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
1956 /* Otherwise check if exists an integer type of the same piece size and
1957 if vec_init optab supports construction from it directly. */
1958 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
1959 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
1960 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
1961 != CODE_FOR_nothing
))
1963 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
1964 return build_vector_type (*ptype
, nelts
);
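/* As an illustration of the piece-size computation above (assuming a
   128-bit VTYPE): for vtype=V16QI and nelts=4 each piece is 128 / 4 = 32
   bits, so the result is either V16QI with V4QI vector pieces or V4SI with
   SImode integer pieces, matching the example in the function comment.  */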
1971 /* A subroutine of get_load_store_type, with a subset of the same
   arguments.  Handle the case where STMT_INFO is part of a grouped load
   or store.
1975 For stores, the statements in the group are all consecutive
1976 and there is no gap at the end. For loads, the statements in the
1977 group might not be consecutive; there can be gaps between statements
1978 as well as at the end. */
1981 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
1982 tree vectype
, slp_tree slp_node
,
1983 bool masked_p
, vec_load_store_type vls_type
,
1984 vect_memory_access_type
*memory_access_type
,
1985 poly_int64
*poffset
,
1986 dr_alignment_support
*alignment_support_scheme
,
1988 gather_scatter_info
*gs_info
,
1989 internal_fn
*lanes_ifn
)
1991 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1992 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
1993 stmt_vec_info first_stmt_info
;
1994 unsigned int group_size
;
1995 unsigned HOST_WIDE_INT gap
;
1996 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1998 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1999 group_size
= DR_GROUP_SIZE (first_stmt_info
);
2000 gap
= DR_GROUP_GAP (first_stmt_info
);
2004 first_stmt_info
= stmt_info
;
2008 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2009 bool single_element_p
= (stmt_info
== first_stmt_info
2010 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2011 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2013 /* True if the vectorized statements would access beyond the last
2014 statement in the group. */
2015 bool overrun_p
= false;
2017 /* True if we can cope with such overrun by peeling for gaps, so that
2018 there is at least one final scalar iteration after the vector loop. */
2019 bool can_overrun_p
= (!masked_p
2020 && vls_type
== VLS_LOAD
2024 /* There can only be a gap at the end of the group if the stride is
2025 known at compile time. */
2026 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2028 /* Stores can't yet have gaps. */
2029 gcc_assert (slp_node
|| vls_type
== VLS_LOAD
|| gap
== 0);
2033 /* For SLP vectorization we directly vectorize a subchain
2034 without permutation. */
2035 if (! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
2037 = STMT_VINFO_DR_INFO (SLP_TREE_SCALAR_STMTS (slp_node
)[0]);
2038 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2040 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2041 separated by the stride, until we have a complete vector.
2042 Fall back to scalar accesses if that isn't possible. */
2043 if (multiple_p (nunits
, group_size
))
2044 *memory_access_type
= VMAT_STRIDED_SLP
;
2046 *memory_access_type
= VMAT_ELEMENTWISE
;
2050 overrun_p
= loop_vinfo
&& gap
!= 0;
2051 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2053 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2054 "Grouped store with gaps requires"
2055 " non-consecutive accesses\n");
2058 /* An overrun is fine if the trailing elements are smaller
2059 than the alignment boundary B. Every vector access will
2060 be a multiple of B and so we are guaranteed to access a
2061 non-gap element in the same B-sized block. */
2063 && gap
< (vect_known_alignment_in_bytes (first_dr_info
,
2065 / vect_get_scalar_dr_size (first_dr_info
)))
2068 /* If the gap splits the vector in half and the target
2069 can do half-vector operations avoid the epilogue peeling
2070 by simply loading half of the vector only. Usually
2071 the construction with an upper zero half will be elided. */
2072 dr_alignment_support alss
;
2073 int misalign
= dr_misalignment (first_dr_info
, vectype
);
2077 && (((alss
= vect_supportable_dr_alignment (vinfo
, first_dr_info
,
2078 vectype
, misalign
)))
2080 || alss
== dr_unaligned_supported
)
2081 && known_eq (nunits
, (group_size
- gap
) * 2)
2082 && known_eq (nunits
, group_size
)
2083 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2087 if (overrun_p
&& !can_overrun_p
)
2089 if (dump_enabled_p ())
2090 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2091 "Peeling for outer loop is not supported\n");
2094 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2097 if (single_element_p
)
2098 /* ??? The VMAT_CONTIGUOUS_REVERSE code generation is
2099 only correct for single element "interleaving" SLP. */
2100 *memory_access_type
= get_negative_load_store_type
2101 (vinfo
, stmt_info
, vectype
, vls_type
, 1, poffset
);
2104 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2105 separated by the stride, until we have a complete vector.
2106 Fall back to scalar accesses if that isn't possible. */
2107 if (multiple_p (nunits
, group_size
))
2108 *memory_access_type
= VMAT_STRIDED_SLP
;
2110 *memory_access_type
= VMAT_ELEMENTWISE
;
2113 else if (cmp
== 0 && loop_vinfo
)
2115 gcc_assert (vls_type
== VLS_LOAD
);
2116 *memory_access_type
= VMAT_INVARIANT
;
2117 /* Invariant accesses perform only component accesses, alignment
2118 is irrelevant for them. */
2119 *alignment_support_scheme
= dr_unaligned_supported
;
2122 *memory_access_type
= VMAT_CONTIGUOUS
;
2124 /* When we have a contiguous access across loop iterations
2125 but the access in the loop doesn't cover the full vector
2126 we can end up with no gap recorded but still excess
2127 elements accessed, see PR103116. Make sure we peel for
2128 gaps if necessary and sufficient and give up if not.
2130 If there is a combination of the access not covering the full
2131 vector and a gap recorded then we may need to peel twice. */
2133 && *memory_access_type
== VMAT_CONTIGUOUS
2134 && SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
2135 && !multiple_p (group_size
* LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
2138 unsigned HOST_WIDE_INT cnunits
, cvf
;
2140 || !nunits
.is_constant (&cnunits
)
2141 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&cvf
)
2142 /* Peeling for gaps assumes that a single scalar iteration
2143 is enough to make sure the last vector iteration doesn't
2144 access excess elements.
2145 ??? Enhancements include peeling multiple iterations
2146 or using masked loads with a static mask. */
2147 || (group_size
* cvf
) % cnunits
+ group_size
- gap
< cnunits
)
2149 if (dump_enabled_p ())
2150 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2151 "peeling for gaps insufficient for "
2161 /* We can always handle this case using elementwise accesses,
2162 but see if something more efficient is available. */
2163 *memory_access_type
= VMAT_ELEMENTWISE
;
2165 /* If there is a gap at the end of the group then these optimizations
2166 would access excess elements in the last iteration. */
2167 bool would_overrun_p
= (gap
!= 0);
2168 /* An overrun is fine if the trailing elements are smaller than the
2169 alignment boundary B. Every vector access will be a multiple of B
2170 and so we are guaranteed to access a non-gap element in the
2171 same B-sized block. */
2174 && gap
< (vect_known_alignment_in_bytes (first_dr_info
, vectype
)
2175 / vect_get_scalar_dr_size (first_dr_info
)))
2176 would_overrun_p
= false;
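      /* For instance (illustrative numbers only): with a known 16-byte
	 alignment and 4-byte scalars, a trailing gap of up to three elements
	 still lies within the last aligned 16-byte block, so the overrun
	 cannot touch another object and needs no gap peeling.  */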
2178 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2179 && (can_overrun_p
|| !would_overrun_p
)
2180 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
	  /* First cope with the degenerate case of a single-element
	     vector.  */
2184 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2189 /* Otherwise try using LOAD/STORE_LANES. */
2191 = vls_type
== VLS_LOAD
2192 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2193 : vect_store_lanes_supported (vectype
, group_size
,
2195 if (*lanes_ifn
!= IFN_LAST
)
2197 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2198 overrun_p
= would_overrun_p
;
2201 /* If that fails, try using permuting loads. */
2202 else if (vls_type
== VLS_LOAD
2203 ? vect_grouped_load_supported (vectype
,
2206 : vect_grouped_store_supported (vectype
, group_size
))
2208 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2209 overrun_p
= would_overrun_p
;
  /* As a last resort, try using a gather load or scatter store.
2216 ??? Although the code can handle all group sizes correctly,
2217 it probably isn't a win to use separate strided accesses based
2218 on nearby locations. Or, even if it's a win over scalar code,
2219 it might not be a win over vectorizing at a lower VF, if that
2220 allows us to use contiguous accesses. */
2221 if (*memory_access_type
== VMAT_ELEMENTWISE
2224 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2226 *memory_access_type
= VMAT_GATHER_SCATTER
;
2229 if (*memory_access_type
== VMAT_GATHER_SCATTER
2230 || *memory_access_type
== VMAT_ELEMENTWISE
)
2232 *alignment_support_scheme
= dr_unaligned_supported
;
2233 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2237 *misalignment
= dr_misalignment (first_dr_info
, vectype
, *poffset
);
2238 *alignment_support_scheme
2239 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, vectype
,
2243 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2245 /* STMT is the leader of the group. Check the operands of all the
2246 stmts of the group. */
2247 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2248 while (next_stmt_info
)
2250 tree op
= vect_get_store_rhs (next_stmt_info
);
2251 enum vect_def_type dt
;
2252 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2254 if (dump_enabled_p ())
2255 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2256 "use not simple.\n");
2259 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2265 gcc_assert (can_overrun_p
);
2266 if (dump_enabled_p ())
2267 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2268 "Data access with gaps requires scalar "
2270 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2276 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2277 if there is a memory access type that the vectorized form can use,
2278 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2279 or scatters, fill in GS_INFO accordingly. In addition
2280 *ALIGNMENT_SUPPORT_SCHEME is filled out and false is returned if
2281 the target does not support the alignment scheme. *MISALIGNMENT
2282 is set according to the alignment of the access (including
2283 DR_MISALIGNMENT_UNKNOWN when it is unknown).
2285 SLP says whether we're performing SLP rather than loop vectorization.
2286 MASKED_P is true if the statement is conditional on a vectorized mask.
2287 VECTYPE is the vector type that the vectorized statements will use.
2288 NCOPIES is the number of vector statements that will be needed. */
2291 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2292 tree vectype
, slp_tree slp_node
,
2293 bool masked_p
, vec_load_store_type vls_type
,
2294 unsigned int ncopies
,
2295 vect_memory_access_type
*memory_access_type
,
2296 poly_int64
*poffset
,
2297 dr_alignment_support
*alignment_support_scheme
,
2299 gather_scatter_info
*gs_info
,
2300 internal_fn
*lanes_ifn
)
2302 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2303 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2304 *misalignment
= DR_MISALIGNMENT_UNKNOWN
;
2306 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2308 *memory_access_type
= VMAT_GATHER_SCATTER
;
2309 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2311 /* When using internal functions, we rely on pattern recognition
2312 to convert the type of the offset to the type that the target
2313 requires, with the result being a call to an internal function.
2314 If that failed for some reason (e.g. because another pattern
2315 took priority), just handle cases in which the offset already
2316 has the right type. */
2317 else if (gs_info
->ifn
!= IFN_LAST
2318 && !is_gimple_call (stmt_info
->stmt
)
2319 && !tree_nop_conversion_p (TREE_TYPE (gs_info
->offset
),
2320 TREE_TYPE (gs_info
->offset_vectype
)))
2322 if (dump_enabled_p ())
2323 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2324 "%s offset requires a conversion\n",
2325 vls_type
== VLS_LOAD
? "gather" : "scatter");
2328 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2329 &gs_info
->offset_dt
,
2330 &gs_info
->offset_vectype
))
2332 if (dump_enabled_p ())
2333 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2334 "%s index use not simple.\n",
2335 vls_type
== VLS_LOAD
? "gather" : "scatter");
2338 else if (gs_info
->ifn
== IFN_LAST
&& !gs_info
->decl
)
2340 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant ()
2341 || !TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
).is_constant ()
2342 || !constant_multiple_p (TYPE_VECTOR_SUBPARTS
2343 (gs_info
->offset_vectype
),
2344 TYPE_VECTOR_SUBPARTS (vectype
)))
2346 if (dump_enabled_p ())
2347 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2348 "unsupported vector types for emulated "
2353 /* Gather-scatter accesses perform only component accesses, alignment
2354 is irrelevant for them. */
2355 *alignment_support_scheme
= dr_unaligned_supported
;
2357 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) || slp_node
)
2359 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
,
2361 vls_type
, memory_access_type
, poffset
,
2362 alignment_support_scheme
,
2363 misalignment
, gs_info
, lanes_ifn
))
2366 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2368 gcc_assert (!slp_node
);
2370 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2372 *memory_access_type
= VMAT_GATHER_SCATTER
;
2374 *memory_access_type
= VMAT_ELEMENTWISE
;
2375 /* Alignment is irrelevant here. */
2376 *alignment_support_scheme
= dr_unaligned_supported
;
2380 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2383 gcc_assert (vls_type
== VLS_LOAD
);
2384 *memory_access_type
= VMAT_INVARIANT
;
2385 /* Invariant accesses perform only component accesses, alignment
2386 is irrelevant for them. */
2387 *alignment_support_scheme
= dr_unaligned_supported
;
2392 *memory_access_type
= get_negative_load_store_type
2393 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
, poffset
);
2395 *memory_access_type
= VMAT_CONTIGUOUS
;
2396 *misalignment
= dr_misalignment (STMT_VINFO_DR_INFO (stmt_info
),
2398 *alignment_support_scheme
2399 = vect_supportable_dr_alignment (vinfo
,
2400 STMT_VINFO_DR_INFO (stmt_info
),
2401 vectype
, *misalignment
);
2405 if ((*memory_access_type
== VMAT_ELEMENTWISE
2406 || *memory_access_type
== VMAT_STRIDED_SLP
)
2407 && !nunits
.is_constant ())
2409 if (dump_enabled_p ())
2410 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2411 "Not using elementwise accesses due to variable "
2412 "vectorization factor.\n");
2416 if (*alignment_support_scheme
== dr_unaligned_unsupported
)
2418 if (dump_enabled_p ())
2419 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2420 "unsupported unaligned access\n");
2424 /* FIXME: At the moment the cost model seems to underestimate the
2425 cost of using elementwise accesses. This check preserves the
2426 traditional behavior until that can be fixed. */
2427 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2428 if (!first_stmt_info
)
2429 first_stmt_info
= stmt_info
;
2430 if (*memory_access_type
== VMAT_ELEMENTWISE
2431 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2432 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2433 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2434 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2436 if (dump_enabled_p ())
2437 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2438 "not falling back to elementwise accesses\n");
2444 /* Return true if boolean argument at MASK_INDEX is suitable for vectorizing
2445 conditional operation STMT_INFO. When returning true, store the mask
2446 in *MASK, the type of its definition in *MASK_DT_OUT, the type of the
2447 vectorized mask in *MASK_VECTYPE_OUT and the SLP node corresponding
2448 to the mask in *MASK_NODE if MASK_NODE is not NULL. */
2451 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2452 slp_tree slp_node
, unsigned mask_index
,
2453 tree
*mask
, slp_tree
*mask_node
,
2454 vect_def_type
*mask_dt_out
, tree
*mask_vectype_out
)
2456 enum vect_def_type mask_dt
;
2458 slp_tree mask_node_1
;
2459 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, mask_index
,
2460 mask
, &mask_node_1
, &mask_dt
, &mask_vectype
))
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2464 "mask use not simple.\n");
2468 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (*mask
)))
2470 if (dump_enabled_p ())
2471 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2472 "mask argument is not a boolean.\n");
2476 /* If the caller is not prepared for adjusting an external/constant
2477 SLP mask vector type fail. */
2480 && SLP_TREE_DEF_TYPE (mask_node_1
) != vect_internal_def
)
2482 if (dump_enabled_p ())
2483 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2484 "SLP mask argument is not vectorized.\n");
2488 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2490 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
),
2493 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2495 if (dump_enabled_p ())
2496 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2497 "could not find an appropriate vector mask type.\n");
2501 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2502 TYPE_VECTOR_SUBPARTS (vectype
)))
2504 if (dump_enabled_p ())
2505 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2506 "vector mask type %T"
2507 " does not match vector data type %T.\n",
2508 mask_vectype
, vectype
);
2513 *mask_dt_out
= mask_dt
;
2514 *mask_vectype_out
= mask_vectype
;
2516 *mask_node
= mask_node_1
;
2520 /* Return true if stored value is suitable for vectorizing store
2521 statement STMT_INFO. When returning true, store the scalar stored
2522 in *RHS and *RHS_NODE, the type of the definition in *RHS_DT_OUT,
2523 the type of the vectorized store value in
2524 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2527 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2528 slp_tree slp_node
, tree
*rhs
, slp_tree
*rhs_node
,
2529 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2530 vec_load_store_type
*vls_type_out
)
2533 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2535 if (gimple_call_internal_p (call
)
2536 && internal_store_fn_p (gimple_call_internal_fn (call
)))
2537 op_no
= internal_fn_stored_value_index (gimple_call_internal_fn (call
));
2540 op_no
= vect_slp_child_index_for_operand
2541 (stmt_info
->stmt
, op_no
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
2543 enum vect_def_type rhs_dt
;
2545 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, op_no
,
2546 rhs
, rhs_node
, &rhs_dt
, &rhs_vectype
))
2548 if (dump_enabled_p ())
2549 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2550 "use not simple.\n");
2554 /* In the case this is a store from a constant make sure
2555 native_encode_expr can handle it. */
2556 if (rhs_dt
== vect_constant_def
2557 && CONSTANT_CLASS_P (*rhs
) && native_encode_expr (*rhs
, NULL
, 64) == 0)
2559 if (dump_enabled_p ())
2560 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2561 "cannot encode constant as a byte sequence.\n");
2565 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2566 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2568 if (dump_enabled_p ())
2569 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2570 "incompatible vector types.\n");
2574 *rhs_dt_out
= rhs_dt
;
2575 *rhs_vectype_out
= rhs_vectype
;
2576 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2577 *vls_type_out
= VLS_STORE_INVARIANT
;
2579 *vls_type_out
= VLS_STORE
;
2583 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2584 Note that we support masks with floating-point type, in which case the
2585 floats are interpreted as a bitmask. */
2588 vect_build_all_ones_mask (vec_info
*vinfo
,
2589 stmt_vec_info stmt_info
, tree masktype
)
2591 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2592 return build_int_cst (masktype
, -1);
2593 else if (VECTOR_BOOLEAN_TYPE_P (masktype
)
2594 || TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2596 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2597 mask
= build_vector_from_val (masktype
, mask
);
2598 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2600 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2604 for (int j
= 0; j
< 6; ++j
)
2606 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2607 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2608 mask
= build_vector_from_val (masktype
, mask
);
2609 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2614 /* Build an all-zero merge value of type VECTYPE while vectorizing
2615 STMT_INFO as a gather load. */
2618 vect_build_zero_merge_argument (vec_info
*vinfo
,
2619 stmt_vec_info stmt_info
, tree vectype
)
2622 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2623 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2624 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2628 for (int j
= 0; j
< 6; ++j
)
2630 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2631 merge
= build_real (TREE_TYPE (vectype
), r
);
2635 merge
= build_vector_from_val (vectype
, merge
);
2636 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2639 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2640 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2641 the gather load operation. If the load is conditional, MASK is the
2642 vectorized condition, otherwise MASK is null. PTR is the base
2643 pointer and OFFSET is the vectorized offset. */
2646 vect_build_one_gather_load_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2647 gimple_stmt_iterator
*gsi
,
2648 gather_scatter_info
*gs_info
,
2649 tree ptr
, tree offset
, tree mask
)
2651 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2652 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2653 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2654 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2655 /* ptrtype */ arglist
= TREE_CHAIN (arglist
);
2656 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2657 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2658 tree scaletype
= TREE_VALUE (arglist
);
2660 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2662 || TREE_CODE (masktype
) == INTEGER_TYPE
2663 || types_compatible_p (srctype
, masktype
)));
2666 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2668 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2669 TYPE_VECTOR_SUBPARTS (idxtype
)));
2670 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2671 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2672 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2673 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2677 tree src_op
= NULL_TREE
;
2678 tree mask_op
= NULL_TREE
;
2681 if (!useless_type_conversion_p (masktype
, TREE_TYPE (mask
)))
2683 tree utype
, optype
= TREE_TYPE (mask
);
2684 if (VECTOR_TYPE_P (masktype
)
2685 || TYPE_MODE (masktype
) == TYPE_MODE (optype
))
2688 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2689 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2690 tree mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask
);
2692 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2693 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2695 if (!useless_type_conversion_p (masktype
, utype
))
2697 gcc_assert (TYPE_PRECISION (utype
)
2698 <= TYPE_PRECISION (masktype
));
2699 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
2700 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2701 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2704 src_op
= build_zero_cst (srctype
);
2715 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2716 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2719 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2720 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2723 if (!useless_type_conversion_p (vectype
, rettype
))
2725 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2726 TYPE_VECTOR_SUBPARTS (rettype
)));
2727 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2728 gimple_call_set_lhs (new_stmt
, op
);
2729 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2730 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2731 new_stmt
= gimple_build_assign (NULL_TREE
, VIEW_CONVERT_EXPR
, op
);
2737 /* Build a scatter store call while vectorizing STMT_INFO. Insert new
2738 instructions before GSI. GS_INFO describes the scatter store operation.
2739 PTR is the base pointer, OFFSET the vectorized offsets and OPRND the
2740 vectorized data to store.
2741 If the store is conditional, MASK is the vectorized condition, otherwise
2745 vect_build_one_scatter_store_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2746 gimple_stmt_iterator
*gsi
,
2747 gather_scatter_info
*gs_info
,
2748 tree ptr
, tree offset
, tree oprnd
, tree mask
)
2750 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2751 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2752 /* tree ptrtype = TREE_VALUE (arglist); */ arglist
= TREE_CHAIN (arglist
);
2753 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2754 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2755 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2756 tree scaletype
= TREE_VALUE (arglist
);
2757 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
2758 && TREE_CODE (rettype
) == VOID_TYPE
);
2760 tree mask_arg
= NULL_TREE
;
2764 tree optype
= TREE_TYPE (mask_arg
);
2766 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
2769 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2770 tree var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2771 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
2773 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2774 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2776 if (!useless_type_conversion_p (masktype
, utype
))
2778 gcc_assert (TYPE_PRECISION (utype
) <= TYPE_PRECISION (masktype
));
2779 tree var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
2780 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2781 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2787 mask_arg
= build_int_cst (masktype
, -1);
2788 mask_arg
= vect_init_vector (vinfo
, stmt_info
, mask_arg
, masktype
, NULL
);
2792 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
2794 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
2795 TYPE_VECTOR_SUBPARTS (srctype
)));
2796 tree var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
2797 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
2798 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
2799 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2804 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2806 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2807 TYPE_VECTOR_SUBPARTS (idxtype
)));
2808 tree var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2809 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2810 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2811 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2815 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2817 = gimple_build_call (gs_info
->decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
2821 /* Prepare the base and offset in GS_INFO for vectorization.
2822 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2823 to the vectorized offset argument for the first copy of STMT_INFO.
   STMT_INFO is the statement described by GS_INFO and LOOP is the
   containing loop.  */

static void
2828 vect_get_gather_scatter_ops (loop_vec_info loop_vinfo
,
2829 class loop
*loop
, stmt_vec_info stmt_info
,
2830 slp_tree slp_node
, gather_scatter_info
*gs_info
,
2831 tree
*dataref_ptr
, vec
<tree
> *vec_offset
)
2833 gimple_seq stmts
= NULL
;
2834 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2838 edge pe
= loop_preheader_edge (loop
);
2839 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2840 gcc_assert (!new_bb
);
2843 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_offset
);
2847 = vect_get_num_copies (loop_vinfo
, gs_info
->offset_vectype
);
2848 vect_get_vec_defs_for_operand (loop_vinfo
, stmt_info
, ncopies
,
2849 gs_info
->offset
, vec_offset
,
2850 gs_info
->offset_vectype
);
2854 /* Prepare to implement a grouped or strided load or store using
2855 the gather load or scatter store operation described by GS_INFO.
2856 STMT_INFO is the load or store statement.
2858 Set *DATAREF_BUMP to the amount that should be added to the base
2859 address after each copy of the vectorized statement. Set *VEC_OFFSET
2860 to an invariant offset vector in which element I has the value
2861 I * DR_STEP / SCALE. */
2864 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2865 loop_vec_info loop_vinfo
,
2866 gimple_stmt_iterator
*gsi
,
2867 gather_scatter_info
*gs_info
,
2868 tree
*dataref_bump
, tree
*vec_offset
,
2869 vec_loop_lens
*loop_lens
)
2871 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2872 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2874 if (LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
))
2876 /* _31 = .SELECT_VL (ivtmp_29, POLY_INT_CST [4, 4]);
2877 ivtmp_8 = _31 * 16 (step in bytes);
2878 .MASK_LEN_SCATTER_STORE (vectp_a.9_7, ... );
2879 vectp_a.9_26 = vectp_a.9_7 + ivtmp_8; */
2881 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
, 1, vectype
, 0, 0);
2883 = fold_build2 (MULT_EXPR
, sizetype
,
2884 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2886 *dataref_bump
= force_gimple_operand_gsi (gsi
, tmp
, true, NULL_TREE
, true,
2892 = size_binop (MULT_EXPR
,
2893 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2894 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2895 *dataref_bump
= cse_and_gimplify_to_preheader (loop_vinfo
, bump
);
2898 /* The offset given in GS_INFO can have pointer type, so use the element
2899 type of the vector instead. */
2900 tree offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2902 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2903 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
2904 ssize_int (gs_info
->scale
));
2905 step
= fold_convert (offset_type
, step
);
2907 /* Create {0, X, X*2, X*3, ...}. */
2908 tree offset
= fold_build2 (VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
2909 build_zero_cst (offset_type
), step
);
2910 *vec_offset
= cse_and_gimplify_to_preheader (loop_vinfo
, offset
);
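  /* As a worked example (illustrative numbers only): with DR_STEP 16 bytes,
     SCALE 4 and a four-element offset vector, X = 16 / 4 = 4, so *VEC_OFFSET
     becomes { 0, 4, 8, 12 }; without SELECT_VL, *DATAREF_BUMP is
     16 * 4 = 64 bytes per copy of the vectorized statement.  */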
/* Prepare the pointer IVs that need to be updated by a variable amount.
   That amount is the outcome of .SELECT_VL, which allows each iteration
   to process a flexible number of elements, as long as that number is at
   most VF elements.

   Return the data-reference pointer increment according to SELECT_VL.
   If new statements are needed, insert them before GSI.  */
2922 vect_get_loop_variant_data_ptr_increment (
2923 vec_info
*vinfo
, tree aggr_type
, gimple_stmt_iterator
*gsi
,
2924 vec_loop_lens
*loop_lens
, dr_vec_info
*dr_info
,
2925 vect_memory_access_type memory_access_type
)
2927 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2928 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
2930 /* gather/scatter never reach here. */
2931 gcc_assert (memory_access_type
!= VMAT_GATHER_SCATTER
);
  /* When the SELECT_VL pattern is in use, we adjust the memory address
     dynamically by the .SELECT_VL result.

     The result of .SELECT_VL is the number of elements to be processed
     in each iteration, so the memory address adjustment operation is:

       addr = addr + .SELECT_VL (ARG..) * step;  */
2943 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
, 1, aggr_type
, 0, 0);
2944 tree len_type
= TREE_TYPE (loop_len
);
  /* The outcome of .SELECT_VL is an element count, so scale it to a byte
     size before using it to adjust the pointer IV by a variable amount.  */
2948 tree tmp
= fold_build2 (MULT_EXPR
, len_type
, loop_len
,
2949 wide_int_to_tree (len_type
, wi::to_widest (step
)));
2950 tree bump
= make_temp_ssa_name (len_type
, NULL
, "ivtmp");
2951 gassign
*assign
= gimple_build_assign (bump
, tmp
);
2952 gsi_insert_before (gsi
, assign
, GSI_SAME_STMT
);
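  /* For instance (illustrative numbers only): if .SELECT_VL yields 5 for an
     iteration and the scalar step is 4 bytes, the pointer IV is bumped by
     5 * 4 = 20 bytes instead of a fixed full-vector amount.  */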
2956 /* Return the amount that should be added to a vector pointer to move
2957 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
   being vectorized and MEMORY_ACCESS_TYPE describes the type of
   vectorization.  */
2962 vect_get_data_ptr_increment (vec_info
*vinfo
, gimple_stmt_iterator
*gsi
,
2963 dr_vec_info
*dr_info
, tree aggr_type
,
2964 vect_memory_access_type memory_access_type
,
2965 vec_loop_lens
*loop_lens
= nullptr)
2967 if (memory_access_type
== VMAT_INVARIANT
)
2968 return size_zero_node
;
2970 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2971 if (loop_vinfo
&& LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
))
2972 return vect_get_loop_variant_data_ptr_increment (vinfo
, aggr_type
, gsi
,
2974 memory_access_type
);
2976 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
2977 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
2978 if (tree_int_cst_sgn (step
) == -1)
2979 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
2983 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
2986 vectorizable_bswap (vec_info
*vinfo
,
2987 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
2988 gimple
**vec_stmt
, slp_tree slp_node
,
2990 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
2993 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
2994 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2997 op
= gimple_call_arg (stmt
, 0);
2998 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2999 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3001 /* Multiple types in SLP are handled by creating the appropriate number of
   vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
   case of SLP.  */
3007 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
3009 gcc_assert (ncopies
>= 1);
3011 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype
))
3013 if (dump_enabled_p ())
3014 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3015 "mismatched vector sizes %T and %T\n",
3016 vectype_in
, vectype
);
3020 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
3024 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
3025 unsigned word_bytes
;
3026 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
3029 /* The encoding uses one stepped pattern for each byte in the word. */
3030 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
3031 for (unsigned i
= 0; i
< 3; ++i
)
3032 for (unsigned j
= 0; j
< word_bytes
; ++j
)
3033 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
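      /* As an illustration (not target-specific): for a byte vector viewed
	 as 4-byte words (word_bytes == 4) the encoded selector expands to
	 { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, ... }, i.e. the bytes of
	 each word are reversed, which is exactly the bswap semantics.  */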
3035 vec_perm_indices
indices (elts
, 1, num_bytes
);
3036 machine_mode vmode
= TYPE_MODE (char_vectype
);
3037 if (!can_vec_perm_const_p (vmode
, vmode
, indices
))
3043 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
3045 if (dump_enabled_p ())
3046 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3047 "incompatible vector types for invariants\n");
3051 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3052 DUMP_VECT_SCOPE ("vectorizable_bswap");
3053 record_stmt_cost (cost_vec
,
3054 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
3055 record_stmt_cost (cost_vec
,
3057 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
,
3058 vec_perm
, stmt_info
, 0, vect_body
);
3062 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
3065 vec
<tree
> vec_oprnds
= vNULL
;
3066 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
  /* Arguments are ready.  Create the new vector stmt.  */
3071 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3074 tree tem
= make_ssa_name (char_vectype
);
3075 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3076 char_vectype
, vop
));
3077 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3078 tree tem2
= make_ssa_name (char_vectype
);
3079 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3080 tem
, tem
, bswap_vconst
);
3081 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3082 tem
= make_ssa_name (vectype
);
3083 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3085 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3087 slp_node
->push_vec_def (new_stmt
);
3089 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3093 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3095 vec_oprnds
.release ();
/* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
   integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
   in a single step.  On success, store the binary pack code in
   *CONVERT_CODE.  */

static bool
simple_integer_narrowing (tree vectype_out, tree vectype_in,
			  code_helper *convert_code)
{
  if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out))
      || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in)))
    return false;

  code_helper code;
  int multi_step_cvt = 0;
  auto_vec <tree, 8> interm_types;
  if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in,
					&code, &multi_step_cvt, &interm_types)
      || multi_step_cvt)
    return false;

  *convert_code = code;
  return true;
}
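/* For example (illustrative, and only where the target supports the pack
   operation): V8SI -> V8HI is a single-step narrowing and succeeds, with
   *CONVERT_CODE set to the vector pack operation; V8SI -> V8QI would need
   an intermediate step (MULTI_STEP_CVT != 0) and is rejected above.  */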
3124 /* Function vectorizable_call.
3126 Check if STMT_INFO performs a function call that can be vectorized.
3127 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3128 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3129 Return true if STMT_INFO is vectorizable in this way. */
3132 vectorizable_call (vec_info
*vinfo
,
3133 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3134 gimple
**vec_stmt
, slp_tree slp_node
,
3135 stmt_vector_for_cost
*cost_vec
)
3141 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3142 tree vectype_out
, vectype_in
;
3143 poly_uint64 nunits_in
;
3144 poly_uint64 nunits_out
;
3145 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3146 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3147 tree fndecl
, new_temp
, rhs_type
;
3148 enum vect_def_type dt
[4]
3149 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3150 vect_unknown_def_type
};
3151 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3152 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3153 int ndts
= ARRAY_SIZE (dt
);
3155 auto_vec
<tree
, 8> vargs
;
3156 enum { NARROW
, NONE
, WIDEN
} modifier
;
3159 tree clz_ctz_arg1
= NULL_TREE
;
3161 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3164 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3168 /* Is STMT_INFO a vectorizable call? */
3169 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3173 if (gimple_call_internal_p (stmt
)
3174 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3175 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3176 /* Handled by vectorizable_load and vectorizable_store. */
3179 if (gimple_call_lhs (stmt
) == NULL_TREE
3180 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3183 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3185 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3187 /* Process function arguments. */
3188 rhs_type
= NULL_TREE
;
3189 vectype_in
= NULL_TREE
;
3190 nargs
= gimple_call_num_args (stmt
);
  /* Bail out if the function has more than four arguments; we do not have
     interesting builtin functions to vectorize with more than two arguments
     except for fma.  Having no arguments is not good either.  */
3195 if (nargs
== 0 || nargs
> 4)
3198 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3199 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3200 if (cfn
== CFN_GOMP_SIMD_LANE
)
3203 rhs_type
= unsigned_type_node
;
  /* Similarly, pretend that IFN_CLZ and IFN_CTZ have only one argument; the
     second argument just says whether the operation is well-defined at zero
     and, if so, what value should be returned for it.  */
3208 if ((cfn
== CFN_CLZ
|| cfn
== CFN_CTZ
) && nargs
== 2)
3211 clz_ctz_arg1
= gimple_call_arg (stmt
, 1);
3215 if (internal_fn_p (cfn
))
3216 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3218 for (i
= 0; i
< nargs
; i
++)
3220 if ((int) i
== mask_opno
)
3222 if (!vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_opno
,
3223 &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3228 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3229 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3231 if (dump_enabled_p ())
3232 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3233 "use not simple.\n");
3237 /* We can only handle calls with arguments of the same type. */
3239 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3241 if (dump_enabled_p ())
3242 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3243 "argument types differ.\n");
3247 rhs_type
= TREE_TYPE (op
);
3250 vectype_in
= vectypes
[i
];
3251 else if (vectypes
[i
]
3252 && !types_compatible_p (vectypes
[i
], vectype_in
))
3254 if (dump_enabled_p ())
3255 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3256 "argument vector types differ.\n");
3260 /* If all arguments are external or constant defs, infer the vector type
3261 from the scalar type. */
3263 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3265 gcc_assert (vectype_in
);
3268 if (dump_enabled_p ())
3269 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3270 "no vectype for scalar type %T\n", rhs_type
);
3275 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3276 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3278 if (dump_enabled_p ())
3279 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3280 "mixed mask and nonmask vector types\n");
3284 if (vect_emulated_vector_p (vectype_in
) || vect_emulated_vector_p (vectype_out
))
3286 if (dump_enabled_p ())
3287 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3288 "use emulated vector type for call\n");
3293 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3294 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3295 if (known_eq (nunits_in
* 2, nunits_out
))
3297 else if (known_eq (nunits_out
, nunits_in
))
3299 else if (known_eq (nunits_out
* 2, nunits_in
))
3304 /* We only handle functions that do not read or clobber memory. */
3305 if (gimple_vuse (stmt
))
3307 if (dump_enabled_p ())
3308 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3309 "function reads from or writes to memory.\n");
3313 /* For now, we only vectorize functions if a target specific builtin
3314 is available. TODO -- in some cases, it might be profitable to
3315 insert the calls for pieces of the vector, in order to be able
3316 to vectorize other operations in the loop. */
3318 internal_fn ifn
= IFN_LAST
;
3319 tree callee
= gimple_call_fndecl (stmt
);
3321 /* First try using an internal function. */
3322 code_helper convert_code
= MAX_TREE_CODES
;
3324 && (modifier
== NONE
3325 || (modifier
== NARROW
3326 && simple_integer_narrowing (vectype_out
, vectype_in
,
3328 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3331 /* If that fails, try asking for a target-specific built-in function. */
3332 if (ifn
== IFN_LAST
)
3334 if (cfn
!= CFN_LAST
)
3335 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3336 (cfn
, vectype_out
, vectype_in
);
3337 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3338 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3339 (callee
, vectype_out
, vectype_in
);
3342 if (ifn
== IFN_LAST
&& !fndecl
)
3344 if (cfn
== CFN_GOMP_SIMD_LANE
3347 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3348 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3349 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3350 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3352 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3353 { 0, 1, 2, ... vf - 1 } vector. */
3354 gcc_assert (nargs
== 0);
3356 else if (modifier
== NONE
3357 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3358 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3359 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3360 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3361 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3362 slp_op
, vectype_in
, cost_vec
);
3365 if (dump_enabled_p ())
3366 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3367 "function is not vectorizable.\n");
3374 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3375 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3377 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3379 /* Sanity check: make sure that at least one copy of the vectorized stmt
3380 needs to be generated. */
3381 gcc_assert (ncopies
>= 1);
3383 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
3384 internal_fn cond_fn
= get_conditional_internal_fn (ifn
);
3385 internal_fn cond_len_fn
= get_len_internal_fn (ifn
);
3386 int len_opno
= internal_fn_len_index (cond_len_fn
);
3387 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3388 vec_loop_lens
*lens
= (loop_vinfo
? &LOOP_VINFO_LENS (loop_vinfo
) : NULL
);
3389 if (!vec_stmt
) /* transformation not required. */
3392 for (i
= 0; i
< nargs
; ++i
)
3393 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
],
3395 ? vectypes
[i
] : vectype_in
))
3397 if (dump_enabled_p ())
3398 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3399 "incompatible vector types for invariants\n");
3402 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3403 DUMP_VECT_SCOPE ("vectorizable_call");
3404 vect_model_simple_cost (vinfo
, stmt_info
,
3405 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3406 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3407 record_stmt_cost (cost_vec
, ncopies
/ 2,
3408 vec_promote_demote
, stmt_info
, 0, vect_body
);
3411 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
)
3412 && (reduc_idx
>= 0 || mask_opno
>= 0))
3415 && (cond_fn
== IFN_LAST
3416 || !direct_internal_fn_supported_p (cond_fn
, vectype_out
,
3417 OPTIMIZE_FOR_SPEED
))
3418 && (cond_len_fn
== IFN_LAST
3419 || !direct_internal_fn_supported_p (cond_len_fn
, vectype_out
,
3420 OPTIMIZE_FOR_SPEED
)))
3422 if (dump_enabled_p ())
3423 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3424 "can't use a fully-masked loop because no"
3425 " conditional operation is available.\n");
3426 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
3430 unsigned int nvectors
3432 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3434 tree scalar_mask
= NULL_TREE
;
3436 scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3437 if (cond_len_fn
!= IFN_LAST
3438 && direct_internal_fn_supported_p (cond_len_fn
, vectype_out
,
3439 OPTIMIZE_FOR_SPEED
))
3440 vect_record_loop_len (loop_vinfo
, lens
, nvectors
, vectype_out
,
3443 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype_out
,
3452 if (dump_enabled_p ())
3453 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3456 scalar_dest
= gimple_call_lhs (stmt
);
3457 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3459 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3460 bool len_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
3461 unsigned int vect_nargs
= nargs
;
	  /* COND_* -> COND_LEN_* takes 2 extra arguments: LEN, BIAS.  */
3470 else if (reduc_idx
>= 0)
3473 else if (masked_loop_p
&& reduc_idx
>= 0)
3481 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3483 tree prev_res
= NULL_TREE
;
3484 vargs
.safe_grow (vect_nargs
, true);
3485 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3486 for (j
= 0; j
< ncopies
; ++j
)
3488 /* Build argument list for the vectorized call. */
3491 vec
<tree
> vec_oprnds0
;
3493 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3494 vec_oprnds0
= vec_defs
[0];
3496 /* Arguments are ready. Create the new vector stmt. */
3497 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3500 if (masked_loop_p
&& reduc_idx
>= 0)
3502 unsigned int vec_num
= vec_oprnds0
.length ();
3503 /* Always true for SLP. */
3504 gcc_assert (ncopies
== 1);
3505 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
,
3506 gsi
, masks
, vec_num
,
3510 for (k
= 0; k
< nargs
; k
++)
3512 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3513 vargs
[varg
++] = vec_oprndsk
[i
];
3515 if (masked_loop_p
&& reduc_idx
>= 0)
3516 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3518 vargs
[varg
++] = clz_ctz_arg1
;
3521 if (modifier
== NARROW
)
3523 /* We don't define any narrowing conditional functions
3525 gcc_assert (mask_opno
< 0);
3526 tree half_res
= make_ssa_name (vectype_in
);
3528 = gimple_build_call_internal_vec (ifn
, vargs
);
3529 gimple_call_set_lhs (call
, half_res
);
3530 gimple_call_set_nothrow (call
, true);
3531 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3534 prev_res
= half_res
;
3537 new_temp
= make_ssa_name (vec_dest
);
3538 new_stmt
= vect_gimple_build (new_temp
, convert_code
,
3539 prev_res
, half_res
);
3540 vect_finish_stmt_generation (vinfo
, stmt_info
,
3545 if (len_opno
>= 0 && len_loop_p
)
3547 unsigned int vec_num
= vec_oprnds0
.length ();
3548 /* Always true for SLP. */
3549 gcc_assert (ncopies
== 1);
3551 = vect_get_loop_len (loop_vinfo
, gsi
, lens
, vec_num
,
3554 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3555 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3556 vargs
[len_opno
] = len
;
3557 vargs
[len_opno
+ 1] = bias
;
3559 else if (mask_opno
>= 0 && masked_loop_p
)
3561 unsigned int vec_num
= vec_oprnds0
.length ();
3562 /* Always true for SLP. */
3563 gcc_assert (ncopies
== 1);
3564 tree mask
= vect_get_loop_mask (loop_vinfo
,
3565 gsi
, masks
, vec_num
,
3567 vargs
[mask_opno
] = prepare_vec_mask
3568 (loop_vinfo
, TREE_TYPE (mask
), mask
,
3569 vargs
[mask_opno
], gsi
);
3573 if (ifn
!= IFN_LAST
)
3574 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3576 call
= gimple_build_call_vec (fndecl
, vargs
);
3577 new_temp
= make_ssa_name (vec_dest
, call
);
3578 gimple_call_set_lhs (call
, new_temp
);
3579 gimple_call_set_nothrow (call
, true);
3580 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3583 slp_node
->push_vec_def (new_stmt
);
3589 if (masked_loop_p
&& reduc_idx
>= 0)
3590 vargs
[varg
++] = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3592 for (i
= 0; i
< nargs
; i
++)
3594 op
= gimple_call_arg (stmt
, i
);
3597 vec_defs
.quick_push (vNULL
);
3598 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3602 vargs
[varg
++] = vec_defs
[i
][j
];
3604 if (masked_loop_p
&& reduc_idx
>= 0)
3605 vargs
[varg
++] = vargs
[reduc_idx
+ 1];
3607 vargs
[varg
++] = clz_ctz_arg1
;
3609 if (len_opno
>= 0 && len_loop_p
)
3611 tree len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
, ncopies
,
3614 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
3615 tree bias
= build_int_cst (intQI_type_node
, biasval
);
3616 vargs
[len_opno
] = len
;
3617 vargs
[len_opno
+ 1] = bias
;
3619 else if (mask_opno
>= 0 && masked_loop_p
)
3621 tree mask
= vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
,
3624 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
3625 vargs
[mask_opno
], gsi
);
3629 if (cfn
== CFN_GOMP_SIMD_LANE
)
3631 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3633 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3634 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3635 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3636 new_temp
= make_ssa_name (vec_dest
);
3637 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3638 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3640 else if (modifier
== NARROW
)
3642 /* We don't define any narrowing conditional functions at
3644 gcc_assert (mask_opno
< 0);
3645 tree half_res
= make_ssa_name (vectype_in
);
3646 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3647 gimple_call_set_lhs (call
, half_res
);
3648 gimple_call_set_nothrow (call
, true);
3649 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3652 prev_res
= half_res
;
3655 new_temp
= make_ssa_name (vec_dest
);
3656 new_stmt
= vect_gimple_build (new_temp
, convert_code
, prev_res
,
3658 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3663 if (ifn
!= IFN_LAST
)
3664 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3666 call
= gimple_build_call_vec (fndecl
, vargs
);
3667 new_temp
= make_ssa_name (vec_dest
, call
);
3668 gimple_call_set_lhs (call
, new_temp
);
3669 gimple_call_set_nothrow (call
, true);
3670 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3674 if (j
== (modifier
== NARROW
? 1 : 0))
3675 *vec_stmt
= new_stmt
;
3676 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3678 for (i
= 0; i
< nargs
; i
++)
3680 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3681 vec_oprndsi
.release ();
3684 else if (modifier
== NARROW
)
3686 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3687 /* We don't define any narrowing conditional functions at present. */
3688 gcc_assert (mask_opno
< 0);
3689 for (j
= 0; j
< ncopies
; ++j
)
3691 /* Build argument list for the vectorized call. */
3693 vargs
.create (nargs
* 2);
3699 vec
<tree
> vec_oprnds0
;
3701 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3702 vec_oprnds0
= vec_defs
[0];
3704 /* Arguments are ready. Create the new vector stmt. */
3705 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3709 for (k
= 0; k
< nargs
; k
++)
3711 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3712 vargs
.quick_push (vec_oprndsk
[i
]);
3713 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3716 if (ifn
!= IFN_LAST
)
3717 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3719 call
= gimple_build_call_vec (fndecl
, vargs
);
3720 new_temp
= make_ssa_name (vec_dest
, call
);
3721 gimple_call_set_lhs (call
, new_temp
);
3722 gimple_call_set_nothrow (call
, true);
3723 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3724 slp_node
->push_vec_def (call
);
3729 for (i
= 0; i
< nargs
; i
++)
3731 op
= gimple_call_arg (stmt
, i
);
3734 vec_defs
.quick_push (vNULL
);
3735 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3736 op
, &vec_defs
[i
], vectypes
[i
]);
3738 vec_oprnd0
= vec_defs
[i
][2*j
];
3739 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3741 vargs
.quick_push (vec_oprnd0
);
3742 vargs
.quick_push (vec_oprnd1
);
3745 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3746 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3747 gimple_call_set_lhs (new_stmt
, new_temp
);
3748 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3750 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3754 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3756 for (i
= 0; i
< nargs
; i
++)
3758 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3759 vec_oprndsi
.release ();
/* No current target implements this case.  */

/* The call in STMT might prevent it from being removed in dce.
   We however cannot remove it here, due to the way the ssa name
   it defines is mapped to the new definition.  So just replace
   rhs of the statement with something harmless.  */
3776 stmt_info
= vect_orig_stmt (stmt_info
);
3777 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3780 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3781 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
/* Per-argument information collected by vectorizable_simd_clone_call.  */

struct simd_call_arg_info
{
  tree vectype;
  tree op;
  HOST_WIDE_INT linear_step;
  enum vect_def_type dt;
  unsigned int align;
  bool simd_lane_linear;
};
/* Helper function of vectorizable_simd_clone_call.  If OP, an SSA_NAME,
   is linear within simd lane (but not within whole loop), note it in
   *ARGINFO.  */
3802 vect_simd_lane_linear (tree op
, class loop
*loop
,
3803 struct simd_call_arg_info
*arginfo
)
3805 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3807 if (!is_gimple_assign (def_stmt
)
3808 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3809 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3812 tree base
= gimple_assign_rhs1 (def_stmt
);
3813 HOST_WIDE_INT linear_step
= 0;
3814 tree v
= gimple_assign_rhs2 (def_stmt
);
3815 while (TREE_CODE (v
) == SSA_NAME
)
3818 def_stmt
= SSA_NAME_DEF_STMT (v
);
3819 if (is_gimple_assign (def_stmt
))
3820 switch (gimple_assign_rhs_code (def_stmt
))
3823 t
= gimple_assign_rhs2 (def_stmt
);
3824 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3826 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3827 v
= gimple_assign_rhs1 (def_stmt
);
3830 t
= gimple_assign_rhs2 (def_stmt
);
3831 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3833 linear_step
= tree_to_shwi (t
);
3834 v
= gimple_assign_rhs1 (def_stmt
);
3837 t
= gimple_assign_rhs1 (def_stmt
);
3838 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3839 || (TYPE_PRECISION (TREE_TYPE (v
))
3840 < TYPE_PRECISION (TREE_TYPE (t
))))
3849 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3851 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3852 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
3857 arginfo
->linear_step
= linear_step
;
3859 arginfo
->simd_lane_linear
= true;
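/* A sketch of the address shape this helper recognizes: within one call of
   the SIMD clone the pointer differs between lanes only by a constant step
   times the lane number, so the clone can treat it as a linear argument.
   Illustrative code only; the function name below is invented:

     #include <cstddef>

     const char *lane_linear_address (const char *base,
                                      ptrdiff_t linear_step,
                                      unsigned simd_lane)
     {
       // Loop-invariant base, per-lane offset only.
       return base + linear_step * simd_lane;
     }
   */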
/* Function vectorizable_simd_clone_call.

   Check if STMT_INFO performs a function call that can be vectorized
   by calling a simd clone of the function.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
3874 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3875 gimple_stmt_iterator
*gsi
,
3876 gimple
**vec_stmt
, slp_tree slp_node
,
3877 stmt_vector_for_cost
*)
3882 tree vec_oprnd0
= NULL_TREE
;
3885 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3886 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3887 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3888 tree fndecl
, new_temp
;
3890 auto_vec
<simd_call_arg_info
> arginfo
;
3891 vec
<tree
> vargs
= vNULL
;
3893 tree lhs
, rtype
, ratype
;
3894 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3895 int masked_call_offset
= 0;
3897 /* Is STMT a vectorizable call? */
3898 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3902 fndecl
= gimple_call_fndecl (stmt
);
3903 if (fndecl
== NULL_TREE
3904 && gimple_call_internal_p (stmt
, IFN_MASK_CALL
))
3906 fndecl
= gimple_call_arg (stmt
, 0);
3907 gcc_checking_assert (TREE_CODE (fndecl
) == ADDR_EXPR
);
3908 fndecl
= TREE_OPERAND (fndecl
, 0);
3909 gcc_checking_assert (TREE_CODE (fndecl
) == FUNCTION_DECL
);
3910 masked_call_offset
= 1;
3912 if (fndecl
== NULL_TREE
)
3915 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3916 if (node
== NULL
|| node
->simd_clones
== NULL
)
3919 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3922 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3926 if (gimple_call_lhs (stmt
)
3927 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3930 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3932 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3934 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3937 /* Process function arguments. */
3938 nargs
= gimple_call_num_args (stmt
) - masked_call_offset
;
3940 /* Bail out if the function has zero arguments. */
3944 vec
<tree
>& simd_clone_info
= (slp_node
? SLP_TREE_SIMD_CLONE_INFO (slp_node
)
3945 : STMT_VINFO_SIMD_CLONE_INFO (stmt_info
));
3946 arginfo
.reserve (nargs
, true);
3947 auto_vec
<slp_tree
> slp_op
;
3948 slp_op
.safe_grow_cleared (nargs
);
3950 for (i
= 0; i
< nargs
; i
++)
3952 simd_call_arg_info thisarginfo
;
3955 thisarginfo
.linear_step
= 0;
3956 thisarginfo
.align
= 0;
3957 thisarginfo
.op
= NULL_TREE
;
3958 thisarginfo
.simd_lane_linear
= false;
3960 int op_no
= i
+ masked_call_offset
;
3962 op_no
= vect_slp_child_index_for_operand (stmt
, op_no
, false);
3963 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3964 op_no
, &op
, &slp_op
[i
],
3965 &thisarginfo
.dt
, &thisarginfo
.vectype
)
3966 || thisarginfo
.dt
== vect_uninitialized_def
)
3968 if (dump_enabled_p ())
3969 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3970 "use not simple.\n");
3974 if (thisarginfo
.dt
== vect_constant_def
3975 || thisarginfo
.dt
== vect_external_def
)
3977 /* With SLP we determine the vector type of constants/externals
3978 at analysis time, handling conflicts via
3979 vect_maybe_update_slp_op_vectype. At transform time
3980 we have a vector type recorded for SLP. */
3981 gcc_assert (!vec_stmt
3983 || thisarginfo
.vectype
!= NULL_TREE
);
3985 thisarginfo
.vectype
= get_vectype_for_scalar_type (vinfo
,
3990 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3992 /* For linear arguments, the analyze phase should have saved
3993 the base and step in {STMT_VINFO,SLP_TREE}_SIMD_CLONE_INFO. */
3994 if (i
* 3 + 4 <= simd_clone_info
.length ()
3995 && simd_clone_info
[i
* 3 + 2])
3997 gcc_assert (vec_stmt
);
3998 thisarginfo
.linear_step
= tree_to_shwi (simd_clone_info
[i
* 3 + 2]);
3999 thisarginfo
.op
= simd_clone_info
[i
* 3 + 1];
4000 thisarginfo
.simd_lane_linear
4001 = (simd_clone_info
[i
* 3 + 3] == boolean_true_node
);
4002 /* If loop has been peeled for alignment, we need to adjust it. */
4003 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
4004 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
4005 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
4007 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
4008 tree step
= simd_clone_info
[i
* 3 + 2];
4009 tree opt
= TREE_TYPE (thisarginfo
.op
);
4010 bias
= fold_convert (TREE_TYPE (step
), bias
);
4011 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
4013 = fold_build2 (POINTER_TYPE_P (opt
)
4014 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
4015 thisarginfo
.op
, bias
);
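/* The peeling adjustment above in scalar terms: if the vectorized loop now
   starts (orig_niters - niters) iterations later, a linear argument's start
   value must advance by that many steps.  A minimal sketch under that
   assumption (names invented for the example):

     #include <cstdint>

     int64_t adjust_linear_base (int64_t base, int64_t step,
                                 int64_t orig_niters, int64_t niters)
     {
       int64_t peeled = orig_niters - niters;  // iterations peeled off the front
       return base + peeled * step;
     }
   */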
4019 && thisarginfo
.dt
!= vect_constant_def
4020 && thisarginfo
.dt
!= vect_external_def
4022 && TREE_CODE (op
) == SSA_NAME
4023 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
4025 && tree_fits_shwi_p (iv
.step
))
4027 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
4028 thisarginfo
.op
= iv
.base
;
4030 else if ((thisarginfo
.dt
== vect_constant_def
4031 || thisarginfo
.dt
== vect_external_def
)
4032 && POINTER_TYPE_P (TREE_TYPE (op
)))
4033 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
4034 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
4036 if (POINTER_TYPE_P (TREE_TYPE (op
))
4037 && !thisarginfo
.linear_step
4039 && thisarginfo
.dt
!= vect_constant_def
4040 && thisarginfo
.dt
!= vect_external_def
4042 && TREE_CODE (op
) == SSA_NAME
)
4043 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
4045 arginfo
.quick_push (thisarginfo
);
4048 poly_uint64 vf
= loop_vinfo
? LOOP_VINFO_VECT_FACTOR (loop_vinfo
) : 1;
4049 unsigned group_size
= slp_node
? SLP_TREE_LANES (slp_node
) : 1;
4050 unsigned int badness
= 0;
4051 struct cgraph_node
*bestn
= NULL
;
4052 if (simd_clone_info
.exists ())
4053 bestn
= cgraph_node::get (simd_clone_info
[0]);
4055 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
4056 n
= n
->simdclone
->next_clone
)
4058 unsigned int this_badness
= 0;
4059 unsigned int num_calls
;
/* The number of arguments in the call and the number of parameters in
   the simdclone should match.  However, when the simdclone is
   'inbranch', it could have one more parameter than nargs when using
   an inbranch simdclone to call a non-inbranch call, either in a
   non-masked loop using an all-true constant mask, or inside a masked
   loop using its mask.  */
4066 size_t simd_nargs
= n
->simdclone
->nargs
;
4067 if (!masked_call_offset
&& n
->simdclone
->inbranch
)
4069 if (!constant_multiple_p (vf
* group_size
, n
->simdclone
->simdlen
,
4071 || (!n
->simdclone
->inbranch
&& (masked_call_offset
> 0))
4072 || (nargs
!= simd_nargs
))
4075 this_badness
+= floor_log2 (num_calls
) * 4096;
4076 if (n
->simdclone
->inbranch
)
4077 this_badness
+= 8192;
4078 int target_badness
= targetm
.simd_clone
.usable (n
);
4079 if (target_badness
< 0)
4081 this_badness
+= target_badness
* 512;
4082 for (i
= 0; i
< nargs
; i
++)
4084 switch (n
->simdclone
->args
[i
].arg_type
)
4086 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4087 if (!useless_type_conversion_p
4088 (n
->simdclone
->args
[i
].orig_type
,
4089 TREE_TYPE (gimple_call_arg (stmt
,
4090 i
+ masked_call_offset
))))
4092 else if (arginfo
[i
].dt
== vect_constant_def
4093 || arginfo
[i
].dt
== vect_external_def
4094 || arginfo
[i
].linear_step
)
4097 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4098 if (arginfo
[i
].dt
!= vect_constant_def
4099 && arginfo
[i
].dt
!= vect_external_def
)
4102 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4103 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4104 if (arginfo
[i
].dt
== vect_constant_def
4105 || arginfo
[i
].dt
== vect_external_def
4106 || (arginfo
[i
].linear_step
4107 != n
->simdclone
->args
[i
].linear_step
))
4110 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4111 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4112 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4113 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4114 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4115 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4119 case SIMD_CLONE_ARG_TYPE_MASK
:
4120 /* While we can create a traditional data vector from
4121 an incoming integer mode mask we have no good way to
4122 force generate an integer mode mask from a traditional
4123 boolean vector input. */
4124 if (SCALAR_INT_MODE_P (n
->simdclone
->mask_mode
)
4125 && !SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
)))
4127 else if (!SCALAR_INT_MODE_P (n
->simdclone
->mask_mode
)
4128 && SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
)))
4129 this_badness
+= 2048;
4132 if (i
== (size_t) -1)
4134 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
4139 if (arginfo
[i
].align
)
4140 this_badness
+= (exact_log2 (arginfo
[i
].align
)
4141 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
4143 if (i
== (size_t) -1)
4145 if (masked_call_offset
== 0
4146 && n
->simdclone
->inbranch
4147 && n
->simdclone
->nargs
> nargs
)
4149 gcc_assert (n
->simdclone
->args
[n
->simdclone
->nargs
- 1].arg_type
==
4150 SIMD_CLONE_ARG_TYPE_MASK
);
4151 /* Penalize using a masked SIMD clone in a non-masked loop, that is
4152 not in a branch, as we'd have to construct an all-true mask. */
4153 if (!loop_vinfo
|| !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4156 if (bestn
== NULL
|| this_badness
< badness
)
4159 badness
= this_badness
;
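/* The surrounding loop scores every clone and keeps the one with the lowest
   badness.  The following standalone sketch mirrors the main ingredients of
   that scoring (more calls per vector iteration, unnecessarily masked clones
   and the target hook's verdict all add badness); the struct and function
   are invented for the illustration and omit the per-argument penalties:

     #include <cmath>
     #include <vector>

     struct clone_candidate
     {
       unsigned simdlen;      // lanes handled by one call of this clone
       bool inbranch;         // clone takes an explicit mask argument
       int target_badness;    // target hook's opinion; < 0 means unusable
     };

     const clone_candidate *
     pick_best_clone (const std::vector<clone_candidate> &clones,
                      unsigned vf, bool call_is_masked)
     {
       const clone_candidate *best = nullptr;
       unsigned best_badness = 0;
       for (const clone_candidate &c : clones)
         {
           if (c.target_badness < 0 || vf % c.simdlen != 0)
             continue;                          // unusable for this target/VF
           if (!c.inbranch && call_is_masked)
             continue;                          // cannot mask an unmasked clone
           unsigned num_calls = vf / c.simdlen;
           unsigned badness = (unsigned) std::log2 (num_calls) * 4096;
           if (c.inbranch)
             badness += 8192;                   // prefer unmasked clones
           badness += (unsigned) c.target_badness * 512;
           if (!best || badness < best_badness)
             {
               best = &c;
               best_badness = badness;
             }
         }
       return best;
     }
   */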
4166 unsigned int num_mask_args
= 0;
4167 if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4168 for (i
= 0; i
< nargs
; i
++)
4169 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
)
4172 for (i
= 0; i
< nargs
; i
++)
4174 if ((arginfo
[i
].dt
== vect_constant_def
4175 || arginfo
[i
].dt
== vect_external_def
)
4176 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
4178 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
,
4179 i
+ masked_call_offset
));
4180 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
4182 if (arginfo
[i
].vectype
== NULL
4183 || !constant_multiple_p (bestn
->simdclone
->simdlen
,
4184 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4188 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
4189 && VECTOR_BOOLEAN_TYPE_P (bestn
->simdclone
->args
[i
].vector_type
))
4191 if (dump_enabled_p ())
4192 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4193 "vector mask arguments are not supported.\n");
4197 if (bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_MASK
)
4199 tree clone_arg_vectype
= bestn
->simdclone
->args
[i
].vector_type
;
4200 if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4202 if (maybe_ne (TYPE_VECTOR_SUBPARTS (clone_arg_vectype
),
4203 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4205 /* FORNOW we only have partial support for vector-type masks
4206 that can't hold all of simdlen. */
4207 if (dump_enabled_p ())
4208 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4210 "in-branch vector clones are not yet"
4211 " supported for mismatched vector sizes.\n");
4214 if (!expand_vec_cond_expr_p (clone_arg_vectype
,
4215 arginfo
[i
].vectype
, ERROR_MARK
))
4217 if (dump_enabled_p ())
4218 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4220 "cannot compute mask argument for"
4221 " in-branch vector clones.\n");
4225 else if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4227 if (!SCALAR_INT_MODE_P (TYPE_MODE (arginfo
[i
].vectype
))
4228 || maybe_ne (exact_div (bestn
->simdclone
->simdlen
,
4230 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4232 /* FORNOW we only have partial support for integer-type masks
4233 that represent the same number of lanes as the
4234 vectorized mask inputs. */
4235 if (dump_enabled_p ())
4236 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4238 "in-branch vector clones are not yet "
4239 "supported for mismatched vector sizes.\n");
4245 if (dump_enabled_p ())
4246 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
4248 "in-branch vector clones not supported"
4249 " on this target.\n");
4255 fndecl
= bestn
->decl
;
4256 nunits
= bestn
->simdclone
->simdlen
;
4258 ncopies
= vector_unroll_factor (vf
* group_size
, nunits
);
4260 ncopies
= vector_unroll_factor (vf
, nunits
);
/* If the function isn't const, only allow it in simd loops where the
   user has asserted that at least nunits consecutive iterations can be
   performed using SIMD instructions.  */
4265 if ((loop
== NULL
|| maybe_lt ((unsigned) loop
->safelen
, nunits
))
4266 && gimple_vuse (stmt
))
4269 /* Sanity check: make sure that at least one copy of the vectorized stmt
4270 needs to be generated. */
4271 gcc_assert (ncopies
>= 1);
4273 if (!vec_stmt
) /* transformation not required. */
4276 for (unsigned i
= 0; i
< nargs
; ++i
)
4277 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], arginfo
[i
].vectype
))
4279 if (dump_enabled_p ())
4280 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4281 "incompatible vector types for invariants\n");
/* When the original call is pure or const but the SIMD ABI dictates
   an aggregate return we will have to use a virtual definition and
   in a loop eventually even need to add a virtual PHI.  That's
   not straightforward, so allow fixing this up via renaming.  */
4288 if (gimple_call_lhs (stmt
)
4289 && !gimple_vdef (stmt
)
4290 && TREE_CODE (TREE_TYPE (TREE_TYPE (bestn
->decl
))) == ARRAY_TYPE
)
4291 vinfo
->any_known_not_updated_vssa
= true;
4292 /* ??? For SLP code-gen we end up inserting after the last
4293 vector argument def rather than at the original call position
4294 so automagic virtual operand updating doesn't work. */
4295 if (gimple_vuse (stmt
) && slp_node
)
4296 vinfo
->any_known_not_updated_vssa
= true;
4297 simd_clone_info
.safe_push (bestn
->decl
);
4298 for (i
= 0; i
< bestn
->simdclone
->nargs
; i
++)
4300 switch (bestn
->simdclone
->args
[i
].arg_type
)
4304 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4305 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4307 simd_clone_info
.safe_grow_cleared (i
* 3 + 1, true);
4308 simd_clone_info
.safe_push (arginfo
[i
].op
);
4309 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
4310 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4311 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4312 simd_clone_info
.safe_push (ls
);
4313 tree sll
= arginfo
[i
].simd_lane_linear
4314 ? boolean_true_node
: boolean_false_node
;
4315 simd_clone_info
.safe_push (sll
);
4318 case SIMD_CLONE_ARG_TYPE_MASK
:
4320 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
4321 vect_record_loop_mask (loop_vinfo
,
4322 &LOOP_VINFO_MASKS (loop_vinfo
),
4323 ncopies
, vectype
, op
);
4329 if (!bestn
->simdclone
->inbranch
&& loop_vinfo
)
4331 if (dump_enabled_p ()
4332 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
4333 dump_printf_loc (MSG_NOTE
, vect_location
,
4334 "can't use a fully-masked loop because a"
4335 " non-masked simd clone was selected.\n");
4336 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
4339 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4340 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4341 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
4342 dt, slp_node, cost_vec); */
4348 if (dump_enabled_p ())
4349 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4352 scalar_dest
= gimple_call_lhs (stmt
);
4353 vec_dest
= NULL_TREE
;
4358 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4359 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4360 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4363 rtype
= TREE_TYPE (ratype
);
4367 auto_vec
<vec
<tree
> > vec_oprnds
;
4368 auto_vec
<unsigned> vec_oprnds_i
;
4369 vec_oprnds_i
.safe_grow_cleared (nargs
, true);
4372 vec_oprnds
.reserve_exact (nargs
);
4373 vect_get_slp_defs (vinfo
, slp_node
, &vec_oprnds
);
4376 vec_oprnds
.safe_grow_cleared (nargs
, true);
4377 for (j
= 0; j
< ncopies
; ++j
)
4379 poly_uint64 callee_nelements
;
4380 poly_uint64 caller_nelements
;
4381 /* Build argument list for the vectorized call. */
4383 vargs
.create (nargs
);
4387 for (i
= 0; i
< nargs
; i
++)
4389 unsigned int k
, l
, m
, o
;
4391 op
= gimple_call_arg (stmt
, i
+ masked_call_offset
);
4392 switch (bestn
->simdclone
->args
[i
].arg_type
)
4394 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4395 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4396 caller_nelements
= TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
);
4397 callee_nelements
= TYPE_VECTOR_SUBPARTS (atype
);
4398 o
= vector_unroll_factor (nunits
, callee_nelements
);
4399 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4401 if (known_lt (callee_nelements
, caller_nelements
))
4403 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4404 if (!constant_multiple_p (caller_nelements
,
4405 callee_nelements
, &k
))
4408 gcc_assert ((k
& (k
- 1)) == 0);
4412 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4413 ncopies
* o
/ k
, op
,
4415 vec_oprnds_i
[i
] = 0;
4416 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4420 vec_oprnd0
= arginfo
[i
].op
;
4421 if ((m
& (k
- 1)) == 0)
4422 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4424 arginfo
[i
].op
= vec_oprnd0
;
4426 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4428 bitsize_int ((m
& (k
- 1)) * prec
));
4430 = gimple_build_assign (make_ssa_name (atype
),
4432 vect_finish_stmt_generation (vinfo
, stmt_info
,
4434 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
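/* The BIT_FIELD_REF sequence above splits one caller-side vector into
   several narrower callee-side argument vectors.  Conceptually (standalone
   sketch, invented helper, lanes modelled as std::vector elements):

     #include <vector>

     template <typename T>
     std::vector<std::vector<T>>
     split_vector (const std::vector<T> &caller_vec, size_t callee_nelements)
     {
       std::vector<std::vector<T>> pieces;
       for (size_t pos = 0; pos + callee_nelements <= caller_vec.size ();
            pos += callee_nelements)
         // Each piece becomes one argument of one clone invocation.
         pieces.emplace_back (caller_vec.begin () + pos,
                              caller_vec.begin () + pos + callee_nelements);
       return pieces;
     }
   */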
4438 if (!constant_multiple_p (callee_nelements
,
4439 caller_nelements
, &k
))
4441 gcc_assert ((k
& (k
- 1)) == 0);
4442 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4444 vec_alloc (ctor_elts
, k
);
4447 for (l
= 0; l
< k
; l
++)
4449 if (m
== 0 && l
== 0)
4452 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4456 vec_oprnds_i
[i
] = 0;
4457 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4460 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4461 arginfo
[i
].op
= vec_oprnd0
;
4464 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4468 if (!useless_type_conversion_p (TREE_TYPE (vec_oprnd0
),
4471 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, atype
,
4474 = gimple_build_assign (make_ssa_name (atype
),
4476 vect_finish_stmt_generation (vinfo
, stmt_info
,
4478 vargs
.safe_push (gimple_get_lhs (new_stmt
));
4481 vargs
.safe_push (vec_oprnd0
);
4484 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4486 = gimple_build_assign (make_ssa_name (atype
),
4488 vect_finish_stmt_generation (vinfo
, stmt_info
,
4490 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4495 case SIMD_CLONE_ARG_TYPE_MASK
:
4496 if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4498 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4499 tree elt_type
= TREE_TYPE (atype
);
4500 tree one
= fold_convert (elt_type
, integer_one_node
);
4501 tree zero
= fold_convert (elt_type
, integer_zero_node
);
4502 callee_nelements
= TYPE_VECTOR_SUBPARTS (atype
);
4503 caller_nelements
= TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
);
4504 o
= vector_unroll_factor (nunits
, callee_nelements
);
4505 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4507 if (maybe_lt (callee_nelements
, caller_nelements
))
4509 /* The mask type has fewer elements than simdlen. */
4514 else if (known_eq (callee_nelements
, caller_nelements
))
4516 /* The SIMD clone function has the same number of
4517 elements as the current function. */
4521 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4525 vec_oprnds_i
[i
] = 0;
4527 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4529 && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4531 vec_loop_masks
*loop_masks
4532 = &LOOP_VINFO_MASKS (loop_vinfo
);
4534 = vect_get_loop_mask (loop_vinfo
, gsi
,
4535 loop_masks
, ncopies
,
4538 = prepare_vec_mask (loop_vinfo
,
4539 TREE_TYPE (loop_mask
),
4540 loop_mask
, vec_oprnd0
,
4542 loop_vinfo
->vec_cond_masked_set
.add ({ vec_oprnd0
,
4547 = build3 (VEC_COND_EXPR
, atype
, vec_oprnd0
,
4548 build_vector_from_val (atype
, one
),
4549 build_vector_from_val (atype
, zero
));
4551 = gimple_build_assign (make_ssa_name (atype
),
4553 vect_finish_stmt_generation (vinfo
, stmt_info
,
4555 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
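/* For a clone whose mask mode is VOIDmode the VEC_COND_EXPR built above
   turns a boolean mask into a data vector of ones and zeros in the clone's
   mask element type.  Element-wise that is simply (illustrative sketch):

     #include <vector>

     template <typename T>
     std::vector<T> mask_to_data_vector (const std::vector<bool> &mask)
     {
       std::vector<T> out (mask.size ());
       for (size_t i = 0; i < mask.size (); ++i)
         out[i] = mask[i] ? T (1) : T (0);  // active lane -> 1, inactive -> 0
       return out;
     }
   */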
4559 /* The mask type has more elements than simdlen. */
4566 else if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4568 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4569 /* Guess the number of lanes represented by atype. */
4570 poly_uint64 atype_subparts
4571 = exact_div (bestn
->simdclone
->simdlen
,
4573 o
= vector_unroll_factor (nunits
, atype_subparts
);
4574 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4579 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4583 vec_oprnds_i
[i
] = 0;
4585 if (maybe_lt (atype_subparts
,
4586 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4588 /* The mask argument has fewer elements than the
4593 else if (known_eq (atype_subparts
,
4594 TYPE_VECTOR_SUBPARTS (arginfo
[i
].vectype
)))
4596 /* The vector mask argument matches the input
4597 in the number of lanes, but not necessarily
4599 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4600 tree st
= lang_hooks
.types
.type_for_mode
4601 (TYPE_MODE (TREE_TYPE (vec_oprnd0
)), 1);
4602 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, st
,
4605 = gimple_build_assign (make_ssa_name (st
),
4607 vect_finish_stmt_generation (vinfo
, stmt_info
,
4609 if (!types_compatible_p (atype
, st
))
4612 = gimple_build_assign (make_ssa_name (atype
),
4616 vect_finish_stmt_generation (vinfo
, stmt_info
,
4619 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4623 /* The mask argument has more elements than the
4633 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4634 vargs
.safe_push (op
);
4636 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4637 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4642 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4643 &stmts
, true, NULL_TREE
);
4647 edge pe
= loop_preheader_edge (loop
);
4648 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4649 gcc_assert (!new_bb
);
4651 if (arginfo
[i
].simd_lane_linear
)
4653 vargs
.safe_push (arginfo
[i
].op
);
4656 tree phi_res
= copy_ssa_name (op
);
4657 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4658 add_phi_arg (new_phi
, arginfo
[i
].op
,
4659 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4661 = POINTER_TYPE_P (TREE_TYPE (op
))
4662 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4663 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4664 ? sizetype
: TREE_TYPE (op
);
4666 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4668 tree tcst
= wide_int_to_tree (type
, cst
);
4669 tree phi_arg
= copy_ssa_name (op
);
4671 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4672 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4673 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4674 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4676 arginfo
[i
].op
= phi_res
;
4677 vargs
.safe_push (phi_res
);
4682 = POINTER_TYPE_P (TREE_TYPE (op
))
4683 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4684 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4685 ? sizetype
: TREE_TYPE (op
);
4687 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4689 tree tcst
= wide_int_to_tree (type
, cst
);
4690 new_temp
= make_ssa_name (TREE_TYPE (op
));
4692 = gimple_build_assign (new_temp
, code
,
4693 arginfo
[i
].op
, tcst
);
4694 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4695 vargs
.safe_push (new_temp
);
4698 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4699 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4700 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4701 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4702 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4703 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4709 if (masked_call_offset
== 0
4710 && bestn
->simdclone
->inbranch
4711 && bestn
->simdclone
->nargs
> nargs
)
4714 size_t mask_i
= bestn
->simdclone
->nargs
- 1;
4716 gcc_assert (bestn
->simdclone
->args
[mask_i
].arg_type
==
4717 SIMD_CLONE_ARG_TYPE_MASK
);
4719 tree masktype
= bestn
->simdclone
->args
[mask_i
].vector_type
;
4720 callee_nelements
= TYPE_VECTOR_SUBPARTS (masktype
);
4721 o
= vector_unroll_factor (nunits
, callee_nelements
);
4722 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4724 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
4726 vec_loop_masks
*loop_masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
4727 mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
4728 ncopies
, vectype
, j
);
4731 mask
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
4734 if (SCALAR_INT_MODE_P (bestn
->simdclone
->mask_mode
))
4736 /* This means we are dealing with integer mask modes.
4737 First convert to an integer type with the same size as
4738 the current vector type. */
4739 unsigned HOST_WIDE_INT intermediate_size
4740 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (mask
)));
4742 build_nonstandard_integer_type (intermediate_size
, 1);
4743 mask
= build1 (VIEW_CONVERT_EXPR
, mid_int_type
, mask
);
4745 = gimple_build_assign (make_ssa_name (mid_int_type
),
4747 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
4748 /* Then zero-extend to the mask mode. */
4749 mask
= fold_build1 (NOP_EXPR
, masktype
,
4750 gimple_get_lhs (new_stmt
));
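/* For integer mask modes the sequence above view-converts the boolean
   vector to a same-sized integer and then zero-extends it to the clone's
   mask mode; in effect the lane predicates end up as the low bits of an
   integer.  A standalone sketch of that packing (illustrative only):

     #include <cstdint>
     #include <vector>

     uint64_t mask_to_integer_bits (const std::vector<bool> &mask)
     {
       uint64_t bits = 0;
       for (size_t i = 0; i < mask.size () && i < 64; ++i)
         if (mask[i])
           bits |= uint64_t (1) << i;  // lane i active -> bit i set
       return bits;                    // upper bits stay zero (zero-extension)
     }
   */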
4752 else if (bestn
->simdclone
->mask_mode
== VOIDmode
)
4754 tree one
= fold_convert (TREE_TYPE (masktype
),
4756 tree zero
= fold_convert (TREE_TYPE (masktype
),
4758 mask
= build3 (VEC_COND_EXPR
, masktype
, mask
,
4759 build_vector_from_val (masktype
, one
),
4760 build_vector_from_val (masktype
, zero
));
4765 new_stmt
= gimple_build_assign (make_ssa_name (masktype
), mask
);
4766 vect_finish_stmt_generation (vinfo
, stmt_info
,
4768 mask
= gimple_assign_lhs (new_stmt
);
4769 vargs
.safe_push (mask
);
4773 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4777 || known_eq (TYPE_VECTOR_SUBPARTS (rtype
), nunits
));
4779 new_temp
= create_tmp_var (ratype
);
4780 else if (useless_type_conversion_p (vectype
, rtype
))
4781 new_temp
= make_ssa_name (vec_dest
, new_call
);
4783 new_temp
= make_ssa_name (rtype
, new_call
);
4784 gimple_call_set_lhs (new_call
, new_temp
);
4786 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4787 gimple
*new_stmt
= new_call
;
4791 if (!multiple_p (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
4794 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4795 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4796 k
= vector_unroll_factor (nunits
,
4797 TYPE_VECTOR_SUBPARTS (vectype
));
4798 gcc_assert ((k
& (k
- 1)) == 0);
4799 for (l
= 0; l
< k
; l
++)
4804 t
= build_fold_addr_expr (new_temp
);
4805 t
= build2 (MEM_REF
, vectype
, t
,
4806 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4809 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4810 bitsize_int (prec
), bitsize_int (l
* prec
));
4811 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4812 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4814 if (j
== 0 && l
== 0)
4815 *vec_stmt
= new_stmt
;
4817 SLP_TREE_VEC_DEFS (slp_node
)
4818 .quick_push (gimple_assign_lhs (new_stmt
));
4820 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4824 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4827 else if (!multiple_p (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
4830 if (!constant_multiple_p (TYPE_VECTOR_SUBPARTS (vectype
),
4831 TYPE_VECTOR_SUBPARTS (rtype
), &k
))
4833 gcc_assert ((k
& (k
- 1)) == 0);
4834 if ((j
& (k
- 1)) == 0)
4835 vec_alloc (ret_ctor_elts
, k
);
4839 o
= vector_unroll_factor (nunits
,
4840 TYPE_VECTOR_SUBPARTS (rtype
));
4841 for (m
= 0; m
< o
; m
++)
4843 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4844 size_int (m
), NULL_TREE
, NULL_TREE
);
4845 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4847 vect_finish_stmt_generation (vinfo
, stmt_info
,
4849 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4850 gimple_assign_lhs (new_stmt
));
4852 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4855 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4856 if ((j
& (k
- 1)) != k
- 1)
4858 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4860 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4861 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4863 if ((unsigned) j
== k
- 1)
4864 *vec_stmt
= new_stmt
;
4866 SLP_TREE_VEC_DEFS (slp_node
)
4867 .quick_push (gimple_assign_lhs (new_stmt
));
4869 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4874 tree t
= build_fold_addr_expr (new_temp
);
4875 t
= build2 (MEM_REF
, vectype
, t
,
4876 build_int_cst (TREE_TYPE (t
), 0));
4877 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4878 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4879 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4881 else if (!useless_type_conversion_p (vectype
, rtype
))
4883 vec_oprnd0
= build1 (VIEW_CONVERT_EXPR
, vectype
, new_temp
);
4885 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4886 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4891 *vec_stmt
= new_stmt
;
4893 SLP_TREE_VEC_DEFS (slp_node
).quick_push (gimple_get_lhs (new_stmt
));
4895 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4898 for (i
= 0; i
< nargs
; ++i
)
4900 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4905 /* Mark the clone as no longer being a candidate for GC. */
4906 bestn
->gc_candidate
= false;
4908 /* The call in STMT might prevent it from being removed in dce.
4909 We however cannot remove it here, due to the way the ssa name
4910 it defines is mapped to the new definition. So just replace
4911 rhs of the statement with something harmless. */
4919 type
= TREE_TYPE (scalar_dest
);
4920 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4921 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4924 new_stmt
= gimple_build_nop ();
4925 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4926 unlink_stmt_vdef (stmt
);
/* Function vect_gen_widened_results_half

   Create a vector stmt whose code, type, number of arguments, and result
   variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
   VEC_OPRND0 and VEC_OPRND1.  The new vector stmt is to be inserted at GSI.
   In the case that CODE is a CALL_EXPR, this means that a call to DECL
   needs to be created (DECL is a function-decl of a target-builtin).
   STMT_INFO is the original scalar stmt that we are vectorizing.  */
static gimple *
vect_gen_widened_results_half (vec_info *vinfo, code_helper ch,
                               tree vec_oprnd0, tree vec_oprnd1, int op_type,
                               tree vec_dest, gimple_stmt_iterator *gsi,
                               stmt_vec_info stmt_info)
{
  gimple *new_stmt;
  tree new_temp;

  /* Generate half of the widened result:  */
  if (op_type != binary_op)
    vec_oprnd1 = NULL;
  new_stmt = vect_gimple_build (vec_dest, ch, vec_oprnd0, vec_oprnd1);
  new_temp = make_ssa_name (vec_dest, new_stmt);
  gimple_set_lhs (new_stmt, new_temp);
  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);

  return new_stmt;
}
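/* Why callers invoke this helper twice: widening a vector of N narrow
   elements yields two vectors of N/2 wide elements, a "low" and a "high"
   half (some targets split even/odd instead of lo/hi).  Sketch of the
   lo/hi variant with concrete element types, purely for illustration:

     #include <cstdint>
     #include <vector>

     void widen_halves (const std::vector<int16_t> &in,
                        std::vector<int32_t> &lo, std::vector<int32_t> &hi)
     {
       size_t half = in.size () / 2;
       lo.assign (in.begin (), in.begin () + half);  // widen the low lanes
       hi.assign (in.begin () + half, in.end ());    // widen the high lanes
     }
   */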
/* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
   For multi-step conversions store the resulting vectors and call the function
   recursively.  When NARROW_SRC_P is true, there's still a conversion after
   narrowing, don't store the vectors in the SLP_NODE or in vector info of
   the scalar statement (or in STMT_VINFO_RELATED_STMT chain).  */
4969 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4971 stmt_vec_info stmt_info
,
4972 vec
<tree
> &vec_dsts
,
4973 gimple_stmt_iterator
*gsi
,
4974 slp_tree slp_node
, code_helper code
,
4978 tree vop0
, vop1
, new_tmp
, vec_dest
;
4980 vec_dest
= vec_dsts
.pop ();
4982 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4984 /* Create demotion operation. */
4985 vop0
= (*vec_oprnds
)[i
];
4986 vop1
= (*vec_oprnds
)[i
+ 1];
4987 gimple
*new_stmt
= vect_gimple_build (vec_dest
, code
, vop0
, vop1
);
4988 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4989 gimple_set_lhs (new_stmt
, new_tmp
);
4990 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4991 if (multi_step_cvt
|| narrow_src_p
)
4992 /* Store the resulting vector for next recursive call,
4993 or return the resulting vector_tmp for NARROW FLOAT_EXPR. */
4994 (*vec_oprnds
)[i
/2] = new_tmp
;
4997 /* This is the last step of the conversion sequence. Store the
4998 vectors in SLP_NODE or in vector info of the scalar statement
4999 (or in STMT_VINFO_RELATED_STMT chain). */
5001 slp_node
->push_vec_def (new_stmt
);
5003 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
/* For multi-step demotion operations we first generate demotion operations
   from the source type to the intermediate types, and then combine the
   results (stored in VEC_OPRNDS) in a demotion operation to the destination
   type.  */

/* At each level of recursion we have half of the operands we had at the
   previous level.  */
5015 vec_oprnds
->truncate ((i
+1)/2);
5016 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
5018 stmt_info
, vec_dsts
, gsi
,
5019 slp_node
, VEC_PACK_TRUNC_EXPR
,
5023 vec_dsts
.quick_push (vec_dest
);
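/* The recursion above repeatedly packs pairs of vectors into narrower
   vectors, halving how many remain, until the destination type is reached.
   One step of that packing, sketched with scalar element types (invented
   helper, truncation stands in for VEC_PACK_TRUNC_EXPR):

     #include <vector>

     template <typename WIDE, typename NARROW>
     std::vector<std::vector<NARROW>>
     pack_pairs (const std::vector<std::vector<WIDE>> &ops)
     {
       std::vector<std::vector<NARROW>> out;
       for (size_t i = 0; i + 1 < ops.size (); i += 2)
         {
           std::vector<NARROW> packed;
           for (WIDE v : ops[i])     packed.push_back ((NARROW) v);
           for (WIDE v : ops[i + 1]) packed.push_back ((NARROW) v);
           out.push_back (packed);   // two inputs -> one output vector
         }
       return out;
     }
   */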
/* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
   and VEC_OPRNDS1, for a binary operation associated with scalar statement
   STMT_INFO.  For multi-step conversions store the resulting vectors and
   call the function recursively.  */
5033 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
5034 vec
<tree
> *vec_oprnds0
,
5035 vec
<tree
> *vec_oprnds1
,
5036 stmt_vec_info stmt_info
, tree vec_dest
,
5037 gimple_stmt_iterator
*gsi
,
5039 code_helper ch2
, int op_type
)
5042 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
5043 gimple
*new_stmt1
, *new_stmt2
;
5044 vec
<tree
> vec_tmp
= vNULL
;
5046 vec_tmp
.create (vec_oprnds0
->length () * 2);
5047 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
5049 if (op_type
== binary_op
)
5050 vop1
= (*vec_oprnds1
)[i
];
5054 /* Generate the two halves of promotion operation. */
5055 new_stmt1
= vect_gen_widened_results_half (vinfo
, ch1
, vop0
, vop1
,
5056 op_type
, vec_dest
, gsi
,
5058 new_stmt2
= vect_gen_widened_results_half (vinfo
, ch2
, vop0
, vop1
,
5059 op_type
, vec_dest
, gsi
,
5061 if (is_gimple_call (new_stmt1
))
5063 new_tmp1
= gimple_call_lhs (new_stmt1
);
5064 new_tmp2
= gimple_call_lhs (new_stmt2
);
5068 new_tmp1
= gimple_assign_lhs (new_stmt1
);
5069 new_tmp2
= gimple_assign_lhs (new_stmt2
);
5072 /* Store the results for the next step. */
5073 vec_tmp
.quick_push (new_tmp1
);
5074 vec_tmp
.quick_push (new_tmp2
);
5077 vec_oprnds0
->release ();
5078 *vec_oprnds0
= vec_tmp
;
/* Create vectorized promotion stmts for widening stmts using only half the
   potential vector size for input.  */
5084 vect_create_half_widening_stmts (vec_info
*vinfo
,
5085 vec
<tree
> *vec_oprnds0
,
5086 vec
<tree
> *vec_oprnds1
,
5087 stmt_vec_info stmt_info
, tree vec_dest
,
5088 gimple_stmt_iterator
*gsi
,
5097 vec
<tree
> vec_tmp
= vNULL
;
5099 vec_tmp
.create (vec_oprnds0
->length ());
5100 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
5102 tree new_tmp1
, new_tmp2
, new_tmp3
, out_type
;
5104 gcc_assert (op_type
== binary_op
);
5105 vop1
= (*vec_oprnds1
)[i
];
5107 /* Widen the first vector input. */
5108 out_type
= TREE_TYPE (vec_dest
);
5109 new_tmp1
= make_ssa_name (out_type
);
5110 new_stmt1
= gimple_build_assign (new_tmp1
, NOP_EXPR
, vop0
);
5111 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt1
, gsi
);
5112 if (VECTOR_TYPE_P (TREE_TYPE (vop1
)))
5114 /* Widen the second vector input. */
5115 new_tmp2
= make_ssa_name (out_type
);
5116 new_stmt2
= gimple_build_assign (new_tmp2
, NOP_EXPR
, vop1
);
5117 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt2
, gsi
);
5118 /* Perform the operation. With both vector inputs widened. */
5119 new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, new_tmp2
);
5123 /* Perform the operation. With the single vector input widened. */
5124 new_stmt3
= vect_gimple_build (vec_dest
, code1
, new_tmp1
, vop1
);
5127 new_tmp3
= make_ssa_name (vec_dest
, new_stmt3
);
5128 gimple_assign_set_lhs (new_stmt3
, new_tmp3
);
5129 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt3
, gsi
);
5131 /* Store the results for the next step. */
5132 vec_tmp
.quick_push (new_tmp3
);
5135 vec_oprnds0
->release ();
5136 *vec_oprnds0
= vec_tmp
;
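/* Half widening in scalar terms: promote the narrow inputs to the output
   element type first, then perform the operation once at the wide type.
   Illustrative sketch with fixed element types (not the GCC code path):

     #include <cstdint>
     #include <vector>

     std::vector<int32_t>
     half_widen_add (const std::vector<int16_t> &a,
                     const std::vector<int16_t> &b)
     {
       std::vector<int32_t> out (a.size ());
       for (size_t i = 0; i < a.size (); ++i)
         out[i] = (int32_t) a[i] + (int32_t) b[i];  // widen, then operate
       return out;
     }
   */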
/* Check if STMT_INFO performs a conversion operation that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */
5146 vectorizable_conversion (vec_info
*vinfo
,
5147 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5148 gimple
**vec_stmt
, slp_tree slp_node
,
5149 stmt_vector_for_cost
*cost_vec
)
5151 tree vec_dest
, cvt_op
= NULL_TREE
;
5153 tree op0
, op1
= NULL_TREE
;
5154 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5156 code_helper code
, code1
, code2
;
5157 code_helper codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
5159 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5161 poly_uint64 nunits_in
;
5162 poly_uint64 nunits_out
;
5163 tree vectype_out
, vectype_in
;
5165 tree lhs_type
, rhs_type
;
/* For conversions between floating point and integer, there are two NARROW
   cases.  NARROW_SRC is for FLOAT_EXPR, meaning
   integer --DEMOTION--> integer --FLOAT_EXPR--> floating point.
   This is safe when the range of the source integer can fit into the lower
   precision.  NARROW_DST is for FIX_TRUNC_EXPR, meaning
   floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> integer.
   For other conversions, when there's narrowing, NARROW_DST is used as
   the modifier.  */
5174 enum { NARROW_SRC
, NARROW_DST
, NONE
, WIDEN
} modifier
;
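/* How the modifier is derived further down: comparing the lane counts of
   the input and output vector types.  More output lanes than input lanes
   means the elements get narrower (a packing conversion), fewer means they
   get wider.  A rough sketch with constant lane counts (NARROW_SRC vs
   NARROW_DST is refined later from the operation itself):

     enum conversion_modifier { CM_NONE, CM_NARROW, CM_WIDEN };

     conversion_modifier
     classify_conversion (unsigned nunits_in, unsigned nunits_out)
     {
       if (nunits_out == nunits_in)
         return CM_NONE;
       if (nunits_out % nunits_in == 0)
         return CM_NARROW;   // e.g. int -> short: more, narrower output lanes
       return CM_WIDEN;      // e.g. short -> int: fewer, wider output lanes
     }
   */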
5175 vec
<tree
> vec_oprnds0
= vNULL
;
5176 vec
<tree
> vec_oprnds1
= vNULL
;
5178 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5179 int multi_step_cvt
= 0;
5180 vec
<tree
> interm_types
= vNULL
;
5181 tree intermediate_type
, cvt_type
= NULL_TREE
;
5183 unsigned short fltsz
;
5185 /* Is STMT a vectorizable conversion? */
5187 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5190 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5194 gimple
* stmt
= stmt_info
->stmt
;
5195 if (!(is_gimple_assign (stmt
) || is_gimple_call (stmt
)))
5198 if (gimple_get_lhs (stmt
) == NULL_TREE
5199 || TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5202 if (TREE_CODE (gimple_get_lhs (stmt
)) != SSA_NAME
)
5205 if (is_gimple_assign (stmt
))
5207 code
= gimple_assign_rhs_code (stmt
);
5208 op_type
= TREE_CODE_LENGTH ((tree_code
) code
);
5210 else if (gimple_call_internal_p (stmt
))
5212 code
= gimple_call_internal_fn (stmt
);
5213 op_type
= gimple_call_num_args (stmt
);
5218 bool widen_arith
= (code
== WIDEN_MULT_EXPR
5219 || code
== WIDEN_LSHIFT_EXPR
5220 || widening_fn_p (code
));
5223 && !CONVERT_EXPR_CODE_P (code
)
5224 && code
!= FIX_TRUNC_EXPR
5225 && code
!= FLOAT_EXPR
)
5228 /* Check types of lhs and rhs. */
5229 scalar_dest
= gimple_get_lhs (stmt
);
5230 lhs_type
= TREE_TYPE (scalar_dest
);
5231 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5233 /* Check the operands of the operation. */
5234 slp_tree slp_op0
, slp_op1
= NULL
;
5235 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5236 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
5238 if (dump_enabled_p ())
5239 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5240 "use not simple.\n");
5244 rhs_type
= TREE_TYPE (op0
);
5245 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
5246 && !((INTEGRAL_TYPE_P (lhs_type
)
5247 && INTEGRAL_TYPE_P (rhs_type
))
5248 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
5249 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
5252 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5253 && ((INTEGRAL_TYPE_P (lhs_type
)
5254 && !type_has_mode_precision_p (lhs_type
))
5255 || (INTEGRAL_TYPE_P (rhs_type
)
5256 && !type_has_mode_precision_p (rhs_type
))))
5258 if (dump_enabled_p ())
5259 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5260 "type conversion to/from bit-precision unsupported."
5265 if (op_type
== binary_op
)
5267 gcc_assert (code
== WIDEN_MULT_EXPR
5268 || code
== WIDEN_LSHIFT_EXPR
5269 || widening_fn_p (code
));
5271 op1
= is_gimple_assign (stmt
) ? gimple_assign_rhs2 (stmt
) :
5272 gimple_call_arg (stmt
, 0);
5274 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
5275 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
5277 if (dump_enabled_p ())
5278 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5279 "use not simple.\n");
5282 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
5285 vectype_in
= vectype1_in
;
5288 /* If op0 is an external or constant def, infer the vector type
5289 from the scalar type. */
5291 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
5293 gcc_assert (vectype_in
);
5296 if (dump_enabled_p ())
5297 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5298 "no vectype for scalar type %T\n", rhs_type
);
5303 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5304 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
5306 if (dump_enabled_p ())
5307 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5308 "can't convert between boolean and non "
5309 "boolean vectors %T\n", rhs_type
);
5314 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
5315 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5316 if (known_eq (nunits_out
, nunits_in
))
5321 else if (multiple_p (nunits_out
, nunits_in
))
5322 modifier
= NARROW_DST
;
5325 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
5329 /* Multiple types in SLP are handled by creating the appropriate number of
5330 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5334 else if (modifier
== NARROW_DST
)
5335 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
5337 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
5339 /* Sanity check: make sure that at least one copy of the vectorized stmt
5340 needs to be generated. */
5341 gcc_assert (ncopies
>= 1);
5343 bool found_mode
= false;
5344 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
5345 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
5346 opt_scalar_mode rhs_mode_iter
;
5348 /* Supportable by target? */
5352 if (code
!= FIX_TRUNC_EXPR
5353 && code
!= FLOAT_EXPR
5354 && !CONVERT_EXPR_CODE_P (code
))
5356 gcc_assert (code
.is_tree_code ());
5357 if (supportable_convert_operation ((tree_code
) code
, vectype_out
,
5364 /* For conversions between float and integer types try whether
5365 we can use intermediate signed integer types to support the
5367 if (GET_MODE_SIZE (lhs_mode
) != GET_MODE_SIZE (rhs_mode
)
5368 && (code
== FLOAT_EXPR
||
5369 (code
== FIX_TRUNC_EXPR
&& !flag_trapping_math
)))
5371 bool demotion
= GET_MODE_SIZE (rhs_mode
) > GET_MODE_SIZE (lhs_mode
);
5372 bool float_expr_p
= code
== FLOAT_EXPR
;
5373 unsigned short target_size
;
5374 scalar_mode intermediate_mode
;
5377 intermediate_mode
= lhs_mode
;
5378 target_size
= GET_MODE_SIZE (rhs_mode
);
5382 target_size
= GET_MODE_SIZE (lhs_mode
);
5383 if (!int_mode_for_size
5384 (GET_MODE_BITSIZE (rhs_mode
), 0).exists (&intermediate_mode
))
5387 code1
= float_expr_p
? code
: NOP_EXPR
;
5388 codecvt1
= float_expr_p
? NOP_EXPR
: code
;
5389 opt_scalar_mode mode_iter
;
5390 FOR_EACH_2XWIDER_MODE (mode_iter
, intermediate_mode
)
5392 intermediate_mode
= mode_iter
.require ();
5394 if (GET_MODE_SIZE (intermediate_mode
) > target_size
)
5397 scalar_mode cvt_mode
;
5398 if (!int_mode_for_size
5399 (GET_MODE_BITSIZE (intermediate_mode
), 0).exists (&cvt_mode
))
5402 cvt_type
= build_nonstandard_integer_type
5403 (GET_MODE_BITSIZE (cvt_mode
), 0);
5405 /* Check if the intermediate type can hold OP0's range.
5406 When converting from float to integer this is not necessary
5407 because values that do not fit the (smaller) target type are
5408 unspecified anyway. */
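/* The intermediate-type idea in scalar terms: when the float and integer
   types differ in size, go through a signed integer of the float's size so
   that each individual step is a conversion the target can vectorize.
   Illustrative sketch only (out-of-range values are unspecified, as the
   comment above notes):

     #include <cstdint>

     int16_t double_to_int16_via_int64 (double x)
     {
       int64_t wide = (int64_t) x;  // FIX_TRUNC to the same-size integer
       return (int16_t) wide;       // then a plain integer demotion
     }

     double int16_to_double_via_int64 (int16_t x)
     {
       int64_t wide = x;            // integer promotion first
       return (double) wide;        // then FLOAT from the same-size integer
     }
   */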
5409 if (demotion
&& float_expr_p
)
5411 wide_int op_min_value
, op_max_value
;
5412 if (!vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
5415 if (cvt_type
== NULL_TREE
5416 || (wi::min_precision (op_max_value
, SIGNED
)
5417 > TYPE_PRECISION (cvt_type
))
5418 || (wi::min_precision (op_min_value
, SIGNED
)
5419 > TYPE_PRECISION (cvt_type
)))
5423 cvt_type
= get_vectype_for_scalar_type (vinfo
, cvt_type
, slp_node
);
/* This should only happen for SLP, as long as the loop vectorizer
   only supports same-sized vectors.  */
5426 if (cvt_type
== NULL_TREE
5427 || maybe_ne (TYPE_VECTOR_SUBPARTS (cvt_type
), nunits_in
)
5428 || !supportable_convert_operation ((tree_code
) code1
,
5431 || !supportable_convert_operation ((tree_code
) codecvt1
,
5443 interm_types
.safe_push (cvt_type
);
5444 cvt_type
= NULL_TREE
;
5452 if (dump_enabled_p ())
5453 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5454 "conversion not supported by target.\n");
5458 if (known_eq (nunits_in
, nunits_out
))
5460 if (!(code
.is_tree_code ()
5461 && supportable_half_widening_operation ((tree_code
) code
,
5462 vectype_out
, vectype_in
,
5466 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5469 if (supportable_widening_operation (vinfo
, code
, stmt_info
,
5470 vectype_out
, vectype_in
, &code1
,
5471 &code2
, &multi_step_cvt
,
5474 /* Binary widening operation can only be supported directly by the
5476 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
5480 if (code
!= FLOAT_EXPR
5481 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
5484 fltsz
= GET_MODE_SIZE (lhs_mode
);
5485 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
5487 rhs_mode
= rhs_mode_iter
.require ();
5488 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
5492 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5493 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5494 if (cvt_type
== NULL_TREE
)
5497 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5500 gcc_assert (code
.is_tree_code ());
5501 if (!supportable_convert_operation ((tree_code
) code
, vectype_out
,
5506 else if (!supportable_widening_operation (vinfo
, code
,
5507 stmt_info
, vectype_out
,
5508 cvt_type
, &codecvt1
,
5509 &codecvt2
, &multi_step_cvt
,
5513 gcc_assert (multi_step_cvt
== 0);
5515 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
5518 &code2
, &multi_step_cvt
,
5529 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
5530 codecvt2
= ERROR_MARK
;
5534 interm_types
.safe_push (cvt_type
);
5535 cvt_type
= NULL_TREE
;
5540 gcc_assert (op_type
== unary_op
);
5541 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
5542 &code1
, &multi_step_cvt
,
5546 if (GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
5549 if (code
== FIX_TRUNC_EXPR
)
5552 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
5553 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
5554 if (cvt_type
== NULL_TREE
)
5556 if (supportable_convert_operation ((tree_code
) code
, cvt_type
, vectype_in
,
5561 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
5562 &code1
, &multi_step_cvt
,
/* If op0 can be represented with a low-precision integer,
   truncate it to cvt_type and then do FLOAT_EXPR.  */
5568 else if (code
== FLOAT_EXPR
)
5570 wide_int op_min_value
, op_max_value
;
5571 if (!vect_get_range_info (op0
, &op_min_value
, &op_max_value
))
5575 = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode
), 0);
5576 if (cvt_type
== NULL_TREE
5577 || (wi::min_precision (op_max_value
, SIGNED
)
5578 > TYPE_PRECISION (cvt_type
))
5579 || (wi::min_precision (op_min_value
, SIGNED
)
5580 > TYPE_PRECISION (cvt_type
)))
5583 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_out
);
5584 if (cvt_type
== NULL_TREE
)
5586 if (!supportable_narrowing_operation (NOP_EXPR
, cvt_type
, vectype_in
,
5587 &code1
, &multi_step_cvt
,
5590 if (supportable_convert_operation ((tree_code
) code
, vectype_out
,
5594 modifier
= NARROW_SRC
;
5605 if (!vec_stmt
) /* transformation not required. */
5608 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
5609 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
5611 if (dump_enabled_p ())
5612 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5613 "incompatible vector types for invariants\n");
5616 DUMP_VECT_SCOPE ("vectorizable_conversion");
5617 if (modifier
== NONE
)
5619 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
5620 vect_model_simple_cost (vinfo
, stmt_info
,
5621 ncopies
* (1 + multi_step_cvt
),
5622 dt
, ndts
, slp_node
, cost_vec
);
5624 else if (modifier
== NARROW_SRC
|| modifier
== NARROW_DST
)
5626 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
5627 /* The final packing step produces one vector result per copy. */
5628 unsigned int nvectors
5629 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
5630 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5631 multi_step_cvt
, cost_vec
,
5636 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
5637 /* The initial unpacking step produces two vector results
5638 per copy. MULTI_STEP_CVT is 0 for a single conversion,
5639 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
5640 unsigned int nvectors
5642 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
5644 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
5645 multi_step_cvt
, cost_vec
,
5648 interm_types
.release ();
5653 if (dump_enabled_p ())
5654 dump_printf_loc (MSG_NOTE
, vect_location
,
5655 "transform conversion. ncopies = %d.\n", ncopies
);
5657 if (op_type
== binary_op
)
5659 if (CONSTANT_CLASS_P (op0
))
5660 op0
= fold_convert (TREE_TYPE (op1
), op0
);
5661 else if (CONSTANT_CLASS_P (op1
))
5662 op1
= fold_convert (TREE_TYPE (op0
), op1
);
/* In case of multi-step conversion, we first generate conversion operations
   to the intermediate types, and then from those types to the final one.
   We create vector destinations for the intermediate type (TYPES) received
   from supportable_*_operation, and store them in the correct order
   for future use in vect_create_vectorized_*_stmts ().  */
5670 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
5671 bool widen_or_narrow_float_p
  bool widen_or_narrow_float_p
    = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC);
  vec_dest = vect_create_destination_var (scalar_dest,
					  widen_or_narrow_float_p
					  ? cvt_type : vectype_out);
  vec_dsts.quick_push (vec_dest);

  if (multi_step_cvt)
    for (i = interm_types.length () - 1;
	 interm_types.iterate (i, &intermediate_type); i--)
      {
	vec_dest = vect_create_destination_var (scalar_dest,
						intermediate_type);
	vec_dsts.quick_push (vec_dest);
      }

  if (cvt_type)
    vec_dest = vect_create_destination_var (scalar_dest,
					    widen_or_narrow_float_p
					    ? vectype_out : cvt_type);

  int ninputs = 1;
  if (!slp_node)
    {
      if (modifier == WIDEN)
	;
      else if (modifier == NARROW_SRC || modifier == NARROW_DST)
	{
	  if (multi_step_cvt)
	    ninputs = vect_pow2 (multi_step_cvt);
	}
    }

  switch (modifier)
    {
    case NONE:
      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
			 op0, vectype_in, &vec_oprnds0);
      /* vec_dest is intermediate type operand when multi_step_cvt.  */
      if (multi_step_cvt)
	{
	  cvt_op = vec_dest;
	  vec_dest = vec_dsts[0];
	}

      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  /* Arguments are ready, create the new vector stmt.  */
	  gimple *new_stmt;
	  if (multi_step_cvt)
	    {
	      gcc_assert (multi_step_cvt == 1);
	      new_stmt = vect_gimple_build (cvt_op, codecvt1, vop0);
	      new_temp = make_ssa_name (cvt_op, new_stmt);
	      gimple_assign_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      vop0 = new_temp;
	    }
	  new_stmt = vect_gimple_build (vec_dest, code1, vop0);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  if (slp_node)
	    slp_node->push_vec_def (new_stmt);
	  else
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	}
      break;

    case WIDEN:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
			 op0, vectype_in, &vec_oprnds0,
			 code == WIDEN_LSHIFT_EXPR ? NULL_TREE : op1,
			 vectype_in, &vec_oprnds1);
      if (code == WIDEN_LSHIFT_EXPR)
	{
	  int oprnds_size = vec_oprnds0.length ();
	  vec_oprnds1.create (oprnds_size);
	  for (i = 0; i < oprnds_size; ++i)
	    vec_oprnds1.quick_push (op1);
	}
      /* Arguments are ready.  Create the new vector stmts.  */
      for (i = multi_step_cvt; i >= 0; i--)
	{
	  tree this_dest = vec_dsts[i];
	  code_helper c1 = code1, c2 = code2;
	  if (i == 0 && codecvt2 != ERROR_MARK)
	    {
	      c1 = codecvt1;
	      c2 = codecvt2;
	    }
	  if (known_eq (nunits_out, nunits_in))
	    vect_create_half_widening_stmts (vinfo, &vec_oprnds0, &vec_oprnds1,
					     stmt_info, this_dest, gsi, c1,
					     op_type);
	  else
	    vect_create_vectorized_promotion_stmts (vinfo, &vec_oprnds0,
						    &vec_oprnds1, stmt_info,
						    this_dest, gsi,
						    c1, c2, op_type);
	}

      FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	{
	  gimple *new_stmt;
	  if (cvt_type)
	    {
	      new_temp = make_ssa_name (vec_dest);
	      new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    }
	  else
	    new_stmt = SSA_NAME_DEF_STMT (vop0);
	  if (slp_node)
	    slp_node->push_vec_def (new_stmt);
	  else
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	}
      break;

    case NARROW_SRC:
    case NARROW_DST:
      /* In case the vectorization factor (VF) is bigger than the number
	 of elements that we can fit in a vectype (nunits), we have to
	 generate more than one vector stmt - i.e - we need to "unroll"
	 the vector stmt by a factor VF/nunits.  */
      vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs,
			 op0, vectype_in, &vec_oprnds0);
      /* Arguments are ready.  Create the new vector stmts.  */
      if (cvt_type && modifier == NARROW_DST)
	FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
	  {
	    new_temp = make_ssa_name (vec_dest);
	    gimple *new_stmt = vect_gimple_build (new_temp, codecvt1, vop0);
	    vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    vec_oprnds0[i] = new_temp;
	  }
      vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0,
					     multi_step_cvt,
					     stmt_info, vec_dsts, gsi,
					     slp_node, code1,
					     modifier == NARROW_SRC);
      /* After demoting op0 to cvt_type, convert it to dest.  */
      if (cvt_type && code == FLOAT_EXPR)
	{
	  for (unsigned int i = 0; i != vec_oprnds0.length () / 2; i++)
	    {
	      /* Arguments are ready, create the new vector stmt.  */
	      gcc_assert (TREE_CODE_LENGTH ((tree_code) codecvt1) == unary_op);
	      gimple *new_stmt
		= vect_gimple_build (vec_dest, codecvt1, vec_oprnds0[i]);
	      new_temp = make_ssa_name (vec_dest, new_stmt);
	      gimple_set_lhs (new_stmt, new_temp);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      /* This is the last step of the conversion sequence.  Store the
		 vectors in SLP_NODE or in vector info of the scalar statement
		 (or in STMT_VINFO_RELATED_STMT chain).  */
	      if (slp_node)
		slp_node->push_vec_def (new_stmt);
	      else
		STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	    }
	}
      break;
    }
  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  interm_types.release ();

  return true;
}

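/* Illustrative sketch (not part of GCC): the NONE/WIDEN/NARROW paths above
   correspond to source loops such as the following; the widening case has
   to produce two vector results per input vector, while the narrowing case
   consumes two input vectors per result:

     void
     widen (int *restrict dst, const short *restrict src, int n)
     {
       for (int i = 0; i < n; i++)
	 dst[i] = src[i];	// short -> int, modifier == WIDEN
     }

     void
     narrow (short *restrict dst, const int *restrict src, int n)
     {
       for (int i = 0; i < n; i++)
	 dst[i] = src[i];	// int -> short, modifier == NARROW_DST
     }  */
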
/* Return true if we can assume from the scalar form of STMT_INFO that
   neither the scalar nor the vector forms will generate code.  STMT_INFO
   is known not to involve a data reference.  */

bool
vect_nop_conversion_p (stmt_vec_info stmt_info)
{
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  tree lhs = gimple_assign_lhs (stmt);
  tree_code code = gimple_assign_rhs_code (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);

  if (code == SSA_NAME || code == VIEW_CONVERT_EXPR)
    return true;

  if (CONVERT_EXPR_CODE_P (code))
    return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs));

  return false;
}

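/* Illustrative sketch (not part of GCC): vect_nop_conversion_p holds for
   copies and for conversions that only relabel the bits, e.g.

     unsigned int
     as_unsigned (int x)
     {
       return (unsigned int) x;	// same mode and precision: no code emitted
     }

   so neither the scalar nor the vector form of such a statement needs to
   be costed as a real operation.  */
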
/* Function vectorizable_assignment.

   Check if STMT_INFO performs an assignment (copy) that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_assignment (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, slp_tree slp_node,
			 stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  tree new_temp;
  enum vect_def_type dt[1] = {vect_unknown_def_type};
  int ndts = 1;
  int ncopies;
  int i;
  vec<tree> vec_oprnds = vNULL;
  tree vop;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  enum tree_code code;
  tree vectype_in;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is vectorizable assignment?  */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  if (STMT_VINFO_DATA_REF (stmt_info))
    return false;

  code = gimple_assign_rhs_code (stmt);
  if (!(gimple_assign_single_p (stmt)
	|| code == PAREN_EXPR
	|| CONVERT_EXPR_CODE_P (code)))
    return false;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  slp_tree slp_op;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 0, &op, &slp_op,
			   &dt[0], &vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  if (!vectype_in)
    vectype_in = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), slp_node);

  /* We can handle VIEW_CONVERT conversions that do not change the number
     of elements or the vector size or other conversions when the component
     types are nop-convertible.  */
  if (!vectype_in
      || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in), nunits)
      || (code == VIEW_CONVERT_EXPR
	  && maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)),
		       GET_MODE_SIZE (TYPE_MODE (vectype_in))))
      || (CONVERT_EXPR_CODE_P (code)
	  && !tree_nop_conversion_p (TREE_TYPE (vectype),
				     TREE_TYPE (vectype_in))))
    return false;

  if (VECTOR_BOOLEAN_TYPE_P (vectype) != VECTOR_BOOLEAN_TYPE_P (vectype_in))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "can't convert between boolean and non "
			 "boolean vectors %T\n", TREE_TYPE (op));
      return false;
    }

  /* We do not handle bit-precision changes.  */
  if ((CONVERT_EXPR_CODE_P (code)
       || code == VIEW_CONVERT_EXPR)
      && ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
	   && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
	  || (INTEGRAL_TYPE_P (TREE_TYPE (op))
	      && !type_has_mode_precision_p (TREE_TYPE (op))))
      /* But a conversion that does not change the bit-pattern is ok.  */
      && !(INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
	   && INTEGRAL_TYPE_P (TREE_TYPE (op))
	   && (((TYPE_PRECISION (TREE_TYPE (scalar_dest))
		 > TYPE_PRECISION (TREE_TYPE (op)))
		&& TYPE_UNSIGNED (TREE_TYPE (op)))
	       || (TYPE_PRECISION (TREE_TYPE (scalar_dest))
		   == TYPE_PRECISION (TREE_TYPE (op))))))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "type conversion to/from bit-precision "
			 "unsupported.\n");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      if (slp_node
	  && !vect_maybe_update_slp_op_vectype (slp_op, vectype_in))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_assignment");
      if (!vect_nop_conversion_p (stmt_info))
	vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node,
				cost_vec);
      return true;
    }

  /* Transform.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "transform assignment.\n");

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle use.  */
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies, op, &vec_oprnds);

  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds, i, vop)
    {
      if (CONVERT_EXPR_CODE_P (code)
	  || code == VIEW_CONVERT_EXPR)
	vop = build1 (VIEW_CONVERT_EXPR, vectype, vop);
      gassign *new_stmt = gimple_build_assign (vec_dest, vop);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      if (slp_node)
	slp_node->push_vec_def (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }
  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds.release ();
  return true;
}

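/* Illustrative sketch (not part of GCC): the assignments handled here are
   plain SSA copies and no-op conversions, which become one VIEW_CONVERT_EXPR
   per vector, e.g.

     void
     copy_bits (unsigned int *restrict dst, const int *restrict src, int n)
     {
       for (int i = 0; i < n; i++)
	 dst[i] = (unsigned int) src[i];	// nop conversion per element
     }

   where each vector statement is just  vdst = VIEW_CONVERT_EXPR<vectype>(vsrc).  */
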
/* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
   either as shift by a scalar or by a vector.  */

bool
vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type)
{
  machine_mode vec_mode;
  optab optab;
  int icode;
  tree vectype;

  vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
  if (!vectype)
    return false;

  optab = optab_for_tree_code (code, vectype, optab_scalar);
  if (!optab
      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (!optab
	  || (optab_handler (optab, TYPE_MODE (vectype))
	      == CODE_FOR_nothing))
	return false;
    }

  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    return false;

  return true;
}

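/* Illustrative sketch (not part of GCC): callers can use this predicate to
   ask whether, say, an arithmetic right shift of shorts is vectorizable at
   all before committing to a rewrite that introduces such a shift:

     if (vect_supportable_shift (vinfo, RSHIFT_EXPR, short_integer_type_node))
       {
	 // safe to emit a shift on vector(short) for this target, either
	 // with a single scalar amount or with a per-lane vector amount
       }  */
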
/* Function vectorizable_shift.

   Check if STMT_INFO performs a shift operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_shift (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL;
  tree vec_oprnd1 = NULL_TREE;
  tree vectype;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum tree_code code;
  machine_mode vec_mode;
  tree new_temp;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 2;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  tree op1_vectype;
  int ncopies;
  int i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  tree vop0, vop1;
  unsigned int k;
  bool scalar_shift_arg = true;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  bool incompatible_op1_vectype_p = false;

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle
      && ! vec_stmt)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return false;

  code = gimple_assign_rhs_code (stmt);

  if (!(code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
	|| code == RROTATE_EXPR))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);
  if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision shifts not supported.\n");
      return false;
    }

  slp_tree slp_op0;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			   0, &op0, &slp_op0, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  /* If op0 is an external or constant def, infer the vector type
     from the scalar type.  */
  if (!vectype)
    vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0), slp_node);
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type\n");
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in))
    return false;

  stmt_vec_info op1_def_stmt_info;
  slp_tree slp_op1;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node, 1, &op1, &slp_op1,
			   &dt[1], &op1_vectype, &op1_def_stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* Determine whether the shift amount is a vector, or scalar.  If the
     shift/rotate amount is a vector, use the vector/vector shift optabs.  */

  if ((dt[1] == vect_internal_def
       || dt[1] == vect_induction_def
       || dt[1] == vect_nested_cycle)
      && !slp_node)
    scalar_shift_arg = false;
  else if (dt[1] == vect_constant_def
	   || dt[1] == vect_external_def
	   || dt[1] == vect_internal_def)
    {
      /* In SLP, need to check whether the shift count is the same,
	 in loops if it is a constant or invariant, it is always
	 a scalar shift.  */
      if (slp_node)
	{
	  vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
	  stmt_vec_info slpstmt_info;

	  FOR_EACH_VEC_ELT (stmts, k, slpstmt_info)
	    {
	      gassign *slpstmt = as_a <gassign *> (slpstmt_info->stmt);
	      if (!operand_equal_p (gimple_assign_rhs2 (slpstmt), op1, 0))
		scalar_shift_arg = false;
	    }

	  /* For internal SLP defs we have to make sure we see scalar stmts
	     for all vector elements.
	     ???  For different vectors we could resort to a different
	     scalar shift operand but code-generation below simply always
	     takes the first.  */
	  if (dt[1] == vect_internal_def
	      && maybe_ne (nunits_out * SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node),
			   stmts.length ()))
	    scalar_shift_arg = false;
	}

      /* If the shift amount is computed by a pattern stmt we cannot
	 use the scalar amount directly thus give up and use a vector
	 shift.  */
      if (op1_def_stmt_info && is_pattern_stmt_p (op1_def_stmt_info))
	scalar_shift_arg = false;
    }
  else
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "operand mode requires invariant argument.\n");
      return false;
    }

  /* Vector shifted by vector.  */
  bool was_scalar_shift_arg = scalar_shift_arg;
  if (!scalar_shift_arg)
    {
      optab = optab_for_tree_code (code, vectype, optab_vector);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vector/vector shift/rotate found.\n");

      if (!op1_vectype)
	op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1),
						   slp_op1);
      incompatible_op1_vectype_p
	= (op1_vectype == NULL_TREE
	   || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype),
			TYPE_VECTOR_SUBPARTS (vectype))
	   || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype));
      if (incompatible_op1_vectype_p
	  && (!slp_node
	      || SLP_TREE_DEF_TYPE (slp_op1) != vect_constant_def
	      || slp_op1->refcnt != 1))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unusable type for last operand in"
			     " vector/vector shift/rotate.\n");
	  return false;
	}
    }
  /* See if the machine has a vector shifted by scalar insn and if not
     then see if it has a vector shifted by vector insn.  */
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_scalar);
      if (optab
	  && optab_handler (optab, TYPE_MODE (vectype)) != CODE_FOR_nothing)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vector/scalar shift/rotate found.\n");
	}
      else
	{
	  optab = optab_for_tree_code (code, vectype, optab_vector);
	  if (optab
	      && (optab_handler (optab, TYPE_MODE (vectype))
		  != CODE_FOR_nothing))
	    {
	      scalar_shift_arg = false;

	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "vector/vector shift/rotate found.\n");

	      if (!op1_vectype)
		op1_vectype = get_vectype_for_scalar_type (vinfo,
							   TREE_TYPE (op1),
							   slp_op1);

	      /* Unlike the other binary operators, shifts/rotates have
		 the rhs being int, instead of the same type as the lhs,
		 so make sure the scalar is the right type if we are
		 dealing with vectors of long long/long/short/char.  */
	      incompatible_op1_vectype_p
		= (!op1_vectype
		   || !tree_nop_conversion_p (TREE_TYPE (vectype),
					      TREE_TYPE (op1)));
	      if (incompatible_op1_vectype_p
		  && dt[1] == vect_internal_def)
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "unusable type for last operand in"
				     " vector/vector shift/rotate.\n");
		  return false;
		}
	    }
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no optab.\n");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  icode = (int) optab_handler (optab, vec_mode);
  if (icode == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      return false;
    }
  /* vector lowering cannot optimize vector shifts using word arithmetic.  */
  if (vect_emulated_vector_p (vectype))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      if (slp_node
	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
	      || ((!scalar_shift_arg || dt[1] == vect_internal_def)
		  && (!incompatible_op1_vectype_p
		      || dt[1] == vect_constant_def)
		  && !vect_maybe_update_slp_op_vectype
			(slp_op1,
			 incompatible_op1_vectype_p ? vectype : op1_vectype))))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}
      /* Now adjust the constant shift amount in place.  */
      if (slp_node
	  && incompatible_op1_vectype_p
	  && dt[1] == vect_constant_def)
	{
	  for (unsigned i = 0;
	       i < SLP_TREE_SCALAR_OPS (slp_op1).length (); ++i)
	    {
	      SLP_TREE_SCALAR_OPS (slp_op1)[i]
		= fold_convert (TREE_TYPE (vectype),
				SLP_TREE_SCALAR_OPS (slp_op1)[i]);
	      gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1)[i])
			   == INTEGER_CST));
	    }
	}
      STMT_VINFO_TYPE (stmt_info) = shift_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_shift");
      vect_model_simple_cost (vinfo, stmt_info, ncopies, dt,
			      scalar_shift_arg ? 1 : ndts, slp_node, cost_vec);
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  if (incompatible_op1_vectype_p && !slp_node)
    {
      gcc_assert (!scalar_shift_arg && was_scalar_shift_arg);
      op1 = fold_convert (TREE_TYPE (vectype), op1);
      if (dt[1] != vect_constant_def)
	op1 = vect_init_vector (vinfo, stmt_info, op1,
				TREE_TYPE (vectype), NULL);
    }

  /* Handle def.  */
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  if (scalar_shift_arg && dt[1] != vect_internal_def)
    {
      /* Vector shl and shr insn patterns can be defined with scalar
	 operand 2 (shift operand).  In this case, use constant or loop
	 invariant op1 directly, without extending it to vector mode
	 first.  */
      optab_op2_mode = insn_data[icode].operand[2].mode;
      if (!VECTOR_MODE_P (optab_op2_mode))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "operand 1 using scalar mode.\n");
	  vec_oprnd1 = op1;
	  vec_oprnds1.create (slp_node ? slp_node->vec_stmts_size : ncopies);
	  vec_oprnds1.quick_push (vec_oprnd1);
	  /* Store vec_oprnd1 for every vector stmt to be created.
	     We check during the analysis that all the shift arguments
	     are the same.
	     TODO: Allow different constants for different vector
	     stmts generated for an SLP instance.  */
	  for (k = 0;
	       k < (slp_node ? slp_node->vec_stmts_size - 1 : ncopies - 1); k++)
	    vec_oprnds1.quick_push (vec_oprnd1);
	}
    }
  else if (!scalar_shift_arg && slp_node && incompatible_op1_vectype_p)
    {
      if (was_scalar_shift_arg)
	{
	  /* If the argument was the same in all lanes create
	     the correctly typed vector shift amount directly.  */
	  op1 = fold_convert (TREE_TYPE (vectype), op1);
	  op1 = vect_init_vector (vinfo, stmt_info, op1, TREE_TYPE (vectype),
				  !loop_vinfo ? gsi : NULL);
	  vec_oprnd1 = vect_init_vector (vinfo, stmt_info, op1, vectype,
					 !loop_vinfo ? gsi : NULL);
	  vec_oprnds1.create (slp_node->vec_stmts_size);
	  for (k = 0; k < slp_node->vec_stmts_size; k++)
	    vec_oprnds1.quick_push (vec_oprnd1);
	}
      else if (dt[1] == vect_constant_def)
	/* The constant shift amount has been adjusted in place.  */
	;
      else
	gcc_assert (TYPE_MODE (op1_vectype) == TYPE_MODE (vectype));
    }

  /* vec_oprnd1 is available if operand 1 should be of a scalar-type
     (a special case for certain kind of vector shifts); otherwise,
     operand 1 should be of a vector type (the usual case).  */
  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, &vec_oprnds0,
		     vec_oprnd1 ? NULL_TREE : op1, &vec_oprnds1);

  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    {
      /* For internal defs where we need to use a scalar shift arg
	 extract the first lane.  */
      if (scalar_shift_arg && dt[1] == vect_internal_def)
	{
	  vop1 = vec_oprnds1[0];
	  new_temp = make_ssa_name (TREE_TYPE (TREE_TYPE (vop1)));
	  gassign *new_stmt
	    = gimple_build_assign (new_temp,
				   build3 (BIT_FIELD_REF, TREE_TYPE (new_temp),
					   vop1,
					   TYPE_SIZE (TREE_TYPE (new_temp)),
					   bitsize_zero_node));
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  vop1 = new_temp;
	}
      else
	vop1 = vec_oprnds1[i];
      gassign *new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
      if (slp_node)
	slp_node->push_vec_def (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();

  return true;
}

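/* Illustrative sketch (not part of GCC): the scalar_shift_arg logic above
   distinguishes the two shapes a shift can take in a loop, assuming the
   target provides both vector-by-scalar and vector-by-vector shift patterns:

     void
     shift_by_scalar (int *restrict a, int n, int s)
     {
       for (int i = 0; i < n; i++)
	 a[i] >>= s;		// one invariant amount: vector/scalar shift
     }

     void
     shift_by_vector (int *restrict a, const int *restrict s, int n)
     {
       for (int i = 0; i < n; i++)
	 a[i] >>= s[i];		// per-element amount: vector/vector shift
     }  */
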
/* Function vectorizable_operation.

   Check if STMT_INFO performs a binary, unary or ternary operation that can
   be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_operation (vec_info *vinfo,
			stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			gimple **vec_stmt, slp_tree slp_node,
			stmt_vector_for_cost *cost_vec)
{
  tree vec_dest;
  tree scalar_dest;
  tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
  tree vectype;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  enum tree_code code, orig_code;
  machine_mode vec_mode;
  tree new_temp;
  int op_type;
  optab optab;
  bool target_support_p;
  enum vect_def_type dt[3]
    = {vect_unknown_def_type, vect_unknown_def_type, vect_unknown_def_type};
  int ndts = 3;
  poly_uint64 nunits_in;
  poly_uint64 nunits_out;
  tree vectype_out;
  int ncopies, vec_num;
  int i;
  vec<tree> vec_oprnds0 = vNULL;
  vec<tree> vec_oprnds1 = vNULL;
  vec<tree> vec_oprnds2 = vNULL;
  tree vop0, vop1, vop2;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);

  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def
      && ! vec_stmt)
    return false;

  /* Is STMT a vectorizable binary/unary operation?   */
  gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
  if (!stmt)
    return false;

  /* Loads and stores are handled in vectorizable_{load,store}.  */
  if (STMT_VINFO_DATA_REF (stmt_info))
    return false;

  orig_code = code = gimple_assign_rhs_code (stmt);

  /* Shifts are handled in vectorizable_shift.  */
  if (code == LSHIFT_EXPR
      || code == RSHIFT_EXPR
      || code == LROTATE_EXPR
      || code == RROTATE_EXPR)
    return false;

  /* Comparisons are handled in vectorizable_comparison.  */
  if (TREE_CODE_CLASS (code) == tcc_comparison)
    return false;

  /* Conditions are handled in vectorizable_condition.  */
  if (code == COND_EXPR)
    return false;

  /* For pointer addition and subtraction, we should use the normal
     plus and minus for the vector operation.  */
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  if (code == POINTER_DIFF_EXPR)
    code = MINUS_EXPR;

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op && op_type != ternary_op)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "num. args = %d (not unary/binary/ternary op).\n",
			 op_type);
      return false;
    }

  scalar_dest = gimple_assign_lhs (stmt);
  vectype_out = STMT_VINFO_VECTYPE (stmt_info);

  /* Most operations cannot handle bit-precision types without extra
     truncations.  */
  bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out);
  if (!mask_op_p
      && !type_has_mode_precision_p (TREE_TYPE (scalar_dest))
      /* Exception are bitwise binary operations.  */
      && code != BIT_IOR_EXPR
      && code != BIT_XOR_EXPR
      && code != BIT_AND_EXPR)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bit-precision arithmetic not supported.\n");
      return false;
    }

  slp_tree slp_op0;
  if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			   0, &op0, &slp_op0, &dt[0], &vectype))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "use not simple.\n");
      return false;
    }
  bool is_invariant = (dt[0] == vect_external_def
		       || dt[0] == vect_constant_def);
  /* If op0 is an external or constant def, infer the vector type
     from the scalar type.  */
  if (!vectype)
    {
      /* For boolean type we cannot determine vectype by
	 invariant value (don't know whether it is a vector
	 of booleans or vector of integers).  We use output
	 vectype because operations on boolean don't change
	 type.  */
      if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0)))
	{
	  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest)))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not supported operation on bool value.\n");
	      return false;
	    }
	  vectype = vectype_out;
	}
      else
	vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0),
					       slp_node);
    }
  if (vec_stmt)
    gcc_assert (vectype);
  if (!vectype)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "no vectype for scalar type %T\n",
			 TREE_TYPE (op0));
      return false;
    }

  nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
  if (maybe_ne (nunits_out, nunits_in)
      || !tree_nop_conversion_p (TREE_TYPE (vectype_out), TREE_TYPE (vectype)))
    return false;

  tree vectype2 = NULL_TREE, vectype3 = NULL_TREE;
  slp_tree slp_op1 = NULL, slp_op2 = NULL;
  if (op_type == binary_op || op_type == ternary_op)
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			       1, &op1, &slp_op1, &dt[1], &vectype2))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
      is_invariant &= (dt[1] == vect_external_def
		       || dt[1] == vect_constant_def);
      if (vectype2
	  && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype2))
	      || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
					 TREE_TYPE (vectype2))))
	return false;
    }
  if (op_type == ternary_op)
    {
      if (!vect_is_simple_use (vinfo, stmt_info, slp_node,
			       2, &op2, &slp_op2, &dt[2], &vectype3))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "use not simple.\n");
	  return false;
	}
      is_invariant &= (dt[2] == vect_external_def
		       || dt[2] == vect_constant_def);
      if (vectype3
	  && (maybe_ne (nunits_out, TYPE_VECTOR_SUBPARTS (vectype3))
	      || !tree_nop_conversion_p (TREE_TYPE (vectype_out),
					 TREE_TYPE (vectype3))))
	return false;
    }

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    {
      ncopies = 1;
      vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
    }
  else
    {
      ncopies = vect_get_num_copies (loop_vinfo, vectype);
      vec_num = 1;
    }

  gcc_assert (ncopies >= 1);

  /* Reject attempts to combine mask types with nonmask types, e.g. if
     we have an AND between a (nonmask) boolean loaded from memory and
     a (mask) boolean result of a comparison.

     TODO: We could easily fix these cases up using pattern statements.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p
      || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p)
      || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "mixed mask and nonmask vector types\n");
      return false;
    }

  /* Supportable by target?  */

  vec_mode = TYPE_MODE (vectype);
  if (code == MULT_HIGHPART_EXPR)
    target_support_p = can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype));
  else
    {
      optab = optab_for_tree_code (code, vectype, optab_default);
      if (!optab)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "no optab.\n");
	  return false;
	}
      target_support_p = (optab_handler (optab, vec_mode) != CODE_FOR_nothing
			  || optab_libfunc (optab, vec_mode));
    }

  bool using_emulated_vectors_p = vect_emulated_vector_p (vectype);
  if (!target_support_p || using_emulated_vectors_p)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "op not supported by target.\n");
      /* When vec_mode is not a vector mode and we verified ops we
	 do not have to lower like AND are natively supported let
	 those through even when the mode isn't word_mode.  For
	 ops we have to lower the lowering code assumes we are
	 dealing with word_mode.  */
      if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype))
	  || (((code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR)
	       || !target_support_p)
	      && maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD))
	  /* Check only during analysis.  */
	  || (!vec_stmt && !vect_can_vectorize_without_simd_p (code)))
	{
	  if (dump_enabled_p ())
	    dump_printf (MSG_NOTE, "using word mode not possible.\n");
	  return false;
	}
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "proceeding using word mode.\n");
      using_emulated_vectors_p = true;
    }

  int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info);
  vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL);
  vec_loop_lens *lens = (loop_vinfo ? &LOOP_VINFO_LENS (loop_vinfo) : NULL);
  internal_fn cond_fn = get_conditional_internal_fn (code);
  internal_fn cond_len_fn = get_conditional_len_internal_fn (code);

  /* If operating on inactive elements could generate spurious traps,
     we need to restrict the operation to active lanes.  Note that this
     specifically doesn't apply to unhoisted invariants, since they
     operate on the same value for every lane.

     Similarly, if this operation is part of a reduction, a fully-masked
     loop should only change the active lanes of the reduction chain,
     keeping the inactive lanes as-is.  */
  bool mask_out_inactive = ((!is_invariant && gimple_could_trap_p (stmt))
			    || reduc_idx >= 0);

  if (!vec_stmt) /* transformation not required.  */
    {
      if (loop_vinfo
	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo)
	  && mask_out_inactive)
	{
	  if (cond_len_fn != IFN_LAST
	      && direct_internal_fn_supported_p (cond_len_fn, vectype,
						 OPTIMIZE_FOR_SPEED))
	    vect_record_loop_len (loop_vinfo, lens, ncopies * vec_num, vectype,
				  1);
	  else if (cond_fn != IFN_LAST
		   && direct_internal_fn_supported_p (cond_fn, vectype,
						      OPTIMIZE_FOR_SPEED))
	    vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
				   vectype, NULL);
	  else
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "can't use a fully-masked loop because no"
				 " conditional operation is available.\n");
	      LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo) = false;
	    }
	}

      /* Put types on constant and invariant SLP children.  */
      if (slp_node
	  && (!vect_maybe_update_slp_op_vectype (slp_op0, vectype)
	      || !vect_maybe_update_slp_op_vectype (slp_op1, vectype)
	      || !vect_maybe_update_slp_op_vectype (slp_op2, vectype)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");
	  return false;
	}

      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      DUMP_VECT_SCOPE ("vectorizable_operation");
      vect_model_simple_cost (vinfo, stmt_info,
			      ncopies, dt, ndts, slp_node, cost_vec);
      if (using_emulated_vectors_p)
	{
	  /* The above vect_model_simple_cost call handles constants
	     in the prologue and (mis-)costs one of the stmts as
	     vector stmt.  See below for the actual lowering that will
	     be applied.  */
	  unsigned n
	    = slp_node ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) : ncopies;
	  switch (code)
	    {
	    case PLUS_EXPR:
	      n *= 5;
	      break;
	    case MINUS_EXPR:
	      n *= 6;
	      break;
	    case NEGATE_EXPR:
	      n *= 4;
	      break;
	    default:
	      /* Bit operations do not have extra cost and are accounted
		 as vector stmt by vect_model_simple_cost.  */
	      n = 0;
	      break;
	    }
	  if (n != 0)
	    {
	      /* We also need to materialize two large constants.  */
	      record_stmt_cost (cost_vec, 2, scalar_stmt, stmt_info,
				0, vect_prologue);
	      record_stmt_cost (cost_vec, n, scalar_stmt, stmt_info,
				0, vect_body);
	    }
	}
      return true;
    }

  /* Transform.  */

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform binary/unary operation.\n");

  bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
  bool len_loop_p = loop_vinfo && LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo);

  /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
     vectors with unsigned elements, but the result is signed.  So, we
     need to compute the MINUS_EXPR into vectype temporary and
     VIEW_CONVERT_EXPR it into the final vectype_out result.  */
  tree vec_cvt_dest = NULL_TREE;
  if (orig_code == POINTER_DIFF_EXPR)
    {
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      vec_cvt_dest = vect_create_destination_var (scalar_dest, vectype_out);
    }
  /* Handle def.  */
  else
    vec_dest = vect_create_destination_var (scalar_dest, vectype_out);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
     we need to create 4 vectorized stmts):

     before vectorization:
                                RELATED_STMT    VEC_STMT
        S1:     x = memref      -               -
        S2:     z = x + 1       -               -

     step 1: vectorize stmt S1 (done in vectorizable_load.  See more details
             there):
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        S2:     z = x + 1       -               -

     step2: vectorize stmt S2 (done here):
        To vectorize stmt S2 we first need to find the relevant vector
        def for the first operand 'x'.  This is, as usual, obtained from
        the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
        that defines 'x' (S1).  This way we find the stmt VS1_0, and the
        relevant vector def 'vx0'.  Having found 'vx0' we can generate
        the vector stmt VS2_0, and as usual, record it in the
        STMT_VINFO_VEC_STMT of stmt S2.
        When creating the second copy (VS2_1), we obtain the relevant vector
        def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
        stmt VS1_0.  This way we find the stmt VS1_1 and the relevant
        vector def 'vx1'.  Using 'vx1' we create stmt VS2_1 and record a
        pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
        Similarly when creating stmts VS2_2 and VS2_3.  This is the resulting
        chain of stmts and pointers:
                                RELATED_STMT    VEC_STMT
        VS1_0:  vx0 = memref0   VS1_1           -
        VS1_1:  vx1 = memref1   VS1_2           -
        VS1_2:  vx2 = memref2   VS1_3           -
        VS1_3:  vx3 = memref3   -               -
        S1:     x = load        -               VS1_0
        VS2_0:  vz0 = vx0 + v1  VS2_1           -
        VS2_1:  vz1 = vx1 + v1  VS2_2           -
        VS2_2:  vz2 = vx2 + v1  VS2_3           -
        VS2_3:  vz3 = vx3 + v1  -               -
        S2:     z = x + 1       -               VS2_0  */

  vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies,
		     op0, &vec_oprnds0, op1, &vec_oprnds1, op2, &vec_oprnds2);
  /* Arguments are ready.  Create the new vector stmt.  */
  FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0)
    {
      gimple *new_stmt = NULL;
      vop1 = ((op_type == binary_op || op_type == ternary_op)
	      ? vec_oprnds1[i] : NULL_TREE);
      vop2 = ((op_type == ternary_op) ? vec_oprnds2[i] : NULL_TREE);
      if (using_emulated_vectors_p
	  && (code == PLUS_EXPR || code == MINUS_EXPR || code == NEGATE_EXPR))
	{
	  /* Lower the operation.  This follows vector lowering.  */
	  unsigned int width = vector_element_bits (vectype);
	  tree inner_type = TREE_TYPE (vectype);
	  tree word_type
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (word_mode), 1);
	  HOST_WIDE_INT max = GET_MODE_MASK (TYPE_MODE (inner_type));
	  tree low_bits = build_replicated_int_cst (word_type, width, max >> 1);
	  tree high_bits
	    = build_replicated_int_cst (word_type, width, max & ~(max >> 1));
	  tree wvop0 = make_ssa_name (word_type);
	  new_stmt = gimple_build_assign (wvop0, VIEW_CONVERT_EXPR,
					  build1 (VIEW_CONVERT_EXPR,
						  word_type, vop0));
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  tree result_low, signs;
	  if (code == PLUS_EXPR || code == MINUS_EXPR)
	    {
	      tree wvop1 = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (wvop1, VIEW_CONVERT_EXPR,
					      build1 (VIEW_CONVERT_EXPR,
						      word_type, vop1));
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      signs = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (signs,
					      BIT_XOR_EXPR, wvop0, wvop1);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      tree b_low = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (b_low,
					      BIT_AND_EXPR, wvop1, low_bits);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      tree a_low = make_ssa_name (word_type);
	      if (code == PLUS_EXPR)
		new_stmt = gimple_build_assign (a_low,
						BIT_AND_EXPR, wvop0, low_bits);
	      else
		new_stmt = gimple_build_assign (a_low,
						BIT_IOR_EXPR, wvop0, high_bits);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      if (code == MINUS_EXPR)
		{
		  new_stmt = gimple_build_assign (NULL_TREE,
						  BIT_NOT_EXPR, signs);
		  signs = make_ssa_name (word_type);
		  gimple_assign_set_lhs (new_stmt, signs);
		  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt,
					       gsi);
		}
	      new_stmt = gimple_build_assign (NULL_TREE,
					      BIT_AND_EXPR, signs, high_bits);
	      signs = make_ssa_name (word_type);
	      gimple_assign_set_lhs (new_stmt, signs);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      result_low = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (result_low, code, a_low, b_low);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    }
	  else
	    {
	      tree a_low = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (a_low,
					      BIT_AND_EXPR, wvop0, low_bits);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      signs = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (signs, BIT_NOT_EXPR, wvop0);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      new_stmt = gimple_build_assign (NULL_TREE,
					      BIT_AND_EXPR, signs, high_bits);
	      signs = make_ssa_name (word_type);
	      gimple_assign_set_lhs (new_stmt, signs);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	      result_low = make_ssa_name (word_type);
	      new_stmt = gimple_build_assign (result_low,
					      MINUS_EXPR, high_bits, a_low);
	      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	    }
	  new_stmt = gimple_build_assign (NULL_TREE, BIT_XOR_EXPR, result_low,
					  signs);
	  result_low = make_ssa_name (word_type);
	  gimple_assign_set_lhs (new_stmt, result_low);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  new_stmt = gimple_build_assign (NULL_TREE, VIEW_CONVERT_EXPR,
					  build1 (VIEW_CONVERT_EXPR,
						  vectype, result_low));
	  new_temp = make_ssa_name (vectype);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	}
      else if ((masked_loop_p || len_loop_p) && mask_out_inactive)
	{
	  tree mask;
	  if (masked_loop_p)
	    mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
				       vec_num * ncopies, vectype, i);
	  else
	    /* Dummy mask.  */
	    mask = build_minus_one_cst (truth_type_for (vectype));
	  auto_vec<tree> vops (6);
	  vops.quick_push (mask);
	  vops.quick_push (vop0);
	  if (vop1)
	    vops.quick_push (vop1);
	  if (vop2)
	    vops.quick_push (vop2);
	  if (reduc_idx >= 0)
	    {
	      /* Perform the operation on active elements only and take
		 inactive elements from the reduction chain input.  */
	      gcc_assert (!vop2);
	      vops.quick_push (reduc_idx == 1 ? vop1 : vop0);
	    }
	  else
	    {
	      auto else_value = targetm.preferred_else_value
		(cond_fn, vectype, vops.length () - 1, &vops[1]);
	      vops.quick_push (else_value);
	    }
	  if (len_loop_p)
	    {
	      tree len = vect_get_loop_len (loop_vinfo, gsi, lens,
					    vec_num * ncopies, vectype, i, 1);
	      signed char biasval
		= LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo);
	      tree bias = build_int_cst (intQI_type_node, biasval);
	      vops.quick_push (len);
	      vops.quick_push (bias);
	    }
	  gcall *call
	    = gimple_build_call_internal_vec (masked_loop_p ? cond_fn
							    : cond_len_fn,
					      vops);
	  new_temp = make_ssa_name (vec_dest, call);
	  gimple_call_set_lhs (call, new_temp);
	  gimple_call_set_nothrow (call, true);
	  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
	  new_stmt = call;
	}
      else
	{
	  tree mask = NULL_TREE;
	  /* When combining two masks check if either of them is elsewhere
	     combined with a loop mask, if that's the case we can mark that the
	     new combined mask doesn't need to be combined with a loop mask.  */
	  if (masked_loop_p
	      && code == BIT_AND_EXPR
	      && VECTOR_BOOLEAN_TYPE_P (vectype))
	    {
	      if (loop_vinfo->scalar_cond_masked_set.contains ({ op0,
								 ncopies }))
		{
		  mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
					     vec_num * ncopies, vectype, i);
		  vop0 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
					   vop0, gsi);
		}

	      if (loop_vinfo->scalar_cond_masked_set.contains ({ op1,
								 ncopies }))
		{
		  mask = vect_get_loop_mask (loop_vinfo, gsi, masks,
					     vec_num * ncopies, vectype, i);
		  vop1 = prepare_vec_mask (loop_vinfo, TREE_TYPE (mask), mask,
					   vop1, gsi);
		}
	    }

	  new_stmt = gimple_build_assign (vec_dest, code, vop0, vop1, vop2);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi);
	  if (using_emulated_vectors_p)
	    suppress_warning (new_stmt, OPT_Wvector_operation_performance);

	  /* Enter the combined value into the vector cond hash so we don't
	     AND it with a loop mask again.  */
	  if (mask)
	    loop_vinfo->vec_cond_masked_set.add ({ new_temp, mask });
	}

      if (vec_cvt_dest)
	{
	  new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp);
	  new_stmt = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR,
					  new_temp);
	  new_temp = make_ssa_name (vec_cvt_dest, new_stmt);
	  gimple_assign_set_lhs (new_stmt, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info,
				       new_stmt, gsi);
	}

      if (slp_node)
	slp_node->push_vec_def (new_stmt);
      else
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
    }

  if (!slp_node)
    *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

  vec_oprnds0.release ();
  vec_oprnds1.release ();
  vec_oprnds2.release ();

  return true;
}

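/* Illustrative sketch (not part of GCC): the POINTER_DIFF_EXPR special case
   above matters for loops like

     void
     diffs (long *restrict d, char **restrict p, char **restrict q, int n)
     {
       for (int i = 0; i < n; i++)
	 d[i] = p[i] - q[i];	// POINTER_DIFF_EXPR, signed result
     }

   where the subtraction is done on vectors of unsigned elements and the
   result is VIEW_CONVERT_EXPRed to the signed vectype_out.  The emulated
   word-mode path instead rewrites a vector add/sub as independent low-bit
   operations plus a carry fix-up derived from the sign bits, all performed
   on one word_mode integer.  */
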
/* A helper function to ensure data reference DR_INFO's base alignment.  */

static void
ensure_base_align (dr_vec_info *dr_info)
{
  /* Alignment is only analyzed for the first element of a DR group,
     use that to look at base alignment we need to enforce.  */
  if (STMT_VINFO_GROUPED_ACCESS (dr_info->stmt))
    dr_info = STMT_VINFO_DR_INFO (DR_GROUP_FIRST_ELEMENT (dr_info->stmt));

  gcc_assert (dr_info->misalignment != DR_MISALIGNMENT_UNINITIALIZED);

  if (dr_info->base_misaligned)
    {
      tree base_decl = dr_info->base_decl;

      // We should only be able to increase the alignment of a base object if
      // we know what its new alignment should be at compile time.
      unsigned HOST_WIDE_INT align_base_to =
	DR_TARGET_ALIGNMENT (dr_info).to_constant () * BITS_PER_UNIT;

      if (decl_in_symtab_p (base_decl))
	symtab_node::get (base_decl)->increase_alignment (align_base_to);
      else if (DECL_ALIGN (base_decl) < align_base_to)
	{
	  SET_DECL_ALIGN (base_decl, align_base_to);
	  DECL_USER_ALIGN (base_decl) = 1;
	}
      dr_info->base_misaligned = false;
    }
}

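/* Illustrative sketch (not part of GCC): if the vectorizer decides that a
   file-scope array such as

     static float data[1024];	// default alignment may be below the
				// target's preferred vector alignment

   should be accessed with aligned vector loads, ensure_base_align bumps
   DECL_ALIGN of 'data' (or asks the symbol table to do so) up to
   DR_TARGET_ALIGNMENT before any code is generated against it.  */
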
/* Function get_group_alias_ptr_type.

   Return the alias type for the group starting at FIRST_STMT_INFO.  */

static tree
get_group_alias_ptr_type (stmt_vec_info first_stmt_info)
{
  struct data_reference *first_dr, *next_dr;

  first_dr = STMT_VINFO_DATA_REF (first_stmt_info);
  stmt_vec_info next_stmt_info = DR_GROUP_NEXT_ELEMENT (first_stmt_info);
  while (next_stmt_info)
    {
      next_dr = STMT_VINFO_DATA_REF (next_stmt_info);
      if (get_alias_set (DR_REF (first_dr))
	  != get_alias_set (DR_REF (next_dr)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "conflicting alias set types.\n");
	  return ptr_type_node;
	}
      next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
    }
  return reference_alias_ptr_type (DR_REF (first_dr));
}

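/* Illustrative sketch (not part of GCC): a group whose members disagree on
   their alias sets falls back to ptr_type_node (alias set 0), which is
   conservatively correct for the single vector access replacing the group:

     union u { int i; float f; };

     void
     fill (union u *restrict a, int n)
     {
       for (int i = 0; i < n; i += 2)
	 {
	   a[i].i = 0;		// int alias set
	   a[i + 1].f = 0.0f;	// float alias set -> group uses alias set 0
	 }
     }  */
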
/* Function scan_operand_equal_p.

   Helper function for check_scan_store.  Compare two references
   with .GOMP_SIMD_LANE bases.  */

static bool
scan_operand_equal_p (tree ref1, tree ref2)
{
  tree ref[2] = { ref1, ref2 };
  poly_int64 bitsize[2], bitpos[2];
  tree offset[2], base[2];
  for (int i = 0; i < 2; ++i)
    {
      machine_mode mode;
      int unsignedp, reversep, volatilep = 0;
      base[i] = get_inner_reference (ref[i], &bitsize[i], &bitpos[i],
				     &offset[i], &mode, &unsignedp,
				     &reversep, &volatilep);
      if (reversep || volatilep || maybe_ne (bitpos[i], 0))
	return false;
      if (TREE_CODE (base[i]) == MEM_REF
	  && offset[i] == NULL_TREE
	  && TREE_CODE (TREE_OPERAND (base[i], 0)) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base[i], 0));
	  if (is_gimple_assign (def_stmt)
	      && gimple_assign_rhs_code (def_stmt) == POINTER_PLUS_EXPR
	      && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == ADDR_EXPR
	      && TREE_CODE (gimple_assign_rhs2 (def_stmt)) == SSA_NAME)
	    {
	      if (maybe_ne (mem_ref_offset (base[i]), 0))
		return false;
	      base[i] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
	      offset[i] = gimple_assign_rhs2 (def_stmt);
	    }
	}
    }

  if (!operand_equal_p (base[0], base[1], 0))
    return false;
  if (maybe_ne (bitsize[0], bitsize[1]))
    return false;
  if (offset[0] != offset[1])
    {
      if (!offset[0] || !offset[1])
	return false;
      if (!operand_equal_p (offset[0], offset[1], 0))
	{
	  tree step[2];
	  for (int i = 0; i < 2; ++i)
	    {
	      step[i] = integer_one_node;
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		{
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (is_gimple_assign (def_stmt)
		      && gimple_assign_rhs_code (def_stmt) == MULT_EXPR
		      && (TREE_CODE (gimple_assign_rhs2 (def_stmt))
			  == INTEGER_CST))
		    {
		      step[i] = gimple_assign_rhs2 (def_stmt);
		      offset[i] = gimple_assign_rhs1 (def_stmt);
		    }
		}
	      else if (TREE_CODE (offset[i]) == MULT_EXPR)
		{
		  step[i] = TREE_OPERAND (offset[i], 1);
		  offset[i] = TREE_OPERAND (offset[i], 0);
		}
	      tree rhs1 = NULL_TREE;
	      if (TREE_CODE (offset[i]) == SSA_NAME)
		{
		  gimple *def_stmt = SSA_NAME_DEF_STMT (offset[i]);
		  if (gimple_assign_cast_p (def_stmt))
		    rhs1 = gimple_assign_rhs1 (def_stmt);
		}
	      else if (CONVERT_EXPR_P (offset[i]))
		rhs1 = TREE_OPERAND (offset[i], 0);
	      if (rhs1
		  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
		  && INTEGRAL_TYPE_P (TREE_TYPE (offset[i]))
		  && (TYPE_PRECISION (TREE_TYPE (offset[i]))
		      >= TYPE_PRECISION (TREE_TYPE (rhs1))))
		offset[i] = rhs1;
	    }
	  if (!operand_equal_p (offset[0], offset[1], 0)
	      || !operand_equal_p (step[0], step[1], 0))
	    return false;
	}
    }
  return true;
}

7340 enum scan_store_kind
{
7341 /* Normal permutation. */
7342 scan_store_kind_perm
,
7344 /* Whole vector left shift permutation with zero init. */
7345 scan_store_kind_lshift_zero
,
7347 /* Whole vector left shift permutation and VEC_COND_EXPR. */
7348 scan_store_kind_lshift_cond
7351 /* Function check_scan_store.
7353 Verify if we can perform the needed permutations or whole vector shifts.
7354 Return -1 on failure, otherwise exact log2 of vectype's nunits.
7355 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
7356 to do at each step. */
7359 scan_store_can_perm_p (tree vectype
, tree init
,
7360 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
7362 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
7363 unsigned HOST_WIDE_INT nunits
;
7364 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
7366 int units_log2
= exact_log2 (nunits
);
7367 if (units_log2
<= 0)
7371 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
7372 for (i
= 0; i
<= units_log2
; ++i
)
7374 unsigned HOST_WIDE_INT j
, k
;
7375 enum scan_store_kind kind
= scan_store_kind_perm
;
7376 vec_perm_builder
sel (nunits
, nunits
, 1);
7377 sel
.quick_grow (nunits
);
7378 if (i
== units_log2
)
7380 for (j
= 0; j
< nunits
; ++j
)
7381 sel
[j
] = nunits
- 1;
7385 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
7387 for (k
= 0; j
< nunits
; ++j
, ++k
)
7388 sel
[j
] = nunits
+ k
;
7390 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
7391 if (!can_vec_perm_const_p (vec_mode
, vec_mode
, indices
))
7393 if (i
== units_log2
)
7396 if (whole_vector_shift_kind
== scan_store_kind_perm
)
7398 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
7400 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
7401 /* Whole vector shifts shift in zeros, so if init is all zero
7402 constant, there is no need to do anything further. */
7403 if ((TREE_CODE (init
) != INTEGER_CST
7404 && TREE_CODE (init
) != REAL_CST
)
7405 || !initializer_zerop (init
))
7407 tree masktype
= truth_type_for (vectype
);
7408 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
7410 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
7413 kind
= whole_vector_shift_kind
;
7415 if (use_whole_vector
)
7417 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
7418 use_whole_vector
->safe_grow_cleared (i
, true);
7419 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
7420 use_whole_vector
->safe_push (kind
);
7428 /* Function check_scan_store.
7430 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
7433 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
7434 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
7435 vect_memory_access_type memory_access_type
)
7437 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
7438 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
7441 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
7444 || memory_access_type
!= VMAT_CONTIGUOUS
7445 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
7446 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
7447 || loop_vinfo
== NULL
7448 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7449 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7450 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
7451 || !integer_zerop (DR_INIT (dr_info
->dr
))
7452 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
7453 || !alias_sets_conflict_p (get_alias_set (vectype
),
7454 get_alias_set (TREE_TYPE (ref_type
))))
7456 if (dump_enabled_p ())
7457 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7458 "unsupported OpenMP scan store.\n");
7462 /* We need to pattern match code built by OpenMP lowering and simplified
7463 by following optimizations into something we can handle.
7464 #pragma omp simd reduction(inscan,+:r)
7468 #pragma omp scan inclusive (r)
7471 shall have body with:
7472 // Initialization for input phase, store the reduction initializer:
7473 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7474 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7476 // Actual input phase:
7478 r.0_5 = D.2042[_20];
7481 // Initialization for scan phase:
7482 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
7488 // Actual scan phase:
7490 r.1_8 = D.2042[_20];
7492 The "omp simd array" variable D.2042 holds the privatized copy used
7493 inside of the loop and D.2043 is another one that holds copies of
7494 the current original list item. The separate GOMP_SIMD_LANE ifn
7495 kinds are there in order to allow optimizing the initializer store
7496 and combiner sequence, e.g. if it is originally some C++ish user
7497 defined reduction, but allow the vectorizer to pattern recognize it
7498 and turn into the appropriate vectorized scan.
7500 For exclusive scan, this is slightly different:
7501 #pragma omp simd reduction(inscan,+:r)
7505 #pragma omp scan exclusive (r)
7508 shall have body with:
7509 // Initialization for input phase, store the reduction initializer:
7510 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
7511 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
7513 // Actual input phase:
7515 r.0_5 = D.2042[_20];
7518 // Initialization for scan phase:
7519 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
7525 // Actual scan phase:
7527 r.1_8 = D.2044[_20];
7530 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
7532 /* Match the D.2042[_21] = 0; store above. Just require that
7533 it is a constant or external definition store. */
7534 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
7537 if (dump_enabled_p ())
7538 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7539 "unsupported OpenMP scan initializer store.\n");
7543 if (! loop_vinfo
->scan_map
)
7544 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
7545 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7546 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
7549 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
7551 /* These stores can be vectorized normally. */
7555 if (rhs_dt
!= vect_internal_def
)
7558 if (dump_enabled_p ())
7559 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7560 "unsupported OpenMP scan combiner pattern.\n");
7564 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
7565 tree rhs
= gimple_assign_rhs1 (stmt
);
7566 if (TREE_CODE (rhs
) != SSA_NAME
)
7569 gimple
*other_store_stmt
= NULL
;
7570 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
7571 bool inscan_var_store
7572 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
7574 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
7576 if (!inscan_var_store
)
7578 use_operand_p use_p
;
7579 imm_use_iterator iter
;
7580 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
7582 gimple
*use_stmt
= USE_STMT (use_p
);
7583 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
7585 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
7586 || !is_gimple_assign (use_stmt
)
7587 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
7589 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
7591 other_store_stmt
= use_stmt
;
7593 if (other_store_stmt
== NULL
)
7595 rhs
= gimple_assign_lhs (other_store_stmt
);
      if (!single_imm_use (rhs, &use_p, &other_store_stmt))
	return false;
    }
  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3)
    {
      use_operand_p use_p;
      imm_use_iterator iter;
      FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
	    continue;
	  if (other_store_stmt)
	    return false;
	  other_store_stmt = use_stmt;
	}
    }
  /* ... */

  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
  if (gimple_bb (def_stmt) != gimple_bb (stmt)
      || !is_gimple_assign (def_stmt)
      || gimple_assign_rhs_class (def_stmt) != GIMPLE_BINARY_RHS)
    return false;

  enum tree_code code = gimple_assign_rhs_code (def_stmt);
  /* For pointer addition, we should use the normal plus for the vector
     ...  */
  switch (code)
    {
    case POINTER_PLUS_EXPR:
      /* ... */
    case MULT_HIGHPART_EXPR:
      /* ... */
    }
  if (TREE_CODE_LENGTH (code) != binary_op || !commutative_tree_code (code))
    return false;

  tree rhs1 = gimple_assign_rhs1 (def_stmt);
  tree rhs2 = gimple_assign_rhs2 (def_stmt);
  if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != SSA_NAME)
    return false;

  gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
  gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
  if (gimple_bb (load1_stmt) != gimple_bb (stmt)
      || !gimple_assign_load_p (load1_stmt)
      || gimple_bb (load2_stmt) != gimple_bb (stmt)
      || !gimple_assign_load_p (load2_stmt))
    return false;

  stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
  stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
  if (load1_stmt_info == NULL
      || load2_stmt_info == NULL
      || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info)
	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info))
      || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info)
	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
    return false;

  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && inscan_var_store)
    {
      dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
      if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info->dr)) != ADDR_EXPR
	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0)))
	return false;
      tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
      if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
	return false;

      use_operand_p use_p;
      imm_use_iterator iter;
      FOR_EACH_IMM_USE_FAST (use_p, iter, lrhs)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (use_stmt == def_stmt || is_gimple_debug (use_stmt))
	    continue;
	  if (other_store_stmt)
	    return false;
	  other_store_stmt = use_stmt;
	}
    }

  if (other_store_stmt == NULL)
    return false;
  if (gimple_bb (other_store_stmt) != gimple_bb (stmt)
      || !gimple_store_p (other_store_stmt))
    return false;

  stmt_vec_info other_store_stmt_info
    = loop_vinfo->lookup_stmt (other_store_stmt);
  if (other_store_stmt_info == NULL
      || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info)
	  != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)))
    return false;

  gimple *stmt1 = stmt;
  gimple *stmt2 = other_store_stmt;
  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    std::swap (stmt1, stmt2);
  if (scan_operand_equal_p (gimple_assign_lhs (stmt1),
			    gimple_assign_rhs1 (load2_stmt)))
    {
      std::swap (rhs1, rhs2);
      std::swap (load1_stmt, load2_stmt);
      std::swap (load1_stmt_info, load2_stmt_info);
    }
  if (!scan_operand_equal_p (gimple_assign_lhs (stmt1),
			     gimple_assign_rhs1 (load1_stmt)))
    return false;

  tree var3 = NULL_TREE;
  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 3
      && !scan_operand_equal_p (gimple_assign_lhs (stmt2),
				gimple_assign_rhs1 (load2_stmt)))
    return false;
  else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    {
      dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
      if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info->dr)) != ADDR_EXPR
	  || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0)))
	return false;
      var3 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);
      if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3))
	  || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3))
	  || lookup_attribute ("omp simd inscan exclusive",
			       DECL_ATTRIBUTES (var3)))
	return false;
    }

  dr_vec_info *other_dr_info = STMT_VINFO_DR_INFO (other_store_stmt_info);
  if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info->dr)) != ADDR_EXPR
      || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0)))
    return false;

  tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
  tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info->dr), 0);
  if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1))
      || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2))
      || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
	 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2))))
    return false;
  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    std::swap (var1, var2);

  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
    {
      if (!lookup_attribute ("omp simd inscan exclusive",
			     DECL_ATTRIBUTES (var1)))
	return false;
      /* ... */
    }

  if (loop_vinfo->scan_map == NULL)
    return false;
  tree *init = loop_vinfo->scan_map->get (var1);
  if (init == NULL)
    return false;
  /* The IL is as expected, now check if we can actually vectorize it.
       ...
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
     _30 = MEM <vector(8) int> [(int *)&D.2043];
     _31 = MEM <vector(8) int> [(int *)&D.2042];
     _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
     ...
     // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
     _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
     ...
     // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //         _31[1]+.._31[4], ... _31[4]+.._31[7] };
     _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
     ...
     // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //         _31[0]+.._31[4], ... _31[0]+.._31[7] };
     ...
     _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
     MEM <vector(8) int> [(int *)&D.2043] = _39;
     MEM <vector(8) int> [(int *)&D.2042] = _38;
       ...
     should be vectorized as (where _40 is the vectorized rhs
     from the D.2042[_21] = 0; store):
     _30 = MEM <vector(8) int> [(int *)&D.2043];
     _31 = MEM <vector(8) int> [(int *)&D.2042];
     _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
     _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
     ...
     // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
     //         _31[3]+_31[4], ... _31[5]+.._31[6] };
     _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
     ...
     // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //         _31[1]+.._31[4], ... _31[3]+.._31[6] };
     _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
     ...
     // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
     //         _31[0]+.._31[4], ... _31[0]+.._31[6] };
     ...
     _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
     MEM <vector(8) int> [(int *)&D.2044] = _39;
     MEM <vector(8) int> [(int *)&D.2042] = _51;  */
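  /* Illustrative note (a sketch, not taken from the original sources): the
     inclusive case above corresponds roughly to scalar source of the form

	 #pragma omp simd reduction (inscan, +:r)
	 for (i = 0; i < n; i++)
	   {
	     r += a[i];
	     #pragma omp scan inclusive (r)
	     b[i] = r;
	   }

     with D.2042/D.2043 being the "omp simd array" copies of the reduction
     variable, while the exclusive case stores b[i] before the update.  The
     variable and array names here are hypothetical.  */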
  enum machine_mode vec_mode = TYPE_MODE (vectype);
  optab optab = optab_for_tree_code (code, vectype, optab_default);
  if (!optab || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
    return false;

  int units_log2 = scan_store_can_perm_p (vectype, *init);
  if (units_log2 == -1)
    return false;

  return true;
}
/* Function vectorizable_scan_store.

   Helper of vectorizable_store, arguments like on vectorizable_store.
   Handle only the transformation, checking is done in check_scan_store.  */
static bool
vectorizable_scan_store (vec_info *vinfo,
			 stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
			 gimple **vec_stmt, int ncopies)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  tree ref_type = reference_alias_ptr_type (DR_REF (dr_info->dr));
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform scan store. ncopies = %d\n", ncopies);

  gimple *stmt = STMT_VINFO_STMT (stmt_info);
  tree rhs = gimple_assign_rhs1 (stmt);
  gcc_assert (TREE_CODE (rhs) == SSA_NAME);

  tree var = TREE_OPERAND (DR_BASE_ADDRESS (dr_info->dr), 0);
  bool inscan_var_store
    = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var)) != NULL;
  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    {
      use_operand_p use_p;
      imm_use_iterator iter;
      FOR_EACH_IMM_USE_FAST (use_p, iter, rhs)
	{
	  gimple *use_stmt = USE_STMT (use_p);
	  if (use_stmt == stmt || is_gimple_debug (use_stmt))
	    continue;
	  rhs = gimple_assign_lhs (use_stmt);
	  break;
	}
    }

  gimple *def_stmt = SSA_NAME_DEF_STMT (rhs);
  enum tree_code code = gimple_assign_rhs_code (def_stmt);
  if (code == POINTER_PLUS_EXPR)
    code = PLUS_EXPR;
  gcc_assert (TREE_CODE_LENGTH (code) == binary_op
	      && commutative_tree_code (code));
  tree rhs1 = gimple_assign_rhs1 (def_stmt);
  tree rhs2 = gimple_assign_rhs2 (def_stmt);
  gcc_assert (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME);
  gimple *load1_stmt = SSA_NAME_DEF_STMT (rhs1);
  gimple *load2_stmt = SSA_NAME_DEF_STMT (rhs2);
  stmt_vec_info load1_stmt_info = loop_vinfo->lookup_stmt (load1_stmt);
  stmt_vec_info load2_stmt_info = loop_vinfo->lookup_stmt (load2_stmt);
  dr_vec_info *load1_dr_info = STMT_VINFO_DR_INFO (load1_stmt_info);
  dr_vec_info *load2_dr_info = STMT_VINFO_DR_INFO (load2_stmt_info);
  tree var1 = TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info->dr), 0);
  tree var2 = TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info->dr), 0);

  if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1)))
    {
      std::swap (rhs1, rhs2);
      std::swap (var1, var2);
      std::swap (load1_dr_info, load2_dr_info);
    }

  tree *init = loop_vinfo->scan_map->get (var1);
  unsigned HOST_WIDE_INT nunits;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits))
    gcc_unreachable ();
  auto_vec<enum scan_store_kind, 16> use_whole_vector;
  int units_log2 = scan_store_can_perm_p (vectype, *init, &use_whole_vector);
  gcc_assert (units_log2 > 0);
  auto_vec<tree, 16> perms;
  perms.quick_grow (units_log2 + 1);
  tree zero_vec = NULL_TREE, masktype = NULL_TREE;
  for (int i = 0; i <= units_log2; ++i)
    {
      unsigned HOST_WIDE_INT j, k;
      vec_perm_builder sel (nunits, nunits, 1);
      sel.quick_grow (nunits);
      if (i == units_log2)
	for (j = 0; j < nunits; ++j)
	  sel[j] = nunits - 1;
      else
	{
	  for (j = 0; j < (HOST_WIDE_INT_1U << i); ++j)
	    sel[j] = j;
	  for (k = 0; j < nunits; ++j, ++k)
	    sel[j] = nunits + k;
	}
      vec_perm_indices indices (sel, i == units_log2 ? 1 : 2, nunits);
      if (!use_whole_vector.is_empty ()
	  && use_whole_vector[i] != scan_store_kind_perm)
	{
	  if (zero_vec == NULL_TREE)
	    zero_vec = build_zero_cst (vectype);
	  if (masktype == NULL_TREE
	      && use_whole_vector[i] == scan_store_kind_lshift_cond)
	    masktype = truth_type_for (vectype);
	  perms[i] = vect_gen_perm_mask_any (vectype, indices);
	}
      else
	perms[i] = vect_gen_perm_mask_checked (vectype, indices);
    }
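  /* For example, with an 8-lane vector (units_log2 == 3) the loop above
     builds
	perms[0] = { 0, 8, 9, 10, 11, 12, 13, 14 }
	perms[1] = { 0, 1, 8, 9, 10, 11, 12, 13 }
	perms[2] = { 0, 1, 2, 3, 8, 9, 10, 11 }
	perms[3] = { 7, 7, 7, 7, 7, 7, 7, 7 }
     matching the VEC_PERM_EXPR masks in the example in check_scan_store
     above.  */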
  tree vec_oprnd1 = NULL_TREE;
  tree vec_oprnd2 = NULL_TREE;
  tree vec_oprnd3 = NULL_TREE;
  tree dataref_ptr = DR_BASE_ADDRESS (dr_info->dr);
  tree dataref_offset = build_int_cst (ref_type, 0);
  tree bump = vect_get_data_ptr_increment (vinfo, gsi, dr_info,
					   vectype, VMAT_CONTIGUOUS);
  tree ldataref_ptr = NULL_TREE;
  tree orig = NULL_TREE;
  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4 && !inscan_var_store)
    ldataref_ptr = DR_BASE_ADDRESS (load1_dr_info->dr);
  auto_vec<tree> vec_oprnds1;
  auto_vec<tree> vec_oprnds2;
  auto_vec<tree> vec_oprnds3;
  vect_get_vec_defs (vinfo, stmt_info, NULL, ncopies,
		     *init, &vec_oprnds1,
		     ldataref_ptr == NULL ? rhs1 : NULL, &vec_oprnds2,
		     rhs2, &vec_oprnds3);
  for (int j = 0; j < ncopies; j++)
    {
      vec_oprnd1 = vec_oprnds1[j];
      if (ldataref_ptr == NULL)
	vec_oprnd2 = vec_oprnds2[j];
      vec_oprnd3 = vec_oprnds3[j];
      if (j == 0)
	orig = vec_oprnd3;
      else if (!inscan_var_store)
	dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);

      if (ldataref_ptr)
	{
	  vec_oprnd2 = make_ssa_name (vectype);
	  tree data_ref = fold_build2 (MEM_REF, vectype,
				       unshare_expr (ldataref_ptr),
				       dataref_offset);
	  vect_copy_ref_info (data_ref, DR_REF (load1_dr_info->dr));
	  gimple *g = gimple_build_assign (vec_oprnd2, data_ref);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];
	}
      tree v = vec_oprnd2;
      for (int i = 0; i < units_log2; ++i)
	{
	  tree new_temp = make_ssa_name (vectype);
	  gimple *g = gimple_build_assign (new_temp, VEC_PERM_EXPR,
					   (zero_vec
					    && (use_whole_vector[i]
						!= scan_store_kind_perm))
					   ? zero_vec : vec_oprnd1, v,
					   perms[i]);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
	  *vec_stmt = STMT_VINFO_VEC_STMTS (stmt_info)[0];

	  if (zero_vec && use_whole_vector[i] == scan_store_kind_lshift_cond)
	    {
	      /* Whole vector shift shifted in zero bits, but if *init
		 is not initializer_zerop, we need to replace those elements
		 with elements from vec_oprnd1.  */
	      tree_vector_builder vb (masktype, nunits, 1);
	      for (unsigned HOST_WIDE_INT k = 0; k < nunits; ++k)
		vb.quick_push (k < (HOST_WIDE_INT_1U << i)
			       ? boolean_false_node : boolean_true_node);

	      tree new_temp2 = make_ssa_name (vectype);
	      g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
				       new_temp, vec_oprnd1);
	      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
	      new_temp = new_temp2;
	    }
	  /* For exclusive scan, perform the perms[i] permutation once
	     more ...  */
	  if (/* ... */
	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4
	      /* ... */)
	    {
	      /* ... */
	    }

	  tree new_temp2 = make_ssa_name (vectype);
	  g = gimple_build_assign (new_temp2, code, v, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);

	  v = new_temp2;
	}
      tree new_temp = make_ssa_name (vectype);
      gimple *g = gimple_build_assign (new_temp, code, orig, v);
      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);

      tree last_perm_arg = new_temp;
      /* For exclusive scan, new_temp computed above is the exclusive scan
	 prefix sum.  Turn it into inclusive prefix sum for the broadcast
	 of the last element into orig.  */
      if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) == 4)
	{
	  last_perm_arg = make_ssa_name (vectype);
	  g = gimple_build_assign (last_perm_arg, code, new_temp, vec_oprnd2);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
	}

      orig = make_ssa_name (vectype);
      g = gimple_build_assign (orig, VEC_PERM_EXPR, last_perm_arg,
			       last_perm_arg, perms[units_log2]);
      vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
      STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);

      if (!inscan_var_store)
	{
	  tree data_ref = fold_build2 (MEM_REF, vectype,
				       unshare_expr (dataref_ptr),
				       dataref_offset);
	  vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
	  g = gimple_build_assign (data_ref, new_temp);
	  vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	  STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
	}
    }

  if (inscan_var_store)
    for (int j = 0; j < ncopies; j++)
      {
	if (j != 0)
	  dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset, bump);

	tree data_ref = fold_build2 (MEM_REF, vectype,
				     unshare_expr (dataref_ptr),
				     dataref_offset);
	vect_copy_ref_info (data_ref, DR_REF (dr_info->dr));
	gimple *g = gimple_build_assign (data_ref, orig);
	vect_finish_stmt_generation (vinfo, stmt_info, g, gsi);
	STMT_VINFO_VEC_STMTS (stmt_info).safe_push (g);
      }
  return true;
}
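/* In short (an illustrative summary, not taken verbatim from the sources):
   for each vector copy the code emitted above combines the loaded vector
   units_log2 times with a VEC_PERM_EXPR-shifted copy of itself (shift
   amounts 1, 2, 4, ...), adds the running carry from the previous copy,
   stores the result, and finally broadcasts the last lane with one more
   VEC_PERM_EXPR to form the carry for the next copy.  */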
/* Function vectorizable_store.

   Check if STMT_INFO defines a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

static bool
vectorizable_store (vec_info *vinfo,
		    stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		    gimple **vec_stmt, slp_tree slp_node,
		    stmt_vector_for_cost *cost_vec)
{
8101 tree vec_oprnd
= NULL_TREE
;
8103 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8104 class loop
*loop
= NULL
;
8105 machine_mode vec_mode
;
8107 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
8108 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8109 tree dataref_ptr
= NULL_TREE
;
8110 tree dataref_offset
= NULL_TREE
;
8111 gimple
*ptr_incr
= NULL
;
8114 stmt_vec_info first_stmt_info
;
8116 unsigned int group_size
, i
;
8117 bool slp
= (slp_node
!= NULL
);
8118 unsigned int vec_num
;
8119 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8121 gather_scatter_info gs_info
;
8123 vec_load_store_type vls_type
;
8126 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8129 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8133 /* Is vectorizable store? */
8135 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8136 slp_tree mask_node
= NULL
;
8137 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8139 tree scalar_dest
= gimple_assign_lhs (assign
);
8140 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
8141 && is_pattern_stmt_p (stmt_info
))
8142 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
8143 if (TREE_CODE (scalar_dest
) != ARRAY_REF
8144 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
8145 && TREE_CODE (scalar_dest
) != INDIRECT_REF
8146 && TREE_CODE (scalar_dest
) != COMPONENT_REF
8147 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
8148 && TREE_CODE (scalar_dest
) != REALPART_EXPR
8149 && TREE_CODE (scalar_dest
) != MEM_REF
)
8154 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8155 if (!call
|| !gimple_call_internal_p (call
))
8158 internal_fn ifn
= gimple_call_internal_fn (call
);
8159 if (!internal_store_fn_p (ifn
))
8162 int mask_index
= internal_fn_mask_index (ifn
);
8163 if (mask_index
>= 0 && slp_node
)
8164 mask_index
= vect_slp_child_index_for_operand
8165 (call
, mask_index
, STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8167 && !vect_check_scalar_mask (vinfo
, stmt_info
, slp_node
, mask_index
,
8168 &mask
, &mask_node
, &mask_dt
,
8173 /* Cannot have hybrid store SLP -- that would mean storing to the
8174 same location twice. */
8175 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
8177 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
8178 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8182 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8183 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8188 /* Multiple types in SLP are handled by creating the appropriate number of
8189 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8194 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8196 gcc_assert (ncopies
>= 1);
8198 /* FORNOW. This restriction should be relaxed. */
8199 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
8201 if (dump_enabled_p ())
8202 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8203 "multiple types in nested loop.\n");
8209 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
8210 &op
, &op_node
, &rhs_dt
, &rhs_vectype
, &vls_type
))
8213 elem_type
= TREE_TYPE (vectype
);
8214 vec_mode
= TYPE_MODE (vectype
);
8216 if (!STMT_VINFO_DATA_REF (stmt_info
))
8219 vect_memory_access_type memory_access_type
;
8220 enum dr_alignment_support alignment_support_scheme
;
8223 internal_fn lanes_ifn
;
8224 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp_node
, mask
, vls_type
,
8225 ncopies
, &memory_access_type
, &poffset
,
8226 &alignment_support_scheme
, &misalignment
, &gs_info
,
8232 if (memory_access_type
== VMAT_CONTIGUOUS
)
8234 if (!VECTOR_MODE_P (vec_mode
)
8235 || !can_vec_mask_load_store_p (vec_mode
,
8236 TYPE_MODE (mask_vectype
), false))
8239 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8240 && (memory_access_type
!= VMAT_GATHER_SCATTER
8241 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
8243 if (dump_enabled_p ())
8244 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8245 "unsupported access type for masked store.\n");
8248 else if (memory_access_type
== VMAT_GATHER_SCATTER
8249 && gs_info
.ifn
== IFN_LAST
8252 if (dump_enabled_p ())
8253 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8254 "unsupported masked emulated scatter.\n");
8260 /* FORNOW. In some cases can vectorize even if data-type not supported
8261 (e.g. - array initialization with 0). */
8262 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
8266 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8267 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
8268 && memory_access_type
!= VMAT_GATHER_SCATTER
8269 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
8272 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8273 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8274 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8278 first_stmt_info
= stmt_info
;
8279 first_dr_info
= dr_info
;
8280 group_size
= vec_num
= 1;
8283 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
8285 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
8286 memory_access_type
))
8290 bool costing_p
= !vec_stmt
;
8291 if (costing_p
) /* transformation not required. */
8293 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8296 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
8297 check_load_store_for_partial_vectors (loop_vinfo
, vectype
, slp_node
,
8298 vls_type
, group_size
,
8299 memory_access_type
, &gs_info
,
8303 && (!vect_maybe_update_slp_op_vectype (op_node
, vectype
)
8305 && !vect_maybe_update_slp_op_vectype (mask_node
,
8308 if (dump_enabled_p ())
8309 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8310 "incompatible vector types for invariants\n");
8314 if (dump_enabled_p ()
8315 && memory_access_type
!= VMAT_ELEMENTWISE
8316 && memory_access_type
!= VMAT_GATHER_SCATTER
8317 && alignment_support_scheme
!= dr_aligned
)
8318 dump_printf_loc (MSG_NOTE
, vect_location
,
8319 "Vectorizing an unaligned access.\n");
8321 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
      /* As vect_transform_stmt shows, for interleaving stores the whole
	 chain is vectorized only when the last store in the chain is
	 reached; the other stores in the group are skipped.  So we only
	 want to cost the last one here, but since it is not trivial to
	 get the last one and costing the first one is equivalent, use
	 the first one instead.  */
8331 && first_stmt_info
!= stmt_info
)
8334 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8338 ensure_base_align (dr_info
);
8340 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
8342 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
);
8346 unsigned int inside_cost
= 0, prologue_cost
= 0;
8347 if (vls_type
== VLS_STORE_INVARIANT
)
8348 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
8349 stmt_info
, 0, vect_prologue
);
8350 vect_get_store_cost (vinfo
, stmt_info
, ncopies
,
8351 alignment_support_scheme
, misalignment
,
8352 &inside_cost
, cost_vec
);
8354 if (dump_enabled_p ())
8355 dump_printf_loc (MSG_NOTE
, vect_location
,
8356 "vect_model_store_cost: inside_cost = %d, "
8357 "prologue_cost = %d .\n",
8358 inside_cost
, prologue_cost
);
8362 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
8368 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
8372 grouped_store
= false;
8373 /* VEC_NUM is the number of vect stmts to be created for this
8375 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8376 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8377 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
8378 == first_stmt_info
);
8379 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8380 op
= vect_get_store_rhs (first_stmt_info
);
8383 /* VEC_NUM is the number of vect stmts to be created for this
8385 vec_num
= group_size
;
8387 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8390 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8392 if (!costing_p
&& dump_enabled_p ())
8393 dump_printf_loc (MSG_NOTE
, vect_location
, "transform store. ncopies = %d\n",
8396 /* Check if we need to update prologue cost for invariant,
8397 and update it accordingly if so. If it's not for
8398 interleaving store, we can just check vls_type; but if
8399 it's for interleaving store, need to check the def_type
8400 of the stored value since the current vls_type is just
8401 for first_stmt_info. */
8402 auto update_prologue_cost
= [&](unsigned *prologue_cost
, tree store_rhs
)
8404 gcc_assert (costing_p
);
8409 gcc_assert (store_rhs
);
8410 enum vect_def_type cdt
;
8411 gcc_assert (vect_is_simple_use (store_rhs
, vinfo
, &cdt
));
8412 if (cdt
!= vect_constant_def
&& cdt
!= vect_external_def
)
8415 else if (vls_type
!= VLS_STORE_INVARIANT
)
8417 *prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
, stmt_info
,
8421 if (memory_access_type
== VMAT_ELEMENTWISE
8422 || memory_access_type
== VMAT_STRIDED_SLP
)
8424 unsigned inside_cost
= 0, prologue_cost
= 0;
8425 gimple_stmt_iterator incr_gsi
;
8431 tree stride_base
, stride_step
, alias_off
;
8432 tree vec_oprnd
= NULL_TREE
;
8435 /* Checked by get_load_store_type. */
8436 unsigned int const_nunits
= nunits
.to_constant ();
8438 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8439 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
8441 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8443 = fold_build_pointer_plus
8444 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8445 size_binop (PLUS_EXPR
,
8446 convert_to_ptrofftype (dr_offset
),
8447 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8448 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a store with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     array[i] = ...;

	 we generate a new induction variable and new stores from
	 the components of the (vectorized) rhs:

	   for (j = 0; ; j += VF*stride)
	     tmp1 = ...;
	     tmp2 = ...;
	     ...
	     array[j] = tmp1;
	     array[j + stride] = tmp2;
	     ...
	 */
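      /* Concretely (illustrative numbers only): with a 4-lane vector and
	 stride 3, one vectorized iteration becomes four scalar stores

	   array[j + 0*3] = tmp[0];
	   array[j + 1*3] = tmp[1];
	   array[j + 2*3] = tmp[2];
	   array[j + 3*3] = tmp[3];

	 after which the new induction variable advances by VF * stride,
	 i.e. 12 elements here.  */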
8468 unsigned nstores
= const_nunits
;
8470 tree ltype
= elem_type
;
8471 tree lvectype
= vectype
;
8474 if (group_size
< const_nunits
8475 && const_nunits
% group_size
== 0)
8477 nstores
= const_nunits
/ group_size
;
8479 ltype
= build_vector_type (elem_type
, group_size
);
8482 /* First check if vec_extract optab doesn't support extraction
8483 of vector elts directly. */
8484 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
8486 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
8487 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
8488 group_size
).exists (&vmode
)
8489 || (convert_optab_handler (vec_extract_optab
,
8490 TYPE_MODE (vectype
), vmode
)
8491 == CODE_FOR_nothing
))
8493 /* Try to avoid emitting an extract of vector elements
8494 by performing the extracts using an integer type of the
8495 same size, extracting from a vector of those and then
8496 re-interpreting it as the original vector type if
8499 = group_size
* GET_MODE_BITSIZE (elmode
);
8500 unsigned int lnunits
= const_nunits
/ group_size
;
8501 /* If we can't construct such a vector fall back to
8502 element extracts from the original vector type and
8503 element size stores. */
8504 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
8505 && VECTOR_MODE_P (TYPE_MODE (vectype
))
8506 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
8507 lnunits
).exists (&vmode
)
8508 && (convert_optab_handler (vec_extract_optab
,
8510 != CODE_FOR_nothing
))
8514 ltype
= build_nonstandard_integer_type (lsize
, 1);
8515 lvectype
= build_vector_type (ltype
, nstores
);
8517 /* Else fall back to vector extraction anyway.
8518 Fewer stores are more important than avoiding spilling
8519 of the vector we extract from. Compared to the
8520 construction case in vectorizable_load no store-forwarding
8521 issue exists here for reasonable archs. */
8524 else if (group_size
>= const_nunits
8525 && group_size
% const_nunits
== 0)
8527 int mis_align
= dr_misalignment (first_dr_info
, vectype
);
8528 dr_alignment_support dr_align
8529 = vect_supportable_dr_alignment (vinfo
, dr_info
, vectype
,
8531 if (dr_align
== dr_aligned
8532 || dr_align
== dr_unaligned_supported
)
8535 lnel
= const_nunits
;
8538 alignment_support_scheme
= dr_align
;
8539 misalignment
= mis_align
;
8542 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
8543 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8548 ivstep
= stride_step
;
8549 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
8550 build_int_cst (TREE_TYPE (ivstep
), vf
));
8552 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8554 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8555 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8556 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
, loop
, &incr_gsi
,
8557 insert_after
, &offvar
, NULL
);
8558 incr
= gsi_stmt (incr_gsi
);
8560 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8563 alias_off
= build_int_cst (ref_type
, 0);
8564 stmt_vec_info next_stmt_info
= first_stmt_info
;
8565 auto_vec
<tree
> vec_oprnds
;
      /* For costing some adjacent vector stores, we'd like to cost with
	 the total number of them once instead of costing each one by one.  */
8568 unsigned int n_adjacent_stores
= 0;
8569 for (g
= 0; g
< group_size
; g
++)
8571 running_off
= offvar
;
8576 tree size
= TYPE_SIZE_UNIT (ltype
);
8578 = fold_build2 (MULT_EXPR
, sizetype
, size_int (g
), size
);
8579 tree newoff
= copy_ssa_name (running_off
, NULL
);
8580 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8582 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8583 running_off
= newoff
;
8587 op
= vect_get_store_rhs (next_stmt_info
);
8589 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
, op
,
8592 update_prologue_cost (&prologue_cost
, op
);
8593 unsigned int group_el
= 0;
8594 unsigned HOST_WIDE_INT
8595 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8596 for (j
= 0; j
< ncopies
; j
++)
8600 vec_oprnd
= vec_oprnds
[j
];
8601 /* Pun the vector to extract from if necessary. */
8602 if (lvectype
!= vectype
)
8604 tree tem
= make_ssa_name (lvectype
);
8606 = build1 (VIEW_CONVERT_EXPR
, lvectype
, vec_oprnd
);
8607 gimple
*pun
= gimple_build_assign (tem
, cvt
);
8608 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
8612 for (i
= 0; i
< nstores
; i
++)
8616 /* Only need vector extracting when there are more
8620 += record_stmt_cost (cost_vec
, 1, vec_to_scalar
,
8621 stmt_info
, 0, vect_body
);
8622 /* Take a single lane vector type store as scalar
8623 store to avoid ICE like 110776. */
8624 if (VECTOR_TYPE_P (ltype
)
8625 && known_ne (TYPE_VECTOR_SUBPARTS (ltype
), 1U))
8626 n_adjacent_stores
++;
8629 += record_stmt_cost (cost_vec
, 1, scalar_store
,
8630 stmt_info
, 0, vect_body
);
8633 tree newref
, newoff
;
8634 gimple
*incr
, *assign
;
8635 tree size
= TYPE_SIZE (ltype
);
8636 /* Extract the i'th component. */
8637 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
8638 bitsize_int (i
), size
);
8639 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
8642 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
8646 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8648 newref
= build2 (MEM_REF
, ltype
,
8649 running_off
, this_off
);
8650 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
8652 /* And store it to *running_off. */
8653 assign
= gimple_build_assign (newref
, elem
);
8654 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
8658 || group_el
== group_size
)
8660 newoff
= copy_ssa_name (running_off
, NULL
);
8661 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8662 running_off
, stride_step
);
8663 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8665 running_off
= newoff
;
8668 if (g
== group_size
- 1
8671 if (j
== 0 && i
== 0)
8673 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
8677 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8678 vec_oprnds
.truncate(0);
8685 if (n_adjacent_stores
> 0)
8686 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
8687 alignment_support_scheme
, misalignment
,
8688 &inside_cost
, cost_vec
);
8689 if (dump_enabled_p ())
8690 dump_printf_loc (MSG_NOTE
, vect_location
,
8691 "vect_model_store_cost: inside_cost = %d, "
8692 "prologue_cost = %d .\n",
8693 inside_cost
, prologue_cost
);
8699 gcc_assert (alignment_support_scheme
);
8700 vec_loop_masks
*loop_masks
8701 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8702 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8704 vec_loop_lens
*loop_lens
8705 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
8706 ? &LOOP_VINFO_LENS (loop_vinfo
)
  /* Both vect_transform_stmt and vect_analyze_stmt reach this point, with
     one difference: we cannot enable both the lens and the masks during the
     transform, while that is still allowed during analysis.
     Shouldn't go with the length-based approach if fully masked.  */
  if (cost_vec == NULL)
    /* The cost_vec is NULL during the transform.  */
    gcc_assert ((!loop_lens || !loop_masks));
8717 /* Targets with store-lane instructions must not require explicit
8718 realignment. vect_supportable_dr_alignment always returns either
8719 dr_aligned or dr_unaligned_supported for masked operations. */
8720 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8723 || alignment_support_scheme
== dr_aligned
8724 || alignment_support_scheme
== dr_unaligned_supported
);
8726 tree offset
= NULL_TREE
;
8727 if (!known_eq (poffset
, 0))
8728 offset
= size_int (poffset
);
8731 tree vec_offset
= NULL_TREE
;
8732 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8734 aggr_type
= NULL_TREE
;
8737 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8739 aggr_type
= elem_type
;
8741 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
8742 &bump
, &vec_offset
, loop_lens
);
8746 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8747 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8749 aggr_type
= vectype
;
8750 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
8751 memory_access_type
, loop_lens
);
8754 if (mask
&& !costing_p
)
8755 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
8757 /* In case the vectorization factor (VF) is bigger than the number
8758 of elements that we can fit in a vectype (nunits), we have to generate
8759 more than one vector stmt - i.e - we need to "unroll" the
8760 vector stmt by a factor VF/nunits. */
  /* In case of interleaving (non-unit grouped access):

	S1:  &base + 2 = x2
	S2:  &base = x0
	S3:  &base + 1 = x1
	S4:  &base + 3 = x3

     We create vectorized stores starting from base address (the access of the
     first stmt in the chain (S2 in the above example), when the last store stmt
     of the chain (S4) is reached:

	VS1: &base = vx2
	VS2: &base + vec_size*1 = vx0
	VS3: &base + vec_size*2 = vx1
	VS4: &base + vec_size*3 = vx3

     Then permutation statements are generated:

	VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
	VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
	...

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_store_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_store_chain()).

     In case of both multiple types and interleaving, above vector stores and
     permutation stmts are created for every copy.  The result vector stmts are
     put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
     STMT_VINFO_RELATED_STMT for the next copies.  */
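  /* Illustrative note: with 8-lane vectors the two VEC_PERM_EXPR masks above
     pick alternating lanes from their two operands (indices 0-7 select from
     the first vector, 8-15 from the second), so VS5 yields the low half and
     VS6 the high half of the interleaved result.  */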
8795 auto_vec
<tree
> dr_chain (group_size
);
8796 auto_vec
<tree
> vec_masks
;
8797 tree vec_mask
= NULL
;
8798 auto_delete_vec
<auto_vec
<tree
>> gvec_oprnds (group_size
);
8799 for (i
= 0; i
< group_size
; i
++)
8800 gvec_oprnds
.quick_push (new auto_vec
<tree
> ());
8802 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8804 gcc_assert (!slp
&& grouped_store
);
8805 unsigned inside_cost
= 0, prologue_cost
= 0;
      /* For costing some adjacent vector stores, we'd like to cost with
	 the total number of them once instead of costing each one by one.  */
8808 unsigned int n_adjacent_stores
= 0;
8809 for (j
= 0; j
< ncopies
; j
++)
8814 /* For interleaved stores we collect vectorized defs for all
8815 the stores in the group in DR_CHAIN. DR_CHAIN is then used
8816 as an input to vect_permute_store_chain(). */
8817 stmt_vec_info next_stmt_info
= first_stmt_info
;
8818 for (i
= 0; i
< group_size
; i
++)
8820 /* Since gaps are not supported for interleaved stores,
8821 DR_GROUP_SIZE is the exact number of stmts in the
8822 chain. Therefore, NEXT_STMT_INFO can't be NULL_TREE. */
8823 op
= vect_get_store_rhs (next_stmt_info
);
8825 update_prologue_cost (&prologue_cost
, op
);
8828 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
8831 vec_oprnd
= (*gvec_oprnds
[i
])[0];
8832 dr_chain
.quick_push (vec_oprnd
);
8834 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8841 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
8844 vec_mask
= vec_masks
[0];
	  /* We should have caught mismatched types earlier.  */
8849 useless_type_conversion_p (vectype
, TREE_TYPE (vec_oprnd
)));
8851 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
,
8852 aggr_type
, NULL
, offset
, &dummy
,
8853 gsi
, &ptr_incr
, false, bump
);
8856 else if (!costing_p
)
8858 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
8859 /* DR_CHAIN is then used as an input to
8860 vect_permute_store_chain(). */
8861 for (i
= 0; i
< group_size
; i
++)
8863 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
8864 dr_chain
[i
] = vec_oprnd
;
8867 vec_mask
= vec_masks
[j
];
8868 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
8874 n_adjacent_stores
+= vec_num
;
8878 /* Get an array into which we can store the individual vectors. */
8879 tree vec_array
= create_vector_array (vectype
, vec_num
);
8881 /* Invalidate the current contents of VEC_ARRAY. This should
8882 become an RTL clobber too, which prevents the vector registers
8883 from being upward-exposed. */
8884 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8886 /* Store the individual vectors into the array. */
8887 for (i
= 0; i
< vec_num
; i
++)
8889 vec_oprnd
= dr_chain
[i
];
8890 write_vector_array (vinfo
, stmt_info
, gsi
, vec_oprnd
, vec_array
,
8894 tree final_mask
= NULL
;
8895 tree final_len
= NULL
;
8898 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
8899 ncopies
, vectype
, j
);
8901 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
8904 if (lanes_ifn
== IFN_MASK_LEN_STORE_LANES
)
8907 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
8908 ncopies
, vectype
, j
, 1);
8910 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8912 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
8913 bias
= build_int_cst (intQI_type_node
, biasval
);
8916 mask_vectype
= truth_type_for (vectype
);
8917 final_mask
= build_minus_one_cst (mask_vectype
);
8922 if (final_len
&& final_mask
)
8925 MASK_LEN_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8926 LEN, BIAS, VEC_ARRAY). */
8927 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8928 tree alias_ptr
= build_int_cst (ref_type
, align
);
8929 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE_LANES
, 6,
8930 dataref_ptr
, alias_ptr
,
8931 final_mask
, final_len
, bias
,
8934 else if (final_mask
)
8937 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
8939 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
8940 tree alias_ptr
= build_int_cst (ref_type
, align
);
8941 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
8942 dataref_ptr
, alias_ptr
,
8943 final_mask
, vec_array
);
8948 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
8949 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8950 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
8951 gimple_call_set_lhs (call
, data_ref
);
8953 gimple_call_set_nothrow (call
, true);
8954 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
8957 /* Record that VEC_ARRAY is now dead. */
8958 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
8960 *vec_stmt
= new_stmt
;
8961 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8966 if (n_adjacent_stores
> 0)
8967 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
8968 alignment_support_scheme
, misalignment
,
8969 &inside_cost
, cost_vec
);
8970 if (dump_enabled_p ())
8971 dump_printf_loc (MSG_NOTE
, vect_location
,
8972 "vect_model_store_cost: inside_cost = %d, "
8973 "prologue_cost = %d .\n",
8974 inside_cost
, prologue_cost
);
8980 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8982 gcc_assert (!grouped_store
);
8983 auto_vec
<tree
> vec_offsets
;
8984 unsigned int inside_cost
= 0, prologue_cost
= 0;
8985 for (j
= 0; j
< ncopies
; j
++)
8990 if (costing_p
&& vls_type
== VLS_STORE_INVARIANT
)
8991 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
8992 stmt_info
, 0, vect_prologue
);
8993 else if (!costing_p
)
8995 /* Since the store is not grouped, DR_GROUP_SIZE is 1, and
8996 DR_CHAIN is of size 1. */
8997 gcc_assert (group_size
== 1);
8999 vect_get_slp_defs (op_node
, gvec_oprnds
[0]);
9001 vect_get_vec_defs_for_operand (vinfo
, first_stmt_info
,
9002 ncopies
, op
, gvec_oprnds
[0]);
9006 vect_get_slp_defs (mask_node
, &vec_masks
);
9008 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
9014 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9015 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
9017 &dataref_ptr
, &vec_offsets
);
9020 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
,
9021 aggr_type
, NULL
, offset
,
9022 &dummy
, gsi
, &ptr_incr
, false,
9026 else if (!costing_p
)
9028 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
9029 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9030 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9031 gsi
, stmt_info
, bump
);
9035 for (i
= 0; i
< vec_num
; ++i
)
9039 vec_oprnd
= (*gvec_oprnds
[0])[vec_num
* j
+ i
];
9041 vec_mask
= vec_masks
[vec_num
* j
+ i
];
	  /* We should have caught mismatched types earlier.  */
9043 gcc_assert (useless_type_conversion_p (vectype
,
9044 TREE_TYPE (vec_oprnd
)));
9046 unsigned HOST_WIDE_INT align
;
9047 tree final_mask
= NULL_TREE
;
9048 tree final_len
= NULL_TREE
;
9049 tree bias
= NULL_TREE
;
9053 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
,
9054 loop_masks
, ncopies
,
9057 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
9058 final_mask
, vec_mask
, gsi
);
9061 if (gs_info
.ifn
!= IFN_LAST
)
9065 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9067 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9068 stmt_info
, 0, vect_body
);
9072 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9073 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
9074 tree scale
= size_int (gs_info
.scale
);
9076 if (gs_info
.ifn
== IFN_MASK_LEN_SCATTER_STORE
)
9079 final_len
= vect_get_loop_len (loop_vinfo
, gsi
,
9083 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9085 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9086 bias
= build_int_cst (intQI_type_node
, biasval
);
9089 mask_vectype
= truth_type_for (vectype
);
9090 final_mask
= build_minus_one_cst (mask_vectype
);
9095 if (final_len
&& final_mask
)
9096 call
= gimple_build_call_internal
9097 (IFN_MASK_LEN_SCATTER_STORE
, 7, dataref_ptr
,
9098 vec_offset
, scale
, vec_oprnd
, final_mask
,
9100 else if (final_mask
)
9101 call
= gimple_build_call_internal
9102 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
,
9103 vec_offset
, scale
, vec_oprnd
, final_mask
);
9105 call
= gimple_build_call_internal (IFN_SCATTER_STORE
, 4,
9106 dataref_ptr
, vec_offset
,
9108 gimple_call_set_nothrow (call
, true);
9109 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9112 else if (gs_info
.decl
)
9114 /* The builtin decls path for scatter is legacy, x86 only. */
9115 gcc_assert (nunits
.is_constant ()
9117 || SCALAR_INT_MODE_P
9118 (TYPE_MODE (TREE_TYPE (final_mask
)))));
9121 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9123 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9124 stmt_info
, 0, vect_body
);
9127 poly_uint64 offset_nunits
9128 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
9129 if (known_eq (nunits
, offset_nunits
))
9131 new_stmt
= vect_build_one_scatter_store_call
9132 (vinfo
, stmt_info
, gsi
, &gs_info
,
9133 dataref_ptr
, vec_offsets
[vec_num
* j
+ i
],
9134 vec_oprnd
, final_mask
);
9135 vect_finish_stmt_generation (vinfo
, stmt_info
,
9138 else if (known_eq (nunits
, offset_nunits
* 2))
		  /* We have an offset vector with half the number of
		     lanes but the builtins will store full vectype
		     data from the lower lanes.  */
9143 new_stmt
= vect_build_one_scatter_store_call
9144 (vinfo
, stmt_info
, gsi
, &gs_info
,
9146 vec_offsets
[2 * vec_num
* j
+ 2 * i
],
9147 vec_oprnd
, final_mask
);
9148 vect_finish_stmt_generation (vinfo
, stmt_info
,
9150 int count
= nunits
.to_constant ();
9151 vec_perm_builder
sel (count
, count
, 1);
9152 sel
.quick_grow (count
);
9153 for (int i
= 0; i
< count
; ++i
)
9154 sel
[i
] = i
| (count
/ 2);
9155 vec_perm_indices
indices (sel
, 2, count
);
9157 = vect_gen_perm_mask_checked (vectype
, indices
);
9158 new_stmt
= gimple_build_assign (NULL_TREE
, VEC_PERM_EXPR
,
9159 vec_oprnd
, vec_oprnd
,
9161 vec_oprnd
= make_ssa_name (vectype
);
9162 gimple_set_lhs (new_stmt
, vec_oprnd
);
9163 vect_finish_stmt_generation (vinfo
, stmt_info
,
9167 new_stmt
= gimple_build_assign (NULL_TREE
,
9170 final_mask
= make_ssa_name
9171 (truth_type_for (gs_info
.offset_vectype
));
9172 gimple_set_lhs (new_stmt
, final_mask
);
9173 vect_finish_stmt_generation (vinfo
, stmt_info
,
9176 new_stmt
= vect_build_one_scatter_store_call
9177 (vinfo
, stmt_info
, gsi
, &gs_info
,
9179 vec_offsets
[2 * vec_num
* j
+ 2 * i
+ 1],
9180 vec_oprnd
, final_mask
);
9181 vect_finish_stmt_generation (vinfo
, stmt_info
,
9184 else if (known_eq (nunits
* 2, offset_nunits
))
		  /* We have an offset vector with double the number of
		     lanes.  Select the low/high part accordingly.  */
9188 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / 2];
9189 if ((vec_num
* j
+ i
) & 1)
9191 int count
= offset_nunits
.to_constant ();
9192 vec_perm_builder
sel (count
, count
, 1);
9193 sel
.quick_grow (count
);
9194 for (int i
= 0; i
< count
; ++i
)
9195 sel
[i
] = i
| (count
/ 2);
9196 vec_perm_indices
indices (sel
, 2, count
);
9197 tree perm_mask
= vect_gen_perm_mask_checked
9198 (TREE_TYPE (vec_offset
), indices
);
9199 new_stmt
= gimple_build_assign (NULL_TREE
,
9204 vec_offset
= make_ssa_name (TREE_TYPE (vec_offset
));
9205 gimple_set_lhs (new_stmt
, vec_offset
);
9206 vect_finish_stmt_generation (vinfo
, stmt_info
,
9209 new_stmt
= vect_build_one_scatter_store_call
9210 (vinfo
, stmt_info
, gsi
, &gs_info
,
9211 dataref_ptr
, vec_offset
,
9212 vec_oprnd
, final_mask
);
9213 vect_finish_stmt_generation (vinfo
, stmt_info
,
9221 /* Emulated scatter. */
9222 gcc_assert (!final_mask
);
9225 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
9226 /* For emulated scatter N offset vector element extracts
9227 (we assume the scalar scaling and ptr + offset add is
9228 consumed by the load). */
9230 += record_stmt_cost (cost_vec
, cnunits
, vec_to_scalar
,
9231 stmt_info
, 0, vect_body
);
9232 /* N scalar stores plus extracting the elements. */
9234 += record_stmt_cost (cost_vec
, cnunits
, vec_to_scalar
,
9235 stmt_info
, 0, vect_body
);
9237 += record_stmt_cost (cost_vec
, cnunits
, scalar_store
,
9238 stmt_info
, 0, vect_body
);
9242 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
9243 unsigned HOST_WIDE_INT const_offset_nunits
9244 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
).to_constant ();
9245 vec
<constructor_elt
, va_gc
> *ctor_elts
;
9246 vec_alloc (ctor_elts
, const_nunits
);
9247 gimple_seq stmts
= NULL
;
9248 tree elt_type
= TREE_TYPE (vectype
);
9249 unsigned HOST_WIDE_INT elt_size
9250 = tree_to_uhwi (TYPE_SIZE (elt_type
));
9251 /* We support offset vectors with more elements
9252 than the data vector for now. */
9253 unsigned HOST_WIDE_INT factor
9254 = const_offset_nunits
/ const_nunits
;
9255 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / factor
];
9257 = ((vec_num
* j
+ i
) % factor
) * const_nunits
;
9258 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
9259 tree scale
= size_int (gs_info
.scale
);
9260 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
9261 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
9262 for (unsigned k
= 0; k
< const_nunits
; ++k
)
9264 /* Compute the offsetted pointer. */
9265 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
9266 bitsize_int (k
+ elt_offset
));
9268 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
,
9269 vec_offset
, TYPE_SIZE (idx_type
), boff
);
9270 idx
= gimple_convert (&stmts
, sizetype
, idx
);
9271 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
,
9274 = gimple_build (&stmts
, PLUS_EXPR
,
9275 TREE_TYPE (dataref_ptr
),
9277 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
9278 /* Extract the element to be stored. */
9280 = gimple_build (&stmts
, BIT_FIELD_REF
,
9281 TREE_TYPE (vectype
),
9282 vec_oprnd
, TYPE_SIZE (elt_type
),
9283 bitsize_int (k
* elt_size
));
9284 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
9287 = build2 (MEM_REF
, ltype
, ptr
,
9288 build_int_cst (ref_type
, 0));
9289 new_stmt
= gimple_build_assign (ref
, elt
);
9290 vect_finish_stmt_generation (vinfo
, stmt_info
,
9294 slp_node
->push_vec_def (new_stmt
);
9297 if (!slp
&& !costing_p
)
9298 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9301 if (!slp
&& !costing_p
)
9302 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9304 if (costing_p
&& dump_enabled_p ())
9305 dump_printf_loc (MSG_NOTE
, vect_location
,
9306 "vect_model_store_cost: inside_cost = %d, "
9307 "prologue_cost = %d .\n",
9308 inside_cost
, prologue_cost
);
9313 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
9314 || memory_access_type
== VMAT_CONTIGUOUS_DOWN
9315 || memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
9316 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
);
9318 unsigned inside_cost
= 0, prologue_cost
= 0;
  /* For costing some adjacent vector stores, we'd like to cost with
     the total number of them once instead of costing each one by one.  */
9321 unsigned int n_adjacent_stores
= 0;
9322 auto_vec
<tree
> result_chain (group_size
);
9323 auto_vec
<tree
, 1> vec_oprnds
;
9324 for (j
= 0; j
< ncopies
; j
++)
9329 if (slp
&& !costing_p
)
9331 /* Get vectorized arguments for SLP_NODE. */
9332 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1, op
,
9333 &vec_oprnds
, mask
, &vec_masks
);
9334 vec_oprnd
= vec_oprnds
[0];
9336 vec_mask
= vec_masks
[0];
9340 /* For interleaved stores we collect vectorized defs for all the
9341 stores in the group in DR_CHAIN. DR_CHAIN is then used as an
9342 input to vect_permute_store_chain().
9344 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
9346 stmt_vec_info next_stmt_info
= first_stmt_info
;
9347 for (i
= 0; i
< group_size
; i
++)
9349 /* Since gaps are not supported for interleaved stores,
9350 DR_GROUP_SIZE is the exact number of stmts in the chain.
9351 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
9352 that there is no interleaving, DR_GROUP_SIZE is 1,
9353 and only one iteration of the loop will be executed. */
9354 op
= vect_get_store_rhs (next_stmt_info
);
9356 update_prologue_cost (&prologue_cost
, op
);
9359 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
9362 vec_oprnd
= (*gvec_oprnds
[i
])[0];
9363 dr_chain
.quick_push (vec_oprnd
);
9365 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9367 if (mask
&& !costing_p
)
9369 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
9372 vec_mask
= vec_masks
[0];
	  /* We should have caught mismatched types earlier.  */
9377 gcc_assert (costing_p
9378 || useless_type_conversion_p (vectype
,
9379 TREE_TYPE (vec_oprnd
)));
9380 bool simd_lane_access_p
9381 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
9383 && simd_lane_access_p
9385 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9386 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9387 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9388 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9389 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9390 get_alias_set (TREE_TYPE (ref_type
))))
9392 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9393 dataref_offset
= build_int_cst (ref_type
, 0);
9395 else if (!costing_p
)
9397 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9398 simd_lane_access_p
? loop
: NULL
,
9399 offset
, &dummy
, gsi
, &ptr_incr
,
9400 simd_lane_access_p
, bump
);
9402 else if (!costing_p
)
9404 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
9405 /* DR_CHAIN is then used as an input to vect_permute_store_chain().
9406 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN is
9408 for (i
= 0; i
< group_size
; i
++)
9410 vec_oprnd
= (*gvec_oprnds
[i
])[j
];
9411 dr_chain
[i
] = vec_oprnd
;
9414 vec_mask
= vec_masks
[j
];
9416 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
9418 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9426 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
9429 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
9430 int nstmts
= ceil_log2 (group_size
) * group_size
;
9431 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
9432 stmt_info
, 0, vect_body
);
9433 if (dump_enabled_p ())
9434 dump_printf_loc (MSG_NOTE
, vect_location
,
9435 "vect_model_store_cost: "
9436 "strided group_size = %d .\n",
9440 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
9441 gsi
, &result_chain
);
9444 stmt_vec_info next_stmt_info
= first_stmt_info
;
9445 for (i
= 0; i
< vec_num
; i
++)
9450 vec_oprnd
= vec_oprnds
[i
];
9451 else if (grouped_store
)
9452 /* For grouped stores vectorized defs are interleaved in
9453 vect_permute_store_chain(). */
9454 vec_oprnd
= result_chain
[i
];
9457 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9460 inside_cost
+= record_stmt_cost (cost_vec
, 1, vec_perm
,
9461 stmt_info
, 0, vect_body
);
9464 tree perm_mask
= perm_mask_for_reverse (vectype
);
9465 tree perm_dest
= vect_create_destination_var (
9466 vect_get_store_rhs (stmt_info
), vectype
);
9467 tree new_temp
= make_ssa_name (perm_dest
);
9469 /* Generate the permute statement. */
9471 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
9472 vec_oprnd
, perm_mask
);
9473 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
,
9476 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9477 vec_oprnd
= new_temp
;
9483 n_adjacent_stores
++;
9487 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9488 if (!next_stmt_info
)
9495 tree final_mask
= NULL_TREE
;
9496 tree final_len
= NULL_TREE
;
9497 tree bias
= NULL_TREE
;
9499 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
9500 vec_num
* ncopies
, vectype
,
9502 if (slp
&& vec_mask
)
9503 vec_mask
= vec_masks
[i
];
9505 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
9509 /* Bump the vector pointer. */
9510 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9514 unsigned HOST_WIDE_INT align
;
9515 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9516 if (alignment_support_scheme
== dr_aligned
)
9518 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
9520 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
9524 misalign
= misalignment
;
9525 if (dataref_offset
== NULL_TREE
9526 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9527 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
9529 align
= least_bit_hwi (misalign
| align
);
9531 /* Compute IFN when LOOP_LENS or final_mask valid. */
9532 machine_mode vmode
= TYPE_MODE (vectype
);
9533 machine_mode new_vmode
= vmode
;
9534 internal_fn partial_ifn
= IFN_LAST
;
9537 opt_machine_mode new_ovmode
9538 = get_len_load_store_mode (vmode
, false, &partial_ifn
);
9539 new_vmode
= new_ovmode
.require ();
9541 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
9542 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
9543 vec_num
* ncopies
, vectype
,
9544 vec_num
* j
+ i
, factor
);
9546 else if (final_mask
)
9548 if (!can_vec_mask_load_store_p (
9549 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), false,
9554 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9558 /* Pass VF value to 'len' argument of
9559 MASK_LEN_STORE if LOOP_LENS is invalid. */
9560 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9564 /* Pass all ones value to 'mask' argument of
9565 MASK_LEN_STORE if final_mask is invalid. */
9566 mask_vectype
= truth_type_for (vectype
);
9567 final_mask
= build_minus_one_cst (mask_vectype
);
9573 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
9575 bias
= build_int_cst (intQI_type_node
, biasval
);
9578 /* Arguments are ready. Create the new vector stmt. */
9582 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9583 /* Need conversion if it's wrapped with VnQI. */
9584 if (vmode
!= new_vmode
)
9587 = build_vector_type_for_mode (unsigned_intQI_type_node
,
9589 tree var
= vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
9590 vec_oprnd
= build1 (VIEW_CONVERT_EXPR
, new_vtype
, vec_oprnd
);
9592 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, vec_oprnd
);
9593 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9597 if (partial_ifn
== IFN_MASK_LEN_STORE
)
9598 call
= gimple_build_call_internal (IFN_MASK_LEN_STORE
, 6,
9599 dataref_ptr
, ptr
, final_mask
,
9600 final_len
, bias
, vec_oprnd
);
9602 call
= gimple_build_call_internal (IFN_LEN_STORE
, 5,
9603 dataref_ptr
, ptr
, final_len
,
9605 gimple_call_set_nothrow (call
, true);
9606 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9609 else if (final_mask
)
9611 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
9613 = gimple_build_call_internal (IFN_MASK_STORE
, 4, dataref_ptr
,
9614 ptr
, final_mask
, vec_oprnd
);
9615 gimple_call_set_nothrow (call
, true);
9616 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9622 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
9623 dataref_offset
? dataref_offset
9624 : build_int_cst (ref_type
, 0));
9625 if (alignment_support_scheme
== dr_aligned
)
9628 TREE_TYPE (data_ref
)
9629 = build_aligned_type (TREE_TYPE (data_ref
),
9630 align
* BITS_PER_UNIT
);
9631 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9632 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
9633 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9639 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9640 if (!next_stmt_info
)
9643 if (!slp
&& !costing_p
)
9646 *vec_stmt
= new_stmt
;
9647 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9653 if (n_adjacent_stores
> 0)
9654 vect_get_store_cost (vinfo
, stmt_info
, n_adjacent_stores
,
9655 alignment_support_scheme
, misalignment
,
9656 &inside_cost
, cost_vec
);
      /* When vectorizing a store into the function result, assign a penalty
	 if the function returns in a multi-register location.  In this case
	 we assume we'll end up having to spill the vector result and do
	 piecewise loads as a conservative estimate.  */
      tree base = get_base_address (STMT_VINFO_DATA_REF (stmt_info)->ref);
	  && (TREE_CODE (base) == RESULT_DECL
	      || (DECL_P (base) && cfun_returns (base)))
	  && !aggregate_value_p (base, cfun->decl))
	  rtx reg = hard_function_value (TREE_TYPE (base), cfun->decl, 0, 1);
	  /* ??? Handle PARALLEL in some way.  */
	      int nregs = hard_regno_nregs (REGNO (reg), GET_MODE (reg));
	      /* Assume that a single reg-reg move is possible and cheap;
		 do not account for vector to gp register move cost.  */
		+= record_stmt_cost (cost_vec, ncopies, vector_store,
				     stmt_info, 0, vect_epilogue);
		+= record_stmt_cost (cost_vec, ncopies * nregs, scalar_load,
				     stmt_info, 0, vect_epilogue);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_model_store_cost: inside_cost = %d, "
			 "prologue_cost = %d .\n",
			 inside_cost, prologue_cost);
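      /* As a rough illustration of the penalty above (hypothetical numbers):
	 if the function returns its value in two hard registers, nregs == 2,
	 so per copy one extra vector_store plus two extra scalar_load costs
	 are attributed to the epilogue, modelling a spill of the vector
	 result followed by piecewise reloads.  */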
/* Given a vector type VECTYPE, turns permutation SEL into the equivalent
   VECTOR_CST mask.  No checks are made that the target platform supports the
   mask, so callers may wish to test can_vec_perm_const_p separately, or use
   vect_gen_perm_mask_checked.  */

vect_gen_perm_mask_any (tree vectype, const vec_perm_indices &sel)
  poly_uint64 nunits = sel.length ();
  gcc_assert (known_eq (nunits, TYPE_VECTOR_SUBPARTS (vectype)));
  mask_type = build_vector_type (ssizetype, nunits);
  return vec_perm_indices_to_tree (mask_type, sel);
/* Checked version of vect_gen_perm_mask_any.  Asserts can_vec_perm_const_p,
   i.e. that the target supports the pattern _for arbitrary input vectors_.  */

vect_gen_perm_mask_checked (tree vectype, const vec_perm_indices &sel)
  machine_mode vmode = TYPE_MODE (vectype);
  gcc_assert (can_vec_perm_const_p (vmode, vmode, sel));
  return vect_gen_perm_mask_any (vectype, sel);
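/* For instance, a caller that wants a mask selecting the high halves of two
   input vectors (as done for the gather code further below) could build it
   roughly like so, assuming COUNT is the constant number of lanes:

     vec_perm_builder sel (count, count, 1);
     sel.quick_grow (count);
     for (int i = 0; i < count; ++i)
       sel[i] = i | (count / 2);
     vec_perm_indices indices (sel, 2, count);
     tree mask = vect_gen_perm_mask_checked (vectype, indices);

   The resulting MASK is then suitable as the selector operand of a
   VEC_PERM_EXPR on two VECTYPE operands.  */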
/* Given vector variables X and Y that were generated for the scalar
   STMT_INFO, generate instructions to permute the vector elements of X and Y
   using permutation mask MASK_VEC, insert them at *GSI and return the
   permuted vector variable.  */

permute_vec_elements (vec_info *vinfo,
		      tree x, tree y, tree mask_vec, stmt_vec_info stmt_info,
		      gimple_stmt_iterator *gsi)
  tree vectype = TREE_TYPE (x);
  tree perm_dest, data_ref;
  tree scalar_dest = gimple_get_lhs (stmt_info->stmt);
  if (scalar_dest && TREE_CODE (scalar_dest) == SSA_NAME)
    perm_dest = vect_create_destination_var (scalar_dest, vectype);
  else
    perm_dest = vect_get_new_vect_var (vectype, vect_simple_var, NULL);
  data_ref = make_ssa_name (perm_dest);

  /* Generate the permute statement.  */
  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, x, y, mask_vec);
  vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi);
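/* The statement emitted above is a single VEC_PERM_EXPR; for example, with a
   selector of { 0, 2, 4, 6 } on two 4-lane inputs it would read,
   schematically (names purely illustrative):

     vect_perm_1 = VEC_PERM_EXPR <vx0, vx1, { 0, 2, 4, 6 }>;  */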
/* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
   inserting them on the loop's preheader edge.  Returns true if we were
   successful in doing so (and thus STMT_INFO can be moved then), otherwise
   returns false.  HOIST_P indicates whether we actually want to hoist the
   definitions of all SSA uses; it is false when we are only costing.  */

hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop, bool hoist_p)
  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	  /* Make sure we don't need to recurse.  While we could do so in
	     simple cases, when there are more complex use webs we don't have
	     an easy way to preserve stmt order to fulfil dependencies within
	     them.  */
	  if (gimple_code (def_stmt) == GIMPLE_PHI)
	  FOR_EACH_SSA_TREE_OPERAND (op2, def_stmt, i2, SSA_OP_USE)
	      gimple *def_stmt2 = SSA_NAME_DEF_STMT (op2);
	      if (!gimple_nop_p (def_stmt2)
		  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt2)))

  FOR_EACH_SSA_TREE_OPERAND (op, stmt_info->stmt, i, SSA_OP_USE)
      gimple *def_stmt = SSA_NAME_DEF_STMT (op);
      if (!gimple_nop_p (def_stmt)
	  && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
	  gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
	  gsi_remove (&gsi, false);
	  gsi_insert_on_edge_immediate (loop_preheader_edge (loop), def_stmt);
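/* Conceptually, for an invariant load such as (illustrative names only)

     loop:
       _1 = &a[16];          // in-loop def feeding the load address
       x_2 = MEM[(int *)_1];

   a successful hoist moves the definition of _1 to the preheader edge so
   that the invariant load itself can later be placed there as well, as done
   by the VMAT_INVARIANT handling in vectorizable_load below.  */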
/* vectorizable_load.

   Check if STMT_INFO reads a non-scalar data-ref (array/pointer/structure)
   that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at GSI.
   Return true if STMT_INFO is vectorizable in this way.  */

vectorizable_load (vec_info *vinfo,
		   stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
		   gimple **vec_stmt, slp_tree slp_node,
		   stmt_vector_for_cost *cost_vec)
  tree vec_dest = NULL;
  tree data_ref = NULL;
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = NULL;
  class loop *containing_loop = gimple_bb (stmt_info->stmt)->loop_father;
  bool nested_in_vect_loop = false;
  /* Avoid false positive uninitialized warning, see PR110652.  */
  tree new_temp = NULL_TREE;
  tree dataref_ptr = NULL_TREE;
  tree dataref_offset = NULL_TREE;
  gimple *ptr_incr = NULL;
  unsigned int group_size;
  poly_uint64 group_gap_adj;
  tree msq = NULL_TREE, lsq;
  tree realignment_token = NULL_TREE;
  vec<tree> dr_chain = vNULL;
  bool grouped_load = false;
  stmt_vec_info first_stmt_info;
  stmt_vec_info first_stmt_info_for_drptr = NULL;
  bool compute_in_loop = false;
  class loop *at_loop;
  bool slp = (slp_node != NULL);
  bool slp_perm = false;
  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
  gather_scatter_info gs_info;
  enum vect_def_type mask_dt = vect_unknown_def_type;
  if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)

  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def

  if (!STMT_VINFO_DATA_REF (stmt_info))

  tree mask = NULL_TREE, mask_vectype = NULL_TREE;
  int mask_index = -1;
  slp_tree slp_op = NULL;
  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
      scalar_dest = gimple_assign_lhs (assign);
      if (TREE_CODE (scalar_dest) != SSA_NAME)
      tree_code code = gimple_assign_rhs_code (assign);
      if (code != ARRAY_REF
	  && code != BIT_FIELD_REF
	  && code != INDIRECT_REF
	  && code != COMPONENT_REF
	  && code != IMAGPART_EXPR
	  && code != REALPART_EXPR
	  && TREE_CODE_CLASS (code) != tcc_declaration)
      gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
      if (!call || !gimple_call_internal_p (call))
      internal_fn ifn = gimple_call_internal_fn (call);
      if (!internal_load_fn_p (ifn))
      scalar_dest = gimple_call_lhs (call);
      mask_index = internal_fn_mask_index (ifn);
      if (mask_index >= 0 && slp_node)
	mask_index = vect_slp_child_index_for_operand
	  (call, mask_index, STMT_VINFO_GATHER_SCATTER_P (stmt_info));
	  && !vect_check_scalar_mask (vinfo, stmt_info, slp_node, mask_index,
				      &mask, &slp_op, &mask_dt, &mask_vectype))
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);

      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
      vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     this case.  */
    ncopies = vect_get_num_copies (loop_vinfo, vectype);

  gcc_assert (ncopies >= 1);

  /* FORNOW.  This restriction should be relaxed.  */
  if (nested_in_vect_loop && ncopies > 1)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "multiple types in nested loop.\n");

  /* Invalidate assumptions made by dependence analysis when vectorization
     on the unrolled body effectively re-orders stmts.  */
      && STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
      && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
		   STMT_VINFO_MIN_NEG_DIST (stmt_info)))
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "cannot perform implicit CSE when unrolling "
			 "with negative dependence distance\n");

  elem_type = TREE_TYPE (vectype);
  mode = TYPE_MODE (vectype);

  /* FORNOW.  In some cases we can vectorize even if the data-type is not
     supported (e.g. for data copies).  */
  if (optab_handler (mov_optab, mode) == CODE_FOR_nothing)
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Aligned load, but unsupported type.\n");
  /* Check if the load is a part of an interleaving chain.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
      grouped_load = true;
      gcc_assert (!nested_in_vect_loop);
      gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info));

      first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
      group_size = DR_GROUP_SIZE (first_stmt_info);

      /* Refuse non-SLP vectorization of SLP-only groups.  */
      if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "cannot vectorize load in non-SLP mode.\n");

      /* Invalidate assumptions made by dependence analysis when vectorization
	 on the unrolled body effectively re-orders stmts.  */
      if (STMT_VINFO_MIN_NEG_DIST (stmt_info) != 0
	  && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
		       STMT_VINFO_MIN_NEG_DIST (stmt_info)))
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "cannot perform implicit CSE when performing "
			     "group loads with negative dependence distance\n");

  if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
	  /* In BB vectorization we may not actually use a loaded vector
	     accessing elements in excess of DR_GROUP_SIZE.  */
	  stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
	  group_info = DR_GROUP_FIRST_ELEMENT (group_info);
	  unsigned HOST_WIDE_INT nunits;
	  unsigned j, k, maxk = 0;
	  FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), j, k)
	  tree vectype = SLP_TREE_VECTYPE (slp_node);
	  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
	      || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "BB vectorization with gaps at the end of "
				 "a load is not supported\n");

	  auto_vec<tree> tem;
	  if (!vect_transform_slp_perm_load (vinfo, slp_node, tem, NULL, vf,
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION,
				 "unsupported load permutation\n");
  vect_memory_access_type memory_access_type;
  enum dr_alignment_support alignment_support_scheme;
  poly_int64 poffset;
  internal_fn lanes_ifn;
  if (!get_load_store_type (vinfo, stmt_info, vectype, slp_node, mask, VLS_LOAD,
			    ncopies, &memory_access_type, &poffset,
			    &alignment_support_scheme, &misalignment, &gs_info,

      if (memory_access_type == VMAT_CONTIGUOUS)
	  machine_mode vec_mode = TYPE_MODE (vectype);
	  if (!VECTOR_MODE_P (vec_mode)
	      || !can_vec_mask_load_store_p (vec_mode,
					     TYPE_MODE (mask_vectype), true))
      else if (memory_access_type != VMAT_LOAD_STORE_LANES
	       && memory_access_type != VMAT_GATHER_SCATTER)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported access type for masked load.\n");
      else if (memory_access_type == VMAT_GATHER_SCATTER
	       && gs_info.ifn == IFN_LAST
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported masked emulated gather.\n");
      else if (memory_access_type == VMAT_ELEMENTWISE
	       || memory_access_type == VMAT_STRIDED_SLP)
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "unsupported masked strided access.\n");
  bool costing_p = !vec_stmt;

  if (costing_p) /* transformation not required.  */
	  && !vect_maybe_update_slp_op_vectype (slp_op,
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "incompatible vector types for invariants\n");

	STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) = memory_access_type;

	  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
	check_load_store_for_partial_vectors (loop_vinfo, vectype, slp_node,
					      VLS_LOAD, group_size,
					      memory_access_type, &gs_info,

      if (dump_enabled_p ()
	  && memory_access_type != VMAT_ELEMENTWISE
	  && memory_access_type != VMAT_GATHER_SCATTER
	  && alignment_support_scheme != dr_aligned)
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Vectorizing an unaligned access.\n");

      if (memory_access_type == VMAT_LOAD_STORE_LANES)
	vinfo->any_known_not_updated_vssa = true;

      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;

    gcc_assert (memory_access_type
		== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info));

  if (dump_enabled_p () && !costing_p)
    dump_printf_loc (MSG_NOTE, vect_location,
		     "transform load. ncopies = %d\n", ncopies);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info), *first_dr_info = NULL;
  ensure_base_align (dr_info);

  if (memory_access_type == VMAT_INVARIANT)
      gcc_assert (!grouped_load && !mask && !bb_vinfo);
      /* If we have versioned for aliasing or the loop doesn't
	 have any data dependencies that would preclude this,
	 then we are sure this is a loop invariant load and
	 thus we can insert it on the preheader edge.
	 TODO: hoist_defs_of_uses should ideally be computed
	 once at analysis time, remembered and used in the
      bool hoist_p = (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo)
		      && !nested_in_vect_loop
		      && hoist_defs_of_uses (stmt_info, loop, !costing_p));
	  enum vect_cost_model_location cost_loc
	    = hoist_p ? vect_prologue : vect_body;
	  unsigned int cost = record_stmt_cost (cost_vec, 1, scalar_load,
						stmt_info, 0, cost_loc);
	  cost += record_stmt_cost (cost_vec, 1, scalar_to_vec, stmt_info, 0,
	  unsigned int prologue_cost = hoist_p ? cost : 0;
	  unsigned int inside_cost = hoist_p ? 0 : cost;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "vect_model_load_cost: inside_cost = %d, "
			     "prologue_cost = %d .\n",
			     inside_cost, prologue_cost);
	  gassign *stmt = as_a <gassign *> (stmt_info->stmt);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "hoisting out of the vectorized loop: %G",
	  scalar_dest = copy_ssa_name (scalar_dest);
	  tree rhs = unshare_expr (gimple_assign_rhs1 (stmt));
	  edge pe = loop_preheader_edge (loop);
	  gphi *vphi = get_virtual_phi (loop->header);
	    vuse = PHI_ARG_DEF_FROM_EDGE (vphi, pe);
	    vuse = gimple_vuse (gsi_stmt (*gsi));
	  gimple *new_stmt = gimple_build_assign (scalar_dest, rhs);
	  gimple_set_vuse (new_stmt, vuse);
	  gsi_insert_on_edge_immediate (pe, new_stmt);

      /* These copies are all equivalent.  */
	new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
	  gimple_stmt_iterator gsi2 = *gsi;
	  new_temp = vect_init_vector (vinfo, stmt_info, scalar_dest,
      gimple *new_stmt = SSA_NAME_DEF_STMT (new_temp);
	  for (j = 0; j < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++j)
	    slp_node->push_vec_def (new_stmt);
	  for (j = 0; j < ncopies; ++j)
	    STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt);
	  *vec_stmt = new_stmt;
10226 if (memory_access_type
== VMAT_ELEMENTWISE
10227 || memory_access_type
== VMAT_STRIDED_SLP
)
10229 gimple_stmt_iterator incr_gsi
;
10234 vec
<constructor_elt
, va_gc
> *v
= NULL
;
10235 tree stride_base
, stride_step
, alias_off
;
10236 /* Checked by get_load_store_type. */
10237 unsigned int const_nunits
= nunits
.to_constant ();
10238 unsigned HOST_WIDE_INT cst_offset
= 0;
10240 unsigned int inside_cost
= 0;
10242 gcc_assert (!LOOP_VINFO_USING_PARTIAL_VECTORS_P (loop_vinfo
));
10243 gcc_assert (!nested_in_vect_loop
);
10247 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10248 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
10252 first_stmt_info
= stmt_info
;
10253 first_dr_info
= dr_info
;
10256 if (slp
&& grouped_load
)
10258 group_size
= DR_GROUP_SIZE (first_stmt_info
);
10259 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
10265 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
10266 * vect_get_place_in_interleaving_chain (stmt_info
,
10269 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
10274 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
10275 stride_base
= fold_build_pointer_plus (
10276 DR_BASE_ADDRESS (first_dr_info
->dr
),
10277 size_binop (PLUS_EXPR
, convert_to_ptrofftype (dr_offset
),
10278 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
10279 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
      /* For a load with loop-invariant (but other than power-of-2)
	 stride (i.e. not a grouped access) like so:

	   for (i = 0; i < n; i += stride)
	     ...

	 we generate a new induction variable and new accesses to
	 form a new vector (or vectors, depending on ncopies):

	   for (j = 0; ; j += VF*stride)
	     tmp1 = array[j];
	     tmp2 = array[j + stride];
	     ...
	     vectemp = {tmp1, tmp2, ...}  */
10297 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
10298 build_int_cst (TREE_TYPE (stride_step
), vf
));
10300 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
10302 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
10303 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
10304 create_iv (stride_base
, PLUS_EXPR
, ivstep
, NULL
,
10305 loop
, &incr_gsi
, insert_after
,
10308 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
10311 running_off
= offvar
;
10312 alias_off
= build_int_cst (ref_type
, 0);
10313 int nloads
= const_nunits
;
10315 tree ltype
= TREE_TYPE (vectype
);
10316 tree lvectype
= vectype
;
10317 auto_vec
<tree
> dr_chain
;
10318 if (memory_access_type
== VMAT_STRIDED_SLP
)
10320 if (group_size
< const_nunits
)
10322 /* First check if vec_init optab supports construction from vector
10323 elts directly. Otherwise avoid emitting a constructor of
10324 vector elements by performing the loads using an integer type
10325 of the same size, constructing a vector of those and then
10326 re-interpreting it as the original vector type. This avoids a
10327 huge runtime penalty due to the general inability to perform
10328 store forwarding from smaller stores to a larger load. */
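	      /* As a concrete (hypothetical) illustration: for a V4SI vectype
		 and group_size == 2, vector_vector_composition_type may
		 return a two-element vector of 64-bit chunks; each 64-bit
		 load then covers one whole group, the two loads are combined
		 with a CONSTRUCTOR, and a VIEW_CONVERT_EXPR reinterprets the
		 result as V4SI instead of constructing it element-wise.  */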
10331 = vector_vector_composition_type (vectype
,
10332 const_nunits
/ group_size
,
10334 if (vtype
!= NULL_TREE
)
10336 nloads
= const_nunits
/ group_size
;
10345 lnel
= const_nunits
;
10348 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
10350 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
10351 else if (nloads
== 1)
10356 /* For SLP permutation support we need to load the whole group,
10357 not only the number of vector stmts the permutation result
10361 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
10363 unsigned int const_vf
= vf
.to_constant ();
10364 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
10365 dr_chain
.create (ncopies
);
10368 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10370 unsigned int group_el
= 0;
10371 unsigned HOST_WIDE_INT
10372 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
10373 unsigned int n_groups
= 0;
10374 /* For costing some adjacent vector loads, we'd like to cost with
10375 the total number of them once instead of cost each one by one. */
10376 unsigned int n_adjacent_loads
= 0;
10377 for (j
= 0; j
< ncopies
; j
++)
10379 if (nloads
> 1 && !costing_p
)
10380 vec_alloc (v
, nloads
);
10381 gimple
*new_stmt
= NULL
;
10382 for (i
= 0; i
< nloads
; i
++)
10386 /* For VMAT_ELEMENTWISE, just cost it as scalar_load to
10387 avoid ICE, see PR110776. */
10388 if (VECTOR_TYPE_P (ltype
)
10389 && memory_access_type
!= VMAT_ELEMENTWISE
)
10390 n_adjacent_loads
++;
10392 inside_cost
+= record_stmt_cost (cost_vec
, 1, scalar_load
,
10393 stmt_info
, 0, vect_body
);
10396 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
10397 group_el
* elsz
+ cst_offset
);
10398 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
10399 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
10400 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
10401 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10403 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
10404 gimple_assign_lhs (new_stmt
));
10408 || group_el
== group_size
)
10411 /* When doing SLP make sure to not load elements from
10412 the next vector iteration, those will not be accessed
10413 so just use the last element again. See PR107451. */
10414 if (!slp
|| known_lt (n_groups
, vf
))
10416 tree newoff
= copy_ssa_name (running_off
);
10418 = gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
10419 running_off
, stride_step
);
10420 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
10421 running_off
= newoff
;
10430 inside_cost
+= record_stmt_cost (cost_vec
, 1, vec_construct
,
10431 stmt_info
, 0, vect_body
);
10434 tree vec_inv
= build_constructor (lvectype
, v
);
10435 new_temp
= vect_init_vector (vinfo
, stmt_info
, vec_inv
,
10437 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
10438 if (lvectype
!= vectype
)
10441 = gimple_build_assign (make_ssa_name (vectype
),
10443 build1 (VIEW_CONVERT_EXPR
,
10444 vectype
, new_temp
));
10445 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
10456 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
10458 slp_node
->push_vec_def (new_stmt
);
10463 *vec_stmt
= new_stmt
;
10464 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10474 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
, vf
,
10475 true, &n_perms
, &n_loads
);
10476 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
10477 first_stmt_info
, 0, vect_body
);
10480 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
10486 if (n_adjacent_loads
> 0)
10487 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
10488 alignment_support_scheme
, misalignment
, false,
10489 &inside_cost
, nullptr, cost_vec
, cost_vec
,
10491 if (dump_enabled_p ())
10492 dump_printf_loc (MSG_NOTE
, vect_location
,
10493 "vect_model_load_cost: inside_cost = %u, "
10494 "prologue_cost = 0 .\n",
10501 if (memory_access_type
== VMAT_GATHER_SCATTER
10502 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
10503 grouped_load
= false;
10506 || (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()))
10510 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10511 group_size
= DR_GROUP_SIZE (first_stmt_info
);
10515 first_stmt_info
= stmt_info
;
10518 /* For SLP vectorization we directly vectorize a subchain
10519 without permutation. */
10520 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
10521 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
10522 /* For BB vectorization always use the first stmt to base
10523 the data ref pointer on. */
10525 first_stmt_info_for_drptr
10526 = vect_find_first_scalar_stmt_in_slp (slp_node
);
10528 /* Check if the chain of loads is already vectorized. */
10529 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
10530 /* For SLP we would need to copy over SLP_TREE_VEC_DEFS.
10531 ??? But we can only do so if there is exactly one
10532 as we have no way to get at the rest. Leave the CSE
10534 ??? With the group load eventually participating
10535 in multiple different permutations (having multiple
10536 slp nodes which refer to the same group) the CSE
10537 is even wrong code. See PR56270. */
10540 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10543 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
10546 /* VEC_NUM is the number of vect stmts to be created for this group. */
10549 grouped_load
= false;
10550 /* If an SLP permutation is from N elements to N elements,
10551 and if one vector holds a whole number of N, we can load
10552 the inputs to the permutation in the same way as an
10553 unpermuted sequence. In other cases we need to load the
10554 whole group, not only the number of vector stmts the
10555 permutation result fits in. */
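	  /* In the computation just below, for instance (hypothetical
	     numbers): with group_size == 2, a constant VF of 4 and
	     nunits == 8, the whole group is covered by
	     vec_num = CEIL (2 * 4, 8) = 1 vector stmt and
	     group_gap_adj = 4 * 2 - 8 * 1 = 0; with nunits == 4 instead,
	     vec_num would be 2, again with no gap adjustment.  */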
10556 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
10558 && (group_size
!= scalar_lanes
10559 || !multiple_p (nunits
, group_size
)))
10561 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
10562 variable VF; see vect_transform_slp_perm_load. */
10563 unsigned int const_vf
= vf
.to_constant ();
10564 unsigned int const_nunits
= nunits
.to_constant ();
10565 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
10566 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
10570 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10572 = group_size
- scalar_lanes
;
10576 vec_num
= group_size
;
10578 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
10582 first_stmt_info
= stmt_info
;
10583 first_dr_info
= dr_info
;
10584 group_size
= vec_num
= 1;
10586 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
10588 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
10591 gcc_assert (alignment_support_scheme
);
10592 vec_loop_masks
*loop_masks
10593 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
10594 ? &LOOP_VINFO_MASKS (loop_vinfo
)
10596 vec_loop_lens
*loop_lens
10597 = (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
)
10598 ? &LOOP_VINFO_LENS (loop_vinfo
)
  /* The vect_transform_stmt and vect_analyze_stmt will go here but there
     are some differences here.  We cannot enable both the lens and masks
     during transform but it is allowed during analysis.
     Shouldn't go with the length-based approach if fully masked.  */
  if (cost_vec == NULL)
    /* The cost_vec is NULL during transform.  */
    gcc_assert ((!loop_lens || !loop_masks));
  /* Targets with store-lane instructions must not require explicit
     realignment.  vect_supportable_dr_alignment always returns either
     dr_aligned or dr_unaligned_supported for masked operations.  */
  gcc_assert ((memory_access_type != VMAT_LOAD_STORE_LANES
	      || alignment_support_scheme == dr_aligned
	      || alignment_support_scheme == dr_unaligned_supported);

  /* In case the vectorization factor (VF) is bigger than the number
     of elements that we can fit in a vectype (nunits), we have to generate
     more than one vector stmt - i.e - we need to "unroll" the
     vector stmt by a factor VF/nunits.  In doing so, we record a pointer
     from one copy of the vector stmt to the next, in the field
     STMT_VINFO_RELATED_STMT.  This is necessary in order to allow following
     stages to find the correct vector defs to be used when vectorizing
     stmts that use the defs of the current stmt.  The example below
     illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
     need to create 4 vectorized stmts):

     before vectorization:
				RELATED_STMT	VEC_STMT

     step 1: vectorize stmt S1:
     We first create the vector stmt VS1_0, and, as usual, record a
     pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
     Next, we create the vector stmt VS1_1, and record a pointer to
     it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
     Similarly, for VS1_2 and VS1_3.  This is the resulting chain of
     stmts and pointers:
				RELATED_STMT	VEC_STMT
     VS1_0:  vx0 = memref0	VS1_1		-
     VS1_1:  vx1 = memref1	VS1_2		-
     VS1_2:  vx2 = memref2	VS1_3		-
     VS1_3:  vx3 = memref3	-		-
     S1:     x = load		-		VS1_0  */

  /* In case of interleaving (non-unit grouped access):

     Vectorized loads are created in the order of memory accesses
     starting from the access of the first stmt of the chain:

     VS2: vx1 = &base + vec_size*1
     VS3: vx3 = &base + vec_size*2
     VS4: vx4 = &base + vec_size*3

     Then permutation statements are generated:

     VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
     VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >

     And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
     (the order of the data-refs in the output of vect_permute_load_chain
     corresponds to the order of scalar stmts in the interleaving chain - see
     the documentation of vect_permute_load_chain()).
     The generation of permutation stmts and recording them in
     STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().

     In case of both multiple types and interleaving, the vector loads and
     permutation stmts above are created for every copy.  The result vector
     stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
     corresponding STMT_VINFO_RELATED_STMT for the next copies.  */

  /* If the data reference is aligned (dr_aligned) or potentially unaligned
     on a target that supports unaligned accesses (dr_unaligned_supported)
     we generate the following code:

       p = p + indx * vectype_size;

     Otherwise, the data reference is potentially unaligned on a target that
     does not support unaligned accesses (dr_explicit_realign_optimized) -
     then generate the following code, in which the data in each iteration is
     obtained by two vector loads, one from the previous iteration, and one
     from the current iteration:

       msq_init = *(floor(p1))
       p2 = initial_addr + VS - 1;
       realignment_token = call target_builtin;
	 p2 = p2 + indx * vectype_size
	 vec_dest = realign_load (msq, lsq, realignment_token)  */

  /* If the misalignment remains the same throughout the execution of the
     loop, we can create the init_addr and permutation mask at the loop
     preheader.  Otherwise, it needs to be created inside the loop.
     This can only occur when vectorizing memory accesses in the inner-loop
     nested within an outer-loop that is being vectorized.  */
10718 if (nested_in_vect_loop
10719 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
10720 GET_MODE_SIZE (TYPE_MODE (vectype
))))
10722 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
10723 compute_in_loop
= true;
10726 bool diff_first_stmt_info
10727 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
10729 tree offset
= NULL_TREE
;
10730 if ((alignment_support_scheme
== dr_explicit_realign_optimized
10731 || alignment_support_scheme
== dr_explicit_realign
)
10732 && !compute_in_loop
)
10734 /* If we have different first_stmt_info, we can't set up realignment
10735 here, since we can't guarantee first_stmt_info DR has been
10736 initialized yet, use first_stmt_info_for_drptr DR by bumping the
10737 distance from first_stmt_info DR instead as below. */
10740 if (!diff_first_stmt_info
)
10741 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
10742 &realignment_token
,
10743 alignment_support_scheme
, NULL_TREE
,
10745 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
10747 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
10748 offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
10750 gcc_assert (!first_stmt_info_for_drptr
);
10757 if (!known_eq (poffset
, 0))
10759 ? size_binop (PLUS_EXPR
, offset
, size_int (poffset
))
10760 : size_int (poffset
));
10763 tree vec_offset
= NULL_TREE
;
10764 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10766 aggr_type
= NULL_TREE
;
10769 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
10771 aggr_type
= elem_type
;
10773 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, gsi
, &gs_info
,
10774 &bump
, &vec_offset
, loop_lens
);
10778 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10779 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
10781 aggr_type
= vectype
;
10782 bump
= vect_get_data_ptr_increment (vinfo
, gsi
, dr_info
, aggr_type
,
10783 memory_access_type
, loop_lens
);
10786 auto_vec
<tree
> vec_offsets
;
10787 auto_vec
<tree
> vec_masks
;
10788 if (mask
&& !costing_p
)
10791 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[mask_index
],
10794 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, mask
,
10795 &vec_masks
, mask_vectype
);
10798 tree vec_mask
= NULL_TREE
;
10799 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
10801 gcc_assert (alignment_support_scheme
== dr_aligned
10802 || alignment_support_scheme
== dr_unaligned_supported
);
10803 gcc_assert (grouped_load
&& !slp
);
10805 unsigned int inside_cost
= 0, prologue_cost
= 0;
10806 /* For costing some adjacent vector loads, we'd like to cost with
10807 the total number of them once instead of cost each one by one. */
10808 unsigned int n_adjacent_loads
= 0;
10809 for (j
= 0; j
< ncopies
; j
++)
10813 /* An IFN_LOAD_LANES will load all its vector results,
10814 regardless of which ones we actually need. Account
10815 for the cost of unused results. */
10816 if (first_stmt_info
== stmt_info
)
10818 unsigned int gaps
= DR_GROUP_SIZE (first_stmt_info
);
10819 stmt_vec_info next_stmt_info
= first_stmt_info
;
10823 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
10825 while (next_stmt_info
);
10828 if (dump_enabled_p ())
10829 dump_printf_loc (MSG_NOTE
, vect_location
,
10830 "vect_model_load_cost: %d "
10831 "unused vectors.\n",
10833 vect_get_load_cost (vinfo
, stmt_info
, gaps
,
10834 alignment_support_scheme
,
10835 misalignment
, false, &inside_cost
,
10836 &prologue_cost
, cost_vec
, cost_vec
,
10840 n_adjacent_loads
++;
10844 /* 1. Create the vector or array pointer update chain. */
10847 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10848 at_loop
, offset
, &dummy
, gsi
,
10849 &ptr_incr
, false, bump
);
10852 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10853 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
10857 vec_mask
= vec_masks
[j
];
10859 tree vec_array
= create_vector_array (vectype
, vec_num
);
10861 tree final_mask
= NULL_TREE
;
10862 tree final_len
= NULL_TREE
;
10863 tree bias
= NULL_TREE
;
10865 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
10866 ncopies
, vectype
, j
);
10868 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
, final_mask
,
10871 if (lanes_ifn
== IFN_MASK_LEN_LOAD_LANES
)
10874 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
10875 ncopies
, vectype
, j
, 1);
10877 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
10878 signed char biasval
10879 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
10880 bias
= build_int_cst (intQI_type_node
, biasval
);
10883 mask_vectype
= truth_type_for (vectype
);
10884 final_mask
= build_minus_one_cst (mask_vectype
);
10889 if (final_len
&& final_mask
)
10892 VEC_ARRAY = MASK_LEN_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10893 VEC_MASK, LEN, BIAS). */
10894 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10895 tree alias_ptr
= build_int_cst (ref_type
, align
);
10896 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD_LANES
, 5,
10897 dataref_ptr
, alias_ptr
,
10898 final_mask
, final_len
, bias
);
10900 else if (final_mask
)
10903 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
10905 unsigned int align
= TYPE_ALIGN (TREE_TYPE (vectype
));
10906 tree alias_ptr
= build_int_cst (ref_type
, align
);
10907 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
10908 dataref_ptr
, alias_ptr
,
10914 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
10915 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
10916 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
10918 gimple_call_set_lhs (call
, vec_array
);
10919 gimple_call_set_nothrow (call
, true);
10920 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
10922 dr_chain
.create (vec_num
);
10923 /* Extract each vector into an SSA_NAME. */
10924 for (i
= 0; i
< vec_num
; i
++)
10926 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
10928 dr_chain
.quick_push (new_temp
);
10931 /* Record the mapping between SSA_NAMEs and statements. */
10932 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
10934 /* Record that VEC_ARRAY is now dead. */
10935 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
10937 dr_chain
.release ();
10939 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10944 if (n_adjacent_loads
> 0)
10945 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
10946 alignment_support_scheme
, misalignment
, false,
10947 &inside_cost
, &prologue_cost
, cost_vec
,
10949 if (dump_enabled_p ())
10950 dump_printf_loc (MSG_NOTE
, vect_location
,
10951 "vect_model_load_cost: inside_cost = %u, "
10952 "prologue_cost = %u .\n",
10953 inside_cost
, prologue_cost
);
10959 if (memory_access_type
== VMAT_GATHER_SCATTER
)
10961 gcc_assert (alignment_support_scheme
== dr_aligned
10962 || alignment_support_scheme
== dr_unaligned_supported
);
10963 gcc_assert (!grouped_load
&& !slp_perm
);
10965 unsigned int inside_cost
= 0, prologue_cost
= 0;
10966 for (j
= 0; j
< ncopies
; j
++)
10968 /* 1. Create the vector or array pointer update chain. */
10969 if (j
== 0 && !costing_p
)
10971 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10972 vect_get_gather_scatter_ops (loop_vinfo
, loop
, stmt_info
,
10973 slp_node
, &gs_info
, &dataref_ptr
,
10977 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
10978 at_loop
, offset
, &dummy
, gsi
,
10979 &ptr_incr
, false, bump
);
10981 else if (!costing_p
)
10983 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
10984 if (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
10985 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
10986 gsi
, stmt_info
, bump
);
10989 gimple
*new_stmt
= NULL
;
10990 for (i
= 0; i
< vec_num
; i
++)
10992 tree final_mask
= NULL_TREE
;
10993 tree final_len
= NULL_TREE
;
10994 tree bias
= NULL_TREE
;
10998 vec_mask
= vec_masks
[vec_num
* j
+ i
];
11001 = vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
11002 vec_num
* ncopies
, vectype
,
11005 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
11006 final_mask
, vec_mask
, gsi
);
11008 if (i
> 0 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
11009 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
11010 gsi
, stmt_info
, bump
);
11013 /* 2. Create the vector-load in the loop. */
11014 unsigned HOST_WIDE_INT align
;
11015 if (gs_info
.ifn
!= IFN_LAST
)
11019 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
11021 = record_stmt_cost (cost_vec
, cnunits
, scalar_load
,
11022 stmt_info
, 0, vect_body
);
11025 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
11026 vec_offset
= vec_offsets
[vec_num
* j
+ i
];
11027 tree zero
= build_zero_cst (vectype
);
11028 tree scale
= size_int (gs_info
.scale
);
11030 if (gs_info
.ifn
== IFN_MASK_LEN_GATHER_LOAD
)
11034 = vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
11035 vec_num
* ncopies
, vectype
,
11036 vec_num
* j
+ i
, 1);
11039 = build_int_cst (sizetype
,
11040 TYPE_VECTOR_SUBPARTS (vectype
));
11041 signed char biasval
11042 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
11043 bias
= build_int_cst (intQI_type_node
, biasval
);
11046 mask_vectype
= truth_type_for (vectype
);
11047 final_mask
= build_minus_one_cst (mask_vectype
);
11052 if (final_len
&& final_mask
)
11054 = gimple_build_call_internal (IFN_MASK_LEN_GATHER_LOAD
, 7,
11055 dataref_ptr
, vec_offset
,
11056 scale
, zero
, final_mask
,
11058 else if (final_mask
)
11059 call
= gimple_build_call_internal (IFN_MASK_GATHER_LOAD
, 5,
11060 dataref_ptr
, vec_offset
,
11061 scale
, zero
, final_mask
);
11063 call
= gimple_build_call_internal (IFN_GATHER_LOAD
, 4,
11064 dataref_ptr
, vec_offset
,
11066 gimple_call_set_nothrow (call
, true);
11068 data_ref
= NULL_TREE
;
11070 else if (gs_info
.decl
)
11072 /* The builtin decls path for gather is legacy, x86 only. */
11073 gcc_assert (!final_len
&& nunits
.is_constant ());
11076 unsigned int cnunits
= vect_nunits_for_cost (vectype
);
11078 = record_stmt_cost (cost_vec
, cnunits
, scalar_load
,
11079 stmt_info
, 0, vect_body
);
11082 poly_uint64 offset_nunits
11083 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
11084 if (known_eq (nunits
, offset_nunits
))
11086 new_stmt
= vect_build_one_gather_load_call
11087 (vinfo
, stmt_info
, gsi
, &gs_info
,
11088 dataref_ptr
, vec_offsets
[vec_num
* j
+ i
],
11090 data_ref
= NULL_TREE
;
11092 else if (known_eq (nunits
, offset_nunits
* 2))
11094 /* We have a offset vector with half the number of
11095 lanes but the builtins will produce full vectype
11096 data with just the lower lanes filled. */
11097 new_stmt
= vect_build_one_gather_load_call
11098 (vinfo
, stmt_info
, gsi
, &gs_info
,
11099 dataref_ptr
, vec_offsets
[2 * vec_num
* j
+ 2 * i
],
11101 tree low
= make_ssa_name (vectype
);
11102 gimple_set_lhs (new_stmt
, low
);
11103 vect_finish_stmt_generation (vinfo
, stmt_info
,
11106 /* now put upper half of final_mask in final_mask low. */
11108 && !SCALAR_INT_MODE_P
11109 (TYPE_MODE (TREE_TYPE (final_mask
))))
11111 int count
= nunits
.to_constant ();
11112 vec_perm_builder
sel (count
, count
, 1);
11113 sel
.quick_grow (count
);
11114 for (int i
= 0; i
< count
; ++i
)
11115 sel
[i
] = i
| (count
/ 2);
11116 vec_perm_indices
indices (sel
, 2, count
);
11117 tree perm_mask
= vect_gen_perm_mask_checked
11118 (TREE_TYPE (final_mask
), indices
);
11119 new_stmt
= gimple_build_assign (NULL_TREE
,
11124 final_mask
= make_ssa_name (TREE_TYPE (final_mask
));
11125 gimple_set_lhs (new_stmt
, final_mask
);
11126 vect_finish_stmt_generation (vinfo
, stmt_info
,
11129 else if (final_mask
)
11131 new_stmt
= gimple_build_assign (NULL_TREE
,
11132 VEC_UNPACK_HI_EXPR
,
11134 final_mask
= make_ssa_name
11135 (truth_type_for (gs_info
.offset_vectype
));
11136 gimple_set_lhs (new_stmt
, final_mask
);
11137 vect_finish_stmt_generation (vinfo
, stmt_info
,
11141 new_stmt
= vect_build_one_gather_load_call
11142 (vinfo
, stmt_info
, gsi
, &gs_info
,
11144 vec_offsets
[2 * vec_num
* j
+ 2 * i
+ 1],
11146 tree high
= make_ssa_name (vectype
);
11147 gimple_set_lhs (new_stmt
, high
);
11148 vect_finish_stmt_generation (vinfo
, stmt_info
,
11151 /* compose low + high. */
11152 int count
= nunits
.to_constant ();
11153 vec_perm_builder
sel (count
, count
, 1);
11154 sel
.quick_grow (count
);
11155 for (int i
= 0; i
< count
; ++i
)
11156 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
11157 vec_perm_indices
indices (sel
, 2, count
);
11159 = vect_gen_perm_mask_checked (vectype
, indices
);
11160 new_stmt
= gimple_build_assign (NULL_TREE
,
11162 low
, high
, perm_mask
);
11163 data_ref
= NULL_TREE
;
11165 else if (known_eq (nunits
* 2, offset_nunits
))
11167 /* We have a offset vector with double the number of
11168 lanes. Select the low/high part accordingly. */
11169 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / 2];
11170 if ((vec_num
* j
+ i
) & 1)
11172 int count
= offset_nunits
.to_constant ();
11173 vec_perm_builder
sel (count
, count
, 1);
11174 sel
.quick_grow (count
);
11175 for (int i
= 0; i
< count
; ++i
)
11176 sel
[i
] = i
| (count
/ 2);
11177 vec_perm_indices
indices (sel
, 2, count
);
11178 tree perm_mask
= vect_gen_perm_mask_checked
11179 (TREE_TYPE (vec_offset
), indices
);
11180 new_stmt
= gimple_build_assign (NULL_TREE
,
11185 vec_offset
= make_ssa_name (TREE_TYPE (vec_offset
));
11186 gimple_set_lhs (new_stmt
, vec_offset
);
11187 vect_finish_stmt_generation (vinfo
, stmt_info
,
11190 new_stmt
= vect_build_one_gather_load_call
11191 (vinfo
, stmt_info
, gsi
, &gs_info
,
11192 dataref_ptr
, vec_offset
, final_mask
);
11193 data_ref
= NULL_TREE
;
11196 gcc_unreachable ();
11200 /* Emulated gather-scatter. */
11201 gcc_assert (!final_mask
);
11202 unsigned HOST_WIDE_INT const_nunits
= nunits
.to_constant ();
11205 /* For emulated gathers N offset vector element
11206 offset add is consumed by the load). */
11207 inside_cost
= record_stmt_cost (cost_vec
, const_nunits
,
11208 vec_to_scalar
, stmt_info
,
11210 /* N scalar loads plus gathering them into a
11213 = record_stmt_cost (cost_vec
, const_nunits
, scalar_load
,
11214 stmt_info
, 0, vect_body
);
11216 = record_stmt_cost (cost_vec
, 1, vec_construct
,
11217 stmt_info
, 0, vect_body
);
11220 unsigned HOST_WIDE_INT const_offset_nunits
11221 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
)
11223 vec
<constructor_elt
, va_gc
> *ctor_elts
;
11224 vec_alloc (ctor_elts
, const_nunits
);
11225 gimple_seq stmts
= NULL
;
11226 /* We support offset vectors with more elements
11227 than the data vector for now. */
11228 unsigned HOST_WIDE_INT factor
11229 = const_offset_nunits
/ const_nunits
;
11230 vec_offset
= vec_offsets
[(vec_num
* j
+ i
) / factor
];
11231 unsigned elt_offset
11232 = ((vec_num
* j
+ i
) % factor
) * const_nunits
;
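		    /* For example (hypothetical numbers): with
		       const_nunits == 4 and const_offset_nunits == 8 we get
		       factor == 2, so two consecutive data vectors share one
		       offset vector, and the second of the pair reads its
		       indices starting at lane elt_offset == 4.  */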
11233 tree idx_type
= TREE_TYPE (TREE_TYPE (vec_offset
));
11234 tree scale
= size_int (gs_info
.scale
);
11235 align
= get_object_alignment (DR_REF (first_dr_info
->dr
));
11236 tree ltype
= build_aligned_type (TREE_TYPE (vectype
), align
);
11237 for (unsigned k
= 0; k
< const_nunits
; ++k
)
11239 tree boff
= size_binop (MULT_EXPR
, TYPE_SIZE (idx_type
),
11240 bitsize_int (k
+ elt_offset
));
11242 = gimple_build (&stmts
, BIT_FIELD_REF
, idx_type
,
11243 vec_offset
, TYPE_SIZE (idx_type
), boff
);
11244 idx
= gimple_convert (&stmts
, sizetype
, idx
);
11245 idx
= gimple_build (&stmts
, MULT_EXPR
, sizetype
, idx
,
11247 tree ptr
= gimple_build (&stmts
, PLUS_EXPR
,
11248 TREE_TYPE (dataref_ptr
),
11250 ptr
= gimple_convert (&stmts
, ptr_type_node
, ptr
);
11251 tree elt
= make_ssa_name (TREE_TYPE (vectype
));
11252 tree ref
= build2 (MEM_REF
, ltype
, ptr
,
11253 build_int_cst (ref_type
, 0));
11254 new_stmt
= gimple_build_assign (elt
, ref
);
11255 gimple_set_vuse (new_stmt
, gimple_vuse (gsi_stmt (*gsi
)));
11256 gimple_seq_add_stmt (&stmts
, new_stmt
);
11257 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
, elt
);
11259 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
11260 new_stmt
= gimple_build_assign (
11261 NULL_TREE
, build_constructor (vectype
, ctor_elts
));
11262 data_ref
= NULL_TREE
;
11265 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11266 /* DATA_REF is null if we've already built the statement. */
11269 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11270 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11272 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11273 gimple_set_lhs (new_stmt
, new_temp
);
11274 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11276 /* Store vector loads in the corresponding SLP_NODE. */
11278 slp_node
->push_vec_def (new_stmt
);
11281 if (!slp
&& !costing_p
)
11282 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11285 if (!slp
&& !costing_p
)
11286 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11288 if (costing_p
&& dump_enabled_p ())
11289 dump_printf_loc (MSG_NOTE
, vect_location
,
11290 "vect_model_load_cost: inside_cost = %u, "
11291 "prologue_cost = %u .\n",
11292 inside_cost
, prologue_cost
);
11296 poly_uint64 group_elt
= 0;
11297 unsigned int inside_cost
= 0, prologue_cost
= 0;
11298 /* For costing some adjacent vector loads, we'd like to cost with
11299 the total number of them once instead of cost each one by one. */
11300 unsigned int n_adjacent_loads
= 0;
11301 for (j
= 0; j
< ncopies
; j
++)
11303 /* 1. Create the vector or array pointer update chain. */
11304 if (j
== 0 && !costing_p
)
11306 bool simd_lane_access_p
11307 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
11308 if (simd_lane_access_p
11309 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
11310 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
11311 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
11312 && integer_zerop (DR_INIT (first_dr_info
->dr
))
11313 && alias_sets_conflict_p (get_alias_set (aggr_type
),
11314 get_alias_set (TREE_TYPE (ref_type
)))
11315 && (alignment_support_scheme
== dr_aligned
11316 || alignment_support_scheme
== dr_unaligned_supported
))
11318 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
11319 dataref_offset
= build_int_cst (ref_type
, 0);
11321 else if (diff_first_stmt_info
)
11324 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
11325 aggr_type
, at_loop
, offset
, &dummy
,
11326 gsi
, &ptr_incr
, simd_lane_access_p
,
11328 /* Adjust the pointer by the difference to first_stmt. */
11329 data_reference_p ptrdr
11330 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
11332 = fold_convert (sizetype
,
11333 size_binop (MINUS_EXPR
,
11334 DR_INIT (first_dr_info
->dr
),
11336 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11338 if (alignment_support_scheme
== dr_explicit_realign
)
11340 msq
= vect_setup_realignment (vinfo
,
11341 first_stmt_info_for_drptr
, gsi
,
11342 &realignment_token
,
11343 alignment_support_scheme
,
11344 dataref_ptr
, &at_loop
);
11345 gcc_assert (!compute_in_loop
);
11350 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
11352 offset
, &dummy
, gsi
, &ptr_incr
,
11353 simd_lane_access_p
, bump
);
11355 else if (!costing_p
)
11357 gcc_assert (!LOOP_VINFO_USING_SELECT_VL_P (loop_vinfo
));
11358 if (dataref_offset
)
11359 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
11362 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11366 if (grouped_load
|| slp_perm
)
11367 dr_chain
.create (vec_num
);
11369 gimple
*new_stmt
= NULL
;
11370 for (i
= 0; i
< vec_num
; i
++)
11372 tree final_mask
= NULL_TREE
;
11373 tree final_len
= NULL_TREE
;
11374 tree bias
= NULL_TREE
;
11378 vec_mask
= vec_masks
[vec_num
* j
+ i
];
11380 final_mask
= vect_get_loop_mask (loop_vinfo
, gsi
, loop_masks
,
11381 vec_num
* ncopies
, vectype
,
11384 final_mask
= prepare_vec_mask (loop_vinfo
, mask_vectype
,
11385 final_mask
, vec_mask
, gsi
);
11388 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
11389 gsi
, stmt_info
, bump
);
11392 /* 2. Create the vector-load in the loop. */
11393 switch (alignment_support_scheme
)
11396 case dr_unaligned_supported
:
11401 unsigned int misalign
;
11402 unsigned HOST_WIDE_INT align
;
11403 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
11404 if (alignment_support_scheme
== dr_aligned
)
11406 else if (misalignment
== DR_MISALIGNMENT_UNKNOWN
)
11409 = dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
11413 misalign
= misalignment
;
11414 if (dataref_offset
== NULL_TREE
11415 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
11416 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
11418 align
= least_bit_hwi (misalign
| align
);
11420 /* Compute IFN when LOOP_LENS or final_mask valid. */
11421 machine_mode vmode
= TYPE_MODE (vectype
);
11422 machine_mode new_vmode
= vmode
;
11423 internal_fn partial_ifn
= IFN_LAST
;
11426 opt_machine_mode new_ovmode
11427 = get_len_load_store_mode (vmode
, true, &partial_ifn
);
11428 new_vmode
= new_ovmode
.require ();
11430 = (new_ovmode
== vmode
) ? 1 : GET_MODE_UNIT_SIZE (vmode
);
11431 final_len
= vect_get_loop_len (loop_vinfo
, gsi
, loop_lens
,
11432 vec_num
* ncopies
, vectype
,
11433 vec_num
* j
+ i
, factor
);
11435 else if (final_mask
)
11437 if (!can_vec_mask_load_store_p (
11438 vmode
, TYPE_MODE (TREE_TYPE (final_mask
)), true,
11440 gcc_unreachable ();
11443 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
11447 /* Pass VF value to 'len' argument of
11448 MASK_LEN_LOAD if LOOP_LENS is invalid. */
11449 final_len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
11453 /* Pass all ones value to 'mask' argument of
11454 MASK_LEN_LOAD if final_mask is invalid. */
11455 mask_vectype
= truth_type_for (vectype
);
11456 final_mask
= build_minus_one_cst (mask_vectype
);
11461 signed char biasval
11462 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
11464 bias
= build_int_cst (intQI_type_node
, biasval
);
11469 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
11471 if (partial_ifn
== IFN_MASK_LEN_LOAD
)
11472 call
= gimple_build_call_internal (IFN_MASK_LEN_LOAD
, 5,
11474 final_mask
, final_len
,
11477 call
= gimple_build_call_internal (IFN_LEN_LOAD
, 4,
11480 gimple_call_set_nothrow (call
, true);
11482 data_ref
= NULL_TREE
;
11484 /* Need conversion if it's wrapped with VnQI. */
11485 if (vmode
!= new_vmode
)
11487 tree new_vtype
= build_vector_type_for_mode (
11488 unsigned_intQI_type_node
, new_vmode
);
11490 = vect_get_new_ssa_name (new_vtype
, vect_simple_var
);
11491 gimple_set_lhs (call
, var
);
11492 vect_finish_stmt_generation (vinfo
, stmt_info
, call
,
11494 tree op
= build1 (VIEW_CONVERT_EXPR
, vectype
, var
);
11495 new_stmt
= gimple_build_assign (vec_dest
,
11496 VIEW_CONVERT_EXPR
, op
);
11499 else if (final_mask
)
11501 tree ptr
= build_int_cst (ref_type
, align
* BITS_PER_UNIT
);
11502 gcall
*call
= gimple_build_call_internal (IFN_MASK_LOAD
, 3,
11505 gimple_call_set_nothrow (call
, true);
11507 data_ref
= NULL_TREE
;
11511 tree ltype
= vectype
;
11512 tree new_vtype
= NULL_TREE
;
11513 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
11514 unsigned int vect_align
11515 = vect_known_alignment_in_bytes (first_dr_info
, vectype
);
11516 unsigned int scalar_dr_size
11517 = vect_get_scalar_dr_size (first_dr_info
);
11518 /* If there's no peeling for gaps but we have a gap
11519 with slp loads then load the lower half of the
11520 vector only. See get_group_load_store_type for
11521 when we apply this optimization. */
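		    /* For example (hypothetical numbers): with nunits == 4,
		       group_size == 4 and a trailing gap of 2, only the first
		       two lanes are ever used, so a half-width load (via
		       vector_vector_composition_type below) padded out with
		       zeros is emitted instead of reading past the group.  */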
11524 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) && gap
!= 0
11525 && known_eq (nunits
, (group_size
- gap
) * 2)
11526 && known_eq (nunits
, group_size
)
11527 && gap
>= (vect_align
/ scalar_dr_size
))
11531 = vector_vector_composition_type (vectype
, 2,
11533 if (new_vtype
!= NULL_TREE
)
11534 ltype
= half_vtype
;
11536 /* Try to use a single smaller load when we are about
11537 to load excess elements compared to the unrolled
11539 ??? This should cover the above case as well. */
11540 else if (known_gt ((vec_num
* j
+ i
+ 1) * nunits
,
11541 (group_size
* vf
- gap
)))
11543 if (known_ge ((vec_num
* j
+ i
+ 1) * nunits
11544 - (group_size
* vf
- gap
), nunits
))
11545 /* DR will be unused. */
11547 else if (known_ge (vect_align
,
11548 tree_to_poly_uint64
11549 (TYPE_SIZE_UNIT (vectype
))))
11550 /* Aligned access to excess elements is OK if
11551 at least one element is accessed in the
11557 = ((group_size
* vf
- gap
)
11558 - (vec_num
* j
+ i
) * nunits
);
11559 /* remain should now be > 0 and < nunits. */
11561 if (constant_multiple_p (nunits
, remain
, &num
))
11565 = vector_vector_composition_type (vectype
,
11571 /* Else use multiple loads or a masked load? */
11575 = (dataref_offset
? dataref_offset
11576 : build_int_cst (ref_type
, 0));
11579 else if (ltype
!= vectype
11580 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11582 poly_uint64 gap_offset
11583 = (tree_to_poly_uint64 (TYPE_SIZE_UNIT (vectype
))
11584 - tree_to_poly_uint64 (TYPE_SIZE_UNIT (ltype
)));
11585 tree gapcst
= build_int_cstu (ref_type
, gap_offset
);
11586 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
11591 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
11592 if (alignment_support_scheme
== dr_aligned
)
11595 TREE_TYPE (data_ref
)
11596 = build_aligned_type (TREE_TYPE (data_ref
),
11597 align
* BITS_PER_UNIT
);
11600 data_ref
= build_constructor (vectype
, NULL
);
11601 else if (ltype
!= vectype
)
11603 vect_copy_ref_info (data_ref
,
11604 DR_REF (first_dr_info
->dr
));
11605 tree tem
= make_ssa_name (ltype
);
11606 new_stmt
= gimple_build_assign (tem
, data_ref
);
11607 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
,
11610 vec
<constructor_elt
, va_gc
> *v
;
11611 /* We've computed 'num' above to statically two
11612 or via constant_multiple_p. */
11614 = (exact_div (tree_to_poly_uint64
11615 (TYPE_SIZE_UNIT (vectype
)),
11616 tree_to_poly_uint64
11617 (TYPE_SIZE_UNIT (ltype
)))
11619 vec_alloc (v
, num
);
11620 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11623 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11624 build_zero_cst (ltype
));
11625 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11629 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
11631 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
11632 build_zero_cst (ltype
));
11634 gcc_assert (new_vtype
!= NULL_TREE
);
11635 if (new_vtype
== vectype
)
11636 new_stmt
= gimple_build_assign (
11637 vec_dest
, build_constructor (vectype
, v
));
11640 tree new_vname
= make_ssa_name (new_vtype
);
11641 new_stmt
= gimple_build_assign (
11642 new_vname
, build_constructor (new_vtype
, v
));
11643 vect_finish_stmt_generation (vinfo
, stmt_info
,
11645 new_stmt
= gimple_build_assign (
11647 build1 (VIEW_CONVERT_EXPR
, vectype
, new_vname
));
11653 case dr_explicit_realign
:
11659 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
11661 if (compute_in_loop
)
11662 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
11663 &realignment_token
,
11664 dr_explicit_realign
,
11665 dataref_ptr
, NULL
);
11667 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
11668 ptr
= copy_ssa_name (dataref_ptr
);
11670 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
11671 // For explicit realign the target alignment should be
11672 // known at compile time.
11673 unsigned HOST_WIDE_INT align
11674 = DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
11675 new_stmt
= gimple_build_assign (
11676 ptr
, BIT_AND_EXPR
, dataref_ptr
,
11677 build_int_cst (TREE_TYPE (dataref_ptr
),
11678 -(HOST_WIDE_INT
) align
));
11679 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11681 = build2 (MEM_REF
, vectype
, ptr
, build_int_cst (ref_type
, 0));
11682 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11683 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11684 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11685 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11686 gimple_assign_set_lhs (new_stmt
, new_temp
);
11687 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
11688 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11691 bump
= size_binop (MULT_EXPR
, vs
, TYPE_SIZE_UNIT (elem_type
));
11692 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
11693 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
, stmt_info
,
11695 new_stmt
= gimple_build_assign (
11696 NULL_TREE
, BIT_AND_EXPR
, ptr
,
11697 build_int_cst (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
11698 if (TREE_CODE (ptr
) == SSA_NAME
)
11699 ptr
= copy_ssa_name (ptr
, new_stmt
);
11701 ptr
= make_ssa_name (TREE_TYPE (ptr
), new_stmt
);
11702 gimple_assign_set_lhs (new_stmt
, ptr
);
11703 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11705 = build2 (MEM_REF
, vectype
, ptr
, build_int_cst (ref_type
, 0));
11708 case dr_explicit_realign_optimized
:
11712 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
11713 new_temp
= copy_ssa_name (dataref_ptr
);
11715 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
11716 // We should only be doing this if we know the target
11717 // alignment at compile time.
11718 unsigned HOST_WIDE_INT align
11719 = DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
11720 new_stmt
= gimple_build_assign (
11721 new_temp
, BIT_AND_EXPR
, dataref_ptr
,
11722 build_int_cst (TREE_TYPE (dataref_ptr
),
11723 -(HOST_WIDE_INT
) align
));
11724 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11725 data_ref
= build2 (MEM_REF
, vectype
, new_temp
,
11726 build_int_cst (ref_type
, 0));
11730 gcc_unreachable ();
/* One common place to cost the vector load above for the different
   alignment support schemes.  */
/* For a VMAT_CONTIGUOUS_PERMUTE grouped load we only need to take
   care of the first stmt, whose stmt_info is first_stmt_info;
   iterating vec_num times on it covers the cost of the remaining
   stmts, consistent with the transform.  The prologue cost for
   realign only needs to be counted once for the whole group.  */
11743 bool first_stmt_info_p
= first_stmt_info
== stmt_info
;
11744 bool add_realign_cost
= first_stmt_info_p
&& i
== 0;
11745 if (memory_access_type
== VMAT_CONTIGUOUS
11746 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
11747 || (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
11748 && (!grouped_load
|| first_stmt_info_p
)))
11750 /* Leave realign cases alone to keep them simple. */
11751 if (alignment_support_scheme
== dr_explicit_realign_optimized
11752 || alignment_support_scheme
== dr_explicit_realign
)
11753 vect_get_load_cost (vinfo
, stmt_info
, 1,
11754 alignment_support_scheme
, misalignment
,
11755 add_realign_cost
, &inside_cost
,
11756 &prologue_cost
, cost_vec
, cost_vec
,
11759 n_adjacent_loads
++;
11764 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11765 /* DATA_REF is null if we've already built the statement. */
11768 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
11769 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
11771 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11772 gimple_set_lhs (new_stmt
, new_temp
);
11773 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11776 /* 3. Handle explicit realignment if necessary/supported.
11778 vec_dest = realign_load (msq, lsq, realignment_token) */
11780 && (alignment_support_scheme
== dr_explicit_realign_optimized
11781 || alignment_support_scheme
== dr_explicit_realign
))
11783 lsq
= gimple_assign_lhs (new_stmt
);
11784 if (!realignment_token
)
11785 realignment_token
= dataref_ptr
;
11786 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
11787 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
, msq
,
11788 lsq
, realignment_token
);
11789 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
11790 gimple_assign_set_lhs (new_stmt
, new_temp
);
11791 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
11793 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
11796 if (i
== vec_num
- 1 && j
== ncopies
- 1)
11797 add_phi_arg (phi
, lsq
, loop_latch_edge (containing_loop
),
11803 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
11806 inside_cost
= record_stmt_cost (cost_vec
, 1, vec_perm
,
11807 stmt_info
, 0, vect_body
);
11810 tree perm_mask
= perm_mask_for_reverse (vectype
);
11811 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
11812 perm_mask
, stmt_info
, gsi
);
11813 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
11817 /* Collect vector loads and later create their permutation in
11818 vect_transform_grouped_load (). */
11819 if (!costing_p
&& (grouped_load
|| slp_perm
))
11820 dr_chain
.quick_push (new_temp
);
11822 /* Store vector loads in the corresponding SLP_NODE. */
11823 if (!costing_p
&& slp
&& !slp_perm
)
11824 slp_node
->push_vec_def (new_stmt
);
/* With an SLP permutation we load the gaps as well; without one
   we need to skip the gaps after we manage to fully load
   all elements.  group_gap_adj is DR_GROUP_SIZE here.  */
11829 group_elt
+= nunits
;
11831 && maybe_ne (group_gap_adj
, 0U)
11833 && known_eq (group_elt
, group_size
- group_gap_adj
))
poly_wide_int bump_val
  = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step)
    == -1)
  bump_val = -bump_val;
tree bump = wide_int_to_tree (sizetype, bump_val);
11841 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11846 /* Bump the vector pointer to account for a gap or for excess
11847 elements loaded for a permuted SLP load. */
11849 && maybe_ne (group_gap_adj
, 0U)
poly_wide_int bump_val
  = (wi::to_wide (TYPE_SIZE_UNIT (elem_type)) * group_gap_adj);
if (tree_int_cst_sgn (vect_dr_behavior (vinfo, dr_info)->step) == -1)
  bump_val = -bump_val;
tree bump = wide_int_to_tree (sizetype, bump_val);
11857 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
11861 if (slp
&& !slp_perm
)
11867 /* For SLP we know we've seen all possible uses of dr_chain so
11868 direct vect_transform_slp_perm_load to DCE the unused parts.
11869 ??? This is a hack to prevent compile-time issues as seen
11870 in PR101120 and friends. */
11873 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, nullptr, vf
,
11874 true, &n_perms
, nullptr);
11875 inside_cost
= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
11876 stmt_info
, 0, vect_body
);
11880 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
11881 gsi
, vf
, false, &n_perms
,
11890 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
/* We assume that the cost of a single load-lanes instruction
   is equivalent to the cost of DR_GROUP_SIZE separate loads.
   If a grouped access is instead being provided by a
   load-and-permute operation, include the cost of the
   permutes.  */
11896 if (costing_p
&& first_stmt_info
== stmt_info
)
/* Use even and odd extract operations, or shuffle operations,
   for each needed permute.  */
11900 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
11901 int nstmts
= ceil_log2 (group_size
) * group_size
;
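/* Worked example (illustrative): with group_size == 8 this gives
   ceil_log2 (8) * 8 == 3 * 8 == 24 permute statements, which is the
   count costed just below.  */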
11902 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
11903 stmt_info
, 0, vect_body
);
11905 if (dump_enabled_p ())
11906 dump_printf_loc (MSG_NOTE
, vect_location
,
11907 "vect_model_load_cost:"
11908 "strided group_size = %d .\n",
11911 else if (!costing_p
)
11913 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
11915 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11918 else if (!costing_p
)
11919 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
11921 dr_chain
.release ();
11923 if (!slp
&& !costing_p
)
11924 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
11928 gcc_assert (memory_access_type
== VMAT_CONTIGUOUS
11929 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
11930 || memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
);
11931 if (n_adjacent_loads
> 0)
11932 vect_get_load_cost (vinfo
, stmt_info
, n_adjacent_loads
,
11933 alignment_support_scheme
, misalignment
, false,
11934 &inside_cost
, &prologue_cost
, cost_vec
, cost_vec
,
11936 if (dump_enabled_p ())
11937 dump_printf_loc (MSG_NOTE
, vect_location
,
11938 "vect_model_load_cost: inside_cost = %u, "
11939 "prologue_cost = %u .\n",
11940 inside_cost
, prologue_cost
);
/* Function vect_is_simple_cond.

   Input:
   LOOP - the loop that is being vectorized.
   COND - Condition that is checked for simple use.

   Output:
   *COMP_VECTYPE - the vector type for the comparison.
   *DTS - The def types for the arguments of the comparison.

   Returns whether a COND can be vectorized.  Checks whether
   condition operands are supportable using vect_is_simple_use.  */
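/* Illustrative examples (hypothetical SSA names): for a condition
   _1 < _2 both operands are checked with vect_is_simple_use; in
   _1 < 5 the constant operand simply gets vect_constant_def; and a
   bare boolean SSA name _3 used directly as COND is accepted when
   its vector type is a vector boolean type.  */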
11960 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
11961 slp_tree slp_node
, tree
*comp_vectype
,
11962 enum vect_def_type
*dts
, tree vectype
)
11965 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
11969 if (TREE_CODE (cond
) == SSA_NAME
11970 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
11972 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
11973 &slp_op
, &dts
[0], comp_vectype
)
11975 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
11980 if (!COMPARISON_CLASS_P (cond
))
11983 lhs
= TREE_OPERAND (cond
, 0);
11984 rhs
= TREE_OPERAND (cond
, 1);
11986 if (TREE_CODE (lhs
) == SSA_NAME
)
11988 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
11989 &lhs
, &slp_op
, &dts
[0], &vectype1
))
11992 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
11993 || TREE_CODE (lhs
) == FIXED_CST
)
11994 dts
[0] = vect_constant_def
;
11998 if (TREE_CODE (rhs
) == SSA_NAME
)
12000 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
12001 &rhs
, &slp_op
, &dts
[1], &vectype2
))
12004 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
12005 || TREE_CODE (rhs
) == FIXED_CST
)
12006 dts
[1] = vect_constant_def
;
12010 if (vectype1
&& vectype2
12011 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
12012 TYPE_VECTOR_SUBPARTS (vectype2
)))
12015 *comp_vectype
= vectype1
? vectype1
: vectype2
;
12016 /* Invariant comparison. */
12017 if (! *comp_vectype
)
12019 tree scalar_type
= TREE_TYPE (lhs
);
12020 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
12021 *comp_vectype
= truth_type_for (vectype
);
12024 /* If we can widen the comparison to match vectype do so. */
12025 if (INTEGRAL_TYPE_P (scalar_type
)
12027 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
12028 TYPE_SIZE (TREE_TYPE (vectype
))))
12029 scalar_type
= build_nonstandard_integer_type
12030 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
12031 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
/* vectorizable_condition.

   Check if STMT_INFO is a conditional modify expression that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create
   a vectorized stmt using VEC_COND_EXPR to replace it, put it in
   VEC_STMT, and insert it at GSI.

   When STMT_INFO is vectorized as a nested cycle, for_reduction is true.

   Return true if STMT_INFO is vectorizable in this way.  */
12051 vectorizable_condition (vec_info
*vinfo
,
12052 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
12054 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12056 tree scalar_dest
= NULL_TREE
;
12057 tree vec_dest
= NULL_TREE
;
12058 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
12059 tree then_clause
, else_clause
;
12060 tree comp_vectype
= NULL_TREE
;
12061 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
12062 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
12065 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12066 enum vect_def_type dts
[4]
12067 = {vect_unknown_def_type
, vect_unknown_def_type
,
12068 vect_unknown_def_type
, vect_unknown_def_type
};
12072 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
12074 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12075 vec
<tree
> vec_oprnds0
= vNULL
;
12076 vec
<tree
> vec_oprnds1
= vNULL
;
12077 vec
<tree
> vec_oprnds2
= vNULL
;
12078 vec
<tree
> vec_oprnds3
= vNULL
;
12080 bool masked
= false;
12082 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
/* Is this a vectorizable conditional operation? */
12086 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
12090 code
= gimple_assign_rhs_code (stmt
);
12091 if (code
!= COND_EXPR
)
12094 stmt_vec_info reduc_info
= NULL
;
12095 int reduc_index
= -1;
12096 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
12098 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
12103 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
12104 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
12105 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
12106 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
12107 || reduc_index
!= -1);
12111 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
12115 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12116 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
12121 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
12125 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12129 gcc_assert (ncopies
>= 1);
12130 if (for_reduction
&& ncopies
> 1)
12131 return false; /* FORNOW */
12133 cond_expr
= gimple_assign_rhs1 (stmt
);
12135 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
12136 &comp_vectype
, &dts
[0], vectype
)
12140 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
12141 slp_tree then_slp_node
, else_slp_node
;
12142 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
12143 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
12145 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
12146 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
12149 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
12152 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
12155 masked
= !COMPARISON_CLASS_P (cond_expr
);
12156 vec_cmp_type
= truth_type_for (comp_vectype
);
12158 if (vec_cmp_type
== NULL_TREE
)
12161 cond_code
= TREE_CODE (cond_expr
);
12164 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
12165 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
12168 /* For conditional reductions, the "then" value needs to be the candidate
12169 value calculated by this iteration while the "else" value needs to be
12170 the result carried over from previous iterations. If the COND_EXPR
12171 is the other way around, we need to swap it. */
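/* Illustrative sketch: for a conditional reduction written as
   res = cond ? res : val (reduc_index == 1) the carried value sits in
   the "then" slot, so below we invert the comparison (or, when it
   cannot be inverted, e.g. for NaN-honoring FP compares, set
   must_invert_cmp_result) and swap the then/else clauses so that the
   candidate value becomes "then" and the carried result "else".  */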
12172 bool must_invert_cmp_result
= false;
12173 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
12176 must_invert_cmp_result
= true;
12179 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
12180 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
12181 if (new_code
== ERROR_MARK
)
12182 must_invert_cmp_result
= true;
12185 cond_code
= new_code
;
12186 /* Make sure we don't accidentally use the old condition. */
12187 cond_expr
= NULL_TREE
;
12190 std::swap (then_clause
, else_clause
);
12193 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
12195 /* Boolean values may have another representation in vectors
12196 and therefore we prefer bit operations over comparison for
12197 them (which also works for scalar masks). We store opcodes
12198 to use in bitop1 and bitop2. Statement is vectorized as
12199 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
12200 depending on bitop1 and bitop2 arity. */
12204 bitop1
= BIT_NOT_EXPR
;
12205 bitop2
= BIT_AND_EXPR
;
12208 bitop1
= BIT_NOT_EXPR
;
12209 bitop2
= BIT_IOR_EXPR
;
12212 bitop1
= BIT_NOT_EXPR
;
12213 bitop2
= BIT_AND_EXPR
;
12214 std::swap (cond_expr0
, cond_expr1
);
12217 bitop1
= BIT_NOT_EXPR
;
12218 bitop2
= BIT_IOR_EXPR
;
12219 std::swap (cond_expr0
, cond_expr1
);
12222 bitop1
= BIT_XOR_EXPR
;
12225 bitop1
= BIT_XOR_EXPR
;
12226 bitop2
= BIT_NOT_EXPR
;
12231 cond_code
= SSA_NAME
;
12234 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
12235 && reduction_type
== EXTRACT_LAST_REDUCTION
12236 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
12238 if (dump_enabled_p ())
12239 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12240 "reduction comparison operation not supported.\n");
12246 if (bitop1
!= NOP_EXPR
)
12248 machine_mode mode
= TYPE_MODE (comp_vectype
);
12251 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
12252 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12255 if (bitop2
!= NOP_EXPR
)
12257 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
12259 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12264 vect_cost_for_stmt kind
= vector_stmt
;
12265 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12266 /* Count one reduction-like operation per vector. */
12267 kind
= vec_to_scalar
;
12268 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
)
12270 || (!expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
,
12272 || !expand_vec_cond_expr_p (vectype
, vec_cmp_type
,
12277 && (!vect_maybe_update_slp_op_vectype
12278 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
12280 && !vect_maybe_update_slp_op_vectype
12281 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
12282 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
12283 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
12285 if (dump_enabled_p ())
12286 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12287 "incompatible vector types for invariants\n");
12291 if (loop_vinfo
&& for_reduction
12292 && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
12294 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12296 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
12297 vectype
, OPTIMIZE_FOR_SPEED
))
12298 vect_record_loop_len (loop_vinfo
,
12299 &LOOP_VINFO_LENS (loop_vinfo
),
12300 ncopies
* vec_num
, vectype
, 1);
12302 vect_record_loop_mask (loop_vinfo
,
12303 &LOOP_VINFO_MASKS (loop_vinfo
),
12304 ncopies
* vec_num
, vectype
, NULL
);
12306 /* Extra inactive lanes should be safe for vect_nested_cycle. */
12307 else if (STMT_VINFO_DEF_TYPE (reduc_info
) != vect_nested_cycle
)
12309 if (dump_enabled_p ())
12310 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12311 "conditional reduction prevents the use"
12312 " of partial vectors.\n");
12313 LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
) = false;
12317 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
12318 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
12326 scalar_dest
= gimple_assign_lhs (stmt
);
12327 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
12328 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
12330 bool swap_cond_operands
= false;
12332 /* See whether another part of the vectorized code applies a loop
12333 mask to the condition, or to its inverse. */
12335 vec_loop_masks
*masks
= NULL
;
12336 vec_loop_lens
*lens
= NULL
;
12337 if (loop_vinfo
&& LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
))
12339 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12340 lens
= &LOOP_VINFO_LENS (loop_vinfo
);
12342 else if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
12344 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12345 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12348 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
12349 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12350 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12353 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
12354 tree_code orig_code
= cond
.code
;
12355 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
12356 if (!masked
&& loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12358 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12359 cond_code
= cond
.code
;
12360 swap_cond_operands
= true;
12364 /* Try the inverse of the current mask. We check if the
12365 inverse mask is live and if so we generate a negate of
12366 the current mask such that we still honor NaNs. */
12367 cond
.inverted_p
= true;
12368 cond
.code
= orig_code
;
12369 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
12371 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12372 cond_code
= cond
.code
;
12373 swap_cond_operands
= true;
12374 must_invert_cmp_result
= true;
12381 /* Handle cond expr. */
12383 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12384 cond_expr
, comp_vectype
, &vec_oprnds0
,
12385 then_clause
, vectype
, &vec_oprnds2
,
12386 reduction_type
!= EXTRACT_LAST_REDUCTION
12387 ? else_clause
: NULL
, vectype
, &vec_oprnds3
);
12389 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12390 cond_expr0
, comp_vectype
, &vec_oprnds0
,
12391 cond_expr1
, comp_vectype
, &vec_oprnds1
,
12392 then_clause
, vectype
, &vec_oprnds2
,
12393 reduction_type
!= EXTRACT_LAST_REDUCTION
12394 ? else_clause
: NULL
, vectype
, &vec_oprnds3
);
12396 /* Arguments are ready. Create the new vector stmt. */
12397 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
12399 vec_then_clause
= vec_oprnds2
[i
];
12400 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
12401 vec_else_clause
= vec_oprnds3
[i
];
12403 if (swap_cond_operands
)
12404 std::swap (vec_then_clause
, vec_else_clause
);
12407 vec_compare
= vec_cond_lhs
;
12410 vec_cond_rhs
= vec_oprnds1
[i
];
12411 if (bitop1
== NOP_EXPR
)
12413 gimple_seq stmts
= NULL
;
12414 vec_compare
= gimple_build (&stmts
, cond_code
, vec_cmp_type
,
12415 vec_cond_lhs
, vec_cond_rhs
);
12416 gsi_insert_before (gsi
, stmts
, GSI_SAME_STMT
);
12420 new_temp
= make_ssa_name (vec_cmp_type
);
12422 if (bitop1
== BIT_NOT_EXPR
)
12423 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
12427 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
12429 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12430 if (bitop2
== NOP_EXPR
)
12431 vec_compare
= new_temp
;
12432 else if (bitop2
== BIT_NOT_EXPR
12433 && reduction_type
!= EXTRACT_LAST_REDUCTION
)
12435 /* Instead of doing ~x ? y : z do x ? z : y. */
12436 vec_compare
= new_temp
;
12437 std::swap (vec_then_clause
, vec_else_clause
);
12441 vec_compare
= make_ssa_name (vec_cmp_type
);
12442 if (bitop2
== BIT_NOT_EXPR
)
12444 = gimple_build_assign (vec_compare
, bitop2
, new_temp
);
12447 = gimple_build_assign (vec_compare
, bitop2
,
12448 vec_cond_lhs
, new_temp
);
12449 vect_finish_stmt_generation (vinfo
, stmt_info
,
/* If we decided to apply a loop mask to the result of the vector
   comparison, AND the comparison with the mask now.  Later passes
   should then be able to reuse the AND results between multiple
   vector statements.

   For example:
12461 for (int i = 0; i < 100; ++i)
12462 x[i] = y[i] ? z[i] : 10;
   results in the following optimized GIMPLE:
12466 mask__35.8_43 = vect__4.7_41 != { 0, ... };
12467 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
12468 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
12469 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
12470 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
12471 vect_iftmp.11_47, { 10, ... }>;
   instead of using masked and unmasked forms of
   vec != { 0, ... } (masked in the MASK_LOAD,
   unmasked in the VEC_COND_EXPR).  */
12477 /* Force vec_compare to be an SSA_NAME rather than a comparison,
12478 in cases where that's necessary. */
12480 tree len
= NULL_TREE
, bias
= NULL_TREE
;
12481 if (masks
|| lens
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
12483 if (!is_gimple_val (vec_compare
))
12485 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
12486 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
12488 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12489 vec_compare
= vec_compare_name
;
12492 if (must_invert_cmp_result
)
12494 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
12495 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
12498 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12499 vec_compare
= vec_compare_name
;
12502 if (direct_internal_fn_supported_p (IFN_LEN_FOLD_EXTRACT_LAST
,
12503 vectype
, OPTIMIZE_FOR_SPEED
))
12507 len
= vect_get_loop_len (loop_vinfo
, gsi
, lens
,
12508 vec_num
* ncopies
, vectype
, i
, 1);
12509 signed char biasval
12510 = LOOP_VINFO_PARTIAL_LOAD_STORE_BIAS (loop_vinfo
);
12511 bias
= build_int_cst (intQI_type_node
, biasval
);
12515 len
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
12516 bias
= build_int_cst (intQI_type_node
, 0);
12522 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, vec_num
* ncopies
,
12524 tree tmp2
= make_ssa_name (vec_cmp_type
);
12526 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
12528 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
12529 vec_compare
= tmp2
;
12534 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
12536 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
12537 tree lhs
= gimple_get_lhs (old_stmt
);
12539 new_stmt
= gimple_build_call_internal
12540 (IFN_LEN_FOLD_EXTRACT_LAST
, 5, else_clause
, vec_compare
,
12541 vec_then_clause
, len
, bias
);
12543 new_stmt
= gimple_build_call_internal
12544 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
12546 gimple_call_set_lhs (new_stmt
, lhs
);
12547 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
12548 if (old_stmt
== gsi_stmt (*gsi
))
12549 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
12552 /* In this case we're moving the definition to later in the
12553 block. That doesn't matter because the only uses of the
12554 lhs are in phi statements. */
12555 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
12556 gsi_remove (&old_gsi
, true);
12557 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12562 new_temp
= make_ssa_name (vec_dest
);
12563 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
12564 vec_then_clause
, vec_else_clause
);
12565 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12568 slp_node
->push_vec_def (new_stmt
);
12570 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12574 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12576 vec_oprnds0
.release ();
12577 vec_oprnds1
.release ();
12578 vec_oprnds2
.release ();
12579 vec_oprnds3
.release ();
/* Helper of vectorizable_comparison.

   Check if STMT_INFO is a comparison expression CODE that can be
   vectorized.  If VEC_STMT is also passed, vectorize STMT_INFO: create
   a vectorized comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
12593 vectorizable_comparison_1 (vec_info
*vinfo
, tree vectype
,
12594 stmt_vec_info stmt_info
, tree_code code
,
12595 gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
12596 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12598 tree lhs
, rhs1
, rhs2
;
12599 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
12600 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
12602 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12603 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
12605 poly_uint64 nunits
;
12607 enum tree_code bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
12609 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12610 vec
<tree
> vec_oprnds0
= vNULL
;
12611 vec
<tree
> vec_oprnds1
= vNULL
;
12613 tree mask
= NULL_TREE
;
12615 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12618 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
12621 mask_type
= vectype
;
12622 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
12627 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12629 gcc_assert (ncopies
>= 1);
12631 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
12634 slp_tree slp_rhs1
, slp_rhs2
;
12635 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12636 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
12639 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
12640 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
12643 if (vectype1
&& vectype2
12644 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
12645 TYPE_VECTOR_SUBPARTS (vectype2
)))
12648 vectype
= vectype1
? vectype1
: vectype2
;
12650 /* Invariant comparison. */
12653 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
12654 vectype
= mask_type
;
12656 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
12658 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
12661 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
12664 /* Can't compare mask and non-mask types. */
12665 if (vectype1
&& vectype2
12666 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
12669 /* Boolean values may have another representation in vectors
12670 and therefore we prefer bit operations over comparison for
12671 them (which also works for scalar masks). We store opcodes
12672 to use in bitop1 and bitop2. Statement is vectorized as
12673 BITOP2 (rhs1 BITOP1 rhs2) or
12674 rhs1 BITOP2 (BITOP1 rhs2)
12675 depending on bitop1 and bitop2 arity. */
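/* Illustrative mapping for boolean (mask) operands, following the
   opcodes chosen below: x > y becomes x & ~y, x >= y becomes x | ~y,
   x == y becomes ~(x ^ y) and x != y becomes x ^ y, matching the
   BITOP2 (rhs1 BITOP1 rhs2) / rhs1 BITOP2 (BITOP1 rhs2) forms
   described above.  */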
12676 bool swap_p
= false;
12677 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
12679 if (code
== GT_EXPR
)
12681 bitop1
= BIT_NOT_EXPR
;
12682 bitop2
= BIT_AND_EXPR
;
12684 else if (code
== GE_EXPR
)
12686 bitop1
= BIT_NOT_EXPR
;
12687 bitop2
= BIT_IOR_EXPR
;
12689 else if (code
== LT_EXPR
)
12691 bitop1
= BIT_NOT_EXPR
;
12692 bitop2
= BIT_AND_EXPR
;
12695 else if (code
== LE_EXPR
)
12697 bitop1
= BIT_NOT_EXPR
;
12698 bitop2
= BIT_IOR_EXPR
;
12703 bitop1
= BIT_XOR_EXPR
;
12704 if (code
== EQ_EXPR
)
12705 bitop2
= BIT_NOT_EXPR
;
12711 if (bitop1
== NOP_EXPR
)
12713 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
12718 machine_mode mode
= TYPE_MODE (vectype
);
12721 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
12722 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12725 if (bitop2
!= NOP_EXPR
)
12727 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
12728 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
12733 /* Put types on constant and invariant SLP children. */
12735 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
12736 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
12738 if (dump_enabled_p ())
12739 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12740 "incompatible vector types for invariants\n");
12744 vect_model_simple_cost (vinfo
, stmt_info
,
12745 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
12746 dts
, ndts
, slp_node
, cost_vec
);
12753 lhs
= gimple_get_lhs (STMT_VINFO_STMT (stmt_info
));
12755 mask
= vect_create_destination_var (lhs
, mask_type
);
12757 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
12758 rhs1
, vectype
, &vec_oprnds0
,
12759 rhs2
, vectype
, &vec_oprnds1
);
12761 std::swap (vec_oprnds0
, vec_oprnds1
);
12763 /* Arguments are ready. Create the new vector stmt. */
12764 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
12767 vec_rhs2
= vec_oprnds1
[i
];
12770 new_temp
= make_ssa_name (mask
);
12772 new_temp
= make_temp_ssa_name (mask_type
, NULL
, "cmp");
12773 if (bitop1
== NOP_EXPR
)
12775 new_stmt
= gimple_build_assign (new_temp
, code
,
12776 vec_rhs1
, vec_rhs2
);
12777 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12781 if (bitop1
== BIT_NOT_EXPR
)
12782 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
12784 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
12786 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12787 if (bitop2
!= NOP_EXPR
)
12789 tree res
= make_ssa_name (mask
);
12790 if (bitop2
== BIT_NOT_EXPR
)
12791 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
12793 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
12795 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
12799 slp_node
->push_vec_def (new_stmt
);
12801 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
12805 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
12807 vec_oprnds0
.release ();
12808 vec_oprnds1
.release ();
/* vectorizable_comparison.

   Check if STMT_INFO is a comparison expression that can be vectorized.
   If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
   comparison, put it in VEC_STMT, and insert it at GSI.

   Return true if STMT_INFO is vectorizable in this way.  */
12822 vectorizable_comparison (vec_info
*vinfo
,
12823 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
12825 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12827 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
12829 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
12832 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
12835 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
12839 enum tree_code code
= gimple_assign_rhs_code (stmt
);
12840 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
12841 if (!vectorizable_comparison_1 (vinfo
, vectype
, stmt_info
, code
, gsi
,
12842 vec_stmt
, slp_node
, cost_vec
))
12846 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
/* Check to see if the current early break given in STMT_INFO is valid for
   vectorization.  */
12855 vectorizable_early_exit (vec_info
*vinfo
, stmt_vec_info stmt_info
,
12856 gimple_stmt_iterator
*gsi
, gimple
**vec_stmt
,
12857 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
12859 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
12861 || !is_a
<gcond
*> (STMT_VINFO_STMT (stmt_info
)))
12864 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_condition_def
)
12867 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
12870 DUMP_VECT_SCOPE ("vectorizable_early_exit");
12872 auto code
= gimple_cond_code (STMT_VINFO_STMT (stmt_info
));
12874 tree vectype
= NULL_TREE
;
12877 enum vect_def_type dt0
;
12878 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op0
, &slp_op0
, &dt0
,
12881 if (dump_enabled_p ())
12882 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12883 "use not simple.\n");
12890 machine_mode mode
= TYPE_MODE (vectype
);
12896 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
12898 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
12899 vec_loop_lens
*lens
= &LOOP_VINFO_LENS (loop_vinfo
);
12900 bool masked_loop_p
= LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
12901 bool len_loop_p
= LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo
);
12903 /* Now build the new conditional. Pattern gimple_conds get dropped during
12904 codegen so we must replace the original insn. */
12905 gimple
*orig_stmt
= STMT_VINFO_STMT (vect_orig_stmt (stmt_info
));
12906 gcond
*cond_stmt
= as_a
<gcond
*>(orig_stmt
);
/* When vectorizing we assume that if the branch edge is taken then we're
   exiting the loop.  This is not always the case, however, as the compiler
   will rewrite conditions to always be a comparison against 0.  To do this
   it sometimes flips the edges.  This is fine for scalar, but for vector we
   then have to flip the test, as we're still assuming that if the branch
   edge is taken we found the exit condition; i.e. we need to know whether
   we are generating a `forall` or an `exist` condition.  */
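/* Illustrative sketch: for a scalar early break such as
     if (a[i] == 42) break;
   the vector test is an `exist` test: the lane-wise compare results
   are reduced with BIT_IOR_EXPR and the branch tests the reduction
   != 0, i.e. exit if any lane matched.  When the true edge instead
   stays inside the loop the test below is flipped into a `forall`
   form: reduce with BIT_AND_EXPR and test == all-ones.  */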
12914 auto new_code
= NE_EXPR
;
12915 auto reduc_optab
= ior_optab
;
12916 auto reduc_op
= BIT_IOR_EXPR
;
12917 tree cst
= build_zero_cst (vectype
);
12918 edge exit_true_edge
= EDGE_SUCC (gimple_bb (cond_stmt
), 0);
12919 if (exit_true_edge
->flags
& EDGE_FALSE_VALUE
)
12920 exit_true_edge
= EDGE_SUCC (gimple_bb (cond_stmt
), 1);
12921 gcc_assert (exit_true_edge
->flags
& EDGE_TRUE_VALUE
);
12922 if (flow_bb_inside_loop_p (LOOP_VINFO_LOOP (loop_vinfo
),
12923 exit_true_edge
->dest
))
12925 new_code
= EQ_EXPR
;
12926 reduc_optab
= and_optab
;
12927 reduc_op
= BIT_AND_EXPR
;
12928 cst
= build_minus_one_cst (vectype
);
12931 /* Analyze only. */
12934 if (direct_optab_handler (cbranch_optab
, mode
) == CODE_FOR_nothing
)
12936 if (dump_enabled_p ())
12937 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12938 "can't vectorize early exit because the "
12939 "target doesn't support flag setting vector "
12945 && direct_optab_handler (reduc_optab
, mode
) == CODE_FOR_nothing
)
12947 if (dump_enabled_p ())
12948 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
12949 "can't vectorize early exit because the "
12950 "target does not support boolean vector %s "
12952 reduc_optab
== ior_optab
? "OR" : "AND",
12957 if (!vectorizable_comparison_1 (vinfo
, vectype
, stmt_info
, code
, gsi
,
12958 vec_stmt
, slp_node
, cost_vec
))
12961 if (LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo
))
12963 if (direct_internal_fn_supported_p (IFN_VCOND_MASK_LEN
, vectype
,
12964 OPTIMIZE_FOR_SPEED
))
12965 vect_record_loop_len (loop_vinfo
, lens
, ncopies
, vectype
, 1);
12967 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, NULL
);
12975 tree new_temp
= NULL_TREE
;
12976 gimple
*new_stmt
= NULL
;
12978 if (dump_enabled_p ())
12979 dump_printf_loc (MSG_NOTE
, vect_location
, "transform early-exit.\n");
12981 if (!vectorizable_comparison_1 (vinfo
, vectype
, stmt_info
, code
, gsi
,
12982 vec_stmt
, slp_node
, cost_vec
))
12983 gcc_unreachable ();
12985 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
12986 basic_block cond_bb
= gimple_bb (stmt
);
12987 gimple_stmt_iterator cond_gsi
= gsi_last_bb (cond_bb
);
12989 auto_vec
<tree
> stmts
;
12992 stmts
.safe_splice (SLP_TREE_VEC_DEFS (slp_node
));
12995 auto vec_stmts
= STMT_VINFO_VEC_STMTS (stmt_info
);
12996 stmts
.reserve_exact (vec_stmts
.length ());
12997 for (auto stmt
: vec_stmts
)
12998 stmts
.quick_push (gimple_assign_lhs (stmt
));
13001 /* Determine if we need to reduce the final value. */
13002 if (stmts
.length () > 1)
/* We build the reductions in a way to maintain as much parallelism as
   possible.  */
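/* Illustrative sketch: with four vector masks m0..m3 in the workset,
   the loop below pops pairs from the back and re-queues the partial
   results, producing (m3 | m2), (m1 | m0) and finally
   (m3 | m2) | (m1 | m0), a balanced tree rather than a linear chain
   (with BIT_AND_EXPR instead of BIT_IOR_EXPR for the forall case).  */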
13006 auto_vec
<tree
> workset (stmts
.length ());
13008 /* Mask the statements as we queue them up. Normally we loop over
13009 vec_num, but since we inspect the exact results of vectorization
13010 we don't need to and instead can just use the stmts themselves. */
13012 for (unsigned i
= 0; i
< stmts
.length (); i
++)
13015 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
, vectype
,
13018 = prepare_vec_mask (loop_vinfo
, TREE_TYPE (stmt_mask
), stmt_mask
,
13019 stmts
[i
], &cond_gsi
);
13020 workset
.quick_push (stmt_mask
);
13022 else if (len_loop_p
)
13023 for (unsigned i
= 0; i
< stmts
.length (); i
++)
13025 tree len_mask
= vect_gen_loop_len_mask (loop_vinfo
, gsi
, &cond_gsi
,
13026 lens
, ncopies
, vectype
,
13029 workset
.quick_push (len_mask
);
13032 workset
.splice (stmts
);
13034 while (workset
.length () > 1)
13036 new_temp
= make_temp_ssa_name (vectype
, NULL
, "vexit_reduc");
13037 tree arg0
= workset
.pop ();
13038 tree arg1
= workset
.pop ();
13039 new_stmt
= gimple_build_assign (new_temp
, reduc_op
, arg0
, arg1
);
13040 vect_finish_stmt_generation (loop_vinfo
, stmt_info
, new_stmt
,
13042 workset
.quick_insert (0, new_temp
);
13047 new_temp
= stmts
[0];
13051 = vect_get_loop_mask (loop_vinfo
, gsi
, masks
, ncopies
, vectype
, 0);
13052 new_temp
= prepare_vec_mask (loop_vinfo
, TREE_TYPE (mask
), mask
,
13053 new_temp
, &cond_gsi
);
13055 else if (len_loop_p
)
13056 new_temp
= vect_gen_loop_len_mask (loop_vinfo
, gsi
, &cond_gsi
, lens
,
13057 ncopies
, vectype
, new_temp
, 0, 1);
13060 gcc_assert (new_temp
);
13062 gimple_cond_set_condition (cond_stmt
, new_code
, new_temp
, cst
);
13063 update_stmt (orig_stmt
);
13066 SLP_TREE_VEC_DEFS (slp_node
).truncate (0);
13068 STMT_VINFO_VEC_STMTS (stmt_info
).truncate (0);
13071 *vec_stmt
= orig_stmt
;
13076 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
13077 can handle all live statements in the node. Otherwise return true
13078 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
13079 VEC_STMT_P is as for vectorizable_live_operation. */
13082 can_vectorize_live_stmts (vec_info
*vinfo
, stmt_vec_info stmt_info
,
13083 slp_tree slp_node
, slp_instance slp_node_instance
,
13085 stmt_vector_for_cost
*cost_vec
)
13087 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
13090 stmt_vec_info slp_stmt_info
;
13092 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
13094 if ((STMT_VINFO_LIVE_P (slp_stmt_info
)
13096 && LOOP_VINFO_EARLY_BREAKS (loop_vinfo
)
13097 && STMT_VINFO_DEF_TYPE (slp_stmt_info
)
13098 == vect_induction_def
))
13099 && !vectorizable_live_operation (vinfo
, slp_stmt_info
, slp_node
,
13100 slp_node_instance
, i
,
13101 vec_stmt_p
, cost_vec
))
13105 else if ((STMT_VINFO_LIVE_P (stmt_info
)
13106 || (LOOP_VINFO_EARLY_BREAKS (loop_vinfo
)
13107 && STMT_VINFO_DEF_TYPE (stmt_info
) == vect_induction_def
))
13108 && !vectorizable_live_operation (vinfo
, stmt_info
,
13109 slp_node
, slp_node_instance
, -1,
13110 vec_stmt_p
, cost_vec
))
13116 /* Make sure the statement is vectorizable. */
13119 vect_analyze_stmt (vec_info
*vinfo
,
13120 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
13121 slp_tree node
, slp_instance node_instance
,
13122 stmt_vector_for_cost
*cost_vec
)
13124 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
13125 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
13127 gimple_seq pattern_def_seq
;
13129 if (dump_enabled_p ())
13130 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
13133 if (gimple_has_volatile_ops (stmt_info
->stmt
))
13134 return opt_result::failure_at (stmt_info
->stmt
,
13136 " stmt has volatile operands: %G\n",
13139 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
13141 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
13143 gimple_stmt_iterator si
;
13145 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
13147 stmt_vec_info pattern_def_stmt_info
13148 = vinfo
->lookup_stmt (gsi_stmt (si
));
13149 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
13150 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
13152 /* Analyze def stmt of STMT if it's a pattern stmt. */
13153 if (dump_enabled_p ())
13154 dump_printf_loc (MSG_NOTE
, vect_location
,
13155 "==> examining pattern def statement: %G",
13156 pattern_def_stmt_info
->stmt
);
13159 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
13160 need_to_vectorize
, node
, node_instance
,
/* Skip stmts that do not need to be vectorized.  In loops this is expected
   to include:
   - the COND_EXPR which is the loop exit condition
   - any LABEL_EXPRs in the loop
   - computations that are used only for array indexing or loop control.
   In basic blocks we only analyze statements that are a part of some SLP
   instance, therefore, all the statements are relevant.

   A pattern statement needs to be analyzed instead of the original statement
   if the original statement is not relevant.  Otherwise, we analyze both
   statements.  In basic blocks we are called from some SLP instance
   traversal; we don't analyze pattern stmts there, since the pattern stmts
   will already be part of an SLP instance.  */
13182 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
13183 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
13184 && !STMT_VINFO_LIVE_P (stmt_info
))
13186 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
13187 && pattern_stmt_info
13188 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
13189 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
13191 /* Analyze PATTERN_STMT instead of the original stmt. */
13192 stmt_info
= pattern_stmt_info
;
13193 if (dump_enabled_p ())
13194 dump_printf_loc (MSG_NOTE
, vect_location
,
13195 "==> examining pattern statement: %G",
13200 if (dump_enabled_p ())
13201 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
13203 return opt_result::success ();
13206 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
13208 && pattern_stmt_info
13209 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
13210 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
13212 /* Analyze PATTERN_STMT too. */
13213 if (dump_enabled_p ())
13214 dump_printf_loc (MSG_NOTE
, vect_location
,
13215 "==> examining pattern statement: %G",
13216 pattern_stmt_info
->stmt
);
13219 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
13220 node_instance
, cost_vec
);
13225 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
13227 case vect_internal_def
:
13228 case vect_condition_def
:
13231 case vect_reduction_def
:
13232 case vect_nested_cycle
:
13233 gcc_assert (!bb_vinfo
13234 && (relevance
== vect_used_in_outer
13235 || relevance
== vect_used_in_outer_by_reduction
13236 || relevance
== vect_used_by_reduction
13237 || relevance
== vect_unused_in_scope
13238 || relevance
== vect_used_only_live
));
13241 case vect_induction_def
:
13242 case vect_first_order_recurrence
:
13243 gcc_assert (!bb_vinfo
);
13246 case vect_constant_def
:
13247 case vect_external_def
:
13248 case vect_unknown_def_type
:
13250 gcc_unreachable ();
13253 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
13255 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (node
);
13257 if (STMT_VINFO_RELEVANT_P (stmt_info
))
13259 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
13260 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
13261 || gimple_code (stmt_info
->stmt
) == GIMPLE_COND
13262 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
13263 *need_to_vectorize
= true;
13266 if (PURE_SLP_STMT (stmt_info
) && !node
)
13268 if (dump_enabled_p ())
13269 dump_printf_loc (MSG_NOTE
, vect_location
,
13270 "handled only by SLP analysis\n");
13271 return opt_result::success ();
13276 && (STMT_VINFO_RELEVANT_P (stmt_info
)
13277 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
13278 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
13279 -mveclibabi= takes preference over library functions with
13280 the simd attribute. */
13281 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13282 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
13284 || vectorizable_conversion (vinfo
, stmt_info
,
13285 NULL
, NULL
, node
, cost_vec
)
13286 || vectorizable_operation (vinfo
, stmt_info
,
13287 NULL
, NULL
, node
, cost_vec
)
13288 || vectorizable_assignment (vinfo
, stmt_info
,
13289 NULL
, NULL
, node
, cost_vec
)
13290 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13291 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13292 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13293 node
, node_instance
, cost_vec
)
13294 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13295 NULL
, node
, cost_vec
)
13296 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13297 || vectorizable_condition (vinfo
, stmt_info
,
13298 NULL
, NULL
, node
, cost_vec
)
13299 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
13301 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
13302 stmt_info
, NULL
, node
)
13303 || vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
13304 stmt_info
, NULL
, node
, cost_vec
)
13305 || vectorizable_early_exit (vinfo
, stmt_info
, NULL
, NULL
, node
,
13310 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
13311 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
13312 NULL
, NULL
, node
, cost_vec
)
13313 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
13315 || vectorizable_shift (vinfo
, stmt_info
,
13316 NULL
, NULL
, node
, cost_vec
)
13317 || vectorizable_operation (vinfo
, stmt_info
,
13318 NULL
, NULL
, node
, cost_vec
)
13319 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
13321 || vectorizable_load (vinfo
, stmt_info
,
13322 NULL
, NULL
, node
, cost_vec
)
13323 || vectorizable_store (vinfo
, stmt_info
,
13324 NULL
, NULL
, node
, cost_vec
)
13325 || vectorizable_condition (vinfo
, stmt_info
,
13326 NULL
, NULL
, node
, cost_vec
)
13327 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
13329 || vectorizable_phi (vinfo
, stmt_info
, NULL
, node
, cost_vec
)
13330 || vectorizable_early_exit (vinfo
, stmt_info
, NULL
, NULL
, node
,
13336 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
13339 return opt_result::failure_at (stmt_info
->stmt
,
13341 " relevant stmt not supported: %G",
/* Stmts that are (also) "live" (i.e. that are used out of the loop)
   need extra handling, except for vectorizable reductions.  */
13347 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
13348 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
13349 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
13350 stmt_info
, node
, node_instance
,
13352 return opt_result::failure_at (stmt_info
->stmt
,
13354 " live stmt not supported: %G",
13357 return opt_result::success ();
13361 /* Function vect_transform_stmt.
13363 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
13366 vect_transform_stmt (vec_info
*vinfo
,
13367 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
13368 slp_tree slp_node
, slp_instance slp_node_instance
)
13370 bool is_store
= false;
13371 gimple
*vec_stmt
= NULL
;
13374 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
13376 tree saved_vectype
= STMT_VINFO_VECTYPE (stmt_info
);
13378 STMT_VINFO_VECTYPE (stmt_info
) = SLP_TREE_VECTYPE (slp_node
);
13380 switch (STMT_VINFO_TYPE (stmt_info
))
13382 case type_demotion_vec_info_type
:
13383 case type_promotion_vec_info_type
:
13384 case type_conversion_vec_info_type
:
13385 done
= vectorizable_conversion (vinfo
, stmt_info
,
13386 gsi
, &vec_stmt
, slp_node
, NULL
);
13390 case induc_vec_info_type
:
13391 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
13392 stmt_info
, &vec_stmt
, slp_node
,
13397 case shift_vec_info_type
:
13398 done
= vectorizable_shift (vinfo
, stmt_info
,
13399 gsi
, &vec_stmt
, slp_node
, NULL
);
13403 case op_vec_info_type
:
13404 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
13409 case assignment_vec_info_type
:
13410 done
= vectorizable_assignment (vinfo
, stmt_info
,
13411 gsi
, &vec_stmt
, slp_node
, NULL
);
13415 case load_vec_info_type
:
13416 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
13421 case store_vec_info_type
:
13422 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
13424 && (++DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))
13425 < DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info
))))
/* In case of interleaving, the whole chain is vectorized when the
   last store in the chain is reached.  Store stmts before the last
   one are skipped, and their vec_stmt_info shouldn't be freed
   meanwhile.  */
13433 done
= vectorizable_store (vinfo
, stmt_info
,
13434 gsi
, &vec_stmt
, slp_node
, NULL
);
13440 case condition_vec_info_type
:
13441 done
= vectorizable_condition (vinfo
, stmt_info
,
13442 gsi
, &vec_stmt
, slp_node
, NULL
);
13446 case comparison_vec_info_type
:
13447 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13452 case call_vec_info_type
:
13453 done
= vectorizable_call (vinfo
, stmt_info
,
13454 gsi
, &vec_stmt
, slp_node
, NULL
);
13457 case call_simd_clone_vec_info_type
:
13458 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13462 case reduc_vec_info_type
:
13463 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13464 gsi
, &vec_stmt
, slp_node
);
13468 case cycle_phi_info_type
:
13469 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
13470 &vec_stmt
, slp_node
, slp_node_instance
);
13474 case lc_phi_info_type
:
13475 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
13476 stmt_info
, &vec_stmt
, slp_node
);
13480 case recurr_info_type
:
13481 done
= vectorizable_recurr (as_a
<loop_vec_info
> (vinfo
),
13482 stmt_info
, &vec_stmt
, slp_node
, NULL
);
13486 case phi_info_type
:
13487 done
= vectorizable_phi (vinfo
, stmt_info
, &vec_stmt
, slp_node
, NULL
);
13491 case loop_exit_ctrl_vec_info_type
:
13492 done
= vectorizable_early_exit (vinfo
, stmt_info
, gsi
, &vec_stmt
,
13498 if (!STMT_VINFO_LIVE_P (stmt_info
))
13500 if (dump_enabled_p ())
13501 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
13502 "stmt not supported.\n");
13503 gcc_unreachable ();
13508 if (!slp_node
&& vec_stmt
)
13509 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
13511 if (STMT_VINFO_TYPE (stmt_info
) != store_vec_info_type
)
13513 /* Handle stmts whose DEF is used outside the loop-nest that is
13514 being vectorized. */
13515 done
= can_vectorize_live_stmts (vinfo
, stmt_info
, slp_node
,
13516 slp_node_instance
, true, NULL
);
13521 STMT_VINFO_VECTYPE (stmt_info
) = saved_vectype
;
/* Remove a group of stores (for SLP or interleaving), free their
   stmt_vec_info.  */

void
vect_remove_stores (vec_info *vinfo, stmt_vec_info first_stmt_info)
{
  stmt_vec_info next_stmt_info = first_stmt_info;

  while (next_stmt_info)
    {
      stmt_vec_info tmp = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
      next_stmt_info = vect_orig_stmt (next_stmt_info);
      /* Free the attached stmt_vec_info and remove the stmt.  */
      vinfo->remove_stmt (next_stmt_info);
      next_stmt_info = tmp;
    }
}
/* If NUNITS is nonzero, return a vector type that contains NUNITS
   elements of type SCALAR_TYPE, or null if the target doesn't support
   such a vector type.
13549 If NUNITS is zero, return a vector type that contains elements of
13550 type SCALAR_TYPE, choosing whichever vector size the target prefers.
13552 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
13553 for this vectorization region and want to "autodetect" the best choice.
13554 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
13555 and we want the new type to be interoperable with it. PREVAILING_MODE
13556 in this case can be a scalar integer mode or a vector mode; when it
13557 is a vector mode, the function acts like a tree-level version of
13558 related_vector_mode. */
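/* Usage sketch (illustrative; results are target-dependent): on a
   target whose preferred SIMD mode for SImode is V4SImode,
     get_related_vectype_for_scalar_type (VOIDmode, intSI_type_node, 0)
   would return a four-element integer vector type, while passing
   nunits == 2 with a prevailing V4SImode mode would ask
   related_vector_mode for a two-element variant so the two types stay
   interoperable.  */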
13561 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
13562 tree scalar_type
, poly_uint64 nunits
)
13564 tree orig_scalar_type
= scalar_type
;
13565 scalar_mode inner_mode
;
13566 machine_mode simd_mode
;
13569 if ((!INTEGRAL_TYPE_P (scalar_type
)
13570 && !POINTER_TYPE_P (scalar_type
)
13571 && !SCALAR_FLOAT_TYPE_P (scalar_type
))
13572 || (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
13573 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
)))
13576 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
13578 /* Interoperability between modes requires one to be a constant multiple
13579 of the other, so that the number of vectors required for each operation
13580 is a compile-time constant. */
13581 if (prevailing_mode
!= VOIDmode
13582 && !constant_multiple_p (nunits
* nbytes
,
13583 GET_MODE_SIZE (prevailing_mode
))
13584 && !constant_multiple_p (GET_MODE_SIZE (prevailing_mode
),
/* For vector types of elements whose mode precision doesn't
   match their type's precision we use an element type of mode
   precision.  The vectorization routines will have to make sure
   they support the proper result truncation/extension.
   We also make sure to build vector types with INTEGER_TYPE
   component type only.  */
13594 if (INTEGRAL_TYPE_P (scalar_type
)
13595 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
13596 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
13597 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
13598 TYPE_UNSIGNED (scalar_type
));
13600 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
13601 When the component mode passes the above test simply use a type
13602 corresponding to that mode. The theory is that any use that
13603 would cause problems with this will disable vectorization anyway. */
13604 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
13605 && !INTEGRAL_TYPE_P (scalar_type
))
13606 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
13608 /* We can't build a vector type of elements with alignment bigger than
13610 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
13611 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
13612 TYPE_UNSIGNED (scalar_type
));
13614 /* If we felt back to using the mode fail if there was
13615 no scalar type for it. */
13616 if (scalar_type
== NULL_TREE
)
13619 /* If no prevailing mode was supplied, use the mode the target prefers.
13620 Otherwise lookup a vector mode based on the prevailing mode. */
13621 if (prevailing_mode
== VOIDmode
)
13623 gcc_assert (known_eq (nunits
, 0U));
13624 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
13625 if (SCALAR_INT_MODE_P (simd_mode
))
13627 /* Traditional behavior is not to take the integer mode
13628 literally, but simply to use it as a way of determining
13629 the vector size. It is up to mode_for_vector to decide
13630 what the TYPE_MODE should be.
13632 Note that nunits == 1 is allowed in order to support single
13633 element vector types. */
13634 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
13635 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
13639 else if (SCALAR_INT_MODE_P (prevailing_mode
)
13640 || !related_vector_mode (prevailing_mode
,
13641 inner_mode
, nunits
).exists (&simd_mode
))
13643 /* Fall back to using mode_for_vector, mostly in the hope of being
13644 able to use an integer mode. */
13645 if (known_eq (nunits
, 0U)
13646 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
13649 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
13653 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
13655 /* In cases where the mode was chosen by mode_for_vector, check that
13656 the target actually supports the chosen mode, or that it at least
13657 allows the vector mode to be replaced by a like-sized integer. */
13658 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
13659 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
13662 /* Re-attach the address-space qualifier if we canonicalized the scalar
13664 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
13665 return build_qualified_type
13666 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
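
/* A rough illustration of the two query modes above (the resulting modes
   are target dependent; a 128-bit preferred SIMD mode is assumed here):

     // Autodetect: let the target pick the vector size for 16-bit elements.
     tree v1 = get_related_vectype_for_scalar_type (VOIDmode,
						    short_integer_type_node, 0);
     // v1 could then be an 8 x short vector (V8HImode).

     // Interoperate with the already-chosen 16-byte vector mode, asking
     // for exactly 4 elements.
     tree v2 = get_related_vectype_for_scalar_type (TYPE_MODE (v1),
						    integer_type_node, 4);
     // v2 could then be a 4 x int vector, or NULL_TREE if the target
     // cannot provide one.  */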
/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  If GROUP_SIZE is nonzero and we're performing BB
   vectorization, make sure that the number of elements in the vector
   is no bigger than GROUP_SIZE.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type,
			     unsigned int group_size)
{
  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
						      scalar_type);
  if (vectype && vinfo->vector_mode == VOIDmode)
    vinfo->vector_mode = TYPE_MODE (vectype);

  /* Register the natural choice of vector type, before the group size
     has been applied.  */
  if (vectype)
    vinfo->used_vector_modes.add (TYPE_MODE (vectype));

  /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
     try again with an explicit number of elements.  */
  if (vectype
      && group_size
      && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype), group_size))
    {
      /* Start with the biggest number of units that fits within
	 GROUP_SIZE and halve it until we find a valid vector type.
	 Usually either the first attempt will succeed or all will
	 fail (in the latter case because GROUP_SIZE is too small
	 for the target), but it's possible that a target could have
	 a hole between supported vector types.

	 If GROUP_SIZE is not a power of 2, this has the effect of
	 trying the largest power of 2 that fits within the group,
	 even though the group is not a multiple of that vector size.
	 The BB vectorizer will then try to carve up the group into
	 smaller pieces.  */
      unsigned int nunits = 1 << floor_log2 (group_size);
      do
	{
	  vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode,
							 scalar_type, nunits);
	  nunits /= 2;
	}
      while (nunits > 1 && !vectype);
    }

  return vectype;
}
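
/* A rough illustration of the halving loop above: for a BB SLP group of
   6 ints,

     nunits = 1 << floor_log2 (6);   // == 4
     // 1st attempt: a 4-element vector of int
     // 2nd attempt (only if that fails): a 2-element vector of int

   so a group whose size is not a power of 2 is first served with the
   largest power-of-2 vector that fits inside it.  */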
/* Return the vector type corresponding to SCALAR_TYPE as supported
   by the target.  NODE, if nonnull, is the SLP tree node that will
   use the returned vector type.  */

tree
get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type, slp_tree node)
{
  unsigned int group_size = 0;
  if (node)
    group_size = SLP_TREE_LANES (node);
  return get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
}
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vector is no bigger
   than GROUP_SIZE.  */

tree
get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
			       unsigned int group_size)
{
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);

  if (!vectype)
    return NULL;

  return truth_type_for (vectype);
}
/* Function get_mask_type_for_scalar_type.

   Returns the mask type corresponding to a result of comparison
   of vectors of specified SCALAR_TYPE as supported by target.
   NODE, if nonnull, is the SLP tree node that will use the returned
   vector type.  */

tree
get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type,
			       slp_tree node)
{
  tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node);

  if (!vectype)
    return NULL;

  return truth_type_for (vectype);
}
/* Function get_same_sized_vectype

   Returns a vector type corresponding to SCALAR_TYPE of size
   VECTOR_TYPE if supported by the target.  */

tree
get_same_sized_vectype (tree scalar_type, tree vector_type)
{
  if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type))
    return truth_type_for (vector_type);

  poly_uint64 nunits;
  if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)),
		   GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits))
    return NULL_TREE;

  return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type),
					      scalar_type, nunits);
}
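
/* A rough illustration, assuming a 16-byte VECTOR_TYPE such as a
   16 x char vector: requesting the same-sized vector of 4-byte ints
   computes nunits = 16 / 4 = 4 and therefore asks for a 4 x int vector
   in the same 16-byte vector mode, if the target supports one.  */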
/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
   would not change the chosen vector modes.  */

bool
vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode)
{
  for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin ();
       i != vinfo->used_vector_modes.end (); ++i)
    if (!VECTOR_MODE_P (*i)
	|| related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i)
      return false;
  return true;
}
/* Function vect_is_simple_use.

   VINFO - the vect info of the loop or basic block that is being vectorized.
   OPERAND - operand in the loop or bb.
   DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
     case OPERAND is an SSA_NAME that is defined in the vectorizable region
   DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
     the definition could be anywhere in the function
   DT - the type of definition

   Returns whether a stmt with OPERAND can be vectorized.
   For loops, supportable operands are constants, loop invariants, and operands
   that are defined by the current iteration of the loop.  Unsupportable
   operands are those that are defined by a previous iteration of the loop (as
   is the case in reduction/induction computations).
   For basic blocks, supportable operands are constants and bb invariants.
   For now, operands defined outside the basic block are not supported.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    stmt_vec_info *def_stmt_info_out, gimple **def_stmt_out)
{
  if (def_stmt_info_out)
    *def_stmt_info_out = NULL;
  if (def_stmt_out)
    *def_stmt_out = NULL;
  *dt = vect_unknown_def_type;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
		       "vect_is_simple_use: operand ");
      if (TREE_CODE (operand) == SSA_NAME
	  && !SSA_NAME_IS_DEFAULT_DEF (operand))
	dump_gimple_expr (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (operand), 0);
      else
	dump_generic_expr (MSG_NOTE, TDF_SLIM, operand);
    }

  if (CONSTANT_CLASS_P (operand))
    *dt = vect_constant_def;
  else if (is_gimple_min_invariant (operand))
    *dt = vect_external_def;
  else if (TREE_CODE (operand) != SSA_NAME)
    *dt = vect_unknown_def_type;
  else if (SSA_NAME_IS_DEFAULT_DEF (operand))
    *dt = vect_external_def;
  else
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (operand);
      stmt_vec_info stmt_vinfo = vinfo->lookup_def (operand);
      if (!stmt_vinfo)
	*dt = vect_external_def;
      else
	{
	  stmt_vinfo = vect_stmt_to_vectorize (stmt_vinfo);
	  def_stmt = stmt_vinfo->stmt;
	  *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
	  if (def_stmt_info_out)
	    *def_stmt_info_out = stmt_vinfo;
	}
      if (def_stmt_out)
	*def_stmt_out = def_stmt;
    }

  if (dump_enabled_p ())
    {
      dump_printf (MSG_NOTE, ", type of def: ");
      switch (*dt)
	{
	case vect_uninitialized_def:
	  dump_printf (MSG_NOTE, "uninitialized\n");
	  break;
	case vect_constant_def:
	  dump_printf (MSG_NOTE, "constant\n");
	  break;
	case vect_external_def:
	  dump_printf (MSG_NOTE, "external\n");
	  break;
	case vect_internal_def:
	  dump_printf (MSG_NOTE, "internal\n");
	  break;
	case vect_induction_def:
	  dump_printf (MSG_NOTE, "induction\n");
	  break;
	case vect_reduction_def:
	  dump_printf (MSG_NOTE, "reduction\n");
	  break;
	case vect_double_reduction_def:
	  dump_printf (MSG_NOTE, "double reduction\n");
	  break;
	case vect_nested_cycle:
	  dump_printf (MSG_NOTE, "nested cycle\n");
	  break;
	case vect_first_order_recurrence:
	  dump_printf (MSG_NOTE, "first order recurrence\n");
	  break;
	case vect_condition_def:
	  dump_printf (MSG_NOTE, "control flow\n");
	  break;
	case vect_unknown_def_type:
	  dump_printf (MSG_NOTE, "unknown\n");
	  break;
	}
    }

  if (*dt == vect_unknown_def_type)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unsupported pattern.\n");
      return false;
    }

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but also determines the vector operand
   type of OPERAND and stores it to *VECTYPE.  If the definition of
   OPERAND is vect_uninitialized_def, vect_constant_def or
   vect_external_def *VECTYPE will be set to NULL_TREE and the caller
   is responsible to compute the best suited vector type for the
   scalar operand.  */

bool
vect_is_simple_use (tree operand, vec_info *vinfo, enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out,
		    gimple **def_stmt_out)
{
  stmt_vec_info def_stmt_info;
  gimple *def_stmt;
  if (!vect_is_simple_use (operand, vinfo, dt, &def_stmt_info, &def_stmt))
    return false;

  if (def_stmt_out)
    *def_stmt_out = def_stmt;
  if (def_stmt_info_out)
    *def_stmt_info_out = def_stmt_info;

  /* Now get a vector type if the def is internal, otherwise supply
     NULL_TREE and leave it up to the caller to figure out a proper
     type for the use stmt.  */
  if (*dt == vect_internal_def
      || *dt == vect_induction_def
      || *dt == vect_reduction_def
      || *dt == vect_double_reduction_def
      || *dt == vect_nested_cycle
      || *dt == vect_first_order_recurrence)
    {
      *vectype = STMT_VINFO_VECTYPE (def_stmt_info);
      gcc_assert (*vectype != NULL_TREE);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "vect_is_simple_use: vectype %T\n", *vectype);
    }
  else if (*dt == vect_uninitialized_def
	   || *dt == vect_constant_def
	   || *dt == vect_external_def)
    *vectype = NULL_TREE;
  else
    gcc_unreachable ();

  return true;
}
/* Function vect_is_simple_use.

   Same as vect_is_simple_use but determines the operand by operand
   position OPERAND from either STMT or SLP_NODE, filling in *OP
   and *SLP_DEF (when SLP_NODE is not NULL).  */

bool
vect_is_simple_use (vec_info *vinfo, stmt_vec_info stmt, slp_tree slp_node,
		    unsigned operand, tree *op, slp_tree *slp_def,
		    enum vect_def_type *dt,
		    tree *vectype, stmt_vec_info *def_stmt_info_out)
{
  if (slp_node)
    {
      slp_tree child = SLP_TREE_CHILDREN (slp_node)[operand];
      *slp_def = child;
      *vectype = SLP_TREE_VECTYPE (child);
      if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
	{
	  /* ??? VEC_PERM nodes might be intermediate and their lane value
	     have no representative (nor do we build a VEC_PERM stmt for
	     the actual operation).  Note for two-operator nodes we set
	     a representative but leave scalar stmts empty as we'd only
	     have one for a subset of lanes.  Ideally no caller would
	     require *op for internal defs.  */
	  if (SLP_TREE_REPRESENTATIVE (child))
	    {
	      *op = gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child)->stmt);
	      return vect_is_simple_use (*op, vinfo, dt, def_stmt_info_out);
	    }
	  gcc_assert (SLP_TREE_CODE (child) == VEC_PERM_EXPR);
	  *op = error_mark_node;
	  *dt = vect_internal_def;
	  if (def_stmt_info_out)
	    *def_stmt_info_out = NULL;
	  return true;
	}
      else
	{
	  if (def_stmt_info_out)
	    *def_stmt_info_out = NULL;
	  *op = SLP_TREE_SCALAR_OPS (child)[0];
	  *dt = SLP_TREE_DEF_TYPE (child);
	  return true;
	}
    }
  else
    {
      *slp_def = NULL;
      if (gassign *ass = dyn_cast <gassign *> (stmt->stmt))
	{
	  if (gimple_assign_rhs_code (ass) == COND_EXPR
	      && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass)))
	    {
	      if (operand < 2)
		*op = TREE_OPERAND (gimple_assign_rhs1 (ass), operand);
	      else
		*op = gimple_op (ass, operand);
	    }
	  else if (gimple_assign_rhs_code (ass) == VIEW_CONVERT_EXPR)
	    *op = TREE_OPERAND (gimple_assign_rhs1 (ass), 0);
	  else
	    *op = gimple_op (ass, operand + 1);
	}
      else if (gcond *cond = dyn_cast <gcond *> (stmt->stmt))
	*op = gimple_op (cond, operand);
      else if (gcall *call = dyn_cast <gcall *> (stmt->stmt))
	*op = gimple_call_arg (call, operand);
      else
	gcc_unreachable ();
      return vect_is_simple_use (*op, vinfo, dt, vectype, def_stmt_info_out);
    }
}
/* If OP is not NULL and is external or constant update its vector
   type with VECTYPE.  Returns true if successful or false if not,
   for example when conflicting vector types are present.  */

bool
vect_maybe_update_slp_op_vectype (slp_tree op, tree vectype)
{
  if (!op || SLP_TREE_DEF_TYPE (op) == vect_internal_def)
    return true;
  if (SLP_TREE_VECTYPE (op))
    return types_compatible_p (SLP_TREE_VECTYPE (op), vectype);
  /* For external defs refuse to produce VECTOR_BOOLEAN_TYPE_P, those
     should be handled by patterns.  Allow vect_constant_def for now.  */
  if (VECTOR_BOOLEAN_TYPE_P (vectype)
      && SLP_TREE_DEF_TYPE (op) == vect_external_def)
    return false;
  SLP_TREE_VECTYPE (op) = vectype;
  return true;
}
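
/* Typical use, sketched: a vectorizable_* routine that has settled on
   VECTYPE for an SLP operand calls

     if (!vect_maybe_update_slp_op_vectype (slp_op, vectype))
       return false;   // an earlier user recorded a conflicting type

   so the first caller records the type on a constant/external operand and
   later callers merely check compatibility.  */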
/* Function supportable_widening_operation

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   producing a result of type VECTYPE_OUT).

   Widening operations we currently support are NOP (CONVERT), FLOAT,
   FIX_TRUNC and WIDEN_MULT.  This function checks if these operations
   are supported by the target platform either directly (via vector
   tree-codes), or via target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like char->short->int - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   widening operation (short in the above example).  */

bool
supportable_widening_operation (vec_info *vinfo,
				code_helper code,
				stmt_vec_info stmt_info,
				tree vectype_out, tree vectype_in,
				code_helper *code1,
				code_helper *code2,
				int *multi_step_cvt,
				vec<tree> *interm_types)
{
  loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo);
  class loop *vect_loop = NULL;
  machine_mode vec_mode;
  enum insn_code icode1, icode2;
  optab optab1 = unknown_optab, optab2 = unknown_optab;
  tree vectype = vectype_in;
  tree wide_vectype = vectype_out;
  tree_code c1 = MAX_TREE_CODES, c2 = MAX_TREE_CODES;
  int i;
  tree prev_type, intermediate_type;
  machine_mode intermediate_mode, prev_mode;
  optab optab3, optab4;

  *multi_step_cvt = 0;
  if (loop_info)
    vect_loop = LOOP_VINFO_LOOP (loop_info);

  switch (code.safe_as_tree_code ())
    {
:
14135 /* Don't set c1 and c2 if code is not a tree_code. */
14138 case WIDEN_MULT_EXPR
:
14139 /* The result of a vectorized widening operation usually requires
14140 two vectors (because the widened results do not fit into one vector).
14141 The generated vector results would normally be expected to be
14142 generated in the same order as in the original scalar computation,
14143 i.e. if 8 results are generated in each vector iteration, they are
14144 to be organized as follows:
14145 vect1: [res1,res2,res3,res4],
14146 vect2: [res5,res6,res7,res8].
14148 However, in the special case that the result of the widening
14149 operation is used in a reduction computation only, the order doesn't
14150 matter (because when vectorizing a reduction we change the order of
14151 the computation). Some targets can take advantage of this and
14152 generate more efficient code. For example, targets like Altivec,
14153 that support widen_mult using a sequence of {mult_even,mult_odd}
14154 generate the following vectors:
14155 vect1: [res1,res3,res5,res7],
14156 vect2: [res2,res4,res6,res8].
14158 When vectorizing outer-loops, we execute the inner-loop sequentially
14159 (each vectorized inner-loop iteration contributes to VF outer-loop
14160 iterations in parallel). We therefore don't allow to change the
14161 order of the computation in the inner-loop during outer-loop
14163 /* TODO: Another case in which order doesn't *really* matter is when we
14164 widen and then contract again, e.g. (short)((int)x * y >> 8).
14165 Normally, pack_trunc performs an even/odd permute, whereas the
14166 repack from an even/odd expansion would be an interleave, which
14167 would be significantly simpler for e.g. AVX2. */
14168 /* In any case, in order to avoid duplicating the code below, recurse
14169 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
14170 are properly set up for the caller. If we fail, we'll continue with
14171 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
14173 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
14174 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
14175 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
14176 stmt_info
, vectype_out
,
14178 code2
, multi_step_cvt
,
14181 /* Elements in a vector with vect_used_by_reduction property cannot
14182 be reordered if the use chain with this property does not have the
14183 same operation. One such an example is s += a * b, where elements
14184 in a and b cannot be reordered. Here we check if the vector defined
14185 by STMT is only directly used in the reduction statement. */
14186 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
14187 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
14189 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
14192 c1
= VEC_WIDEN_MULT_LO_EXPR
;
14193 c2
= VEC_WIDEN_MULT_HI_EXPR
;
14196 case DOT_PROD_EXPR
:
14197 c1
= DOT_PROD_EXPR
;
14198 c2
= DOT_PROD_EXPR
;
14206 case VEC_WIDEN_MULT_EVEN_EXPR
:
14207 /* Support the recursion induced just above. */
14208 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
14209 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
14212 case WIDEN_LSHIFT_EXPR
:
14213 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
14214 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
14218 c1
= VEC_UNPACK_LO_EXPR
;
14219 c2
= VEC_UNPACK_HI_EXPR
;
14223 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
14224 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
14227 case FIX_TRUNC_EXPR
:
14228 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
14229 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
14233 gcc_unreachable ();
14236 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
14237 std::swap (c1
, c2
);
  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
    }
  else if (CONVERT_EXPR_CODE_P (code.safe_as_tree_code ())
	   && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
	   && VECTOR_BOOLEAN_TYPE_P (vectype)
	   && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
	   && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
    {
      /* If the input and result modes are the same, a different optab
	 is needed where we pass in the number of units in vectype.  */
      optab1 = vec_unpacks_sbool_lo_optab;
      optab2 = vec_unpacks_sbool_hi_optab;
    }

  vec_mode = TYPE_MODE (vectype);
  if (widening_fn_p (code))
    {
      /* If this is an internal fn then we must check whether the target
	 supports either a low-high split or an even-odd split.  */
      internal_fn ifn = as_internal_fn ((combined_fn) code);

      internal_fn lo, hi, even, odd;
      lookup_hilo_internal_fn (ifn, &lo, &hi);
      *code1 = as_combined_fn (lo);
      *code2 = as_combined_fn (hi);
      optab1 = direct_internal_fn_optab (lo, {vectype, vectype});
      optab2 = direct_internal_fn_optab (hi, {vectype, vectype});

      /* If we don't support low-high, then check for even-odd.  */
      if (!optab1
	  || (icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
	  || !optab2
	  || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
	{
	  lookup_evenodd_internal_fn (ifn, &even, &odd);
	  *code1 = as_combined_fn (even);
	  *code2 = as_combined_fn (odd);
	  optab1 = direct_internal_fn_optab (even, {vectype, vectype});
	  optab2 = direct_internal_fn_optab (odd, {vectype, vectype});
	}
    }
  else if (code.is_tree_code ())
    {
      if (code == FIX_TRUNC_EXPR)
	{
	  /* The signedness is determined from output operand.  */
	  optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
	  optab2 = optab_for_tree_code (c2, vectype_out, optab_default);
	}
      else if (CONVERT_EXPR_CODE_P ((tree_code) code.safe_as_tree_code ())
	       && VECTOR_BOOLEAN_TYPE_P (wide_vectype)
	       && VECTOR_BOOLEAN_TYPE_P (vectype)
	       && TYPE_MODE (wide_vectype) == TYPE_MODE (vectype)
	       && SCALAR_INT_MODE_P (TYPE_MODE (vectype)))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab1 = vec_unpacks_sbool_lo_optab;
	  optab2 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab1 = optab_for_tree_code (c1, vectype, optab_default);
	  optab2 = optab_for_tree_code (c2, vectype, optab_default);
	}
      *code1 = c1;
      *code2 = c2;
    }
  if (!optab1 || !optab2)
    return false;

  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)) == CODE_FOR_nothing)
    return false;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
      && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype),
		    TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	return true;
    }

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */

  prev_type = vectype;
  prev_mode = vec_mode;

  if (!CONVERT_EXPR_CODE_P (code.safe_as_tree_code ()))
    return false;

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
     not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_halve_mask_nunits (prev_type, intermediate_mode);
      else if (VECTOR_MODE_P (intermediate_mode))
	{
	  tree intermediate_element_type
	    = lang_hooks.types.type_for_mode (GET_MODE_INNER (intermediate_mode),
					      TYPE_UNSIGNED (prev_type));
	  intermediate_type
	    = build_vector_type_for_mode (intermediate_element_type,
					  intermediate_mode);
	}
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode,
					    TYPE_UNSIGNED (prev_type));

      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && intermediate_mode == prev_mode
	  && SCALAR_INT_MODE_P (prev_mode))
	{
	  /* If the input and result modes are the same, a different optab
	     is needed where we pass in the number of units in vectype.  */
	  optab3 = vec_unpacks_sbool_lo_optab;
	  optab4 = vec_unpacks_sbool_hi_optab;
	}
      else
	{
	  optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
	  optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
	}

      if (!optab3 || !optab4
	  || (icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || (icode2 = optab_handler (optab2, prev_mode)) == CODE_FOR_nothing
	  || insn_data[icode2].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (optab3, intermediate_mode))
	      == CODE_FOR_nothing)
	  || ((icode2 = optab_handler (optab4, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
	  && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type),
			TYPE_VECTOR_SUBPARTS (wide_vectype) * 2))
	    return true;
	}

      prev_type = intermediate_type;
      prev_mode = intermediate_mode;
    }

  interm_types->release ();
  return false;
}
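
/* As a rough, target-dependent illustration: widening a vector of chars
   all the way to ints ("char -> int") cannot be done in one step, so the
   function would typically report

     *code1 = VEC_UNPACK_LO_EXPR, *code2 = VEC_UNPACK_HI_EXPR,
     *multi_step_cvt = 1,
     *interm_types = { vector of shorts },

   meaning each input vector is first unpacked into two short vectors and
   each of those is unpacked again into two int vectors.  */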
/* Function supportable_narrowing_operation

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE_IN
   and producing a result of type VECTYPE_OUT).

   Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
   and FLOAT.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
   vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
   case of multi-step conversion (like int->short->char - in that case
   MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
   narrowing operation (short in the above example).  */

bool
supportable_narrowing_operation (code_helper code,
				 tree vectype_out, tree vectype_in,
				 code_helper *code1, int *multi_step_cvt,
				 vec<tree> *interm_types)
{
  machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree vectype = vectype_in;
  tree narrow_vectype = vectype_out;
  enum tree_code c1;
  tree intermediate_type, prev_type;
  machine_mode intermediate_mode, prev_mode;
  int i;
  unsigned HOST_WIDE_INT n_elts;
  bool uns;

  if (!code.is_tree_code ())
    return false;

  *multi_step_cvt = 0;
  switch ((tree_code) code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype)
	  && VECTOR_BOOLEAN_TYPE_P (vectype)
	  && SCALAR_INT_MODE_P (TYPE_MODE (vectype))
	  && TYPE_VECTOR_SUBPARTS (vectype).is_constant (&n_elts)
	  && n_elts < BITS_PER_UNIT)
	optab1 = vec_pack_sbool_trunc_optab;
      else
	optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      /* The signedness is determined from output operand.  */
      optab1 = optab_for_tree_code (c1, vectype_out, optab_default);
      break;

    case FLOAT_EXPR:
      c1 = VEC_PACK_FLOAT_EXPR;
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      break;

    default:
      gcc_unreachable ();
    }
  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)) == CODE_FOR_nothing)
    return false;

  *code1 = c1;

  if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
    {
      if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	return true;
      /* For scalar masks we may have different boolean
	 vector types having the same QImode.  Thus we
	 add additional check for elements number.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype) * 2,
		    TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	return true;
    }

  if (code == FLOAT_EXPR)
    return false;

  /* Check if it's a multi-step conversion that can be done using intermediate
     types.  */
  prev_mode = vec_mode;
  prev_type = vectype;
  if (code == FIX_TRUNC_EXPR)
    uns = TYPE_UNSIGNED (vectype_out);
  else
    uns = TYPE_UNSIGNED (vectype);

  /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
     conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
     costly than signed.  */
  if (code == FIX_TRUNC_EXPR && uns)
    {
      enum insn_code icode2;

      intermediate_type
	= lang_hooks.types.type_for_mode (TYPE_MODE (vectype_out), 0);
      interm_optab
	= optab_for_tree_code (c1, intermediate_type, optab_default);
      if (interm_optab != unknown_optab
	  && (icode2 = optab_handler (optab1, vec_mode)) != CODE_FOR_nothing
	  && insn_data[icode1].operand[0].mode
	     == insn_data[icode2].operand[0].mode)
	{
	  uns = false;
	  optab1 = interm_optab;
	  icode1 = icode2;
	}
    }

  /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
     intermediate steps in promotion sequence.  We try
     MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not.  */
  interm_types->create (MAX_INTERM_CVT_STEPS);
  for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
    {
      intermediate_mode = insn_data[icode1].operand[0].mode;
      if (VECTOR_BOOLEAN_TYPE_P (prev_type))
	intermediate_type
	  = vect_double_mask_nunits (prev_type, intermediate_mode);
      else
	intermediate_type
	  = lang_hooks.types.type_for_mode (intermediate_mode, uns);
      if (VECTOR_BOOLEAN_TYPE_P (intermediate_type)
	  && VECTOR_BOOLEAN_TYPE_P (prev_type)
	  && SCALAR_INT_MODE_P (prev_mode)
	  && TYPE_VECTOR_SUBPARTS (intermediate_type).is_constant (&n_elts)
	  && n_elts < BITS_PER_UNIT)
	interm_optab = vec_pack_sbool_trunc_optab;
      else
	interm_optab
	  = optab_for_tree_code (VEC_PACK_TRUNC_EXPR, intermediate_type,
				 optab_default);
      if (!interm_optab
	  || ((icode1 = optab_handler (optab1, prev_mode)) == CODE_FOR_nothing)
	  || insn_data[icode1].operand[0].mode != intermediate_mode
	  || ((icode1 = optab_handler (interm_optab, intermediate_mode))
	      == CODE_FOR_nothing))
	break;

      interm_types->quick_push (intermediate_type);
      (*multi_step_cvt)++;

      if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
	{
	  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
	    return true;
	  if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type) * 2,
			TYPE_VECTOR_SUBPARTS (narrow_vectype)))
	    return true;
	}

      prev_mode = intermediate_mode;
      prev_type = intermediate_type;
      optab1 = interm_optab;
    }

  interm_types->release ();
  return false;
}
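
/* Mirroring the widening case above, a rough illustration: narrowing a
   vector of ints to chars ("int -> char") would typically be reported as

     *code1 = VEC_PACK_TRUNC_EXPR,
     *multi_step_cvt = 1,
     *interm_types = { vector of shorts },

   i.e. two int vectors are first packed into one short vector, and two of
   those are then packed into one char vector.  */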
/* Generate and return a vector mask of MASK_TYPE such that
   mask[I] is true iff J + START_INDEX < END_INDEX for all J <= I.
   Add the statements to SEQ.  */

tree
vect_gen_while (gimple_seq *seq, tree mask_type, tree start_index,
		tree end_index, const char *name)
{
  tree cmp_type = TREE_TYPE (start_index);
  gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT,
						       cmp_type, mask_type,
						       OPTIMIZE_FOR_SPEED));
  gcall *call = gimple_build_call_internal (IFN_WHILE_ULT, 3,
					    start_index, end_index,
					    build_zero_cst (mask_type));
  tree tmp;
  if (name)
    tmp = make_temp_ssa_name (mask_type, NULL, name);
  else
    tmp = make_ssa_name (mask_type);
  gimple_call_set_lhs (call, tmp);
  gimple_seq_add_stmt (seq, call);
  return tmp;
}
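
/* For example, with START_INDEX 3, END_INDEX 8 and an 8-lane mask the
   IFN_WHILE_ULT call produces

     mask = { 1, 1, 1, 1, 1, 0, 0, 0 }

   since I + 3 < 8 holds exactly for lanes I = 0 .. 4.  */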
/* Generate a vector mask of type MASK_TYPE for which index I is false iff
   J + START_INDEX < END_INDEX for all J <= I.  Add the statements to SEQ.  */

tree
vect_gen_while_not (gimple_seq *seq, tree mask_type, tree start_index,
		    tree end_index)
{
  tree tmp = vect_gen_while (seq, mask_type, start_index, end_index);
  return gimple_build (seq, BIT_NOT_EXPR, mask_type, tmp);
}
/* Try to compute the vector types required to vectorize STMT_INFO,
   returning true on success and false if vectorization isn't possible.
   If GROUP_SIZE is nonzero and we're performing BB vectorization,
   make sure that the number of elements in the vectors is no bigger
   than GROUP_SIZE.

   On success:

   - Set *STMT_VECTYPE_OUT to:
     - NULL_TREE if the statement doesn't need to be vectorized;
     - the equivalent of STMT_VINFO_VECTYPE otherwise.

   - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
     number of units needed to vectorize STMT_INFO, or NULL_TREE if the
     statement does not help to determine the overall number of units.  */

opt_result
vect_get_vector_types_for_stmt (vec_info *vinfo, stmt_vec_info stmt_info,
				tree *stmt_vectype_out,
				tree *nunits_vectype_out,
				unsigned int group_size)
{
  gimple *stmt = stmt_info->stmt;

  /* For BB vectorization, we should always have a group size once we've
     constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
     are tentative requests during things like early data reference
     analysis and pattern recognition.  */
  if (is_a <bb_vec_info> (vinfo))
    gcc_assert (vinfo->slp_instances.is_empty () || group_size != 0);
  else
    group_size = 0;

  *stmt_vectype_out = NULL_TREE;
  *nunits_vectype_out = NULL_TREE;

  if (gimple_get_lhs (stmt) == NULL_TREE
      /* Allow vector conditionals through here.  */
      && !is_a <gcond *> (stmt)
      /* MASK_STORE has no lhs, but is ok.  */
      && !gimple_call_internal_p (stmt, IFN_MASK_STORE))
    {
      if (is_a <gcall *> (stmt))
	{
	  /* Ignore calls with no lhs.  These must be calls to
	     #pragma omp simd functions, and what vectorization factor
	     it really needs can't be determined until
	     vectorizable_simd_clone_call.  */
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "defer to SIMD clone analysis.\n");
	  return opt_result::success ();
	}

      return opt_result::failure_at (stmt,
				     "not vectorized: irregular stmt: %G",
				     stmt);
    }
  tree vectype;
  tree scalar_type = NULL_TREE;
  if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info))
    {
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "precomputed vectype: %T\n", vectype);
    }
  else if (vect_use_mask_type_p (stmt_info))
    {
      unsigned int precision = stmt_info->mask_precision;
      scalar_type = build_nonstandard_integer_type (precision, 1);
      vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt, "not vectorized: unsupported"
				       " data-type %T\n", scalar_type);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  else
    {
      /* If we got here with a gcond it means that the target had no
	 available vector mode for the scalar type.  We can't vectorize
	 so abort.  */
      if (is_a <gcond *> (stmt))
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type for gcond %T\n",
				       scalar_type);

      if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info))
	scalar_type = TREE_TYPE (DR_REF (dr));
      else if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
	scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
      else
	scalar_type = TREE_TYPE (gimple_get_lhs (stmt));

      if (dump_enabled_p ())
	{
	  if (group_size)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type (group size %d):"
			     " %T\n", group_size, scalar_type);
	  else
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for scalar type: %T\n", scalar_type);
	}
      vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
      if (!vectype)
	return opt_result::failure_at (stmt,
				       "not vectorized:"
				       " unsupported data-type %T\n",
				       scalar_type);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype);
    }
  if (scalar_type && VECTOR_MODE_P (TYPE_MODE (scalar_type)))
    return opt_result::failure_at (stmt,
				   "not vectorized: vector stmt in loop:%G",
				   stmt);

  *stmt_vectype_out = vectype;

  /* Don't try to compute scalar types if the stmt produces a boolean
     vector; use the existing vector type instead.  */
  tree nunits_vectype = vectype;
  if (!VECTOR_BOOLEAN_TYPE_P (vectype))
    {
      /* The number of units is set according to the smallest scalar
	 type (or the largest vector size, but we only support one
	 vector size per vectorization).  */
      scalar_type = vect_get_smallest_scalar_type (stmt_info,
						   TREE_TYPE (vectype));
      if (scalar_type != TREE_TYPE (vectype))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "get vectype for smallest scalar type: %T\n",
			     scalar_type);
	  nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type,
							group_size);
	  if (!nunits_vectype)
	    return opt_result::failure_at
	      (stmt, "not vectorized: unsupported data-type %T\n",
	       scalar_type);
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n",
			     nunits_vectype);
	}
    }

  if (!multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype),
		   TYPE_VECTOR_SUBPARTS (*stmt_vectype_out)))
    return opt_result::failure_at (stmt,
				   "Not vectorized: Incompatible number "
				   "of vector subparts between %T and %T\n",
				   nunits_vectype, *stmt_vectype_out);

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
      dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype));
      dump_printf (MSG_NOTE, "\n");
    }

  *nunits_vectype_out = nunits_vectype;
  return opt_result::success ();
}
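
/* As a rough, target-dependent illustration: for the statement

     int_res = (int) short_var;

   *STMT_VECTYPE_OUT might be a 4 x int vector, while the smallest scalar
   type involved is "short", so *NUNITS_VECTYPE_OUT would be the 8 x short
   vector and it is the latter that bounds the vectorization factor.  */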
/* Generate and return statement sequence that sets vector length LEN that is:

   min_of_start_and_end = min (START_INDEX, END_INDEX);
   left_len = END_INDEX - min_of_start_and_end;
   rhs = min (left_len, LEN_LIMIT);
   LEN = rhs;

   Note: the cost of the code generated by this function is modeled
   by vect_estimate_min_profitable_iters, so changes here may need
   corresponding changes there.  */

gimple_seq
vect_gen_len (tree len, tree start_index, tree end_index, tree len_limit)
{
  gimple_seq stmts = NULL;
  tree len_type = TREE_TYPE (len);
  gcc_assert (TREE_TYPE (start_index) == len_type);

  tree min = gimple_build (&stmts, MIN_EXPR, len_type, start_index, end_index);
  tree left_len = gimple_build (&stmts, MINUS_EXPR, len_type, end_index, min);
  tree rhs = gimple_build (&stmts, MIN_EXPR, len_type, left_len, len_limit);
  gimple *stmt = gimple_build_assign (len, rhs);
  gimple_seq_add_stmt (&stmts, stmt);

  return stmts;
}
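
/* A small worked example of the sequence above: with START_INDEX 12,
   END_INDEX 16 and LEN_LIMIT 8,

     min_of_start_and_end = min (12, 16) = 12
     left_len             = 16 - 12      = 4
     LEN                  = min (4, 8)   = 4

   i.e. the final, partial vector iteration processes only 4 elements.  */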