1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
46 #include "tree-ssa-loop.h"
47 #include "tree-scalar-evolution.h"
48 #include "tree-vectorizer.h"
50 #include "internal-fn.h"
51 #include "tree-vector-builder.h"
52 #include "vec-perm-indices.h"
53 #include "tree-ssa-loop-niter.h"
54 #include "gimple-fold.h"
56 /* For lang_hooks.types.type_for_mode. */
57 #include "langhooks.h"
59 /* Return the vectorized type for the given statement. */
62 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
64 return STMT_VINFO_VECTYPE (stmt_info
);
67 /* Return TRUE iff the given statement is in an inner loop relative to
68 the loop being vectorized. */
70 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
72 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
73 basic_block bb
= gimple_bb (stmt
);
74 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
80 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
82 return (bb
->loop_father
== loop
->inner
);
85 /* Record the cost of a statement, either by directly informing the
86 target model or by saving it in a vector for later processing.
87 Return a preliminary estimate of the statement's cost. */
90 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
91 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
92 int misalign
, enum vect_cost_model_location where
)
94 if ((kind
== vector_load
|| kind
== unaligned_load
)
95 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
96 kind
= vector_gather_load
;
97 if ((kind
== vector_store
|| kind
== unaligned_store
)
98 && STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
99 kind
= vector_scatter_store
;
101 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, misalign
};
102 body_cost_vec
->safe_push (si
);
104 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
106 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
109 /* Return a variable of type ELEM_TYPE[NELEMS]. */
112 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
114 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
118 /* ARRAY is an array of vectors created by create_vector_array.
119 Return an SSA_NAME for the vector in index N. The reference
120 is part of the vectorization of STMT_INFO and the vector is associated
121 with scalar destination SCALAR_DEST. */
124 read_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
125 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
127 tree vect_type
, vect
, vect_name
, array_ref
;
130 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
131 vect_type
= TREE_TYPE (TREE_TYPE (array
));
132 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
133 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
134 build_int_cst (size_type_node
, n
),
135 NULL_TREE
, NULL_TREE
);
137 new_stmt
= gimple_build_assign (vect
, array_ref
);
138 vect_name
= make_ssa_name (vect
, new_stmt
);
139 gimple_assign_set_lhs (new_stmt
, vect_name
);
140 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
145 /* ARRAY is an array of vectors created by create_vector_array.
146 Emit code to store SSA_NAME VECT in index N of the array.
147 The store is part of the vectorization of STMT_INFO. */
150 write_vector_array (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
151 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
156 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
157 build_int_cst (size_type_node
, n
),
158 NULL_TREE
, NULL_TREE
);
160 new_stmt
= gimple_build_assign (array_ref
, vect
);
161 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
164 /* PTR is a pointer to an array of type TYPE. Return a representation
165 of *PTR. The memory reference replaces those in FIRST_DR
169 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
173 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
174 /* Arrays have the same alignment as their type. */
175 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
179 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
180 Emit the clobber before *GSI. */
183 vect_clobber_variable (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
186 tree clobber
= build_clobber (TREE_TYPE (var
));
187 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
188 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
191 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
193 /* Function vect_mark_relevant.
195 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
198 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
199 enum vect_relevant relevant
, bool live_p
)
201 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
202 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
204 if (dump_enabled_p ())
205 dump_printf_loc (MSG_NOTE
, vect_location
,
206 "mark relevant %d, live %d: %G", relevant
, live_p
,
209 /* If this stmt is an original stmt in a pattern, we might need to mark its
210 related pattern stmt instead of the original stmt. However, such stmts
211 may have their own uses that are not in any pattern, in such cases the
212 stmt itself should be marked. */
213 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
215 /* This is the last stmt in a sequence that was detected as a
216 pattern that can potentially be vectorized. Don't mark the stmt
217 as relevant/live because it's not going to be vectorized.
218 Instead mark the pattern-stmt that replaces it. */
220 if (dump_enabled_p ())
221 dump_printf_loc (MSG_NOTE
, vect_location
,
222 "last stmt in pattern. don't mark"
223 " relevant/live.\n");
224 stmt_vec_info old_stmt_info
= stmt_info
;
225 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
226 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
227 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
228 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
231 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
232 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
233 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
235 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
236 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
238 if (dump_enabled_p ())
239 dump_printf_loc (MSG_NOTE
, vect_location
,
240 "already marked relevant/live.\n");
244 worklist
->safe_push (stmt_info
);
248 /* Function is_simple_and_all_uses_invariant
250 Return true if STMT_INFO is simple and all uses of it are invariant. */
253 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
254 loop_vec_info loop_vinfo
)
259 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
263 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
265 enum vect_def_type dt
= vect_uninitialized_def
;
267 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
269 if (dump_enabled_p ())
270 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
271 "use not simple.\n");
275 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
281 /* Function vect_stmt_relevant_p.
283 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
284 is "relevant for vectorization".
286 A stmt is considered "relevant for vectorization" if:
287 - it has uses outside the loop.
288 - it has vdefs (it alters memory).
289 - control stmts in the loop (except for the exit condition).
291 CHECKME: what other side effects would the vectorizer allow? */
294 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
295 enum vect_relevant
*relevant
, bool *live_p
)
297 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
299 imm_use_iterator imm_iter
;
303 *relevant
= vect_unused_in_scope
;
306 /* cond stmt other than loop exit cond. */
307 if (is_ctrl_stmt (stmt_info
->stmt
)
308 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
309 *relevant
= vect_used_in_scope
;
311 /* changing memory. */
312 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
313 if (gimple_vdef (stmt_info
->stmt
)
314 && !gimple_clobber_p (stmt_info
->stmt
))
316 if (dump_enabled_p ())
317 dump_printf_loc (MSG_NOTE
, vect_location
,
318 "vec_stmt_relevant_p: stmt has vdefs.\n");
319 *relevant
= vect_used_in_scope
;
322 /* uses outside the loop. */
323 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
325 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
327 basic_block bb
= gimple_bb (USE_STMT (use_p
));
328 if (!flow_bb_inside_loop_p (loop
, bb
))
330 if (dump_enabled_p ())
331 dump_printf_loc (MSG_NOTE
, vect_location
,
332 "vec_stmt_relevant_p: used out of loop.\n");
334 if (is_gimple_debug (USE_STMT (use_p
)))
337 /* We expect all such uses to be in the loop exit phis
338 (because of loop closed form) */
339 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
340 gcc_assert (bb
== single_exit (loop
)->dest
);
347 if (*live_p
&& *relevant
== vect_unused_in_scope
348 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
350 if (dump_enabled_p ())
351 dump_printf_loc (MSG_NOTE
, vect_location
,
352 "vec_stmt_relevant_p: stmt live but not relevant.\n");
353 *relevant
= vect_used_only_live
;
356 return (*live_p
|| *relevant
);
360 /* Function exist_non_indexing_operands_for_use_p
362 USE is one of the uses attached to STMT_INFO. Check if USE is
363 used in STMT_INFO for anything other than indexing an array. */
366 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
370 /* USE corresponds to some operand in STMT. If there is no data
371 reference in STMT, then any operand that corresponds to USE
372 is not indexing an array. */
373 if (!STMT_VINFO_DATA_REF (stmt_info
))
376 /* STMT has a data_ref. FORNOW this means that its of one of
380 (This should have been verified in analyze_data_refs).
382 'var' in the second case corresponds to a def, not a use,
383 so USE cannot correspond to any operands that are not used
386 Therefore, all we need to check is if STMT falls into the
387 first case, and whether var corresponds to USE. */
389 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
390 if (!assign
|| !gimple_assign_copy_p (assign
))
392 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
393 if (call
&& gimple_call_internal_p (call
))
395 internal_fn ifn
= gimple_call_internal_fn (call
);
396 int mask_index
= internal_fn_mask_index (ifn
);
398 && use
== gimple_call_arg (call
, mask_index
))
400 int stored_value_index
= internal_fn_stored_value_index (ifn
);
401 if (stored_value_index
>= 0
402 && use
== gimple_call_arg (call
, stored_value_index
))
404 if (internal_gather_scatter_fn_p (ifn
)
405 && use
== gimple_call_arg (call
, 1))
411 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
413 operand
= gimple_assign_rhs1 (assign
);
414 if (TREE_CODE (operand
) != SSA_NAME
)
425 Function process_use.
428 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
429 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
430 that defined USE. This is done by calling mark_relevant and passing it
431 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
432 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
436 Generally, LIVE_P and RELEVANT are used to define the liveness and
437 relevance info of the DEF_STMT of this USE:
438 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
439 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
441 - case 1: If USE is used only for address computations (e.g. array indexing),
442 which does not need to be directly vectorized, then the liveness/relevance
443 of the respective DEF_STMT is left unchanged.
444 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
445 we skip DEF_STMT cause it had already been processed.
446 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
447 "relevant" will be modified accordingly.
449 Return true if everything is as expected. Return false otherwise. */
452 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
453 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
456 stmt_vec_info dstmt_vinfo
;
457 basic_block bb
, def_bb
;
458 enum vect_def_type dt
;
460 /* case 1: we are only interested in uses that need to be vectorized. Uses
461 that are used for address computation are not considered relevant. */
462 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
463 return opt_result::success ();
465 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
466 return opt_result::failure_at (stmt_vinfo
->stmt
,
468 " unsupported use in stmt.\n");
471 return opt_result::success ();
473 def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
475 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
476 DSTMT_VINFO must have already been processed, because this should be the
477 only way that STMT, which is a reduction-phi, was put in the worklist,
478 as there should be no other uses for DSTMT_VINFO in the loop. So we just
479 check that everything is as expected, and we are done. */
480 bb
= gimple_bb (stmt_vinfo
->stmt
);
481 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
482 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
483 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
485 && bb
->loop_father
== def_bb
->loop_father
)
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE
, vect_location
,
489 "reduc-stmt defining reduc-phi in the same nest.\n");
490 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
491 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
492 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
503 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
511 case vect_unused_in_scope
:
512 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
513 vect_used_in_scope
: vect_unused_in_scope
;
516 case vect_used_in_outer_by_reduction
:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
518 relevant
= vect_used_by_reduction
;
521 case vect_used_in_outer
:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
523 relevant
= vect_used_in_scope
;
526 case vect_used_in_scope
:
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
541 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE
, vect_location
,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
549 case vect_unused_in_scope
:
550 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
552 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
555 case vect_used_by_reduction
:
556 case vect_used_only_live
:
557 relevant
= vect_used_in_outer_by_reduction
;
560 case vect_used_in_scope
:
561 relevant
= vect_used_in_outer
;
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
572 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
576 loop_latch_edge (bb
->loop_father
))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE
, vect_location
,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
610 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
611 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
612 unsigned int nbbs
= loop
->num_nodes
;
613 gimple_stmt_iterator si
;
617 enum vect_relevant relevant
;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec
<stmt_vec_info
, 64> worklist
;
623 /* 1. Init worklist. */
624 for (i
= 0; i
< nbbs
; i
++)
627 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
629 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
634 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
637 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
639 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE
, vect_location
,
642 "init: stmt relevant? %G", stmt_info
->stmt
);
644 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
645 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
649 /* 2. Process_worklist */
650 while (worklist
.length () > 0)
655 stmt_vec_info stmt_vinfo
= worklist
.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE
, vect_location
,
658 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
663 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
678 case vect_reduction_def
:
679 gcc_assert (relevant
!= vect_unused_in_scope
);
680 if (relevant
!= vect_unused_in_scope
681 && relevant
!= vect_used_in_scope
682 && relevant
!= vect_used_by_reduction
683 && relevant
!= vect_used_only_live
)
684 return opt_result::failure_at
685 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
688 case vect_nested_cycle
:
689 if (relevant
!= vect_unused_in_scope
690 && relevant
!= vect_used_in_outer_by_reduction
691 && relevant
!= vect_used_in_outer
)
692 return opt_result::failure_at
693 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
696 case vect_double_reduction_def
:
697 if (relevant
!= vect_unused_in_scope
698 && relevant
!= vect_used_by_reduction
699 && relevant
!= vect_used_only_live
)
700 return opt_result::failure_at
701 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
708 if (is_pattern_stmt_p (stmt_vinfo
))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
715 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
716 tree op
= gimple_assign_rhs1 (assign
);
719 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
722 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
723 loop_vinfo
, relevant
, &worklist
, false);
726 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
727 loop_vinfo
, relevant
, &worklist
, false);
732 for (; i
< gimple_num_ops (assign
); i
++)
734 op
= gimple_op (assign
, i
);
735 if (TREE_CODE (op
) == SSA_NAME
)
738 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
745 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
747 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
749 tree arg
= gimple_call_arg (call
, i
);
751 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
759 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
761 tree op
= USE_FROM_PTR (use_p
);
763 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
771 gather_scatter_info gs_info
;
772 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
775 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
780 } /* while worklist */
782 return opt_result::success ();
785 /* Compute the prologue cost for invariant or constant operands. */
788 vect_prologue_cost_for_slp_op (slp_tree node
, stmt_vec_info stmt_info
,
789 unsigned opno
, enum vect_def_type dt
,
790 stmt_vector_for_cost
*cost_vec
)
792 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
793 tree op
= gimple_op (stmt
, opno
);
794 unsigned prologue_cost
= 0;
796 /* Without looking at the actual initializer a vector of
797 constants can be implemented as load from the constant pool.
798 When all elements are the same we can use a splat. */
799 tree vectype
= get_vectype_for_scalar_type (TREE_TYPE (op
));
800 unsigned group_size
= SLP_TREE_SCALAR_STMTS (node
).length ();
801 unsigned num_vects_to_check
;
802 unsigned HOST_WIDE_INT const_nunits
;
804 if (TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&const_nunits
)
805 && ! multiple_p (const_nunits
, group_size
))
807 num_vects_to_check
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
808 nelt_limit
= const_nunits
;
812 /* If either the vector has variable length or the vectors
813 are composed of repeated whole groups we only need to
814 cost construction once. All vectors will be the same. */
815 num_vects_to_check
= 1;
816 nelt_limit
= group_size
;
818 tree elt
= NULL_TREE
;
820 for (unsigned j
= 0; j
< num_vects_to_check
* nelt_limit
; ++j
)
822 unsigned si
= j
% group_size
;
824 elt
= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
, opno
);
825 /* ??? We're just tracking whether all operands of a single
826 vector initializer are the same, ideally we'd check if
827 we emitted the same one already. */
828 else if (elt
!= gimple_op (SLP_TREE_SCALAR_STMTS (node
)[si
]->stmt
,
832 if (nelt
== nelt_limit
)
834 /* ??? We need to pass down stmt_info for a vector type
835 even if it points to the wrong stmt. */
836 prologue_cost
+= record_stmt_cost
838 dt
== vect_external_def
839 ? (elt
? scalar_to_vec
: vec_construct
)
841 stmt_info
, 0, vect_prologue
);
846 return prologue_cost
;
849 /* Function vect_model_simple_cost.
851 Models cost for simple operations, i.e. those that only emit ncopies of a
852 single op. Right now, this does not account for multiple insns that could
853 be generated for the single vector op. We will handle that shortly. */
856 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
857 enum vect_def_type
*dt
,
860 stmt_vector_for_cost
*cost_vec
)
862 int inside_cost
= 0, prologue_cost
= 0;
864 gcc_assert (cost_vec
!= NULL
);
866 /* ??? Somehow we need to fix this at the callers. */
868 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
872 /* Scan operands and account for prologue cost of constants/externals.
873 ??? This over-estimates cost for multiple uses and should be
875 gimple
*stmt
= SLP_TREE_SCALAR_STMTS (node
)[0]->stmt
;
876 tree lhs
= gimple_get_lhs (stmt
);
877 for (unsigned i
= 0; i
< gimple_num_ops (stmt
); ++i
)
879 tree op
= gimple_op (stmt
, i
);
880 enum vect_def_type dt
;
881 if (!op
|| op
== lhs
)
883 if (vect_is_simple_use (op
, stmt_info
->vinfo
, &dt
)
884 && (dt
== vect_constant_def
|| dt
== vect_external_def
))
885 prologue_cost
+= vect_prologue_cost_for_slp_op (node
, stmt_info
,
890 /* Cost the "broadcast" of a scalar operand in to a vector operand.
891 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
893 for (int i
= 0; i
< ndts
; i
++)
894 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
895 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
896 stmt_info
, 0, vect_prologue
);
898 /* Adjust for two-operator SLP nodes. */
899 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
902 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
903 stmt_info
, 0, vect_body
);
906 /* Pass the inside-of-loop statements to the target-specific cost model. */
907 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vector_stmt
,
908 stmt_info
, 0, vect_body
);
910 if (dump_enabled_p ())
911 dump_printf_loc (MSG_NOTE
, vect_location
,
912 "vect_model_simple_cost: inside_cost = %d, "
913 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
917 /* Model cost for type demotion and promotion operations. PWR is normally
918 zero for single-step promotions and demotions. It will be one if
919 two-step promotion/demotion is required, and so on. Each additional
920 step doubles the number of instructions required. */
923 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
924 enum vect_def_type
*dt
, int pwr
,
925 stmt_vector_for_cost
*cost_vec
)
928 int inside_cost
= 0, prologue_cost
= 0;
930 for (i
= 0; i
< pwr
+ 1; i
++)
932 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
934 inside_cost
+= record_stmt_cost (cost_vec
, vect_pow2 (tmp
),
935 vec_promote_demote
, stmt_info
, 0,
939 /* FORNOW: Assuming maximum 2 args per stmts. */
940 for (i
= 0; i
< 2; i
++)
941 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
942 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
943 stmt_info
, 0, vect_prologue
);
945 if (dump_enabled_p ())
946 dump_printf_loc (MSG_NOTE
, vect_location
,
947 "vect_model_promotion_demotion_cost: inside_cost = %d, "
948 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
951 /* Function vect_model_store_cost
953 Models cost for stores. In the case of grouped accesses, one access
954 has the overhead of the grouped access attributed to it. */
957 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
958 enum vect_def_type dt
,
959 vect_memory_access_type memory_access_type
,
960 vec_load_store_type vls_type
, slp_tree slp_node
,
961 stmt_vector_for_cost
*cost_vec
)
963 unsigned int inside_cost
= 0, prologue_cost
= 0;
964 stmt_vec_info first_stmt_info
= stmt_info
;
965 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
967 /* ??? Somehow we need to fix this at the callers. */
969 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
971 if (vls_type
== VLS_STORE_INVARIANT
)
974 prologue_cost
+= vect_prologue_cost_for_slp_op (slp_node
, stmt_info
,
977 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
978 stmt_info
, 0, vect_prologue
);
981 /* Grouped stores update all elements in the group at once,
982 so we want the DR for the first statement. */
983 if (!slp_node
&& grouped_access_p
)
984 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
986 /* True if we should include any once-per-group costs as well as
987 the cost of the statement itself. For SLP we only get called
988 once per group anyhow. */
989 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
991 /* We assume that the cost of a single store-lanes instruction is
992 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
993 access is instead being provided by a permute-and-store operation,
994 include the cost of the permutes. */
996 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
998 /* Uses a high and low interleave or shuffle operations for each
1000 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1001 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1002 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1003 stmt_info
, 0, vect_body
);
1005 if (dump_enabled_p ())
1006 dump_printf_loc (MSG_NOTE
, vect_location
,
1007 "vect_model_store_cost: strided group_size = %d .\n",
1011 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1012 /* Costs of the stores. */
1013 if (memory_access_type
== VMAT_ELEMENTWISE
1014 || memory_access_type
== VMAT_GATHER_SCATTER
)
1016 /* N scalar stores plus extracting the elements. */
1017 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1018 inside_cost
+= record_stmt_cost (cost_vec
,
1019 ncopies
* assumed_nunits
,
1020 scalar_store
, stmt_info
, 0, vect_body
);
1023 vect_get_store_cost (stmt_info
, ncopies
, &inside_cost
, cost_vec
);
1025 if (memory_access_type
== VMAT_ELEMENTWISE
1026 || memory_access_type
== VMAT_STRIDED_SLP
)
1028 /* N scalar stores plus extracting the elements. */
1029 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1030 inside_cost
+= record_stmt_cost (cost_vec
,
1031 ncopies
* assumed_nunits
,
1032 vec_to_scalar
, stmt_info
, 0, vect_body
);
1035 if (dump_enabled_p ())
1036 dump_printf_loc (MSG_NOTE
, vect_location
,
1037 "vect_model_store_cost: inside_cost = %d, "
1038 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1042 /* Calculate cost of DR's memory access. */
1044 vect_get_store_cost (stmt_vec_info stmt_info
, int ncopies
,
1045 unsigned int *inside_cost
,
1046 stmt_vector_for_cost
*body_cost_vec
)
1048 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1049 int alignment_support_scheme
1050 = vect_supportable_dr_alignment (dr_info
, false);
1052 switch (alignment_support_scheme
)
1056 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1057 vector_store
, stmt_info
, 0,
1060 if (dump_enabled_p ())
1061 dump_printf_loc (MSG_NOTE
, vect_location
,
1062 "vect_model_store_cost: aligned.\n");
1066 case dr_unaligned_supported
:
1068 /* Here, we assign an additional cost for the unaligned store. */
1069 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1070 unaligned_store
, stmt_info
,
1071 DR_MISALIGNMENT (dr_info
),
1073 if (dump_enabled_p ())
1074 dump_printf_loc (MSG_NOTE
, vect_location
,
1075 "vect_model_store_cost: unaligned supported by "
1080 case dr_unaligned_unsupported
:
1082 *inside_cost
= VECT_MAX_COST
;
1084 if (dump_enabled_p ())
1085 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1086 "vect_model_store_cost: unsupported access.\n");
1096 /* Function vect_model_load_cost
1098 Models cost for loads. In the case of grouped accesses, one access has
1099 the overhead of the grouped access attributed to it. Since unaligned
1100 accesses are supported for loads, we also account for the costs of the
1101 access scheme chosen. */
1104 vect_model_load_cost (stmt_vec_info stmt_info
, unsigned ncopies
,
1105 vect_memory_access_type memory_access_type
,
1106 slp_instance instance
,
1108 stmt_vector_for_cost
*cost_vec
)
1110 unsigned int inside_cost
= 0, prologue_cost
= 0;
1111 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1113 gcc_assert (cost_vec
);
1115 /* ??? Somehow we need to fix this at the callers. */
1117 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1119 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1121 /* If the load is permuted then the alignment is determined by
1122 the first group element not by the first scalar stmt DR. */
1123 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1124 /* Record the cost for the permutation. */
1126 unsigned assumed_nunits
1127 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info
));
1128 unsigned slp_vf
= (ncopies
* assumed_nunits
) / instance
->group_size
;
1129 vect_transform_slp_perm_load (slp_node
, vNULL
, NULL
,
1130 slp_vf
, instance
, true,
1132 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1133 first_stmt_info
, 0, vect_body
);
1134 /* And adjust the number of loads performed. This handles
1135 redundancies as well as loads that are later dead. */
1136 auto_sbitmap
perm (DR_GROUP_SIZE (first_stmt_info
));
1137 bitmap_clear (perm
);
1138 for (unsigned i
= 0;
1139 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1140 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
1142 bool load_seen
= false;
1143 for (unsigned i
= 0; i
< DR_GROUP_SIZE (first_stmt_info
); ++i
)
1145 if (i
% assumed_nunits
== 0)
1151 if (bitmap_bit_p (perm
, i
))
1157 <= (DR_GROUP_SIZE (first_stmt_info
)
1158 - DR_GROUP_GAP (first_stmt_info
)
1159 + assumed_nunits
- 1) / assumed_nunits
);
1162 /* Grouped loads read all elements in the group at once,
1163 so we want the DR for the first statement. */
1164 stmt_vec_info first_stmt_info
= stmt_info
;
1165 if (!slp_node
&& grouped_access_p
)
1166 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1168 /* True if we should include any once-per-group costs as well as
1169 the cost of the statement itself. For SLP we only get called
1170 once per group anyhow. */
1171 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1173 /* We assume that the cost of a single load-lanes instruction is
1174 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1175 access is instead being provided by a load-and-permute operation,
1176 include the cost of the permutes. */
1178 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1180 /* Uses an even and odd extract operations or shuffle operations
1181 for each needed permute. */
1182 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1183 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1184 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1185 stmt_info
, 0, vect_body
);
1187 if (dump_enabled_p ())
1188 dump_printf_loc (MSG_NOTE
, vect_location
,
1189 "vect_model_load_cost: strided group_size = %d .\n",
1193 /* The loads themselves. */
1194 if (memory_access_type
== VMAT_ELEMENTWISE
1195 || memory_access_type
== VMAT_GATHER_SCATTER
)
1197 /* N scalar loads plus gathering them into a vector. */
1198 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1199 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1200 inside_cost
+= record_stmt_cost (cost_vec
,
1201 ncopies
* assumed_nunits
,
1202 scalar_load
, stmt_info
, 0, vect_body
);
1205 vect_get_load_cost (stmt_info
, ncopies
, first_stmt_p
,
1206 &inside_cost
, &prologue_cost
,
1207 cost_vec
, cost_vec
, true);
1208 if (memory_access_type
== VMAT_ELEMENTWISE
1209 || memory_access_type
== VMAT_STRIDED_SLP
)
1210 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1211 stmt_info
, 0, vect_body
);
1213 if (dump_enabled_p ())
1214 dump_printf_loc (MSG_NOTE
, vect_location
,
1215 "vect_model_load_cost: inside_cost = %d, "
1216 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1220 /* Calculate cost of DR's memory access. */
1222 vect_get_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1223 bool add_realign_cost
, unsigned int *inside_cost
,
1224 unsigned int *prologue_cost
,
1225 stmt_vector_for_cost
*prologue_cost_vec
,
1226 stmt_vector_for_cost
*body_cost_vec
,
1227 bool record_prologue_costs
)
1229 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1230 int alignment_support_scheme
1231 = vect_supportable_dr_alignment (dr_info
, false);
1233 switch (alignment_support_scheme
)
1237 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1238 stmt_info
, 0, vect_body
);
1240 if (dump_enabled_p ())
1241 dump_printf_loc (MSG_NOTE
, vect_location
,
1242 "vect_model_load_cost: aligned.\n");
1246 case dr_unaligned_supported
:
1248 /* Here, we assign an additional cost for the unaligned load. */
1249 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1250 unaligned_load
, stmt_info
,
1251 DR_MISALIGNMENT (dr_info
),
1254 if (dump_enabled_p ())
1255 dump_printf_loc (MSG_NOTE
, vect_location
,
1256 "vect_model_load_cost: unaligned supported by "
1261 case dr_explicit_realign
:
1263 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1264 vector_load
, stmt_info
, 0, vect_body
);
1265 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1266 vec_perm
, stmt_info
, 0, vect_body
);
1268 /* FIXME: If the misalignment remains fixed across the iterations of
1269 the containing loop, the following cost should be added to the
1271 if (targetm
.vectorize
.builtin_mask_for_load
)
1272 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1273 stmt_info
, 0, vect_body
);
1275 if (dump_enabled_p ())
1276 dump_printf_loc (MSG_NOTE
, vect_location
,
1277 "vect_model_load_cost: explicit realign\n");
1281 case dr_explicit_realign_optimized
:
1283 if (dump_enabled_p ())
1284 dump_printf_loc (MSG_NOTE
, vect_location
,
1285 "vect_model_load_cost: unaligned software "
1288 /* Unaligned software pipeline has a load of an address, an initial
1289 load, and possibly a mask operation to "prime" the loop. However,
1290 if this is an access in a group of loads, which provide grouped
1291 access, then the above cost should only be considered for one
1292 access in the group. Inside the loop, there is a load op
1293 and a realignment op. */
1295 if (add_realign_cost
&& record_prologue_costs
)
1297 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1298 vector_stmt
, stmt_info
,
1300 if (targetm
.vectorize
.builtin_mask_for_load
)
1301 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1302 vector_stmt
, stmt_info
,
1306 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1307 stmt_info
, 0, vect_body
);
1308 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1309 stmt_info
, 0, vect_body
);
1311 if (dump_enabled_p ())
1312 dump_printf_loc (MSG_NOTE
, vect_location
,
1313 "vect_model_load_cost: explicit realign optimized"
1319 case dr_unaligned_unsupported
:
1321 *inside_cost
= VECT_MAX_COST
;
1323 if (dump_enabled_p ())
1324 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1325 "vect_model_load_cost: unsupported access.\n");
1334 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1335 the loop preheader for the vectorized stmt STMT_VINFO. */
1338 vect_init_vector_1 (stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1339 gimple_stmt_iterator
*gsi
)
1342 vect_finish_stmt_generation (stmt_vinfo
, new_stmt
, gsi
);
1345 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1349 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1353 if (nested_in_vect_loop_p (loop
, stmt_vinfo
))
1356 pe
= loop_preheader_edge (loop
);
1357 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1358 gcc_assert (!new_bb
);
1362 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1364 gimple_stmt_iterator gsi_bb_start
;
1366 gcc_assert (bb_vinfo
);
1367 bb
= BB_VINFO_BB (bb_vinfo
);
1368 gsi_bb_start
= gsi_after_labels (bb
);
1369 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1373 if (dump_enabled_p ())
1374 dump_printf_loc (MSG_NOTE
, vect_location
,
1375 "created new init_stmt: %G", new_stmt
);
1378 /* Function vect_init_vector.
1380 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1381 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1382 vector type a vector with all elements equal to VAL is created first.
1383 Place the initialization at BSI if it is not NULL. Otherwise, place the
1384 initialization at the loop preheader.
1385 Return the DEF of INIT_STMT.
1386 It will be used in the vectorization of STMT_INFO. */
1389 vect_init_vector (stmt_vec_info stmt_info
, tree val
, tree type
,
1390 gimple_stmt_iterator
*gsi
)
1395 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1396 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1398 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1399 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1401 /* Scalar boolean value should be transformed into
1402 all zeros or all ones value before building a vector. */
1403 if (VECTOR_BOOLEAN_TYPE_P (type
))
1405 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1406 tree false_val
= build_zero_cst (TREE_TYPE (type
));
1408 if (CONSTANT_CLASS_P (val
))
1409 val
= integer_zerop (val
) ? false_val
: true_val
;
1412 new_temp
= make_ssa_name (TREE_TYPE (type
));
1413 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1414 val
, true_val
, false_val
);
1415 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1419 else if (CONSTANT_CLASS_P (val
))
1420 val
= fold_convert (TREE_TYPE (type
), val
);
1423 new_temp
= make_ssa_name (TREE_TYPE (type
));
1424 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1425 init_stmt
= gimple_build_assign (new_temp
,
1426 fold_build1 (VIEW_CONVERT_EXPR
,
1430 init_stmt
= gimple_build_assign (new_temp
, NOP_EXPR
, val
);
1431 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1435 val
= build_vector_from_val (type
, val
);
1438 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1439 init_stmt
= gimple_build_assign (new_temp
, val
);
1440 vect_init_vector_1 (stmt_info
, init_stmt
, gsi
);
1444 /* Function vect_get_vec_def_for_operand_1.
1446 For a defining stmt DEF_STMT_INFO of a scalar stmt, return a vector def
1447 with type DT that will be used in the vectorized stmt. */
1450 vect_get_vec_def_for_operand_1 (stmt_vec_info def_stmt_info
,
1451 enum vect_def_type dt
)
1454 stmt_vec_info vec_stmt_info
;
1458 /* operand is a constant or a loop invariant. */
1459 case vect_constant_def
:
1460 case vect_external_def
:
1461 /* Code should use vect_get_vec_def_for_operand. */
1464 /* operand is defined inside the loop. */
1465 case vect_internal_def
:
1467 /* Get the def from the vectorized stmt. */
1468 vec_stmt_info
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1469 /* Get vectorized pattern statement. */
1471 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1472 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1473 vec_stmt_info
= (STMT_VINFO_VEC_STMT
1474 (STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1475 gcc_assert (vec_stmt_info
);
1476 if (gphi
*phi
= dyn_cast
<gphi
*> (vec_stmt_info
->stmt
))
1477 vec_oprnd
= PHI_RESULT (phi
);
1479 vec_oprnd
= gimple_get_lhs (vec_stmt_info
->stmt
);
1483 /* operand is defined by a loop header phi. */
1484 case vect_reduction_def
:
1485 case vect_double_reduction_def
:
1486 case vect_nested_cycle
:
1487 case vect_induction_def
:
1489 gcc_assert (gimple_code (def_stmt_info
->stmt
) == GIMPLE_PHI
);
1491 /* Get the def from the vectorized stmt. */
1492 vec_stmt_info
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1493 if (gphi
*phi
= dyn_cast
<gphi
*> (vec_stmt_info
->stmt
))
1494 vec_oprnd
= PHI_RESULT (phi
);
1496 vec_oprnd
= gimple_get_lhs (vec_stmt_info
->stmt
);
1506 /* Function vect_get_vec_def_for_operand.
1508 OP is an operand in STMT_VINFO. This function returns a (vector) def
1509 that will be used in the vectorized stmt for STMT_VINFO.
1511 In the case that OP is an SSA_NAME which is defined in the loop, then
1512 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1514 In case OP is an invariant or constant, a new stmt that creates a vector def
1515 needs to be introduced. VECTYPE may be used to specify a required type for
1516 vector invariant. */
1519 vect_get_vec_def_for_operand (tree op
, stmt_vec_info stmt_vinfo
, tree vectype
)
1522 enum vect_def_type dt
;
1524 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1526 if (dump_enabled_p ())
1527 dump_printf_loc (MSG_NOTE
, vect_location
,
1528 "vect_get_vec_def_for_operand: %T\n", op
);
1530 stmt_vec_info def_stmt_info
;
1531 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1532 &def_stmt_info
, &def_stmt
);
1533 gcc_assert (is_simple_use
);
1534 if (def_stmt
&& dump_enabled_p ())
1535 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1537 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
1539 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1543 vector_type
= vectype
;
1544 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1545 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1546 vector_type
= build_same_sized_truth_vector_type (stmt_vectype
);
1548 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1550 gcc_assert (vector_type
);
1551 return vect_init_vector (stmt_vinfo
, op
, vector_type
, NULL
);
1554 return vect_get_vec_def_for_operand_1 (def_stmt_info
, dt
);
1558 /* Function vect_get_vec_def_for_stmt_copy
1560 Return a vector-def for an operand. This function is used when the
1561 vectorized stmt to be created (by the caller to this function) is a "copy"
1562 created in case the vectorized result cannot fit in one vector, and several
1563 copies of the vector-stmt are required. In this case the vector-def is
1564 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1565 of the stmt that defines VEC_OPRND. VINFO describes the vectorization.
1568 In case the vectorization factor (VF) is bigger than the number
1569 of elements that can fit in a vectype (nunits), we have to generate
1570 more than one vector stmt to vectorize the scalar stmt. This situation
1571 arises when there are multiple data-types operated upon in the loop; the
1572 smallest data-type determines the VF, and as a result, when vectorizing
1573 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1574 vector stmt (each computing a vector of 'nunits' results, and together
1575 computing 'VF' results in each iteration). This function is called when
1576 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1577 which VF=16 and nunits=4, so the number of copies required is 4):
1579 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1581 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1582 VS1.1: vx.1 = memref1 VS1.2
1583 VS1.2: vx.2 = memref2 VS1.3
1584 VS1.3: vx.3 = memref3
1586 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1587 VSnew.1: vz1 = vx.1 + ... VSnew.2
1588 VSnew.2: vz2 = vx.2 + ... VSnew.3
1589 VSnew.3: vz3 = vx.3 + ...
1591 The vectorization of S1 is explained in vectorizable_load.
1592 The vectorization of S2:
1593 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1594 the function 'vect_get_vec_def_for_operand' is called to
1595 get the relevant vector-def for each operand of S2. For operand x it
1596 returns the vector-def 'vx.0'.
1598 To create the remaining copies of the vector-stmt (VSnew.j), this
1599 function is called to get the relevant vector-def for each operand. It is
1600 obtained from the respective VS1.j stmt, which is recorded in the
1601 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1603 For example, to obtain the vector-def 'vx.1' in order to create the
1604 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1605 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1606 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1607 and return its def ('vx.1').
1608 Overall, to create the above sequence this function will be called 3 times:
1609 vx.1 = vect_get_vec_def_for_stmt_copy (vinfo, vx.0);
1610 vx.2 = vect_get_vec_def_for_stmt_copy (vinfo, vx.1);
1611 vx.3 = vect_get_vec_def_for_stmt_copy (vinfo, vx.2); */
1614 vect_get_vec_def_for_stmt_copy (vec_info
*vinfo
, tree vec_oprnd
)
1616 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (vec_oprnd
);
1618 /* Do nothing; can reuse same def. */
1621 def_stmt_info
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1622 gcc_assert (def_stmt_info
);
1623 if (gphi
*phi
= dyn_cast
<gphi
*> (def_stmt_info
->stmt
))
1624 vec_oprnd
= PHI_RESULT (phi
);
1626 vec_oprnd
= gimple_get_lhs (def_stmt_info
->stmt
);
1631 /* Get vectorized definitions for the operands to create a copy of an original
1632 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1635 vect_get_vec_defs_for_stmt_copy (vec_info
*vinfo
,
1636 vec
<tree
> *vec_oprnds0
,
1637 vec
<tree
> *vec_oprnds1
)
1639 tree vec_oprnd
= vec_oprnds0
->pop ();
1641 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1642 vec_oprnds0
->quick_push (vec_oprnd
);
1644 if (vec_oprnds1
&& vec_oprnds1
->length ())
1646 vec_oprnd
= vec_oprnds1
->pop ();
1647 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
1648 vec_oprnds1
->quick_push (vec_oprnd
);
1653 /* Get vectorized definitions for OP0 and OP1. */
1656 vect_get_vec_defs (tree op0
, tree op1
, stmt_vec_info stmt_info
,
1657 vec
<tree
> *vec_oprnds0
,
1658 vec
<tree
> *vec_oprnds1
,
1663 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1664 auto_vec
<tree
> ops (nops
);
1665 auto_vec
<vec
<tree
> > vec_defs (nops
);
1667 ops
.quick_push (op0
);
1669 ops
.quick_push (op1
);
1671 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
1673 *vec_oprnds0
= vec_defs
[0];
1675 *vec_oprnds1
= vec_defs
[1];
1681 vec_oprnds0
->create (1);
1682 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt_info
);
1683 vec_oprnds0
->quick_push (vec_oprnd
);
1687 vec_oprnds1
->create (1);
1688 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt_info
);
1689 vec_oprnds1
->quick_push (vec_oprnd
);
1694 /* Helper function called by vect_finish_replace_stmt and
1695 vect_finish_stmt_generation. Set the location of the new
1696 statement and create and return a stmt_vec_info for it. */
1698 static stmt_vec_info
1699 vect_finish_stmt_generation_1 (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1701 vec_info
*vinfo
= stmt_info
->vinfo
;
1703 stmt_vec_info vec_stmt_info
= vinfo
->add_stmt (vec_stmt
);
1705 if (dump_enabled_p ())
1706 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1708 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1710 /* While EH edges will generally prevent vectorization, stmt might
1711 e.g. be in a must-not-throw region. Ensure newly created stmts
1712 that could throw are part of the same region. */
1713 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1714 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1715 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
1717 return vec_stmt_info
;
1720 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1721 which sets the same scalar result as STMT_INFO did. Create and return a
1722 stmt_vec_info for VEC_STMT. */
1725 vect_finish_replace_stmt (stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1727 gcc_assert (gimple_get_lhs (stmt_info
->stmt
) == gimple_get_lhs (vec_stmt
));
1729 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt_info
->stmt
);
1730 gsi_replace (&gsi
, vec_stmt
, false);
1732 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1735 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1736 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1739 vect_finish_stmt_generation (stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1740 gimple_stmt_iterator
*gsi
)
1742 gcc_assert (gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1744 if (!gsi_end_p (*gsi
)
1745 && gimple_has_mem_ops (vec_stmt
))
1747 gimple
*at_stmt
= gsi_stmt (*gsi
);
1748 tree vuse
= gimple_vuse (at_stmt
);
1749 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1751 tree vdef
= gimple_vdef (at_stmt
);
1752 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1753 /* If we have an SSA vuse and insert a store, update virtual
1754 SSA form to avoid triggering the renamer. Do so only
1755 if we can easily see all uses - which is what almost always
1756 happens with the way vectorized stmts are inserted. */
1757 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1758 && ((is_gimple_assign (vec_stmt
)
1759 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1760 || (is_gimple_call (vec_stmt
)
1761 && !(gimple_call_flags (vec_stmt
)
1762 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1764 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1765 gimple_set_vdef (vec_stmt
, new_vdef
);
1766 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1770 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1771 return vect_finish_stmt_generation_1 (stmt_info
, vec_stmt
);
1774 /* We want to vectorize a call to combined function CFN with function
1775 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1776 as the types of all inputs. Check whether this is possible using
1777 an internal function, returning its code if so or IFN_LAST if not. */
1780 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1781 tree vectype_out
, tree vectype_in
)
1784 if (internal_fn_p (cfn
))
1785 ifn
= as_internal_fn (cfn
);
1787 ifn
= associated_internal_fn (fndecl
);
1788 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1790 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1791 if (info
.vectorizable
)
1793 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1794 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1795 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1796 OPTIMIZE_FOR_SPEED
))
1804 static tree
permute_vec_elements (tree
, tree
, tree
, stmt_vec_info
,
1805 gimple_stmt_iterator
*);
1807 /* Check whether a load or store statement in the loop described by
1808 LOOP_VINFO is possible in a fully-masked loop. This is testing
1809 whether the vectorizer pass has the appropriate support, as well as
1810 whether the target does.
1812 VLS_TYPE says whether the statement is a load or store and VECTYPE
1813 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1814 says how the load or store is going to be implemented and GROUP_SIZE
1815 is the number of load or store statements in the containing group.
1816 If the access is a gather load or scatter store, GS_INFO describes
1819 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1820 supported, otherwise record the required mask types. */
1823 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1824 vec_load_store_type vls_type
, int group_size
,
1825 vect_memory_access_type memory_access_type
,
1826 gather_scatter_info
*gs_info
)
1828 /* Invariant loads need no special support. */
1829 if (memory_access_type
== VMAT_INVARIANT
)
1832 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1833 machine_mode vecmode
= TYPE_MODE (vectype
);
1834 bool is_load
= (vls_type
== VLS_LOAD
);
1835 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1838 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1839 : !vect_store_lanes_supported (vectype
, group_size
, true))
1841 if (dump_enabled_p ())
1842 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1843 "can't use a fully-masked loop because the"
1844 " target doesn't have an appropriate masked"
1845 " load/store-lanes instruction.\n");
1846 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1849 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1850 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
);
1854 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1856 internal_fn ifn
= (is_load
1857 ? IFN_MASK_GATHER_LOAD
1858 : IFN_MASK_SCATTER_STORE
);
1859 tree offset_type
= TREE_TYPE (gs_info
->offset
);
1860 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1861 gs_info
->memory_type
,
1862 TYPE_SIGN (offset_type
),
1865 if (dump_enabled_p ())
1866 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1867 "can't use a fully-masked loop because the"
1868 " target doesn't have an appropriate masked"
1869 " gather load or scatter store instruction.\n");
1870 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1873 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1874 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
);
1878 if (memory_access_type
!= VMAT_CONTIGUOUS
1879 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1881 /* Element X of the data must come from iteration i * VF + X of the
1882 scalar loop. We need more work to support other mappings. */
1883 if (dump_enabled_p ())
1884 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1885 "can't use a fully-masked loop because an access"
1886 " isn't contiguous.\n");
1887 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1891 machine_mode mask_mode
;
1892 if (!(targetm
.vectorize
.get_mask_mode
1893 (GET_MODE_NUNITS (vecmode
),
1894 GET_MODE_SIZE (vecmode
)).exists (&mask_mode
))
1895 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1897 if (dump_enabled_p ())
1898 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1899 "can't use a fully-masked loop because the target"
1900 " doesn't have the appropriate masked load or"
1902 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1905 /* We might load more scalars than we need for permuting SLP loads.
1906 We checked in get_group_load_store_type that the extra elements
1907 don't leak into a new vector. */
1908 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1909 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1910 unsigned int nvectors
;
1911 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1912 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
);
1917 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1918 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1919 that needs to be applied to all loads and stores in a vectorized loop.
1920 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1922 MASK_TYPE is the type of both masks. If new statements are needed,
1923 insert them before GSI. */
1926 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1927 gimple_stmt_iterator
*gsi
)
1929 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1933 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1934 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1935 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1936 vec_mask
, loop_mask
);
1937 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
1941 /* Determine whether we can use a gather load or scatter store to vectorize
1942 strided load or store STMT_INFO by truncating the current offset to a
1943 smaller width. We need to be able to construct an offset vector:
1945 { 0, X, X*2, X*3, ... }
1947 without loss of precision, where X is STMT_INFO's DR_STEP.
1949 Return true if this is possible, describing the gather load or scatter
1950 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1953 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1954 loop_vec_info loop_vinfo
, bool masked_p
,
1955 gather_scatter_info
*gs_info
)
1957 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1958 data_reference
*dr
= dr_info
->dr
;
1959 tree step
= DR_STEP (dr
);
1960 if (TREE_CODE (step
) != INTEGER_CST
)
1962 /* ??? Perhaps we could use range information here? */
1963 if (dump_enabled_p ())
1964 dump_printf_loc (MSG_NOTE
, vect_location
,
1965 "cannot truncate variable step.\n");
1969 /* Get the number of bits in an element. */
1970 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1971 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1972 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1974 /* Set COUNT to the upper limit on the number of elements - 1.
1975 Start with the maximum vectorization factor. */
1976 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1978 /* Try lowering COUNT to the number of scalar latch iterations. */
1979 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1980 widest_int max_iters
;
1981 if (max_loop_iterations (loop
, &max_iters
)
1982 && max_iters
< count
)
1983 count
= max_iters
.to_shwi ();
1985 /* Try scales of 1 and the element size. */
1986 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1987 wi::overflow_type overflow
= wi::OVF_NONE
;
1988 for (int i
= 0; i
< 2; ++i
)
1990 int scale
= scales
[i
];
1992 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1995 /* See whether we can calculate (COUNT - 1) * STEP / SCALE
1996 in OFFSET_BITS bits. */
1997 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
2000 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
2001 if (wi::min_precision (range
, sign
) > element_bits
)
2003 overflow
= wi::OVF_UNKNOWN
;
2007 /* See whether the target supports the operation. */
2008 tree memory_type
= TREE_TYPE (DR_REF (dr
));
2009 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr
), masked_p
, vectype
,
2010 memory_type
, element_bits
, sign
, scale
,
2011 &gs_info
->ifn
, &gs_info
->element_type
))
2014 tree offset_type
= build_nonstandard_integer_type (element_bits
,
2017 gs_info
->decl
= NULL_TREE
;
2018 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
2019 but we don't need to store that here. */
2020 gs_info
->base
= NULL_TREE
;
2021 gs_info
->offset
= fold_convert (offset_type
, step
);
2022 gs_info
->offset_dt
= vect_constant_def
;
2023 gs_info
->offset_vectype
= NULL_TREE
;
2024 gs_info
->scale
= scale
;
2025 gs_info
->memory_type
= memory_type
;
2029 if (overflow
&& dump_enabled_p ())
2030 dump_printf_loc (MSG_NOTE
, vect_location
,
2031 "truncating gather/scatter offset to %d bits"
2032 " might change its value.\n", element_bits
);
2037 /* Return true if we can use gather/scatter internal functions to
2038 vectorize STMT_INFO, which is a grouped or strided load or store.
2039 MASKED_P is true if load or store is conditional. When returning
2040 true, fill in GS_INFO with the information required to perform the
2044 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
2045 loop_vec_info loop_vinfo
, bool masked_p
,
2046 gather_scatter_info
*gs_info
)
2048 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
2050 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
2053 scalar_mode element_mode
= SCALAR_TYPE_MODE (gs_info
->element_type
);
2054 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
2055 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2056 unsigned int offset_bits
= TYPE_PRECISION (offset_type
);
2058 /* Enforced by vect_check_gather_scatter. */
2059 gcc_assert (element_bits
>= offset_bits
);
2061 /* If the elements are wider than the offset, convert the offset to the
2062 same width, without changing its sign. */
2063 if (element_bits
> offset_bits
)
2065 bool unsigned_p
= TYPE_UNSIGNED (offset_type
);
2066 offset_type
= build_nonstandard_integer_type (element_bits
, unsigned_p
);
2067 gs_info
->offset
= fold_convert (offset_type
, gs_info
->offset
);
2070 if (dump_enabled_p ())
2071 dump_printf_loc (MSG_NOTE
, vect_location
,
2072 "using gather/scatter for strided/grouped access,"
2073 " scale = %d\n", gs_info
->scale
);
2078 /* STMT_INFO is a non-strided load or store, meaning that it accesses
2079 elements with a known constant step. Return -1 if that step
2080 is negative, 0 if it is zero, and 1 if it is greater than zero. */
2083 compare_step_with_zero (stmt_vec_info stmt_info
)
2085 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2086 return tree_int_cst_compare (vect_dr_behavior (dr_info
)->step
,
2090 /* If the target supports a permute mask that reverses the elements in
2091 a vector of type VECTYPE, return that mask, otherwise return null. */
2094 perm_mask_for_reverse (tree vectype
)
2096 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2098 /* The encoding has a single stepped pattern. */
2099 vec_perm_builder
sel (nunits
, 1, 3);
2100 for (int i
= 0; i
< 3; ++i
)
2101 sel
.quick_push (nunits
- 1 - i
);
2103 vec_perm_indices
indices (sel
, 1, nunits
);
2104 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
2106 return vect_gen_perm_mask_checked (vectype
, indices
);
2109 /* STMT_INFO is either a masked or unconditional store. Return the value
2113 vect_get_store_rhs (stmt_vec_info stmt_info
)
2115 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2117 gcc_assert (gimple_assign_single_p (assign
));
2118 return gimple_assign_rhs1 (assign
);
2120 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2122 internal_fn ifn
= gimple_call_internal_fn (call
);
2123 int index
= internal_fn_stored_value_index (ifn
);
2124 gcc_assert (index
>= 0);
2125 return gimple_call_arg (call
, index
);
2130 /* A subroutine of get_load_store_type, with a subset of the same
2131 arguments. Handle the case where STMT_INFO is part of a grouped load
2134 For stores, the statements in the group are all consecutive
2135 and there is no gap at the end. For loads, the statements in the
2136 group might not be consecutive; there can be gaps between statements
2137 as well as at the end. */
2140 get_group_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2141 bool masked_p
, vec_load_store_type vls_type
,
2142 vect_memory_access_type
*memory_access_type
,
2143 gather_scatter_info
*gs_info
)
2145 vec_info
*vinfo
= stmt_info
->vinfo
;
2146 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2147 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2148 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2149 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2150 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
2151 bool single_element_p
= (stmt_info
== first_stmt_info
2152 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2153 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2154 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2156 /* True if the vectorized statements would access beyond the last
2157 statement in the group. */
2158 bool overrun_p
= false;
2160 /* True if we can cope with such overrun by peeling for gaps, so that
2161 there is at least one final scalar iteration after the vector loop. */
2162 bool can_overrun_p
= (!masked_p
2163 && vls_type
== VLS_LOAD
2167 /* There can only be a gap at the end of the group if the stride is
2168 known at compile time. */
2169 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2171 /* Stores can't yet have gaps. */
2172 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
2176 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2178 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2179 separated by the stride, until we have a complete vector.
2180 Fall back to scalar accesses if that isn't possible. */
2181 if (multiple_p (nunits
, group_size
))
2182 *memory_access_type
= VMAT_STRIDED_SLP
;
2184 *memory_access_type
= VMAT_ELEMENTWISE
;
2188 overrun_p
= loop_vinfo
&& gap
!= 0;
2189 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2191 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2192 "Grouped store with gaps requires"
2193 " non-consecutive accesses\n");
2196 /* An overrun is fine if the trailing elements are smaller
2197 than the alignment boundary B. Every vector access will
2198 be a multiple of B and so we are guaranteed to access a
2199 non-gap element in the same B-sized block. */
2201 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2202 / vect_get_scalar_dr_size (first_dr_info
)))
2204 if (overrun_p
&& !can_overrun_p
)
2206 if (dump_enabled_p ())
2207 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2208 "Peeling for outer loop is not supported\n");
2211 *memory_access_type
= VMAT_CONTIGUOUS
;
2216 /* We can always handle this case using elementwise accesses,
2217 but see if something more efficient is available. */
2218 *memory_access_type
= VMAT_ELEMENTWISE
;
2220 /* If there is a gap at the end of the group then these optimizations
2221 would access excess elements in the last iteration. */
2222 bool would_overrun_p
= (gap
!= 0);
2223 /* An overrun is fine if the trailing elements are smaller than the
2224 alignment boundary B. Every vector access will be a multiple of B
2225 and so we are guaranteed to access a non-gap element in the
2226 same B-sized block. */
2229 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2230 / vect_get_scalar_dr_size (first_dr_info
)))
2231 would_overrun_p
= false;
2233 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2234 && (can_overrun_p
|| !would_overrun_p
)
2235 && compare_step_with_zero (stmt_info
) > 0)
2237 /* First cope with the degenerate case of a single-element
2239 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2240 *memory_access_type
= VMAT_CONTIGUOUS
;
2242 /* Otherwise try using LOAD/STORE_LANES. */
2243 if (*memory_access_type
== VMAT_ELEMENTWISE
2244 && (vls_type
== VLS_LOAD
2245 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2246 : vect_store_lanes_supported (vectype
, group_size
,
2249 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2250 overrun_p
= would_overrun_p
;
2253 /* If that fails, try using permuting loads. */
2254 if (*memory_access_type
== VMAT_ELEMENTWISE
2255 && (vls_type
== VLS_LOAD
2256 ? vect_grouped_load_supported (vectype
, single_element_p
,
2258 : vect_grouped_store_supported (vectype
, group_size
)))
2260 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2261 overrun_p
= would_overrun_p
;
2265 /* As a last resort, trying using a gather load or scatter store.
2267 ??? Although the code can handle all group sizes correctly,
2268 it probably isn't a win to use separate strided accesses based
2269 on nearby locations. Or, even if it's a win over scalar code,
2270 it might not be a win over vectorizing at a lower VF, if that
2271 allows us to use contiguous accesses. */
2272 if (*memory_access_type
== VMAT_ELEMENTWISE
2275 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2277 *memory_access_type
= VMAT_GATHER_SCATTER
;
2280 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2282 /* STMT is the leader of the group. Check the operands of all the
2283 stmts of the group. */
2284 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2285 while (next_stmt_info
)
2287 tree op
= vect_get_store_rhs (next_stmt_info
);
2288 enum vect_def_type dt
;
2289 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2291 if (dump_enabled_p ())
2292 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2293 "use not simple.\n");
2296 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
2302 gcc_assert (can_overrun_p
);
2303 if (dump_enabled_p ())
2304 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2305 "Data access with gaps requires scalar "
2307 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2313 /* A subroutine of get_load_store_type, with a subset of the same
2314 arguments. Handle the case where STMT_INFO is a load or store that
2315 accesses consecutive elements with a negative step. */
2317 static vect_memory_access_type
2318 get_negative_load_store_type (stmt_vec_info stmt_info
, tree vectype
,
2319 vec_load_store_type vls_type
,
2320 unsigned int ncopies
)
2322 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
2323 dr_alignment_support alignment_support_scheme
;
2327 if (dump_enabled_p ())
2328 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2329 "multiple types with negative step.\n");
2330 return VMAT_ELEMENTWISE
;
2333 alignment_support_scheme
= vect_supportable_dr_alignment (dr_info
, false);
2334 if (alignment_support_scheme
!= dr_aligned
2335 && alignment_support_scheme
!= dr_unaligned_supported
)
2337 if (dump_enabled_p ())
2338 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2339 "negative step but alignment required.\n");
2340 return VMAT_ELEMENTWISE
;
2343 if (vls_type
== VLS_STORE_INVARIANT
)
2345 if (dump_enabled_p ())
2346 dump_printf_loc (MSG_NOTE
, vect_location
,
2347 "negative step with invariant source;"
2348 " no permute needed.\n");
2349 return VMAT_CONTIGUOUS_DOWN
;
2352 if (!perm_mask_for_reverse (vectype
))
2354 if (dump_enabled_p ())
2355 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2356 "negative step and reversing not supported.\n");
2357 return VMAT_ELEMENTWISE
;
2360 return VMAT_CONTIGUOUS_REVERSE
;
2363 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2364 if there is a memory access type that the vectorized form can use,
2365 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2366 or scatters, fill in GS_INFO accordingly.
2368 SLP says whether we're performing SLP rather than loop vectorization.
2369 MASKED_P is true if the statement is conditional on a vectorized mask.
2370 VECTYPE is the vector type that the vectorized statements will use.
2371 NCOPIES is the number of vector statements that will be needed. */
2374 get_load_store_type (stmt_vec_info stmt_info
, tree vectype
, bool slp
,
2375 bool masked_p
, vec_load_store_type vls_type
,
2376 unsigned int ncopies
,
2377 vect_memory_access_type
*memory_access_type
,
2378 gather_scatter_info
*gs_info
)
2380 vec_info
*vinfo
= stmt_info
->vinfo
;
2381 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2382 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2383 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2385 *memory_access_type
= VMAT_GATHER_SCATTER
;
2386 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2388 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2389 &gs_info
->offset_dt
,
2390 &gs_info
->offset_vectype
))
2392 if (dump_enabled_p ())
2393 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2394 "%s index use not simple.\n",
2395 vls_type
== VLS_LOAD
? "gather" : "scatter");
2399 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2401 if (!get_group_load_store_type (stmt_info
, vectype
, slp
, masked_p
,
2402 vls_type
, memory_access_type
, gs_info
))
2405 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2409 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2411 *memory_access_type
= VMAT_GATHER_SCATTER
;
2413 *memory_access_type
= VMAT_ELEMENTWISE
;
2417 int cmp
= compare_step_with_zero (stmt_info
);
2419 *memory_access_type
= get_negative_load_store_type
2420 (stmt_info
, vectype
, vls_type
, ncopies
);
2423 gcc_assert (vls_type
== VLS_LOAD
);
2424 *memory_access_type
= VMAT_INVARIANT
;
2427 *memory_access_type
= VMAT_CONTIGUOUS
;
2430 if ((*memory_access_type
== VMAT_ELEMENTWISE
2431 || *memory_access_type
== VMAT_STRIDED_SLP
)
2432 && !nunits
.is_constant ())
2434 if (dump_enabled_p ())
2435 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2436 "Not using elementwise accesses due to variable "
2437 "vectorization factor.\n");
2441 /* FIXME: At the moment the cost model seems to underestimate the
2442 cost of using elementwise accesses. This check preserves the
2443 traditional behavior until that can be fixed. */
2444 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2445 if (!first_stmt_info
)
2446 first_stmt_info
= stmt_info
;
2447 if (*memory_access_type
== VMAT_ELEMENTWISE
2448 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2449 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2450 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2451 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2453 if (dump_enabled_p ())
2454 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2455 "not falling back to elementwise accesses\n");
2461 /* Return true if boolean argument MASK is suitable for vectorizing
2462 conditional load or store STMT_INFO. When returning true, store the type
2463 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2464 in *MASK_VECTYPE_OUT. */
2467 vect_check_load_store_mask (stmt_vec_info stmt_info
, tree mask
,
2468 vect_def_type
*mask_dt_out
,
2469 tree
*mask_vectype_out
)
2471 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2473 if (dump_enabled_p ())
2474 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2475 "mask argument is not a boolean.\n");
2479 if (TREE_CODE (mask
) != SSA_NAME
)
2481 if (dump_enabled_p ())
2482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2483 "mask argument is not an SSA name.\n");
2487 enum vect_def_type mask_dt
;
2489 if (!vect_is_simple_use (mask
, stmt_info
->vinfo
, &mask_dt
, &mask_vectype
))
2491 if (dump_enabled_p ())
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2493 "mask use not simple.\n");
2497 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2499 mask_vectype
= get_mask_type_for_scalar_type (TREE_TYPE (vectype
));
2501 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2503 if (dump_enabled_p ())
2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2505 "could not find an appropriate vector mask type.\n");
2509 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2510 TYPE_VECTOR_SUBPARTS (vectype
)))
2512 if (dump_enabled_p ())
2513 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2514 "vector mask type %T",
2515 " does not match vector data type %T.\n",
2516 mask_vectype
, vectype
);
2521 *mask_dt_out
= mask_dt
;
2522 *mask_vectype_out
= mask_vectype
;
2526 /* Return true if stored value RHS is suitable for vectorizing store
2527 statement STMT_INFO. When returning true, store the type of the
2528 definition in *RHS_DT_OUT, the type of the vectorized store value in
2529 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
2532 vect_check_store_rhs (stmt_vec_info stmt_info
, tree rhs
,
2533 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2534 vec_load_store_type
*vls_type_out
)
2536 /* In the case this is a store from a constant make sure
2537 native_encode_expr can handle it. */
2538 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2540 if (dump_enabled_p ())
2541 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2542 "cannot encode constant as a byte sequence.\n");
2546 enum vect_def_type rhs_dt
;
2548 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &rhs_dt
, &rhs_vectype
))
2550 if (dump_enabled_p ())
2551 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2552 "use not simple.\n");
2556 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2557 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2559 if (dump_enabled_p ())
2560 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2561 "incompatible vector types.\n");
2565 *rhs_dt_out
= rhs_dt
;
2566 *rhs_vectype_out
= rhs_vectype
;
2567 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2568 *vls_type_out
= VLS_STORE_INVARIANT
;
2570 *vls_type_out
= VLS_STORE
;
2574 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2575 Note that we support masks with floating-point type, in which case the
2576 floats are interpreted as a bitmask. */
2579 vect_build_all_ones_mask (stmt_vec_info stmt_info
, tree masktype
)
2581 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2582 return build_int_cst (masktype
, -1);
2583 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2585 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2586 mask
= build_vector_from_val (masktype
, mask
);
2587 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2589 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2593 for (int j
= 0; j
< 6; ++j
)
2595 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2596 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2597 mask
= build_vector_from_val (masktype
, mask
);
2598 return vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
2603 /* Build an all-zero merge value of type VECTYPE while vectorizing
2604 STMT_INFO as a gather load. */
2607 vect_build_zero_merge_argument (stmt_vec_info stmt_info
, tree vectype
)
2610 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2611 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
2612 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2616 for (int j
= 0; j
< 6; ++j
)
2618 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2619 merge
= build_real (TREE_TYPE (vectype
), r
);
2623 merge
= build_vector_from_val (vectype
, merge
);
2624 return vect_init_vector (stmt_info
, merge
, vectype
, NULL
);
2627 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2628 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2629 the gather load operation. If the load is conditional, MASK is the
2630 unvectorized condition and MASK_DT is its definition type, otherwise
2634 vect_build_gather_load_calls (stmt_vec_info stmt_info
,
2635 gimple_stmt_iterator
*gsi
,
2636 stmt_vec_info
*vec_stmt
,
2637 gather_scatter_info
*gs_info
,
2640 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2641 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2642 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2643 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2644 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2645 edge pe
= loop_preheader_edge (loop
);
2646 enum { NARROW
, NONE
, WIDEN
} modifier
;
2647 poly_uint64 gather_off_nunits
2648 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
2650 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2651 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2652 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2653 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2654 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2655 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2656 tree scaletype
= TREE_VALUE (arglist
);
2657 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2658 && (!mask
|| types_compatible_p (srctype
, masktype
)));
2660 tree perm_mask
= NULL_TREE
;
2661 tree mask_perm_mask
= NULL_TREE
;
2662 if (known_eq (nunits
, gather_off_nunits
))
2664 else if (known_eq (nunits
* 2, gather_off_nunits
))
2668 /* Currently widening gathers and scatters are only supported for
2669 fixed-length vectors. */
2670 int count
= gather_off_nunits
.to_constant ();
2671 vec_perm_builder
sel (count
, count
, 1);
2672 for (int i
= 0; i
< count
; ++i
)
2673 sel
.quick_push (i
| (count
/ 2));
2675 vec_perm_indices
indices (sel
, 1, count
);
2676 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2679 else if (known_eq (nunits
, gather_off_nunits
* 2))
2683 /* Currently narrowing gathers and scatters are only supported for
2684 fixed-length vectors. */
2685 int count
= nunits
.to_constant ();
2686 vec_perm_builder
sel (count
, count
, 1);
2687 sel
.quick_grow (count
);
2688 for (int i
= 0; i
< count
; ++i
)
2689 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2690 vec_perm_indices
indices (sel
, 2, count
);
2691 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2697 for (int i
= 0; i
< count
; ++i
)
2698 sel
[i
] = i
| (count
/ 2);
2699 indices
.new_vector (sel
, 2, count
);
2700 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2706 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2707 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
2709 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2710 if (!is_gimple_min_invariant (ptr
))
2713 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2714 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2715 gcc_assert (!new_bb
);
2718 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2720 tree vec_oprnd0
= NULL_TREE
;
2721 tree vec_mask
= NULL_TREE
;
2722 tree src_op
= NULL_TREE
;
2723 tree mask_op
= NULL_TREE
;
2724 tree prev_res
= NULL_TREE
;
2725 stmt_vec_info prev_stmt_info
= NULL
;
2729 src_op
= vect_build_zero_merge_argument (stmt_info
, rettype
);
2730 mask_op
= vect_build_all_ones_mask (stmt_info
, masktype
);
2733 for (int j
= 0; j
< ncopies
; ++j
)
2736 if (modifier
== WIDEN
&& (j
& 1))
2737 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
2738 perm_mask
, stmt_info
, gsi
);
2741 = vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
);
2743 op
= vec_oprnd0
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2746 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2748 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2749 TYPE_VECTOR_SUBPARTS (idxtype
)));
2750 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2751 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2752 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2753 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2759 if (mask_perm_mask
&& (j
& 1))
2760 mask_op
= permute_vec_elements (mask_op
, mask_op
,
2761 mask_perm_mask
, stmt_info
, gsi
);
2765 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
);
2767 vec_mask
= vect_get_vec_def_for_stmt_copy (loop_vinfo
,
2771 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2774 (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
)),
2775 TYPE_VECTOR_SUBPARTS (masktype
)));
2776 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2777 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2779 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2780 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2787 gcall
*new_call
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
2790 stmt_vec_info new_stmt_info
;
2791 if (!useless_type_conversion_p (vectype
, rettype
))
2793 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2794 TYPE_VECTOR_SUBPARTS (rettype
)));
2795 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2796 gimple_call_set_lhs (new_call
, op
);
2797 vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2798 var
= make_ssa_name (vec_dest
);
2799 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2800 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2802 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
2806 var
= make_ssa_name (vec_dest
, new_call
);
2807 gimple_call_set_lhs (new_call
, var
);
2809 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
2812 if (modifier
== NARROW
)
2819 var
= permute_vec_elements (prev_res
, var
, perm_mask
,
2821 new_stmt_info
= loop_vinfo
->lookup_def (var
);
2824 if (prev_stmt_info
== NULL
)
2825 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
2827 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
2828 prev_stmt_info
= new_stmt_info
;
2832 /* Prepare the base and offset in GS_INFO for vectorization.
2833 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2834 to the vectorized offset argument for the first copy of STMT_INFO.
2835 STMT_INFO is the statement described by GS_INFO and LOOP is the
2839 vect_get_gather_scatter_ops (struct loop
*loop
, stmt_vec_info stmt_info
,
2840 gather_scatter_info
*gs_info
,
2841 tree
*dataref_ptr
, tree
*vec_offset
)
2843 gimple_seq stmts
= NULL
;
2844 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
2848 edge pe
= loop_preheader_edge (loop
);
2849 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2850 gcc_assert (!new_bb
);
2852 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2853 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
2854 *vec_offset
= vect_get_vec_def_for_operand (gs_info
->offset
, stmt_info
,
2858 /* Prepare to implement a grouped or strided load or store using
2859 the gather load or scatter store operation described by GS_INFO.
2860 STMT_INFO is the load or store statement.
2862 Set *DATAREF_BUMP to the amount that should be added to the base
2863 address after each copy of the vectorized statement. Set *VEC_OFFSET
2864 to an invariant offset vector in which element I has the value
2865 I * DR_STEP / SCALE. */
2868 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2869 loop_vec_info loop_vinfo
,
2870 gather_scatter_info
*gs_info
,
2871 tree
*dataref_bump
, tree
*vec_offset
)
2873 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2874 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2875 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2878 tree bump
= size_binop (MULT_EXPR
,
2879 fold_convert (sizetype
, DR_STEP (dr
)),
2880 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2881 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
2883 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2885 /* The offset given in GS_INFO can have pointer type, so use the element
2886 type of the vector instead. */
2887 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2888 tree offset_vectype
= get_vectype_for_scalar_type (offset_type
);
2889 offset_type
= TREE_TYPE (offset_vectype
);
2891 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2892 tree step
= size_binop (EXACT_DIV_EXPR
, DR_STEP (dr
),
2893 ssize_int (gs_info
->scale
));
2894 step
= fold_convert (offset_type
, step
);
2895 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
2897 /* Create {0, X, X*2, X*3, ...}. */
2898 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, offset_vectype
,
2899 build_zero_cst (offset_type
), step
);
2901 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2904 /* Return the amount that should be added to a vector pointer to move
2905 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2906 being vectorized and MEMORY_ACCESS_TYPE describes the type of
2910 vect_get_data_ptr_increment (dr_vec_info
*dr_info
, tree aggr_type
,
2911 vect_memory_access_type memory_access_type
)
2913 if (memory_access_type
== VMAT_INVARIANT
)
2914 return size_zero_node
;
2916 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
2917 tree step
= vect_dr_behavior (dr_info
)->step
;
2918 if (tree_int_cst_sgn (step
) == -1)
2919 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
2923 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}. */
2926 vectorizable_bswap (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
2927 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
2928 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
2931 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
2932 vec_info
*vinfo
= stmt_info
->vinfo
;
2933 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2936 op
= gimple_call_arg (stmt
, 0);
2937 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2938 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2940 /* Multiple types in SLP are handled by creating the appropriate number of
2941 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2946 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2948 gcc_assert (ncopies
>= 1);
2950 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2954 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
2955 unsigned word_bytes
;
2956 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
2959 /* The encoding uses one stepped pattern for each byte in the word. */
2960 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
2961 for (unsigned i
= 0; i
< 3; ++i
)
2962 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2963 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
2965 vec_perm_indices
indices (elts
, 1, num_bytes
);
2966 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
2971 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2972 DUMP_VECT_SCOPE ("vectorizable_bswap");
2975 record_stmt_cost (cost_vec
,
2976 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2977 record_stmt_cost (cost_vec
,
2978 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
2983 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
2986 vec
<tree
> vec_oprnds
= vNULL
;
2987 stmt_vec_info new_stmt_info
= NULL
;
2988 stmt_vec_info prev_stmt_info
= NULL
;
2989 for (unsigned j
= 0; j
< ncopies
; j
++)
2993 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
2995 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
2997 /* Arguments are ready. create the new vector stmt. */
3000 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3003 tree tem
= make_ssa_name (char_vectype
);
3004 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3005 char_vectype
, vop
));
3006 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3007 tree tem2
= make_ssa_name (char_vectype
);
3008 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
3009 tem
, tem
, bswap_vconst
);
3010 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3011 tem
= make_ssa_name (vectype
);
3012 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
3015 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3017 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3024 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3026 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3028 prev_stmt_info
= new_stmt_info
;
3031 vec_oprnds
.release ();
3035 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3036 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3037 in a single step. On success, store the binary pack code in
3041 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3042 tree_code
*convert_code
)
3044 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3045 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3049 int multi_step_cvt
= 0;
3050 auto_vec
<tree
, 8> interm_types
;
3051 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3052 &code
, &multi_step_cvt
,
3057 *convert_code
= code
;
3061 /* Function vectorizable_call.
3063 Check if STMT_INFO performs a function call that can be vectorized.
3064 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3065 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3066 Return true if STMT_INFO is vectorizable in this way. */
3069 vectorizable_call (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3070 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3071 stmt_vector_for_cost
*cost_vec
)
3077 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3078 stmt_vec_info prev_stmt_info
;
3079 tree vectype_out
, vectype_in
;
3080 poly_uint64 nunits_in
;
3081 poly_uint64 nunits_out
;
3082 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3083 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3084 vec_info
*vinfo
= stmt_info
->vinfo
;
3085 tree fndecl
, new_temp
, rhs_type
;
3086 enum vect_def_type dt
[4]
3087 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3088 vect_unknown_def_type
};
3089 int ndts
= ARRAY_SIZE (dt
);
3091 auto_vec
<tree
, 8> vargs
;
3092 auto_vec
<tree
, 8> orig_vargs
;
3093 enum { NARROW
, NONE
, WIDEN
} modifier
;
3097 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3100 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3104 /* Is STMT_INFO a vectorizable call? */
3105 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3109 if (gimple_call_internal_p (stmt
)
3110 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3111 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3112 /* Handled by vectorizable_load and vectorizable_store. */
3115 if (gimple_call_lhs (stmt
) == NULL_TREE
3116 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3119 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3121 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3123 /* Process function arguments. */
3124 rhs_type
= NULL_TREE
;
3125 vectype_in
= NULL_TREE
;
3126 nargs
= gimple_call_num_args (stmt
);
3128 /* Bail out if the function has more than three arguments, we do not have
3129 interesting builtin functions to vectorize with more than two arguments
3130 except for fma. No arguments is also not good. */
3131 if (nargs
== 0 || nargs
> 4)
3134 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
3135 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3136 if (cfn
== CFN_GOMP_SIMD_LANE
)
3139 rhs_type
= unsigned_type_node
;
3143 if (internal_fn_p (cfn
))
3144 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3146 for (i
= 0; i
< nargs
; i
++)
3150 op
= gimple_call_arg (stmt
, i
);
3151 if (!vect_is_simple_use (op
, vinfo
, &dt
[i
], &opvectype
))
3153 if (dump_enabled_p ())
3154 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3155 "use not simple.\n");
3159 /* Skip the mask argument to an internal function. This operand
3160 has been converted via a pattern if necessary. */
3161 if ((int) i
== mask_opno
)
3164 /* We can only handle calls with arguments of the same type. */
3166 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3168 if (dump_enabled_p ())
3169 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3170 "argument types differ.\n");
3174 rhs_type
= TREE_TYPE (op
);
3177 vectype_in
= opvectype
;
3179 && opvectype
!= vectype_in
)
3181 if (dump_enabled_p ())
3182 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3183 "argument vector types differ.\n");
3187 /* If all arguments are external or constant defs use a vector type with
3188 the same size as the output vector type. */
3190 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
3192 gcc_assert (vectype_in
);
3195 if (dump_enabled_p ())
3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3197 "no vectype for scalar type %T\n", rhs_type
);
3203 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3204 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3205 if (known_eq (nunits_in
* 2, nunits_out
))
3207 else if (known_eq (nunits_out
, nunits_in
))
3209 else if (known_eq (nunits_out
* 2, nunits_in
))
3214 /* We only handle functions that do not read or clobber memory. */
3215 if (gimple_vuse (stmt
))
3217 if (dump_enabled_p ())
3218 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3219 "function reads from or writes to memory.\n");
3223 /* For now, we only vectorize functions if a target specific builtin
3224 is available. TODO -- in some cases, it might be profitable to
3225 insert the calls for pieces of the vector, in order to be able
3226 to vectorize other operations in the loop. */
3228 internal_fn ifn
= IFN_LAST
;
3229 tree callee
= gimple_call_fndecl (stmt
);
3231 /* First try using an internal function. */
3232 tree_code convert_code
= ERROR_MARK
;
3234 && (modifier
== NONE
3235 || (modifier
== NARROW
3236 && simple_integer_narrowing (vectype_out
, vectype_in
,
3238 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3241 /* If that fails, try asking for a target-specific built-in function. */
3242 if (ifn
== IFN_LAST
)
3244 if (cfn
!= CFN_LAST
)
3245 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3246 (cfn
, vectype_out
, vectype_in
);
3248 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3249 (callee
, vectype_out
, vectype_in
);
3252 if (ifn
== IFN_LAST
&& !fndecl
)
3254 if (cfn
== CFN_GOMP_SIMD_LANE
3257 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3258 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3259 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3260 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3262 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3263 { 0, 1, 2, ... vf - 1 } vector. */
3264 gcc_assert (nargs
== 0);
3266 else if (modifier
== NONE
3267 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3268 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3269 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)))
3270 return vectorizable_bswap (stmt_info
, gsi
, vec_stmt
, slp_node
,
3271 vectype_in
, cost_vec
);
3274 if (dump_enabled_p ())
3275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3276 "function is not vectorizable.\n");
3283 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3284 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3286 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3288 /* Sanity check: make sure that at least one copy of the vectorized stmt
3289 needs to be generated. */
3290 gcc_assert (ncopies
>= 1);
3292 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3293 if (!vec_stmt
) /* transformation not required. */
3295 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3296 DUMP_VECT_SCOPE ("vectorizable_call");
3297 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3298 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3299 record_stmt_cost (cost_vec
, ncopies
/ 2,
3300 vec_promote_demote
, stmt_info
, 0, vect_body
);
3302 if (loop_vinfo
&& mask_opno
>= 0)
3304 unsigned int nvectors
= (slp_node
3305 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3307 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype_out
);
3314 if (dump_enabled_p ())
3315 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3318 scalar_dest
= gimple_call_lhs (stmt
);
3319 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3321 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3323 stmt_vec_info new_stmt_info
= NULL
;
3324 prev_stmt_info
= NULL
;
3325 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3327 tree prev_res
= NULL_TREE
;
3328 vargs
.safe_grow (nargs
);
3329 orig_vargs
.safe_grow (nargs
);
3330 for (j
= 0; j
< ncopies
; ++j
)
3332 /* Build argument list for the vectorized call. */
3335 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3336 vec
<tree
> vec_oprnds0
;
3338 for (i
= 0; i
< nargs
; i
++)
3339 vargs
[i
] = gimple_call_arg (stmt
, i
);
3340 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3341 vec_oprnds0
= vec_defs
[0];
3343 /* Arguments are ready. Create the new vector stmt. */
3344 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3347 for (k
= 0; k
< nargs
; k
++)
3349 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3350 vargs
[k
] = vec_oprndsk
[i
];
3352 if (modifier
== NARROW
)
3354 /* We don't define any narrowing conditional functions
3356 gcc_assert (mask_opno
< 0);
3357 tree half_res
= make_ssa_name (vectype_in
);
3359 = gimple_build_call_internal_vec (ifn
, vargs
);
3360 gimple_call_set_lhs (call
, half_res
);
3361 gimple_call_set_nothrow (call
, true);
3363 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3366 prev_res
= half_res
;
3369 new_temp
= make_ssa_name (vec_dest
);
3371 = gimple_build_assign (new_temp
, convert_code
,
3372 prev_res
, half_res
);
3374 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
3379 if (mask_opno
>= 0 && masked_loop_p
)
3381 unsigned int vec_num
= vec_oprnds0
.length ();
3382 /* Always true for SLP. */
3383 gcc_assert (ncopies
== 1);
3384 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3386 vargs
[mask_opno
] = prepare_load_store_mask
3387 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3391 if (ifn
!= IFN_LAST
)
3392 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3394 call
= gimple_build_call_vec (fndecl
, vargs
);
3395 new_temp
= make_ssa_name (vec_dest
, call
);
3396 gimple_call_set_lhs (call
, new_temp
);
3397 gimple_call_set_nothrow (call
, true);
3399 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3401 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3404 for (i
= 0; i
< nargs
; i
++)
3406 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3407 vec_oprndsi
.release ();
3412 for (i
= 0; i
< nargs
; i
++)
3414 op
= gimple_call_arg (stmt
, i
);
3417 = vect_get_vec_def_for_operand (op
, stmt_info
);
3420 = vect_get_vec_def_for_stmt_copy (vinfo
, orig_vargs
[i
]);
3422 orig_vargs
[i
] = vargs
[i
] = vec_oprnd0
;
3425 if (mask_opno
>= 0 && masked_loop_p
)
3427 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3430 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3431 vargs
[mask_opno
], gsi
);
3434 if (cfn
== CFN_GOMP_SIMD_LANE
)
3436 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3438 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3439 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3440 vect_init_vector_1 (stmt_info
, init_stmt
, NULL
);
3441 new_temp
= make_ssa_name (vec_dest
);
3442 gimple
*new_stmt
= gimple_build_assign (new_temp
, new_var
);
3444 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3446 else if (modifier
== NARROW
)
3448 /* We don't define any narrowing conditional functions at
3450 gcc_assert (mask_opno
< 0);
3451 tree half_res
= make_ssa_name (vectype_in
);
3452 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3453 gimple_call_set_lhs (call
, half_res
);
3454 gimple_call_set_nothrow (call
, true);
3456 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3459 prev_res
= half_res
;
3462 new_temp
= make_ssa_name (vec_dest
);
3463 gassign
*new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3464 prev_res
, half_res
);
3466 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3471 if (ifn
!= IFN_LAST
)
3472 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3474 call
= gimple_build_call_vec (fndecl
, vargs
);
3475 new_temp
= make_ssa_name (vec_dest
, call
);
3476 gimple_call_set_lhs (call
, new_temp
);
3477 gimple_call_set_nothrow (call
, true);
3479 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3482 if (j
== (modifier
== NARROW
? 1 : 0))
3483 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
3485 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3487 prev_stmt_info
= new_stmt_info
;
3490 else if (modifier
== NARROW
)
3492 /* We don't define any narrowing conditional functions at present. */
3493 gcc_assert (mask_opno
< 0);
3494 for (j
= 0; j
< ncopies
; ++j
)
3496 /* Build argument list for the vectorized call. */
3498 vargs
.create (nargs
* 2);
3504 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3505 vec
<tree
> vec_oprnds0
;
3507 for (i
= 0; i
< nargs
; i
++)
3508 vargs
.quick_push (gimple_call_arg (stmt
, i
));
3509 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
);
3510 vec_oprnds0
= vec_defs
[0];
3512 /* Arguments are ready. Create the new vector stmt. */
3513 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3517 for (k
= 0; k
< nargs
; k
++)
3519 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3520 vargs
.quick_push (vec_oprndsk
[i
]);
3521 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3524 if (ifn
!= IFN_LAST
)
3525 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3527 call
= gimple_build_call_vec (fndecl
, vargs
);
3528 new_temp
= make_ssa_name (vec_dest
, call
);
3529 gimple_call_set_lhs (call
, new_temp
);
3530 gimple_call_set_nothrow (call
, true);
3532 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
3533 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
3536 for (i
= 0; i
< nargs
; i
++)
3538 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3539 vec_oprndsi
.release ();
3544 for (i
= 0; i
< nargs
; i
++)
3546 op
= gimple_call_arg (stmt
, i
);
3550 = vect_get_vec_def_for_operand (op
, stmt_info
);
3552 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3556 vec_oprnd1
= gimple_call_arg (new_stmt_info
->stmt
,
3559 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
3561 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
3564 vargs
.quick_push (vec_oprnd0
);
3565 vargs
.quick_push (vec_oprnd1
);
3568 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3569 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3570 gimple_call_set_lhs (new_stmt
, new_temp
);
3572 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
3575 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
3577 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
3579 prev_stmt_info
= new_stmt_info
;
3582 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
3585 /* No current target implements this case. */
3590 /* The call in STMT might prevent it from being removed in dce.
3591 We however cannot remove it here, due to the way the ssa name
3592 it defines is mapped to the new definition. So just replace
3593 rhs of the statement with something harmless. */
3598 stmt_info
= vect_orig_stmt (stmt_info
);
3599 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3602 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3603 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
3609 struct simd_call_arg_info
/* NOTE(review): several short lines of this struct (the braces and
   other fields such as OP, VECTYPE and ALIGN, which later code in
   this file reads via thisarginfo.op / .vectype / .align) were
   dropped by the extraction; only the fields below are visible.  */
/* Constant linear step of the argument across iterations; 0 when the
   argument is not known to be linear.  */
3613 HOST_WIDE_INT linear_step
;
/* How the argument is defined (constant, external, internal, ...),
   as classified by vect_is_simple_use.  */
3614 enum vect_def_type dt
;
/* True iff the argument was detected (by vect_simd_lane_linear) to be
   linear within a simd lane but not within the whole loop.  */
3616 bool simd_lane_linear
;
3619 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3620 is linear within simd lane (but not within whole loop), note it in
/* NOTE(review): the tail of this comment and many short source lines
   (braces, case labels, returns) are missing from this extraction;
   the code fragments below are kept byte-identical.  */
3624 vect_simd_lane_linear (tree op
, struct loop
*loop
,
3625 struct simd_call_arg_info
*arginfo
)
/* OP must be defined by a POINTER_PLUS_EXPR of a gimple-invariant
   first operand; otherwise presumably return early -- the short
   return lines were dropped by the extraction, TODO confirm.  */
3627 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3629 if (!is_gimple_assign (def_stmt
)
3630 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3631 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
3634 tree base
= gimple_assign_rhs1 (def_stmt
);
3635 HOST_WIDE_INT linear_step
= 0;
3636 tree v
= gimple_assign_rhs2 (def_stmt
);
/* Walk the SSA chain that defines the offset V, folding constant
   addends into BASE and capturing at most one constant step into
   LINEAR_STEP (both guarded by "if (linear_step ...)" below).  */
3637 while (TREE_CODE (v
) == SSA_NAME
)
3640 def_stmt
= SSA_NAME_DEF_STMT (v
);
3641 if (is_gimple_assign (def_stmt
))
3642 switch (gimple_assign_rhs_code (def_stmt
))
/* NOTE(review): the case labels were dropped by the extraction; from
   the bodies these appear to be a plus-like case (fold constant T
   into BASE), a mult-like case (record T as LINEAR_STEP) and a
   conversion case (precision check) -- confirm against original.  */
3645 t
= gimple_assign_rhs2 (def_stmt
);
3646 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3648 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3649 v
= gimple_assign_rhs1 (def_stmt
);
3652 t
= gimple_assign_rhs2 (def_stmt
);
3653 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3655 linear_step
= tree_to_shwi (t
);
3656 v
= gimple_assign_rhs1 (def_stmt
);
3659 t
= gimple_assign_rhs1 (def_stmt
);
/* Conversions are only looked through when they do not narrow:
   V's precision must be at least T's.  */
3660 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3661 || (TYPE_PRECISION (TREE_TYPE (v
))
3662 < TYPE_PRECISION (TREE_TYPE (t
))))
/* Chain ends at an IFN_GOMP_SIMD_LANE call: OP is linear within a
   simd lane.  Its first argument is compared against a dropped
   operand -- presumably the loop's simduid; confirm.  */
3671 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3673 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3674 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
/* Record the findings for the caller (vectorizable_simd_clone_call).  */
3679 arginfo
->linear_step
= linear_step
;
3681 arginfo
->simd_lane_linear
= true;
3687 /* Return the number of elements in vector type VECTYPE, which is associated
3688 with a SIMD clone. At present these vectors always have a constant
3691 static unsigned HOST_WIDE_INT
3692 simd_clone_subparts (tree vectype
)
/* to_constant is safe here: per the function comment above, vector
   types associated with SIMD clones currently always have a constant
   number of subparts (no variable-length vectors).  */
3694 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3697 /* Function vectorizable_simd_clone_call.
3699 Check if STMT_INFO performs a function call that can be vectorized
3700 by calling a simd clone of the function.
3701 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3702 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3703 Return true if STMT_INFO is vectorizable in this way. */
3706 vectorizable_simd_clone_call (stmt_vec_info stmt_info
,
3707 gimple_stmt_iterator
*gsi
,
3708 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
3709 stmt_vector_for_cost
*)
3714 tree vec_oprnd0
= NULL_TREE
;
3715 stmt_vec_info prev_stmt_info
;
3717 unsigned int nunits
;
3718 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3719 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3720 vec_info
*vinfo
= stmt_info
->vinfo
;
3721 struct loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3722 tree fndecl
, new_temp
;
3724 auto_vec
<simd_call_arg_info
> arginfo
;
3725 vec
<tree
> vargs
= vNULL
;
3727 tree lhs
, rtype
, ratype
;
3728 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3730 /* Is STMT a vectorizable call? */
3731 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3735 fndecl
= gimple_call_fndecl (stmt
);
3736 if (fndecl
== NULL_TREE
)
3739 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3740 if (node
== NULL
|| node
->simd_clones
== NULL
)
3743 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3746 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3750 if (gimple_call_lhs (stmt
)
3751 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3754 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3756 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3758 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3765 /* Process function arguments. */
3766 nargs
= gimple_call_num_args (stmt
);
3768 /* Bail out if the function has zero arguments. */
3772 arginfo
.reserve (nargs
, true);
3774 for (i
= 0; i
< nargs
; i
++)
3776 simd_call_arg_info thisarginfo
;
3779 thisarginfo
.linear_step
= 0;
3780 thisarginfo
.align
= 0;
3781 thisarginfo
.op
= NULL_TREE
;
3782 thisarginfo
.simd_lane_linear
= false;
3784 op
= gimple_call_arg (stmt
, i
);
3785 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3786 &thisarginfo
.vectype
)
3787 || thisarginfo
.dt
== vect_uninitialized_def
)
3789 if (dump_enabled_p ())
3790 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3791 "use not simple.\n");
3795 if (thisarginfo
.dt
== vect_constant_def
3796 || thisarginfo
.dt
== vect_external_def
)
3797 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3799 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3801 /* For linear arguments, the analyze phase should have saved
3802 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3803 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3804 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3806 gcc_assert (vec_stmt
);
3807 thisarginfo
.linear_step
3808 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3810 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3811 thisarginfo
.simd_lane_linear
3812 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3813 == boolean_true_node
);
3814 /* If loop has been peeled for alignment, we need to adjust it. */
3815 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3816 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3817 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3819 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3820 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3821 tree opt
= TREE_TYPE (thisarginfo
.op
);
3822 bias
= fold_convert (TREE_TYPE (step
), bias
);
3823 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3825 = fold_build2 (POINTER_TYPE_P (opt
)
3826 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3827 thisarginfo
.op
, bias
);
3831 && thisarginfo
.dt
!= vect_constant_def
3832 && thisarginfo
.dt
!= vect_external_def
3834 && TREE_CODE (op
) == SSA_NAME
3835 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3837 && tree_fits_shwi_p (iv
.step
))
3839 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3840 thisarginfo
.op
= iv
.base
;
3842 else if ((thisarginfo
.dt
== vect_constant_def
3843 || thisarginfo
.dt
== vect_external_def
)
3844 && POINTER_TYPE_P (TREE_TYPE (op
)))
3845 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3846 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3848 if (POINTER_TYPE_P (TREE_TYPE (op
))
3849 && !thisarginfo
.linear_step
3851 && thisarginfo
.dt
!= vect_constant_def
3852 && thisarginfo
.dt
!= vect_external_def
3855 && TREE_CODE (op
) == SSA_NAME
)
3856 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3858 arginfo
.quick_push (thisarginfo
);
3861 unsigned HOST_WIDE_INT vf
;
3862 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
3864 if (dump_enabled_p ())
3865 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3866 "not considering SIMD clones; not yet supported"
3867 " for variable-width vectors.\n");
3871 unsigned int badness
= 0;
3872 struct cgraph_node
*bestn
= NULL
;
3873 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3874 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3876 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3877 n
= n
->simdclone
->next_clone
)
3879 unsigned int this_badness
= 0;
3880 if (n
->simdclone
->simdlen
> vf
3881 || n
->simdclone
->nargs
!= nargs
)
3883 if (n
->simdclone
->simdlen
< vf
)
3884 this_badness
+= (exact_log2 (vf
)
3885 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3886 if (n
->simdclone
->inbranch
)
3887 this_badness
+= 2048;
3888 int target_badness
= targetm
.simd_clone
.usable (n
);
3889 if (target_badness
< 0)
3891 this_badness
+= target_badness
* 512;
3892 /* FORNOW: Have to add code to add the mask argument. */
3893 if (n
->simdclone
->inbranch
)
3895 for (i
= 0; i
< nargs
; i
++)
3897 switch (n
->simdclone
->args
[i
].arg_type
)
3899 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3900 if (!useless_type_conversion_p
3901 (n
->simdclone
->args
[i
].orig_type
,
3902 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3904 else if (arginfo
[i
].dt
== vect_constant_def
3905 || arginfo
[i
].dt
== vect_external_def
3906 || arginfo
[i
].linear_step
)
3909 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3910 if (arginfo
[i
].dt
!= vect_constant_def
3911 && arginfo
[i
].dt
!= vect_external_def
)
3914 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3915 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3916 if (arginfo
[i
].dt
== vect_constant_def
3917 || arginfo
[i
].dt
== vect_external_def
3918 || (arginfo
[i
].linear_step
3919 != n
->simdclone
->args
[i
].linear_step
))
3922 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3923 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3924 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3925 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3926 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3927 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3931 case SIMD_CLONE_ARG_TYPE_MASK
:
3934 if (i
== (size_t) -1)
3936 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3941 if (arginfo
[i
].align
)
3942 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3943 - exact_log2 (n
->simdclone
->args
[i
].alignment
));
3945 if (i
== (size_t) -1)
3947 if (bestn
== NULL
|| this_badness
< badness
)
3950 badness
= this_badness
;
3957 for (i
= 0; i
< nargs
; i
++)
3958 if ((arginfo
[i
].dt
== vect_constant_def
3959 || arginfo
[i
].dt
== vect_external_def
)
3960 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3963 = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt
,
3965 if (arginfo
[i
].vectype
== NULL
3966 || (simd_clone_subparts (arginfo
[i
].vectype
)
3967 > bestn
->simdclone
->simdlen
))
3971 fndecl
= bestn
->decl
;
3972 nunits
= bestn
->simdclone
->simdlen
;
3973 ncopies
= vf
/ nunits
;
3975 /* If the function isn't const, only allow it in simd loops where user
3976 has asserted that at least nunits consecutive iterations can be
3977 performed using SIMD instructions. */
3978 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3979 && gimple_vuse (stmt
))
3982 /* Sanity check: make sure that at least one copy of the vectorized stmt
3983 needs to be generated. */
3984 gcc_assert (ncopies
>= 1);
3986 if (!vec_stmt
) /* transformation not required. */
3988 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3989 for (i
= 0; i
< nargs
; i
++)
3990 if ((bestn
->simdclone
->args
[i
].arg_type
3991 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3992 || (bestn
->simdclone
->args
[i
].arg_type
3993 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3995 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3997 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3998 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3999 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
4000 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
4001 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
4002 tree sll
= arginfo
[i
].simd_lane_linear
4003 ? boolean_true_node
: boolean_false_node
;
4004 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
4006 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
4007 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
4008 /* vect_model_simple_cost (stmt_info, ncopies, dt, slp_node, cost_vec); */
4014 if (dump_enabled_p ())
4015 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
4018 scalar_dest
= gimple_call_lhs (stmt
);
4019 vec_dest
= NULL_TREE
;
4024 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4025 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4026 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4029 rtype
= TREE_TYPE (ratype
);
4033 prev_stmt_info
= NULL
;
4034 for (j
= 0; j
< ncopies
; ++j
)
4036 /* Build argument list for the vectorized call. */
4038 vargs
.create (nargs
);
4042 for (i
= 0; i
< nargs
; i
++)
4044 unsigned int k
, l
, m
, o
;
4046 op
= gimple_call_arg (stmt
, i
);
4047 switch (bestn
->simdclone
->args
[i
].arg_type
)
4049 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4050 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4051 o
= nunits
/ simd_clone_subparts (atype
);
4052 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4054 if (simd_clone_subparts (atype
)
4055 < simd_clone_subparts (arginfo
[i
].vectype
))
4057 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4058 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4059 / simd_clone_subparts (atype
));
4060 gcc_assert ((k
& (k
- 1)) == 0);
4063 = vect_get_vec_def_for_operand (op
, stmt_info
);
4066 vec_oprnd0
= arginfo
[i
].op
;
4067 if ((m
& (k
- 1)) == 0)
4069 = vect_get_vec_def_for_stmt_copy (vinfo
,
4072 arginfo
[i
].op
= vec_oprnd0
;
4074 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4076 bitsize_int ((m
& (k
- 1)) * prec
));
4078 = gimple_build_assign (make_ssa_name (atype
),
4080 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4081 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4085 k
= (simd_clone_subparts (atype
)
4086 / simd_clone_subparts (arginfo
[i
].vectype
));
4087 gcc_assert ((k
& (k
- 1)) == 0);
4088 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4090 vec_alloc (ctor_elts
, k
);
4093 for (l
= 0; l
< k
; l
++)
4095 if (m
== 0 && l
== 0)
4097 = vect_get_vec_def_for_operand (op
, stmt_info
);
4100 = vect_get_vec_def_for_stmt_copy (vinfo
,
4102 arginfo
[i
].op
= vec_oprnd0
;
4105 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4109 vargs
.safe_push (vec_oprnd0
);
4112 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4114 = gimple_build_assign (make_ssa_name (atype
),
4116 vect_finish_stmt_generation (stmt_info
, new_stmt
,
4118 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4123 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4124 vargs
.safe_push (op
);
4126 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4127 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4132 = force_gimple_operand (arginfo
[i
].op
, &stmts
, true,
4137 edge pe
= loop_preheader_edge (loop
);
4138 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4139 gcc_assert (!new_bb
);
4141 if (arginfo
[i
].simd_lane_linear
)
4143 vargs
.safe_push (arginfo
[i
].op
);
4146 tree phi_res
= copy_ssa_name (op
);
4147 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4148 loop_vinfo
->add_stmt (new_phi
);
4149 add_phi_arg (new_phi
, arginfo
[i
].op
,
4150 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4152 = POINTER_TYPE_P (TREE_TYPE (op
))
4153 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4154 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4155 ? sizetype
: TREE_TYPE (op
);
4157 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4159 tree tcst
= wide_int_to_tree (type
, cst
);
4160 tree phi_arg
= copy_ssa_name (op
);
4162 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4163 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4164 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4165 loop_vinfo
->add_stmt (new_stmt
);
4166 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4168 arginfo
[i
].op
= phi_res
;
4169 vargs
.safe_push (phi_res
);
4174 = POINTER_TYPE_P (TREE_TYPE (op
))
4175 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4176 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4177 ? sizetype
: TREE_TYPE (op
);
4179 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4181 tree tcst
= wide_int_to_tree (type
, cst
);
4182 new_temp
= make_ssa_name (TREE_TYPE (op
));
4184 = gimple_build_assign (new_temp
, code
,
4185 arginfo
[i
].op
, tcst
);
4186 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4187 vargs
.safe_push (new_temp
);
4190 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4191 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4192 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4193 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4194 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4195 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
4201 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4204 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4206 new_temp
= create_tmp_var (ratype
);
4207 else if (simd_clone_subparts (vectype
)
4208 == simd_clone_subparts (rtype
))
4209 new_temp
= make_ssa_name (vec_dest
, new_call
);
4211 new_temp
= make_ssa_name (rtype
, new_call
);
4212 gimple_call_set_lhs (new_call
, new_temp
);
4214 stmt_vec_info new_stmt_info
4215 = vect_finish_stmt_generation (stmt_info
, new_call
, gsi
);
4219 if (simd_clone_subparts (vectype
) < nunits
)
4222 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4223 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4224 k
= nunits
/ simd_clone_subparts (vectype
);
4225 gcc_assert ((k
& (k
- 1)) == 0);
4226 for (l
= 0; l
< k
; l
++)
4231 t
= build_fold_addr_expr (new_temp
);
4232 t
= build2 (MEM_REF
, vectype
, t
,
4233 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4236 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4237 bitsize_int (prec
), bitsize_int (l
* prec
));
4239 = gimple_build_assign (make_ssa_name (vectype
), t
);
4241 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4243 if (j
== 0 && l
== 0)
4244 STMT_VINFO_VEC_STMT (stmt_info
)
4245 = *vec_stmt
= new_stmt_info
;
4247 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4249 prev_stmt_info
= new_stmt_info
;
4253 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4256 else if (simd_clone_subparts (vectype
) > nunits
)
4258 unsigned int k
= (simd_clone_subparts (vectype
)
4259 / simd_clone_subparts (rtype
));
4260 gcc_assert ((k
& (k
- 1)) == 0);
4261 if ((j
& (k
- 1)) == 0)
4262 vec_alloc (ret_ctor_elts
, k
);
4265 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4266 for (m
= 0; m
< o
; m
++)
4268 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4269 size_int (m
), NULL_TREE
, NULL_TREE
);
4271 = gimple_build_assign (make_ssa_name (rtype
), tem
);
4273 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
4275 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4276 gimple_assign_lhs (new_stmt
));
4278 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4281 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4282 if ((j
& (k
- 1)) != k
- 1)
4284 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4286 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4288 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4290 if ((unsigned) j
== k
- 1)
4291 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4293 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4295 prev_stmt_info
= new_stmt_info
;
4300 tree t
= build_fold_addr_expr (new_temp
);
4301 t
= build2 (MEM_REF
, vectype
, t
,
4302 build_int_cst (TREE_TYPE (t
), 0));
4304 = gimple_build_assign (make_ssa_name (vec_dest
), t
);
4306 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4307 vect_clobber_variable (stmt_info
, gsi
, new_temp
);
4312 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
4314 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4316 prev_stmt_info
= new_stmt_info
;
4321 /* The call in STMT might prevent it from being removed in dce.
4322 We however cannot remove it here, due to the way the ssa name
4323 it defines is mapped to the new definition. So just replace
4324 rhs of the statement with something harmless. */
4332 type
= TREE_TYPE (scalar_dest
);
4333 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4334 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4337 new_stmt
= gimple_build_nop ();
4338 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4339 unlink_stmt_vdef (stmt
);
4345 /* Function vect_gen_widened_results_half
4347 Create a vector stmt whose code, type, number of arguments, and result
4348 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4349 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
4350 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4351 needs to be created (DECL is a function-decl of a target-builtin).
4352 STMT_INFO is the original scalar stmt that we are vectorizing. */
4355 vect_gen_widened_results_half (enum tree_code code
,
4357 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4358 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4359 stmt_vec_info stmt_info
)
4364 /* Generate half of the widened result: */
4365 if (code
== CALL_EXPR
)
4367 /* Target specific support */
4368 if (op_type
== binary_op
)
4369 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
4371 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
4372 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4373 gimple_call_set_lhs (new_stmt
, new_temp
);
4377 /* Generic support */
4378 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4379 if (op_type
!= binary_op
)
4381 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4382 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4383 gimple_assign_set_lhs (new_stmt
, new_temp
);
4385 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4391 /* Get vectorized definitions for loop-based vectorization of STMT_INFO.
4392 For the first operand we call vect_get_vec_def_for_operand (with OPRND
4393 containing scalar operand), and for the rest we get a copy with
4394 vect_get_vec_def_for_stmt_copy() using the previous vector definition
4395 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
4396 The vectors are collected into VEC_OPRNDS. */
4399 vect_get_loop_based_defs (tree
*oprnd
, stmt_vec_info stmt_info
,
4400 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
4402 vec_info
*vinfo
= stmt_info
->vinfo
;
4405 /* Get first vector operand. */
4406 /* All the vector operands except the very first one (that is scalar oprnd)
4408 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
4409 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt_info
);
4411 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, *oprnd
);
4413 vec_oprnds
->quick_push (vec_oprnd
);
4415 /* Get second vector operand. */
4416 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd
);
4417 vec_oprnds
->quick_push (vec_oprnd
);
4421 /* For conversion in multiple steps, continue to get operands
4424 vect_get_loop_based_defs (oprnd
, stmt_info
, vec_oprnds
,
4425 multi_step_cvt
- 1);
4429 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4430 For multi-step conversions store the resulting vectors and call the function
4434 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
4436 stmt_vec_info stmt_info
,
4438 gimple_stmt_iterator
*gsi
,
4439 slp_tree slp_node
, enum tree_code code
,
4440 stmt_vec_info
*prev_stmt_info
)
4443 tree vop0
, vop1
, new_tmp
, vec_dest
;
4445 vec_dest
= vec_dsts
.pop ();
4447 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4449 /* Create demotion operation. */
4450 vop0
= (*vec_oprnds
)[i
];
4451 vop1
= (*vec_oprnds
)[i
+ 1];
4452 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4453 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4454 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4455 stmt_vec_info new_stmt_info
4456 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4459 /* Store the resulting vector for next recursive call. */
4460 (*vec_oprnds
)[i
/2] = new_tmp
;
4463 /* This is the last step of the conversion sequence. Store the
4464 vectors in SLP_NODE or in vector info of the scalar statement
4465 (or in STMT_VINFO_RELATED_STMT chain). */
4467 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4470 if (!*prev_stmt_info
)
4471 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
4473 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt_info
;
4475 *prev_stmt_info
= new_stmt_info
;
4480 /* For multi-step demotion operations we first generate demotion operations
4481 from the source type to the intermediate types, and then combine the
4482 results (stored in VEC_OPRNDS) in demotion operation to the destination
4486 /* At each level of recursion we have half of the operands we had at the
4488 vec_oprnds
->truncate ((i
+1)/2);
4489 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
4490 stmt_info
, vec_dsts
, gsi
,
4491 slp_node
, VEC_PACK_TRUNC_EXPR
,
4495 vec_dsts
.quick_push (vec_dest
);
4499 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4500 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4501 STMT_INFO. For multi-step conversions store the resulting vectors and
4502 call the function recursively. */
4505 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
4506 vec
<tree
> *vec_oprnds1
,
4507 stmt_vec_info stmt_info
, tree vec_dest
,
4508 gimple_stmt_iterator
*gsi
,
4509 enum tree_code code1
,
4510 enum tree_code code2
, tree decl1
,
4511 tree decl2
, int op_type
)
4514 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4515 gimple
*new_stmt1
, *new_stmt2
;
4516 vec
<tree
> vec_tmp
= vNULL
;
4518 vec_tmp
.create (vec_oprnds0
->length () * 2);
4519 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4521 if (op_type
== binary_op
)
4522 vop1
= (*vec_oprnds1
)[i
];
4526 /* Generate the two halves of promotion operation. */
4527 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
4528 op_type
, vec_dest
, gsi
,
4530 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
4531 op_type
, vec_dest
, gsi
,
4533 if (is_gimple_call (new_stmt1
))
4535 new_tmp1
= gimple_call_lhs (new_stmt1
);
4536 new_tmp2
= gimple_call_lhs (new_stmt2
);
4540 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4541 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4544 /* Store the results for the next step. */
4545 vec_tmp
.quick_push (new_tmp1
);
4546 vec_tmp
.quick_push (new_tmp2
);
4549 vec_oprnds0
->release ();
4550 *vec_oprnds0
= vec_tmp
;
4554 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4555 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4556 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4557 Return true if STMT_INFO is vectorizable in this way. */
4560 vectorizable_conversion (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4561 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
4562 stmt_vector_for_cost
*cost_vec
)
4566 tree op0
, op1
= NULL_TREE
;
4567 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
4568 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4569 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4570 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4571 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
4573 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4575 stmt_vec_info prev_stmt_info
;
4576 poly_uint64 nunits_in
;
4577 poly_uint64 nunits_out
;
4578 tree vectype_out
, vectype_in
;
4580 tree lhs_type
, rhs_type
;
4581 enum { NARROW
, NONE
, WIDEN
} modifier
;
4582 vec
<tree
> vec_oprnds0
= vNULL
;
4583 vec
<tree
> vec_oprnds1
= vNULL
;
4585 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4586 vec_info
*vinfo
= stmt_info
->vinfo
;
4587 int multi_step_cvt
= 0;
4588 vec
<tree
> interm_types
= vNULL
;
4589 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
4591 unsigned short fltsz
;
4593 /* Is STMT a vectorizable conversion? */
4595 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4598 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4602 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4606 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4609 code
= gimple_assign_rhs_code (stmt
);
4610 if (!CONVERT_EXPR_CODE_P (code
)
4611 && code
!= FIX_TRUNC_EXPR
4612 && code
!= FLOAT_EXPR
4613 && code
!= WIDEN_MULT_EXPR
4614 && code
!= WIDEN_LSHIFT_EXPR
)
4617 op_type
= TREE_CODE_LENGTH (code
);
4619 /* Check types of lhs and rhs. */
4620 scalar_dest
= gimple_assign_lhs (stmt
);
4621 lhs_type
= TREE_TYPE (scalar_dest
);
4622 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4624 op0
= gimple_assign_rhs1 (stmt
);
4625 rhs_type
= TREE_TYPE (op0
);
4627 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4628 && !((INTEGRAL_TYPE_P (lhs_type
)
4629 && INTEGRAL_TYPE_P (rhs_type
))
4630 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4631 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4634 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4635 && ((INTEGRAL_TYPE_P (lhs_type
)
4636 && !type_has_mode_precision_p (lhs_type
))
4637 || (INTEGRAL_TYPE_P (rhs_type
)
4638 && !type_has_mode_precision_p (rhs_type
))))
4640 if (dump_enabled_p ())
4641 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4642 "type conversion to/from bit-precision unsupported."
4647 /* Check the operands of the operation. */
4648 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype_in
))
4650 if (dump_enabled_p ())
4651 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4652 "use not simple.\n");
4655 if (op_type
== binary_op
)
4659 op1
= gimple_assign_rhs2 (stmt
);
4660 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4661 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4663 if (CONSTANT_CLASS_P (op0
))
4664 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1], &vectype_in
);
4666 ok
= vect_is_simple_use (op1
, vinfo
, &dt
[1]);
4670 if (dump_enabled_p ())
4671 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4672 "use not simple.\n");
4677 /* If op0 is an external or constant defs use a vector type of
4678 the same size as the output vector type. */
4680 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
4682 gcc_assert (vectype_in
);
4685 if (dump_enabled_p ())
4686 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4687 "no vectype for scalar type %T\n", rhs_type
);
4692 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4693 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4695 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4697 "can't convert between boolean and non "
4698 "boolean vectors %T\n", rhs_type
);
4703 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4704 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4705 if (known_eq (nunits_out
, nunits_in
))
4707 else if (multiple_p (nunits_out
, nunits_in
))
4711 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4715 /* Multiple types in SLP are handled by creating the appropriate number of
4716 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4720 else if (modifier
== NARROW
)
4721 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4723 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4725 /* Sanity check: make sure that at least one copy of the vectorized stmt
4726 needs to be generated. */
4727 gcc_assert (ncopies
>= 1);
4729 bool found_mode
= false;
4730 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4731 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4732 opt_scalar_mode rhs_mode_iter
;
4734 /* Supportable by target? */
4738 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4740 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
4745 if (dump_enabled_p ())
4746 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4747 "conversion not supported by target.\n");
4751 if (supportable_widening_operation (code
, stmt_info
, vectype_out
,
4752 vectype_in
, &code1
, &code2
,
4753 &multi_step_cvt
, &interm_types
))
4755 /* Binary widening operation can only be supported directly by the
4757 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4761 if (code
!= FLOAT_EXPR
4762 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4765 fltsz
= GET_MODE_SIZE (lhs_mode
);
4766 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4768 rhs_mode
= rhs_mode_iter
.require ();
4769 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4773 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4774 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4775 if (cvt_type
== NULL_TREE
)
4778 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4780 if (!supportable_convert_operation (code
, vectype_out
,
4781 cvt_type
, &decl1
, &codecvt1
))
4784 else if (!supportable_widening_operation (code
, stmt_info
,
4785 vectype_out
, cvt_type
,
4786 &codecvt1
, &codecvt2
,
4791 gcc_assert (multi_step_cvt
== 0);
4793 if (supportable_widening_operation (NOP_EXPR
, stmt_info
, cvt_type
,
4794 vectype_in
, &code1
, &code2
,
4795 &multi_step_cvt
, &interm_types
))
4805 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4806 codecvt2
= ERROR_MARK
;
4810 interm_types
.safe_push (cvt_type
);
4811 cvt_type
= NULL_TREE
;
4816 gcc_assert (op_type
== unary_op
);
4817 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4818 &code1
, &multi_step_cvt
,
4822 if (code
!= FIX_TRUNC_EXPR
4823 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4827 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4828 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4829 if (cvt_type
== NULL_TREE
)
4831 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4834 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4835 &code1
, &multi_step_cvt
,
4844 if (!vec_stmt
) /* transformation not required. */
4846 DUMP_VECT_SCOPE ("vectorizable_conversion");
4847 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
4849 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4850 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4853 else if (modifier
== NARROW
)
4855 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4856 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4861 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4862 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
,
4865 interm_types
.release ();
4870 if (dump_enabled_p ())
4871 dump_printf_loc (MSG_NOTE
, vect_location
,
4872 "transform conversion. ncopies = %d.\n", ncopies
);
4874 if (op_type
== binary_op
)
4876 if (CONSTANT_CLASS_P (op0
))
4877 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4878 else if (CONSTANT_CLASS_P (op1
))
4879 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4882 /* In case of multi-step conversion, we first generate conversion operations
4883 to the intermediate types, and then from that types to the final one.
4884 We create vector destinations for the intermediate type (TYPES) received
4885 from supportable_*_operation, and store them in the correct order
4886 for future use in vect_create_vectorized_*_stmts (). */
4887 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4888 vec_dest
= vect_create_destination_var (scalar_dest
,
4889 (cvt_type
&& modifier
== WIDEN
)
4890 ? cvt_type
: vectype_out
);
4891 vec_dsts
.quick_push (vec_dest
);
4895 for (i
= interm_types
.length () - 1;
4896 interm_types
.iterate (i
, &intermediate_type
); i
--)
4898 vec_dest
= vect_create_destination_var (scalar_dest
,
4900 vec_dsts
.quick_push (vec_dest
);
4905 vec_dest
= vect_create_destination_var (scalar_dest
,
4907 ? vectype_out
: cvt_type
);
4911 if (modifier
== WIDEN
)
4913 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
4914 if (op_type
== binary_op
)
4915 vec_oprnds1
.create (1);
4917 else if (modifier
== NARROW
)
4918 vec_oprnds0
.create (
4919 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
4921 else if (code
== WIDEN_LSHIFT_EXPR
)
4922 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
4925 prev_stmt_info
= NULL
;
4929 for (j
= 0; j
< ncopies
; j
++)
4932 vect_get_vec_defs (op0
, NULL
, stmt_info
, &vec_oprnds0
,
4935 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, NULL
);
4937 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4939 stmt_vec_info new_stmt_info
;
4940 /* Arguments are ready, create the new vector stmt. */
4941 if (code1
== CALL_EXPR
)
4943 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
4944 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4945 gimple_call_set_lhs (new_stmt
, new_temp
);
4947 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4951 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4953 = gimple_build_assign (vec_dest
, code1
, vop0
);
4954 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4955 gimple_assign_set_lhs (new_stmt
, new_temp
);
4957 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
4961 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
4964 if (!prev_stmt_info
)
4965 STMT_VINFO_VEC_STMT (stmt_info
)
4966 = *vec_stmt
= new_stmt_info
;
4968 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
4969 prev_stmt_info
= new_stmt_info
;
4976 /* In case the vectorization factor (VF) is bigger than the number
4977 of elements that we can fit in a vectype (nunits), we have to
4978 generate more than one vector stmt - i.e - we need to "unroll"
4979 the vector stmt by a factor VF/nunits. */
4980 for (j
= 0; j
< ncopies
; j
++)
4987 if (code
== WIDEN_LSHIFT_EXPR
)
4992 /* Store vec_oprnd1 for every vector stmt to be created
4993 for SLP_NODE. We check during the analysis that all
4994 the shift arguments are the same. */
4995 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
4996 vec_oprnds1
.quick_push (vec_oprnd1
);
4998 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
,
4999 &vec_oprnds0
, NULL
, slp_node
);
5002 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
5003 &vec_oprnds1
, slp_node
);
5007 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt_info
);
5008 vec_oprnds0
.quick_push (vec_oprnd0
);
5009 if (op_type
== binary_op
)
5011 if (code
== WIDEN_LSHIFT_EXPR
)
5015 = vect_get_vec_def_for_operand (op1
, stmt_info
);
5016 vec_oprnds1
.quick_push (vec_oprnd1
);
5022 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
5023 vec_oprnds0
.truncate (0);
5024 vec_oprnds0
.quick_push (vec_oprnd0
);
5025 if (op_type
== binary_op
)
5027 if (code
== WIDEN_LSHIFT_EXPR
)
5030 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (vinfo
,
5032 vec_oprnds1
.truncate (0);
5033 vec_oprnds1
.quick_push (vec_oprnd1
);
5037 /* Arguments are ready. Create the new vector stmts. */
5038 for (i
= multi_step_cvt
; i
>= 0; i
--)
5040 tree this_dest
= vec_dsts
[i
];
5041 enum tree_code c1
= code1
, c2
= code2
;
5042 if (i
== 0 && codecvt2
!= ERROR_MARK
)
5047 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
5048 &vec_oprnds1
, stmt_info
,
5050 c1
, c2
, decl1
, decl2
,
5054 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5056 stmt_vec_info new_stmt_info
;
5059 if (codecvt1
== CALL_EXPR
)
5061 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5062 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5063 gimple_call_set_lhs (new_stmt
, new_temp
);
5065 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5070 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5071 new_temp
= make_ssa_name (vec_dest
);
5073 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5075 = vect_finish_stmt_generation (stmt_info
, new_stmt
,
5080 new_stmt_info
= vinfo
->lookup_def (vop0
);
5083 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5086 if (!prev_stmt_info
)
5087 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt_info
;
5089 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5090 prev_stmt_info
= new_stmt_info
;
5095 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5099 /* In case the vectorization factor (VF) is bigger than the number
5100 of elements that we can fit in a vectype (nunits), we have to
5101 generate more than one vector stmt - i.e - we need to "unroll"
5102 the vector stmt by a factor VF/nunits. */
5103 for (j
= 0; j
< ncopies
; j
++)
5107 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5111 vec_oprnds0
.truncate (0);
5112 vect_get_loop_based_defs (&last_oprnd
, stmt_info
, &vec_oprnds0
,
5113 vect_pow2 (multi_step_cvt
) - 1);
5116 /* Arguments are ready. Create the new vector stmts. */
5118 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5120 if (codecvt1
== CALL_EXPR
)
5122 gcall
*new_stmt
= gimple_build_call (decl1
, 1, vop0
);
5123 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5124 gimple_call_set_lhs (new_stmt
, new_temp
);
5125 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5129 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
5130 new_temp
= make_ssa_name (vec_dest
);
5132 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
5133 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5136 vec_oprnds0
[i
] = new_temp
;
5139 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
5140 stmt_info
, vec_dsts
, gsi
,
5145 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5149 vec_oprnds0
.release ();
5150 vec_oprnds1
.release ();
5151 interm_types
.release ();
5157 /* Function vectorizable_assignment.
5159 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
5160 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5161 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5162 Return true if STMT_INFO is vectorizable in this way. */
5165 vectorizable_assignment (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5166 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5167 stmt_vector_for_cost
*cost_vec
)
5172 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5174 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5178 vec
<tree
> vec_oprnds
= vNULL
;
5180 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5181 vec_info
*vinfo
= stmt_info
->vinfo
;
5182 stmt_vec_info prev_stmt_info
= NULL
;
5183 enum tree_code code
;
5186 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5189 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5193 /* Is vectorizable assignment? */
5194 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5198 scalar_dest
= gimple_assign_lhs (stmt
);
5199 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5202 code
= gimple_assign_rhs_code (stmt
);
5203 if (gimple_assign_single_p (stmt
)
5204 || code
== PAREN_EXPR
5205 || CONVERT_EXPR_CODE_P (code
))
5206 op
= gimple_assign_rhs1 (stmt
);
5210 if (code
== VIEW_CONVERT_EXPR
)
5211 op
= TREE_OPERAND (op
, 0);
5213 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5214 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5216 /* Multiple types in SLP are handled by creating the appropriate number of
5217 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5222 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5224 gcc_assert (ncopies
>= 1);
5226 if (!vect_is_simple_use (op
, vinfo
, &dt
[0], &vectype_in
))
5228 if (dump_enabled_p ())
5229 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5230 "use not simple.\n");
5234 /* We can handle NOP_EXPR conversions that do not change the number
5235 of elements or the vector size. */
5236 if ((CONVERT_EXPR_CODE_P (code
)
5237 || code
== VIEW_CONVERT_EXPR
)
5239 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5240 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5241 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5244 /* We do not handle bit-precision changes. */
5245 if ((CONVERT_EXPR_CODE_P (code
)
5246 || code
== VIEW_CONVERT_EXPR
)
5247 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5248 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5249 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5250 /* But a conversion that does not change the bit-pattern is ok. */
5251 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5252 > TYPE_PRECISION (TREE_TYPE (op
)))
5253 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5254 /* Conversion between boolean types of different sizes is
5255 a simple assignment in case their vectypes are same
5257 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5258 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5260 if (dump_enabled_p ())
5261 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5262 "type conversion to/from bit-precision "
5267 if (!vec_stmt
) /* transformation not required. */
5269 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5270 DUMP_VECT_SCOPE ("vectorizable_assignment");
5271 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5276 if (dump_enabled_p ())
5277 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5280 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5283 for (j
= 0; j
< ncopies
; j
++)
5287 vect_get_vec_defs (op
, NULL
, stmt_info
, &vec_oprnds
, NULL
, slp_node
);
5289 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds
, NULL
);
5291 /* Arguments are ready. create the new vector stmt. */
5292 stmt_vec_info new_stmt_info
= NULL
;
5293 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5295 if (CONVERT_EXPR_CODE_P (code
)
5296 || code
== VIEW_CONVERT_EXPR
)
5297 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5298 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5299 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5300 gimple_assign_set_lhs (new_stmt
, new_temp
);
5302 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5304 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5311 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5313 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5315 prev_stmt_info
= new_stmt_info
;
5318 vec_oprnds
.release ();
5323 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5324 either as shift by a scalar or by a vector. */
5327 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
5330 machine_mode vec_mode
;
5335 vectype
= get_vectype_for_scalar_type (scalar_type
);
5339 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5341 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5343 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5345 || (optab_handler (optab
, TYPE_MODE (vectype
))
5346 == CODE_FOR_nothing
))
5350 vec_mode
= TYPE_MODE (vectype
);
5351 icode
= (int) optab_handler (optab
, vec_mode
);
5352 if (icode
== CODE_FOR_nothing
)
5359 /* Function vectorizable_shift.
5361 Check if STMT_INFO performs a shift operation that can be vectorized.
5362 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5363 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5364 Return true if STMT_INFO is vectorizable in this way. */
5367 vectorizable_shift (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5368 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5369 stmt_vector_for_cost
*cost_vec
)
5373 tree op0
, op1
= NULL
;
5374 tree vec_oprnd1
= NULL_TREE
;
5376 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5377 enum tree_code code
;
5378 machine_mode vec_mode
;
5382 machine_mode optab_op2_mode
;
5383 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5385 stmt_vec_info prev_stmt_info
;
5386 poly_uint64 nunits_in
;
5387 poly_uint64 nunits_out
;
5392 vec
<tree
> vec_oprnds0
= vNULL
;
5393 vec
<tree
> vec_oprnds1
= vNULL
;
5396 bool scalar_shift_arg
= true;
5397 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5398 vec_info
*vinfo
= stmt_info
->vinfo
;
5400 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5403 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5407 /* Is STMT a vectorizable binary/unary operation? */
5408 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5412 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5415 code
= gimple_assign_rhs_code (stmt
);
5417 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5418 || code
== RROTATE_EXPR
))
5421 scalar_dest
= gimple_assign_lhs (stmt
);
5422 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5423 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5425 if (dump_enabled_p ())
5426 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5427 "bit-precision shifts not supported.\n");
5431 op0
= gimple_assign_rhs1 (stmt
);
5432 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5434 if (dump_enabled_p ())
5435 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5436 "use not simple.\n");
5439 /* If op0 is an external or constant def use a vector type with
5440 the same size as the output vector type. */
5442 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5444 gcc_assert (vectype
);
5447 if (dump_enabled_p ())
5448 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5449 "no vectype for scalar type\n");
5453 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5454 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5455 if (maybe_ne (nunits_out
, nunits_in
))
5458 op1
= gimple_assign_rhs2 (stmt
);
5459 stmt_vec_info op1_def_stmt_info
;
5460 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1], &op1_vectype
,
5461 &op1_def_stmt_info
))
5463 if (dump_enabled_p ())
5464 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5465 "use not simple.\n");
5469 /* Multiple types in SLP are handled by creating the appropriate number of
5470 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5475 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5477 gcc_assert (ncopies
>= 1);
5479 /* Determine whether the shift amount is a vector, or scalar. If the
5480 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5482 if ((dt
[1] == vect_internal_def
5483 || dt
[1] == vect_induction_def
)
5485 scalar_shift_arg
= false;
5486 else if (dt
[1] == vect_constant_def
5487 || dt
[1] == vect_external_def
5488 || dt
[1] == vect_internal_def
)
5490 /* In SLP, need to check whether the shift count is the same,
5491 in loops if it is a constant or invariant, it is always
5495 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5496 stmt_vec_info slpstmt_info
;
5498 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5500 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5501 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5502 scalar_shift_arg
= false;
5506 /* If the shift amount is computed by a pattern stmt we cannot
5507 use the scalar amount directly thus give up and use a vector
5509 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5510 scalar_shift_arg
= false;
5514 if (dump_enabled_p ())
5515 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5516 "operand mode requires invariant argument.\n");
5520 /* Vector shifted by vector. */
5521 if (!scalar_shift_arg
)
5523 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5524 if (dump_enabled_p ())
5525 dump_printf_loc (MSG_NOTE
, vect_location
,
5526 "vector/vector shift/rotate found.\n");
5529 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
5530 if (op1_vectype
== NULL_TREE
5531 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
5533 if (dump_enabled_p ())
5534 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5535 "unusable type for last operand in"
5536 " vector/vector shift/rotate.\n");
5540 /* See if the machine has a vector shifted by scalar insn and if not
5541 then see if it has a vector shifted by vector insn. */
5544 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5546 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5548 if (dump_enabled_p ())
5549 dump_printf_loc (MSG_NOTE
, vect_location
,
5550 "vector/scalar shift/rotate found.\n");
5554 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5556 && (optab_handler (optab
, TYPE_MODE (vectype
))
5557 != CODE_FOR_nothing
))
5559 scalar_shift_arg
= false;
5561 if (dump_enabled_p ())
5562 dump_printf_loc (MSG_NOTE
, vect_location
,
5563 "vector/vector shift/rotate found.\n");
5565 /* Unlike the other binary operators, shifts/rotates have
5566 the rhs being int, instead of the same type as the lhs,
5567 so make sure the scalar is the right type if we are
5568 dealing with vectors of long long/long/short/char. */
5569 if (dt
[1] == vect_constant_def
)
5570 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5571 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
5575 && TYPE_MODE (TREE_TYPE (vectype
))
5576 != TYPE_MODE (TREE_TYPE (op1
)))
5578 if (dump_enabled_p ())
5579 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5580 "unusable type for last operand in"
5581 " vector/vector shift/rotate.\n");
5584 if (vec_stmt
&& !slp_node
)
5586 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5587 op1
= vect_init_vector (stmt_info
, op1
,
5588 TREE_TYPE (vectype
), NULL
);
5595 /* Supportable by target? */
5598 if (dump_enabled_p ())
5599 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5603 vec_mode
= TYPE_MODE (vectype
);
5604 icode
= (int) optab_handler (optab
, vec_mode
);
5605 if (icode
== CODE_FOR_nothing
)
5607 if (dump_enabled_p ())
5608 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5609 "op not supported by target.\n");
5610 /* Check only during analysis. */
5611 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5613 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5615 if (dump_enabled_p ())
5616 dump_printf_loc (MSG_NOTE
, vect_location
,
5617 "proceeding using word mode.\n");
5620 /* Worthwhile without SIMD support? Check only during analysis. */
5622 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5623 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5625 if (dump_enabled_p ())
5626 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5627 "not worthwhile without SIMD support.\n");
5631 if (!vec_stmt
) /* transformation not required. */
5633 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5634 DUMP_VECT_SCOPE ("vectorizable_shift");
5635 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5641 if (dump_enabled_p ())
5642 dump_printf_loc (MSG_NOTE
, vect_location
,
5643 "transform binary/unary operation.\n");
5646 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5648 prev_stmt_info
= NULL
;
5649 for (j
= 0; j
< ncopies
; j
++)
5654 if (scalar_shift_arg
)
5656 /* Vector shl and shr insn patterns can be defined with scalar
5657 operand 2 (shift operand). In this case, use constant or loop
5658 invariant op1 directly, without extending it to vector mode
5660 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5661 if (!VECTOR_MODE_P (optab_op2_mode
))
5663 if (dump_enabled_p ())
5664 dump_printf_loc (MSG_NOTE
, vect_location
,
5665 "operand 1 using scalar mode.\n");
5667 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
5668 vec_oprnds1
.quick_push (vec_oprnd1
);
5671 /* Store vec_oprnd1 for every vector stmt to be created
5672 for SLP_NODE. We check during the analysis that all
5673 the shift arguments are the same.
5674 TODO: Allow different constants for different vector
5675 stmts generated for an SLP instance. */
5676 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
5677 vec_oprnds1
.quick_push (vec_oprnd1
);
5682 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5683 (a special case for certain kind of vector shifts); otherwise,
5684 operand 1 should be of a vector type (the usual case). */
5686 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
5689 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
5693 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
5695 /* Arguments are ready. Create the new vector stmt. */
5696 stmt_vec_info new_stmt_info
= NULL
;
5697 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5699 vop1
= vec_oprnds1
[i
];
5700 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5701 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5702 gimple_assign_set_lhs (new_stmt
, new_temp
);
5704 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
5706 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
5713 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
5715 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
5716 prev_stmt_info
= new_stmt_info
;
5719 vec_oprnds0
.release ();
5720 vec_oprnds1
.release ();
5726 /* Function vectorizable_operation.
5728 Check if STMT_INFO performs a binary, unary or ternary operation that can
5730 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5731 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5732 Return true if STMT_INFO is vectorizable in this way. */
5735 vectorizable_operation (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5736 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
5737 stmt_vector_for_cost
*cost_vec
)
5741 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5743 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5744 enum tree_code code
, orig_code
;
5745 machine_mode vec_mode
;
5749 bool target_support_p
;
5750 enum vect_def_type dt
[3]
5751 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5753 stmt_vec_info prev_stmt_info
;
5754 poly_uint64 nunits_in
;
5755 poly_uint64 nunits_out
;
5759 vec
<tree
> vec_oprnds0
= vNULL
;
5760 vec
<tree
> vec_oprnds1
= vNULL
;
5761 vec
<tree
> vec_oprnds2
= vNULL
;
5762 tree vop0
, vop1
, vop2
;
5763 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5764 vec_info
*vinfo
= stmt_info
->vinfo
;
5766 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5769 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5773 /* Is STMT a vectorizable binary/unary operation? */
5774 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5778 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5781 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5783 /* For pointer addition and subtraction, we should use the normal
5784 plus and minus for the vector operation. */
5785 if (code
== POINTER_PLUS_EXPR
)
5787 if (code
== POINTER_DIFF_EXPR
)
5790 /* Support only unary or binary operations. */
5791 op_type
= TREE_CODE_LENGTH (code
);
5792 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5794 if (dump_enabled_p ())
5795 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5796 "num. args = %d (not unary/binary/ternary op).\n",
5801 scalar_dest
= gimple_assign_lhs (stmt
);
5802 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5804 /* Most operations cannot handle bit-precision types without extra
5806 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
5807 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5808 /* Exception are bitwise binary operations. */
5809 && code
!= BIT_IOR_EXPR
5810 && code
!= BIT_XOR_EXPR
5811 && code
!= BIT_AND_EXPR
)
5813 if (dump_enabled_p ())
5814 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5815 "bit-precision arithmetic not supported.\n");
5819 op0
= gimple_assign_rhs1 (stmt
);
5820 if (!vect_is_simple_use (op0
, vinfo
, &dt
[0], &vectype
))
5822 if (dump_enabled_p ())
5823 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5824 "use not simple.\n");
5827 /* If op0 is an external or constant def use a vector type with
5828 the same size as the output vector type. */
5831 /* For boolean type we cannot determine vectype by
5832 invariant value (don't know whether it is a vector
5833 of booleans or vector of integers). We use output
5834 vectype because operations on boolean don't change
5836 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5838 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5840 if (dump_enabled_p ())
5841 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5842 "not supported operation on bool value.\n");
5845 vectype
= vectype_out
;
5848 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
5851 gcc_assert (vectype
);
5854 if (dump_enabled_p ())
5855 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5856 "no vectype for scalar type %T\n",
5862 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5863 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5864 if (maybe_ne (nunits_out
, nunits_in
))
5867 if (op_type
== binary_op
|| op_type
== ternary_op
)
5869 op1
= gimple_assign_rhs2 (stmt
);
5870 if (!vect_is_simple_use (op1
, vinfo
, &dt
[1]))
5872 if (dump_enabled_p ())
5873 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5874 "use not simple.\n");
5878 if (op_type
== ternary_op
)
5880 op2
= gimple_assign_rhs3 (stmt
);
5881 if (!vect_is_simple_use (op2
, vinfo
, &dt
[2]))
5883 if (dump_enabled_p ())
5884 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5885 "use not simple.\n");
5890 /* Multiple types in SLP are handled by creating the appropriate number of
5891 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5896 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5898 gcc_assert (ncopies
>= 1);
5900 /* Shifts are handled in vectorizable_shift (). */
5901 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5902 || code
== RROTATE_EXPR
)
5905 /* Supportable by target? */
5907 vec_mode
= TYPE_MODE (vectype
);
5908 if (code
== MULT_HIGHPART_EXPR
)
5909 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5912 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5915 if (dump_enabled_p ())
5916 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5920 target_support_p
= (optab_handler (optab
, vec_mode
)
5921 != CODE_FOR_nothing
);
5924 if (!target_support_p
)
5926 if (dump_enabled_p ())
5927 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5928 "op not supported by target.\n");
5929 /* Check only during analysis. */
5930 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5931 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5933 if (dump_enabled_p ())
5934 dump_printf_loc (MSG_NOTE
, vect_location
,
5935 "proceeding using word mode.\n");
5938 /* Worthwhile without SIMD support? Check only during analysis. */
5939 if (!VECTOR_MODE_P (vec_mode
)
5941 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5943 if (dump_enabled_p ())
5944 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5945 "not worthwhile without SIMD support.\n");
5949 if (!vec_stmt
) /* transformation not required. */
5951 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5952 DUMP_VECT_SCOPE ("vectorizable_operation");
5953 vect_model_simple_cost (stmt_info
, ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5959 if (dump_enabled_p ())
5960 dump_printf_loc (MSG_NOTE
, vect_location
,
5961 "transform binary/unary operation.\n");
5963 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5964 vectors with unsigned elements, but the result is signed. So, we
5965 need to compute the MINUS_EXPR into vectype temporary and
5966 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5967 tree vec_cvt_dest
= NULL_TREE
;
5968 if (orig_code
== POINTER_DIFF_EXPR
)
5970 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5971 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5975 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5977 /* In case the vectorization factor (VF) is bigger than the number
5978 of elements that we can fit in a vectype (nunits), we have to generate
5979 more than one vector stmt - i.e - we need to "unroll" the
5980 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5981 from one copy of the vector stmt to the next, in the field
5982 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5983 stages to find the correct vector defs to be used when vectorizing
5984 stmts that use the defs of the current stmt. The example below
5985 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5986 we need to create 4 vectorized stmts):
5988 before vectorization:
5989 RELATED_STMT VEC_STMT
5993 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5995 RELATED_STMT VEC_STMT
5996 VS1_0: vx0 = memref0 VS1_1 -
5997 VS1_1: vx1 = memref1 VS1_2 -
5998 VS1_2: vx2 = memref2 VS1_3 -
5999 VS1_3: vx3 = memref3 - -
6000 S1: x = load - VS1_0
6003 step2: vectorize stmt S2 (done here):
6004 To vectorize stmt S2 we first need to find the relevant vector
6005 def for the first operand 'x'. This is, as usual, obtained from
6006 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
6007 that defines 'x' (S1). This way we find the stmt VS1_0, and the
6008 relevant vector def 'vx0'. Having found 'vx0' we can generate
6009 the vector stmt VS2_0, and as usual, record it in the
6010 STMT_VINFO_VEC_STMT of stmt S2.
6011 When creating the second copy (VS2_1), we obtain the relevant vector
6012 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
6013 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
6014 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
6015 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
6016 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
6017 chain of stmts and pointers:
6018 RELATED_STMT VEC_STMT
6019 VS1_0: vx0 = memref0 VS1_1 -
6020 VS1_1: vx1 = memref1 VS1_2 -
6021 VS1_2: vx2 = memref2 VS1_3 -
6022 VS1_3: vx3 = memref3 - -
6023 S1: x = load - VS1_0
6024 VS2_0: vz0 = vx0 + v1 VS2_1 -
6025 VS2_1: vz1 = vx1 + v1 VS2_2 -
6026 VS2_2: vz2 = vx2 + v1 VS2_3 -
6027 VS2_3: vz3 = vx3 + v1 - -
6028 S2: z = x + 1 - VS2_0 */
6030 prev_stmt_info
= NULL
;
6031 for (j
= 0; j
< ncopies
; j
++)
6036 if (op_type
== binary_op
)
6037 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
, &vec_oprnds1
,
6039 else if (op_type
== ternary_op
)
6043 auto_vec
<tree
> ops(3);
6044 ops
.quick_push (op0
);
6045 ops
.quick_push (op1
);
6046 ops
.quick_push (op2
);
6047 auto_vec
<vec
<tree
> > vec_defs(3);
6048 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
6049 vec_oprnds0
= vec_defs
[0];
6050 vec_oprnds1
= vec_defs
[1];
6051 vec_oprnds2
= vec_defs
[2];
6055 vect_get_vec_defs (op0
, op1
, stmt_info
, &vec_oprnds0
,
6056 &vec_oprnds1
, NULL
);
6057 vect_get_vec_defs (op2
, NULL_TREE
, stmt_info
, &vec_oprnds2
,
6062 vect_get_vec_defs (op0
, NULL_TREE
, stmt_info
, &vec_oprnds0
, NULL
,
6067 vect_get_vec_defs_for_stmt_copy (vinfo
, &vec_oprnds0
, &vec_oprnds1
);
6068 if (op_type
== ternary_op
)
6070 tree vec_oprnd
= vec_oprnds2
.pop ();
6071 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (vinfo
,
6076 /* Arguments are ready. Create the new vector stmt. */
6077 stmt_vec_info new_stmt_info
= NULL
;
6078 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
6080 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
6081 ? vec_oprnds1
[i
] : NULL_TREE
);
6082 vop2
= ((op_type
== ternary_op
)
6083 ? vec_oprnds2
[i
] : NULL_TREE
);
6084 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
,
6086 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6087 gimple_assign_set_lhs (new_stmt
, new_temp
);
6089 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6092 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6094 = gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6096 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6097 gimple_assign_set_lhs (new_stmt
, new_temp
);
6099 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6102 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
6109 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6111 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6112 prev_stmt_info
= new_stmt_info
;
6115 vec_oprnds0
.release ();
6116 vec_oprnds1
.release ();
6117 vec_oprnds2
.release ();
6122 /* A helper function to ensure data reference DR_INFO's base alignment. */
6125 ensure_base_align (dr_vec_info
*dr_info
)
6127 if (dr_info
->misalignment
== DR_MISALIGNMENT_UNINITIALIZED
)
6130 if (dr_info
->base_misaligned
)
6132 tree base_decl
= dr_info
->base_decl
;
6134 unsigned int align_base_to
6135 = DR_TARGET_ALIGNMENT (dr_info
) * BITS_PER_UNIT
;
6137 if (decl_in_symtab_p (base_decl
))
6138 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6141 SET_DECL_ALIGN (base_decl
, align_base_to
);
6142 DECL_USER_ALIGN (base_decl
) = 1;
6144 dr_info
->base_misaligned
= false;
6149 /* Function get_group_alias_ptr_type.
6151 Return the alias type for the group starting at FIRST_STMT_INFO. */
6154 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6156 struct data_reference
*first_dr
, *next_dr
;
6158 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6159 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
6160 while (next_stmt_info
)
6162 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6163 if (get_alias_set (DR_REF (first_dr
))
6164 != get_alias_set (DR_REF (next_dr
)))
6166 if (dump_enabled_p ())
6167 dump_printf_loc (MSG_NOTE
, vect_location
,
6168 "conflicting alias set types.\n");
6169 return ptr_type_node
;
6171 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6173 return reference_alias_ptr_type (DR_REF (first_dr
));
6177 /* Function vectorizable_store.
6179 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
6180 that can be vectorized.
6181 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6182 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6183 Return true if STMT_INFO is vectorizable in this way. */
6186 vectorizable_store (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6187 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
6188 stmt_vector_for_cost
*cost_vec
)
6192 tree vec_oprnd
= NULL_TREE
;
6194 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6195 struct loop
*loop
= NULL
;
6196 machine_mode vec_mode
;
6198 enum dr_alignment_support alignment_support_scheme
;
6199 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
6200 enum vect_def_type mask_dt
= vect_unknown_def_type
;
6201 stmt_vec_info prev_stmt_info
= NULL
;
6202 tree dataref_ptr
= NULL_TREE
;
6203 tree dataref_offset
= NULL_TREE
;
6204 gimple
*ptr_incr
= NULL
;
6207 stmt_vec_info first_stmt_info
;
6209 unsigned int group_size
, i
;
6210 vec
<tree
> oprnds
= vNULL
;
6211 vec
<tree
> result_chain
= vNULL
;
6212 tree offset
= NULL_TREE
;
6213 vec
<tree
> vec_oprnds
= vNULL
;
6214 bool slp
= (slp_node
!= NULL
);
6215 unsigned int vec_num
;
6216 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
6217 vec_info
*vinfo
= stmt_info
->vinfo
;
6219 gather_scatter_info gs_info
;
6221 vec_load_store_type vls_type
;
6224 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6227 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6231 /* Is vectorizable store? */
6233 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
6234 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
6236 tree scalar_dest
= gimple_assign_lhs (assign
);
6237 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
6238 && is_pattern_stmt_p (stmt_info
))
6239 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
6240 if (TREE_CODE (scalar_dest
) != ARRAY_REF
6241 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
6242 && TREE_CODE (scalar_dest
) != INDIRECT_REF
6243 && TREE_CODE (scalar_dest
) != COMPONENT_REF
6244 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
6245 && TREE_CODE (scalar_dest
) != REALPART_EXPR
6246 && TREE_CODE (scalar_dest
) != MEM_REF
)
6251 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
6252 if (!call
|| !gimple_call_internal_p (call
))
6255 internal_fn ifn
= gimple_call_internal_fn (call
);
6256 if (!internal_store_fn_p (ifn
))
6259 if (slp_node
!= NULL
)
6261 if (dump_enabled_p ())
6262 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6263 "SLP of masked stores not supported.\n");
6267 int mask_index
= internal_fn_mask_index (ifn
);
6268 if (mask_index
>= 0)
6270 mask
= gimple_call_arg (call
, mask_index
);
6271 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
6277 op
= vect_get_store_rhs (stmt_info
);
6279 /* Cannot have hybrid store SLP -- that would mean storing to the
6280 same location twice. */
6281 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
6283 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
6284 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
6288 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6289 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
6294 /* Multiple types in SLP are handled by creating the appropriate number of
6295 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
6300 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
6302 gcc_assert (ncopies
>= 1);
6304 /* FORNOW. This restriction should be relaxed. */
6305 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
6307 if (dump_enabled_p ())
6308 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6309 "multiple types in nested loop.\n");
6313 if (!vect_check_store_rhs (stmt_info
, op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
6316 elem_type
= TREE_TYPE (vectype
);
6317 vec_mode
= TYPE_MODE (vectype
);
6319 if (!STMT_VINFO_DATA_REF (stmt_info
))
6322 vect_memory_access_type memory_access_type
;
6323 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, vls_type
, ncopies
,
6324 &memory_access_type
, &gs_info
))
6329 if (memory_access_type
== VMAT_CONTIGUOUS
)
6331 if (!VECTOR_MODE_P (vec_mode
)
6332 || !can_vec_mask_load_store_p (vec_mode
,
6333 TYPE_MODE (mask_vectype
), false))
6336 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
6337 && (memory_access_type
!= VMAT_GATHER_SCATTER
|| gs_info
.decl
))
6339 if (dump_enabled_p ())
6340 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6341 "unsupported access type for masked store.\n");
6347 /* FORNOW. In some cases can vectorize even if data-type not supported
6348 (e.g. - array initialization with 0). */
6349 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
6353 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
6354 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6355 && memory_access_type
!= VMAT_GATHER_SCATTER
6356 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
6359 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
6360 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
6361 group_size
= DR_GROUP_SIZE (first_stmt_info
);
6365 first_stmt_info
= stmt_info
;
6366 first_dr_info
= dr_info
;
6367 group_size
= vec_num
= 1;
6370 if (!vec_stmt
) /* transformation not required. */
6372 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
6375 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
6376 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
6377 memory_access_type
, &gs_info
);
6379 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
6380 vect_model_store_cost (stmt_info
, ncopies
, rhs_dt
, memory_access_type
,
6381 vls_type
, slp_node
, cost_vec
);
6384 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
6388 ensure_base_align (dr_info
);
6390 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
6392 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
6393 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
6394 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
6395 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
;
6396 edge pe
= loop_preheader_edge (loop
);
6399 enum { NARROW
, NONE
, WIDEN
} modifier
;
6400 poly_uint64 scatter_off_nunits
6401 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
6403 if (known_eq (nunits
, scatter_off_nunits
))
6405 else if (known_eq (nunits
* 2, scatter_off_nunits
))
6409 /* Currently gathers and scatters are only supported for
6410 fixed-length vectors. */
6411 unsigned int count
= scatter_off_nunits
.to_constant ();
6412 vec_perm_builder
sel (count
, count
, 1);
6413 for (i
= 0; i
< (unsigned int) count
; ++i
)
6414 sel
.quick_push (i
| (count
/ 2));
6416 vec_perm_indices
indices (sel
, 1, count
);
6417 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
6419 gcc_assert (perm_mask
!= NULL_TREE
);
6421 else if (known_eq (nunits
, scatter_off_nunits
* 2))
6425 /* Currently gathers and scatters are only supported for
6426 fixed-length vectors. */
6427 unsigned int count
= nunits
.to_constant ();
6428 vec_perm_builder
sel (count
, count
, 1);
6429 for (i
= 0; i
< (unsigned int) count
; ++i
)
6430 sel
.quick_push (i
| (count
/ 2));
6432 vec_perm_indices
indices (sel
, 2, count
);
6433 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
6434 gcc_assert (perm_mask
!= NULL_TREE
);
6440 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
6441 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6442 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6443 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6444 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
6445 scaletype
= TREE_VALUE (arglist
);
6447 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
6448 && TREE_CODE (rettype
) == VOID_TYPE
);
6450 ptr
= fold_convert (ptrtype
, gs_info
.base
);
6451 if (!is_gimple_min_invariant (ptr
))
6453 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
6454 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
6455 gcc_assert (!new_bb
);
6458 /* Currently we support only unconditional scatter stores,
6459 so mask should be all ones. */
6460 mask
= build_int_cst (masktype
, -1);
6461 mask
= vect_init_vector (stmt_info
, mask
, masktype
, NULL
);
6463 scale
= build_int_cst (scaletype
, gs_info
.scale
);
6465 prev_stmt_info
= NULL
;
6466 for (j
= 0; j
< ncopies
; ++j
)
6471 = vect_get_vec_def_for_operand (op
, stmt_info
);
6473 = vect_get_vec_def_for_operand (gs_info
.offset
, stmt_info
);
6475 else if (modifier
!= NONE
&& (j
& 1))
6477 if (modifier
== WIDEN
)
6480 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
6481 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
, perm_mask
,
6484 else if (modifier
== NARROW
)
6486 src
= permute_vec_elements (vec_oprnd1
, vec_oprnd1
, perm_mask
,
6489 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
6497 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd1
);
6499 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnd0
);
6502 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
6504 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
6505 TYPE_VECTOR_SUBPARTS (srctype
)));
6506 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
6507 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
6509 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
6510 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6514 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
6516 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
6517 TYPE_VECTOR_SUBPARTS (idxtype
)));
6518 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
6519 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
6521 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
6522 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6527 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask
, op
, src
, scale
);
6528 stmt_vec_info new_stmt_info
6529 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
6531 if (prev_stmt_info
== NULL
)
6532 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
6534 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
6535 prev_stmt_info
= new_stmt_info
;
6540 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
6541 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
6546 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
6548 /* We vectorize all the stmts of the interleaving group when we
6549 reach the last stmt in the group. */
6550 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
6551 < DR_GROUP_SIZE (first_stmt_info
)
6560 grouped_store
= false;
6561 /* VEC_NUM is the number of vect stmts to be created for this
6563 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6564 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
6565 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
6566 == first_stmt_info
);
6567 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
6568 op
= vect_get_store_rhs (first_stmt_info
);
6571 /* VEC_NUM is the number of vect stmts to be created for this
6573 vec_num
= group_size
;
6575 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
6578 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
6580 if (dump_enabled_p ())
6581 dump_printf_loc (MSG_NOTE
, vect_location
,
6582 "transform store. ncopies = %d\n", ncopies
);
6584 if (memory_access_type
== VMAT_ELEMENTWISE
6585 || memory_access_type
== VMAT_STRIDED_SLP
)
6587 gimple_stmt_iterator incr_gsi
;
6593 tree stride_base
, stride_step
, alias_off
;
6596 /* Checked by get_load_store_type. */
6597 unsigned int const_nunits
= nunits
.to_constant ();
6599 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
6600 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
6603 = fold_build_pointer_plus
6604 (DR_BASE_ADDRESS (first_dr_info
->dr
),
6605 size_binop (PLUS_EXPR
,
6606 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
6607 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
6608 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
6610 /* For a store with loop-invariant (but other than power-of-2)
6611 stride (i.e. not a grouped access) like so:
6613 for (i = 0; i < n; i += stride)
6616 we generate a new induction variable and new stores from
6617 the components of the (vectorized) rhs:
6619 for (j = 0; ; j += VF*stride)
6624 array[j + stride] = tmp2;
6628 unsigned nstores
= const_nunits
;
6630 tree ltype
= elem_type
;
6631 tree lvectype
= vectype
;
6634 if (group_size
< const_nunits
6635 && const_nunits
% group_size
== 0)
6637 nstores
= const_nunits
/ group_size
;
6639 ltype
= build_vector_type (elem_type
, group_size
);
6642 /* First check if vec_extract optab doesn't support extraction
6643 of vector elts directly. */
6644 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
6646 if (!mode_for_vector (elmode
, group_size
).exists (&vmode
)
6647 || !VECTOR_MODE_P (vmode
)
6648 || !targetm
.vector_mode_supported_p (vmode
)
6649 || (convert_optab_handler (vec_extract_optab
,
6650 TYPE_MODE (vectype
), vmode
)
6651 == CODE_FOR_nothing
))
6653 /* Try to avoid emitting an extract of vector elements
6654 by performing the extracts using an integer type of the
6655 same size, extracting from a vector of those and then
6656 re-interpreting it as the original vector type if
6659 = group_size
* GET_MODE_BITSIZE (elmode
);
6660 elmode
= int_mode_for_size (lsize
, 0).require ();
6661 unsigned int lnunits
= const_nunits
/ group_size
;
6662 /* If we can't construct such a vector fall back to
6663 element extracts from the original vector type and
6664 element size stores. */
6665 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
6666 && VECTOR_MODE_P (vmode
)
6667 && targetm
.vector_mode_supported_p (vmode
)
6668 && (convert_optab_handler (vec_extract_optab
,
6670 != CODE_FOR_nothing
))
6674 ltype
= build_nonstandard_integer_type (lsize
, 1);
6675 lvectype
= build_vector_type (ltype
, nstores
);
6677 /* Else fall back to vector extraction anyway.
6678 Fewer stores are more important than avoiding spilling
6679 of the vector we extract from. Compared to the
6680 construction case in vectorizable_load no store-forwarding
6681 issue exists here for reasonable archs. */
6684 else if (group_size
>= const_nunits
6685 && group_size
% const_nunits
== 0)
6688 lnel
= const_nunits
;
6692 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
6693 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
6696 ivstep
= stride_step
;
6697 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
6698 build_int_cst (TREE_TYPE (ivstep
), vf
));
6700 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
6702 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
6703 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
6704 create_iv (stride_base
, ivstep
, NULL
,
6705 loop
, &incr_gsi
, insert_after
,
6707 incr
= gsi_stmt (incr_gsi
);
6708 loop_vinfo
->add_stmt (incr
);
6710 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
6712 prev_stmt_info
= NULL
;
6713 alias_off
= build_int_cst (ref_type
, 0);
6714 stmt_vec_info next_stmt_info
= first_stmt_info
;
6715 for (g
= 0; g
< group_size
; g
++)
6717 running_off
= offvar
;
6720 tree size
= TYPE_SIZE_UNIT (ltype
);
6721 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
6723 tree newoff
= copy_ssa_name (running_off
, NULL
);
6724 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6726 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
6727 running_off
= newoff
;
6729 unsigned int group_el
= 0;
6730 unsigned HOST_WIDE_INT
6731 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
6732 for (j
= 0; j
< ncopies
; j
++)
6734 /* We've set op and dt above, from vect_get_store_rhs,
6735 and first_stmt_info == stmt_info. */
6740 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
,
6741 &vec_oprnds
, NULL
, slp_node
);
6742 vec_oprnd
= vec_oprnds
[0];
6746 op
= vect_get_store_rhs (next_stmt_info
);
6747 vec_oprnd
= vect_get_vec_def_for_operand
6748 (op
, next_stmt_info
);
6754 vec_oprnd
= vec_oprnds
[j
];
6756 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
,
6759 /* Pun the vector to extract from if necessary. */
6760 if (lvectype
!= vectype
)
6762 tree tem
= make_ssa_name (lvectype
);
6764 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
6765 lvectype
, vec_oprnd
));
6766 vect_finish_stmt_generation (stmt_info
, pun
, gsi
);
6769 for (i
= 0; i
< nstores
; i
++)
6771 tree newref
, newoff
;
6772 gimple
*incr
, *assign
;
6773 tree size
= TYPE_SIZE (ltype
);
6774 /* Extract the i'th component. */
6775 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
6776 bitsize_int (i
), size
);
6777 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
6780 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
6784 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
6786 newref
= build2 (MEM_REF
, ltype
,
6787 running_off
, this_off
);
6788 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
6790 /* And store it to *running_off. */
6791 assign
= gimple_build_assign (newref
, elem
);
6792 stmt_vec_info assign_info
6793 = vect_finish_stmt_generation (stmt_info
, assign
, gsi
);
6797 || group_el
== group_size
)
6799 newoff
= copy_ssa_name (running_off
, NULL
);
6800 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
6801 running_off
, stride_step
);
6802 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
6804 running_off
= newoff
;
6807 if (g
== group_size
- 1
6810 if (j
== 0 && i
== 0)
6811 STMT_VINFO_VEC_STMT (stmt_info
)
6812 = *vec_stmt
= assign_info
;
6814 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = assign_info
;
6815 prev_stmt_info
= assign_info
;
6819 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6824 vec_oprnds
.release ();
6828 auto_vec
<tree
> dr_chain (group_size
);
6829 oprnds
.create (group_size
);
6831 alignment_support_scheme
6832 = vect_supportable_dr_alignment (first_dr_info
, false);
6833 gcc_assert (alignment_support_scheme
);
6834 vec_loop_masks
*loop_masks
6835 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6836 ? &LOOP_VINFO_MASKS (loop_vinfo
)
6838 /* Targets with store-lane instructions must not require explicit
6839 realignment. vect_supportable_dr_alignment always returns either
6840 dr_aligned or dr_unaligned_supported for masked operations. */
6841 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
6844 || alignment_support_scheme
== dr_aligned
6845 || alignment_support_scheme
== dr_unaligned_supported
);
6847 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
6848 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
6849 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
6852 tree vec_offset
= NULL_TREE
;
6853 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6855 aggr_type
= NULL_TREE
;
6858 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
6860 aggr_type
= elem_type
;
6861 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
6862 &bump
, &vec_offset
);
6866 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
6867 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
6869 aggr_type
= vectype
;
6870 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
6871 memory_access_type
);
6875 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
6877 /* In case the vectorization factor (VF) is bigger than the number
6878 of elements that we can fit in a vectype (nunits), we have to generate
6879 more than one vector stmt - i.e - we need to "unroll" the
6880 vector stmt by a factor VF/nunits. For more details see documentation in
6881 vect_get_vec_def_for_copy_stmt. */
6883 /* In case of interleaving (non-unit grouped access):
6890 We create vectorized stores starting from base address (the access of the
6891 first stmt in the chain (S2 in the above example), when the last store stmt
6892 of the chain (S4) is reached:
6895 VS2: &base + vec_size*1 = vx0
6896 VS3: &base + vec_size*2 = vx1
6897 VS4: &base + vec_size*3 = vx3
6899 Then permutation statements are generated:
6901 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
6902 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
6905 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
6906 (the order of the data-refs in the output of vect_permute_store_chain
6907 corresponds to the order of scalar stmts in the interleaving chain - see
6908 the documentation of vect_permute_store_chain()).
6910 In case of both multiple types and interleaving, above vector stores and
6911 permutation stmts are created for every copy. The result vector stmts are
6912 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
6913 STMT_VINFO_RELATED_STMT for the next copies.
6916 prev_stmt_info
= NULL
;
6917 tree vec_mask
= NULL_TREE
;
6918 for (j
= 0; j
< ncopies
; j
++)
6920 stmt_vec_info new_stmt_info
;
6925 /* Get vectorized arguments for SLP_NODE. */
6926 vect_get_vec_defs (op
, NULL_TREE
, stmt_info
, &vec_oprnds
,
6929 vec_oprnd
= vec_oprnds
[0];
6933 /* For interleaved stores we collect vectorized defs for all the
6934 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
6935 used as an input to vect_permute_store_chain(), and OPRNDS as
6936 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
6938 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6939 OPRNDS are of size 1. */
6940 stmt_vec_info next_stmt_info
= first_stmt_info
;
6941 for (i
= 0; i
< group_size
; i
++)
6943 /* Since gaps are not supported for interleaved stores,
6944 DR_GROUP_SIZE is the exact number of stmts in the chain.
6945 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
6946 that there is no interleaving, DR_GROUP_SIZE is 1,
6947 and only one iteration of the loop will be executed. */
6948 op
= vect_get_store_rhs (next_stmt_info
);
6949 vec_oprnd
= vect_get_vec_def_for_operand
6950 (op
, next_stmt_info
);
6951 dr_chain
.quick_push (vec_oprnd
);
6952 oprnds
.quick_push (vec_oprnd
);
6953 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6956 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
6960 /* We should have catched mismatched types earlier. */
6961 gcc_assert (useless_type_conversion_p (vectype
,
6962 TREE_TYPE (vec_oprnd
)));
6963 bool simd_lane_access_p
6964 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
6965 if (simd_lane_access_p
6966 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
6967 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
6968 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
6969 && integer_zerop (DR_INIT (first_dr_info
->dr
))
6970 && alias_sets_conflict_p (get_alias_set (aggr_type
),
6971 get_alias_set (TREE_TYPE (ref_type
))))
6973 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
6974 dataref_offset
= build_int_cst (ref_type
, 0);
6976 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6977 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
6978 &dataref_ptr
, &vec_offset
);
6981 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
,
6982 simd_lane_access_p
? loop
: NULL
,
6983 offset
, &dummy
, gsi
, &ptr_incr
,
6984 simd_lane_access_p
, NULL_TREE
, bump
);
6988 /* For interleaved stores we created vectorized defs for all the
6989 defs stored in OPRNDS in the previous iteration (previous copy).
6990 DR_CHAIN is then used as an input to vect_permute_store_chain(),
6991 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
6993 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
6994 OPRNDS are of size 1. */
6995 for (i
= 0; i
< group_size
; i
++)
6998 vec_oprnd
= vect_get_vec_def_for_stmt_copy (vinfo
, op
);
6999 dr_chain
[i
] = vec_oprnd
;
7000 oprnds
[i
] = vec_oprnd
;
7003 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
7006 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7007 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7008 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
7010 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7014 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7018 /* Get an array into which we can store the individual vectors. */
7019 vec_array
= create_vector_array (vectype
, vec_num
);
7021 /* Invalidate the current contents of VEC_ARRAY. This should
7022 become an RTL clobber too, which prevents the vector registers
7023 from being upward-exposed. */
7024 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
7026 /* Store the individual vectors into the array. */
7027 for (i
= 0; i
< vec_num
; i
++)
7029 vec_oprnd
= dr_chain
[i
];
7030 write_vector_array (stmt_info
, gsi
, vec_oprnd
, vec_array
, i
);
7033 tree final_mask
= NULL
;
7035 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
7038 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7045 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7047 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
7048 tree alias_ptr
= build_int_cst (ref_type
, align
);
7049 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
7050 dataref_ptr
, alias_ptr
,
7051 final_mask
, vec_array
);
7056 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7057 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7058 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
7060 gimple_call_set_lhs (call
, data_ref
);
7062 gimple_call_set_nothrow (call
, true);
7063 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
7065 /* Record that VEC_ARRAY is now dead. */
7066 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
7070 new_stmt_info
= NULL
;
7074 result_chain
.create (group_size
);
7076 vect_permute_store_chain (dr_chain
, group_size
, stmt_info
, gsi
,
7080 stmt_vec_info next_stmt_info
= first_stmt_info
;
7081 for (i
= 0; i
< vec_num
; i
++)
7083 unsigned align
, misalign
;
7085 tree final_mask
= NULL_TREE
;
7087 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
7089 vectype
, vec_num
* j
+ i
);
7091 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7094 if (memory_access_type
== VMAT_GATHER_SCATTER
)
7096 tree scale
= size_int (gs_info
.scale
);
7099 call
= gimple_build_call_internal
7100 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
7101 scale
, vec_oprnd
, final_mask
);
7103 call
= gimple_build_call_internal
7104 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
7106 gimple_call_set_nothrow (call
, true);
7108 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
7113 /* Bump the vector pointer. */
7114 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
7118 vec_oprnd
= vec_oprnds
[i
];
7119 else if (grouped_store
)
7120 /* For grouped stores vectorized defs are interleaved in
7121 vect_permute_store_chain(). */
7122 vec_oprnd
= result_chain
[i
];
7124 align
= DR_TARGET_ALIGNMENT (first_dr_info
);
7125 if (aligned_access_p (first_dr_info
))
7127 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
7129 align
= dr_alignment (vect_dr_behavior (first_dr_info
));
7133 misalign
= DR_MISALIGNMENT (first_dr_info
);
7134 if (dataref_offset
== NULL_TREE
7135 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7136 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
7139 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7141 tree perm_mask
= perm_mask_for_reverse (vectype
);
7142 tree perm_dest
= vect_create_destination_var
7143 (vect_get_store_rhs (stmt_info
), vectype
);
7144 tree new_temp
= make_ssa_name (perm_dest
);
7146 /* Generate the permute statement. */
7148 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
7149 vec_oprnd
, perm_mask
);
7150 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
7152 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7153 vec_oprnd
= new_temp
;
7156 /* Arguments are ready. Create the new vector stmt. */
7159 align
= least_bit_hwi (misalign
| align
);
7160 tree ptr
= build_int_cst (ref_type
, align
);
7162 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
7164 final_mask
, vec_oprnd
);
7165 gimple_call_set_nothrow (call
, true);
7167 = vect_finish_stmt_generation (stmt_info
, call
, gsi
);
7171 data_ref
= fold_build2 (MEM_REF
, vectype
,
7175 : build_int_cst (ref_type
, 0));
7176 if (aligned_access_p (first_dr_info
))
7178 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
7179 TREE_TYPE (data_ref
)
7180 = build_aligned_type (TREE_TYPE (data_ref
),
7181 align
* BITS_PER_UNIT
);
7183 TREE_TYPE (data_ref
)
7184 = build_aligned_type (TREE_TYPE (data_ref
),
7185 TYPE_ALIGN (elem_type
));
7186 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
7188 = gimple_build_assign (data_ref
, vec_oprnd
);
7190 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7196 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7197 if (!next_stmt_info
)
7204 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7206 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7207 prev_stmt_info
= new_stmt_info
;
7212 result_chain
.release ();
7213 vec_oprnds
.release ();
7218 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
7219 VECTOR_CST mask. No checks are made that the target platform supports the
7220 mask, so callers may wish to test can_vec_perm_const_p separately, or use
7221 vect_gen_perm_mask_checked. */
/* NOTE(review): this chunk is a lossy extraction -- the fused original line
   numbers jump 7221->7224 and 7224->7228, so the return-type line, the
   opening brace and the "tree mask_type;" declaration were dropped here.
   Confirm against the pristine file before editing this function.  */
7224 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
/* The number of permutation indices must equal the lane count of
   VECTYPE; anything else is a caller bug, hence the assert.  */
7228 poly_uint64 nunits
= sel
.length ();
7229 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
/* Represent the mask as a vector of ssizetype lanes and turn the index
   sequence SEL into a VECTOR_CST of that type.  */
7231 mask_type
= build_vector_type (ssizetype
, nunits
);
7232 return vec_perm_indices_to_tree (mask_type
, sel
);
7235 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
7236 i.e. that the target supports the pattern _for arbitrary input vectors_. */
/* NOTE(review): extraction dropped lines 7237-7238 and 7240 (return type and
   opening brace) -- verify against the original file.  */
7239 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
/* Hard-fail if the target cannot do this constant permutation in
   VECTYPE's mode; callers are expected to have verified support.  */
7241 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
/* Delegate the actual mask construction to the unchecked variant.  */
7242 return vect_gen_perm_mask_any (vectype
, sel
);
7245 /* Given a vector variable X and Y, that was generated for the scalar
7246 STMT_INFO, generate instructions to permute the vector elements of X and Y
7247 using permutation mask MASK_VEC, insert them at *GSI and return the
7248 permuted vector variable. */
/* NOTE(review): lossy extraction -- lines 7249-7250 (return type), 7253
   (opening brace), 7256-7257, 7261 (the "else" keyword line), 7264 and
   7268-7270 (including the final "return data_ref;") were dropped.
   Confirm against the pristine file.  */
7251 permute_vec_elements (tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
7252 gimple_stmt_iterator
*gsi
)
/* The result type is taken from the first input vector X.  */
7254 tree vectype
= TREE_TYPE (x
);
7255 tree perm_dest
, data_ref
;
/* Name the destination after the scalar LHS when it is an SSA name,
   otherwise fall back to a fresh anonymous vector variable.  */
7258 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
7259 if (TREE_CODE (scalar_dest
) == SSA_NAME
)
7260 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
7262 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
7263 data_ref
= make_ssa_name (perm_dest
);
7265 /* Generate the permute statement. */
7266 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
/* Emit the VEC_PERM_EXPR at *GSI via the vectorizer's helper; the
   dropped tail presumably returned DATA_REF -- TODO confirm.  */
7267 vect_finish_stmt_generation (stmt_info
, perm_stmt
, gsi
);
7272 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
7273 inserting them on the loops preheader edge. Returns true if we
7274 were successful in doing so (and thus STMT_INFO can be moved then),
7275 otherwise returns false. */
/* NOTE(review): lossy extraction -- the fused numbering skips 7276-7277
   (return type), 7279-7283 (opening brace and local declarations),
   7285/7289 (loop braces), 7294-7295, 7297 (presumably "return false;"
   for the PHI case -- TODO confirm), 7299, 7303-7311 and 7313, 7317,
   7321-7325 (closing braces and the final return).  Verify against the
   pristine file before editing.  */
7278 hoist_defs_of_uses (stmt_vec_info stmt_info
, struct loop
*loop
)
/* First pass: inspect every SSA use of STMT_INFO's statement and check
   that each definition inside LOOP is safe to hoist on its own.  */
7284 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
7286 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
/* Only definitions that actually live inside LOOP need hoisting.  */
7287 if (!gimple_nop_p (def_stmt
)
7288 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
7290 /* Make sure we don't need to recurse. While we could do
7291 so in simple cases when there are more complex use webs
7292 we don't have an easy way to preserve stmt order to fulfil
7293 dependencies within them. */
/* A PHI definition cannot be hoisted (dropped line 7297 presumably
   bails out here -- TODO confirm).  */
7296 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
/* Reject a def whose own operands are also defined inside LOOP,
   since that would require recursive hoisting.  */
7298 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
7300 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
7301 if (!gimple_nop_p (def_stmt2
)
7302 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
/* Second pass: every in-loop definition checked above is detached from
   its current position and re-inserted on LOOP's preheader edge.  */
7312 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
7314 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
7315 if (!gimple_nop_p (def_stmt
)
7316 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
/* gsi_remove with false keeps DEF_STMT alive so it can be re-inserted
   immediately on the preheader edge.  */
7318 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
7319 gsi_remove (&gsi
, false);
7320 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
7327 /* vectorizable_load.
7329 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
7330 that can be vectorized.
7331 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
7332 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
7333 Return true if STMT_INFO is vectorizable in this way. */
7336 vectorizable_load (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
7337 stmt_vec_info
*vec_stmt
, slp_tree slp_node
,
7338 slp_instance slp_node_instance
,
7339 stmt_vector_for_cost
*cost_vec
)
7342 tree vec_dest
= NULL
;
7343 tree data_ref
= NULL
;
7344 stmt_vec_info prev_stmt_info
;
7345 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
7346 struct loop
*loop
= NULL
;
7347 struct loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
7348 bool nested_in_vect_loop
= false;
7353 enum dr_alignment_support alignment_support_scheme
;
7354 tree dataref_ptr
= NULL_TREE
;
7355 tree dataref_offset
= NULL_TREE
;
7356 gimple
*ptr_incr
= NULL
;
7359 unsigned int group_size
;
7360 poly_uint64 group_gap_adj
;
7361 tree msq
= NULL_TREE
, lsq
;
7362 tree offset
= NULL_TREE
;
7363 tree byte_offset
= NULL_TREE
;
7364 tree realignment_token
= NULL_TREE
;
7366 vec
<tree
> dr_chain
= vNULL
;
7367 bool grouped_load
= false;
7368 stmt_vec_info first_stmt_info
;
7369 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
7370 bool compute_in_loop
= false;
7371 struct loop
*at_loop
;
7373 bool slp
= (slp_node
!= NULL
);
7374 bool slp_perm
= false;
7375 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
7378 gather_scatter_info gs_info
;
7379 vec_info
*vinfo
= stmt_info
->vinfo
;
7381 enum vect_def_type mask_dt
= vect_unknown_def_type
;
7383 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
7386 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
7390 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
7391 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
7393 scalar_dest
= gimple_assign_lhs (assign
);
7394 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
7397 tree_code code
= gimple_assign_rhs_code (assign
);
7398 if (code
!= ARRAY_REF
7399 && code
!= BIT_FIELD_REF
7400 && code
!= INDIRECT_REF
7401 && code
!= COMPONENT_REF
7402 && code
!= IMAGPART_EXPR
7403 && code
!= REALPART_EXPR
7405 && TREE_CODE_CLASS (code
) != tcc_declaration
)
7410 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7411 if (!call
|| !gimple_call_internal_p (call
))
7414 internal_fn ifn
= gimple_call_internal_fn (call
);
7415 if (!internal_load_fn_p (ifn
))
7418 scalar_dest
= gimple_call_lhs (call
);
7422 if (slp_node
!= NULL
)
7424 if (dump_enabled_p ())
7425 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7426 "SLP of masked loads not supported.\n");
7430 int mask_index
= internal_fn_mask_index (ifn
);
7431 if (mask_index
>= 0)
7433 mask
= gimple_call_arg (call
, mask_index
);
7434 if (!vect_check_load_store_mask (stmt_info
, mask
, &mask_dt
,
7440 if (!STMT_VINFO_DATA_REF (stmt_info
))
7443 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
7444 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7448 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7449 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
7450 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7455 /* Multiple types in SLP are handled by creating the appropriate number of
7456 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7461 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7463 gcc_assert (ncopies
>= 1);
7465 /* FORNOW. This restriction should be relaxed. */
7466 if (nested_in_vect_loop
&& ncopies
> 1)
7468 if (dump_enabled_p ())
7469 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7470 "multiple types in nested loop.\n");
7474 /* Invalidate assumptions made by dependence analysis when vectorization
7475 on the unrolled body effectively re-orders stmts. */
7477 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7478 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7479 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7481 if (dump_enabled_p ())
7482 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7483 "cannot perform implicit CSE when unrolling "
7484 "with negative dependence distance\n");
7488 elem_type
= TREE_TYPE (vectype
);
7489 mode
= TYPE_MODE (vectype
);
7491 /* FORNOW. In some cases can vectorize even if data-type not supported
7492 (e.g. - data copies). */
7493 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
7495 if (dump_enabled_p ())
7496 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7497 "Aligned load, but unsupported type.\n");
7501 /* Check if the load is a part of an interleaving chain. */
7502 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7504 grouped_load
= true;
7506 gcc_assert (!nested_in_vect_loop
);
7507 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
7509 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7510 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7512 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7515 /* Invalidate assumptions made by dependence analysis when vectorization
7516 on the unrolled body effectively re-orders stmts. */
7517 if (!PURE_SLP_STMT (stmt_info
)
7518 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
7519 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
7520 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
7522 if (dump_enabled_p ())
7523 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7524 "cannot perform implicit CSE when performing "
7525 "group loads with negative dependence distance\n");
7529 /* Similarly when the stmt is a load that is both part of a SLP
7530 instance and a loop vectorized stmt via the same-dr mechanism
7531 we have to give up. */
7532 if (DR_GROUP_SAME_DR_STMT (stmt_info
)
7533 && (STMT_SLP_TYPE (stmt_info
)
7534 != STMT_SLP_TYPE (DR_GROUP_SAME_DR_STMT (stmt_info
))))
7536 if (dump_enabled_p ())
7537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7538 "conflicting SLP types for CSEd load\n");
7545 vect_memory_access_type memory_access_type
;
7546 if (!get_load_store_type (stmt_info
, vectype
, slp
, mask
, VLS_LOAD
, ncopies
,
7547 &memory_access_type
, &gs_info
))
7552 if (memory_access_type
== VMAT_CONTIGUOUS
)
7554 machine_mode vec_mode
= TYPE_MODE (vectype
);
7555 if (!VECTOR_MODE_P (vec_mode
)
7556 || !can_vec_mask_load_store_p (vec_mode
,
7557 TYPE_MODE (mask_vectype
), true))
7560 else if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7562 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7564 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist
))));
7565 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
7567 if (dump_enabled_p ())
7568 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7569 "masked gather with integer mask not"
7574 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7575 && memory_access_type
!= VMAT_GATHER_SCATTER
)
7577 if (dump_enabled_p ())
7578 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7579 "unsupported access type for masked load.\n");
7584 if (!vec_stmt
) /* transformation not required. */
7587 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7590 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7591 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
7592 memory_access_type
, &gs_info
);
7594 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
7595 vect_model_load_cost (stmt_info
, ncopies
, memory_access_type
,
7596 slp_node_instance
, slp_node
, cost_vec
);
7601 gcc_assert (memory_access_type
7602 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7604 if (dump_enabled_p ())
7605 dump_printf_loc (MSG_NOTE
, vect_location
,
7606 "transform load. ncopies = %d\n", ncopies
);
7610 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7611 ensure_base_align (dr_info
);
7613 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7615 vect_build_gather_load_calls (stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
7619 if (memory_access_type
== VMAT_INVARIANT
)
7621 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
7622 /* If we have versioned for aliasing or the loop doesn't
7623 have any data dependencies that would preclude this,
7624 then we are sure this is a loop invariant load and
7625 thus we can insert it on the preheader edge. */
7626 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
7627 && !nested_in_vect_loop
7628 && hoist_defs_of_uses (stmt_info
, loop
));
7631 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
7632 if (dump_enabled_p ())
7633 dump_printf_loc (MSG_NOTE
, vect_location
,
7634 "hoisting out of the vectorized loop: %G", stmt
);
7635 scalar_dest
= copy_ssa_name (scalar_dest
);
7636 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
7637 gsi_insert_on_edge_immediate
7638 (loop_preheader_edge (loop
),
7639 gimple_build_assign (scalar_dest
, rhs
));
7641 /* These copies are all equivalent, but currently the representation
7642 requires a separate STMT_VINFO_VEC_STMT for each one. */
7643 prev_stmt_info
= NULL
;
7644 gimple_stmt_iterator gsi2
= *gsi
;
7646 for (j
= 0; j
< ncopies
; j
++)
7648 stmt_vec_info new_stmt_info
;
7651 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
7653 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7654 new_stmt_info
= vinfo
->add_stmt (new_stmt
);
7658 new_temp
= vect_init_vector (stmt_info
, scalar_dest
,
7660 new_stmt_info
= vinfo
->lookup_def (new_temp
);
7663 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
7665 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7667 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7668 prev_stmt_info
= new_stmt_info
;
7673 if (memory_access_type
== VMAT_ELEMENTWISE
7674 || memory_access_type
== VMAT_STRIDED_SLP
)
7676 gimple_stmt_iterator incr_gsi
;
7682 vec
<constructor_elt
, va_gc
> *v
= NULL
;
7683 tree stride_base
, stride_step
, alias_off
;
7684 /* Checked by get_load_store_type. */
7685 unsigned int const_nunits
= nunits
.to_constant ();
7686 unsigned HOST_WIDE_INT cst_offset
= 0;
7688 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7689 gcc_assert (!nested_in_vect_loop
);
7693 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7694 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7698 first_stmt_info
= stmt_info
;
7699 first_dr_info
= dr_info
;
7701 if (slp
&& grouped_load
)
7703 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7704 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7710 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
7711 * vect_get_place_in_interleaving_chain (stmt_info
,
7714 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
7718 = fold_build_pointer_plus
7719 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7720 size_binop (PLUS_EXPR
,
7721 convert_to_ptrofftype (DR_OFFSET (first_dr_info
->dr
)),
7722 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7723 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7725 /* For a load with loop-invariant (but other than power-of-2)
7726 stride (i.e. not a grouped access) like so:
7728 for (i = 0; i < n; i += stride)
7731 we generate a new induction variable and new accesses to
7732 form a new vector (or vectors, depending on ncopies):
7734 for (j = 0; ; j += VF*stride)
7736 tmp2 = array[j + stride];
7738 vectemp = {tmp1, tmp2, ...}
7741 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
7742 build_int_cst (TREE_TYPE (stride_step
), vf
));
7744 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7746 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7747 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7748 create_iv (stride_base
, ivstep
, NULL
,
7749 loop
, &incr_gsi
, insert_after
,
7751 incr
= gsi_stmt (incr_gsi
);
7752 loop_vinfo
->add_stmt (incr
);
7754 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7756 prev_stmt_info
= NULL
;
7757 running_off
= offvar
;
7758 alias_off
= build_int_cst (ref_type
, 0);
7759 int nloads
= const_nunits
;
7761 tree ltype
= TREE_TYPE (vectype
);
7762 tree lvectype
= vectype
;
7763 auto_vec
<tree
> dr_chain
;
7764 if (memory_access_type
== VMAT_STRIDED_SLP
)
7766 if (group_size
< const_nunits
)
7768 /* First check if vec_init optab supports construction from
7769 vector elts directly. */
7770 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
7772 if (mode_for_vector (elmode
, group_size
).exists (&vmode
)
7773 && VECTOR_MODE_P (vmode
)
7774 && targetm
.vector_mode_supported_p (vmode
)
7775 && (convert_optab_handler (vec_init_optab
,
7776 TYPE_MODE (vectype
), vmode
)
7777 != CODE_FOR_nothing
))
7779 nloads
= const_nunits
/ group_size
;
7781 ltype
= build_vector_type (TREE_TYPE (vectype
), group_size
);
7785 /* Otherwise avoid emitting a constructor of vector elements
7786 by performing the loads using an integer type of the same
7787 size, constructing a vector of those and then
7788 re-interpreting it as the original vector type.
7789 This avoids a huge runtime penalty due to the general
7790 inability to perform store forwarding from smaller stores
7791 to a larger load. */
7793 = group_size
* TYPE_PRECISION (TREE_TYPE (vectype
));
7794 elmode
= int_mode_for_size (lsize
, 0).require ();
7795 unsigned int lnunits
= const_nunits
/ group_size
;
7796 /* If we can't construct such a vector fall back to
7797 element loads of the original vector type. */
7798 if (mode_for_vector (elmode
, lnunits
).exists (&vmode
)
7799 && VECTOR_MODE_P (vmode
)
7800 && targetm
.vector_mode_supported_p (vmode
)
7801 && (convert_optab_handler (vec_init_optab
, vmode
, elmode
)
7802 != CODE_FOR_nothing
))
7806 ltype
= build_nonstandard_integer_type (lsize
, 1);
7807 lvectype
= build_vector_type (ltype
, nloads
);
7814 lnel
= const_nunits
;
7817 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
7819 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
7820 else if (nloads
== 1)
7825 /* For SLP permutation support we need to load the whole group,
7826 not only the number of vector stmts the permutation result
7830 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
7832 unsigned int const_vf
= vf
.to_constant ();
7833 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
7834 dr_chain
.create (ncopies
);
7837 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7839 unsigned int group_el
= 0;
7840 unsigned HOST_WIDE_INT
7841 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7842 for (j
= 0; j
< ncopies
; j
++)
7845 vec_alloc (v
, nloads
);
7846 stmt_vec_info new_stmt_info
= NULL
;
7847 for (i
= 0; i
< nloads
; i
++)
7849 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7850 group_el
* elsz
+ cst_offset
);
7851 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
7852 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
7854 = gimple_build_assign (make_ssa_name (ltype
), data_ref
);
7856 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7858 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
7859 gimple_assign_lhs (new_stmt
));
7863 || group_el
== group_size
)
7865 tree newoff
= copy_ssa_name (running_off
);
7866 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7867 running_off
, stride_step
);
7868 vect_finish_stmt_generation (stmt_info
, incr
, gsi
);
7870 running_off
= newoff
;
7876 tree vec_inv
= build_constructor (lvectype
, v
);
7877 new_temp
= vect_init_vector (stmt_info
, vec_inv
, lvectype
, gsi
);
7878 new_stmt_info
= vinfo
->lookup_def (new_temp
);
7879 if (lvectype
!= vectype
)
7882 = gimple_build_assign (make_ssa_name (vectype
),
7884 build1 (VIEW_CONVERT_EXPR
,
7885 vectype
, new_temp
));
7887 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
7894 dr_chain
.quick_push (gimple_assign_lhs (new_stmt_info
->stmt
));
7896 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
7901 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
7903 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
7904 prev_stmt_info
= new_stmt_info
;
7910 vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
7911 slp_node_instance
, false, &n_perms
);
7916 if (memory_access_type
== VMAT_GATHER_SCATTER
7917 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
7918 grouped_load
= false;
7922 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7923 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7924 /* For SLP vectorization we directly vectorize a subchain
7925 without permutation. */
7926 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
7927 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7928 /* For BB vectorization always use the first stmt to base
7929 the data ref pointer on. */
7931 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7933 /* Check if the chain of loads is already vectorized. */
7934 if (STMT_VINFO_VEC_STMT (first_stmt_info
)
7935 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
7936 ??? But we can only do so if there is exactly one
7937 as we have no way to get at the rest. Leave the CSE
7939 ??? With the group load eventually participating
7940 in multiple different permutations (having multiple
7941 slp nodes which refer to the same group) the CSE
7942 is even wrong code. See PR56270. */
7945 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
7948 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7951 /* VEC_NUM is the number of vect stmts to be created for this group. */
7954 grouped_load
= false;
7955 /* If an SLP permutation is from N elements to N elements,
7956 and if one vector holds a whole number of N, we can load
7957 the inputs to the permutation in the same way as an
7958 unpermuted sequence. In other cases we need to load the
7959 whole group, not only the number of vector stmts the
7960 permutation result fits in. */
7962 && (group_size
!= SLP_INSTANCE_GROUP_SIZE (slp_node_instance
)
7963 || !multiple_p (nunits
, group_size
)))
7965 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
7966 variable VF; see vect_transform_slp_perm_load. */
7967 unsigned int const_vf
= vf
.to_constant ();
7968 unsigned int const_nunits
= nunits
.to_constant ();
7969 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
7970 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
7974 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7976 = group_size
- SLP_INSTANCE_GROUP_SIZE (slp_node_instance
);
7980 vec_num
= group_size
;
7982 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7986 first_stmt_info
= stmt_info
;
7987 first_dr_info
= dr_info
;
7988 group_size
= vec_num
= 1;
7990 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7993 alignment_support_scheme
7994 = vect_supportable_dr_alignment (first_dr_info
, false);
7995 gcc_assert (alignment_support_scheme
);
7996 vec_loop_masks
*loop_masks
7997 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7998 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8000 /* Targets with store-lane instructions must not require explicit
8001 realignment. vect_supportable_dr_alignment always returns either
8002 dr_aligned or dr_unaligned_supported for masked operations. */
8003 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8006 || alignment_support_scheme
== dr_aligned
8007 || alignment_support_scheme
== dr_unaligned_supported
);
8009 /* In case the vectorization factor (VF) is bigger than the number
8010 of elements that we can fit in a vectype (nunits), we have to generate
8011 more than one vector stmt - i.e - we need to "unroll" the
8012 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8013 from one copy of the vector stmt to the next, in the field
8014 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8015 stages to find the correct vector defs to be used when vectorizing
8016 stmts that use the defs of the current stmt. The example below
8017 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8018 need to create 4 vectorized stmts):
8020 before vectorization:
8021 RELATED_STMT VEC_STMT
8025 step 1: vectorize stmt S1:
8026 We first create the vector stmt VS1_0, and, as usual, record a
8027 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8028 Next, we create the vector stmt VS1_1, and record a pointer to
8029 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8030 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8032 RELATED_STMT VEC_STMT
8033 VS1_0: vx0 = memref0 VS1_1 -
8034 VS1_1: vx1 = memref1 VS1_2 -
8035 VS1_2: vx2 = memref2 VS1_3 -
8036 VS1_3: vx3 = memref3 - -
8037 S1: x = load - VS1_0
8040 See in documentation in vect_get_vec_def_for_stmt_copy for how the
8041 information we recorded in RELATED_STMT field is used to vectorize
8044 /* In case of interleaving (non-unit grouped access):
8051 Vectorized loads are created in the order of memory accesses
8052 starting from the access of the first stmt of the chain:
8055 VS2: vx1 = &base + vec_size*1
8056 VS3: vx3 = &base + vec_size*2
8057 VS4: vx4 = &base + vec_size*3
8059 Then permutation statements are generated:
8061 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8062 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8065 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8066 (the order of the data-refs in the output of vect_permute_load_chain
8067 corresponds to the order of scalar stmts in the interleaving chain - see
8068 the documentation of vect_permute_load_chain()).
8069 The generation of permutation stmts and recording them in
8070 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8072 In case of both multiple types and interleaving, the vector loads and
8073 permutation stmts above are created for every copy. The result vector
8074 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8075 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8077 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8078 on a target that supports unaligned accesses (dr_unaligned_supported)
8079 we generate the following code:
8083 p = p + indx * vectype_size;
8088 Otherwise, the data reference is potentially unaligned on a target that
8089 does not support unaligned accesses (dr_explicit_realign_optimized) -
8090 then generate the following code, in which the data in each iteration is
8091 obtained by two vector loads, one from the previous iteration, and one
8092 from the current iteration:
8094 msq_init = *(floor(p1))
8095 p2 = initial_addr + VS - 1;
8096 realignment_token = call target_builtin;
8099 p2 = p2 + indx * vectype_size
8101 vec_dest = realign_load (msq, lsq, realignment_token)
8106 /* If the misalignment remains the same throughout the execution of the
8107 loop, we can create the init_addr and permutation mask at the loop
8108 preheader. Otherwise, it needs to be created inside the loop.
8109 This can only occur when vectorizing memory accesses in the inner-loop
8110 nested within an outer-loop that is being vectorized. */
8112 if (nested_in_vect_loop
8113 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
8114 GET_MODE_SIZE (TYPE_MODE (vectype
))))
8116 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
8117 compute_in_loop
= true;
8120 if ((alignment_support_scheme
== dr_explicit_realign_optimized
8121 || alignment_support_scheme
== dr_explicit_realign
)
8122 && !compute_in_loop
)
8124 msq
= vect_setup_realignment (first_stmt_info
, gsi
, &realignment_token
,
8125 alignment_support_scheme
, NULL_TREE
,
8127 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8129 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
8130 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
8137 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8138 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8141 tree vec_offset
= NULL_TREE
;
8142 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8144 aggr_type
= NULL_TREE
;
8147 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8149 aggr_type
= elem_type
;
8150 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8151 &bump
, &vec_offset
);
8155 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8156 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8158 aggr_type
= vectype
;
8159 bump
= vect_get_data_ptr_increment (dr_info
, aggr_type
,
8160 memory_access_type
);
8163 tree vec_mask
= NULL_TREE
;
8164 prev_stmt_info
= NULL
;
8165 poly_uint64 group_elt
= 0;
8166 for (j
= 0; j
< ncopies
; j
++)
8168 stmt_vec_info new_stmt_info
= NULL
;
8169 /* 1. Create the vector or array pointer update chain. */
8172 bool simd_lane_access_p
8173 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
8174 if (simd_lane_access_p
8175 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
8176 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
8177 && integer_zerop (DR_OFFSET (first_dr_info
->dr
))
8178 && integer_zerop (DR_INIT (first_dr_info
->dr
))
8179 && alias_sets_conflict_p (get_alias_set (aggr_type
),
8180 get_alias_set (TREE_TYPE (ref_type
)))
8181 && (alignment_support_scheme
== dr_aligned
8182 || alignment_support_scheme
== dr_unaligned_supported
))
8184 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
8185 dataref_offset
= build_int_cst (ref_type
, 0);
8187 else if (first_stmt_info_for_drptr
8188 && first_stmt_info
!= first_stmt_info_for_drptr
)
8191 = vect_create_data_ref_ptr (first_stmt_info_for_drptr
,
8192 aggr_type
, at_loop
, offset
, &dummy
,
8193 gsi
, &ptr_incr
, simd_lane_access_p
,
8195 /* Adjust the pointer by the difference to first_stmt. */
8196 data_reference_p ptrdr
8197 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
8199 = fold_convert (sizetype
,
8200 size_binop (MINUS_EXPR
,
8201 DR_INIT (first_dr_info
->dr
),
8203 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8206 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8207 vect_get_gather_scatter_ops (loop
, stmt_info
, &gs_info
,
8208 &dataref_ptr
, &vec_offset
);
8211 = vect_create_data_ref_ptr (first_stmt_info
, aggr_type
, at_loop
,
8212 offset
, &dummy
, gsi
, &ptr_incr
,
8216 vec_mask
= vect_get_vec_def_for_operand (mask
, stmt_info
,
8222 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
8224 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8225 vec_offset
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_offset
);
8227 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8230 vec_mask
= vect_get_vec_def_for_stmt_copy (vinfo
, vec_mask
);
8233 if (grouped_load
|| slp_perm
)
8234 dr_chain
.create (vec_num
);
8236 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8240 vec_array
= create_vector_array (vectype
, vec_num
);
8242 tree final_mask
= NULL_TREE
;
8244 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
8247 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8254 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
8256 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
8257 tree alias_ptr
= build_int_cst (ref_type
, align
);
8258 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
8259 dataref_ptr
, alias_ptr
,
8265 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
8266 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
8267 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
8269 gimple_call_set_lhs (call
, vec_array
);
8270 gimple_call_set_nothrow (call
, true);
8271 new_stmt_info
= vect_finish_stmt_generation (stmt_info
, call
, gsi
);
8273 /* Extract each vector into an SSA_NAME. */
8274 for (i
= 0; i
< vec_num
; i
++)
8276 new_temp
= read_vector_array (stmt_info
, gsi
, scalar_dest
,
8278 dr_chain
.quick_push (new_temp
);
8281 /* Record the mapping between SSA_NAMEs and statements. */
8282 vect_record_grouped_load_vectors (stmt_info
, dr_chain
);
8284 /* Record that VEC_ARRAY is now dead. */
8285 vect_clobber_variable (stmt_info
, gsi
, vec_array
);
8289 for (i
= 0; i
< vec_num
; i
++)
8291 tree final_mask
= NULL_TREE
;
8293 && memory_access_type
!= VMAT_INVARIANT
)
8294 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
8296 vectype
, vec_num
* j
+ i
);
8298 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
8302 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8305 /* 2. Create the vector-load in the loop. */
8306 gimple
*new_stmt
= NULL
;
8307 switch (alignment_support_scheme
)
8310 case dr_unaligned_supported
:
8312 unsigned int align
, misalign
;
8314 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8316 tree scale
= size_int (gs_info
.scale
);
8319 call
= gimple_build_call_internal
8320 (IFN_MASK_GATHER_LOAD
, 4, dataref_ptr
,
8321 vec_offset
, scale
, final_mask
);
8323 call
= gimple_build_call_internal
8324 (IFN_GATHER_LOAD
, 3, dataref_ptr
,
8326 gimple_call_set_nothrow (call
, true);
8328 data_ref
= NULL_TREE
;
8332 align
= DR_TARGET_ALIGNMENT (dr_info
);
8333 if (alignment_support_scheme
== dr_aligned
)
8335 gcc_assert (aligned_access_p (first_dr_info
));
8338 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8340 align
= dr_alignment
8341 (vect_dr_behavior (first_dr_info
));
8345 misalign
= DR_MISALIGNMENT (first_dr_info
);
8346 if (dataref_offset
== NULL_TREE
8347 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
8348 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
8353 align
= least_bit_hwi (misalign
| align
);
8354 tree ptr
= build_int_cst (ref_type
, align
);
8356 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
8359 gimple_call_set_nothrow (call
, true);
8361 data_ref
= NULL_TREE
;
8366 = fold_build2 (MEM_REF
, vectype
, dataref_ptr
,
8369 : build_int_cst (ref_type
, 0));
8370 if (alignment_support_scheme
== dr_aligned
)
8372 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8373 TREE_TYPE (data_ref
)
8374 = build_aligned_type (TREE_TYPE (data_ref
),
8375 align
* BITS_PER_UNIT
);
8377 TREE_TYPE (data_ref
)
8378 = build_aligned_type (TREE_TYPE (data_ref
),
8379 TYPE_ALIGN (elem_type
));
8383 case dr_explicit_realign
:
8387 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
8389 if (compute_in_loop
)
8390 msq
= vect_setup_realignment (first_stmt_info
, gsi
,
8392 dr_explicit_realign
,
8395 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8396 ptr
= copy_ssa_name (dataref_ptr
);
8398 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8399 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr_info
);
8400 new_stmt
= gimple_build_assign
8401 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
8403 (TREE_TYPE (dataref_ptr
),
8404 -(HOST_WIDE_INT
) align
));
8405 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8407 = build2 (MEM_REF
, vectype
, ptr
,
8408 build_int_cst (ref_type
, 0));
8409 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8410 vec_dest
= vect_create_destination_var (scalar_dest
,
8412 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8413 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8414 gimple_assign_set_lhs (new_stmt
, new_temp
);
8415 gimple_set_vdef (new_stmt
, gimple_vdef (stmt_info
->stmt
));
8416 gimple_set_vuse (new_stmt
, gimple_vuse (stmt_info
->stmt
));
8417 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8420 bump
= size_binop (MULT_EXPR
, vs
,
8421 TYPE_SIZE_UNIT (elem_type
));
8422 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
8423 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
,
8425 new_stmt
= gimple_build_assign
8426 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
8428 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
8429 ptr
= copy_ssa_name (ptr
, new_stmt
);
8430 gimple_assign_set_lhs (new_stmt
, ptr
);
8431 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8433 = build2 (MEM_REF
, vectype
, ptr
,
8434 build_int_cst (ref_type
, 0));
8437 case dr_explicit_realign_optimized
:
8439 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
8440 new_temp
= copy_ssa_name (dataref_ptr
);
8442 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
8443 unsigned int align
= DR_TARGET_ALIGNMENT (first_dr_info
);
8444 new_stmt
= gimple_build_assign
8445 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
8446 build_int_cst (TREE_TYPE (dataref_ptr
),
8447 -(HOST_WIDE_INT
) align
));
8448 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8450 = build2 (MEM_REF
, vectype
, new_temp
,
8451 build_int_cst (ref_type
, 0));
8457 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8458 /* DATA_REF is null if we've already built the statement. */
8461 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8462 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
8464 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8465 gimple_set_lhs (new_stmt
, new_temp
);
8467 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8469 /* 3. Handle explicit realignment if necessary/supported.
8471 vec_dest = realign_load (msq, lsq, realignment_token) */
8472 if (alignment_support_scheme
== dr_explicit_realign_optimized
8473 || alignment_support_scheme
== dr_explicit_realign
)
8475 lsq
= gimple_assign_lhs (new_stmt
);
8476 if (!realignment_token
)
8477 realignment_token
= dataref_ptr
;
8478 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8479 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
8480 msq
, lsq
, realignment_token
);
8481 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
8482 gimple_assign_set_lhs (new_stmt
, new_temp
);
8484 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
8486 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8489 if (i
== vec_num
- 1 && j
== ncopies
- 1)
8490 add_phi_arg (phi
, lsq
,
8491 loop_latch_edge (containing_loop
),
8497 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8499 tree perm_mask
= perm_mask_for_reverse (vectype
);
8500 new_temp
= permute_vec_elements (new_temp
, new_temp
,
8501 perm_mask
, stmt_info
, gsi
);
8502 new_stmt_info
= vinfo
->lookup_def (new_temp
);
8505 /* Collect vector loads and later create their permutation in
8506 vect_transform_grouped_load (). */
8507 if (grouped_load
|| slp_perm
)
8508 dr_chain
.quick_push (new_temp
);
8510 /* Store vector loads in the corresponding SLP_NODE. */
8511 if (slp
&& !slp_perm
)
8512 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
8514 /* With SLP permutation we load the gaps as well, without
8515 we need to skip the gaps after we manage to fully load
8516 all elements. group_gap_adj is DR_GROUP_SIZE here. */
8517 group_elt
+= nunits
;
8518 if (maybe_ne (group_gap_adj
, 0U)
8520 && known_eq (group_elt
, group_size
- group_gap_adj
))
8522 poly_wide_int bump_val
8523 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8525 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8526 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8531 /* Bump the vector pointer to account for a gap or for excess
8532 elements loaded for a permuted SLP load. */
8533 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
8535 poly_wide_int bump_val
8536 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
8538 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
8539 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
8544 if (slp
&& !slp_perm
)
8550 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
8551 slp_node_instance
, false,
8554 dr_chain
.release ();
8562 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
8563 vect_transform_grouped_load (stmt_info
, dr_chain
,
8565 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
8570 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
8572 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
8573 prev_stmt_info
= new_stmt_info
;
8576 dr_chain
.release ();
8582 /* Function vect_is_simple_cond.

   NOTE (review): this region is a lossy extraction -- each original
   statement is split across several physical lines and some original
   lines (braces, "return" statements, the "static bool" header) are
   missing entirely.  Only comments are added below; every code token
   is left byte-identical.

   Input:
8585 LOOP - the loop that is being vectorized.
8586 COND - Condition that is checked for simple use.
   Output:
8589 *COMP_VECTYPE - the vector type for the comparison.
8590 *DTS - The def types for the arguments of the comparison
8592 Returns whether a COND can be vectorized. Checks whether
8593 condition operands are supportable using vect_is_simple_use. */
8596 vect_is_simple_cond (tree cond
, vec_info
*vinfo
,
8597 tree
*comp_vectype
, enum vect_def_type
*dts
,
/* Candidate vector types for the two comparison operands.  */
8601 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* Case 1: COND is already a scalar boolean SSA name (a mask).  */
8604 if (TREE_CODE (cond
) == SSA_NAME
8605 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
/* The mask def must be a simple use and its vectype a vector boolean
   (a "return false" line was lost in extraction here).  */
8607 if (!vect_is_simple_use (cond
, vinfo
, &dts
[0], comp_vectype
)
8609 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
/* Case 2: otherwise COND must be a comparison tree.  */
8614 if (!COMPARISON_CLASS_P (cond
))
8617 lhs
= TREE_OPERAND (cond
, 0);
8618 rhs
= TREE_OPERAND (cond
, 1);
/* Operand 0: SSA name (analyze its def) or an int/real/fixed constant.  */
8620 if (TREE_CODE (lhs
) == SSA_NAME
)
8622 if (!vect_is_simple_use (lhs
, vinfo
, &dts
[0], &vectype1
))
8625 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
8626 || TREE_CODE (lhs
) == FIXED_CST
)
8627 dts
[0] = vect_constant_def
;
/* Operand 1: same analysis as operand 0.  */
8631 if (TREE_CODE (rhs
) == SSA_NAME
)
8633 if (!vect_is_simple_use (rhs
, vinfo
, &dts
[1], &vectype2
))
8636 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
8637 || TREE_CODE (rhs
) == FIXED_CST
)
8638 dts
[1] = vect_constant_def
;
/* When both operand vectypes are known they must agree in (possibly
   poly) lane count.  */
8642 if (vectype1
&& vectype2
8643 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
8644 TYPE_VECTOR_SUBPARTS (vectype2
)))
8647 *comp_vectype
= vectype1
? vectype1
: vectype2
;
8648 /* Invariant comparison: neither operand fixed the vector type,
   so derive one from the scalar type of the lhs.  */
8649 if (! *comp_vectype
&& vectype
)
8651 tree scalar_type
= TREE_TYPE (lhs
);
8652 /* If we can widen the comparison to match vectype do so. */
8653 if (INTEGRAL_TYPE_P (scalar_type
)
8654 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
8655 TYPE_SIZE (TREE_TYPE (vectype
))))
8656 scalar_type
= build_nonstandard_integer_type
8657 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype
))),
8658 TYPE_UNSIGNED (scalar_type
));
8659 *comp_vectype
= get_vectype_for_scalar_type (scalar_type
);
8665 /* vectorizable_condition.

   NOTE (review): lossy extraction -- statements are split across lines
   and some original lines (braces, return statements, else-branches,
   the "static bool" header and the final "return true") are missing.
   Only comments are added below; code tokens are byte-identical.

8667 Check if STMT_INFO is conditional modify expression that can be vectorized.
8668 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8669 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
8672 When STMT_INFO is vectorized as a nested cycle, REDUC_DEF is the vector
8673 variable to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1,
8674 and in else clause if it is 2).
8676 Return true if STMT_INFO is vectorizable in this way. */
8679 vectorizable_condition (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8680 stmt_vec_info
*vec_stmt
, tree reduc_def
,
8681 int reduc_index
, slp_tree slp_node
,
8682 stmt_vector_for_cost
*cost_vec
)
/* Shorthands and result slots used by both analysis and transform.  */
8684 vec_info
*vinfo
= stmt_info
->vinfo
;
8685 tree scalar_dest
= NULL_TREE
;
8686 tree vec_dest
= NULL_TREE
;
8687 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
8688 tree then_clause
, else_clause
;
8689 tree comp_vectype
= NULL_TREE
;
8690 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
8691 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
8694 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* Def types of the four scalar operands: cond lhs, cond rhs,
   then-clause, else-clause.  */
8695 enum vect_def_type dts
[4]
8696 = {vect_unknown_def_type
, vect_unknown_def_type
,
8697 vect_unknown_def_type
, vect_unknown_def_type
};
8700 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
8701 stmt_vec_info prev_stmt_info
= NULL
;
8703 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
/* Per-copy vectorized operands, one vec per scalar operand.  */
8704 vec
<tree
> vec_oprnds0
= vNULL
;
8705 vec
<tree
> vec_oprnds1
= vNULL
;
8706 vec
<tree
> vec_oprnds2
= vNULL
;
8707 vec
<tree
> vec_oprnds3
= vNULL
;
8709 bool masked
= false;
/* Reduction combined with SLP is rejected here (the rejection body
   was lost in extraction -- presumably "return false").  */
8711 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
8714 vect_reduction_type reduction_type
8715 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info
);
/* Relevance/def-type gating for the plain (TREE_CODE_REDUCTION) case.  */
8716 if (reduction_type
== TREE_CODE_REDUCTION
)
8718 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8721 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8722 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
8726 /* FORNOW: not yet supported. */
8727 if (STMT_VINFO_LIVE_P (stmt_info
))
8729 if (dump_enabled_p ())
8730 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8731 "value used after loop.\n");
8736 /* Is vectorizable conditional operation? */
8737 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
/* Only COND_EXPR assignments are handled.  */
8741 code
= gimple_assign_rhs_code (stmt
);
8743 if (code
!= COND_EXPR
)
8746 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8747 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
8752 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8754 gcc_assert (ncopies
>= 1);
8755 if (reduc_index
&& ncopies
> 1)
8756 return false; /* FORNOW */
/* Pick apart the COND_EXPR: condition, then-value, else-value.  */
8758 cond_expr
= gimple_assign_rhs1 (stmt
);
8759 then_clause
= gimple_assign_rhs2 (stmt
);
8760 else_clause
= gimple_assign_rhs3 (stmt
);
/* Validate the condition and the then/else operands.  */
8762 if (!vect_is_simple_cond (cond_expr
, stmt_info
->vinfo
,
8763 &comp_vectype
, &dts
[0], slp_node
? NULL
: vectype
)
8767 if (!vect_is_simple_use (then_clause
, stmt_info
->vinfo
, &dts
[2], &vectype1
))
8769 if (!vect_is_simple_use (else_clause
, stmt_info
->vinfo
, &dts
[3], &vectype2
))
/* Then/else vectypes, when known, must match the result vectype.  */
8772 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
8775 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
/* masked: the condition is a boolean SSA name rather than a
   comparison tree.  */
8778 masked
= !COMPARISON_CLASS_P (cond_expr
)
;
8779 vec_cmp_type
= build_same_sized_truth_vector_type (comp_vectype
);
8781 if (vec_cmp_type
== NULL_TREE
)
8784 cond_code
= TREE_CODE (cond_expr
);
8787 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
8788 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
8791 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
8793 /* Boolean values may have another representation in vectors
8794 and therefore we prefer bit operations over comparison for
8795 them (which also works for scalar masks). We store opcodes
8796 to use in bitop1 and bitop2. Statement is vectorized as
8797 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
8798 depending on bitop1 and bitop2 arity. */
/* The switch arms selecting bitop1/bitop2 per comparison code follow;
   the "case" labels themselves were lost in extraction.  */
8802 bitop1
= BIT_NOT_EXPR
;
8803 bitop2
= BIT_AND_EXPR
;
8806 bitop1
= BIT_NOT_EXPR
;
8807 bitop2
= BIT_IOR_EXPR
;
8810 bitop1
= BIT_NOT_EXPR
;
8811 bitop2
= BIT_AND_EXPR
;
8812 std::swap (cond_expr0
, cond_expr1
);
8815 bitop1
= BIT_NOT_EXPR
;
8816 bitop2
= BIT_IOR_EXPR
;
8817 std::swap (cond_expr0
, cond_expr1
);
8820 bitop1
= BIT_XOR_EXPR
;
8823 bitop1
= BIT_XOR_EXPR
;
8824 bitop2
= BIT_NOT_EXPR
;
/* After bitop lowering the "comparison" is just the mask itself.  */
8829 cond_code
= SSA_NAME
;
/* Check the selected bit operations are supported on comp_vectype.  */
8834 if (bitop1
!= NOP_EXPR
)
8836 machine_mode mode
= TYPE_MODE (comp_vectype
);
8839 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
8840 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8843 if (bitop2
!= NOP_EXPR
)
8845 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
8847 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
8851 if (expand_vec_cond_expr_p (vectype
, comp_vectype
,
/* Analysis phase: record stmt type and cost, no code generation.  */
8854 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
8855 vect_model_simple_cost (stmt_info
, ncopies
, dts
, ndts
, slp_node
,
/* Transformation phase starts here.  */
8866 vec_oprnds0
.create (1);
8867 vec_oprnds1
.create (1);
8868 vec_oprnds2
.create (1);
8869 vec_oprnds3
.create (1);
8873 scalar_dest
= gimple_assign_lhs (stmt
);
8874 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
8875 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8877 /* Handle cond expr. */
8878 for (j
= 0; j
< ncopies
; j
++)
8880 stmt_vec_info new_stmt_info
= NULL
;
/* First iteration, SLP path: fetch all operand defs at once.  */
8885 auto_vec
<tree
, 4> ops
;
8886 auto_vec
<vec
<tree
>, 4> vec_defs
;
8889 ops
.safe_push (cond_expr
);
8892 ops
.safe_push (cond_expr0
);
8893 ops
.safe_push (cond_expr1
);
8895 ops
.safe_push (then_clause
);
8896 ops
.safe_push (else_clause
);
8897 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
/* vec_defs are popped in reverse push order.  */
8898 vec_oprnds3
= vec_defs
.pop ();
8899 vec_oprnds2
= vec_defs
.pop ();
8901 vec_oprnds1
= vec_defs
.pop ();
8902 vec_oprnds0
= vec_defs
.pop ();
/* First iteration, non-SLP path: per-operand defs.  Masked case uses
   COND_EXPR's rhs1 directly; otherwise its two operands.  */
8909 = vect_get_vec_def_for_operand (cond_expr
, stmt_info
,
8911 vect_is_simple_use (cond_expr
, stmt_info
->vinfo
, &dts
[0]);
8916 = vect_get_vec_def_for_operand (cond_expr0
,
8917 stmt_info
, comp_vectype
);
8918 vect_is_simple_use (cond_expr0
, loop_vinfo
, &dts
[0]);
8921 = vect_get_vec_def_for_operand (cond_expr1
,
8922 stmt_info
, comp_vectype
);
8923 vect_is_simple_use (cond_expr1
, loop_vinfo
, &dts
[1]);
/* In a nested-cycle reduction REDUC_DEF replaces the then- or
   else-clause according to REDUC_INDEX (see header comment).  */
8925 if (reduc_index
== 1)
8926 vec_then_clause
= reduc_def
;
8929 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
8931 vect_is_simple_use (then_clause
, loop_vinfo
, &dts
[2]);
8933 if (reduc_index
== 2)
8934 vec_else_clause
= reduc_def
;
8937 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
8939 vect_is_simple_use (else_clause
, loop_vinfo
, &dts
[3]);
/* Later iterations (j > 0): advance each def to the next copy.  */
8946 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds0
.pop ());
8949 = vect_get_vec_def_for_stmt_copy (vinfo
, vec_oprnds1
.pop ());
8951 vec_then_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
8952 vec_oprnds2
.pop ());
8953 vec_else_clause
= vect_get_vec_def_for_stmt_copy (vinfo
,
8954 vec_oprnds3
.pop ());
/* Non-SLP: stash this copy's defs in the operand vectors.  */
8959 vec_oprnds0
.quick_push (vec_cond_lhs
);
8961 vec_oprnds1
.quick_push (vec_cond_rhs
);
8962 vec_oprnds2
.quick_push (vec_then_clause
);
8963 vec_oprnds3
.quick_push (vec_else_clause
);
8966 /* Arguments are ready. Create the new vector stmt. */
8967 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
8969 vec_then_clause
= vec_oprnds2
[i
];
8970 vec_else_clause
= vec_oprnds3
[i
];
/* Masked case: the mask itself is the compare value.  */
8973 vec_compare
= vec_cond_lhs
;
8976 vec_cond_rhs
= vec_oprnds1
[i
];
/* No bitop lowering: build the comparison directly.  */
8977 if (bitop1
== NOP_EXPR
)
8978 vec_compare
= build2 (cond_code
, vec_cmp_type
,
8979 vec_cond_lhs
, vec_cond_rhs
);
/* Bitop lowering: emit bitop1 (unary NOT or binary) ...  */
8982 new_temp
= make_ssa_name (vec_cmp_type
);
8984 if (bitop1
== BIT_NOT_EXPR
)
8985 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
8989 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
8991 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* ... then bitop2; BIT_NOT is folded away by swapping the
   then/else clauses instead (see comment at 8996).  */
8992 if (bitop2
== NOP_EXPR
)
8993 vec_compare
= new_temp
;
8994 else if (bitop2
== BIT_NOT_EXPR
)
8996 /* Instead of doing ~x ? y : z do x ? z : y. */
8997 vec_compare
= new_temp
;
8998 std::swap (vec_then_clause
, vec_else_clause
);
9002 vec_compare
= make_ssa_name (vec_cmp_type
);
9004 = gimple_build_assign (vec_compare
, bitop2
,
9005 vec_cond_lhs
, new_temp
);
9006 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* EXTRACT_LAST_REDUCTION: emit an IFN_FOLD_EXTRACT_LAST internal
   call instead of a VEC_COND_EXPR.  */
9010 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
9012 if (!is_gimple_val (vec_compare
))
/* The mask must be a gimple value for the call argument.  */
9014 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
9015 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
9017 vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9018 vec_compare
= vec_compare_name
;
9020 gcc_assert (reduc_index
== 2);
9021 gcall
*new_stmt
= gimple_build_call_internal
9022 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
9024 gimple_call_set_lhs (new_stmt
, scalar_dest
);
9025 SSA_NAME_DEF_STMT (scalar_dest
) = new_stmt
;
/* Replace in place when the scalar stmt is at GSI, otherwise
   remove it and emit the call at GSI.  */
9026 if (stmt_info
->stmt
== gsi_stmt (*gsi
))
9027 new_stmt_info
= vect_finish_replace_stmt (stmt_info
, new_stmt
);
9030 /* In this case we're moving the definition to later in the
9031 block. That doesn't matter because the only uses of the
9032 lhs are in phi statements. */
9033 gimple_stmt_iterator old_gsi
9034 = gsi_for_stmt (stmt_info
->stmt
);
9035 gsi_remove (&old_gsi
, true);
9037 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* Regular case: new_temp = VEC_COND_EXPR <compare, then, else>.  */
9042 new_temp
= make_ssa_name (vec_dest
);
9044 = gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
9045 vec_then_clause
, vec_else_clause
);
9047 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
/* SLP: collect the result stmt on the node.  */
9050 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
/* Non-SLP: chain the copies via VEC_STMT / RELATED_STMT.  */
9057 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9059 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9061 prev_stmt_info
= new_stmt_info
;
/* Release the temporary operand vectors.  */
9064 vec_oprnds0
.release ();
9065 vec_oprnds1
.release ();
9066 vec_oprnds2
.release ();
9067 vec_oprnds3
.release ();
9072 /* vectorizable_comparison.
9074 Check if STMT_INFO is comparison expression that can be vectorized.
9075 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9076 comparison, put it in VEC_STMT, and insert it at GSI.
9078 Return true if STMT_INFO is vectorizable in this way. */
9081 vectorizable_comparison (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9082 stmt_vec_info
*vec_stmt
, tree reduc_def
,
9083 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
9085 vec_info
*vinfo
= stmt_info
->vinfo
;
9086 tree lhs
, rhs1
, rhs2
;
9087 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9088 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9089 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
9091 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
9092 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
9096 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
9097 stmt_vec_info prev_stmt_info
= NULL
;
9099 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9100 vec
<tree
> vec_oprnds0
= vNULL
;
9101 vec
<tree
> vec_oprnds1
= vNULL
;
9105 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9108 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
9111 mask_type
= vectype
;
9112 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
9117 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9119 gcc_assert (ncopies
>= 1);
9120 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
9121 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
9125 if (STMT_VINFO_LIVE_P (stmt_info
))
9127 if (dump_enabled_p ())
9128 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9129 "value used after loop.\n");
9133 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
9137 code
= gimple_assign_rhs_code (stmt
);
9139 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
9142 rhs1
= gimple_assign_rhs1 (stmt
);
9143 rhs2
= gimple_assign_rhs2 (stmt
);
9145 if (!vect_is_simple_use (rhs1
, stmt_info
->vinfo
, &dts
[0], &vectype1
))
9148 if (!vect_is_simple_use (rhs2
, stmt_info
->vinfo
, &dts
[1], &vectype2
))
9151 if (vectype1
&& vectype2
9152 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9153 TYPE_VECTOR_SUBPARTS (vectype2
)))
9156 vectype
= vectype1
? vectype1
: vectype2
;
9158 /* Invariant comparison. */
9161 vectype
= get_vectype_for_scalar_type (TREE_TYPE (rhs1
));
9162 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
9165 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
9168 /* Can't compare mask and non-mask types. */
9169 if (vectype1
&& vectype2
9170 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
9173 /* Boolean values may have another representation in vectors
9174 and therefore we prefer bit operations over comparison for
9175 them (which also works for scalar masks). We store opcodes
9176 to use in bitop1 and bitop2. Statement is vectorized as
9177 BITOP2 (rhs1 BITOP1 rhs2) or
9178 rhs1 BITOP2 (BITOP1 rhs2)
9179 depending on bitop1 and bitop2 arity. */
9180 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
9182 if (code
== GT_EXPR
)
9184 bitop1
= BIT_NOT_EXPR
;
9185 bitop2
= BIT_AND_EXPR
;
9187 else if (code
== GE_EXPR
)
9189 bitop1
= BIT_NOT_EXPR
;
9190 bitop2
= BIT_IOR_EXPR
;
9192 else if (code
== LT_EXPR
)
9194 bitop1
= BIT_NOT_EXPR
;
9195 bitop2
= BIT_AND_EXPR
;
9196 std::swap (rhs1
, rhs2
);
9197 std::swap (dts
[0], dts
[1]);
9199 else if (code
== LE_EXPR
)
9201 bitop1
= BIT_NOT_EXPR
;
9202 bitop2
= BIT_IOR_EXPR
;
9203 std::swap (rhs1
, rhs2
);
9204 std::swap (dts
[0], dts
[1]);
9208 bitop1
= BIT_XOR_EXPR
;
9209 if (code
== EQ_EXPR
)
9210 bitop2
= BIT_NOT_EXPR
;
9216 if (bitop1
== NOP_EXPR
)
9218 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
9223 machine_mode mode
= TYPE_MODE (vectype
);
9226 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
9227 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9230 if (bitop2
!= NOP_EXPR
)
9232 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
9233 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9238 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
9239 vect_model_simple_cost (stmt_info
, ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
9240 dts
, ndts
, slp_node
, cost_vec
);
9247 vec_oprnds0
.create (1);
9248 vec_oprnds1
.create (1);
9252 lhs
= gimple_assign_lhs (stmt
);
9253 mask
= vect_create_destination_var (lhs
, mask_type
);
9255 /* Handle cmp expr. */
9256 for (j
= 0; j
< ncopies
; j
++)
9258 stmt_vec_info new_stmt_info
= NULL
;
9263 auto_vec
<tree
, 2> ops
;
9264 auto_vec
<vec
<tree
>, 2> vec_defs
;
9266 ops
.safe_push (rhs1
);
9267 ops
.safe_push (rhs2
);
9268 vect_get_slp_defs (ops
, slp_node
, &vec_defs
);
9269 vec_oprnds1
= vec_defs
.pop ();
9270 vec_oprnds0
= vec_defs
.pop ();
9274 vec_rhs1
= vect_get_vec_def_for_operand (rhs1
, stmt_info
,
9276 vec_rhs2
= vect_get_vec_def_for_operand (rhs2
, stmt_info
,
9282 vec_rhs1
= vect_get_vec_def_for_stmt_copy (vinfo
,
9283 vec_oprnds0
.pop ());
9284 vec_rhs2
= vect_get_vec_def_for_stmt_copy (vinfo
,
9285 vec_oprnds1
.pop ());
9290 vec_oprnds0
.quick_push (vec_rhs1
);
9291 vec_oprnds1
.quick_push (vec_rhs2
);
9294 /* Arguments are ready. Create the new vector stmt. */
9295 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
9297 vec_rhs2
= vec_oprnds1
[i
];
9299 new_temp
= make_ssa_name (mask
);
9300 if (bitop1
== NOP_EXPR
)
9302 gassign
*new_stmt
= gimple_build_assign (new_temp
, code
,
9303 vec_rhs1
, vec_rhs2
);
9305 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9310 if (bitop1
== BIT_NOT_EXPR
)
9311 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
9313 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
9316 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9317 if (bitop2
!= NOP_EXPR
)
9319 tree res
= make_ssa_name (mask
);
9320 if (bitop2
== BIT_NOT_EXPR
)
9321 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
9323 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
9326 = vect_finish_stmt_generation (stmt_info
, new_stmt
, gsi
);
9330 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt_info
);
9337 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt_info
;
9339 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt_info
;
9341 prev_stmt_info
= new_stmt_info
;
9344 vec_oprnds0
.release ();
9345 vec_oprnds1
.release ();
9350 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
9351 can handle all live statements in the node. Otherwise return true
9352 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
9353 GSI and VEC_STMT are as for vectorizable_live_operation. */
9356 can_vectorize_live_stmts (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9357 slp_tree slp_node
, stmt_vec_info
*vec_stmt
,
9358 stmt_vector_for_cost
*cost_vec
)
9362 stmt_vec_info slp_stmt_info
;
9364 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
9366 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
9367 && !vectorizable_live_operation (slp_stmt_info
, gsi
, slp_node
, i
,
9368 vec_stmt
, cost_vec
))
9372 else if (STMT_VINFO_LIVE_P (stmt_info
)
9373 && !vectorizable_live_operation (stmt_info
, gsi
, slp_node
, -1,
9374 vec_stmt
, cost_vec
))
9380 /* Make sure the statement is vectorizable. */
9383 vect_analyze_stmt (stmt_vec_info stmt_info
, bool *need_to_vectorize
,
9384 slp_tree node
, slp_instance node_instance
,
9385 stmt_vector_for_cost
*cost_vec
)
9387 vec_info
*vinfo
= stmt_info
->vinfo
;
9388 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
9389 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
9391 gimple_seq pattern_def_seq
;
9393 if (dump_enabled_p ())
9394 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
9397 if (gimple_has_volatile_ops (stmt_info
->stmt
))
9398 return opt_result::failure_at (stmt_info
->stmt
,
9400 " stmt has volatile operands: %G\n",
9403 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9405 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
9407 gimple_stmt_iterator si
;
9409 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
9411 stmt_vec_info pattern_def_stmt_info
9412 = vinfo
->lookup_stmt (gsi_stmt (si
));
9413 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
9414 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
9416 /* Analyze def stmt of STMT if it's a pattern stmt. */
9417 if (dump_enabled_p ())
9418 dump_printf_loc (MSG_NOTE
, vect_location
,
9419 "==> examining pattern def statement: %G",
9420 pattern_def_stmt_info
->stmt
);
9423 = vect_analyze_stmt (pattern_def_stmt_info
,
9424 need_to_vectorize
, node
, node_instance
,
9432 /* Skip stmts that do not need to be vectorized. In loops this is expected
9434 - the COND_EXPR which is the loop exit condition
9435 - any LABEL_EXPRs in the loop
9436 - computations that are used only for array indexing or loop control.
9437 In basic blocks we only analyze statements that are a part of some SLP
9438 instance, therefore, all the statements are relevant.
9440 Pattern statement needs to be analyzed instead of the original statement
9441 if the original statement is not relevant. Otherwise, we analyze both
9442 statements. In basic blocks we are called from some SLP instance
9443 traversal, don't analyze pattern stmts instead, the pattern stmts
9444 already will be part of SLP instance. */
9446 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
9447 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
9448 && !STMT_VINFO_LIVE_P (stmt_info
))
9450 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9451 && pattern_stmt_info
9452 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
9453 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
9455 /* Analyze PATTERN_STMT instead of the original stmt. */
9456 stmt_info
= pattern_stmt_info
;
9457 if (dump_enabled_p ())
9458 dump_printf_loc (MSG_NOTE
, vect_location
,
9459 "==> examining pattern statement: %G",
9464 if (dump_enabled_p ())
9465 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
9467 return opt_result::success ();
9470 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
9472 && pattern_stmt_info
9473 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
9474 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
9476 /* Analyze PATTERN_STMT too. */
9477 if (dump_enabled_p ())
9478 dump_printf_loc (MSG_NOTE
, vect_location
,
9479 "==> examining pattern statement: %G",
9480 pattern_stmt_info
->stmt
);
9483 = vect_analyze_stmt (pattern_stmt_info
, need_to_vectorize
, node
,
9484 node_instance
, cost_vec
);
9489 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
9491 case vect_internal_def
:
9494 case vect_reduction_def
:
9495 case vect_nested_cycle
:
9496 gcc_assert (!bb_vinfo
9497 && (relevance
== vect_used_in_outer
9498 || relevance
== vect_used_in_outer_by_reduction
9499 || relevance
== vect_used_by_reduction
9500 || relevance
== vect_unused_in_scope
9501 || relevance
== vect_used_only_live
));
9504 case vect_induction_def
:
9505 gcc_assert (!bb_vinfo
);
9508 case vect_constant_def
:
9509 case vect_external_def
:
9510 case vect_unknown_def_type
:
9515 if (STMT_VINFO_RELEVANT_P (stmt_info
))
9517 tree type
= gimple_expr_type (stmt_info
->stmt
);
9518 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type
)));
9519 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
9520 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
9521 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
9522 *need_to_vectorize
= true;
9525 if (PURE_SLP_STMT (stmt_info
) && !node
)
9527 dump_printf_loc (MSG_NOTE
, vect_location
,
9528 "handled only by SLP analysis\n");
9529 return opt_result::success ();
9534 && (STMT_VINFO_RELEVANT_P (stmt_info
)
9535 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
9536 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
9537 -mveclibabi= takes preference over library functions with
9538 the simd attribute. */
9539 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9540 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
9542 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9543 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9544 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9545 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9546 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
9548 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9549 || vectorizable_reduction (stmt_info
, NULL
, NULL
, node
,
9550 node_instance
, cost_vec
)
9551 || vectorizable_induction (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9552 || vectorizable_condition (stmt_info
, NULL
, NULL
, NULL
, 0, node
,
9554 || vectorizable_comparison (stmt_info
, NULL
, NULL
, NULL
, node
,
9559 ok
= (vectorizable_call (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9560 || vectorizable_simd_clone_call (stmt_info
, NULL
, NULL
, node
,
9562 || vectorizable_conversion (stmt_info
, NULL
, NULL
, node
,
9564 || vectorizable_shift (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9565 || vectorizable_operation (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9566 || vectorizable_assignment (stmt_info
, NULL
, NULL
, node
,
9568 || vectorizable_load (stmt_info
, NULL
, NULL
, node
, node_instance
,
9570 || vectorizable_store (stmt_info
, NULL
, NULL
, node
, cost_vec
)
9571 || vectorizable_condition (stmt_info
, NULL
, NULL
, NULL
, 0, node
,
9573 || vectorizable_comparison (stmt_info
, NULL
, NULL
, NULL
, node
,
9578 return opt_result::failure_at (stmt_info
->stmt
,
9580 " relevant stmt not supported: %G",
9583 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
9584 need extra handling, except for vectorizable reductions. */
9586 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9587 && !can_vectorize_live_stmts (stmt_info
, NULL
, node
, NULL
, cost_vec
))
9588 return opt_result::failure_at (stmt_info
->stmt
,
9590 " live stmt not supported: %G",
9593 return opt_result::success ();
9597 /* Function vect_transform_stmt.
9599 Create a vectorized stmt to replace STMT_INFO, and insert it at BSI. */
9602 vect_transform_stmt (stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9603 slp_tree slp_node
, slp_instance slp_node_instance
)
9605 vec_info
*vinfo
= stmt_info
->vinfo
;
9606 bool is_store
= false;
9607 stmt_vec_info vec_stmt
= NULL
;
9610 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
9611 stmt_vec_info old_vec_stmt_info
= STMT_VINFO_VEC_STMT (stmt_info
);
9613 bool nested_p
= (STMT_VINFO_LOOP_VINFO (stmt_info
)
9614 && nested_in_vect_loop_p
9615 (LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info
)),
9618 gimple
*stmt
= stmt_info
->stmt
;
9619 switch (STMT_VINFO_TYPE (stmt_info
))
9621 case type_demotion_vec_info_type
:
9622 case type_promotion_vec_info_type
:
9623 case type_conversion_vec_info_type
:
9624 done
= vectorizable_conversion (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9629 case induc_vec_info_type
:
9630 done
= vectorizable_induction (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9635 case shift_vec_info_type
:
9636 done
= vectorizable_shift (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
9640 case op_vec_info_type
:
9641 done
= vectorizable_operation (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9646 case assignment_vec_info_type
:
9647 done
= vectorizable_assignment (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9652 case load_vec_info_type
:
9653 done
= vectorizable_load (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9654 slp_node_instance
, NULL
);
9658 case store_vec_info_type
:
9659 done
= vectorizable_store (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
9661 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
9663 /* In case of interleaving, the whole chain is vectorized when the
9664 last store in the chain is reached. Store stmts before the last
9665 one are skipped, and there vec_stmt_info shouldn't be freed
9667 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
9668 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
9675 case condition_vec_info_type
:
9676 done
= vectorizable_condition (stmt_info
, gsi
, &vec_stmt
, NULL
, 0,
9681 case comparison_vec_info_type
:
9682 done
= vectorizable_comparison (stmt_info
, gsi
, &vec_stmt
, NULL
,
9687 case call_vec_info_type
:
9688 done
= vectorizable_call (stmt_info
, gsi
, &vec_stmt
, slp_node
, NULL
);
9689 stmt
= gsi_stmt (*gsi
);
9692 case call_simd_clone_vec_info_type
:
9693 done
= vectorizable_simd_clone_call (stmt_info
, gsi
, &vec_stmt
,
9695 stmt
= gsi_stmt (*gsi
);
9698 case reduc_vec_info_type
:
9699 done
= vectorizable_reduction (stmt_info
, gsi
, &vec_stmt
, slp_node
,
9700 slp_node_instance
, NULL
);
9705 if (!STMT_VINFO_LIVE_P (stmt_info
))
9707 if (dump_enabled_p ())
9708 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9709 "stmt not supported.\n");
9714 /* Verify SLP vectorization doesn't mess with STMT_VINFO_VEC_STMT.
9715 This would break hybrid SLP vectorization. */
9717 gcc_assert (!vec_stmt
9718 && STMT_VINFO_VEC_STMT (stmt_info
) == old_vec_stmt_info
);
9720 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
9721 is being vectorized, but outside the immediately enclosing loop. */
9724 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
9725 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
9726 || STMT_VINFO_RELEVANT (stmt_info
) ==
9727 vect_used_in_outer_by_reduction
))
9729 struct loop
*innerloop
= LOOP_VINFO_LOOP (
9730 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
9731 imm_use_iterator imm_iter
;
9732 use_operand_p use_p
;
9735 if (dump_enabled_p ())
9736 dump_printf_loc (MSG_NOTE
, vect_location
,
9737 "Record the vdef for outer-loop vectorization.\n");
9739 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
9740 (to be used when vectorizing outer-loop stmts that use the DEF of
9742 if (gimple_code (stmt
) == GIMPLE_PHI
)
9743 scalar_dest
= PHI_RESULT (stmt
);
9745 scalar_dest
= gimple_get_lhs (stmt
);
9747 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
9748 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
9750 stmt_vec_info exit_phi_info
9751 = vinfo
->lookup_stmt (USE_STMT (use_p
));
9752 STMT_VINFO_VEC_STMT (exit_phi_info
) = vec_stmt
;
9756 /* Handle stmts whose DEF is used outside the loop-nest that is
9757 being vectorized. */
9758 if (STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
9760 done
= can_vectorize_live_stmts (stmt_info
, gsi
, slp_node
, &vec_stmt
,
9766 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
9772 /* Remove a group of stores (for SLP or interleaving), free their
9776 vect_remove_stores (stmt_vec_info first_stmt_info
)
9778 vec_info
*vinfo
= first_stmt_info
->vinfo
;
9779 stmt_vec_info next_stmt_info
= first_stmt_info
;
9781 while (next_stmt_info
)
9783 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
9784 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
9785 /* Free the attached stmt_vec_info and remove the stmt. */
9786 vinfo
->remove_stmt (next_stmt_info
);
9787 next_stmt_info
= tmp
;
9791 /* Function get_vectype_for_scalar_type_and_size.
9793 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
9797 get_vectype_for_scalar_type_and_size (tree scalar_type
, poly_uint64 size
)
9799 tree orig_scalar_type
= scalar_type
;
9800 scalar_mode inner_mode
;
9801 machine_mode simd_mode
;
9805 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
9806 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
9809 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
9811 /* For vector types of elements whose mode precision doesn't
9812 match their types precision we use a element type of mode
9813 precision. The vectorization routines will have to make sure
9814 they support the proper result truncation/extension.
9815 We also make sure to build vector types with INTEGER_TYPE
9816 component type only. */
9817 if (INTEGRAL_TYPE_P (scalar_type
)
9818 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
9819 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
9820 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
9821 TYPE_UNSIGNED (scalar_type
));
9823 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
9824 When the component mode passes the above test simply use a type
9825 corresponding to that mode. The theory is that any use that
9826 would cause problems with this will disable vectorization anyway. */
9827 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
9828 && !INTEGRAL_TYPE_P (scalar_type
))
9829 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
9831 /* We can't build a vector type of elements with alignment bigger than
9833 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
9834 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
9835 TYPE_UNSIGNED (scalar_type
));
9837 /* If we felt back to using the mode fail if there was
9838 no scalar type for it. */
9839 if (scalar_type
== NULL_TREE
)
9842 /* If no size was supplied use the mode the target prefers. Otherwise
9843 lookup a vector mode of the specified size. */
9844 if (known_eq (size
, 0U))
9845 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
9846 else if (!multiple_p (size
, nbytes
, &nunits
)
9847 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
9849 /* NOTE: nunits == 1 is allowed to support single element vector types. */
9850 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
))
9853 vectype
= build_vector_type (scalar_type
, nunits
);
9855 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
9856 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
9859 /* Re-attach the address-space qualifier if we canonicalized the scalar
9861 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
9862 return build_qualified_type
9863 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
9868 poly_uint64 current_vector_size
;
9870 /* Function get_vectype_for_scalar_type.
9872 Returns the vector type corresponding to SCALAR_TYPE as supported
9876 get_vectype_for_scalar_type (tree scalar_type
)
9879 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
9880 current_vector_size
);
9882 && known_eq (current_vector_size
, 0U))
9883 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
9887 /* Function get_mask_type_for_scalar_type.
9889 Returns the mask type corresponding to a result of comparison
9890 of vectors of specified SCALAR_TYPE as supported by target. */
9893 get_mask_type_for_scalar_type (tree scalar_type
)
9895 tree vectype
= get_vectype_for_scalar_type (scalar_type
);
9900 return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype
),
9901 current_vector_size
);
9904 /* Function get_same_sized_vectype
9906 Returns a vector type corresponding to SCALAR_TYPE of size
9907 VECTOR_TYPE if supported by the target. */
9910 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
9912 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
9913 return build_same_sized_truth_vector_type (vector_type
);
9915 return get_vectype_for_scalar_type_and_size
9916 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
9919 /* Function vect_is_simple_use.
9922 VINFO - the vect info of the loop or basic block that is being vectorized.
9923 OPERAND - operand in the loop or bb.
9925 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
9926 case OPERAND is an SSA_NAME that is defined in the vectorizable region
9927 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
9928 the definition could be anywhere in the function
9929 DT - the type of definition
9931 Returns whether a stmt with OPERAND can be vectorized.
9932 For loops, supportable operands are constants, loop invariants, and operands
9933 that are defined by the current iteration of the loop. Unsupportable
9934 operands are those that are defined by a previous iteration of the loop (as
9935 is the case in reduction/induction computations).
9936 For basic blocks, supportable operands are constants and bb invariants.
9937 For now, operands defined outside the basic block are not supported. */
9940 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
9941 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
9943 if (def_stmt_info_out
)
9944 *def_stmt_info_out
= NULL
;
9946 *def_stmt_out
= NULL
;
9947 *dt
= vect_unknown_def_type
;
9949 if (dump_enabled_p ())
9951 dump_printf_loc (MSG_NOTE
, vect_location
,
9952 "vect_is_simple_use: operand ");
9953 if (TREE_CODE (operand
) == SSA_NAME
9954 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
9955 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
9957 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
9960 if (CONSTANT_CLASS_P (operand
))
9961 *dt
= vect_constant_def
;
9962 else if (is_gimple_min_invariant (operand
))
9963 *dt
= vect_external_def
;
9964 else if (TREE_CODE (operand
) != SSA_NAME
)
9965 *dt
= vect_unknown_def_type
;
9966 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
9967 *dt
= vect_external_def
;
9970 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
9971 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
9973 *dt
= vect_external_def
;
9976 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
9977 def_stmt
= stmt_vinfo
->stmt
;
9978 switch (gimple_code (def_stmt
))
9983 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
9986 *dt
= vect_unknown_def_type
;
9989 if (def_stmt_info_out
)
9990 *def_stmt_info_out
= stmt_vinfo
;
9993 *def_stmt_out
= def_stmt
;
9996 if (dump_enabled_p ())
9998 dump_printf (MSG_NOTE
, ", type of def: ");
10001 case vect_uninitialized_def
:
10002 dump_printf (MSG_NOTE
, "uninitialized\n");
10004 case vect_constant_def
:
10005 dump_printf (MSG_NOTE
, "constant\n");
10007 case vect_external_def
:
10008 dump_printf (MSG_NOTE
, "external\n");
10010 case vect_internal_def
:
10011 dump_printf (MSG_NOTE
, "internal\n");
10013 case vect_induction_def
:
10014 dump_printf (MSG_NOTE
, "induction\n");
10016 case vect_reduction_def
:
10017 dump_printf (MSG_NOTE
, "reduction\n");
10019 case vect_double_reduction_def
:
10020 dump_printf (MSG_NOTE
, "double reduction\n");
10022 case vect_nested_cycle
:
10023 dump_printf (MSG_NOTE
, "nested cycle\n");
10025 case vect_unknown_def_type
:
10026 dump_printf (MSG_NOTE
, "unknown\n");
10031 if (*dt
== vect_unknown_def_type
)
10033 if (dump_enabled_p ())
10034 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10035 "Unsupported pattern.\n");
10042 /* Function vect_is_simple_use.
10044 Same as vect_is_simple_use but also determines the vector operand
10045 type of OPERAND and stores it to *VECTYPE. If the definition of
10046 OPERAND is vect_uninitialized_def, vect_constant_def or
10047 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
10048 is responsible to compute the best suited vector type for the
10052 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
10053 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
10054 gimple
**def_stmt_out
)
10056 stmt_vec_info def_stmt_info
;
10058 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
10062 *def_stmt_out
= def_stmt
;
10063 if (def_stmt_info_out
)
10064 *def_stmt_info_out
= def_stmt_info
;
10066 /* Now get a vector type if the def is internal, otherwise supply
10067 NULL_TREE and leave it up to the caller to figure out a proper
10068 type for the use stmt. */
10069 if (*dt
== vect_internal_def
10070 || *dt
== vect_induction_def
10071 || *dt
== vect_reduction_def
10072 || *dt
== vect_double_reduction_def
10073 || *dt
== vect_nested_cycle
)
10075 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
10076 gcc_assert (*vectype
!= NULL_TREE
);
10077 if (dump_enabled_p ())
10078 dump_printf_loc (MSG_NOTE
, vect_location
,
10079 "vect_is_simple_use: vectype %T\n", *vectype
);
10081 else if (*dt
== vect_uninitialized_def
10082 || *dt
== vect_constant_def
10083 || *dt
== vect_external_def
)
10084 *vectype
= NULL_TREE
;
10086 gcc_unreachable ();
10092 /* Function supportable_widening_operation
10094 Check whether an operation represented by the code CODE is a
10095 widening operation that is supported by the target platform in
10096 vector form (i.e., when operating on arguments of type VECTYPE_IN
10097 producing a result of type VECTYPE_OUT).
10099 Widening operations we currently support are NOP (CONVERT), FLOAT,
10100 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
10101 are supported by the target platform either directly (via vector
10102 tree-codes), or via target builtins.
10105 - CODE1 and CODE2 are codes of vector operations to be used when
10106 vectorizing the operation, if available.
10107 - MULTI_STEP_CVT determines the number of required intermediate steps in
10108 case of multi-step conversion (like char->short->int - in that case
10109 MULTI_STEP_CVT will be 1).
10110 - INTERM_TYPES contains the intermediate type required to perform the
10111 widening operation (short in the above example). */
/* NOTE(review): this extract is corrupted — logical source lines are split
   across physical lines and several original lines are missing (the embedded
   line numbers jump, e.g. 10133 -> 10135 -> 10139).  The enclosing
   `switch (code)`, most `break;` statements, the opening/closing braces and
   the final `return true;` / `return false;` lines are not visible here.
   Verify any change against upstream GCC tree-vect-stmts.c.  */
/* Decide how the scalar widening operation CODE (applied to VECTYPE_IN,
   producing VECTYPE_OUT) can be vectorized: fills *CODE1/*CODE2 with the
   vector tree codes to use, and *MULTI_STEP_CVT / *INTERM_TYPES when a
   multi-step conversion chain through intermediate types is required.  */
10114 supportable_widening_operation (enum tree_code code
, stmt_vec_info stmt_info
,
10115 tree vectype_out
, tree vectype_in
,
10116 enum tree_code
*code1
, enum tree_code
*code2
,
10117 int *multi_step_cvt
,
10118 vec
<tree
> *interm_types
)
10120 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
10121 struct loop
*vect_loop
= NULL
;
10122 machine_mode vec_mode
;
10123 enum insn_code icode1
, icode2
;
10124 optab optab1
, optab2
;
10125 tree vectype
= vectype_in
;
10126 tree wide_vectype
= vectype_out
;
10127 enum tree_code c1
, c2
;
10129 tree prev_type
, intermediate_type
;
10130 machine_mode intermediate_mode
, prev_mode
;
10131 optab optab3
, optab4
;
10133 *multi_step_cvt
= 0;
10135 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
/* Select the pair of vector tree codes C1/C2 for the scalar CODE.  The
   `switch (code)` statement itself is on a line dropped by the extraction.  */
10139 case WIDEN_MULT_EXPR
:
10140 /* The result of a vectorized widening operation usually requires
10141 two vectors (because the widened results do not fit into one vector).
10142 The generated vector results would normally be expected to be
10143 generated in the same order as in the original scalar computation,
10144 i.e. if 8 results are generated in each vector iteration, they are
10145 to be organized as follows:
10146 vect1: [res1,res2,res3,res4],
10147 vect2: [res5,res6,res7,res8].
10149 However, in the special case that the result of the widening
10150 operation is used in a reduction computation only, the order doesn't
10151 matter (because when vectorizing a reduction we change the order of
10152 the computation). Some targets can take advantage of this and
10153 generate more efficient code. For example, targets like Altivec,
10154 that support widen_mult using a sequence of {mult_even,mult_odd}
10155 generate the following vectors:
10156 vect1: [res1,res3,res5,res7],
10157 vect2: [res2,res4,res6,res8].
10159 When vectorizing outer-loops, we execute the inner-loop sequentially
10160 (each vectorized inner-loop iteration contributes to VF outer-loop
10161 iterations in parallel). We therefore don't allow to change the
10162 order of the computation in the inner-loop during outer-loop
10164 /* TODO: Another case in which order doesn't *really* matter is when we
10165 widen and then contract again, e.g. (short)((int)x * y >> 8).
10166 Normally, pack_trunc performs an even/odd permute, whereas the
10167 repack from an even/odd expansion would be an interleave, which
10168 would be significantly simpler for e.g. AVX2. */
10169 /* In any case, in order to avoid duplicating the code below, recurse
10170 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
10171 are properly set up for the caller. If we fail, we'll continue with
10172 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
10174 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
10175 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
10176 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
10177 stmt_info
, vectype_out
,
10178 vectype_in
, code1
, code2
,
10179 multi_step_cvt
, interm_types
))
10181 /* Elements in a vector with vect_used_by_reduction property cannot
10182 be reordered if the use chain with this property does not have the
10183 same operation. One such an example is s += a * b, where elements
10184 in a and b cannot be reordered. Here we check if the vector defined
10185 by STMT is only directly used in the reduction statement. */
10186 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
10187 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
10189 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
10192 c1
= VEC_WIDEN_MULT_LO_EXPR
;
10193 c2
= VEC_WIDEN_MULT_HI_EXPR
;
10196 case DOT_PROD_EXPR
:
10197 c1
= DOT_PROD_EXPR
;
10198 c2
= DOT_PROD_EXPR
;
10206 case VEC_WIDEN_MULT_EVEN_EXPR
:
10207 /* Support the recursion induced just above. */
10208 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
10209 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
10212 case WIDEN_LSHIFT_EXPR
:
10213 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
10214 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
10218 c1
= VEC_UNPACK_LO_EXPR
;
10219 c2
= VEC_UNPACK_HI_EXPR
;
10223 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
10224 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
10227 case FIX_TRUNC_EXPR
:
10228 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
10229 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
10233 gcc_unreachable ();
/* LO/HI code pairs depend on element order, so swap them on big-endian
   targets; the EVEN/ODD multiply pair is explicitly excluded from the
   swap by the condition below.  */
10236 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
10237 std::swap (c1
, c2
);
10239 if (code
== FIX_TRUNC_EXPR
)
10241 /* The signedness is determined from output operand. */
10242 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
10243 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
10247 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
10248 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
10251 if (!optab1
|| !optab2
)
/* Query the target: both insn patterns must exist for the input mode.  */
10254 vec_mode
= TYPE_MODE (vectype
);
10255 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
10256 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
10262 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
10263 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
10264 /* For scalar masks we may have different boolean
10265 vector types having the same QImode. Thus we
10266 add additional check for elements number. */
10267 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10268 || known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
10269 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2));
10271 /* Check if it's a multi-step conversion that can be done using intermediate
10274 prev_type
= vectype
;
10275 prev_mode
= vec_mode
;
10277 if (!CONVERT_EXPR_CODE_P (code
))
10280 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10281 intermediate steps in promotion sequence. We try
10282 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
10284 interm_types
->create (MAX_INTERM_CVT_STEPS
);
10285 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
10287 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
10288 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
10290 intermediate_type
= vect_halve_mask_nunits (prev_type
);
10291 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
10296 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
10297 TYPE_UNSIGNED (prev_type
));
10299 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
10300 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
/* Each step must be supported both from PREV_MODE into the intermediate
   mode and from the intermediate mode onwards.  */
10302 if (!optab3
|| !optab4
10303 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
10304 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
10305 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
10306 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
10307 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
10308 == CODE_FOR_nothing
)
10309 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
10310 == CODE_FOR_nothing
))
10313 interm_types
->quick_push (intermediate_type
);
10314 (*multi_step_cvt
)++;
10316 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
10317 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
10318 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10319 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
10320 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2));
10322 prev_type
= intermediate_type
;
10323 prev_mode
= intermediate_mode
;
/* No chain of at most MAX_INTERM_CVT_STEPS reached the wide type: discard
   the partial list.  NOTE(review): the final `return false;` and closing
   brace are on lines not visible in this extract.  */
10326 interm_types
->release ();
10331 /* Function supportable_narrowing_operation
10333 Check whether an operation represented by the code CODE is a
10334 narrowing operation that is supported by the target platform in
10335 vector form (i.e., when operating on arguments of type VECTYPE_IN
10336 and producing a result of type VECTYPE_OUT).
10338 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
10339 and FLOAT. This function checks if these operations are supported by
10340 the target platform directly via vector tree-codes.
10343 - CODE1 is the code of a vector operation to be used when
10344 vectorizing the operation, if available.
10345 - MULTI_STEP_CVT determines the number of required intermediate steps in
10346 case of multi-step conversion (like int->short->char - in that case
10347 MULTI_STEP_CVT will be 1).
10348 - INTERM_TYPES contains the intermediate type required to perform the
10349 narrowing operation (short in the above example). */
/* NOTE(review): like the rest of this extract, lines are split and several
   original lines are missing (the embedded numbers jump, e.g. the
   `switch (code)` header, `break;` lines, braces and final returns).
   Verify any change against upstream GCC tree-vect-stmts.c.  */
/* Decide how the scalar narrowing operation CODE (VECTYPE_IN ->
   VECTYPE_OUT) can be vectorized: fills *CODE1 with the vector pack code,
   and *MULTI_STEP_CVT / *INTERM_TYPES for multi-step chains.  */
10352 supportable_narrowing_operation (enum tree_code code
,
10353 tree vectype_out
, tree vectype_in
,
10354 enum tree_code
*code1
, int *multi_step_cvt
,
10355 vec
<tree
> *interm_types
)
10357 machine_mode vec_mode
;
10358 enum insn_code icode1
;
10359 optab optab1
, interm_optab
;
10360 tree vectype
= vectype_in
;
10361 tree narrow_vectype
= vectype_out
;
10363 tree intermediate_type
, prev_type
;
10364 machine_mode intermediate_mode
, prev_mode
;
10368 *multi_step_cvt
= 0;
/* Select the vector pack code C1 for the scalar CODE; the enclosing
   `switch (code)` line was dropped by the extraction.  */
10372 c1
= VEC_PACK_TRUNC_EXPR
;
10375 case FIX_TRUNC_EXPR
:
10376 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
10380 c1
= VEC_PACK_FLOAT_EXPR
;
10384 gcc_unreachable ();
10387 if (code
== FIX_TRUNC_EXPR
)
10388 /* The signedness is determined from output operand. */
10389 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
10391 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
/* Single-step case: the target pattern's result mode must match the
   narrow vector type.  */
10396 vec_mode
= TYPE_MODE (vectype
);
10397 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
10402 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
10403 /* For scalar masks we may have different boolean
10404 vector types having the same QImode. Thus we
10405 add additional check for elements number. */
10406 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10407 || known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
10408 TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
10410 if (code
== FLOAT_EXPR
)
10413 /* Check if it's a multi-step conversion that can be done using intermediate
10415 prev_mode
= vec_mode
;
10416 prev_type
= vectype
;
10417 if (code
== FIX_TRUNC_EXPR
)
10418 uns
= TYPE_UNSIGNED (vectype_out
);
10420 uns
= TYPE_UNSIGNED (vectype
);
10422 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
10423 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
10424 costly than signed. */
10425 if (code
== FIX_TRUNC_EXPR
&& uns
)
10427 enum insn_code icode2
;
10430 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
10432 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
10433 if (interm_optab
!= unknown_optab
10434 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
10435 && insn_data
[icode1
].operand
[0].mode
10436 == insn_data
[icode2
].operand
[0].mode
)
10439 optab1
= interm_optab
;
10444 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
10445 intermediate steps in promotion sequence. We try
10446 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
10447 interm_types
->create (MAX_INTERM_CVT_STEPS
);
10448 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
10450 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
10451 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
10453 intermediate_type
= vect_double_mask_nunits (prev_type
);
10454 if (intermediate_mode
!= TYPE_MODE (intermediate_type
))
10459 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
10461 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
/* Both the step from PREV_MODE and the follow-on pack from the
   intermediate mode must be supported by the target.  */
10464 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
10465 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
10466 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
10467 == CODE_FOR_nothing
))
10470 interm_types
->quick_push (intermediate_type
);
10471 (*multi_step_cvt
)++;
10473 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
10474 return (!VECTOR_BOOLEAN_TYPE_P (vectype
)
10475 || known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
10476 TYPE_VECTOR_SUBPARTS (narrow_vectype
)));
10478 prev_mode
= intermediate_mode
;
10479 prev_type
= intermediate_type
;
10480 optab1
= interm_optab
;
/* No conversion chain found: discard the partial list.  NOTE(review):
   the final `return false;` and closing brace are not visible here.  */
10483 interm_types
->release ();
10487 /* Generate and return a statement that sets vector mask MASK such that
10488 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
/* Build an IFN_WHILE_ULT internal-function call computing the
   while-ult mask from START_INDEX/END_INDEX into MASK.  The call is
   built, not emitted; the target must support WHILE_ULT for these
   types (checked below).  NOTE(review): the trailing `return call;`
   line is not visible in this extract — verify against upstream.  */
10491 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
10493 tree cmp_type
= TREE_TYPE (start_index
);
10494 tree mask_type
= TREE_TYPE (mask
);
10495 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
10496 cmp_type
, mask_type
,
10497 OPTIMIZE_FOR_SPEED
));
/* Third argument is a zero mask of the result type.  */
10498 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
10499 start_index
, end_index
,
10500 build_zero_cst (mask_type
));
10501 gimple_call_set_lhs (call
, mask
);
10505 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
10506 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
/* Build the inverted while-ult mask: generate a WHILE_ULT mask into a
   temporary via vect_gen_while, append the call to SEQ, and return the
   BIT_NOT of that temporary.  NOTE(review): the signature line declaring
   the final `end_index` parameter (original line 10510) was dropped by
   the extraction.  */
10509 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
10512 tree tmp
= make_ssa_name (mask_type
);
10513 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
10514 gimple_seq_add_stmt (seq
, call
);
10515 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
10518 /* Try to compute the vector types required to vectorize STMT_INFO,
10519 returning true on success and false if vectorization isn't possible.
10523 - Set *STMT_VECTYPE_OUT to:
10524 - NULL_TREE if the statement doesn't need to be vectorized;
10525 - boolean_type_node if the statement is a boolean operation whose
10526 vector type can only be determined once all the other vector types
10528 - the equivalent of STMT_VINFO_VECTYPE otherwise.
10530 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
10531 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
10532 statement does not help to determine the overall number of units. */
/* Compute *STMT_VECTYPE_OUT and *NUNITS_VECTYPE_OUT for STMT_INFO (see the
   header comment above); returns opt_result success/failure.
   NOTE(review): this extract is missing several original lines (condition
   lines, braces, `else` keywords — the embedded line numbers jump), so the
   control flow shown here is incomplete.  Verify against upstream GCC
   tree-vect-stmts.c before editing.  */
10535 vect_get_vector_types_for_stmt (stmt_vec_info stmt_info
,
10536 tree
*stmt_vectype_out
,
10537 tree
*nunits_vectype_out
)
10539 gimple
*stmt
= stmt_info
->stmt
;
10541 *stmt_vectype_out
= NULL_TREE
;
10542 *nunits_vectype_out
= NULL_TREE
;
10544 if (gimple_get_lhs (stmt
) == NULL_TREE
10545 /* MASK_STORE has no lhs, but is ok. */
10546 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
10548 if (is_a
<gcall
*> (stmt
))
10550 /* Ignore calls with no lhs. These must be calls to
10551 #pragma omp simd functions, and what vectorization factor
10552 it really needs can't be determined until
10553 vectorizable_simd_clone_call. */
10554 if (dump_enabled_p ())
10555 dump_printf_loc (MSG_NOTE
, vect_location
,
10556 "defer to SIMD clone analysis.\n");
10557 return opt_result::success ();
10560 return opt_result::failure_at (stmt
,
10561 "not vectorized: irregular stmt.%G", stmt
);
/* Statements that already operate on vectors cannot be re-vectorized.  */
10564 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
10565 return opt_result::failure_at (stmt
,
10566 "not vectorized: vector stmt in loop:%G",
10570 tree scalar_type
= NULL_TREE
;
10571 if (STMT_VINFO_VECTYPE (stmt_info
))
10572 *stmt_vectype_out
= vectype
= STMT_VINFO_VECTYPE (stmt_info
);
/* Otherwise derive the scalar type: for IFN_MASK_STORE from the stored
   value (call argument 3), else from the statement's lhs.  */
10575 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info
));
10576 if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
10577 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
10579 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
10581 /* Pure bool ops don't participate in number-of-units computation.
10582 For comparisons use the types being compared. */
10583 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
)
10584 && is_gimple_assign (stmt
)
10585 && gimple_assign_rhs_code (stmt
) != COND_EXPR
)
10587 *stmt_vectype_out
= boolean_type_node
;
10589 tree rhs1
= gimple_assign_rhs1 (stmt
);
10590 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
10591 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10592 scalar_type
= TREE_TYPE (rhs1
);
10595 if (dump_enabled_p ())
10596 dump_printf_loc (MSG_NOTE
, vect_location
,
10597 "pure bool operation.\n");
10598 return opt_result::success ();
10602 if (dump_enabled_p ())
10603 dump_printf_loc (MSG_NOTE
, vect_location
,
10604 "get vectype for scalar type: %T\n", scalar_type
);
10605 vectype
= get_vectype_for_scalar_type (scalar_type
);
10607 return opt_result::failure_at (stmt
,
10609 " unsupported data-type %T\n",
10612 if (!*stmt_vectype_out
)
10613 *stmt_vectype_out
= vectype
;
10615 if (dump_enabled_p ())
10616 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
10619 /* Don't try to compute scalar types if the stmt produces a boolean
10620 vector; use the existing vector type instead. */
10621 tree nunits_vectype
;
10622 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10623 nunits_vectype
= vectype
;
10626 /* The number of units is set according to the smallest scalar
10627 type (or the largest vector size, but we only support one
10628 vector size per vectorization). */
10629 if (*stmt_vectype_out
!= boolean_type_node
)
10631 HOST_WIDE_INT dummy
;
10632 scalar_type
= vect_get_smallest_scalar_type (stmt_info
,
10635 if (dump_enabled_p ())
10636 dump_printf_loc (MSG_NOTE
, vect_location
,
10637 "get vectype for scalar type: %T\n", scalar_type
);
10638 nunits_vectype
= get_vectype_for_scalar_type (scalar_type
);
10640 if (!nunits_vectype
)
10641 return opt_result::failure_at (stmt
,
10642 "not vectorized: unsupported data-type %T\n",
/* The statement's own vectype and the nunits vectype must have the same
   vector (mode) size; only one vector size per loop is supported.  */
10645 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
10646 GET_MODE_SIZE (TYPE_MODE (nunits_vectype
))))
10647 return opt_result::failure_at (stmt
,
10648 "not vectorized: different sized vector "
10649 "types in statement, %T and %T\n",
10650 vectype
, nunits_vectype
);
10652 if (dump_enabled_p ())
10654 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n",
10657 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
10658 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
10659 dump_printf (MSG_NOTE
, "\n");
10662 *nunits_vectype_out
= nunits_vectype
;
10663 return opt_result::success ();
10666 /* Try to determine the correct vector type for STMT_INFO, which is a
10667 statement that produces a scalar boolean result. Return the vector
10668 type on success, otherwise return NULL_TREE. */
/* Determine the vector mask type for STMT_INFO (a statement producing a
   scalar boolean); returns opt_tree success/failure.  NOTE(review):
   several original lines are missing from this extract (condition lines,
   braces, `else` branches — the embedded numbers jump), and the closing
   brace falls past the end of the visible region.  Verify against
   upstream GCC tree-vect-stmts.c before editing.  */
10671 vect_get_mask_type_for_stmt (stmt_vec_info stmt_info
)
10673 gimple
*stmt
= stmt_info
->stmt
;
10674 tree mask_type
= NULL
;
10675 tree vectype
, scalar_type
;
/* For a comparison of non-boolean operands, derive the mask type from
   the type being compared.  */
10677 if (is_gimple_assign (stmt
)
10678 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
10679 && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt
))))
10681 scalar_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
10682 mask_type
= get_mask_type_for_scalar_type (scalar_type
);
10685 return opt_tree::failure_at (stmt
,
10686 "not vectorized: unsupported mask\n");
/* Otherwise infer the mask type from the statement's SSA use operands,
   requiring all operands to agree.  */
10692 enum vect_def_type dt
;
10694 FOR_EACH_SSA_TREE_OPERAND (rhs
, stmt
, iter
, SSA_OP_USE
)
10696 if (!vect_is_simple_use (rhs
, stmt_info
->vinfo
, &dt
, &vectype
))
10697 return opt_tree::failure_at (stmt
,
10698 "not vectorized:can't compute mask"
10699 " type for statement, %G", stmt
);
10701 /* No vectype probably means external definition.
10702 Allow it in case there is another operand which
10703 allows to determine mask type. */
10708 mask_type
= vectype
;
10709 else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type
),
10710 TYPE_VECTOR_SUBPARTS (vectype
)))
10711 return opt_tree::failure_at (stmt
,
10712 "not vectorized: different sized mask"
10713 " types in statement, %T and %T\n",
10714 mask_type
, vectype
);
10715 else if (VECTOR_BOOLEAN_TYPE_P (mask_type
)
10716 != VECTOR_BOOLEAN_TYPE_P (vectype
))
10717 return opt_tree::failure_at (stmt
,
10718 "not vectorized: mixed mask and "
10719 "nonmask vector types in statement, "
10721 mask_type
, vectype
);
10724 /* We may compare boolean value loaded as vector of integers.
10725 Fix mask_type in such case. */
10727 && !VECTOR_BOOLEAN_TYPE_P (mask_type
)
10728 && gimple_code (stmt
) == GIMPLE_ASSIGN
10729 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt
)) == tcc_comparison
)
10730 mask_type
= build_same_sized_truth_vector_type (mask_type
);
10733 /* No mask_type should mean loop invariant predicate.
10734 This is probably a subject for optimization in if-conversion. */
10736 return opt_tree::failure_at (stmt
,
10737 "not vectorized: can't compute mask type "
10738 "for statement: %G", stmt
);
10740 return opt_tree::success (mask_type
);