1 /* Statement Analysis and Transformation for Vectorization
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
6 This file is part of GCC.
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
31 #include "optabs-tree.h"
32 #include "insn-config.h"
33 #include "recog.h" /* FIXME: for insn_data */
37 #include "fold-const.h"
38 #include "stor-layout.h"
41 #include "gimple-iterator.h"
42 #include "gimplify-me.h"
44 #include "tree-ssa-loop-manip.h"
47 #include "tree-ssa-loop.h"
48 #include "tree-scalar-evolution.h"
49 #include "tree-vectorizer.h"
51 #include "internal-fn.h"
52 #include "tree-vector-builder.h"
53 #include "vec-perm-indices.h"
54 #include "tree-ssa-loop-niter.h"
55 #include "gimple-fold.h"
59 /* For lang_hooks.types.type_for_mode. */
60 #include "langhooks.h"
62 /* Return the vectorized type for the given statement. */
65 stmt_vectype (class _stmt_vec_info
*stmt_info
)
67 return STMT_VINFO_VECTYPE (stmt_info
);
70 /* Return TRUE iff the given statement is in an inner loop relative to
71 the loop being vectorized. */
73 stmt_in_inner_loop_p (vec_info
*vinfo
, class _stmt_vec_info
*stmt_info
)
75 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
76 basic_block bb
= gimple_bb (stmt
);
77 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
83 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
85 return (bb
->loop_father
== loop
->inner
);
88 /* Record the cost of a statement, either by directly informing the
89 target model or by saving it in a vector for later processing.
90 Return a preliminary estimate of the statement's cost. */
93 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
94 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
95 tree vectype
, int misalign
,
96 enum vect_cost_model_location where
)
98 if ((kind
== vector_load
|| kind
== unaligned_load
)
99 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
100 kind
= vector_gather_load
;
101 if ((kind
== vector_store
|| kind
== unaligned_store
)
102 && (stmt_info
&& STMT_VINFO_GATHER_SCATTER_P (stmt_info
)))
103 kind
= vector_scatter_store
;
105 stmt_info_for_cost si
= { count
, kind
, where
, stmt_info
, vectype
, misalign
};
106 body_cost_vec
->safe_push (si
);
109 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
112 /* Return a variable of type ELEM_TYPE[NELEMS]. */
115 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
117 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
121 /* ARRAY is an array of vectors created by create_vector_array.
122 Return an SSA_NAME for the vector in index N. The reference
123 is part of the vectorization of STMT_INFO and the vector is associated
124 with scalar destination SCALAR_DEST. */
127 read_vector_array (vec_info
*vinfo
,
128 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
129 tree scalar_dest
, tree array
, unsigned HOST_WIDE_INT n
)
131 tree vect_type
, vect
, vect_name
, array_ref
;
134 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
135 vect_type
= TREE_TYPE (TREE_TYPE (array
));
136 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
137 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
138 build_int_cst (size_type_node
, n
),
139 NULL_TREE
, NULL_TREE
);
141 new_stmt
= gimple_build_assign (vect
, array_ref
);
142 vect_name
= make_ssa_name (vect
, new_stmt
);
143 gimple_assign_set_lhs (new_stmt
, vect_name
);
144 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
149 /* ARRAY is an array of vectors created by create_vector_array.
150 Emit code to store SSA_NAME VECT in index N of the array.
151 The store is part of the vectorization of STMT_INFO. */
154 write_vector_array (vec_info
*vinfo
,
155 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
156 tree vect
, tree array
, unsigned HOST_WIDE_INT n
)
161 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
162 build_int_cst (size_type_node
, n
),
163 NULL_TREE
, NULL_TREE
);
165 new_stmt
= gimple_build_assign (array_ref
, vect
);
166 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
169 /* PTR is a pointer to an array of type TYPE. Return a representation
170 of *PTR. The memory reference replaces those in FIRST_DR
174 create_array_ref (tree type
, tree ptr
, tree alias_ptr_type
)
178 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
179 /* Arrays have the same alignment as their type. */
180 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
184 /* Add a clobber of variable VAR to the vectorization of STMT_INFO.
185 Emit the clobber before *GSI. */
188 vect_clobber_variable (vec_info
*vinfo
, stmt_vec_info stmt_info
,
189 gimple_stmt_iterator
*gsi
, tree var
)
191 tree clobber
= build_clobber (TREE_TYPE (var
));
192 gimple
*new_stmt
= gimple_build_assign (var
, clobber
);
193 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
196 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
198 /* Function vect_mark_relevant.
200 Mark STMT_INFO as "relevant for vectorization" and add it to WORKLIST. */
203 vect_mark_relevant (vec
<stmt_vec_info
> *worklist
, stmt_vec_info stmt_info
,
204 enum vect_relevant relevant
, bool live_p
)
206 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
207 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
209 if (dump_enabled_p ())
210 dump_printf_loc (MSG_NOTE
, vect_location
,
211 "mark relevant %d, live %d: %G", relevant
, live_p
,
214 /* If this stmt is an original stmt in a pattern, we might need to mark its
215 related pattern stmt instead of the original stmt. However, such stmts
216 may have their own uses that are not in any pattern, in such cases the
217 stmt itself should be marked. */
218 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
220 /* This is the last stmt in a sequence that was detected as a
221 pattern that can potentially be vectorized. Don't mark the stmt
222 as relevant/live because it's not going to be vectorized.
223 Instead mark the pattern-stmt that replaces it. */
225 if (dump_enabled_p ())
226 dump_printf_loc (MSG_NOTE
, vect_location
,
227 "last stmt in pattern. don't mark"
228 " relevant/live.\n");
229 stmt_vec_info old_stmt_info
= stmt_info
;
230 stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
231 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == old_stmt_info
);
232 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
233 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
236 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
237 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
238 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
240 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
241 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
243 if (dump_enabled_p ())
244 dump_printf_loc (MSG_NOTE
, vect_location
,
245 "already marked relevant/live.\n");
249 worklist
->safe_push (stmt_info
);
253 /* Function is_simple_and_all_uses_invariant
255 Return true if STMT_INFO is simple and all uses of it are invariant. */
258 is_simple_and_all_uses_invariant (stmt_vec_info stmt_info
,
259 loop_vec_info loop_vinfo
)
264 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
268 FOR_EACH_SSA_TREE_OPERAND (op
, stmt
, iter
, SSA_OP_USE
)
270 enum vect_def_type dt
= vect_uninitialized_def
;
272 if (!vect_is_simple_use (op
, loop_vinfo
, &dt
))
274 if (dump_enabled_p ())
275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
276 "use not simple.\n");
280 if (dt
!= vect_external_def
&& dt
!= vect_constant_def
)
286 /* Function vect_stmt_relevant_p.
288 Return true if STMT_INFO, in the loop that is represented by LOOP_VINFO,
289 is "relevant for vectorization".
291 A stmt is considered "relevant for vectorization" if:
292 - it has uses outside the loop.
293 - it has vdefs (it alters memory).
294 - control stmts in the loop (except for the exit condition).
296 CHECKME: what other side effects would the vectorizer allow? */
299 vect_stmt_relevant_p (stmt_vec_info stmt_info
, loop_vec_info loop_vinfo
,
300 enum vect_relevant
*relevant
, bool *live_p
)
302 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
304 imm_use_iterator imm_iter
;
308 *relevant
= vect_unused_in_scope
;
311 /* cond stmt other than loop exit cond. */
312 if (is_ctrl_stmt (stmt_info
->stmt
)
313 && STMT_VINFO_TYPE (stmt_info
) != loop_exit_ctrl_vec_info_type
)
314 *relevant
= vect_used_in_scope
;
316 /* changing memory. */
317 if (gimple_code (stmt_info
->stmt
) != GIMPLE_PHI
)
318 if (gimple_vdef (stmt_info
->stmt
)
319 && !gimple_clobber_p (stmt_info
->stmt
))
321 if (dump_enabled_p ())
322 dump_printf_loc (MSG_NOTE
, vect_location
,
323 "vec_stmt_relevant_p: stmt has vdefs.\n");
324 *relevant
= vect_used_in_scope
;
327 /* uses outside the loop. */
328 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt_info
->stmt
, op_iter
, SSA_OP_DEF
)
330 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
332 basic_block bb
= gimple_bb (USE_STMT (use_p
));
333 if (!flow_bb_inside_loop_p (loop
, bb
))
335 if (is_gimple_debug (USE_STMT (use_p
)))
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_NOTE
, vect_location
,
340 "vec_stmt_relevant_p: used out of loop.\n");
342 /* We expect all such uses to be in the loop exit phis
343 (because of loop closed form) */
344 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
345 gcc_assert (bb
== single_exit (loop
)->dest
);
352 if (*live_p
&& *relevant
== vect_unused_in_scope
353 && !is_simple_and_all_uses_invariant (stmt_info
, loop_vinfo
))
355 if (dump_enabled_p ())
356 dump_printf_loc (MSG_NOTE
, vect_location
,
357 "vec_stmt_relevant_p: stmt live but not relevant.\n");
358 *relevant
= vect_used_only_live
;
361 return (*live_p
|| *relevant
);
365 /* Function exist_non_indexing_operands_for_use_p
367 USE is one of the uses attached to STMT_INFO. Check if USE is
368 used in STMT_INFO for anything other than indexing an array. */
371 exist_non_indexing_operands_for_use_p (tree use
, stmt_vec_info stmt_info
)
375 /* USE corresponds to some operand in STMT. If there is no data
376 reference in STMT, then any operand that corresponds to USE
377 is not indexing an array. */
378 if (!STMT_VINFO_DATA_REF (stmt_info
))
381 /* STMT has a data_ref. FORNOW this means that its of one of
385 (This should have been verified in analyze_data_refs).
387 'var' in the second case corresponds to a def, not a use,
388 so USE cannot correspond to any operands that are not used
391 Therefore, all we need to check is if STMT falls into the
392 first case, and whether var corresponds to USE. */
394 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
395 if (!assign
|| !gimple_assign_copy_p (assign
))
397 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
398 if (call
&& gimple_call_internal_p (call
))
400 internal_fn ifn
= gimple_call_internal_fn (call
);
401 int mask_index
= internal_fn_mask_index (ifn
);
403 && use
== gimple_call_arg (call
, mask_index
))
405 int stored_value_index
= internal_fn_stored_value_index (ifn
);
406 if (stored_value_index
>= 0
407 && use
== gimple_call_arg (call
, stored_value_index
))
409 if (internal_gather_scatter_fn_p (ifn
)
410 && use
== gimple_call_arg (call
, 1))
416 if (TREE_CODE (gimple_assign_lhs (assign
)) == SSA_NAME
)
418 operand
= gimple_assign_rhs1 (assign
);
419 if (TREE_CODE (operand
) != SSA_NAME
)
430 Function process_use.
433 - a USE in STMT_VINFO in a loop represented by LOOP_VINFO
434 - RELEVANT - enum value to be set in the STMT_VINFO of the stmt
435 that defined USE. This is done by calling mark_relevant and passing it
436 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
437 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
441 Generally, LIVE_P and RELEVANT are used to define the liveness and
442 relevance info of the DEF_STMT of this USE:
443 STMT_VINFO_LIVE_P (DEF_stmt_vinfo) <-- live_p
444 STMT_VINFO_RELEVANT (DEF_stmt_vinfo) <-- relevant
446 - case 1: If USE is used only for address computations (e.g. array indexing),
447 which does not need to be directly vectorized, then the liveness/relevance
448 of the respective DEF_STMT is left unchanged.
449 - case 2: If STMT_VINFO is a reduction phi and DEF_STMT is a reduction stmt,
450 we skip DEF_STMT cause it had already been processed.
451 - case 3: If DEF_STMT and STMT_VINFO are in different nests, then
452 "relevant" will be modified accordingly.
454 Return true if everything is as expected. Return false otherwise. */
457 process_use (stmt_vec_info stmt_vinfo
, tree use
, loop_vec_info loop_vinfo
,
458 enum vect_relevant relevant
, vec
<stmt_vec_info
> *worklist
,
461 stmt_vec_info dstmt_vinfo
;
462 enum vect_def_type dt
;
464 /* case 1: we are only interested in uses that need to be vectorized. Uses
465 that are used for address computation are not considered relevant. */
466 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt_vinfo
))
467 return opt_result::success ();
469 if (!vect_is_simple_use (use
, loop_vinfo
, &dt
, &dstmt_vinfo
))
470 return opt_result::failure_at (stmt_vinfo
->stmt
,
472 " unsupported use in stmt.\n");
475 return opt_result::success ();
477 basic_block def_bb
= gimple_bb (dstmt_vinfo
->stmt
);
478 basic_block bb
= gimple_bb (stmt_vinfo
->stmt
);
480 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO).
481 We have to force the stmt live since the epilogue loop needs it to
482 continue computing the reduction. */
483 if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
484 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
485 && gimple_code (dstmt_vinfo
->stmt
) != GIMPLE_PHI
486 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
487 && bb
->loop_father
== def_bb
->loop_father
)
489 if (dump_enabled_p ())
490 dump_printf_loc (MSG_NOTE
, vect_location
,
491 "reduc-stmt defining reduc-phi in the same nest.\n");
492 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, true);
493 return opt_result::success ();
496 /* case 3a: outer-loop stmt defining an inner-loop stmt:
497 outer-loop-header-bb:
503 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
505 if (dump_enabled_p ())
506 dump_printf_loc (MSG_NOTE
, vect_location
,
507 "outer-loop def-stmt defining inner-loop stmt.\n");
511 case vect_unused_in_scope
:
512 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
513 vect_used_in_scope
: vect_unused_in_scope
;
516 case vect_used_in_outer_by_reduction
:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
518 relevant
= vect_used_by_reduction
;
521 case vect_used_in_outer
:
522 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
523 relevant
= vect_used_in_scope
;
526 case vect_used_in_scope
:
534 /* case 3b: inner-loop stmt defining an outer-loop stmt:
535 outer-loop-header-bb:
539 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
541 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
543 if (dump_enabled_p ())
544 dump_printf_loc (MSG_NOTE
, vect_location
,
545 "inner-loop def-stmt defining outer-loop stmt.\n");
549 case vect_unused_in_scope
:
550 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
551 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
552 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
555 case vect_used_by_reduction
:
556 case vect_used_only_live
:
557 relevant
= vect_used_in_outer_by_reduction
;
560 case vect_used_in_scope
:
561 relevant
= vect_used_in_outer
;
568 /* We are also not interested in uses on loop PHI backedges that are
569 inductions. Otherwise we'll needlessly vectorize the IV increment
570 and cause hybrid SLP for SLP inductions. Unless the PHI is live
572 else if (gimple_code (stmt_vinfo
->stmt
) == GIMPLE_PHI
573 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_induction_def
574 && ! STMT_VINFO_LIVE_P (stmt_vinfo
)
575 && (PHI_ARG_DEF_FROM_EDGE (stmt_vinfo
->stmt
,
576 loop_latch_edge (bb
->loop_father
))
579 if (dump_enabled_p ())
580 dump_printf_loc (MSG_NOTE
, vect_location
,
581 "induction value on backedge.\n");
582 return opt_result::success ();
586 vect_mark_relevant (worklist
, dstmt_vinfo
, relevant
, false);
587 return opt_result::success ();
591 /* Function vect_mark_stmts_to_be_vectorized.
593 Not all stmts in the loop need to be vectorized. For example:
602 Stmt 1 and 3 do not need to be vectorized, because loop control and
603 addressing of vectorized data-refs are handled differently.
605 This pass detects such stmts. */
608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
, bool *fatal
)
610 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
611 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
612 unsigned int nbbs
= loop
->num_nodes
;
613 gimple_stmt_iterator si
;
617 enum vect_relevant relevant
;
619 DUMP_VECT_SCOPE ("vect_mark_stmts_to_be_vectorized");
621 auto_vec
<stmt_vec_info
, 64> worklist
;
623 /* 1. Init worklist. */
624 for (i
= 0; i
< nbbs
; i
++)
627 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
629 stmt_vec_info phi_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
630 if (dump_enabled_p ())
631 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? %G",
634 if (vect_stmt_relevant_p (phi_info
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, phi_info
, relevant
, live_p
);
637 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
639 stmt_vec_info stmt_info
= loop_vinfo
->lookup_stmt (gsi_stmt (si
));
640 if (dump_enabled_p ())
641 dump_printf_loc (MSG_NOTE
, vect_location
,
642 "init: stmt relevant? %G", stmt_info
->stmt
);
644 if (vect_stmt_relevant_p (stmt_info
, loop_vinfo
, &relevant
, &live_p
))
645 vect_mark_relevant (&worklist
, stmt_info
, relevant
, live_p
);
649 /* 2. Process_worklist */
650 while (worklist
.length () > 0)
655 stmt_vec_info stmt_vinfo
= worklist
.pop ();
656 if (dump_enabled_p ())
657 dump_printf_loc (MSG_NOTE
, vect_location
,
658 "worklist: examine stmt: %G", stmt_vinfo
->stmt
);
660 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
661 (DEF_STMT) as relevant/irrelevant according to the relevance property
663 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
665 /* Generally, the relevance property of STMT (in STMT_VINFO_RELEVANT) is
666 propagated as is to the DEF_STMTs of its USEs.
668 One exception is when STMT has been identified as defining a reduction
669 variable; in this case we set the relevance to vect_used_by_reduction.
670 This is because we distinguish between two kinds of relevant stmts -
671 those that are used by a reduction computation, and those that are
672 (also) used by a regular computation. This allows us later on to
673 identify stmts that are used solely by a reduction, and therefore the
674 order of the results that they produce does not have to be kept. */
676 switch (STMT_VINFO_DEF_TYPE (stmt_vinfo
))
678 case vect_reduction_def
:
679 gcc_assert (relevant
!= vect_unused_in_scope
);
680 if (relevant
!= vect_unused_in_scope
681 && relevant
!= vect_used_in_scope
682 && relevant
!= vect_used_by_reduction
683 && relevant
!= vect_used_only_live
)
684 return opt_result::failure_at
685 (stmt_vinfo
->stmt
, "unsupported use of reduction.\n");
688 case vect_nested_cycle
:
689 if (relevant
!= vect_unused_in_scope
690 && relevant
!= vect_used_in_outer_by_reduction
691 && relevant
!= vect_used_in_outer
)
692 return opt_result::failure_at
693 (stmt_vinfo
->stmt
, "unsupported use of nested cycle.\n");
696 case vect_double_reduction_def
:
697 if (relevant
!= vect_unused_in_scope
698 && relevant
!= vect_used_by_reduction
699 && relevant
!= vect_used_only_live
)
700 return opt_result::failure_at
701 (stmt_vinfo
->stmt
, "unsupported use of double reduction.\n");
708 if (is_pattern_stmt_p (stmt_vinfo
))
710 /* Pattern statements are not inserted into the code, so
711 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
712 have to scan the RHS or function arguments instead. */
713 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
))
715 enum tree_code rhs_code
= gimple_assign_rhs_code (assign
);
716 tree op
= gimple_assign_rhs1 (assign
);
719 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
722 = process_use (stmt_vinfo
, TREE_OPERAND (op
, 0),
723 loop_vinfo
, relevant
, &worklist
, false);
726 res
= process_use (stmt_vinfo
, TREE_OPERAND (op
, 1),
727 loop_vinfo
, relevant
, &worklist
, false);
732 for (; i
< gimple_num_ops (assign
); i
++)
734 op
= gimple_op (assign
, i
);
735 if (TREE_CODE (op
) == SSA_NAME
)
738 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
745 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt_vinfo
->stmt
))
747 for (i
= 0; i
< gimple_call_num_args (call
); i
++)
749 tree arg
= gimple_call_arg (call
, i
);
751 = process_use (stmt_vinfo
, arg
, loop_vinfo
, relevant
,
759 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt_vinfo
->stmt
, iter
, SSA_OP_USE
)
761 tree op
= USE_FROM_PTR (use_p
);
763 = process_use (stmt_vinfo
, op
, loop_vinfo
, relevant
,
769 if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo
))
771 gather_scatter_info gs_info
;
772 if (!vect_check_gather_scatter (stmt_vinfo
, loop_vinfo
, &gs_info
))
775 = process_use (stmt_vinfo
, gs_info
.offset
, loop_vinfo
, relevant
,
784 } /* while worklist */
786 return opt_result::success ();
789 /* Function vect_model_simple_cost.
791 Models cost for simple operations, i.e. those that only emit ncopies of a
792 single op. Right now, this does not account for multiple insns that could
793 be generated for the single vector op. We will handle that shortly. */
796 vect_model_simple_cost (vec_info
*,
797 stmt_vec_info stmt_info
, int ncopies
,
798 enum vect_def_type
*dt
,
801 stmt_vector_for_cost
*cost_vec
,
802 vect_cost_for_stmt kind
= vector_stmt
)
804 int inside_cost
= 0, prologue_cost
= 0;
806 gcc_assert (cost_vec
!= NULL
);
808 /* ??? Somehow we need to fix this at the callers. */
810 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (node
);
813 /* Cost the "broadcast" of a scalar operand in to a vector operand.
814 Use scalar_to_vec to cost the broadcast, as elsewhere in the vector
816 for (int i
= 0; i
< ndts
; i
++)
817 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
818 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
819 stmt_info
, 0, vect_prologue
);
821 /* Adjust for two-operator SLP nodes. */
822 if (node
&& SLP_TREE_TWO_OPERATORS (node
))
825 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_perm
,
826 stmt_info
, 0, vect_body
);
829 /* Pass the inside-of-loop statements to the target-specific cost model. */
830 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, kind
,
831 stmt_info
, 0, vect_body
);
833 if (dump_enabled_p ())
834 dump_printf_loc (MSG_NOTE
, vect_location
,
835 "vect_model_simple_cost: inside_cost = %d, "
836 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
840 /* Model cost for type demotion and promotion operations. PWR is
841 normally zero for single-step promotions and demotions. It will be
842 one if two-step promotion/demotion is required, and so on. NCOPIES
843 is the number of vector results (and thus number of instructions)
844 for the narrowest end of the operation chain. Each additional
845 step doubles the number of instructions required. */
848 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
849 enum vect_def_type
*dt
,
850 unsigned int ncopies
, int pwr
,
851 stmt_vector_for_cost
*cost_vec
)
854 int inside_cost
= 0, prologue_cost
= 0;
856 for (i
= 0; i
< pwr
+ 1; i
++)
858 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_promote_demote
,
859 stmt_info
, 0, vect_body
);
863 /* FORNOW: Assuming maximum 2 args per stmts. */
864 for (i
= 0; i
< 2; i
++)
865 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
866 prologue_cost
+= record_stmt_cost (cost_vec
, 1, vector_stmt
,
867 stmt_info
, 0, vect_prologue
);
869 if (dump_enabled_p ())
870 dump_printf_loc (MSG_NOTE
, vect_location
,
871 "vect_model_promotion_demotion_cost: inside_cost = %d, "
872 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
875 /* Returns true if the current function returns DECL. */
878 cfun_returns (tree decl
)
882 FOR_EACH_EDGE (e
, ei
, EXIT_BLOCK_PTR_FOR_FN (cfun
)->preds
)
884 greturn
*ret
= safe_dyn_cast
<greturn
*> (last_stmt (e
->src
));
887 if (gimple_return_retval (ret
) == decl
)
889 /* We often end up with an aggregate copy to the result decl,
890 handle that case as well. First skip intermediate clobbers
895 def
= SSA_NAME_DEF_STMT (gimple_vuse (def
));
897 while (gimple_clobber_p (def
));
898 if (is_a
<gassign
*> (def
)
899 && gimple_assign_lhs (def
) == gimple_return_retval (ret
)
900 && gimple_assign_rhs1 (def
) == decl
)
906 /* Function vect_model_store_cost
908 Models cost for stores. In the case of grouped accesses, one access
909 has the overhead of the grouped access attributed to it. */
912 vect_model_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
913 vect_memory_access_type memory_access_type
,
914 vec_load_store_type vls_type
, slp_tree slp_node
,
915 stmt_vector_for_cost
*cost_vec
)
917 unsigned int inside_cost
= 0, prologue_cost
= 0;
918 stmt_vec_info first_stmt_info
= stmt_info
;
919 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
921 /* ??? Somehow we need to fix this at the callers. */
923 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
925 if (vls_type
== VLS_STORE_INVARIANT
)
928 prologue_cost
+= record_stmt_cost (cost_vec
, 1, scalar_to_vec
,
929 stmt_info
, 0, vect_prologue
);
932 /* Grouped stores update all elements in the group at once,
933 so we want the DR for the first statement. */
934 if (!slp_node
&& grouped_access_p
)
935 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
937 /* True if we should include any once-per-group costs as well as
938 the cost of the statement itself. For SLP we only get called
939 once per group anyhow. */
940 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
942 /* We assume that the cost of a single store-lanes instruction is
943 equivalent to the cost of DR_GROUP_SIZE separate stores. If a grouped
944 access is instead being provided by a permute-and-store operation,
945 include the cost of the permutes. */
947 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
949 /* Uses a high and low interleave or shuffle operations for each
951 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
952 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
953 inside_cost
= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
954 stmt_info
, 0, vect_body
);
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE
, vect_location
,
958 "vect_model_store_cost: strided group_size = %d .\n",
962 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
963 /* Costs of the stores. */
964 if (memory_access_type
== VMAT_ELEMENTWISE
965 || memory_access_type
== VMAT_GATHER_SCATTER
)
967 /* N scalar stores plus extracting the elements. */
968 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
969 inside_cost
+= record_stmt_cost (cost_vec
,
970 ncopies
* assumed_nunits
,
971 scalar_store
, stmt_info
, 0, vect_body
);
974 vect_get_store_cost (vinfo
, stmt_info
, ncopies
, &inside_cost
, cost_vec
);
976 if (memory_access_type
== VMAT_ELEMENTWISE
977 || memory_access_type
== VMAT_STRIDED_SLP
)
979 /* N scalar stores plus extracting the elements. */
980 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
981 inside_cost
+= record_stmt_cost (cost_vec
,
982 ncopies
* assumed_nunits
,
983 vec_to_scalar
, stmt_info
, 0, vect_body
);
986 /* When vectorizing a store into the function result assign
987 a penalty if the function returns in a multi-register location.
988 In this case we assume we'll end up with having to spill the
989 vector result and do piecewise loads as a conservative estimate. */
990 tree base
= get_base_address (STMT_VINFO_DATA_REF (stmt_info
)->ref
);
992 && (TREE_CODE (base
) == RESULT_DECL
993 || (DECL_P (base
) && cfun_returns (base
)))
994 && !aggregate_value_p (base
, cfun
->decl
))
996 rtx reg
= hard_function_value (TREE_TYPE (base
), cfun
->decl
, 0, 1);
997 /* ??? Handle PARALLEL in some way. */
1000 int nregs
= hard_regno_nregs (REGNO (reg
), GET_MODE (reg
));
1001 /* Assume that a single reg-reg move is possible and cheap,
1002 do not account for vector to gp register move cost. */
1006 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
,
1008 stmt_info
, 0, vect_epilogue
);
1010 prologue_cost
+= record_stmt_cost (cost_vec
, ncopies
* nregs
,
1012 stmt_info
, 0, vect_epilogue
);
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_NOTE
, vect_location
,
1019 "vect_model_store_cost: inside_cost = %d, "
1020 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1024 /* Calculate cost of DR's memory access. */
1026 vect_get_store_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1027 unsigned int *inside_cost
,
1028 stmt_vector_for_cost
*body_cost_vec
)
1030 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1031 int alignment_support_scheme
1032 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1034 switch (alignment_support_scheme
)
1038 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1039 vector_store
, stmt_info
, 0,
1042 if (dump_enabled_p ())
1043 dump_printf_loc (MSG_NOTE
, vect_location
,
1044 "vect_model_store_cost: aligned.\n");
1048 case dr_unaligned_supported
:
1050 /* Here, we assign an additional cost for the unaligned store. */
1051 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1052 unaligned_store
, stmt_info
,
1053 DR_MISALIGNMENT (dr_info
),
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_NOTE
, vect_location
,
1057 "vect_model_store_cost: unaligned supported by "
1062 case dr_unaligned_unsupported
:
1064 *inside_cost
= VECT_MAX_COST
;
1066 if (dump_enabled_p ())
1067 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1068 "vect_model_store_cost: unsupported access.\n");
/* NOTE(review): lossy extraction -- tokens are split across lines and
   several original lines are missing (gaps in the embedded numbering,
   e.g. the vector-counting loop body after the `perm' bitmap is built),
   so this text is not compilable as-is.  Code left byte-identical;
   comments only added.
   Visible purpose: accumulate inside/prologue costs for a vectorized
   load into COST_VEC via record_stmt_cost, handling SLP load
   permutations, grouped accesses, and elementwise/gather fallbacks.  */
1078 /* Function vect_model_load_cost
1080 Models cost for loads. In the case of grouped accesses, one access has
1081 the overhead of the grouped access attributed to it. Since unaligned
1082 accesses are supported for loads, we also account for the costs of the
1083 access scheme chosen. */
1086 vect_model_load_cost (vec_info
*vinfo
,
1087 stmt_vec_info stmt_info
, unsigned ncopies
, poly_uint64 vf
,
1088 vect_memory_access_type memory_access_type
,
1090 stmt_vector_for_cost
*cost_vec
)
1092 unsigned int inside_cost
= 0, prologue_cost
= 0;
1093 bool grouped_access_p
= STMT_VINFO_GROUPED_ACCESS (stmt_info
);
1095 gcc_assert (cost_vec
);
1097 /* ??? Somehow we need to fix this at the callers. */
/* NOTE(review): for SLP the number of copies is taken from the SLP node
   (the guarding `if (slp_node)' line appears to be lost in extraction).  */
1099 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
1101 if (slp_node
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
1103 /* If the load is permuted then the alignment is determined by
1104 the first group element not by the first scalar stmt DR. */
1105 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1106 /* Record the cost for the permutation. */
1108 unsigned assumed_nunits
1109 = vect_nunits_for_cost (STMT_VINFO_VECTYPE (first_stmt_info
));
1110 vect_transform_slp_perm_load (vinfo
, slp_node
, vNULL
, NULL
,
1111 vf
, true, &n_perms
);
1112 inside_cost
+= record_stmt_cost (cost_vec
, n_perms
, vec_perm
,
1113 first_stmt_info
, 0, vect_body
);
1114 /* And adjust the number of loads performed. This handles
1115 redundancies as well as loads that are later dead. */
1116 auto_sbitmap
perm (DR_GROUP_SIZE (first_stmt_info
));
1117 bitmap_clear (perm
);
1118 for (unsigned i
= 0;
1119 i
< SLP_TREE_LOAD_PERMUTATION (slp_node
).length (); ++i
)
1120 bitmap_set_bit (perm
, SLP_TREE_LOAD_PERMUTATION (slp_node
)[i
]);
/* NOTE(review): the loop below counts how many vector loads are really
   needed; its interior statements (embedded lines 1124-1136) are partly
   missing from this extraction -- consult the upstream file before
   editing.  */
1122 bool load_seen
= false;
1123 for (unsigned i
= 0; i
< DR_GROUP_SIZE (first_stmt_info
); ++i
)
1125 if (i
% assumed_nunits
== 0)
1131 if (bitmap_bit_p (perm
, i
))
1137 <= (DR_GROUP_SIZE (first_stmt_info
)
1138 - DR_GROUP_GAP (first_stmt_info
)
1139 + assumed_nunits
- 1) / assumed_nunits
);
1142 /* Grouped loads read all elements in the group at once,
1143 so we want the DR for the first statement. */
1144 stmt_vec_info first_stmt_info
= stmt_info
;
1145 if (!slp_node
&& grouped_access_p
)
1146 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
1148 /* True if we should include any once-per-group costs as well as
1149 the cost of the statement itself. For SLP we only get called
1150 once per group anyhow. */
1151 bool first_stmt_p
= (first_stmt_info
== stmt_info
);
1153 /* We assume that the cost of a single load-lanes instruction is
1154 equivalent to the cost of DR_GROUP_SIZE separate loads. If a grouped
1155 access is instead being provided by a load-and-permute operation,
1156 include the cost of the permutes. */
1158 && memory_access_type
== VMAT_CONTIGUOUS_PERMUTE
)
1160 /* Uses an even and odd extract operations or shuffle operations
1161 for each needed permute. */
1162 int group_size
= DR_GROUP_SIZE (first_stmt_info
);
1163 int nstmts
= ncopies
* ceil_log2 (group_size
) * group_size
;
1164 inside_cost
+= record_stmt_cost (cost_vec
, nstmts
, vec_perm
,
1165 stmt_info
, 0, vect_body
);
1167 if (dump_enabled_p ())
1168 dump_printf_loc (MSG_NOTE
, vect_location
,
1169 "vect_model_load_cost: strided group_size = %d .\n",
1173 /* The loads themselves. */
1174 if (memory_access_type
== VMAT_ELEMENTWISE
1175 || memory_access_type
== VMAT_GATHER_SCATTER
)
1177 /* N scalar loads plus gathering them into a vector. */
1178 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1179 unsigned int assumed_nunits
= vect_nunits_for_cost (vectype
);
1180 inside_cost
+= record_stmt_cost (cost_vec
,
1181 ncopies
* assumed_nunits
,
1182 scalar_load
, stmt_info
, 0, vect_body
);
1185 vect_get_load_cost (vinfo
, stmt_info
, ncopies
, first_stmt_p
,
1186 &inside_cost
, &prologue_cost
,
1187 cost_vec
, cost_vec
, true);
1188 if (memory_access_type
== VMAT_ELEMENTWISE
1189 || memory_access_type
== VMAT_STRIDED_SLP
)
1190 inside_cost
+= record_stmt_cost (cost_vec
, ncopies
, vec_construct
,
1191 stmt_info
, 0, vect_body
);
1193 if (dump_enabled_p ())
1194 dump_printf_loc (MSG_NOTE
, vect_location
,
1195 "vect_model_load_cost: inside_cost = %d, "
1196 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
/* NOTE(review): lossy extraction (split tokens, missing lines such as
   the `case dr_aligned:' label and some record_stmt_cost trailing
   arguments).  Code left byte-identical; comments only added.
   Visible purpose: dispatch on the DR's alignment support scheme and
   charge per-copy load costs (vector_load / unaligned_load / realign
   permutes) to *INSIDE_COST and, for explicit-realign-optimized,
   once-per-loop setup to *PROLOGUE_COST.  */
1200 /* Calculate cost of DR's memory access. */
1202 vect_get_load_cost (vec_info
*vinfo
, stmt_vec_info stmt_info
, int ncopies
,
1203 bool add_realign_cost
, unsigned int *inside_cost
,
1204 unsigned int *prologue_cost
,
1205 stmt_vector_for_cost
*prologue_cost_vec
,
1206 stmt_vector_for_cost
*body_cost_vec
,
1207 bool record_prologue_costs
)
1209 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1210 int alignment_support_scheme
1211 = vect_supportable_dr_alignment (vinfo
, dr_info
, false);
1213 switch (alignment_support_scheme
)
/* NOTE(review): the `case dr_aligned:' label appears to be lost in
   extraction; the statements below are that case's body.  */
1217 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1218 stmt_info
, 0, vect_body
);
1220 if (dump_enabled_p ())
1221 dump_printf_loc (MSG_NOTE
, vect_location
,
1222 "vect_model_load_cost: aligned.\n");
1226 case dr_unaligned_supported
:
1228 /* Here, we assign an additional cost for the unaligned load. */
1229 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1230 unaligned_load
, stmt_info
,
1231 DR_MISALIGNMENT (dr_info
),
1234 if (dump_enabled_p ())
1235 dump_printf_loc (MSG_NOTE
, vect_location
,
1236 "vect_model_load_cost: unaligned supported by "
1241 case dr_explicit_realign
:
/* Two loads plus a permute per copy, plus a mask-building stmt when the
   target provides builtin_mask_for_load.  */
1243 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1244 vector_load
, stmt_info
, 0, vect_body
);
1245 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1246 vec_perm
, stmt_info
, 0, vect_body
);
1248 /* FIXME: If the misalignment remains fixed across the iterations of
1249 the containing loop, the following cost should be added to the
1251 if (targetm
.vectorize
.builtin_mask_for_load
)
1252 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1253 stmt_info
, 0, vect_body
);
1255 if (dump_enabled_p ())
1256 dump_printf_loc (MSG_NOTE
, vect_location
,
1257 "vect_model_load_cost: explicit realign\n");
1261 case dr_explicit_realign_optimized
:
1263 if (dump_enabled_p ())
1264 dump_printf_loc (MSG_NOTE
, vect_location
,
1265 "vect_model_load_cost: unaligned software "
1268 /* Unaligned software pipeline has a load of an address, an initial
1269 load, and possibly a mask operation to "prime" the loop. However,
1270 if this is an access in a group of loads, which provide grouped
1271 access, then the above cost should only be considered for one
1272 access in the group. Inside the loop, there is a load op
1273 and a realignment op. */
1275 if (add_realign_cost
&& record_prologue_costs
)
1277 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1278 vector_stmt
, stmt_info
,
1280 if (targetm
.vectorize
.builtin_mask_for_load
)
1281 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1282 vector_stmt
, stmt_info
,
1286 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1287 stmt_info
, 0, vect_body
);
1288 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1289 stmt_info
, 0, vect_body
);
1291 if (dump_enabled_p ())
1292 dump_printf_loc (MSG_NOTE
, vect_location
,
1293 "vect_model_load_cost: explicit realign optimized"
1299 case dr_unaligned_unsupported
:
/* Mark the access as effectively impossible to vectorize.  */
1301 *inside_cost
= VECT_MAX_COST
;
1303 if (dump_enabled_p ())
1304 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1305 "vect_model_load_cost: unsupported access.\n");
/* NOTE(review): lossy extraction (missing braces/else lines).  Code left
   byte-identical; comments only added.
   Visible purpose: insert NEW_STMT either at *GSI (via
   vect_finish_stmt_generation), on the loop preheader edge for loop
   vectorization, or at the region start for basic-block (SLP)
   vectorization, then dump it.  */
1314 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1315 the loop preheader for the vectorized stmt STMT_VINFO. */
1318 vect_init_vector_1 (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
, gimple
*new_stmt
,
1319 gimple_stmt_iterator
*gsi
)
1322 vect_finish_stmt_generation (vinfo
, stmt_vinfo
, new_stmt
, gsi
);
/* Loop case: insert on the preheader edge so the def dominates the loop.  */
1325 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1329 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1333 if (stmt_vinfo
&& nested_in_vect_loop_p (loop
, stmt_vinfo
))
1336 pe
= loop_preheader_edge (loop
);
1337 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
/* Edge insertion must not have required splitting a new block.  */
1338 gcc_assert (!new_bb
);
/* Basic-block (SLP region) case: insert before the region start.  */
1342 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
1343 gimple_stmt_iterator gsi_region_begin
= bb_vinfo
->region_begin
;
1344 gsi_insert_before (&gsi_region_begin
, new_stmt
, GSI_SAME_STMT
);
1348 if (dump_enabled_p ())
1349 dump_printf_loc (MSG_NOTE
, vect_location
,
1350 "created new init_stmt: %G", new_stmt
);
/* NOTE(review): lossy extraction (missing declarations of new_temp /
   init_stmt and several braces/returns).  Code left byte-identical;
   comments only added.
   Visible purpose: materialize VAL as a value of TYPE, converting scalar
   booleans to all-ones/all-zeros first, broadcasting scalars with
   build_vector_from_val, and emitting each init stmt via
   vect_init_vector_1; presumably returns the new SSA def (the return
   statement is not visible here -- confirm upstream).  */
1353 /* Function vect_init_vector.
1355 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1356 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1357 vector type a vector with all elements equal to VAL is created first.
1358 Place the initialization at GSI if it is not NULL. Otherwise, place the
1359 initialization at the loop preheader.
1360 Return the DEF of INIT_STMT.
1361 It will be used in the vectorization of STMT_INFO. */
1364 vect_init_vector (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree val
, tree type
,
1365 gimple_stmt_iterator
*gsi
)
1370 /* We abuse this function to push sth to a SSA name with initial 'val'. */
1371 if (! useless_type_conversion_p (type
, TREE_TYPE (val
)))
1373 gcc_assert (TREE_CODE (type
) == VECTOR_TYPE
);
1374 if (! types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1376 /* Scalar boolean value should be transformed into
1377 all zeros or all ones value before building a vector. */
1378 if (VECTOR_BOOLEAN_TYPE_P (type
))
1380 tree true_val
= build_all_ones_cst (TREE_TYPE (type
));
1381 tree false_val
= build_zero_cst (TREE_TYPE (type
));
/* Constants can be canonicalized directly ...  */
1383 if (CONSTANT_CLASS_P (val
))
1384 val
= integer_zerop (val
) ? false_val
: true_val
;
/* ... non-constants need a COND_EXPR selecting all-ones/all-zeros.  */
1387 new_temp
= make_ssa_name (TREE_TYPE (type
));
1388 init_stmt
= gimple_build_assign (new_temp
, COND_EXPR
,
1389 val
, true_val
, false_val
);
1390 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
/* Non-boolean mismatch: view-convert and/or convert VAL to the vector
   element type, emitting each produced stmt individually.  */
1396 gimple_seq stmts
= NULL
;
1397 if (! INTEGRAL_TYPE_P (TREE_TYPE (val
)))
1398 val
= gimple_build (&stmts
, VIEW_CONVERT_EXPR
,
1399 TREE_TYPE (type
), val
);
1401 /* ??? Condition vectorization expects us to do
1402 promotion of invariant/external defs. */
1403 val
= gimple_convert (&stmts
, TREE_TYPE (type
), val
);
1404 for (gimple_stmt_iterator gsi2
= gsi_start (stmts
);
1405 !gsi_end_p (gsi2
); )
1407 init_stmt
= gsi_stmt (gsi2
);
1408 gsi_remove (&gsi2
, false);
1409 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
/* Broadcast the (now element-typed) scalar into a vector constant.  */
1413 val
= build_vector_from_val (type
, val
);
1416 new_temp
= vect_get_new_ssa_name (type
, vect_simple_var
, "cst_");
1417 init_stmt
= gimple_build_assign (new_temp
, val
);
1418 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, gsi
);
/* NOTE(review): lossy extraction (missing `unsigned ncopies' parameter
   line, some declarations and braces).  Code left byte-identical;
   comments only added.
   Visible purpose: push NCOPIES vector defs for operand OP into
   *VEC_OPRNDS -- either a broadcast invariant built by vect_init_vector
   (constant/external defs) or the LHSs of the defining stmt's recorded
   STMT_VINFO_VEC_STMTS.  */
1423 /* Function vect_get_vec_defs_for_operand.
1425 OP is an operand in STMT_VINFO. This function returns a vector of
1426 NCOPIES defs that will be used in the vectorized stmts for STMT_VINFO.
1428 In the case that OP is an SSA_NAME which is defined in the loop, then
1429 STMT_VINFO_VEC_STMTS of the defining stmt holds the relevant defs.
1431 In case OP is an invariant or constant, a new stmt that creates a vector def
1432 needs to be introduced. VECTYPE may be used to specify a required type for
1433 vector invariant. */
1436 vect_get_vec_defs_for_operand (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1438 tree op
, vec
<tree
> *vec_oprnds
, tree vectype
)
1441 enum vect_def_type dt
;
1443 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
1445 if (dump_enabled_p ())
1446 dump_printf_loc (MSG_NOTE
, vect_location
,
1447 "vect_get_vec_defs_for_operand: %T\n", op
);
1449 stmt_vec_info def_stmt_info
;
1450 is_simple_use
= vect_is_simple_use (op
, loop_vinfo
, &dt
,
1451 &def_stmt_info
, &def_stmt
);
1452 gcc_assert (is_simple_use
);
1453 if (def_stmt
&& dump_enabled_p ())
1454 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = %G", def_stmt
);
1456 vec_oprnds
->create (ncopies
);
1457 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
/* Invariant operand: pick a vector type (caller-supplied VECTYPE wins,
   booleans get a truth type, otherwise derive from OP's scalar type)
   and broadcast once -- the same def serves all copies.  */
1459 tree stmt_vectype
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1463 vector_type
= vectype
;
1464 else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op
))
1465 && VECTOR_BOOLEAN_TYPE_P (stmt_vectype
))
1466 vector_type
= truth_type_for (stmt_vectype
);
1468 vector_type
= get_vectype_for_scalar_type (loop_vinfo
, TREE_TYPE (op
));
1470 gcc_assert (vector_type
);
1471 tree vop
= vect_init_vector (vinfo
, stmt_vinfo
, op
, vector_type
, NULL
);
1473 vec_oprnds
->quick_push (vop
);
/* Internal def: reuse the vectorized stmts already generated for the
   defining statement.  */
1477 def_stmt_info
= vect_stmt_to_vectorize (def_stmt_info
);
1478 gcc_assert (STMT_VINFO_VEC_STMTS (def_stmt_info
).length () == ncopies
);
1479 for (unsigned i
= 0; i
< ncopies
; ++i
)
1480 vec_oprnds
->quick_push (gimple_get_lhs
1481 (STMT_VINFO_VEC_STMTS (def_stmt_info
)[i
]));
/* NOTE(review): lossy extraction (missing `if (slp_node)' / `if (op1)'
   guard lines and the `unsigned ncopies' parameter line).  Code left
   byte-identical; comments only added.
   Visible purpose: fetch vectorized defs for up to four operands,
   from SLP children when an SLP node is given, otherwise via
   vect_get_vec_defs_for_operand per operand.  */
1486 /* Get vectorized definitions for OP0 and OP1. */
1489 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1491 tree op0
, vec
<tree
> *vec_oprnds0
, tree vectype0
,
1492 tree op1
, vec
<tree
> *vec_oprnds1
, tree vectype1
,
1493 tree op2
, vec
<tree
> *vec_oprnds2
, tree vectype2
,
1494 tree op3
, vec
<tree
> *vec_oprnds3
, tree vectype3
)
/* SLP path: defs come from the SLP node's children, in operand order.  */
1499 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[0], vec_oprnds0
);
1501 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[1], vec_oprnds1
);
1503 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[2], vec_oprnds2
);
1505 vect_get_slp_defs (SLP_TREE_CHILDREN (slp_node
)[3], vec_oprnds3
);
/* Non-SLP path: one lookup per present operand.  */
1510 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1511 op0
, vec_oprnds0
, vectype0
);
1513 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1514 op1
, vec_oprnds1
, vectype1
);
1516 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1517 op2
, vec_oprnds2
, vectype2
);
1519 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
1520 op3
, vec_oprnds3
, vectype3
);
/* NOTE(review): lossy extraction (missing return type and the
   `unsigned ncopies' parameter line).  Code left byte-identical;
   comments only added.
   Visible purpose: convenience overload that forwards to the
   vectype-taking vect_get_vec_defs with NULL_TREE for every
   required vector type.  */
1525 vect_get_vec_defs (vec_info
*vinfo
, stmt_vec_info stmt_info
, slp_tree slp_node
,
1527 tree op0
, vec
<tree
> *vec_oprnds0
,
1528 tree op1
, vec
<tree
> *vec_oprnds1
,
1529 tree op2
, vec
<tree
> *vec_oprnds2
,
1530 tree op3
, vec
<tree
> *vec_oprnds3
)
1532 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
1533 op0
, vec_oprnds0
, NULL_TREE
,
1534 op1
, vec_oprnds1
, NULL_TREE
,
1535 op2
, vec_oprnds2
, NULL_TREE
,
1536 op3
, vec_oprnds3
, NULL_TREE
);
/* NOTE(review): lossy extraction (missing `if (stmt_info)' guard and
   else branch structure).  Code left byte-identical; comments only
   added.
   Visible purpose: common epilogue for newly generated vector stmts --
   dump, copy the scalar stmt's location, and keep VEC_STMT in the same
   EH landing-pad region as the scalar stmt when it can throw.  */
1539 /* Helper function called by vect_finish_replace_stmt and
1540 vect_finish_stmt_generation. Set the location of the new
1541 statement and create and return a stmt_vec_info for it. */
1544 vect_finish_stmt_generation_1 (vec_info
*,
1545 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1547 if (dump_enabled_p ())
1548 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: %G", vec_stmt
);
1552 gimple_set_location (vec_stmt
, gimple_location (stmt_info
->stmt
));
1554 /* While EH edges will generally prevent vectorization, stmt might
1555 e.g. be in a must-not-throw region. Ensure newly created stmts
1556 that could throw are part of the same region. */
1557 int lp_nr
= lookup_stmt_eh_lp (stmt_info
->stmt
);
1558 if (lp_nr
!= 0 && stmt_could_throw_p (cfun
, vec_stmt
))
1559 add_stmt_to_eh_lp (vec_stmt
, lp_nr
);
/* Without a scalar stmt_info the new stmt must not be able to throw.  */
1562 gcc_assert (!stmt_could_throw_p (cfun
, vec_stmt
));
/* NOTE(review): lossy extraction (missing return type / braces).
   Code left byte-identical; comments only added.
   Visible purpose: splice VEC_STMT into the IL in place of the original
   scalar stmt (same LHS required), then run the common finish helper.  */
1565 /* Replace the scalar statement STMT_INFO with a new vector statement VEC_STMT,
1566 which sets the same scalar result as STMT_INFO did. Create and return a
1567 stmt_vec_info for VEC_STMT. */
1570 vect_finish_replace_stmt (vec_info
*vinfo
,
1571 stmt_vec_info stmt_info
, gimple
*vec_stmt
)
1573 gimple
*scalar_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
/* In-place replacement only works when both stmts define the same LHS.  */
1574 gcc_assert (gimple_get_lhs (scalar_stmt
) == gimple_get_lhs (vec_stmt
));
1576 gimple_stmt_iterator gsi
= gsi_for_stmt (scalar_stmt
);
1577 gsi_replace (&gsi
, vec_stmt
, true);
1579 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
/* NOTE(review): lossy extraction (missing braces around the virtual-
   operand update).  Code left byte-identical; comments only added.
   Visible purpose: insert VEC_STMT before *GSI, first patching virtual
   SSA operands (vuse/vdef) by hand when inserting a memory stmt before
   an existing one, to avoid a full virtual-SSA renaming pass.  */
1582 /* Add VEC_STMT to the vectorized implementation of STMT_INFO and insert it
1583 before *GSI. Create and return a stmt_vec_info for VEC_STMT. */
1586 vect_finish_stmt_generation (vec_info
*vinfo
,
1587 stmt_vec_info stmt_info
, gimple
*vec_stmt
,
1588 gimple_stmt_iterator
*gsi
)
1590 gcc_assert (!stmt_info
|| gimple_code (stmt_info
->stmt
) != GIMPLE_LABEL
);
1592 if (!gsi_end_p (*gsi
)
1593 && gimple_has_mem_ops (vec_stmt
))
1595 gimple
*at_stmt
= gsi_stmt (*gsi
);
1596 tree vuse
= gimple_vuse (at_stmt
);
1597 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1599 tree vdef
= gimple_vdef (at_stmt
);
/* The new stmt uses whatever virtual def the insertion point used.  */
1600 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1601 /* If we have an SSA vuse and insert a store, update virtual
1602 SSA form to avoid triggering the renamer. Do so only
1603 if we can easily see all uses - which is what almost always
1604 happens with the way vectorized stmts are inserted. */
1605 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1606 && ((is_gimple_assign (vec_stmt
)
1607 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1608 || (is_gimple_call (vec_stmt
)
1609 && !(gimple_call_flags (vec_stmt
)
1610 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
/* VEC_STMT is itself a store: give it a fresh vdef and rewire the
   following stmt's vuse to it.  */
1612 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1613 gimple_set_vdef (vec_stmt
, new_vdef
);
1614 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1618 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1619 vect_finish_stmt_generation_1 (vinfo
, stmt_info
, vec_stmt
);
/* NOTE(review): lossy extraction (missing return type, the `else'
   keyword, the success/failure return statements).  Code left
   byte-identical; comments only added.
   Visible purpose: map CFN (or FNDECL's associated internal fn) to a
   directly-supported vectorizable internal function for the given
   in/out vector types, consulting direct_internal_fn_supported_p.  */
1622 /* We want to vectorize a call to combined function CFN with function
1623 decl FNDECL, using VECTYPE_OUT as the type of the output and VECTYPE_IN
1624 as the types of all inputs. Check whether this is possible using
1625 an internal function, returning its code if so or IFN_LAST if not. */
1628 vectorizable_internal_function (combined_fn cfn
, tree fndecl
,
1629 tree vectype_out
, tree vectype_in
)
1632 if (internal_fn_p (cfn
))
1633 ifn
= as_internal_fn (cfn
);
/* Otherwise try the internal function associated with the fndecl.  */
1635 ifn
= associated_internal_fn (fndecl
);
1636 if (ifn
!= IFN_LAST
&& direct_internal_fn_p (ifn
))
1638 const direct_internal_fn_info
&info
= direct_internal_fn (ifn
);
1639 if (info
.vectorizable
)
/* type0/type1 < 0 select the output type, otherwise the input type.  */
1641 tree type0
= (info
.type0
< 0 ? vectype_out
: vectype_in
);
1642 tree type1
= (info
.type1
< 0 ? vectype_out
: vectype_in
);
1643 if (direct_internal_fn_supported_p (ifn
, tree_pair (type0
, type1
),
1644 OPTIMIZE_FOR_SPEED
))
/* NOTE(review): lossy extraction.  The span starts with a forward
   declaration of permute_vec_elements, then check_load_store_masking;
   early `return' statements and some braces are missing from the text.
   Code left byte-identical; comments only added.
   Visible purpose: decide whether this load/store can be executed in a
   fully-masked loop; on failure clear LOOP_VINFO_CAN_FULLY_MASK_P, on
   success record the needed masks via vect_record_loop_mask.  */
1652 static tree
permute_vec_elements (vec_info
*, tree
, tree
, tree
, stmt_vec_info
,
1653 gimple_stmt_iterator
*);
1655 /* Check whether a load or store statement in the loop described by
1656 LOOP_VINFO is possible in a fully-masked loop. This is testing
1657 whether the vectorizer pass has the appropriate support, as well as
1658 whether the target does.
1660 VLS_TYPE says whether the statement is a load or store and VECTYPE
1661 is the type of the vector being loaded or stored. MEMORY_ACCESS_TYPE
1662 says how the load or store is going to be implemented and GROUP_SIZE
1663 is the number of load or store statements in the containing group.
1664 If the access is a gather load or scatter store, GS_INFO describes
1665 its arguments. If the load or store is conditional, SCALAR_MASK is the
1666 condition under which it occurs.
1668 Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
1669 supported, otherwise record the required mask types. */
1672 check_load_store_masking (loop_vec_info loop_vinfo
, tree vectype
,
1673 vec_load_store_type vls_type
, int group_size
,
1674 vect_memory_access_type memory_access_type
,
1675 gather_scatter_info
*gs_info
, tree scalar_mask
)
1677 /* Invariant loads need no special support. */
1678 if (memory_access_type
== VMAT_INVARIANT
)
1681 vec_loop_masks
*masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
1682 machine_mode vecmode
= TYPE_MODE (vectype
);
1683 bool is_load
= (vls_type
== VLS_LOAD
);
/* Case 1: load/store-lanes -- require masked lanes support.  */
1684 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
1687 ? !vect_load_lanes_supported (vectype
, group_size
, true)
1688 : !vect_store_lanes_supported (vectype
, group_size
, true))
1690 if (dump_enabled_p ())
1691 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1692 "can't use a fully-masked loop because the"
1693 " target doesn't have an appropriate masked"
1694 " load/store-lanes instruction.\n");
1695 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1698 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1699 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
/* Case 2: gather/scatter -- require the masked internal function.  */
1703 if (memory_access_type
== VMAT_GATHER_SCATTER
)
1705 internal_fn ifn
= (is_load
1706 ? IFN_MASK_GATHER_LOAD
1707 : IFN_MASK_SCATTER_STORE
);
1708 if (!internal_gather_scatter_fn_supported_p (ifn
, vectype
,
1709 gs_info
->memory_type
,
1710 gs_info
->offset_vectype
,
1713 if (dump_enabled_p ())
1714 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1715 "can't use a fully-masked loop because the"
1716 " target doesn't have an appropriate masked"
1717 " gather load or scatter store instruction.\n");
1718 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1721 unsigned int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
1722 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
, vectype
, scalar_mask
);
/* Case 3: anything non-contiguous is not supported yet.  */
1726 if (memory_access_type
!= VMAT_CONTIGUOUS
1727 && memory_access_type
!= VMAT_CONTIGUOUS_PERMUTE
)
1729 /* Element X of the data must come from iteration i * VF + X of the
1730 scalar loop. We need more work to support other mappings. */
1731 if (dump_enabled_p ())
1732 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1733 "can't use a fully-masked loop because an access"
1734 " isn't contiguous.\n");
1735 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
/* Case 4: contiguous -- require a target mask mode and masked
   load/store support for it.  */
1739 machine_mode mask_mode
;
1740 if (!VECTOR_MODE_P (vecmode
)
1741 || !targetm
.vectorize
.get_mask_mode (vecmode
).exists (&mask_mode
)
1742 || !can_vec_mask_load_store_p (vecmode
, mask_mode
, is_load
))
1744 if (dump_enabled_p ())
1745 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1746 "can't use a fully-masked loop because the target"
1747 " doesn't have the appropriate masked load or"
1749 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
1752 /* We might load more scalars than we need for permuting SLP loads.
1753 We checked in get_group_load_store_type that the extra elements
1754 don't leak into a new vector. */
1755 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1756 poly_uint64 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1757 unsigned int nvectors
;
1758 if (can_div_away_from_zero_p (group_size
* vf
, nunits
, &nvectors
))
1759 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
, vectype
, scalar_mask
);
/* NOTE(review): lossy extraction (missing return type, the early
   `return vec_mask' for a null LOOP_MASK, and the final return of
   and_res).  Code left byte-identical; comments only added.
   Visible purpose: AND the loop mask into the statement's vector mask,
   inserting the BIT_AND_EXPR before GSI.  */
1764 /* Return the mask input to a masked load or store. VEC_MASK is the vectorized
1765 form of the scalar mask condition and LOOP_MASK, if nonnull, is the mask
1766 that needs to be applied to all loads and stores in a vectorized loop.
1767 Return VEC_MASK if LOOP_MASK is null, otherwise return VEC_MASK & LOOP_MASK.
1769 MASK_TYPE is the type of both masks. If new statements are needed,
1770 insert them before GSI. */
1773 prepare_load_store_mask (tree mask_type
, tree loop_mask
, tree vec_mask
,
1774 gimple_stmt_iterator
*gsi
)
1776 gcc_assert (useless_type_conversion_p (mask_type
, TREE_TYPE (vec_mask
)));
1780 gcc_assert (TREE_TYPE (loop_mask
) == mask_type
);
1781 tree and_res
= make_temp_ssa_name (mask_type
, NULL
, "vec_mask_and");
1782 gimple
*and_stmt
= gimple_build_assign (and_res
, BIT_AND_EXPR
,
1783 vec_mask
, loop_mask
);
1784 gsi_insert_before (gsi
, and_stmt
, GSI_SAME_STMT
);
/* NOTE(review): lossy extraction (missing return type, `return false'
   lines, `widest_int factor' declaration, `continue' statements and
   braces).  Code left byte-identical; comments only added.
   Visible purpose: for a strided access with constant DR_STEP, try
   scales 1 and the element size, compute the minimum offset precision
   from the iteration-count bound, and if the target supports a
   gather/scatter at that offset width fill in *GS_INFO.  */
1788 /* Determine whether we can use a gather load or scatter store to vectorize
1789 strided load or store STMT_INFO by truncating the current offset to a
1790 smaller width. We need to be able to construct an offset vector:
1792 { 0, X, X*2, X*3, ... }
1794 without loss of precision, where X is STMT_INFO's DR_STEP.
1796 Return true if this is possible, describing the gather load or scatter
1797 store in GS_INFO. MASKED_P is true if the load or store is conditional. */
1800 vect_truncate_gather_scatter_offset (stmt_vec_info stmt_info
,
1801 loop_vec_info loop_vinfo
, bool masked_p
,
1802 gather_scatter_info
*gs_info
)
1804 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1805 data_reference
*dr
= dr_info
->dr
;
1806 tree step
= DR_STEP (dr
);
1807 if (TREE_CODE (step
) != INTEGER_CST
)
1809 /* ??? Perhaps we could use range information here? */
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_NOTE
, vect_location
,
1812 "cannot truncate variable step.\n");
1816 /* Get the number of bits in an element. */
1817 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1818 scalar_mode element_mode
= SCALAR_TYPE_MODE (TREE_TYPE (vectype
));
1819 unsigned int element_bits
= GET_MODE_BITSIZE (element_mode
);
1821 /* Set COUNT to the upper limit on the number of elements - 1.
1822 Start with the maximum vectorization factor. */
1823 unsigned HOST_WIDE_INT count
= vect_max_vf (loop_vinfo
) - 1;
1825 /* Try lowering COUNT to the number of scalar latch iterations. */
1826 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1827 widest_int max_iters
;
1828 if (max_loop_iterations (loop
, &max_iters
)
1829 && max_iters
< count
)
1830 count
= max_iters
.to_shwi ();
1832 /* Try scales of 1 and the element size. */
1833 int scales
[] = { 1, vect_get_scalar_dr_size (dr_info
) };
1834 wi::overflow_type overflow
= wi::OVF_NONE
;
1835 for (int i
= 0; i
< 2; ++i
)
1837 int scale
= scales
[i
];
/* STEP must be a multiple of the scale for the offsets to be exact.  */
1839 if (!wi::multiple_of_p (wi::to_widest (step
), scale
, SIGNED
, &factor
))
1842 /* Determine the minimum precision of (COUNT - 1) * STEP / SCALE. */
1843 widest_int range
= wi::mul (count
, factor
, SIGNED
, &overflow
);
1846 signop sign
= range
>= 0 ? UNSIGNED
: SIGNED
;
1847 unsigned int min_offset_bits
= wi::min_precision (range
, sign
);
1849 /* Find the narrowest viable offset type. */
1850 unsigned int offset_bits
= 1U << ceil_log2 (min_offset_bits
);
1851 tree offset_type
= build_nonstandard_integer_type (offset_bits
,
1854 /* See whether the target supports the operation with an offset
1855 no narrower than OFFSET_TYPE. */
1856 tree memory_type
= TREE_TYPE (DR_REF (dr
));
1857 if (!vect_gather_scatter_fn_p (loop_vinfo
, DR_IS_READ (dr
), masked_p
,
1858 vectype
, memory_type
, offset_type
, scale
,
1859 &gs_info
->ifn
, &gs_info
->offset_vectype
))
/* Success: describe the gather/scatter in *GS_INFO.  */
1862 gs_info
->decl
= NULL_TREE
;
1863 /* Logically the sum of DR_BASE_ADDRESS, DR_INIT and DR_OFFSET,
1864 but we don't need to store that here. */
1865 gs_info
->base
= NULL_TREE
;
1866 gs_info
->element_type
= TREE_TYPE (vectype
);
1867 gs_info
->offset
= fold_convert (offset_type
, step
);
1868 gs_info
->offset_dt
= vect_constant_def
;
1869 gs_info
->scale
= scale
;
1870 gs_info
->memory_type
= memory_type
;
/* Fall-through diagnostic when no scale worked without overflow.  */
1874 if (overflow
&& dump_enabled_p ())
1875 dump_printf_loc (MSG_NOTE
, vect_location
,
1876 "truncating gather/scatter offset to %d bits"
1877 " might change its value.\n", element_bits
);
/* NOTE(review): lossy extraction (missing return type, part of the
   first condition, and the final `return true').  Code left
   byte-identical; comments only added.
   Visible purpose: prefer a native gather/scatter found by
   vect_check_gather_scatter, widening the recorded offset to the offset
   vector's element type; otherwise fall back to the truncation
   strategy above.  */
1882 /* Return true if we can use gather/scatter internal functions to
1883 vectorize STMT_INFO, which is a grouped or strided load or store.
1884 MASKED_P is true if load or store is conditional. When returning
1885 true, fill in GS_INFO with the information required to perform the
1889 vect_use_strided_gather_scatters_p (stmt_vec_info stmt_info
,
1890 loop_vec_info loop_vinfo
, bool masked_p
,
1891 gather_scatter_info
*gs_info
)
1893 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
)
1895 return vect_truncate_gather_scatter_offset (stmt_info
, loop_vinfo
,
1898 tree old_offset_type
= TREE_TYPE (gs_info
->offset
);
1899 tree new_offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
/* Widening only -- the new offset type must not lose precision.  */
1901 gcc_assert (TYPE_PRECISION (new_offset_type
)
1902 >= TYPE_PRECISION (old_offset_type
));
1903 gs_info
->offset
= fold_convert (new_offset_type
, gs_info
->offset
);
1905 if (dump_enabled_p ())
1906 dump_printf_loc (MSG_NOTE
, vect_location
,
1907 "using gather/scatter for strided/grouped access,"
1908 " scale = %d\n", gs_info
->scale
);
/* NOTE(review): lossy extraction (missing return type and the second
   argument of tree_int_cst_compare -- presumably a zero constant;
   confirm upstream).  Code left byte-identical; comments only added.
   Visible purpose: report the sign of the DR's constant step.  */
1913 /* STMT_INFO is a non-strided load or store, meaning that it accesses
1914 elements with a known constant step. Return -1 if that step
1915 is negative, 0 if it is zero, and 1 if it is greater than zero. */
1918 compare_step_with_zero (vec_info
*vinfo
, stmt_vec_info stmt_info
)
1920 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1921 return tree_int_cst_compare (vect_dr_behavior (vinfo
, dr_info
)->step
,
/* NOTE(review): lossy extraction (missing return type and the
   `return NULL_TREE' for the unsupported case).  Code left
   byte-identical; comments only added.
   Visible purpose: build the { nunits-1, nunits-2, ... } reversal
   permutation as a 3-element stepped encoding, check target support
   with can_vec_perm_const_p, and return the checked mask.  */
1925 /* If the target supports a permute mask that reverses the elements in
1926 a vector of type VECTYPE, return that mask, otherwise return null. */
1929 perm_mask_for_reverse (tree vectype
)
1931 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
1933 /* The encoding has a single stepped pattern. */
1934 vec_perm_builder
sel (nunits
, 1, 3);
1935 for (int i
= 0; i
< 3; ++i
)
1936 sel
.quick_push (nunits
- 1 - i
);
1938 vec_perm_indices
indices (sel
, 1, nunits
);
1939 if (!can_vec_perm_const_p (TYPE_MODE (vectype
), indices
))
1941 return vect_gen_perm_mask_checked (vectype
, indices
);
/* NOTE(review): lossy extraction (missing `if (ncopies > 1)' style
   guard before the first diagnostic and the second argument to
   vect_supportable_dr_alignment).  Code left byte-identical; comments
   only added.
   Visible purpose: classify a consecutive negative-step access --
   fall back to VMAT_ELEMENTWISE when multiple copies, insufficient
   alignment support, or no reverse permute; VMAT_CONTIGUOUS_DOWN for
   invariant stores; otherwise VMAT_CONTIGUOUS_REVERSE.  */
1944 /* A subroutine of get_load_store_type, with a subset of the same
1945 arguments. Handle the case where STMT_INFO is a load or store that
1946 accesses consecutive elements with a negative step. */
1948 static vect_memory_access_type
1949 get_negative_load_store_type (vec_info
*vinfo
,
1950 stmt_vec_info stmt_info
, tree vectype
,
1951 vec_load_store_type vls_type
,
1952 unsigned int ncopies
)
1954 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
1955 dr_alignment_support alignment_support_scheme
;
1959 if (dump_enabled_p ())
1960 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1961 "multiple types with negative step.\n");
1962 return VMAT_ELEMENTWISE
;
1965 alignment_support_scheme
= vect_supportable_dr_alignment (vinfo
,
1967 if (alignment_support_scheme
!= dr_aligned
1968 && alignment_support_scheme
!= dr_unaligned_supported
)
1970 if (dump_enabled_p ())
1971 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1972 "negative step but alignment required.\n");
1973 return VMAT_ELEMENTWISE
;
1976 if (vls_type
== VLS_STORE_INVARIANT
)
1978 if (dump_enabled_p ())
1979 dump_printf_loc (MSG_NOTE
, vect_location
,
1980 "negative step with invariant source;"
1981 " no permute needed.\n");
1982 return VMAT_CONTIGUOUS_DOWN
;
1985 if (!perm_mask_for_reverse (vectype
))
1987 if (dump_enabled_p ())
1988 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1989 "negative step and reversing not supported.\n");
1990 return VMAT_ELEMENTWISE
;
1993 return VMAT_CONTIGUOUS_REVERSE
;
/* NOTE(review): lossy extraction (missing comment tail, return type,
   and the unreachable-fallthrough handling).  Code left byte-identical;
   comments only added.
   Visible purpose: return the stored value -- rhs1 of a single
   assignment, or the stored-value argument of a masked-store style
   internal call.  */
1996 /* STMT_INFO is either a masked or unconditional store. Return the value
2000 vect_get_store_rhs (stmt_vec_info stmt_info
)
2002 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
2004 gcc_assert (gimple_assign_single_p (assign
));
2005 return gimple_assign_rhs1 (assign
);
/* Internal-function store (e.g. masked store): the stored value's
   argument index comes from internal_fn_stored_value_index.  */
2007 if (gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
2009 internal_fn ifn
= gimple_call_internal_fn (call
);
2010 int index
= internal_fn_stored_value_index (ifn
);
2011 gcc_assert (index
>= 0);
2012 return gimple_call_arg (call
, index
);
/* NOTE(review): lossy extraction (missing return type, `machine_mode
   rmode' / `scalar_int_mode elmode' declarations for the second probe,
   intermediate returns, and NULL_TREE fallthrough).  Code left
   byte-identical; comments only added.
   Visible purpose: find a vector type composable from NELTS pieces of
   *PTYPE -- first probing vec_init from sub-vectors, then from
   equal-sized integer scalars.  ("NETLS" in the original comment is an
   upstream typo for NELTS; left untouched since code is byte-frozen.)  */
2017 /* Function VECTOR_VECTOR_COMPOSITION_TYPE
2019 This function returns a vector type which can be composed with NETLS pieces,
2020 whose type is recorded in PTYPE. VTYPE should be a vector type, and has the
2021 same vector size as the return vector. It checks target whether supports
2022 pieces-size vector mode for construction firstly, if target fails to, check
2023 pieces-size scalar mode for construction further. It returns NULL_TREE if
2024 fails to find the available composition.
2026 For example, for (vtype=V16QI, nelts=4), we can probably get:
2027 - V16QI with PTYPE V4QI.
2028 - V4SI with PTYPE SI.
2032 vector_vector_composition_type (tree vtype
, poly_uint64 nelts
, tree
*ptype
)
2034 gcc_assert (VECTOR_TYPE_P (vtype
));
2035 gcc_assert (known_gt (nelts
, 0U));
2037 machine_mode vmode
= TYPE_MODE (vtype
);
2038 if (!VECTOR_MODE_P (vmode
))
2041 poly_uint64 vbsize
= GET_MODE_BITSIZE (vmode
);
2042 unsigned int pbsize
;
2043 if (constant_multiple_p (vbsize
, nelts
, &pbsize
))
2045 /* First check if vec_init optab supports construction from
2046 vector pieces directly. */
2047 scalar_mode elmode
= SCALAR_TYPE_MODE (TREE_TYPE (vtype
));
2048 poly_uint64 inelts
= pbsize
/ GET_MODE_BITSIZE (elmode
);
2050 if (related_vector_mode (vmode
, elmode
, inelts
).exists (&rmode
)
2051 && (convert_optab_handler (vec_init_optab
, vmode
, rmode
)
2052 != CODE_FOR_nothing
))
2054 *ptype
= build_vector_type (TREE_TYPE (vtype
), inelts
);
2058 /* Otherwise check if exists an integer type of the same piece size and
2059 if vec_init optab supports construction from it directly. */
2060 if (int_mode_for_size (pbsize
, 0).exists (&elmode
)
2061 && related_vector_mode (vmode
, elmode
, nelts
).exists (&rmode
)
2062 && (convert_optab_handler (vec_init_optab
, rmode
, elmode
)
2063 != CODE_FOR_nothing
))
2065 *ptype
= build_nonstandard_integer_type (pbsize
, 1);
2066 return build_vector_type (*ptype
, nelts
);
2073 /* A subroutine of get_load_store_type, with a subset of the same
2074 arguments. Handle the case where STMT_INFO is part of a grouped load
2077 For stores, the statements in the group are all consecutive
2078 and there is no gap at the end. For loads, the statements in the
2079 group might not be consecutive; there can be gaps between statements
2080 as well as at the end. */
/* NOTE(review): lossy extraction — original line numbers fused into the text,
   several statements/braces missing.  Comments only; no code tokens changed.
   Purpose (from the comment preceding this function in the file): subroutine
   of get_load_store_type handling grouped loads/stores; chooses a
   vect_memory_access_type (*memory_access_type) and may request peeling for
   gaps.  */
2083 get_group_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2084 tree vectype
, bool slp
,
2085 bool masked_p
, vec_load_store_type vls_type
,
2086 vect_memory_access_type
*memory_access_type
,
2087 gather_scatter_info
*gs_info
)
2089 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2090 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
2091 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2092 dr_vec_info
*first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
2093 unsigned int group_size
= DR_GROUP_SIZE (first_stmt_info
);
/* single_element_p: STMT_INFO is both the first and only member of its
   interleaving group.  */
2094 bool single_element_p
= (stmt_info
== first_stmt_info
2095 && !DR_GROUP_NEXT_ELEMENT (stmt_info
));
2096 unsigned HOST_WIDE_INT gap
= DR_GROUP_GAP (first_stmt_info
);
2097 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2099 /* True if the vectorized statements would access beyond the last
2100 statement in the group. */
2101 bool overrun_p
= false;
2103 /* True if we can cope with such overrun by peeling for gaps, so that
2104 there is at least one final scalar iteration after the vector loop. */
2105 bool can_overrun_p
= (!masked_p
2106 && vls_type
== VLS_LOAD
2110 /* There can only be a gap at the end of the group if the stride is
2111 known at compile time. */
2112 gcc_assert (!STMT_VINFO_STRIDED_P (first_stmt_info
) || gap
== 0);
2114 /* Stores can't yet have gaps. */
2115 gcc_assert (slp
|| vls_type
== VLS_LOAD
|| gap
== 0);
2119 if (STMT_VINFO_STRIDED_P (first_stmt_info
))
2121 /* Try to use consecutive accesses of DR_GROUP_SIZE elements,
2122 separated by the stride, until we have a complete vector.
2123 Fall back to scalar accesses if that isn't possible. */
2124 if (multiple_p (nunits
, group_size
))
2125 *memory_access_type
= VMAT_STRIDED_SLP
;
2127 *memory_access_type
= VMAT_ELEMENTWISE
;
2131 overrun_p
= loop_vinfo
&& gap
!= 0;
2132 if (overrun_p
&& vls_type
!= VLS_LOAD
)
2134 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2135 "Grouped store with gaps requires"
2136 " non-consecutive accesses\n");
2139 /* An overrun is fine if the trailing elements are smaller
2140 than the alignment boundary B. Every vector access will
2141 be a multiple of B and so we are guaranteed to access a
2142 non-gap element in the same B-sized block. */
2144 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2145 / vect_get_scalar_dr_size (first_dr_info
)))
2148 /* If the gap splits the vector in half and the target
2149 can do half-vector operations avoid the epilogue peeling
2150 by simply loading half of the vector only. Usually
2151 the construction with an upper zero half will be elided. */
2152 dr_alignment_support alignment_support_scheme
;
2156 && (((alignment_support_scheme
2157 = vect_supportable_dr_alignment (vinfo
,
2158 first_dr_info
, false)))
2160 || alignment_support_scheme
== dr_unaligned_supported
)
2161 && known_eq (nunits
, (group_size
- gap
) * 2)
2162 && known_eq (nunits
, group_size
)
2163 && (vector_vector_composition_type (vectype
, 2, &half_vtype
)
2167 if (overrun_p
&& !can_overrun_p
)
2169 if (dump_enabled_p ())
2170 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2171 "Peeling for outer loop is not supported\n");
/* Negative step: use the negative-load/store handling.  */
2174 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2176 *memory_access_type
= get_negative_load_store_type
2177 (vinfo
, stmt_info
, vectype
, vls_type
, 1);
2180 gcc_assert (!loop_vinfo
|| cmp
> 0);
2181 *memory_access_type
= VMAT_CONTIGUOUS
;
2187 /* We can always handle this case using elementwise accesses,
2188 but see if something more efficient is available. */
2189 *memory_access_type
= VMAT_ELEMENTWISE
;
2191 /* If there is a gap at the end of the group then these optimizations
2192 would access excess elements in the last iteration. */
2193 bool would_overrun_p
= (gap
!= 0);
2194 /* An overrun is fine if the trailing elements are smaller than the
2195 alignment boundary B. Every vector access will be a multiple of B
2196 and so we are guaranteed to access a non-gap element in the
2197 same B-sized block. */
2200 && gap
< (vect_known_alignment_in_bytes (first_dr_info
)
2201 / vect_get_scalar_dr_size (first_dr_info
)))
2202 would_overrun_p
= false;
2204 if (!STMT_VINFO_STRIDED_P (first_stmt_info
)
2205 && (can_overrun_p
|| !would_overrun_p
)
2206 && compare_step_with_zero (vinfo
, stmt_info
) > 0)
2208 /* First cope with the degenerate case of a single-element
2210 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
), 1U))
2211 *memory_access_type
= VMAT_CONTIGUOUS
;
2213 /* Otherwise try using LOAD/STORE_LANES. */
2214 if (*memory_access_type
== VMAT_ELEMENTWISE
2215 && (vls_type
== VLS_LOAD
2216 ? vect_load_lanes_supported (vectype
, group_size
, masked_p
)
2217 : vect_store_lanes_supported (vectype
, group_size
,
2220 *memory_access_type
= VMAT_LOAD_STORE_LANES
;
2221 overrun_p
= would_overrun_p
;
2224 /* If that fails, try using permuting loads. */
2225 if (*memory_access_type
== VMAT_ELEMENTWISE
2226 && (vls_type
== VLS_LOAD
2227 ? vect_grouped_load_supported (vectype
, single_element_p
,
2229 : vect_grouped_store_supported (vectype
, group_size
)))
2231 *memory_access_type
= VMAT_CONTIGUOUS_PERMUTE
;
2232 overrun_p
= would_overrun_p
;
2236 /* As a last resort, trying using a gather load or scatter store.
2238 ??? Although the code can handle all group sizes correctly,
2239 it probably isn't a win to use separate strided accesses based
2240 on nearby locations. Or, even if it's a win over scalar code,
2241 it might not be a win over vectorizing at a lower VF, if that
2242 allows us to use contiguous accesses. */
2243 if (*memory_access_type
== VMAT_ELEMENTWISE
2246 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2248 *memory_access_type
= VMAT_GATHER_SCATTER
;
/* For stores, verify every group member's stored value is a simple use.  */
2251 if (vls_type
!= VLS_LOAD
&& first_stmt_info
== stmt_info
)
2253 /* STMT is the leader of the group. Check the operands of all the
2254 stmts of the group. */
2255 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (stmt_info
);
2256 while (next_stmt_info
)
2258 tree op
= vect_get_store_rhs (next_stmt_info
);
2259 enum vect_def_type dt
;
2260 if (!vect_is_simple_use (op
, vinfo
, &dt
))
2262 if (dump_enabled_p ())
2263 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2264 "use not simple.\n");
2267 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
/* Overrun accepted: record that the loop needs peeling for gaps.  */
2273 gcc_assert (can_overrun_p
);
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2276 "Data access with gaps requires scalar "
2278 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2284 /* Analyze load or store statement STMT_INFO of type VLS_TYPE. Return true
2285 if there is a memory access type that the vectorized form can use,
2286 storing it in *MEMORY_ACCESS_TYPE if so. If we decide to use gathers
2287 or scatters, fill in GS_INFO accordingly.
2289 SLP says whether we're performing SLP rather than loop vectorization.
2290 MASKED_P is true if the statement is conditional on a vectorized mask.
2291 VECTYPE is the vector type that the vectorized statements will use.
2292 NCOPIES is the number of vector statements that will be needed. */
/* NOTE(review): lossy extraction — comments only, no code tokens changed.
   Purpose (from the comment preceding this function): analyze a load/store
   STMT_INFO of type VLS_TYPE and pick a memory access type the vectorized
   form can use, storing it in *MEMORY_ACCESS_TYPE; fills GS_INFO for
   gathers/scatters.  Dispatches on gather/scatter vs. grouped vs. strided
   vs. plain contiguous access.  */
2295 get_load_store_type (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2296 tree vectype
, bool slp
,
2297 bool masked_p
, vec_load_store_type vls_type
,
2298 unsigned int ncopies
,
2299 vect_memory_access_type
*memory_access_type
,
2300 gather_scatter_info
*gs_info
)
2302 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2303 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
/* Case 1: an explicit gather/scatter access.  */
2304 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
2306 *memory_access_type
= VMAT_GATHER_SCATTER
;
2307 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, gs_info
))
2309 else if (!vect_is_simple_use (gs_info
->offset
, vinfo
,
2310 &gs_info
->offset_dt
,
2311 &gs_info
->offset_vectype
))
2313 if (dump_enabled_p ())
2314 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2315 "%s index use not simple.\n",
2316 vls_type
== VLS_LOAD
? "gather" : "scatter");
/* Case 2: part of an interleaving group.  */
2320 else if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
2322 if (!get_group_load_store_type (vinfo
, stmt_info
, vectype
, slp
, masked_p
,
2323 vls_type
, memory_access_type
, gs_info
))
/* Case 3: strided access.  */
2326 else if (STMT_VINFO_STRIDED_P (stmt_info
))
2330 && vect_use_strided_gather_scatters_p (stmt_info
, loop_vinfo
,
2332 *memory_access_type
= VMAT_GATHER_SCATTER
;
2334 *memory_access_type
= VMAT_ELEMENTWISE
;
/* Case 4: plain access — classify by step sign (negative/zero/positive).  */
2338 int cmp
= compare_step_with_zero (vinfo
, stmt_info
);
2340 *memory_access_type
= get_negative_load_store_type
2341 (vinfo
, stmt_info
, vectype
, vls_type
, ncopies
);
2344 gcc_assert (vls_type
== VLS_LOAD
);
2345 *memory_access_type
= VMAT_INVARIANT
;
2348 *memory_access_type
= VMAT_CONTIGUOUS
;
/* Elementwise/strided-SLP accesses need a constant number of subparts.  */
2351 if ((*memory_access_type
== VMAT_ELEMENTWISE
2352 || *memory_access_type
== VMAT_STRIDED_SLP
)
2353 && !nunits
.is_constant ())
2355 if (dump_enabled_p ())
2356 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2357 "Not using elementwise accesses due to variable "
2358 "vectorization factor.\n");
2362 /* FIXME: At the moment the cost model seems to underestimate the
2363 cost of using elementwise accesses. This check preserves the
2364 traditional behavior until that can be fixed. */
2365 stmt_vec_info first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
2366 if (!first_stmt_info
)
2367 first_stmt_info
= stmt_info
;
2368 if (*memory_access_type
== VMAT_ELEMENTWISE
2369 && !STMT_VINFO_STRIDED_P (first_stmt_info
)
2370 && !(stmt_info
== DR_GROUP_FIRST_ELEMENT (stmt_info
)
2371 && !DR_GROUP_NEXT_ELEMENT (stmt_info
)
2372 && !pow2p_hwi (DR_GROUP_SIZE (stmt_info
))))
2374 if (dump_enabled_p ())
2375 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2376 "not falling back to elementwise accesses\n");
2382 /* Return true if boolean argument MASK is suitable for vectorizing
2383 conditional operation STMT_INFO. When returning true, store the type
2384 of the definition in *MASK_DT_OUT and the type of the vectorized mask
2385 in *MASK_VECTYPE_OUT. */
/* NOTE(review): lossy extraction — comments only, no code tokens changed.
   Purpose (from the comment preceding this function): check that boolean
   MASK is usable for conditional operation STMT_INFO; on success stores the
   definition type in *MASK_DT_OUT and the vector mask type in
   *MASK_VECTYPE_OUT.  Failure paths (the dropped `return false` lines) emit
   a missed-optimization dump first.  */
2388 vect_check_scalar_mask (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree mask
,
2389 vect_def_type
*mask_dt_out
,
2390 tree
*mask_vectype_out
)
/* The mask must be a scalar boolean...  */
2392 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask
)))
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2396 "mask argument is not a boolean.\n");
/* ... and an SSA name with a simple definition.  */
2400 if (TREE_CODE (mask
) != SSA_NAME
)
2402 if (dump_enabled_p ())
2403 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2404 "mask argument is not an SSA name.\n");
2408 enum vect_def_type mask_dt
;
2410 if (!vect_is_simple_use (mask
, vinfo
, &mask_dt
, &mask_vectype
))
2412 if (dump_enabled_p ())
2413 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2414 "mask use not simple.\n");
2418 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2420 mask_vectype
= get_mask_type_for_scalar_type (vinfo
, TREE_TYPE (vectype
));
2422 if (!mask_vectype
|| !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))
2424 if (dump_enabled_p ())
2425 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2426 "could not find an appropriate vector mask type.\n");
/* Mask and data vector types must have the same number of subparts.  */
2430 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_vectype
),
2431 TYPE_VECTOR_SUBPARTS (vectype
)))
2433 if (dump_enabled_p ())
2434 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2435 "vector mask type %T"
2436 " does not match vector data type %T.\n",
2437 mask_vectype
, vectype
);
2442 *mask_dt_out
= mask_dt
;
2443 *mask_vectype_out
= mask_vectype
;
2447 /* Return true if stored value RHS is suitable for vectorizing store
2448 statement STMT_INFO. When returning true, store the type of the
2449 definition in *RHS_DT_OUT, the type of the vectorized store value in
2450 *RHS_VECTYPE_OUT and the type of the store in *VLS_TYPE_OUT. */
/* NOTE(review): lossy extraction — comments only, no code tokens changed.
   Purpose (from the comment preceding this function): check that stored
   value RHS is suitable for vectorizing store STMT_INFO; on success fills
   *RHS_DT_OUT, *RHS_VECTYPE_OUT and *VLS_TYPE_OUT (VLS_STORE vs.
   VLS_STORE_INVARIANT for constant/external defs).  */
2453 vect_check_store_rhs (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2454 slp_tree slp_node
, tree rhs
,
2455 vect_def_type
*rhs_dt_out
, tree
*rhs_vectype_out
,
2456 vec_load_store_type
*vls_type_out
)
2458 /* In the case this is a store from a constant make sure
2459 native_encode_expr can handle it. */
2460 if (CONSTANT_CLASS_P (rhs
) && native_encode_expr (rhs
, NULL
, 64) == 0)
2462 if (dump_enabled_p ())
2463 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2464 "cannot encode constant as a byte sequence.\n");
2468 enum vect_def_type rhs_dt
;
2471 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
2472 &rhs
, &slp_op
, &rhs_dt
, &rhs_vectype
))
2474 if (dump_enabled_p ())
2475 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2476 "use not simple.\n");
2480 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2481 if (rhs_vectype
&& !useless_type_conversion_p (vectype
, rhs_vectype
))
2483 if (dump_enabled_p ())
2484 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2485 "incompatible vector types.\n");
2489 *rhs_dt_out
= rhs_dt
;
2490 *rhs_vectype_out
= rhs_vectype
;
/* Invariant RHS (constant or defined outside the loop) is a special case.  */
2491 if (rhs_dt
== vect_constant_def
|| rhs_dt
== vect_external_def
)
2492 *vls_type_out
= VLS_STORE_INVARIANT
;
2494 *vls_type_out
= VLS_STORE
;
2498 /* Build an all-ones vector mask of type MASKTYPE while vectorizing STMT_INFO.
2499 Note that we support masks with floating-point type, in which case the
2500 floats are interpreted as a bitmask. */
/* NOTE(review): lossy extraction — comments only, no code tokens changed.
   Purpose (from the comment preceding this function): build an all-ones
   mask of type MASKTYPE; floating-point masks are interpreted as bitmasks
   (hence the raw-bit round-trip through real_from_target).  */
2503 vect_build_all_ones_mask (vec_info
*vinfo
,
2504 stmt_vec_info stmt_info
, tree masktype
)
/* Scalar integer mask: just -1.  */
2506 if (TREE_CODE (masktype
) == INTEGER_TYPE
)
2507 return build_int_cst (masktype
, -1);
/* Vector of integers: splat -1 across the vector.  */
2508 else if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
2510 tree mask
= build_int_cst (TREE_TYPE (masktype
), -1);
2511 mask
= build_vector_from_val (masktype
, mask
);
2512 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
/* Vector of floats: build the all-ones bit pattern, reinterpret as a real,
   then splat.  (The tmp[] initialization lines were dropped by the
   extraction — confirm against the original source.)  */
2514 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
2518 for (int j
= 0; j
< 6; ++j
)
2520 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
2521 tree mask
= build_real (TREE_TYPE (masktype
), r
);
2522 mask
= build_vector_from_val (masktype
, mask
);
2523 return vect_init_vector (vinfo
, stmt_info
, mask
, masktype
, NULL
);
2528 /* Build an all-zero merge value of type VECTYPE while vectorizing
2529 STMT_INFO as a gather load. */
/* NOTE(review): lossy extraction — comments only, no code tokens changed.
   Purpose (from the comment preceding this function): build an all-zero
   merge value of VECTYPE for a gather load.  Mirrors
   vect_build_all_ones_mask but with a zero bit pattern.  */
2532 vect_build_zero_merge_argument (vec_info
*vinfo
,
2533 stmt_vec_info stmt_info
, tree vectype
)
/* Integer element: zero constant.  */
2536 if (TREE_CODE (TREE_TYPE (vectype
)) == INTEGER_TYPE
)
2537 merge
= build_int_cst (TREE_TYPE (vectype
), 0);
/* Float element: build a zero bit pattern and reinterpret as a real.
   (tmp[] setup lines were dropped by the extraction.)  */
2538 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (vectype
)))
2542 for (int j
= 0; j
< 6; ++j
)
2544 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (vectype
)));
2545 merge
= build_real (TREE_TYPE (vectype
), r
);
/* Splat the scalar zero across the vector and emit the init stmt.  */
2549 merge
= build_vector_from_val (vectype
, merge
);
2550 return vect_init_vector (vinfo
, stmt_info
, merge
, vectype
, NULL
);
2553 /* Build a gather load call while vectorizing STMT_INFO. Insert new
2554 instructions before GSI and add them to VEC_STMT. GS_INFO describes
2555 the gather load operation. If the load is conditional, MASK is the
2556 unvectorized condition and MASK_DT is its definition type, otherwise
/* NOTE(review): lossy extraction — original line numbers fused into the
   text, many statements/braces missing.  Comments only; no code tokens
   changed.
   Purpose (from the comment preceding this function): emit a builtin-based
   gather load for STMT_INFO, inserting new stmts before GSI; GS_INFO
   describes the gather, MASK (if any) the unvectorized condition.  Handles
   NONE/WIDEN/NARROW modifiers depending on how the offset vector's subparts
   relate to the data vector's.  */
2560 vect_build_gather_load_calls (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2561 gimple_stmt_iterator
*gsi
,
2563 gather_scatter_info
*gs_info
,
2566 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2567 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2568 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2569 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2570 int ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2571 edge pe
= loop_preheader_edge (loop
);
2572 enum { NARROW
, NONE
, WIDEN
} modifier
;
2573 poly_uint64 gather_off_nunits
2574 = TYPE_VECTOR_SUBPARTS (gs_info
->offset_vectype
);
/* Decompose the builtin decl's signature: src, ptr, index, mask, scale.  */
2576 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
->decl
));
2577 tree rettype
= TREE_TYPE (TREE_TYPE (gs_info
->decl
));
2578 tree srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2579 tree ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2580 tree idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2581 tree masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
2582 tree scaletype
= TREE_VALUE (arglist
);
2583 tree real_masktype
= masktype
;
2584 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
2586 || TREE_CODE (masktype
) == INTEGER_TYPE
2587 || types_compatible_p (srctype
, masktype
)));
/* A masked gather with an integer mask type uses a boolean vector
   internally.  */
2588 if (mask
&& TREE_CODE (masktype
) == INTEGER_TYPE
)
2589 masktype
= truth_type_for (srctype
);
2591 tree mask_halftype
= masktype
;
2592 tree perm_mask
= NULL_TREE
;
2593 tree mask_perm_mask
= NULL_TREE
;
/* Choose NONE / WIDEN / NARROW based on subpart ratio and build the
   permutation selectors needed for the non-NONE cases.  */
2594 if (known_eq (nunits
, gather_off_nunits
))
2596 else if (known_eq (nunits
* 2, gather_off_nunits
))
2600 /* Currently widening gathers and scatters are only supported for
2601 fixed-length vectors. */
2602 int count
= gather_off_nunits
.to_constant ();
2603 vec_perm_builder
sel (count
, count
, 1);
2604 for (int i
= 0; i
< count
; ++i
)
2605 sel
.quick_push (i
| (count
/ 2));
2607 vec_perm_indices
indices (sel
, 1, count
);
2608 perm_mask
= vect_gen_perm_mask_checked (gs_info
->offset_vectype
,
2611 else if (known_eq (nunits
, gather_off_nunits
* 2))
2615 /* Currently narrowing gathers and scatters are only supported for
2616 fixed-length vectors. */
2617 int count
= nunits
.to_constant ();
2618 vec_perm_builder
sel (count
, count
, 1);
2619 sel
.quick_grow (count
);
2620 for (int i
= 0; i
< count
; ++i
)
2621 sel
[i
] = i
< count
/ 2 ? i
: i
+ count
/ 2;
2622 vec_perm_indices
indices (sel
, 2, count
);
2623 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
2627 if (mask
&& masktype
== real_masktype
)
2629 for (int i
= 0; i
< count
; ++i
)
2630 sel
[i
] = i
| (count
/ 2);
2631 indices
.new_vector (sel
, 2, count
);
2632 mask_perm_mask
= vect_gen_perm_mask_checked (masktype
, indices
);
2635 mask_halftype
= truth_type_for (gs_info
->offset_vectype
);
2640 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
2641 tree vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
/* Force the base address into the loop preheader if it isn't invariant.  */
2643 tree ptr
= fold_convert (ptrtype
, gs_info
->base
);
2644 if (!is_gimple_min_invariant (ptr
))
2647 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
2648 basic_block new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
2649 gcc_assert (!new_bb
);
2652 tree scale
= build_int_cst (scaletype
, gs_info
->scale
);
2654 tree vec_oprnd0
= NULL_TREE
;
2655 tree vec_mask
= NULL_TREE
;
2656 tree src_op
= NULL_TREE
;
2657 tree mask_op
= NULL_TREE
;
2658 tree prev_res
= NULL_TREE
;
/* Unmasked gather: merge with zeros under an all-ones mask.  */
2662 src_op
= vect_build_zero_merge_argument (vinfo
, stmt_info
, rettype
);
2663 mask_op
= vect_build_all_ones_mask (vinfo
, stmt_info
, masktype
);
2666 auto_vec
<tree
> vec_oprnds0
;
2667 auto_vec
<tree
> vec_masks
;
2668 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2669 modifier
== WIDEN
? ncopies
/ 2 : ncopies
,
2670 gs_info
->offset
, &vec_oprnds0
);
2672 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
2673 modifier
== NARROW
? ncopies
/ 2 : ncopies
,
/* Main loop: one gather builtin call per copy.  */
2675 for (int j
= 0; j
< ncopies
; ++j
)
/* WIDEN: reuse each offset vector twice, permuting the high half on odd
   iterations.  */
2678 if (modifier
== WIDEN
&& (j
& 1))
2679 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
2680 perm_mask
, stmt_info
, gsi
);
2682 op
= vec_oprnd0
= vec_oprnds0
[modifier
== WIDEN
? j
/ 2 : j
];
/* Reinterpret the offset vector as the builtin's index type if needed.  */
2684 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
2686 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
2687 TYPE_VECTOR_SUBPARTS (idxtype
)));
2688 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
2689 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
2690 gassign
*new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2691 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Prepare the mask operand for this copy (permute / unpack as needed).  */
2697 if (mask_perm_mask
&& (j
& 1))
2698 mask_op
= permute_vec_elements (vinfo
, mask_op
, mask_op
,
2699 mask_perm_mask
, stmt_info
, gsi
);
2702 if (modifier
== NARROW
)
2705 vec_mask
= vec_masks
[j
/ 2];
2708 vec_mask
= vec_masks
[j
];
2711 if (!useless_type_conversion_p (masktype
, TREE_TYPE (vec_mask
)))
2713 poly_uint64 sub1
= TYPE_VECTOR_SUBPARTS (TREE_TYPE (mask_op
));
2714 poly_uint64 sub2
= TYPE_VECTOR_SUBPARTS (masktype
);
2715 gcc_assert (known_eq (sub1
, sub2
));
2716 var
= vect_get_new_ssa_name (masktype
, vect_simple_var
);
2717 mask_op
= build1 (VIEW_CONVERT_EXPR
, masktype
, mask_op
);
2719 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_op
);
2720 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2724 if (modifier
== NARROW
&& masktype
!= real_masktype
)
2726 var
= vect_get_new_ssa_name (mask_halftype
, vect_simple_var
);
2728 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
2729 : VEC_UNPACK_LO_EXPR
,
2731 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* Convert the boolean mask to the builtin's (integer) mask type.  */
2737 tree mask_arg
= mask_op
;
2738 if (masktype
!= real_masktype
)
2740 tree utype
, optype
= TREE_TYPE (mask_op
);
2741 if (TYPE_MODE (real_masktype
) == TYPE_MODE (optype
))
2742 utype
= real_masktype
;
2744 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
2745 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
2746 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_op
);
2748 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
2749 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2751 if (!useless_type_conversion_p (real_masktype
, utype
))
2753 gcc_assert (TYPE_PRECISION (utype
)
2754 <= TYPE_PRECISION (real_masktype
))
;
2755 var
= vect_get_new_ssa_name (real_masktype
, vect_scalar_var
);
2756 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
2757 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2760 src_op
= build_zero_cst (srctype
);
/* Emit the actual 5-argument gather builtin call.  */
2762 gimple
*new_stmt
= gimple_build_call (gs_info
->decl
, 5, src_op
, ptr
, op
,
/* Reinterpret the call result as VECTYPE if the builtin returns a
   different (same-subparts) type.  */
2765 if (!useless_type_conversion_p (vectype
, rettype
))
2767 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
2768 TYPE_VECTOR_SUBPARTS (rettype
)));
2769 op
= vect_get_new_ssa_name (rettype
, vect_simple_var
);
2770 gimple_call_set_lhs (new_stmt
, op
);
2771 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2772 var
= make_ssa_name (vec_dest
);
2773 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
2774 new_stmt
= gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
2775 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2779 var
= make_ssa_name (vec_dest
, new_stmt
);
2780 gimple_call_set_lhs (new_stmt
, var
);
2781 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
/* NARROW: combine two half-width results with a permute.  */
2784 if (modifier
== NARROW
)
2791 var
= permute_vec_elements (vinfo
, prev_res
, var
, perm_mask
,
2793 new_stmt
= SSA_NAME_DEF_STMT (var
);
2796 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
2798 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
2801 /* Prepare the base and offset in GS_INFO for vectorization.
2802 Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
2803 to the vectorized offset argument for the first copy of STMT_INFO.
2804 STMT_INFO is the statement described by GS_INFO and LOOP is the
/* NOTE(review): lossy extraction — comments only, no code tokens changed.
   Purpose (from the comment preceding this function): prepare the base and
   offset in GS_INFO for vectorization; *DATAREF_PTR gets the loop-invariant
   base address (forced into the preheader), *VEC_OFFSET the vectorized
   offset defs.  */
2808 vect_get_gather_scatter_ops (vec_info
*vinfo
,
2809 class loop
*loop
, stmt_vec_info stmt_info
,
2810 gather_scatter_info
*gs_info
,
2811 tree
*dataref_ptr
, vec
<tree
> *vec_offset
,
2814 gimple_seq stmts
= NULL
;
2815 *dataref_ptr
= force_gimple_operand (gs_info
->base
, &stmts
, true, NULL_TREE
);
/* Any statements needed to compute the base go on the preheader edge, so
   they execute once before the loop.  */
2819 edge pe
= loop_preheader_edge (loop
);
2820 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
2821 gcc_assert (!new_bb
);
2823 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
, gs_info
->offset
,
2824 vec_offset
, gs_info
->offset_vectype
);
2827 /* Prepare to implement a grouped or strided load or store using
2828 the gather load or scatter store operation described by GS_INFO.
2829 STMT_INFO is the load or store statement.
2831 Set *DATAREF_BUMP to the amount that should be added to the base
2832 address after each copy of the vectorized statement. Set *VEC_OFFSET
2833 to an invariant offset vector in which element I has the value
2834 I * DR_STEP / SCALE. */
/* NOTE(review): lossy extraction — comments only, no code tokens changed.
   Purpose (from the comment preceding this function): prepare a strided
   load/store implemented via gather/scatter; *DATAREF_BUMP is the per-copy
   base-address increment, *VEC_OFFSET an invariant vector whose element I
   is I * DR_STEP / SCALE.  */
2837 vect_get_strided_load_store_ops (stmt_vec_info stmt_info
,
2838 loop_vec_info loop_vinfo
,
2839 gather_scatter_info
*gs_info
,
2840 tree
*dataref_bump
, tree
*vec_offset
)
2842 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2843 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2844 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
/* bump = DR_STEP * number-of-subparts: how far the base moves per copy.  */
2847 tree bump
= size_binop (MULT_EXPR
,
2848 fold_convert (sizetype
, unshare_expr (DR_STEP (dr
))),
2849 size_int (TYPE_VECTOR_SUBPARTS (vectype
)));
2850 *dataref_bump
= force_gimple_operand (bump
, &stmts
, true, NULL_TREE
);
2852 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2854 /* The offset given in GS_INFO can have pointer type, so use the element
2855 type of the vector instead. */
2856 tree offset_type
= TREE_TYPE (gs_info
->offset
);
2857 offset_type
= TREE_TYPE (gs_info
->offset_vectype
);
2859 /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. */
2860 tree step
= size_binop (EXACT_DIV_EXPR
, unshare_expr (DR_STEP (dr
)),
2861 ssize_int (gs_info
->scale
));
2862 step
= fold_convert (offset_type
, step
);
2863 step
= force_gimple_operand (step
, &stmts
, true, NULL_TREE
);
2865 /* Create {0, X, X*2, X*3, ...}. */
2866 *vec_offset
= gimple_build (&stmts
, VEC_SERIES_EXPR
, gs_info
->offset_vectype
,
2867 build_zero_cst (offset_type
), step
);
2869 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
2872 /* Return the amount that should be added to a vector pointer to move
2873 to the next or previous copy of AGGR_TYPE. DR_INFO is the data reference
2874 being vectorized and MEMORY_ACCESS_TYPE describes the type of
/* NOTE(review): lossy extraction — comments only, no code tokens changed.
   Purpose (from the comment preceding this function): return the amount to
   add to a vector pointer to move to the next/previous copy of AGGR_TYPE;
   zero for invariant accesses, negated size for negative-step references.  */
2878 vect_get_data_ptr_increment (vec_info
*vinfo
,
2879 dr_vec_info
*dr_info
, tree aggr_type
,
2880 vect_memory_access_type memory_access_type
)
/* Invariant accesses never advance the pointer.  */
2882 if (memory_access_type
== VMAT_INVARIANT
)
2883 return size_zero_node
;
2885 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
2886 tree step
= vect_dr_behavior (vinfo
, dr_info
)->step
;
/* A negative data-reference step means the pointer moves backwards.  */
2887 if (tree_int_cst_sgn (step
) == -1)
2888 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
2892 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64,128}. */
/* NOTE(review): lossy extraction — comments only, no code tokens changed.
   Purpose (from the comment preceding this function): check and perform
   vectorization of BUILT_IN_BSWAP{16,32,64,128} by viewing the operand as a
   vector of chars, applying a byte-reversing VEC_PERM_EXPR, and viewing the
   result back.  The analysis phase (cost recording) and transform phase
   share this function; several guard lines were dropped by the
   extraction.  */
2895 vectorizable_bswap (vec_info
*vinfo
,
2896 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
2897 gimple
**vec_stmt
, slp_tree slp_node
,
2899 tree vectype_in
, stmt_vector_for_cost
*cost_vec
)
2902 gcall
*stmt
= as_a
<gcall
*> (stmt_info
->stmt
);
2903 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
2906 op
= gimple_call_arg (stmt
, 0);
2907 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2908 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2910 /* Multiple types in SLP are handled by creating the appropriate number of
2911 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2916 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
2918 gcc_assert (ncopies
>= 1);
/* View the input as a vector of chars of the same overall size.  */
2920 tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype_in
);
2924 poly_uint64 num_bytes
= TYPE_VECTOR_SUBPARTS (char_vectype
);
2925 unsigned word_bytes
;
2926 if (!constant_multiple_p (num_bytes
, nunits
, &word_bytes
))
2929 /* The encoding uses one stepped pattern for each byte in the word. */
2930 vec_perm_builder
elts (num_bytes
, word_bytes
, 3);
2931 for (unsigned i
= 0; i
< 3; ++i
)
2932 for (unsigned j
= 0; j
< word_bytes
; ++j
)
2933 elts
.quick_push ((i
+ 1) * word_bytes
- j
- 1);
/* The byte-reversal permutation must be supported by the target.  */
2935 vec_perm_indices
indices (elts
, 1, num_bytes
);
2936 if (!can_vec_perm_const_p (TYPE_MODE (char_vectype
), indices
))
2942 && !vect_maybe_update_slp_op_vectype (slp_op
[0], vectype_in
))
2944 if (dump_enabled_p ())
2945 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2946 "incompatible vector types for invariants\n");
/* Analysis phase: record costs and mark the stmt; no code emitted yet.  */
2950 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
2951 DUMP_VECT_SCOPE ("vectorizable_bswap");
2954 record_stmt_cost (cost_vec
,
2955 1, vector_stmt
, stmt_info
, 0, vect_prologue
);
2956 record_stmt_cost (cost_vec
,
2957 ncopies
, vec_perm
, stmt_info
, 0, vect_body
);
/* Transform phase: materialize the permutation selector and emit
   view-convert / permute / view-convert per operand vector.  */
2962 tree bswap_vconst
= vec_perm_indices_to_tree (char_vectype
, indices
);
2965 vec
<tree
> vec_oprnds
= vNULL
;
2966 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
2968 /* Arguments are ready. create the new vector stmt. */
2971 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
2974 tree tem
= make_ssa_name (char_vectype
);
2975 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2976 char_vectype
, vop
));
2977 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2978 tree tem2
= make_ssa_name (char_vectype
);
2979 new_stmt
= gimple_build_assign (tem2
, VEC_PERM_EXPR
,
2980 tem
, tem
, bswap_vconst
);
2981 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2982 tem
= make_ssa_name (vectype
);
2983 new_stmt
= gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
2985 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
2987 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2989 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
2993 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
2995 vec_oprnds
.release ();
2999 /* Return true if vector types VECTYPE_IN and VECTYPE_OUT have
3000 integer elements and if we can narrow VECTYPE_IN to VECTYPE_OUT
3001 in a single step. On success, store the binary pack code in
/* NOTE(review): lossy extraction — comments only, no code tokens changed.
   Purpose (from the comment preceding this function): return true if
   VECTYPE_IN/OUT have integer elements and VECTYPE_IN can be narrowed to
   VECTYPE_OUT in a single step, storing the pack code in *CONVERT_CODE.  */
3005 simple_integer_narrowing (tree vectype_out
, tree vectype_in
,
3006 tree_code
*convert_code
)
/* Both element types must be integral.  */
3008 if (!INTEGRAL_TYPE_P (TREE_TYPE (vectype_out
))
3009 || !INTEGRAL_TYPE_P (TREE_TYPE (vectype_in
)))
3013 int multi_step_cvt
= 0;
3014 auto_vec
<tree
, 8> interm_types
;
/* Single-step only: a multi-step narrowing (multi_step_cvt != 0)
   presumably fails this check — confirm against the original source.  */
3015 if (!supportable_narrowing_operation (NOP_EXPR
, vectype_out
, vectype_in
,
3016 &code
, &multi_step_cvt
, &interm_types
)
3020 *convert_code
= code
;
3024 /* Function vectorizable_call.
3026 Check if STMT_INFO performs a function call that can be vectorized.
3027 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3028 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3029 Return true if STMT_INFO is vectorizable in this way. */
3032 vectorizable_call (vec_info
*vinfo
,
3033 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
3034 gimple
**vec_stmt
, slp_tree slp_node
,
3035 stmt_vector_for_cost
*cost_vec
)
3041 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
3042 tree vectype_out
, vectype_in
;
3043 poly_uint64 nunits_in
;
3044 poly_uint64 nunits_out
;
3045 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3046 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3047 tree fndecl
, new_temp
, rhs_type
;
3048 enum vect_def_type dt
[4]
3049 = { vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
,
3050 vect_unknown_def_type
};
3051 tree vectypes
[ARRAY_SIZE (dt
)] = {};
3052 slp_tree slp_op
[ARRAY_SIZE (dt
)] = {};
3053 int ndts
= ARRAY_SIZE (dt
);
3055 auto_vec
<tree
, 8> vargs
;
3056 auto_vec
<tree
, 8> orig_vargs
;
3057 enum { NARROW
, NONE
, WIDEN
} modifier
;
3061 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3064 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3068 /* Is STMT_INFO a vectorizable call? */
3069 stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3073 if (gimple_call_internal_p (stmt
)
3074 && (internal_load_fn_p (gimple_call_internal_fn (stmt
))
3075 || internal_store_fn_p (gimple_call_internal_fn (stmt
))))
3076 /* Handled by vectorizable_load and vectorizable_store. */
3079 if (gimple_call_lhs (stmt
) == NULL_TREE
3080 || TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3083 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3085 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3087 /* Process function arguments. */
3088 rhs_type
= NULL_TREE
;
3089 vectype_in
= NULL_TREE
;
3090 nargs
= gimple_call_num_args (stmt
);
3092 /* Bail out if the function has more than four arguments, we do not have
3093 interesting builtin functions to vectorize with more than two arguments
3094 except for fma. No arguments is also not good. */
3095 if (nargs
== 0 || nargs
> 4)
3098 /* Ignore the arguments of IFN_GOMP_SIMD_LANE, they are magic. */
3099 combined_fn cfn
= gimple_call_combined_fn (stmt
);
3100 if (cfn
== CFN_GOMP_SIMD_LANE
)
3103 rhs_type
= unsigned_type_node
;
3107 if (internal_fn_p (cfn
))
3108 mask_opno
= internal_fn_mask_index (as_internal_fn (cfn
));
3110 for (i
= 0; i
< nargs
; i
++)
3112 if ((int) i
== mask_opno
)
3114 op
= gimple_call_arg (stmt
, i
);
3115 if (!vect_check_scalar_mask (vinfo
,
3116 stmt_info
, op
, &dt
[i
], &vectypes
[i
]))
3121 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
3122 i
, &op
, &slp_op
[i
], &dt
[i
], &vectypes
[i
]))
3124 if (dump_enabled_p ())
3125 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3126 "use not simple.\n");
3130 /* We can only handle calls with arguments of the same type. */
3132 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
3134 if (dump_enabled_p ())
3135 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3136 "argument types differ.\n");
3140 rhs_type
= TREE_TYPE (op
);
3143 vectype_in
= vectypes
[i
];
3144 else if (vectypes
[i
]
3145 && !types_compatible_p (vectypes
[i
], vectype_in
))
3147 if (dump_enabled_p ())
3148 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3149 "argument vector types differ.\n");
3153 /* If all arguments are external or constant defs, infer the vector type
3154 from the scalar type. */
3156 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
3158 gcc_assert (vectype_in
);
3161 if (dump_enabled_p ())
3162 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3163 "no vectype for scalar type %T\n", rhs_type
);
3167 /* FORNOW: we don't yet support mixtures of vector sizes for calls,
3168 just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz*
3169 are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed
3170 by a pack of the two vectors into an SI vector. We would need
3171 separate code to handle direct VnDI->VnSI IFN_CTZs. */
3172 if (TYPE_SIZE (vectype_in
) != TYPE_SIZE (vectype_out
))
3174 if (dump_enabled_p ())
3175 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3176 "mismatched vector sizes %T and %T\n",
3177 vectype_in
, vectype_out
);
3181 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
3182 != VECTOR_BOOLEAN_TYPE_P (vectype_in
))
3184 if (dump_enabled_p ())
3185 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3186 "mixed mask and nonmask vector types\n");
3191 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
3192 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3193 if (known_eq (nunits_in
* 2, nunits_out
))
3195 else if (known_eq (nunits_out
, nunits_in
))
3197 else if (known_eq (nunits_out
* 2, nunits_in
))
3202 /* We only handle functions that do not read or clobber memory. */
3203 if (gimple_vuse (stmt
))
3205 if (dump_enabled_p ())
3206 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3207 "function reads from or writes to memory.\n");
3211 /* For now, we only vectorize functions if a target specific builtin
3212 is available. TODO -- in some cases, it might be profitable to
3213 insert the calls for pieces of the vector, in order to be able
3214 to vectorize other operations in the loop. */
3216 internal_fn ifn
= IFN_LAST
;
3217 tree callee
= gimple_call_fndecl (stmt
);
3219 /* First try using an internal function. */
3220 tree_code convert_code
= ERROR_MARK
;
3222 && (modifier
== NONE
3223 || (modifier
== NARROW
3224 && simple_integer_narrowing (vectype_out
, vectype_in
,
3226 ifn
= vectorizable_internal_function (cfn
, callee
, vectype_out
,
3229 /* If that fails, try asking for a target-specific built-in function. */
3230 if (ifn
== IFN_LAST
)
3232 if (cfn
!= CFN_LAST
)
3233 fndecl
= targetm
.vectorize
.builtin_vectorized_function
3234 (cfn
, vectype_out
, vectype_in
);
3235 else if (callee
&& fndecl_built_in_p (callee
, BUILT_IN_MD
))
3236 fndecl
= targetm
.vectorize
.builtin_md_vectorized_function
3237 (callee
, vectype_out
, vectype_in
);
3240 if (ifn
== IFN_LAST
&& !fndecl
)
3242 if (cfn
== CFN_GOMP_SIMD_LANE
3245 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3246 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
3247 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
3248 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
3250 /* We can handle IFN_GOMP_SIMD_LANE by returning a
3251 { 0, 1, 2, ... vf - 1 } vector. */
3252 gcc_assert (nargs
== 0);
3254 else if (modifier
== NONE
3255 && (gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP16
)
3256 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP32
)
3257 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP64
)
3258 || gimple_call_builtin_p (stmt
, BUILT_IN_BSWAP128
)))
3259 return vectorizable_bswap (vinfo
, stmt_info
, gsi
, vec_stmt
, slp_node
,
3260 slp_op
, vectype_in
, cost_vec
);
3263 if (dump_enabled_p ())
3264 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3265 "function is not vectorizable.\n");
3272 else if (modifier
== NARROW
&& ifn
== IFN_LAST
)
3273 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
3275 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
3277 /* Sanity check: make sure that at least one copy of the vectorized stmt
3278 needs to be generated. */
3279 gcc_assert (ncopies
>= 1);
3281 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
3282 if (!vec_stmt
) /* transformation not required. */
3285 for (i
= 0; i
< nargs
; ++i
)
3286 if (!vect_maybe_update_slp_op_vectype (slp_op
[i
], vectype_in
))
3288 if (dump_enabled_p ())
3289 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3290 "incompatible vector types for invariants\n");
3293 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
3294 DUMP_VECT_SCOPE ("vectorizable_call");
3295 vect_model_simple_cost (vinfo
, stmt_info
,
3296 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
3297 if (ifn
!= IFN_LAST
&& modifier
== NARROW
&& !slp_node
)
3298 record_stmt_cost (cost_vec
, ncopies
/ 2,
3299 vec_promote_demote
, stmt_info
, 0, vect_body
);
3301 if (loop_vinfo
&& mask_opno
>= 0)
3303 unsigned int nvectors
= (slp_node
3304 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
)
3306 tree scalar_mask
= gimple_call_arg (stmt_info
->stmt
, mask_opno
);
3307 vect_record_loop_mask (loop_vinfo
, masks
, nvectors
,
3308 vectype_out
, scalar_mask
);
3315 if (dump_enabled_p ())
3316 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3319 scalar_dest
= gimple_call_lhs (stmt
);
3320 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
3322 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
3324 if (modifier
== NONE
|| ifn
!= IFN_LAST
)
3326 tree prev_res
= NULL_TREE
;
3327 vargs
.safe_grow (nargs
);
3328 orig_vargs
.safe_grow (nargs
);
3329 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3330 for (j
= 0; j
< ncopies
; ++j
)
3332 /* Build argument list for the vectorized call. */
3335 vec
<tree
> vec_oprnds0
;
3337 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3338 vec_oprnds0
= vec_defs
[0];
3340 /* Arguments are ready. Create the new vector stmt. */
3341 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
3344 for (k
= 0; k
< nargs
; k
++)
3346 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3347 vargs
[k
] = vec_oprndsk
[i
];
3350 if (modifier
== NARROW
)
3352 /* We don't define any narrowing conditional functions
3354 gcc_assert (mask_opno
< 0);
3355 tree half_res
= make_ssa_name (vectype_in
);
3357 = gimple_build_call_internal_vec (ifn
, vargs
);
3358 gimple_call_set_lhs (call
, half_res
);
3359 gimple_call_set_nothrow (call
, true);
3360 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3363 prev_res
= half_res
;
3366 new_temp
= make_ssa_name (vec_dest
);
3367 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3368 prev_res
, half_res
);
3369 vect_finish_stmt_generation (vinfo
, stmt_info
,
3374 if (mask_opno
>= 0 && masked_loop_p
)
3376 unsigned int vec_num
= vec_oprnds0
.length ();
3377 /* Always true for SLP. */
3378 gcc_assert (ncopies
== 1);
3379 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
,
3381 vargs
[mask_opno
] = prepare_load_store_mask
3382 (TREE_TYPE (mask
), mask
, vargs
[mask_opno
], gsi
);
3386 if (ifn
!= IFN_LAST
)
3387 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3389 call
= gimple_build_call_vec (fndecl
, vargs
);
3390 new_temp
= make_ssa_name (vec_dest
, call
);
3391 gimple_call_set_lhs (call
, new_temp
);
3392 gimple_call_set_nothrow (call
, true);
3393 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3396 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3401 for (i
= 0; i
< nargs
; i
++)
3403 op
= gimple_call_arg (stmt
, i
);
3406 vec_defs
.quick_push (vNULL
);
3407 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
3410 orig_vargs
[i
] = vargs
[i
] = vec_defs
[i
][j
];
3413 if (mask_opno
>= 0 && masked_loop_p
)
3415 tree mask
= vect_get_loop_mask (gsi
, masks
, ncopies
,
3418 = prepare_load_store_mask (TREE_TYPE (mask
), mask
,
3419 vargs
[mask_opno
], gsi
);
3423 if (cfn
== CFN_GOMP_SIMD_LANE
)
3425 tree cst
= build_index_vector (vectype_out
, j
* nunits_out
, 1);
3427 = vect_get_new_ssa_name (vectype_out
, vect_simple_var
, "cst_");
3428 gimple
*init_stmt
= gimple_build_assign (new_var
, cst
);
3429 vect_init_vector_1 (vinfo
, stmt_info
, init_stmt
, NULL
);
3430 new_temp
= make_ssa_name (vec_dest
);
3431 new_stmt
= gimple_build_assign (new_temp
, new_var
);
3432 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3434 else if (modifier
== NARROW
)
3436 /* We don't define any narrowing conditional functions at
3438 gcc_assert (mask_opno
< 0);
3439 tree half_res
= make_ssa_name (vectype_in
);
3440 gcall
*call
= gimple_build_call_internal_vec (ifn
, vargs
);
3441 gimple_call_set_lhs (call
, half_res
);
3442 gimple_call_set_nothrow (call
, true);
3443 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3446 prev_res
= half_res
;
3449 new_temp
= make_ssa_name (vec_dest
);
3450 new_stmt
= gimple_build_assign (new_temp
, convert_code
,
3451 prev_res
, half_res
);
3452 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3457 if (ifn
!= IFN_LAST
)
3458 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3460 call
= gimple_build_call_vec (fndecl
, vargs
);
3461 new_temp
= make_ssa_name (vec_dest
, call
);
3462 gimple_call_set_lhs (call
, new_temp
);
3463 gimple_call_set_nothrow (call
, true);
3464 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3468 if (j
== (modifier
== NARROW
? 1 : 0))
3469 *vec_stmt
= new_stmt
;
3470 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3472 for (i
= 0; i
< nargs
; i
++)
3474 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3475 vec_oprndsi
.release ();
3478 else if (modifier
== NARROW
)
3480 auto_vec
<vec
<tree
> > vec_defs (nargs
);
3481 /* We don't define any narrowing conditional functions at present. */
3482 gcc_assert (mask_opno
< 0);
3483 for (j
= 0; j
< ncopies
; ++j
)
3485 /* Build argument list for the vectorized call. */
3487 vargs
.create (nargs
* 2);
3493 vec
<tree
> vec_oprnds0
;
3495 vect_get_slp_defs (vinfo
, slp_node
, &vec_defs
);
3496 vec_oprnds0
= vec_defs
[0];
3498 /* Arguments are ready. Create the new vector stmt. */
3499 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
3503 for (k
= 0; k
< nargs
; k
++)
3505 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
3506 vargs
.quick_push (vec_oprndsk
[i
]);
3507 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
3510 if (ifn
!= IFN_LAST
)
3511 call
= gimple_build_call_internal_vec (ifn
, vargs
);
3513 call
= gimple_build_call_vec (fndecl
, vargs
);
3514 new_temp
= make_ssa_name (vec_dest
, call
);
3515 gimple_call_set_lhs (call
, new_temp
);
3516 gimple_call_set_nothrow (call
, true);
3517 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
3518 SLP_TREE_VEC_STMTS (slp_node
).quick_push (call
);
3523 for (i
= 0; i
< nargs
; i
++)
3525 op
= gimple_call_arg (stmt
, i
);
3528 vec_defs
.quick_push (vNULL
);
3529 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, 2 * ncopies
,
3530 op
, &vec_defs
[i
], vectypes
[i
]);
3532 vec_oprnd0
= vec_defs
[i
][2*j
];
3533 vec_oprnd1
= vec_defs
[i
][2*j
+1];
3535 vargs
.quick_push (vec_oprnd0
);
3536 vargs
.quick_push (vec_oprnd1
);
3539 gcall
*new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
3540 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3541 gimple_call_set_lhs (new_stmt
, new_temp
);
3542 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
3544 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
3548 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
3550 for (i
= 0; i
< nargs
; i
++)
3552 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
3553 vec_oprndsi
.release ();
3557 /* No current target implements this case. */
3562 /* The call in STMT might prevent it from being removed in dce.
3563 We however cannot remove it here, due to the way the ssa name
3564 it defines is mapped to the new definition. So just replace
3565 rhs of the statement with something harmless. */
3570 stmt_info
= vect_orig_stmt (stmt_info
);
3571 lhs
= gimple_get_lhs (stmt_info
->stmt
);
3574 = gimple_build_assign (lhs
, build_zero_cst (TREE_TYPE (lhs
)));
3575 vinfo
->replace_stmt (gsi
, stmt_info
, new_stmt
);
/* Per-argument bookkeeping used by vectorizable_simd_clone_call when
   matching a call's arguments against the available SIMD clones.
   NOTE(review): this extraction is missing several lines of the struct
   (e.g. the vectype/op/align members referenced by its users) -- confirm
   against the full source.  */
3581 struct simd_call_arg_info
/* Step of a linear argument; 0 when the argument is not linear.  */
3585 HOST_WIDE_INT linear_step
;
/* Vectorizer definition type of the argument (constant, external,
   internal, ...), as computed by vect_is_simple_use.  */
3586 enum vect_def_type dt
;
/* True if the argument is linear within the simd lane only, i.e. the
   pattern recognized by vect_simd_lane_linear below.  */
3588 bool simd_lane_linear
;
3591 /* Helper function of vectorizable_simd_clone_call. If OP, an SSA_NAME,
3592 is linear within simd lane (but not within whole loop), note it in
3596 vect_simd_lane_linear (tree op
, class loop
*loop
,
3597 struct simd_call_arg_info
*arginfo
)
/* OP must be defined by a POINTER_PLUS_EXPR with an invariant base;
   otherwise there is nothing to recognize here.  */
3599 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
3601 if (!is_gimple_assign (def_stmt
)
3602 || gimple_assign_rhs_code (def_stmt
) != POINTER_PLUS_EXPR
3603 || !is_gimple_min_invariant (gimple_assign_rhs1 (def_stmt
)))
/* Decompose OP as BASE p+ V and walk V's SSA definition chain,
   accumulating invariant additions into BASE and at most one
   multiplicative step into LINEAR_STEP.  */
3606 tree base
= gimple_assign_rhs1 (def_stmt
);
3607 HOST_WIDE_INT linear_step
= 0;
3608 tree v
= gimple_assign_rhs2 (def_stmt
);
3609 while (TREE_CODE (v
) == SSA_NAME
)
3612 def_stmt
= SSA_NAME_DEF_STMT (v
);
3613 if (is_gimple_assign (def_stmt
))
3614 switch (gimple_assign_rhs_code (def_stmt
))
/* Invariant addend: fold it into BASE and keep walking.  */
3617 t
= gimple_assign_rhs2 (def_stmt
);
3618 if (linear_step
|| TREE_CODE (t
) != INTEGER_CST
)
3620 base
= fold_build2 (POINTER_PLUS_EXPR
, TREE_TYPE (base
), base
, t
);
3621 v
= gimple_assign_rhs1 (def_stmt
);
/* Multiplication by a constant: at most one such step is allowed;
   it becomes the per-lane LINEAR_STEP.  */
3624 t
= gimple_assign_rhs2 (def_stmt
);
3625 if (linear_step
|| !tree_fits_shwi_p (t
) || integer_zerop (t
))
3627 linear_step
= tree_to_shwi (t
);
3628 v
= gimple_assign_rhs1 (def_stmt
);
/* Conversions are looked through only when they do not narrow the
   value (precision of the destination covers the source).  */
3631 t
= gimple_assign_rhs1 (def_stmt
);
3632 if (TREE_CODE (TREE_TYPE (t
)) != INTEGER_TYPE
3633 || (TYPE_PRECISION (TREE_TYPE (v
))
3634 < TYPE_PRECISION (TREE_TYPE (t
))))
/* The walk must terminate at a GOMP_SIMD_LANE internal call whose
   argument is the expected simduid SSA_NAME.  */
3643 else if (gimple_call_internal_p (def_stmt
, IFN_GOMP_SIMD_LANE
)
3645 && TREE_CODE (gimple_call_arg (def_stmt
, 0)) == SSA_NAME
3646 && (SSA_NAME_VAR (gimple_call_arg (def_stmt
, 0))
/* Pattern matched: record the per-simd-lane linear step in ARGINFO.  */
3651 arginfo
->linear_step
= linear_step
;
3653 arginfo
->simd_lane_linear
= true;
3659 /* Return the number of elements in vector type VECTYPE, which is associated
3660 with a SIMD clone. At present these vectors always have a constant
/* to_constant () asserts the element count is a compile-time constant,
   which holds for SIMD-clone vector types per the comment above.  */
3663 static unsigned HOST_WIDE_INT
3664 simd_clone_subparts (tree vectype
)
3666 return TYPE_VECTOR_SUBPARTS (vectype
).to_constant ();
3669 /* Function vectorizable_simd_clone_call.
3671 Check if STMT_INFO performs a function call that can be vectorized
3672 by calling a simd clone of the function.
3673 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
3674 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3675 Return true if STMT_INFO is vectorizable in this way. */
3678 vectorizable_simd_clone_call (vec_info
*vinfo
, stmt_vec_info stmt_info
,
3679 gimple_stmt_iterator
*gsi
,
3680 gimple
**vec_stmt
, slp_tree slp_node
,
3681 stmt_vector_for_cost
*)
3686 tree vec_oprnd0
= NULL_TREE
;
3688 unsigned int nunits
;
3689 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
3690 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
3691 class loop
*loop
= loop_vinfo
? LOOP_VINFO_LOOP (loop_vinfo
) : NULL
;
3692 tree fndecl
, new_temp
;
3694 auto_vec
<simd_call_arg_info
> arginfo
;
3695 vec
<tree
> vargs
= vNULL
;
3697 tree lhs
, rtype
, ratype
;
3698 vec
<constructor_elt
, va_gc
> *ret_ctor_elts
= NULL
;
3700 /* Is STMT a vectorizable call? */
3701 gcall
*stmt
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
3705 fndecl
= gimple_call_fndecl (stmt
);
3706 if (fndecl
== NULL_TREE
)
/* The callee must have at least one SIMD clone registered in the
   callgraph, otherwise there is nothing to call.  */
3709 struct cgraph_node
*node
= cgraph_node::get (fndecl
);
3710 if (node
== NULL
|| node
->simd_clones
== NULL
)
3713 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3716 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
3720 if (gimple_call_lhs (stmt
)
3721 && TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
3724 gcc_checking_assert (!stmt_can_throw_internal (cfun
, stmt
));
3726 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3728 if (loop_vinfo
&& nested_in_vect_loop_p (loop
, stmt_info
))
3735 /* Process function arguments. */
3736 nargs
= gimple_call_num_args (stmt
);
3738 /* Bail out if the function has zero arguments. */
3742 arginfo
.reserve (nargs
, true);
/* Classify each scalar argument: definition type, alignment, and any
   linear/per-simd-lane behavior recorded during analysis.  */
3744 for (i
= 0; i
< nargs
; i
++)
3746 simd_call_arg_info thisarginfo
;
3749 thisarginfo
.linear_step
= 0;
3750 thisarginfo
.align
= 0;
3751 thisarginfo
.op
= NULL_TREE
;
3752 thisarginfo
.simd_lane_linear
= false;
3754 op
= gimple_call_arg (stmt
, i
);
3755 if (!vect_is_simple_use (op
, vinfo
, &thisarginfo
.dt
,
3756 &thisarginfo
.vectype
)
3757 || thisarginfo
.dt
== vect_uninitialized_def
)
3759 if (dump_enabled_p ())
3760 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3761 "use not simple.\n");
3765 if (thisarginfo
.dt
== vect_constant_def
3766 || thisarginfo
.dt
== vect_external_def
)
3767 gcc_assert (thisarginfo
.vectype
== NULL_TREE
);
3770 gcc_assert (thisarginfo
.vectype
!= NULL_TREE
);
3771 if (VECTOR_BOOLEAN_TYPE_P (thisarginfo
.vectype
))
3773 if (dump_enabled_p ())
3774 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3775 "vector mask arguments are not supported\n");
3780 /* For linear arguments, the analyze phase should have saved
3781 the base and step in STMT_VINFO_SIMD_CLONE_INFO. */
3782 if (i
* 3 + 4 <= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).length ()
3783 && STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2])
3785 gcc_assert (vec_stmt
);
3786 thisarginfo
.linear_step
3787 = tree_to_shwi (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2]);
3789 = STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 1];
3790 thisarginfo
.simd_lane_linear
3791 = (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 3]
3792 == boolean_true_node
);
3793 /* If loop has been peeled for alignment, we need to adjust it. */
3794 tree n1
= LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo
);
3795 tree n2
= LOOP_VINFO_NITERS (loop_vinfo
);
3796 if (n1
!= n2
&& !thisarginfo
.simd_lane_linear
)
3798 tree bias
= fold_build2 (MINUS_EXPR
, TREE_TYPE (n1
), n1
, n2
);
3799 tree step
= STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[i
* 3 + 2];
3800 tree opt
= TREE_TYPE (thisarginfo
.op
);
3801 bias
= fold_convert (TREE_TYPE (step
), bias
);
3802 bias
= fold_build2 (MULT_EXPR
, TREE_TYPE (step
), bias
, step
);
3804 = fold_build2 (POINTER_TYPE_P (opt
)
3805 ? POINTER_PLUS_EXPR
: PLUS_EXPR
, opt
,
3806 thisarginfo
.op
, bias
);
/* During analysis, detect a simple induction (constant-step IV) so the
   argument can match a clone's linear parameter.  */
3810 && thisarginfo
.dt
!= vect_constant_def
3811 && thisarginfo
.dt
!= vect_external_def
3813 && TREE_CODE (op
) == SSA_NAME
3814 && simple_iv (loop
, loop_containing_stmt (stmt
), op
,
3816 && tree_fits_shwi_p (iv
.step
))
3818 thisarginfo
.linear_step
= tree_to_shwi (iv
.step
);
3819 thisarginfo
.op
= iv
.base
;
3821 else if ((thisarginfo
.dt
== vect_constant_def
3822 || thisarginfo
.dt
== vect_external_def
)
3823 && POINTER_TYPE_P (TREE_TYPE (op
)))
3824 thisarginfo
.align
= get_pointer_alignment (op
) / BITS_PER_UNIT
;
3825 /* Addresses of array elements indexed by GOMP_SIMD_LANE are
3827 if (POINTER_TYPE_P (TREE_TYPE (op
))
3828 && !thisarginfo
.linear_step
3830 && thisarginfo
.dt
!= vect_constant_def
3831 && thisarginfo
.dt
!= vect_external_def
3834 && TREE_CODE (op
) == SSA_NAME
)
3835 vect_simd_lane_linear (op
, loop
, &thisarginfo
);
3837 arginfo
.quick_push (thisarginfo
);
/* Clone selection needs a compile-time-constant vectorization factor.  */
3840 unsigned HOST_WIDE_INT vf
;
3841 if (!LOOP_VINFO_VECT_FACTOR (loop_vinfo
).is_constant (&vf
))
3843 if (dump_enabled_p ())
3844 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3845 "not considering SIMD clones; not yet supported"
3846 " for variable-width vectors.\n");
/* Scan all SIMD clones of the callee and keep the one with the lowest
   badness score; smaller-than-VF simdlen, inbranch clones, and target
   reservations all add penalty points.  */
3850 unsigned int badness
= 0;
3851 struct cgraph_node
*bestn
= NULL
;
3852 if (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).exists ())
3853 bestn
= cgraph_node::get (STMT_VINFO_SIMD_CLONE_INFO (stmt_info
)[0]);
3855 for (struct cgraph_node
*n
= node
->simd_clones
; n
!= NULL
;
3856 n
= n
->simdclone
->next_clone
)
3858 unsigned int this_badness
= 0;
3859 if (n
->simdclone
->simdlen
> vf
3860 || n
->simdclone
->nargs
!= nargs
)
3862 if (n
->simdclone
->simdlen
< vf
)
3863 this_badness
+= (exact_log2 (vf
)
3864 - exact_log2 (n
->simdclone
->simdlen
)) * 1024;
3865 if (n
->simdclone
->inbranch
)
3866 this_badness
+= 2048;
3867 int target_badness
= targetm
.simd_clone
.usable (n
);
3868 if (target_badness
< 0)
3870 this_badness
+= target_badness
* 512;
3871 /* FORNOW: Have to add code to add the mask argument. */
3872 if (n
->simdclone
->inbranch
)
/* Each argument's classification must be compatible with the clone's
   declared argument kind, else this clone is rejected.  */
3874 for (i
= 0; i
< nargs
; i
++)
3876 switch (n
->simdclone
->args
[i
].arg_type
)
3878 case SIMD_CLONE_ARG_TYPE_VECTOR
:
3879 if (!useless_type_conversion_p
3880 (n
->simdclone
->args
[i
].orig_type
,
3881 TREE_TYPE (gimple_call_arg (stmt
, i
))))
3883 else if (arginfo
[i
].dt
== vect_constant_def
3884 || arginfo
[i
].dt
== vect_external_def
3885 || arginfo
[i
].linear_step
)
3888 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
3889 if (arginfo
[i
].dt
!= vect_constant_def
3890 && arginfo
[i
].dt
!= vect_external_def
)
3893 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
3894 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
3895 if (arginfo
[i
].dt
== vect_constant_def
3896 || arginfo
[i
].dt
== vect_external_def
3897 || (arginfo
[i
].linear_step
3898 != n
->simdclone
->args
[i
].linear_step
))
3901 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
3902 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
3903 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
3904 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
3905 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
3906 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
3910 case SIMD_CLONE_ARG_TYPE_MASK
:
3913 if (i
== (size_t) -1)
3915 if (n
->simdclone
->args
[i
].alignment
> arginfo
[i
].align
)
3920 if (arginfo
[i
].align
)
3921 this_badness
+= (exact_log2 (arginfo
[i
].align
)
3922 - exact_log2 (n
->simdclone
->args
[i
].alignment
))
3924 if (i
== (size_t) -1)
3926 if (bestn
== NULL
|| this_badness
< badness
)
3929 badness
= this_badness
;
/* Constant/external args passed to a VECTOR parameter still need a
   vector type no wider than the clone's simdlen.  */
3936 for (i
= 0; i
< nargs
; i
++)
3937 if ((arginfo
[i
].dt
== vect_constant_def
3938 || arginfo
[i
].dt
== vect_external_def
)
3939 && bestn
->simdclone
->args
[i
].arg_type
== SIMD_CLONE_ARG_TYPE_VECTOR
)
3941 tree arg_type
= TREE_TYPE (gimple_call_arg (stmt
, i
));
3942 arginfo
[i
].vectype
= get_vectype_for_scalar_type (vinfo
, arg_type
,
3944 if (arginfo
[i
].vectype
== NULL
3945 || (simd_clone_subparts (arginfo
[i
].vectype
)
3946 > bestn
->simdclone
->simdlen
))
3950 fndecl
= bestn
->decl
;
3951 nunits
= bestn
->simdclone
->simdlen
;
3952 ncopies
= vf
/ nunits
;
3954 /* If the function isn't const, only allow it in simd loops where user
3955 has asserted that at least nunits consecutive iterations can be
3956 performed using SIMD instructions. */
3957 if ((loop
== NULL
|| (unsigned) loop
->safelen
< nunits
)
3958 && gimple_vuse (stmt
))
3961 /* Sanity check: make sure that at least one copy of the vectorized stmt
3962 needs to be generated. */
3963 gcc_assert (ncopies
>= 1);
/* Analysis phase only: record the selected clone and, for each
   constant-step linear argument, its base/step/simd-lane-linear triple
   in STMT_VINFO_SIMD_CLONE_INFO, then succeed.  */
3965 if (!vec_stmt
) /* transformation not required. */
3967 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (bestn
->decl
);
3968 for (i
= 0; i
< nargs
; i
++)
3969 if ((bestn
->simdclone
->args
[i
].arg_type
3970 == SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
)
3971 || (bestn
->simdclone
->args
[i
].arg_type
3972 == SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
))
3974 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_grow_cleared (i
* 3
3976 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (arginfo
[i
].op
);
3977 tree lst
= POINTER_TYPE_P (TREE_TYPE (arginfo
[i
].op
))
3978 ? size_type_node
: TREE_TYPE (arginfo
[i
].op
);
3979 tree ls
= build_int_cst (lst
, arginfo
[i
].linear_step
);
3980 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (ls
);
3981 tree sll
= arginfo
[i
].simd_lane_linear
3982 ? boolean_true_node
: boolean_false_node
;
3983 STMT_VINFO_SIMD_CLONE_INFO (stmt_info
).safe_push (sll
);
3985 STMT_VINFO_TYPE (stmt_info
) = call_simd_clone_vec_info_type
;
3986 DUMP_VECT_SCOPE ("vectorizable_simd_clone_call");
3987 /* vect_model_simple_cost (vinfo, stmt_info, ncopies,
3988 dt, slp_node, cost_vec); */
/* Transformation phase starts here.  */
3994 if (dump_enabled_p ())
3995 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
3998 scalar_dest
= gimple_call_lhs (stmt
);
3999 vec_dest
= NULL_TREE
;
4004 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4005 rtype
= TREE_TYPE (TREE_TYPE (fndecl
));
4006 if (TREE_CODE (rtype
) == ARRAY_TYPE
)
4009 rtype
= TREE_TYPE (ratype
);
/* Generate NCOPIES vectorized calls; for each copy build the argument
   list according to the clone's per-argument kinds.  */
4013 auto_vec
<vec
<tree
> > vec_oprnds
;
4014 auto_vec
<unsigned> vec_oprnds_i
;
4015 vec_oprnds
.safe_grow_cleared (nargs
);
4016 vec_oprnds_i
.safe_grow_cleared (nargs
);
4017 for (j
= 0; j
< ncopies
; ++j
)
4019 /* Build argument list for the vectorized call. */
4021 vargs
.create (nargs
);
4025 for (i
= 0; i
< nargs
; i
++)
4027 unsigned int k
, l
, m
, o
;
4029 op
= gimple_call_arg (stmt
, i
);
4030 switch (bestn
->simdclone
->args
[i
].arg_type
)
/* Vector argument: split (BIT_FIELD_REF) or merge (CONSTRUCTOR) the
   available vector defs to match the clone's vector_type width.  */
4032 case SIMD_CLONE_ARG_TYPE_VECTOR
:
4033 atype
= bestn
->simdclone
->args
[i
].vector_type
;
4034 o
= nunits
/ simd_clone_subparts (atype
);
4035 for (m
= j
* o
; m
< (j
+ 1) * o
; m
++)
4037 if (simd_clone_subparts (atype
)
4038 < simd_clone_subparts (arginfo
[i
].vectype
))
4040 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (atype
));
4041 k
= (simd_clone_subparts (arginfo
[i
].vectype
)
4042 / simd_clone_subparts (atype
));
4043 gcc_assert ((k
& (k
- 1)) == 0);
4046 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4047 ncopies
* o
/ k
, op
,
4049 vec_oprnds_i
[i
] = 0;
4050 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4054 vec_oprnd0
= arginfo
[i
].op
;
4055 if ((m
& (k
- 1)) == 0)
4056 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4058 arginfo
[i
].op
= vec_oprnd0
;
4060 = build3 (BIT_FIELD_REF
, atype
, vec_oprnd0
,
4062 bitsize_int ((m
& (k
- 1)) * prec
));
4064 = gimple_build_assign (make_ssa_name (atype
),
4066 vect_finish_stmt_generation (vinfo
, stmt_info
,
4068 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
4072 k
= (simd_clone_subparts (atype
)
4073 / simd_clone_subparts (arginfo
[i
].vectype
));
4074 gcc_assert ((k
& (k
- 1)) == 0);
4075 vec
<constructor_elt
, va_gc
> *ctor_elts
;
4077 vec_alloc (ctor_elts
, k
);
4080 for (l
= 0; l
< k
; l
++)
4082 if (m
== 0 && l
== 0)
4084 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
4088 vec_oprnds_i
[i
] = 0;
4089 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4092 vec_oprnd0
= vec_oprnds
[i
][vec_oprnds_i
[i
]++];
4093 arginfo
[i
].op
= vec_oprnd0
;
4096 CONSTRUCTOR_APPEND_ELT (ctor_elts
, NULL_TREE
,
4100 vargs
.safe_push (vec_oprnd0
);
4103 vec_oprnd0
= build_constructor (atype
, ctor_elts
);
4105 = gimple_build_assign (make_ssa_name (atype
),
4107 vect_finish_stmt_generation (vinfo
, stmt_info
,
4109 vargs
.safe_push (gimple_assign_lhs (new_stmt
));
/* Uniform argument: pass the scalar through unchanged.  */
4114 case SIMD_CLONE_ARG_TYPE_UNIFORM
:
4115 vargs
.safe_push (op
);
/* Linear argument with constant step: pass the base, advancing it via
   a header phi (loop case) or an explicit add per copy.  */
4117 case SIMD_CLONE_ARG_TYPE_LINEAR_CONSTANT_STEP
:
4118 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_CONSTANT_STEP
:
4123 = force_gimple_operand (unshare_expr (arginfo
[i
].op
),
4124 &stmts
, true, NULL_TREE
);
4128 edge pe
= loop_preheader_edge (loop
);
4129 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4130 gcc_assert (!new_bb
);
4132 if (arginfo
[i
].simd_lane_linear
)
4134 vargs
.safe_push (arginfo
[i
].op
);
4137 tree phi_res
= copy_ssa_name (op
);
4138 gphi
*new_phi
= create_phi_node (phi_res
, loop
->header
);
4139 add_phi_arg (new_phi
, arginfo
[i
].op
,
4140 loop_preheader_edge (loop
), UNKNOWN_LOCATION
);
4142 = POINTER_TYPE_P (TREE_TYPE (op
))
4143 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4144 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4145 ? sizetype
: TREE_TYPE (op
);
4147 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4149 tree tcst
= wide_int_to_tree (type
, cst
);
4150 tree phi_arg
= copy_ssa_name (op
);
4152 = gimple_build_assign (phi_arg
, code
, phi_res
, tcst
);
4153 gimple_stmt_iterator si
= gsi_after_labels (loop
->header
);
4154 gsi_insert_after (&si
, new_stmt
, GSI_NEW_STMT
);
4155 add_phi_arg (new_phi
, phi_arg
, loop_latch_edge (loop
),
4157 arginfo
[i
].op
= phi_res
;
4158 vargs
.safe_push (phi_res
);
4163 = POINTER_TYPE_P (TREE_TYPE (op
))
4164 ? POINTER_PLUS_EXPR
: PLUS_EXPR
;
4165 tree type
= POINTER_TYPE_P (TREE_TYPE (op
))
4166 ? sizetype
: TREE_TYPE (op
);
4168 = wi::mul (bestn
->simdclone
->args
[i
].linear_step
,
4170 tree tcst
= wide_int_to_tree (type
, cst
);
4171 new_temp
= make_ssa_name (TREE_TYPE (op
));
4173 = gimple_build_assign (new_temp
, code
,
4174 arginfo
[i
].op
, tcst
);
4175 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4176 vargs
.safe_push (new_temp
);
4179 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_CONSTANT_STEP
:
4180 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_CONSTANT_STEP
:
4181 case SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP
:
4182 case SIMD_CLONE_ARG_TYPE_LINEAR_REF_VARIABLE_STEP
:
4183 case SIMD_CLONE_ARG_TYPE_LINEAR_VAL_VARIABLE_STEP
:
4184 case SIMD_CLONE_ARG_TYPE_LINEAR_UVAL_VARIABLE_STEP
:
/* Emit the call to the SIMD clone, then adapt its return value
   (array return, narrower or wider vector) to VECTYPE.  */
4190 gcall
*new_call
= gimple_build_call_vec (fndecl
, vargs
);
4193 gcc_assert (ratype
|| simd_clone_subparts (rtype
) == nunits
);
4195 new_temp
= create_tmp_var (ratype
);
4196 else if (simd_clone_subparts (vectype
)
4197 == simd_clone_subparts (rtype
))
4198 new_temp
= make_ssa_name (vec_dest
, new_call
);
4200 new_temp
= make_ssa_name (rtype
, new_call
);
4201 gimple_call_set_lhs (new_call
, new_temp
);
4203 vect_finish_stmt_generation (vinfo
, stmt_info
, new_call
, gsi
);
4204 gimple
*new_stmt
= new_call
;
/* Clone returns a wider vector than VECTYPE: extract the pieces with
   MEM_REF (array return) or BIT_FIELD_REF.  */
4208 if (simd_clone_subparts (vectype
) < nunits
)
4211 poly_uint64 prec
= GET_MODE_BITSIZE (TYPE_MODE (vectype
));
4212 poly_uint64 bytes
= GET_MODE_SIZE (TYPE_MODE (vectype
));
4213 k
= nunits
/ simd_clone_subparts (vectype
);
4214 gcc_assert ((k
& (k
- 1)) == 0);
4215 for (l
= 0; l
< k
; l
++)
4220 t
= build_fold_addr_expr (new_temp
);
4221 t
= build2 (MEM_REF
, vectype
, t
,
4222 build_int_cst (TREE_TYPE (t
), l
* bytes
));
4225 t
= build3 (BIT_FIELD_REF
, vectype
, new_temp
,
4226 bitsize_int (prec
), bitsize_int (l
* prec
));
4227 new_stmt
= gimple_build_assign (make_ssa_name (vectype
), t
);
4228 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4230 if (j
== 0 && l
== 0)
4231 *vec_stmt
= new_stmt
;
4232 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4236 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
/* Clone returns a narrower vector: accumulate K results into a
   CONSTRUCTOR before producing one VECTYPE value.  */
4239 else if (simd_clone_subparts (vectype
) > nunits
)
4241 unsigned int k
= (simd_clone_subparts (vectype
)
4242 / simd_clone_subparts (rtype
));
4243 gcc_assert ((k
& (k
- 1)) == 0);
4244 if ((j
& (k
- 1)) == 0)
4245 vec_alloc (ret_ctor_elts
, k
);
4248 unsigned int m
, o
= nunits
/ simd_clone_subparts (rtype
);
4249 for (m
= 0; m
< o
; m
++)
4251 tree tem
= build4 (ARRAY_REF
, rtype
, new_temp
,
4252 size_int (m
), NULL_TREE
, NULL_TREE
);
4253 new_stmt
= gimple_build_assign (make_ssa_name (rtype
),
4255 vect_finish_stmt_generation (vinfo
, stmt_info
,
4257 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
,
4258 gimple_assign_lhs (new_stmt
));
4260 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4263 CONSTRUCTOR_APPEND_ELT (ret_ctor_elts
, NULL_TREE
, new_temp
);
4264 if ((j
& (k
- 1)) != k
- 1)
4266 vec_oprnd0
= build_constructor (vectype
, ret_ctor_elts
);
4268 = gimple_build_assign (make_ssa_name (vec_dest
), vec_oprnd0
);
4269 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4271 if ((unsigned) j
== k
- 1)
4272 *vec_stmt
= new_stmt
;
4273 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
/* Array return of exactly NUNITS elements: load it back as VECTYPE.  */
4278 tree t
= build_fold_addr_expr (new_temp
);
4279 t
= build2 (MEM_REF
, vectype
, t
,
4280 build_int_cst (TREE_TYPE (t
), 0));
4281 new_stmt
= gimple_build_assign (make_ssa_name (vec_dest
), t
);
4282 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4283 vect_clobber_variable (vinfo
, stmt_info
, gsi
, new_temp
);
4288 *vec_stmt
= new_stmt
;
4289 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4292 for (i
= 0; i
< nargs
; ++i
)
4294 vec
<tree
> oprndsi
= vec_oprnds
[i
];
4299 /* The call in STMT might prevent it from being removed in dce.
4300 We however cannot remove it here, due to the way the ssa name
4301 it defines is mapped to the new definition. So just replace
4302 rhs of the statement with something harmless. */
4310 type
= TREE_TYPE (scalar_dest
);
4311 lhs
= gimple_call_lhs (vect_orig_stmt (stmt_info
)->stmt
);
4312 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
4315 new_stmt
= gimple_build_nop ();
4316 vinfo
->replace_stmt (gsi
, vect_orig_stmt (stmt_info
), new_stmt
);
4317 unlink_stmt_vdef (stmt
);
4323 /* Function vect_gen_widened_results_half
4325 Create a vector stmt whose code, type, number of arguments, and result
4326 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
4327 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at GSI.
4328 In the case that CODE is a CALL_EXPR, this means that a call to DECL
4329 needs to be created (DECL is a function-decl of a target-builtin).
4330 STMT_INFO is the original scalar stmt that we are vectorizing. */
4333 vect_gen_widened_results_half (vec_info
*vinfo
, enum tree_code code
,
4334 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
4335 tree vec_dest
, gimple_stmt_iterator
*gsi
,
4336 stmt_vec_info stmt_info
)
4341 /* Generate half of the widened result: */
4342 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
4343 if (op_type
!= binary_op
)
4345 new_stmt
= gimple_build_assign (vec_dest
, code
, vec_oprnd0
, vec_oprnd1
);
4346 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4347 gimple_assign_set_lhs (new_stmt
, new_temp
);
4348 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4354 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
4355 For multi-step conversions store the resulting vectors and call the function
4359 vect_create_vectorized_demotion_stmts (vec_info
*vinfo
, vec
<tree
> *vec_oprnds
,
4361 stmt_vec_info stmt_info
,
4363 gimple_stmt_iterator
*gsi
,
4364 slp_tree slp_node
, enum tree_code code
)
4367 tree vop0
, vop1
, new_tmp
, vec_dest
;
4369 vec_dest
= vec_dsts
.pop ();
4371 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
4373 /* Create demotion operation. */
4374 vop0
= (*vec_oprnds
)[i
];
4375 vop1
= (*vec_oprnds
)[i
+ 1];
4376 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
4377 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
4378 gimple_assign_set_lhs (new_stmt
, new_tmp
);
4379 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4382 /* Store the resulting vector for next recursive call. */
4383 (*vec_oprnds
)[i
/2] = new_tmp
;
4386 /* This is the last step of the conversion sequence. Store the
4387 vectors in SLP_NODE or in vector info of the scalar statement
4388 (or in STMT_VINFO_RELATED_STMT chain). */
4390 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4392 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4396 /* For multi-step demotion operations we first generate demotion operations
4397 from the source type to the intermediate types, and then combine the
4398 results (stored in VEC_OPRNDS) in demotion operation to the destination
4402 /* At each level of recursion we have half of the operands we had at the
4404 vec_oprnds
->truncate ((i
+1)/2);
4405 vect_create_vectorized_demotion_stmts (vinfo
, vec_oprnds
,
4407 stmt_info
, vec_dsts
, gsi
,
4408 slp_node
, VEC_PACK_TRUNC_EXPR
);
4411 vec_dsts
.quick_push (vec_dest
);
4415 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
4416 and VEC_OPRNDS1, for a binary operation associated with scalar statement
4417 STMT_INFO. For multi-step conversions store the resulting vectors and
4418 call the function recursively. */
4421 vect_create_vectorized_promotion_stmts (vec_info
*vinfo
,
4422 vec
<tree
> *vec_oprnds0
,
4423 vec
<tree
> *vec_oprnds1
,
4424 stmt_vec_info stmt_info
, tree vec_dest
,
4425 gimple_stmt_iterator
*gsi
,
4426 enum tree_code code1
,
4427 enum tree_code code2
, int op_type
)
4430 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
4431 gimple
*new_stmt1
, *new_stmt2
;
4432 vec
<tree
> vec_tmp
= vNULL
;
4434 vec_tmp
.create (vec_oprnds0
->length () * 2);
4435 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
4437 if (op_type
== binary_op
)
4438 vop1
= (*vec_oprnds1
)[i
];
4442 /* Generate the two halves of promotion operation. */
4443 new_stmt1
= vect_gen_widened_results_half (vinfo
, code1
, vop0
, vop1
,
4444 op_type
, vec_dest
, gsi
,
4446 new_stmt2
= vect_gen_widened_results_half (vinfo
, code2
, vop0
, vop1
,
4447 op_type
, vec_dest
, gsi
,
4449 if (is_gimple_call (new_stmt1
))
4451 new_tmp1
= gimple_call_lhs (new_stmt1
);
4452 new_tmp2
= gimple_call_lhs (new_stmt2
);
4456 new_tmp1
= gimple_assign_lhs (new_stmt1
);
4457 new_tmp2
= gimple_assign_lhs (new_stmt2
);
4460 /* Store the results for the next step. */
4461 vec_tmp
.quick_push (new_tmp1
);
4462 vec_tmp
.quick_push (new_tmp2
);
4465 vec_oprnds0
->release ();
4466 *vec_oprnds0
= vec_tmp
;
4470 /* Check if STMT_INFO performs a conversion operation that can be vectorized.
4471 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
4472 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4473 Return true if STMT_INFO is vectorizable in this way. */
4476 vectorizable_conversion (vec_info
*vinfo
,
4477 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4478 gimple
**vec_stmt
, slp_tree slp_node
,
4479 stmt_vector_for_cost
*cost_vec
)
4483 tree op0
, op1
= NULL_TREE
;
4484 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
4485 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
4486 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
4488 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
4490 poly_uint64 nunits_in
;
4491 poly_uint64 nunits_out
;
4492 tree vectype_out
, vectype_in
;
4494 tree lhs_type
, rhs_type
;
4495 enum { NARROW
, NONE
, WIDEN
} modifier
;
4496 vec
<tree
> vec_oprnds0
= vNULL
;
4497 vec
<tree
> vec_oprnds1
= vNULL
;
4499 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
4500 int multi_step_cvt
= 0;
4501 vec
<tree
> interm_types
= vNULL
;
4502 tree intermediate_type
, cvt_type
= NULL_TREE
;
4504 unsigned short fltsz
;
4506 /* Is STMT a vectorizable conversion? */
4508 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4511 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
4515 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4519 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
4522 code
= gimple_assign_rhs_code (stmt
);
4523 if (!CONVERT_EXPR_CODE_P (code
)
4524 && code
!= FIX_TRUNC_EXPR
4525 && code
!= FLOAT_EXPR
4526 && code
!= WIDEN_MULT_EXPR
4527 && code
!= WIDEN_LSHIFT_EXPR
)
4530 op_type
= TREE_CODE_LENGTH (code
);
4532 /* Check types of lhs and rhs. */
4533 scalar_dest
= gimple_assign_lhs (stmt
);
4534 lhs_type
= TREE_TYPE (scalar_dest
);
4535 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
4537 /* Check the operands of the operation. */
4538 slp_tree slp_op0
, slp_op1
= NULL
;
4539 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
4540 0, &op0
, &slp_op0
, &dt
[0], &vectype_in
))
4542 if (dump_enabled_p ())
4543 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4544 "use not simple.\n");
4548 rhs_type
= TREE_TYPE (op0
);
4549 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
4550 && !((INTEGRAL_TYPE_P (lhs_type
)
4551 && INTEGRAL_TYPE_P (rhs_type
))
4552 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
4553 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
4556 if (!VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4557 && ((INTEGRAL_TYPE_P (lhs_type
)
4558 && !type_has_mode_precision_p (lhs_type
))
4559 || (INTEGRAL_TYPE_P (rhs_type
)
4560 && !type_has_mode_precision_p (rhs_type
))))
4562 if (dump_enabled_p ())
4563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4564 "type conversion to/from bit-precision unsupported."
4569 if (op_type
== binary_op
)
4571 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
4573 op1
= gimple_assign_rhs2 (stmt
);
4575 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
4576 &op1
, &slp_op1
, &dt
[1], &vectype1_in
))
4578 if (dump_enabled_p ())
4579 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4580 "use not simple.\n");
4583 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
4586 vectype_in
= vectype1_in
;
4589 /* If op0 is an external or constant def, infer the vector type
4590 from the scalar type. */
4592 vectype_in
= get_vectype_for_scalar_type (vinfo
, rhs_type
, slp_node
);
4594 gcc_assert (vectype_in
);
4597 if (dump_enabled_p ())
4598 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4599 "no vectype for scalar type %T\n", rhs_type
);
4604 if (VECTOR_BOOLEAN_TYPE_P (vectype_out
)
4605 && !VECTOR_BOOLEAN_TYPE_P (vectype_in
))
4607 if (dump_enabled_p ())
4608 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4609 "can't convert between boolean and non "
4610 "boolean vectors %T\n", rhs_type
);
4615 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
4616 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
4617 if (known_eq (nunits_out
, nunits_in
))
4619 else if (multiple_p (nunits_out
, nunits_in
))
4623 gcc_checking_assert (multiple_p (nunits_in
, nunits_out
));
4627 /* Multiple types in SLP are handled by creating the appropriate number of
4628 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4632 else if (modifier
== NARROW
)
4633 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_out
);
4635 ncopies
= vect_get_num_copies (loop_vinfo
, vectype_in
);
4637 /* Sanity check: make sure that at least one copy of the vectorized stmt
4638 needs to be generated. */
4639 gcc_assert (ncopies
>= 1);
4641 bool found_mode
= false;
4642 scalar_mode lhs_mode
= SCALAR_TYPE_MODE (lhs_type
);
4643 scalar_mode rhs_mode
= SCALAR_TYPE_MODE (rhs_type
);
4644 opt_scalar_mode rhs_mode_iter
;
4646 /* Supportable by target? */
4650 if (code
!= FIX_TRUNC_EXPR
4651 && code
!= FLOAT_EXPR
4652 && !CONVERT_EXPR_CODE_P (code
))
4654 if (supportable_convert_operation (code
, vectype_out
, vectype_in
, &code1
))
4658 if (dump_enabled_p ())
4659 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4660 "conversion not supported by target.\n");
4664 if (supportable_widening_operation (vinfo
, code
, stmt_info
, vectype_out
,
4665 vectype_in
, &code1
, &code2
,
4666 &multi_step_cvt
, &interm_types
))
4668 /* Binary widening operation can only be supported directly by the
4670 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
4674 if (code
!= FLOAT_EXPR
4675 || GET_MODE_SIZE (lhs_mode
) <= GET_MODE_SIZE (rhs_mode
))
4678 fltsz
= GET_MODE_SIZE (lhs_mode
);
4679 FOR_EACH_2XWIDER_MODE (rhs_mode_iter
, rhs_mode
)
4681 rhs_mode
= rhs_mode_iter
.require ();
4682 if (GET_MODE_SIZE (rhs_mode
) > fltsz
)
4686 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4687 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4688 if (cvt_type
== NULL_TREE
)
4691 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4693 if (!supportable_convert_operation (code
, vectype_out
,
4694 cvt_type
, &codecvt1
))
4697 else if (!supportable_widening_operation (vinfo
, code
, stmt_info
,
4698 vectype_out
, cvt_type
,
4699 &codecvt1
, &codecvt2
,
4704 gcc_assert (multi_step_cvt
== 0);
4706 if (supportable_widening_operation (vinfo
, NOP_EXPR
, stmt_info
,
4708 vectype_in
, &code1
, &code2
,
4709 &multi_step_cvt
, &interm_types
))
4719 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
4720 codecvt2
= ERROR_MARK
;
4724 interm_types
.safe_push (cvt_type
);
4725 cvt_type
= NULL_TREE
;
4730 gcc_assert (op_type
== unary_op
);
4731 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
4732 &code1
, &multi_step_cvt
,
4736 if (code
!= FIX_TRUNC_EXPR
4737 || GET_MODE_SIZE (lhs_mode
) >= GET_MODE_SIZE (rhs_mode
))
4741 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
4742 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
4743 if (cvt_type
== NULL_TREE
)
4745 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
4748 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
4749 &code1
, &multi_step_cvt
,
4758 if (!vec_stmt
) /* transformation not required. */
4761 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype_in
)
4762 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype_in
)))
4764 if (dump_enabled_p ())
4765 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4766 "incompatible vector types for invariants\n");
4769 DUMP_VECT_SCOPE ("vectorizable_conversion");
4770 if (modifier
== NONE
)
4772 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
4773 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
4776 else if (modifier
== NARROW
)
4778 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
4779 /* The final packing step produces one vector result per copy. */
4780 unsigned int nvectors
4781 = (slp_node
? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) : ncopies
);
4782 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
4783 multi_step_cvt
, cost_vec
);
4787 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
4788 /* The initial unpacking step produces two vector results
4789 per copy. MULTI_STEP_CVT is 0 for a single conversion,
4790 so >> MULTI_STEP_CVT divides by 2^(number of steps - 1). */
4791 unsigned int nvectors
4793 ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
) >> multi_step_cvt
4795 vect_model_promotion_demotion_cost (stmt_info
, dt
, nvectors
,
4796 multi_step_cvt
, cost_vec
);
4798 interm_types
.release ();
4803 if (dump_enabled_p ())
4804 dump_printf_loc (MSG_NOTE
, vect_location
,
4805 "transform conversion. ncopies = %d.\n", ncopies
);
4807 if (op_type
== binary_op
)
4809 if (CONSTANT_CLASS_P (op0
))
4810 op0
= fold_convert (TREE_TYPE (op1
), op0
);
4811 else if (CONSTANT_CLASS_P (op1
))
4812 op1
= fold_convert (TREE_TYPE (op0
), op1
);
4815 /* In case of multi-step conversion, we first generate conversion operations
4816 to the intermediate types, and then from that types to the final one.
4817 We create vector destinations for the intermediate type (TYPES) received
4818 from supportable_*_operation, and store them in the correct order
4819 for future use in vect_create_vectorized_*_stmts (). */
4820 auto_vec
<tree
> vec_dsts (multi_step_cvt
+ 1);
4821 vec_dest
= vect_create_destination_var (scalar_dest
,
4822 (cvt_type
&& modifier
== WIDEN
)
4823 ? cvt_type
: vectype_out
);
4824 vec_dsts
.quick_push (vec_dest
);
4828 for (i
= interm_types
.length () - 1;
4829 interm_types
.iterate (i
, &intermediate_type
); i
--)
4831 vec_dest
= vect_create_destination_var (scalar_dest
,
4833 vec_dsts
.quick_push (vec_dest
);
4838 vec_dest
= vect_create_destination_var (scalar_dest
,
4840 ? vectype_out
: cvt_type
);
4845 if (modifier
== WIDEN
)
4847 else if (modifier
== NARROW
)
4850 ninputs
= vect_pow2 (multi_step_cvt
);
4858 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
4860 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4862 /* Arguments are ready, create the new vector stmt. */
4863 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
4864 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code1
, vop0
);
4865 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4866 gimple_assign_set_lhs (new_stmt
, new_temp
);
4867 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4870 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4872 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4877 /* In case the vectorization factor (VF) is bigger than the number
4878 of elements that we can fit in a vectype (nunits), we have to
4879 generate more than one vector stmt - i.e - we need to "unroll"
4880 the vector stmt by a factor VF/nunits. */
4881 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
4883 code
== WIDEN_LSHIFT_EXPR
? NULL_TREE
: op1
,
4885 if (code
== WIDEN_LSHIFT_EXPR
)
4887 vec_oprnds1
.create (ncopies
* ninputs
);
4888 for (i
= 0; i
< ncopies
* ninputs
; ++i
)
4889 vec_oprnds1
.quick_push (op1
);
4891 /* Arguments are ready. Create the new vector stmts. */
4892 for (i
= multi_step_cvt
; i
>= 0; i
--)
4894 tree this_dest
= vec_dsts
[i
];
4895 enum tree_code c1
= code1
, c2
= code2
;
4896 if (i
== 0 && codecvt2
!= ERROR_MARK
)
4901 vect_create_vectorized_promotion_stmts (vinfo
, &vec_oprnds0
,
4902 &vec_oprnds1
, stmt_info
,
4907 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4912 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4913 new_temp
= make_ssa_name (vec_dest
);
4914 new_stmt
= gimple_build_assign (new_temp
, codecvt1
, vop0
);
4915 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4918 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
4921 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
4923 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
4928 /* In case the vectorization factor (VF) is bigger than the number
4929 of elements that we can fit in a vectype (nunits), we have to
4930 generate more than one vector stmt - i.e - we need to "unroll"
4931 the vector stmt by a factor VF/nunits. */
4932 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
* ninputs
,
4934 /* Arguments are ready. Create the new vector stmts. */
4936 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
4938 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
4939 new_temp
= make_ssa_name (vec_dest
);
4941 = gimple_build_assign (new_temp
, codecvt1
, vop0
);
4942 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
4943 vec_oprnds0
[i
] = new_temp
;
4946 vect_create_vectorized_demotion_stmts (vinfo
, &vec_oprnds0
,
4948 stmt_info
, vec_dsts
, gsi
,
4953 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
4955 vec_oprnds0
.release ();
4956 vec_oprnds1
.release ();
4957 interm_types
.release ();
4962 /* Return true if we can assume from the scalar form of STMT_INFO that
4963 neither the scalar nor the vector forms will generate code. STMT_INFO
4964 is known not to involve a data reference. */
4967 vect_nop_conversion_p (stmt_vec_info stmt_info
)
4969 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
4973 tree lhs
= gimple_assign_lhs (stmt
);
4974 tree_code code
= gimple_assign_rhs_code (stmt
);
4975 tree rhs
= gimple_assign_rhs1 (stmt
);
4977 if (code
== SSA_NAME
|| code
== VIEW_CONVERT_EXPR
)
4980 if (CONVERT_EXPR_CODE_P (code
))
4981 return tree_nop_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
));
4986 /* Function vectorizable_assignment.
4988 Check if STMT_INFO performs an assignment (copy) that can be vectorized.
4989 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
4990 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
4991 Return true if STMT_INFO is vectorizable in this way. */
4994 vectorizable_assignment (vec_info
*vinfo
,
4995 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
4996 gimple
**vec_stmt
, slp_tree slp_node
,
4997 stmt_vector_for_cost
*cost_vec
)
5002 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5004 enum vect_def_type dt
[1] = {vect_unknown_def_type
};
5008 vec
<tree
> vec_oprnds
= vNULL
;
5010 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5011 enum tree_code code
;
5014 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5017 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5021 /* Is vectorizable assignment? */
5022 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5026 scalar_dest
= gimple_assign_lhs (stmt
);
5027 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
5030 if (STMT_VINFO_DATA_REF (stmt_info
))
5033 code
= gimple_assign_rhs_code (stmt
);
5034 if (!(gimple_assign_single_p (stmt
)
5035 || code
== PAREN_EXPR
5036 || CONVERT_EXPR_CODE_P (code
)))
5039 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5040 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5042 /* Multiple types in SLP are handled by creating the appropriate number of
5043 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5048 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5050 gcc_assert (ncopies
>= 1);
5053 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &op
, &slp_op
,
5054 &dt
[0], &vectype_in
))
5056 if (dump_enabled_p ())
5057 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5058 "use not simple.\n");
5062 vectype_in
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op
), slp_node
);
5064 /* We can handle NOP_EXPR conversions that do not change the number
5065 of elements or the vector size. */
5066 if ((CONVERT_EXPR_CODE_P (code
)
5067 || code
== VIEW_CONVERT_EXPR
)
5069 || maybe_ne (TYPE_VECTOR_SUBPARTS (vectype_in
), nunits
)
5070 || maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype
)),
5071 GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
5074 /* We do not handle bit-precision changes. */
5075 if ((CONVERT_EXPR_CODE_P (code
)
5076 || code
== VIEW_CONVERT_EXPR
)
5077 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
5078 && (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5079 || !type_has_mode_precision_p (TREE_TYPE (op
)))
5080 /* But a conversion that does not change the bit-pattern is ok. */
5081 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
5082 > TYPE_PRECISION (TREE_TYPE (op
)))
5083 && TYPE_UNSIGNED (TREE_TYPE (op
)))
5084 /* Conversion between boolean types of different sizes is
5085 a simple assignment in case their vectypes are same
5087 && (!VECTOR_BOOLEAN_TYPE_P (vectype
)
5088 || !VECTOR_BOOLEAN_TYPE_P (vectype_in
)))
5090 if (dump_enabled_p ())
5091 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5092 "type conversion to/from bit-precision "
5097 if (!vec_stmt
) /* transformation not required. */
5100 && !vect_maybe_update_slp_op_vectype (slp_op
, vectype_in
))
5102 if (dump_enabled_p ())
5103 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5104 "incompatible vector types for invariants\n");
5107 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
5108 DUMP_VECT_SCOPE ("vectorizable_assignment");
5109 if (!vect_nop_conversion_p (stmt_info
))
5110 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
, ndts
, slp_node
,
5116 if (dump_enabled_p ())
5117 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
5120 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5123 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
, op
, &vec_oprnds
);
5125 /* Arguments are ready. create the new vector stmt. */
5126 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
5128 if (CONVERT_EXPR_CODE_P (code
)
5129 || code
== VIEW_CONVERT_EXPR
)
5130 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
5131 gassign
*new_stmt
= gimple_build_assign (vec_dest
, vop
);
5132 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5133 gimple_assign_set_lhs (new_stmt
, new_temp
);
5134 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5136 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5138 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5141 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5143 vec_oprnds
.release ();
5148 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
5149 either as shift by a scalar or by a vector. */
5152 vect_supportable_shift (vec_info
*vinfo
, enum tree_code code
, tree scalar_type
)
5155 machine_mode vec_mode
;
5160 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5164 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5166 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
5168 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5170 || (optab_handler (optab
, TYPE_MODE (vectype
))
5171 == CODE_FOR_nothing
))
5175 vec_mode
= TYPE_MODE (vectype
);
5176 icode
= (int) optab_handler (optab
, vec_mode
);
5177 if (icode
== CODE_FOR_nothing
)
5184 /* Function vectorizable_shift.
5186 Check if STMT_INFO performs a shift operation that can be vectorized.
5187 If VEC_STMT is also passed, vectorize the STMT_INFO: create a vectorized
5188 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5189 Return true if STMT_INFO is vectorizable in this way. */
5192 vectorizable_shift (vec_info
*vinfo
,
5193 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5194 gimple
**vec_stmt
, slp_tree slp_node
,
5195 stmt_vector_for_cost
*cost_vec
)
5199 tree op0
, op1
= NULL
;
5200 tree vec_oprnd1
= NULL_TREE
;
5202 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5203 enum tree_code code
;
5204 machine_mode vec_mode
;
5208 machine_mode optab_op2_mode
;
5209 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
5211 poly_uint64 nunits_in
;
5212 poly_uint64 nunits_out
;
5217 vec
<tree
> vec_oprnds0
= vNULL
;
5218 vec
<tree
> vec_oprnds1
= vNULL
;
5221 bool scalar_shift_arg
= true;
5222 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5223 bool incompatible_op1_vectype_p
= false;
5225 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5228 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5229 && STMT_VINFO_DEF_TYPE (stmt_info
) != vect_nested_cycle
5233 /* Is STMT a vectorizable binary/unary operation? */
5234 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5238 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
5241 code
= gimple_assign_rhs_code (stmt
);
5243 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
5244 || code
== RROTATE_EXPR
))
5247 scalar_dest
= gimple_assign_lhs (stmt
);
5248 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5249 if (!type_has_mode_precision_p (TREE_TYPE (scalar_dest
)))
5251 if (dump_enabled_p ())
5252 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5253 "bit-precision shifts not supported.\n");
5258 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5259 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5261 if (dump_enabled_p ())
5262 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5263 "use not simple.\n");
5266 /* If op0 is an external or constant def, infer the vector type
5267 from the scalar type. */
5269 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
), slp_node
);
5271 gcc_assert (vectype
);
5274 if (dump_enabled_p ())
5275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5276 "no vectype for scalar type\n");
5280 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5281 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5282 if (maybe_ne (nunits_out
, nunits_in
))
5285 stmt_vec_info op1_def_stmt_info
;
5287 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1, &op1
, &slp_op1
,
5288 &dt
[1], &op1_vectype
, &op1_def_stmt_info
))
5290 if (dump_enabled_p ())
5291 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5292 "use not simple.\n");
5296 /* Multiple types in SLP are handled by creating the appropriate number of
5297 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5302 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5304 gcc_assert (ncopies
>= 1);
5306 /* Determine whether the shift amount is a vector, or scalar. If the
5307 shift/rotate amount is a vector, use the vector/vector shift optabs. */
5309 if ((dt
[1] == vect_internal_def
5310 || dt
[1] == vect_induction_def
5311 || dt
[1] == vect_nested_cycle
)
5313 scalar_shift_arg
= false;
5314 else if (dt
[1] == vect_constant_def
5315 || dt
[1] == vect_external_def
5316 || dt
[1] == vect_internal_def
)
5318 /* In SLP, need to check whether the shift count is the same,
5319 in loops if it is a constant or invariant, it is always
5323 vec
<stmt_vec_info
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
5324 stmt_vec_info slpstmt_info
;
5326 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt_info
)
5328 gassign
*slpstmt
= as_a
<gassign
*> (slpstmt_info
->stmt
);
5329 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
5330 scalar_shift_arg
= false;
5333 /* For internal SLP defs we have to make sure we see scalar stmts
5334 for all vector elements.
5335 ??? For different vectors we could resort to a different
5336 scalar shift operand but code-generation below simply always
5338 if (dt
[1] == vect_internal_def
5339 && maybe_ne (nunits_out
* SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
),
5341 scalar_shift_arg
= false;
5344 /* If the shift amount is computed by a pattern stmt we cannot
5345 use the scalar amount directly thus give up and use a vector
5347 if (op1_def_stmt_info
&& is_pattern_stmt_p (op1_def_stmt_info
))
5348 scalar_shift_arg
= false;
5352 if (dump_enabled_p ())
5353 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5354 "operand mode requires invariant argument.\n");
5358 /* Vector shifted by vector. */
5359 bool was_scalar_shift_arg
= scalar_shift_arg
;
5360 if (!scalar_shift_arg
)
5362 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5363 if (dump_enabled_p ())
5364 dump_printf_loc (MSG_NOTE
, vect_location
,
5365 "vector/vector shift/rotate found.\n");
5368 op1_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op1
),
5370 incompatible_op1_vectype_p
5371 = (op1_vectype
== NULL_TREE
5372 || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype
),
5373 TYPE_VECTOR_SUBPARTS (vectype
))
5374 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
));
5375 if (incompatible_op1_vectype_p
5377 || SLP_TREE_DEF_TYPE (slp_op1
) != vect_constant_def
5378 || slp_op1
->refcnt
!= 1))
5380 if (dump_enabled_p ())
5381 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5382 "unusable type for last operand in"
5383 " vector/vector shift/rotate.\n");
5387 /* See if the machine has a vector shifted by scalar insn and if not
5388 then see if it has a vector shifted by vector insn. */
5391 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
5393 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
5395 if (dump_enabled_p ())
5396 dump_printf_loc (MSG_NOTE
, vect_location
,
5397 "vector/scalar shift/rotate found.\n");
5401 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
5403 && (optab_handler (optab
, TYPE_MODE (vectype
))
5404 != CODE_FOR_nothing
))
5406 scalar_shift_arg
= false;
5408 if (dump_enabled_p ())
5409 dump_printf_loc (MSG_NOTE
, vect_location
,
5410 "vector/vector shift/rotate found.\n");
5413 op1_vectype
= get_vectype_for_scalar_type (vinfo
,
5417 /* Unlike the other binary operators, shifts/rotates have
5418 the rhs being int, instead of the same type as the lhs,
5419 so make sure the scalar is the right type if we are
5420 dealing with vectors of long long/long/short/char. */
5421 incompatible_op1_vectype_p
5423 || !tree_nop_conversion_p (TREE_TYPE (vectype
),
5429 /* Supportable by target? */
5432 if (dump_enabled_p ())
5433 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5437 vec_mode
= TYPE_MODE (vectype
);
5438 icode
= (int) optab_handler (optab
, vec_mode
);
5439 if (icode
== CODE_FOR_nothing
)
5441 if (dump_enabled_p ())
5442 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5443 "op not supported by target.\n");
5444 /* Check only during analysis. */
5445 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5447 && !vect_worthwhile_without_simd_p (vinfo
, code
)))
5449 if (dump_enabled_p ())
5450 dump_printf_loc (MSG_NOTE
, vect_location
,
5451 "proceeding using word mode.\n");
5454 /* Worthwhile without SIMD support? Check only during analysis. */
5456 && !VECTOR_MODE_P (TYPE_MODE (vectype
))
5457 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5459 if (dump_enabled_p ())
5460 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5461 "not worthwhile without SIMD support.\n");
5465 if (!vec_stmt
) /* transformation not required. */
5468 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5469 || (!scalar_shift_arg
5470 && (!incompatible_op1_vectype_p
5471 || dt
[1] == vect_constant_def
)
5472 && !vect_maybe_update_slp_op_vectype
5474 incompatible_op1_vectype_p
? vectype
: op1_vectype
))))
5476 if (dump_enabled_p ())
5477 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5478 "incompatible vector types for invariants\n");
5481 /* Now adjust the constant shift amount in place. */
5483 && incompatible_op1_vectype_p
5484 && dt
[1] == vect_constant_def
)
5486 for (unsigned i
= 0;
5487 i
< SLP_TREE_SCALAR_OPS (slp_op1
).length (); ++i
)
5489 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]
5490 = fold_convert (TREE_TYPE (vectype
),
5491 SLP_TREE_SCALAR_OPS (slp_op1
)[i
]);
5492 gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (slp_op1
)[i
])
5496 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
5497 DUMP_VECT_SCOPE ("vectorizable_shift");
5498 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dt
,
5499 scalar_shift_arg
? 1 : ndts
, slp_node
, cost_vec
);
5505 if (dump_enabled_p ())
5506 dump_printf_loc (MSG_NOTE
, vect_location
,
5507 "transform binary/unary operation.\n");
5509 if (incompatible_op1_vectype_p
&& !slp_node
)
5511 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5512 if (dt
[1] != vect_constant_def
)
5513 op1
= vect_init_vector (vinfo
, stmt_info
, op1
,
5514 TREE_TYPE (vectype
), NULL
);
5518 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5520 if (scalar_shift_arg
)
5522 /* Vector shl and shr insn patterns can be defined with scalar
5523 operand 2 (shift operand). In this case, use constant or loop
5524 invariant op1 directly, without extending it to vector mode
5526 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
5527 if (!VECTOR_MODE_P (optab_op2_mode
))
5529 if (dump_enabled_p ())
5530 dump_printf_loc (MSG_NOTE
, vect_location
,
5531 "operand 1 using scalar mode.\n");
5533 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: ncopies
);
5534 vec_oprnds1
.quick_push (vec_oprnd1
);
5535 /* Store vec_oprnd1 for every vector stmt to be created.
5536 We check during the analysis that all the shift arguments
5538 TODO: Allow different constants for different vector
5539 stmts generated for an SLP instance. */
5541 k
< (slp_node
? slp_node
->vec_stmts_size
- 1 : ncopies
- 1); k
++)
5542 vec_oprnds1
.quick_push (vec_oprnd1
);
5545 else if (slp_node
&& incompatible_op1_vectype_p
)
5547 if (was_scalar_shift_arg
)
5549 /* If the argument was the same in all lanes create
5550 the correctly typed vector shift amount directly. */
5551 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
5552 op1
= vect_init_vector (vinfo
, stmt_info
, op1
, TREE_TYPE (vectype
),
5553 !loop_vinfo
? gsi
: NULL
);
5554 vec_oprnd1
= vect_init_vector (vinfo
, stmt_info
, op1
, vectype
,
5555 !loop_vinfo
? gsi
: NULL
);
5556 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
5557 for (k
= 0; k
< slp_node
->vec_stmts_size
; k
++)
5558 vec_oprnds1
.quick_push (vec_oprnd1
);
5560 else if (dt
[1] == vect_constant_def
)
5561 /* The constant shift amount has been adjusted in place. */
5564 gcc_assert (TYPE_MODE (op1_vectype
) == TYPE_MODE (vectype
));
5567 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
5568 (a special case for certain kind of vector shifts); otherwise,
5569 operand 1 should be of a vector type (the usual case). */
5570 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5572 vec_oprnd1
? NULL_TREE
: op1
, &vec_oprnds1
);
5574 /* Arguments are ready. Create the new vector stmt. */
5575 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5577 vop1
= vec_oprnds1
[i
];
5578 gassign
*new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
);
5579 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5580 gimple_assign_set_lhs (new_stmt
, new_temp
);
5581 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
5583 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5585 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
5589 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
5591 vec_oprnds0
.release ();
5592 vec_oprnds1
.release ();
5598 /* Function vectorizable_operation.
5600 Check if STMT_INFO performs a binary, unary or ternary operation that can
5602 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5603 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5604 Return true if STMT_INFO is vectorizable in this way. */
5607 vectorizable_operation (vec_info
*vinfo
,
5608 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
5609 gimple
**vec_stmt
, slp_tree slp_node
,
5610 stmt_vector_for_cost
*cost_vec
)
5614 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
5616 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5617 enum tree_code code
, orig_code
;
5618 machine_mode vec_mode
;
5622 bool target_support_p
;
5623 enum vect_def_type dt
[3]
5624 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
5626 poly_uint64 nunits_in
;
5627 poly_uint64 nunits_out
;
5629 int ncopies
, vec_num
;
5631 vec
<tree
> vec_oprnds0
= vNULL
;
5632 vec
<tree
> vec_oprnds1
= vNULL
;
5633 vec
<tree
> vec_oprnds2
= vNULL
;
5634 tree vop0
, vop1
, vop2
;
5635 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
5637 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5640 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5644 /* Is STMT a vectorizable binary/unary operation? */
5645 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
5649 /* Loads and stores are handled in vectorizable_{load,store}. */
5650 if (STMT_VINFO_DATA_REF (stmt_info
))
5653 orig_code
= code
= gimple_assign_rhs_code (stmt
);
5655 /* Shifts are handled in vectorizable_shift. */
5656 if (code
== LSHIFT_EXPR
5657 || code
== RSHIFT_EXPR
5658 || code
== LROTATE_EXPR
5659 || code
== RROTATE_EXPR
)
5662 /* Comparisons are handled in vectorizable_comparison. */
5663 if (TREE_CODE_CLASS (code
) == tcc_comparison
)
5666 /* Conditions are handled in vectorizable_condition. */
5667 if (code
== COND_EXPR
)
5670 /* For pointer addition and subtraction, we should use the normal
5671 plus and minus for the vector operation. */
5672 if (code
== POINTER_PLUS_EXPR
)
5674 if (code
== POINTER_DIFF_EXPR
)
5677 /* Support only unary or binary operations. */
5678 op_type
= TREE_CODE_LENGTH (code
);
5679 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
5681 if (dump_enabled_p ())
5682 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5683 "num. args = %d (not unary/binary/ternary op).\n",
5688 scalar_dest
= gimple_assign_lhs (stmt
);
5689 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
5691 /* Most operations cannot handle bit-precision types without extra
5693 bool mask_op_p
= VECTOR_BOOLEAN_TYPE_P (vectype_out
);
5695 && !type_has_mode_precision_p (TREE_TYPE (scalar_dest
))
5696 /* Exception are bitwise binary operations. */
5697 && code
!= BIT_IOR_EXPR
5698 && code
!= BIT_XOR_EXPR
5699 && code
!= BIT_AND_EXPR
)
5701 if (dump_enabled_p ())
5702 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5703 "bit-precision arithmetic not supported.\n");
5708 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5709 0, &op0
, &slp_op0
, &dt
[0], &vectype
))
5711 if (dump_enabled_p ())
5712 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5713 "use not simple.\n");
5716 /* If op0 is an external or constant def, infer the vector type
5717 from the scalar type. */
5720 /* For boolean type we cannot determine vectype by
5721 invariant value (don't know whether it is a vector
5722 of booleans or vector of integers). We use output
5723 vectype because operations on boolean don't change
5725 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op0
)))
5727 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (scalar_dest
)))
5729 if (dump_enabled_p ())
5730 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5731 "not supported operation on bool value.\n");
5734 vectype
= vectype_out
;
5737 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (op0
),
5741 gcc_assert (vectype
);
5744 if (dump_enabled_p ())
5745 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5746 "no vectype for scalar type %T\n",
5752 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
5753 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
5754 if (maybe_ne (nunits_out
, nunits_in
))
5757 tree vectype2
= NULL_TREE
, vectype3
= NULL_TREE
;
5758 slp_tree slp_op1
= NULL
, slp_op2
= NULL
;
5759 if (op_type
== binary_op
|| op_type
== ternary_op
)
5761 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5762 1, &op1
, &slp_op1
, &dt
[1], &vectype2
))
5764 if (dump_enabled_p ())
5765 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5766 "use not simple.\n");
5770 if (op_type
== ternary_op
)
5772 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
5773 2, &op2
, &slp_op2
, &dt
[2], &vectype3
))
5775 if (dump_enabled_p ())
5776 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5777 "use not simple.\n");
5782 /* Multiple types in SLP are handled by creating the appropriate number of
5783 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
5788 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
5792 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
5796 gcc_assert (ncopies
>= 1);
5798 /* Reject attempts to combine mask types with nonmask types, e.g. if
5799 we have an AND between a (nonmask) boolean loaded from memory and
5800 a (mask) boolean result of a comparison.
5802 TODO: We could easily fix these cases up using pattern statements. */
5803 if (VECTOR_BOOLEAN_TYPE_P (vectype
) != mask_op_p
5804 || (vectype2
&& VECTOR_BOOLEAN_TYPE_P (vectype2
) != mask_op_p
)
5805 || (vectype3
&& VECTOR_BOOLEAN_TYPE_P (vectype3
) != mask_op_p
))
5807 if (dump_enabled_p ())
5808 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5809 "mixed mask and nonmask vector types\n");
5813 /* Supportable by target? */
5815 vec_mode
= TYPE_MODE (vectype
);
5816 if (code
== MULT_HIGHPART_EXPR
)
5817 target_support_p
= can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
));
5820 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
5823 if (dump_enabled_p ())
5824 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5828 target_support_p
= (optab_handler (optab
, vec_mode
)
5829 != CODE_FOR_nothing
);
5832 if (!target_support_p
)
5834 if (dump_enabled_p ())
5835 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5836 "op not supported by target.\n");
5837 /* Check only during analysis. */
5838 if (maybe_ne (GET_MODE_SIZE (vec_mode
), UNITS_PER_WORD
)
5839 || (!vec_stmt
&& !vect_worthwhile_without_simd_p (vinfo
, code
)))
5841 if (dump_enabled_p ())
5842 dump_printf_loc (MSG_NOTE
, vect_location
,
5843 "proceeding using word mode.\n");
5846 /* Worthwhile without SIMD support? Check only during analysis. */
5847 if (!VECTOR_MODE_P (vec_mode
)
5849 && !vect_worthwhile_without_simd_p (vinfo
, code
))
5851 if (dump_enabled_p ())
5852 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5853 "not worthwhile without SIMD support.\n");
5857 int reduc_idx
= STMT_VINFO_REDUC_IDX (stmt_info
);
5858 vec_loop_masks
*masks
= (loop_vinfo
? &LOOP_VINFO_MASKS (loop_vinfo
) : NULL
);
5859 internal_fn cond_fn
= get_conditional_internal_fn (code
);
5861 if (!vec_stmt
) /* transformation not required. */
5863 /* If this operation is part of a reduction, a fully-masked loop
5864 should only change the active lanes of the reduction chain,
5865 keeping the inactive lanes as-is. */
5867 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
5870 if (cond_fn
== IFN_LAST
5871 || !direct_internal_fn_supported_p (cond_fn
, vectype
,
5872 OPTIMIZE_FOR_SPEED
))
5874 if (dump_enabled_p ())
5875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5876 "can't use a fully-masked loop because no"
5877 " conditional operation is available.\n");
5878 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
) = false;
5881 vect_record_loop_mask (loop_vinfo
, masks
, ncopies
* vec_num
,
5885 /* Put types on constant and invariant SLP children. */
5887 && (!vect_maybe_update_slp_op_vectype (slp_op0
, vectype
)
5888 || !vect_maybe_update_slp_op_vectype (slp_op1
, vectype
)
5889 || !vect_maybe_update_slp_op_vectype (slp_op2
, vectype
)))
5891 if (dump_enabled_p ())
5892 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5893 "incompatible vector types for invariants\n");
5897 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
5898 DUMP_VECT_SCOPE ("vectorizable_operation");
5899 vect_model_simple_cost (vinfo
, stmt_info
,
5900 ncopies
, dt
, ndts
, slp_node
, cost_vec
);
5906 if (dump_enabled_p ())
5907 dump_printf_loc (MSG_NOTE
, vect_location
,
5908 "transform binary/unary operation.\n");
5910 bool masked_loop_p
= loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
);
5912 /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as
5913 vectors with unsigned elements, but the result is signed. So, we
5914 need to compute the MINUS_EXPR into vectype temporary and
5915 VIEW_CONVERT_EXPR it into the final vectype_out result. */
5916 tree vec_cvt_dest
= NULL_TREE
;
5917 if (orig_code
== POINTER_DIFF_EXPR
)
5919 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5920 vec_cvt_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5924 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
5926 /* In case the vectorization factor (VF) is bigger than the number
5927 of elements that we can fit in a vectype (nunits), we have to generate
5928 more than one vector stmt - i.e - we need to "unroll" the
5929 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5930 from one copy of the vector stmt to the next, in the field
5931 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5932 stages to find the correct vector defs to be used when vectorizing
5933 stmts that use the defs of the current stmt. The example below
5934 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
5935 we need to create 4 vectorized stmts):
5937 before vectorization:
5938 RELATED_STMT VEC_STMT
5942 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
5944 RELATED_STMT VEC_STMT
5945 VS1_0: vx0 = memref0 VS1_1 -
5946 VS1_1: vx1 = memref1 VS1_2 -
5947 VS1_2: vx2 = memref2 VS1_3 -
5948 VS1_3: vx3 = memref3 - -
5949 S1: x = load - VS1_0
5952 step2: vectorize stmt S2 (done here):
5953 To vectorize stmt S2 we first need to find the relevant vector
5954 def for the first operand 'x'. This is, as usual, obtained from
5955 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
5956 that defines 'x' (S1). This way we find the stmt VS1_0, and the
5957 relevant vector def 'vx0'. Having found 'vx0' we can generate
5958 the vector stmt VS2_0, and as usual, record it in the
5959 STMT_VINFO_VEC_STMT of stmt S2.
5960 When creating the second copy (VS2_1), we obtain the relevant vector
5961 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
5962 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
5963 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
5964 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
5965 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
5966 chain of stmts and pointers:
5967 RELATED_STMT VEC_STMT
5968 VS1_0: vx0 = memref0 VS1_1 -
5969 VS1_1: vx1 = memref1 VS1_2 -
5970 VS1_2: vx2 = memref2 VS1_3 -
5971 VS1_3: vx3 = memref3 - -
5972 S1: x = load - VS1_0
5973 VS2_0: vz0 = vx0 + v1 VS2_1 -
5974 VS2_1: vz1 = vx1 + v1 VS2_2 -
5975 VS2_2: vz2 = vx2 + v1 VS2_3 -
5976 VS2_3: vz3 = vx3 + v1 - -
5977 S2: z = x + 1 - VS2_0 */
5979 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
5980 op0
, &vec_oprnds0
, op1
, &vec_oprnds1
, op2
, &vec_oprnds2
);
5981 /* Arguments are ready. Create the new vector stmt. */
5982 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
5984 gimple
*new_stmt
= NULL
;
5985 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
5986 ? vec_oprnds1
[i
] : NULL_TREE
);
5987 vop2
= ((op_type
== ternary_op
) ? vec_oprnds2
[i
] : NULL_TREE
);
5988 if (masked_loop_p
&& reduc_idx
>= 0)
5990 /* Perform the operation on active elements only and take
5991 inactive elements from the reduction chain input. */
5993 vop2
= reduc_idx
== 1 ? vop1
: vop0
;
5994 tree mask
= vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
5996 gcall
*call
= gimple_build_call_internal (cond_fn
, 4, mask
,
5998 new_temp
= make_ssa_name (vec_dest
, call
);
5999 gimple_call_set_lhs (call
, new_temp
);
6000 gimple_call_set_nothrow (call
, true);
6001 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
6006 new_stmt
= gimple_build_assign (vec_dest
, code
, vop0
, vop1
, vop2
);
6007 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
6008 gimple_assign_set_lhs (new_stmt
, new_temp
);
6009 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
6012 new_temp
= build1 (VIEW_CONVERT_EXPR
, vectype_out
, new_temp
);
6013 new_stmt
= gimple_build_assign (vec_cvt_dest
, VIEW_CONVERT_EXPR
,
6015 new_temp
= make_ssa_name (vec_cvt_dest
, new_stmt
);
6016 gimple_assign_set_lhs (new_stmt
, new_temp
);
6017 vect_finish_stmt_generation (vinfo
, stmt_info
,
6022 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
6024 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
6028 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6030 vec_oprnds0
.release ();
6031 vec_oprnds1
.release ();
6032 vec_oprnds2
.release ();
6037 /* A helper function to ensure data reference DR_INFO's base alignment. */
6040 ensure_base_align (dr_vec_info
*dr_info
)
6042 if (dr_info
->misalignment
== DR_MISALIGNMENT_UNINITIALIZED
)
6045 if (dr_info
->base_misaligned
)
6047 tree base_decl
= dr_info
->base_decl
;
6049 // We should only be able to increase the alignment of a base object if
6050 // we know what its new alignment should be at compile time.
6051 unsigned HOST_WIDE_INT align_base_to
=
6052 DR_TARGET_ALIGNMENT (dr_info
).to_constant () * BITS_PER_UNIT
;
6054 if (decl_in_symtab_p (base_decl
))
6055 symtab_node::get (base_decl
)->increase_alignment (align_base_to
);
6056 else if (DECL_ALIGN (base_decl
) < align_base_to
)
6058 SET_DECL_ALIGN (base_decl
, align_base_to
);
6059 DECL_USER_ALIGN (base_decl
) = 1;
6061 dr_info
->base_misaligned
= false;
6066 /* Function get_group_alias_ptr_type.
6068 Return the alias type for the group starting at FIRST_STMT_INFO. */
6071 get_group_alias_ptr_type (stmt_vec_info first_stmt_info
)
6073 struct data_reference
*first_dr
, *next_dr
;
6075 first_dr
= STMT_VINFO_DATA_REF (first_stmt_info
);
6076 stmt_vec_info next_stmt_info
= DR_GROUP_NEXT_ELEMENT (first_stmt_info
);
6077 while (next_stmt_info
)
6079 next_dr
= STMT_VINFO_DATA_REF (next_stmt_info
);
6080 if (get_alias_set (DR_REF (first_dr
))
6081 != get_alias_set (DR_REF (next_dr
)))
6083 if (dump_enabled_p ())
6084 dump_printf_loc (MSG_NOTE
, vect_location
,
6085 "conflicting alias set types.\n");
6086 return ptr_type_node
;
6088 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
6090 return reference_alias_ptr_type (DR_REF (first_dr
));
6094 /* Function scan_operand_equal_p.
6096 Helper function for check_scan_store. Compare two references
6097 with .GOMP_SIMD_LANE bases. */
6100 scan_operand_equal_p (tree ref1
, tree ref2
)
6102 tree ref
[2] = { ref1
, ref2
};
6103 poly_int64 bitsize
[2], bitpos
[2];
6104 tree offset
[2], base
[2];
6105 for (int i
= 0; i
< 2; ++i
)
6108 int unsignedp
, reversep
, volatilep
= 0;
6109 base
[i
] = get_inner_reference (ref
[i
], &bitsize
[i
], &bitpos
[i
],
6110 &offset
[i
], &mode
, &unsignedp
,
6111 &reversep
, &volatilep
);
6112 if (reversep
|| volatilep
|| maybe_ne (bitpos
[i
], 0))
6114 if (TREE_CODE (base
[i
]) == MEM_REF
6115 && offset
[i
] == NULL_TREE
6116 && TREE_CODE (TREE_OPERAND (base
[i
], 0)) == SSA_NAME
)
6118 gimple
*def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
[i
], 0));
6119 if (is_gimple_assign (def_stmt
)
6120 && gimple_assign_rhs_code (def_stmt
) == POINTER_PLUS_EXPR
6121 && TREE_CODE (gimple_assign_rhs1 (def_stmt
)) == ADDR_EXPR
6122 && TREE_CODE (gimple_assign_rhs2 (def_stmt
)) == SSA_NAME
)
6124 if (maybe_ne (mem_ref_offset (base
[i
]), 0))
6126 base
[i
] = TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
6127 offset
[i
] = gimple_assign_rhs2 (def_stmt
);
6132 if (!operand_equal_p (base
[0], base
[1], 0))
6134 if (maybe_ne (bitsize
[0], bitsize
[1]))
6136 if (offset
[0] != offset
[1])
6138 if (!offset
[0] || !offset
[1])
6140 if (!operand_equal_p (offset
[0], offset
[1], 0))
6143 for (int i
= 0; i
< 2; ++i
)
6145 step
[i
] = integer_one_node
;
6146 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6148 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6149 if (is_gimple_assign (def_stmt
)
6150 && gimple_assign_rhs_code (def_stmt
) == MULT_EXPR
6151 && (TREE_CODE (gimple_assign_rhs2 (def_stmt
))
6154 step
[i
] = gimple_assign_rhs2 (def_stmt
);
6155 offset
[i
] = gimple_assign_rhs1 (def_stmt
);
6158 else if (TREE_CODE (offset
[i
]) == MULT_EXPR
)
6160 step
[i
] = TREE_OPERAND (offset
[i
], 1);
6161 offset
[i
] = TREE_OPERAND (offset
[i
], 0);
6163 tree rhs1
= NULL_TREE
;
6164 if (TREE_CODE (offset
[i
]) == SSA_NAME
)
6166 gimple
*def_stmt
= SSA_NAME_DEF_STMT (offset
[i
]);
6167 if (gimple_assign_cast_p (def_stmt
))
6168 rhs1
= gimple_assign_rhs1 (def_stmt
);
6170 else if (CONVERT_EXPR_P (offset
[i
]))
6171 rhs1
= TREE_OPERAND (offset
[i
], 0);
6173 && INTEGRAL_TYPE_P (TREE_TYPE (rhs1
))
6174 && INTEGRAL_TYPE_P (TREE_TYPE (offset
[i
]))
6175 && (TYPE_PRECISION (TREE_TYPE (offset
[i
]))
6176 >= TYPE_PRECISION (TREE_TYPE (rhs1
))))
6179 if (!operand_equal_p (offset
[0], offset
[1], 0)
6180 || !operand_equal_p (step
[0], step
[1], 0))
6188 enum scan_store_kind
{
6189 /* Normal permutation. */
6190 scan_store_kind_perm
,
6192 /* Whole vector left shift permutation with zero init. */
6193 scan_store_kind_lshift_zero
,
6195 /* Whole vector left shift permutation and VEC_COND_EXPR. */
6196 scan_store_kind_lshift_cond
6199 /* Function check_scan_store.
6201 Verify if we can perform the needed permutations or whole vector shifts.
6202 Return -1 on failure, otherwise exact log2 of vectype's nunits.
6203 USE_WHOLE_VECTOR is a vector of enum scan_store_kind which operation
6204 to do at each step. */
6207 scan_store_can_perm_p (tree vectype
, tree init
,
6208 vec
<enum scan_store_kind
> *use_whole_vector
= NULL
)
6210 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6211 unsigned HOST_WIDE_INT nunits
;
6212 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6214 int units_log2
= exact_log2 (nunits
);
6215 if (units_log2
<= 0)
6219 enum scan_store_kind whole_vector_shift_kind
= scan_store_kind_perm
;
6220 for (i
= 0; i
<= units_log2
; ++i
)
6222 unsigned HOST_WIDE_INT j
, k
;
6223 enum scan_store_kind kind
= scan_store_kind_perm
;
6224 vec_perm_builder
sel (nunits
, nunits
, 1);
6225 sel
.quick_grow (nunits
);
6226 if (i
== units_log2
)
6228 for (j
= 0; j
< nunits
; ++j
)
6229 sel
[j
] = nunits
- 1;
6233 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6235 for (k
= 0; j
< nunits
; ++j
, ++k
)
6236 sel
[j
] = nunits
+ k
;
6238 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
6239 if (!can_vec_perm_const_p (vec_mode
, indices
))
6241 if (i
== units_log2
)
6244 if (whole_vector_shift_kind
== scan_store_kind_perm
)
6246 if (optab_handler (vec_shl_optab
, vec_mode
) == CODE_FOR_nothing
)
6248 whole_vector_shift_kind
= scan_store_kind_lshift_zero
;
6249 /* Whole vector shifts shift in zeros, so if init is all zero
6250 constant, there is no need to do anything further. */
6251 if ((TREE_CODE (init
) != INTEGER_CST
6252 && TREE_CODE (init
) != REAL_CST
)
6253 || !initializer_zerop (init
))
6255 tree masktype
= truth_type_for (vectype
);
6256 if (!expand_vec_cond_expr_p (vectype
, masktype
, VECTOR_CST
))
6258 whole_vector_shift_kind
= scan_store_kind_lshift_cond
;
6261 kind
= whole_vector_shift_kind
;
6263 if (use_whole_vector
)
6265 if (kind
!= scan_store_kind_perm
&& use_whole_vector
->is_empty ())
6266 use_whole_vector
->safe_grow_cleared (i
);
6267 if (kind
!= scan_store_kind_perm
|| !use_whole_vector
->is_empty ())
6268 use_whole_vector
->safe_push (kind
);
6276 /* Function check_scan_store.
6278 Check magic stores for #pragma omp scan {in,ex}clusive reductions. */
6281 check_scan_store (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree vectype
,
6282 enum vect_def_type rhs_dt
, bool slp
, tree mask
,
6283 vect_memory_access_type memory_access_type
)
6285 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6286 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6289 gcc_assert (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1);
6292 || memory_access_type
!= VMAT_CONTIGUOUS
6293 || TREE_CODE (DR_BASE_ADDRESS (dr_info
->dr
)) != ADDR_EXPR
6294 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0))
6295 || loop_vinfo
== NULL
6296 || LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
6297 || STMT_VINFO_GROUPED_ACCESS (stmt_info
)
6298 || !integer_zerop (get_dr_vinfo_offset (vinfo
, dr_info
))
6299 || !integer_zerop (DR_INIT (dr_info
->dr
))
6300 || !(ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
)))
6301 || !alias_sets_conflict_p (get_alias_set (vectype
),
6302 get_alias_set (TREE_TYPE (ref_type
))))
6304 if (dump_enabled_p ())
6305 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6306 "unsupported OpenMP scan store.\n");
6310 /* We need to pattern match code built by OpenMP lowering and simplified
6311 by following optimizations into something we can handle.
6312 #pragma omp simd reduction(inscan,+:r)
6316 #pragma omp scan inclusive (r)
6319 shall have body with:
6320 // Initialization for input phase, store the reduction initializer:
6321 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6322 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6324 // Actual input phase:
6326 r.0_5 = D.2042[_20];
6329 // Initialization for scan phase:
6330 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 2);
6336 // Actual scan phase:
6338 r.1_8 = D.2042[_20];
6340 The "omp simd array" variable D.2042 holds the privatized copy used
6341 inside of the loop and D.2043 is another one that holds copies of
6342 the current original list item. The separate GOMP_SIMD_LANE ifn
6343 kinds are there in order to allow optimizing the initializer store
6344 and combiner sequence, e.g. if it is originally some C++ish user
6345 defined reduction, but allow the vectorizer to pattern recognize it
6346 and turn into the appropriate vectorized scan.
6348 For exclusive scan, this is slightly different:
6349 #pragma omp simd reduction(inscan,+:r)
6353 #pragma omp scan exclusive (r)
6356 shall have body with:
6357 // Initialization for input phase, store the reduction initializer:
6358 _20 = .GOMP_SIMD_LANE (simduid.3_14(D), 0);
6359 _21 = .GOMP_SIMD_LANE (simduid.3_14(D), 1);
6361 // Actual input phase:
6363 r.0_5 = D.2042[_20];
6366 // Initialization for scan phase:
6367 _25 = .GOMP_SIMD_LANE (simduid.3_14(D), 3);
6373 // Actual scan phase:
6375 r.1_8 = D.2044[_20];
6378 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 2)
6380 /* Match the D.2042[_21] = 0; store above. Just require that
6381 it is a constant or external definition store. */
6382 if (rhs_dt
!= vect_constant_def
&& rhs_dt
!= vect_external_def
)
6385 if (dump_enabled_p ())
6386 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6387 "unsupported OpenMP scan initializer store.\n");
6391 if (! loop_vinfo
->scan_map
)
6392 loop_vinfo
->scan_map
= new hash_map
<tree
, tree
>;
6393 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6394 tree
&cached
= loop_vinfo
->scan_map
->get_or_insert (var
);
6397 cached
= gimple_assign_rhs1 (STMT_VINFO_STMT (stmt_info
));
6399 /* These stores can be vectorized normally. */
6403 if (rhs_dt
!= vect_internal_def
)
6406 if (dump_enabled_p ())
6407 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6408 "unsupported OpenMP scan combiner pattern.\n");
6412 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6413 tree rhs
= gimple_assign_rhs1 (stmt
);
6414 if (TREE_CODE (rhs
) != SSA_NAME
)
6417 gimple
*other_store_stmt
= NULL
;
6418 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6419 bool inscan_var_store
6420 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
6422 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6424 if (!inscan_var_store
)
6426 use_operand_p use_p
;
6427 imm_use_iterator iter
;
6428 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6430 gimple
*use_stmt
= USE_STMT (use_p
);
6431 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6433 if (gimple_bb (use_stmt
) != gimple_bb (stmt
)
6434 || !is_gimple_assign (use_stmt
)
6435 || gimple_assign_rhs_class (use_stmt
) != GIMPLE_BINARY_RHS
6437 || TREE_CODE (gimple_assign_lhs (use_stmt
)) != SSA_NAME
)
6439 other_store_stmt
= use_stmt
;
6441 if (other_store_stmt
== NULL
)
6443 rhs
= gimple_assign_lhs (other_store_stmt
);
6444 if (!single_imm_use (rhs
, &use_p
, &other_store_stmt
))
6448 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3)
6450 use_operand_p use_p
;
6451 imm_use_iterator iter
;
6452 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6454 gimple
*use_stmt
= USE_STMT (use_p
);
6455 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6457 if (other_store_stmt
)
6459 other_store_stmt
= use_stmt
;
6465 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6466 if (gimple_bb (def_stmt
) != gimple_bb (stmt
)
6467 || !is_gimple_assign (def_stmt
)
6468 || gimple_assign_rhs_class (def_stmt
) != GIMPLE_BINARY_RHS
)
6471 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6472 /* For pointer addition, we should use the normal plus for the vector
6476 case POINTER_PLUS_EXPR
:
6479 case MULT_HIGHPART_EXPR
:
6484 if (TREE_CODE_LENGTH (code
) != binary_op
|| !commutative_tree_code (code
))
6487 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6488 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6489 if (TREE_CODE (rhs1
) != SSA_NAME
|| TREE_CODE (rhs2
) != SSA_NAME
)
6492 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6493 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6494 if (gimple_bb (load1_stmt
) != gimple_bb (stmt
)
6495 || !gimple_assign_load_p (load1_stmt
)
6496 || gimple_bb (load2_stmt
) != gimple_bb (stmt
)
6497 || !gimple_assign_load_p (load2_stmt
))
6500 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6501 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6502 if (load1_stmt_info
== NULL
6503 || load2_stmt_info
== NULL
6504 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load1_stmt_info
)
6505 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
))
6506 || (STMT_VINFO_SIMD_LANE_ACCESS_P (load2_stmt_info
)
6507 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6510 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && inscan_var_store
)
6512 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6513 if (TREE_CODE (DR_BASE_ADDRESS (load1_dr_info
->dr
)) != ADDR_EXPR
6514 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0)))
6516 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6518 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6522 use_operand_p use_p
;
6523 imm_use_iterator iter
;
6524 FOR_EACH_IMM_USE_FAST (use_p
, iter
, lrhs
)
6526 gimple
*use_stmt
= USE_STMT (use_p
);
6527 if (use_stmt
== def_stmt
|| is_gimple_debug (use_stmt
))
6529 if (other_store_stmt
)
6531 other_store_stmt
= use_stmt
;
6535 if (other_store_stmt
== NULL
)
6537 if (gimple_bb (other_store_stmt
) != gimple_bb (stmt
)
6538 || !gimple_store_p (other_store_stmt
))
6541 stmt_vec_info other_store_stmt_info
6542 = loop_vinfo
->lookup_stmt (other_store_stmt
);
6543 if (other_store_stmt_info
== NULL
6544 || (STMT_VINFO_SIMD_LANE_ACCESS_P (other_store_stmt_info
)
6545 != STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
)))
6548 gimple
*stmt1
= stmt
;
6549 gimple
*stmt2
= other_store_stmt
;
6550 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6551 std::swap (stmt1
, stmt2
);
6552 if (scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6553 gimple_assign_rhs1 (load2_stmt
)))
6555 std::swap (rhs1
, rhs2
);
6556 std::swap (load1_stmt
, load2_stmt
);
6557 std::swap (load1_stmt_info
, load2_stmt_info
);
6559 if (!scan_operand_equal_p (gimple_assign_lhs (stmt1
),
6560 gimple_assign_rhs1 (load1_stmt
)))
6563 tree var3
= NULL_TREE
;
6564 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 3
6565 && !scan_operand_equal_p (gimple_assign_lhs (stmt2
),
6566 gimple_assign_rhs1 (load2_stmt
)))
6568 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6570 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6571 if (TREE_CODE (DR_BASE_ADDRESS (load2_dr_info
->dr
)) != ADDR_EXPR
6572 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0)))
6574 var3
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
6575 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var3
))
6576 || lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var3
))
6577 || lookup_attribute ("omp simd inscan exclusive",
6578 DECL_ATTRIBUTES (var3
)))
6582 dr_vec_info
*other_dr_info
= STMT_VINFO_DR_INFO (other_store_stmt_info
);
6583 if (TREE_CODE (DR_BASE_ADDRESS (other_dr_info
->dr
)) != ADDR_EXPR
6584 || !VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0)))
6587 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6588 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (other_dr_info
->dr
), 0);
6589 if (!lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var1
))
6590 || !lookup_attribute ("omp simd array", DECL_ATTRIBUTES (var2
))
6591 || (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6592 == (!lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var2
))))
6595 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6596 std::swap (var1
, var2
);
6598 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6600 if (!lookup_attribute ("omp simd inscan exclusive",
6601 DECL_ATTRIBUTES (var1
)))
6606 if (loop_vinfo
->scan_map
== NULL
)
6608 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
6612 /* The IL is as expected, now check if we can actually vectorize it.
6619 should be vectorized as (where _40 is the vectorized rhs
6620 from the D.2042[_21] = 0; store):
6621 _30 = MEM <vector(8) int> [(int *)&D.2043];
6622 _31 = MEM <vector(8) int> [(int *)&D.2042];
6623 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6625 // _33 = { _31[0], _31[0]+_31[1], _31[1]+_31[2], ..., _31[6]+_31[7] };
6626 _34 = VEC_PERM_EXPR <_40, _33, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6628 // _35 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6629 // _31[1]+.._31[4], ... _31[4]+.._31[7] };
6630 _36 = VEC_PERM_EXPR <_40, _35, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6632 // _37 = { _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6633 // _31[0]+.._31[4], ... _31[0]+.._31[7] };
6635 _39 = VEC_PERM_EXPR <_38, _38, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6636 MEM <vector(8) int> [(int *)&D.2043] = _39;
6637 MEM <vector(8) int> [(int *)&D.2042] = _38;
6644 should be vectorized as (where _40 is the vectorized rhs
6645 from the D.2042[_21] = 0; store):
6646 _30 = MEM <vector(8) int> [(int *)&D.2043];
6647 _31 = MEM <vector(8) int> [(int *)&D.2042];
6648 _32 = VEC_PERM_EXPR <_40, _31, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6649 _33 = VEC_PERM_EXPR <_40, _32, { 0, 8, 9, 10, 11, 12, 13, 14 }>;
6651 // _34 = { 0, _31[0], _31[0]+_31[1], _31[1]+_31[2], _31[2]+_31[3],
6652 // _31[3]+_31[4], ... _31[5]+.._31[6] };
6653 _35 = VEC_PERM_EXPR <_40, _34, { 0, 1, 8, 9, 10, 11, 12, 13 }>;
6655 // _36 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6656 // _31[1]+.._31[4], ... _31[3]+.._31[6] };
6657 _37 = VEC_PERM_EXPR <_40, _36, { 0, 1, 2, 3, 8, 9, 10, 11 }>;
6659 // _38 = { 0, _31[0], _31[0]+_31[1], _31[0]+.._31[2], _31[0]+.._31[3],
6660 // _31[0]+.._31[4], ... _31[0]+.._31[6] };
6663 _51 = VEC_PERM_EXPR <_50, _50, { 7, 7, 7, 7, 7, 7, 7, 7 }>;
6664 MEM <vector(8) int> [(int *)&D.2044] = _39;
6665 MEM <vector(8) int> [(int *)&D.2042] = _51; */
6666 enum machine_mode vec_mode
= TYPE_MODE (vectype
);
6667 optab optab
= optab_for_tree_code (code
, vectype
, optab_default
);
6668 if (!optab
|| optab_handler (optab
, vec_mode
) == CODE_FOR_nothing
)
6671 int units_log2
= scan_store_can_perm_p (vectype
, *init
);
6672 if (units_log2
== -1)
6679 /* Function vectorizable_scan_store.
6681 Helper of vectorizable_store, arguments like on vectorizable_store.
6682 Handle only the transformation, checking is done in check_scan_store. */
/* NOTE(review): this extract appears to have dropped brace-only and
   "return false;" lines (the embedded original line numbers jump, e.g.
   6749 -> 6751); the comments below describe only the statements that
   are visible here.  */
6685 vectorizable_scan_store (vec_info
*vinfo
,
6686 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6687 gimple
**vec_stmt
, int ncopies
)
/* Gather the loop vinfo, the store's data reference, its alias pointer
   type and the vector type chosen for this statement.  */
6689 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6690 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
);
6691 tree ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
6692 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6694 if (dump_enabled_p ())
6695 dump_printf_loc (MSG_NOTE
, vect_location
,
6696 "transform scan store. ncopies = %d\n", ncopies
);
6698 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6699 tree rhs
= gimple_assign_rhs1 (stmt
);
6700 gcc_assert (TREE_CODE (rhs
) == SSA_NAME
);
/* The scan is recognized via the "omp simd inscan" attribute on the
   base variable of the store's data reference.  */
6702 tree var
= TREE_OPERAND (DR_BASE_ADDRESS (dr_info
->dr
), 0);
6703 bool inscan_var_store
6704 = lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var
)) != NULL
;
/* SIMD_LANE_ACCESS == 4 marks exclusive scan; when storing to the
   non-inscan work array, chase the stored SSA name's non-debug use
   (other than the store itself) to find the value being accumulated.  */
6706 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6708 use_operand_p use_p
;
6709 imm_use_iterator iter
;
6710 FOR_EACH_IMM_USE_FAST (use_p
, iter
, rhs
)
6712 gimple
*use_stmt
= USE_STMT (use_p
);
6713 if (use_stmt
== stmt
|| is_gimple_debug (use_stmt
))
6715 rhs
= gimple_assign_lhs (use_stmt
);
/* RHS is defined by the binary reduction statement; CODE must be a
   commutative binary operation (asserted below) whose operands are
   SSA names defined by loads from the two "omp simd" arrays.  */
6720 gimple
*def_stmt
= SSA_NAME_DEF_STMT (rhs
);
6721 enum tree_code code
= gimple_assign_rhs_code (def_stmt
);
6722 if (code
== POINTER_PLUS_EXPR
)
6724 gcc_assert (TREE_CODE_LENGTH (code
) == binary_op
6725 && commutative_tree_code (code
));
6726 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
6727 tree rhs2
= gimple_assign_rhs2 (def_stmt
);
6728 gcc_assert (TREE_CODE (rhs1
) == SSA_NAME
&& TREE_CODE (rhs2
) == SSA_NAME
);
6729 gimple
*load1_stmt
= SSA_NAME_DEF_STMT (rhs1
);
6730 gimple
*load2_stmt
= SSA_NAME_DEF_STMT (rhs2
);
6731 stmt_vec_info load1_stmt_info
= loop_vinfo
->lookup_stmt (load1_stmt
);
6732 stmt_vec_info load2_stmt_info
= loop_vinfo
->lookup_stmt (load2_stmt
);
6733 dr_vec_info
*load1_dr_info
= STMT_VINFO_DR_INFO (load1_stmt_info
);
6734 dr_vec_info
*load2_dr_info
= STMT_VINFO_DR_INFO (load2_stmt_info
);
6735 tree var1
= TREE_OPERAND (DR_BASE_ADDRESS (load1_dr_info
->dr
), 0);
6736 tree var2
= TREE_OPERAND (DR_BASE_ADDRESS (load2_dr_info
->dr
), 0);
/* Canonicalize so that var1/rhs1/load1 refer to the non-inscan array:
   if var1 carries the inscan attribute, swap the two operands.  */
6738 if (lookup_attribute ("omp simd inscan", DECL_ATTRIBUTES (var1
)))
6740 std::swap (rhs1
, rhs2
);
6741 std::swap (var1
, var2
);
6742 std::swap (load1_dr_info
, load2_dr_info
);
/* Initial value recorded for this scan variable during the earlier
   analysis phase (scan_map is populated elsewhere — see
   check_scan_store in the full source).  */
6745 tree
*init
= loop_vinfo
->scan_map
->get (var1
);
/* Build the permutation masks: one mask per scan step (units_log2 of
   them, each shifting the vector by a growing power-of-two lane count)
   plus a final mask with all indices nunits-1 that broadcasts the last
   lane.  */
6748 unsigned HOST_WIDE_INT nunits
;
6749 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
))
6751 auto_vec
<enum scan_store_kind
, 16> use_whole_vector
;
6752 int units_log2
= scan_store_can_perm_p (vectype
, *init
, &use_whole_vector
);
6753 gcc_assert (units_log2
> 0);
6754 auto_vec
<tree
, 16> perms
;
6755 perms
.quick_grow (units_log2
+ 1);
6756 tree zero_vec
= NULL_TREE
, masktype
= NULL_TREE
;
6757 for (int i
= 0; i
<= units_log2
; ++i
)
6759 unsigned HOST_WIDE_INT j
, k
;
6760 vec_perm_builder
sel (nunits
, nunits
, 1);
6761 sel
.quick_grow (nunits
);
/* Last iteration: broadcast mask (every index selects lane
   nunits - 1).  Earlier iterations: shift mask — the low 2^i lanes
   come from the first operand, the rest from the second.  */
6762 if (i
== units_log2
)
6763 for (j
= 0; j
< nunits
; ++j
)
6764 sel
[j
] = nunits
- 1;
6767 for (j
= 0; j
< (HOST_WIDE_INT_1U
<< i
); ++j
)
6769 for (k
= 0; j
< nunits
; ++j
, ++k
)
6770 sel
[j
] = nunits
+ k
;
6772 vec_perm_indices
indices (sel
, i
== units_log2
? 1 : 2, nunits
);
/* If this step can't be done as a plain permute, prepare a zero
   vector to shift in (and, for the lshift_cond kind, a boolean mask
   type for fixing up the low lanes with a VEC_COND_EXPR later).  */
6773 if (!use_whole_vector
.is_empty ()
6774 && use_whole_vector
[i
] != scan_store_kind_perm
)
6776 if (zero_vec
== NULL_TREE
)
6777 zero_vec
= build_zero_cst (vectype
);
6778 if (masktype
== NULL_TREE
6779 && use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
6780 masktype
= truth_type_for (vectype
);
6781 perms
[i
] = vect_gen_perm_mask_any (vectype
, indices
);
6784 perms
[i
] = vect_gen_perm_mask_checked (vectype
, indices
);
/* Transformation proper: set up the data-ref pointers/offsets and
   fetch the vectorized defs, then for each copy emit the
   shift-and-accumulate steps followed by the last-lane broadcast.  */
6787 tree vec_oprnd1
= NULL_TREE
;
6788 tree vec_oprnd2
= NULL_TREE
;
6789 tree vec_oprnd3
= NULL_TREE
;
6790 tree dataref_ptr
= DR_BASE_ADDRESS (dr_info
->dr
);
6791 tree dataref_offset
= build_int_cst (ref_type
, 0);
6792 tree bump
= vect_get_data_ptr_increment (vinfo
, dr_info
,
6793 vectype
, VMAT_CONTIGUOUS
);
6794 tree ldataref_ptr
= NULL_TREE
;
6795 tree orig
= NULL_TREE
;
/* For exclusive scan into the work array, the accumulated value must
   be (re)loaded from the first load's data-ref instead of taken from
   a vectorized def.  */
6796 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4 && !inscan_var_store
)
6797 ldataref_ptr
= DR_BASE_ADDRESS (load1_dr_info
->dr
);
6798 auto_vec
<tree
> vec_oprnds1
;
6799 auto_vec
<tree
> vec_oprnds2
;
6800 auto_vec
<tree
> vec_oprnds3
;
6801 vect_get_vec_defs (vinfo
, stmt_info
, NULL
, ncopies
,
6802 *init
, &vec_oprnds1
,
6803 ldataref_ptr
== NULL
? rhs1
: NULL
, &vec_oprnds2
,
6804 rhs2
, &vec_oprnds3
);
6805 for (int j
= 0; j
< ncopies
; j
++)
6807 vec_oprnd1
= vec_oprnds1
[j
];
6808 if (ldataref_ptr
== NULL
)
6809 vec_oprnd2
= vec_oprnds2
[j
];
6810 vec_oprnd3
= vec_oprnds3
[j
];
6813 else if (!inscan_var_store
)
6814 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
/* Reload the accumulated vector from memory when it was not
   available as a vectorized def (ldataref_ptr set above).  */
6818 vec_oprnd2
= make_ssa_name (vectype
);
6819 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
6820 unshare_expr (ldataref_ptr
),
6822 vect_copy_ref_info (data_ref
, DR_REF (load1_dr_info
->dr
));
6823 gimple
*g
= gimple_build_assign (vec_oprnd2
, data_ref
);
6824 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6825 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6826 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
/* The scan steps: V starts as the loaded/def vector; each of the
   units_log2 iterations permutes in either the init vector
   (vec_oprnd1) or zeros and accumulates with CODE.  */
6829 tree v
= vec_oprnd2
;
6830 for (int i
= 0; i
< units_log2
; ++i
)
6832 tree new_temp
= make_ssa_name (vectype
);
6833 gimple
*g
= gimple_build_assign (new_temp
, VEC_PERM_EXPR
,
6835 && (use_whole_vector
[i
]
6836 != scan_store_kind_perm
))
6837 ? zero_vec
: vec_oprnd1
, v
,
6839 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6840 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6841 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
6843 if (zero_vec
&& use_whole_vector
[i
] == scan_store_kind_lshift_cond
)
6845 /* Whole vector shift shifted in zero bits, but if *init
6846 is not initializer_zerop, we need to replace those elements
6847 with elements from vec_oprnd1. */
6848 tree_vector_builder
vb (masktype
, nunits
, 1);
6849 for (unsigned HOST_WIDE_INT k
= 0; k
< nunits
; ++k
)
6850 vb
.quick_push (k
< (HOST_WIDE_INT_1U
<< i
)
6851 ? boolean_false_node
: boolean_true_node
);
6853 tree new_temp2
= make_ssa_name (vectype
);
6854 g
= gimple_build_assign (new_temp2
, VEC_COND_EXPR
, vb
.build (),
6855 new_temp
, vec_oprnd1
);
6856 vect_finish_stmt_generation (vinfo
, stmt_info
,
6858 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6859 new_temp
= new_temp2
;
6862 /* For exclusive scan, perform the perms[i] permutation once
6865 && STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4
6873 tree new_temp2
= make_ssa_name (vectype
);
6874 g
= gimple_build_assign (new_temp2
, code
, v
, new_temp
);
6875 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6876 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* Accumulate the previous copy's broadcast result (ORIG) into the
   scan result of this copy.  */
6881 tree new_temp
= make_ssa_name (vectype
);
6882 gimple
*g
= gimple_build_assign (new_temp
, code
, orig
, v
);
6883 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6884 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6886 tree last_perm_arg
= new_temp
;
6887 /* For exclusive scan, new_temp computed above is the exclusive scan
6888 prefix sum. Turn it into inclusive prefix sum for the broadcast
6889 of the last element into orig. */
6890 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) == 4)
6892 last_perm_arg
= make_ssa_name (vectype
);
6893 g
= gimple_build_assign (last_perm_arg
, code
, new_temp
, vec_oprnd2
);
6894 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6895 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* Broadcast the last lane using the extra perms[units_log2] mask
   built above; ORIG carries it into the next copy / final stores.  */
6898 orig
= make_ssa_name (vectype
);
6899 g
= gimple_build_assign (orig
, VEC_PERM_EXPR
, last_perm_arg
,
6900 last_perm_arg
, perms
[units_log2
]);
6901 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6902 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* When storing to the work array, emit this copy's scan result
   immediately.  */
6904 if (!inscan_var_store
)
6906 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
6907 unshare_expr (dataref_ptr
),
6909 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
6910 g
= gimple_build_assign (data_ref
, new_temp
);
6911 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6912 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
/* For the inscan variable itself, the stores of the broadcast result
   (ORIG) are emitted in a second pass over all copies, bumping the
   data-ref offset between copies.  */
6916 if (inscan_var_store
)
6917 for (int j
= 0; j
< ncopies
; j
++)
6920 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
6922 tree data_ref
= fold_build2 (MEM_REF
, vectype
,
6923 unshare_expr (dataref_ptr
),
6925 vect_copy_ref_info (data_ref
, DR_REF (dr_info
->dr
));
6926 gimple
*g
= gimple_build_assign (data_ref
, orig
);
6927 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
6928 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (g
);
6934 /* Function vectorizable_store.
6936 Check if STMT_INFO defines a non scalar data-ref (array/pointer/structure)
6937 that can be vectorized.
6938 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
6939 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
6940 Return true if STMT_INFO is vectorizable in this way. */
6943 vectorizable_store (vec_info
*vinfo
,
6944 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
6945 gimple
**vec_stmt
, slp_tree slp_node
,
6946 stmt_vector_for_cost
*cost_vec
)
6950 tree vec_oprnd
= NULL_TREE
;
6952 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6953 class loop
*loop
= NULL
;
6954 machine_mode vec_mode
;
6956 enum dr_alignment_support alignment_support_scheme
;
6957 enum vect_def_type rhs_dt
= vect_unknown_def_type
;
6958 enum vect_def_type mask_dt
= vect_unknown_def_type
;
6959 tree dataref_ptr
= NULL_TREE
;
6960 tree dataref_offset
= NULL_TREE
;
6961 gimple
*ptr_incr
= NULL
;
6964 stmt_vec_info first_stmt_info
;
6966 unsigned int group_size
, i
;
6967 vec
<tree
> oprnds
= vNULL
;
6968 vec
<tree
> result_chain
= vNULL
;
6969 tree offset
= NULL_TREE
;
6970 vec
<tree
> vec_oprnds
= vNULL
;
6971 bool slp
= (slp_node
!= NULL
);
6972 unsigned int vec_num
;
6973 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
6975 gather_scatter_info gs_info
;
6977 vec_load_store_type vls_type
;
6980 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
6983 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
6987 /* Is vectorizable store? */
6989 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
6990 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
6992 tree scalar_dest
= gimple_assign_lhs (assign
);
6993 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
6994 && is_pattern_stmt_p (stmt_info
))
6995 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
6996 if (TREE_CODE (scalar_dest
) != ARRAY_REF
6997 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
6998 && TREE_CODE (scalar_dest
) != INDIRECT_REF
6999 && TREE_CODE (scalar_dest
) != COMPONENT_REF
7000 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
7001 && TREE_CODE (scalar_dest
) != REALPART_EXPR
7002 && TREE_CODE (scalar_dest
) != MEM_REF
)
7007 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
7008 if (!call
|| !gimple_call_internal_p (call
))
7011 internal_fn ifn
= gimple_call_internal_fn (call
);
7012 if (!internal_store_fn_p (ifn
))
7015 if (slp_node
!= NULL
)
7017 if (dump_enabled_p ())
7018 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7019 "SLP of masked stores not supported.\n");
7023 int mask_index
= internal_fn_mask_index (ifn
);
7024 if (mask_index
>= 0)
7026 mask
= gimple_call_arg (call
, mask_index
);
7027 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
7033 op
= vect_get_store_rhs (stmt_info
);
7035 /* Cannot have hybrid store SLP -- that would mean storing to the
7036 same location twice. */
7037 gcc_assert (slp
== PURE_SLP_STMT (stmt_info
));
7039 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
), rhs_vectype
= NULL_TREE
;
7040 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
7044 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7045 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
7050 /* Multiple types in SLP are handled by creating the appropriate number of
7051 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
7056 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
7058 gcc_assert (ncopies
>= 1);
7060 /* FORNOW. This restriction should be relaxed. */
7061 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
) && ncopies
> 1)
7063 if (dump_enabled_p ())
7064 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7065 "multiple types in nested loop.\n");
7069 if (!vect_check_store_rhs (vinfo
, stmt_info
, slp_node
,
7070 op
, &rhs_dt
, &rhs_vectype
, &vls_type
))
7073 elem_type
= TREE_TYPE (vectype
);
7074 vec_mode
= TYPE_MODE (vectype
);
7076 if (!STMT_VINFO_DATA_REF (stmt_info
))
7079 vect_memory_access_type memory_access_type
;
7080 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp
, mask
, vls_type
,
7081 ncopies
, &memory_access_type
, &gs_info
))
7086 if (memory_access_type
== VMAT_CONTIGUOUS
)
7088 if (!VECTOR_MODE_P (vec_mode
)
7089 || !can_vec_mask_load_store_p (vec_mode
,
7090 TYPE_MODE (mask_vectype
), false))
7093 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
7094 && (memory_access_type
!= VMAT_GATHER_SCATTER
7095 || (gs_info
.decl
&& !VECTOR_BOOLEAN_TYPE_P (mask_vectype
))))
7097 if (dump_enabled_p ())
7098 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7099 "unsupported access type for masked store.\n");
7105 /* FORNOW. In some cases can vectorize even if data-type not supported
7106 (e.g. - array initialization with 0). */
7107 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
7111 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
7112 grouped_store
= (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
7113 && memory_access_type
!= VMAT_GATHER_SCATTER
7114 && (slp
|| memory_access_type
!= VMAT_CONTIGUOUS
));
7117 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
7118 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7119 group_size
= DR_GROUP_SIZE (first_stmt_info
);
7123 first_stmt_info
= stmt_info
;
7124 first_dr_info
= dr_info
;
7125 group_size
= vec_num
= 1;
7128 if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) > 1 && !vec_stmt
)
7130 if (!check_scan_store (vinfo
, stmt_info
, vectype
, rhs_dt
, slp
, mask
,
7131 memory_access_type
))
7135 if (!vec_stmt
) /* transformation not required. */
7137 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
7140 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
7141 check_load_store_masking (loop_vinfo
, vectype
, vls_type
, group_size
,
7142 memory_access_type
, &gs_info
, mask
);
7145 && !vect_maybe_update_slp_op_vectype (SLP_TREE_CHILDREN (slp_node
)[0],
7148 if (dump_enabled_p ())
7149 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
7150 "incompatible vector types for invariants\n");
7154 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
7155 vect_model_store_cost (vinfo
, stmt_info
, ncopies
,
7156 memory_access_type
, vls_type
, slp_node
, cost_vec
);
7159 gcc_assert (memory_access_type
== STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
7163 ensure_base_align (dr_info
);
7165 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
7167 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
, src
;
7168 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gs_info
.decl
));
7169 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
7170 tree ptr
, var
, scale
, vec_mask
;
7171 tree mask_arg
= NULL_TREE
, mask_op
= NULL_TREE
, perm_mask
= NULL_TREE
;
7172 tree mask_halfvectype
= mask_vectype
;
7173 edge pe
= loop_preheader_edge (loop
);
7176 enum { NARROW
, NONE
, WIDEN
} modifier
;
7177 poly_uint64 scatter_off_nunits
7178 = TYPE_VECTOR_SUBPARTS (gs_info
.offset_vectype
);
7180 if (known_eq (nunits
, scatter_off_nunits
))
7182 else if (known_eq (nunits
* 2, scatter_off_nunits
))
7186 /* Currently gathers and scatters are only supported for
7187 fixed-length vectors. */
7188 unsigned int count
= scatter_off_nunits
.to_constant ();
7189 vec_perm_builder
sel (count
, count
, 1);
7190 for (i
= 0; i
< (unsigned int) count
; ++i
)
7191 sel
.quick_push (i
| (count
/ 2));
7193 vec_perm_indices
indices (sel
, 1, count
);
7194 perm_mask
= vect_gen_perm_mask_checked (gs_info
.offset_vectype
,
7196 gcc_assert (perm_mask
!= NULL_TREE
);
7198 else if (known_eq (nunits
, scatter_off_nunits
* 2))
7202 /* Currently gathers and scatters are only supported for
7203 fixed-length vectors. */
7204 unsigned int count
= nunits
.to_constant ();
7205 vec_perm_builder
sel (count
, count
, 1);
7206 for (i
= 0; i
< (unsigned int) count
; ++i
)
7207 sel
.quick_push (i
| (count
/ 2));
7209 vec_perm_indices
indices (sel
, 2, count
);
7210 perm_mask
= vect_gen_perm_mask_checked (vectype
, indices
);
7211 gcc_assert (perm_mask
!= NULL_TREE
);
7215 mask_halfvectype
= truth_type_for (gs_info
.offset_vectype
);
7220 rettype
= TREE_TYPE (TREE_TYPE (gs_info
.decl
));
7221 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7222 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7223 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7224 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
7225 scaletype
= TREE_VALUE (arglist
);
7227 gcc_checking_assert (TREE_CODE (masktype
) == INTEGER_TYPE
7228 && TREE_CODE (rettype
) == VOID_TYPE
);
7230 ptr
= fold_convert (ptrtype
, gs_info
.base
);
7231 if (!is_gimple_min_invariant (ptr
))
7233 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
7234 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
7235 gcc_assert (!new_bb
);
7238 if (mask
== NULL_TREE
)
7240 mask_arg
= build_int_cst (masktype
, -1);
7241 mask_arg
= vect_init_vector (vinfo
, stmt_info
,
7242 mask_arg
, masktype
, NULL
);
7245 scale
= build_int_cst (scaletype
, gs_info
.scale
);
7247 auto_vec
<tree
> vec_oprnds0
;
7248 auto_vec
<tree
> vec_oprnds1
;
7249 auto_vec
<tree
> vec_masks
;
7252 tree mask_vectype
= truth_type_for (vectype
);
7253 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7255 ? ncopies
/ 2 : ncopies
,
7256 mask
, &vec_masks
, mask_vectype
);
7258 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7260 ? ncopies
/ 2 : ncopies
,
7261 gs_info
.offset
, &vec_oprnds0
);
7262 vect_get_vec_defs_for_operand (vinfo
, stmt_info
,
7264 ? ncopies
/ 2 : ncopies
,
7266 for (j
= 0; j
< ncopies
; ++j
)
7268 if (modifier
== WIDEN
)
7271 op
= permute_vec_elements (vinfo
, vec_oprnd0
, vec_oprnd0
,
7272 perm_mask
, stmt_info
, gsi
);
7274 op
= vec_oprnd0
= vec_oprnds0
[j
/ 2];
7275 src
= vec_oprnd1
= vec_oprnds1
[j
];
7277 mask_op
= vec_mask
= vec_masks
[j
];
7279 else if (modifier
== NARROW
)
7282 src
= permute_vec_elements (vinfo
, vec_oprnd1
, vec_oprnd1
,
7283 perm_mask
, stmt_info
, gsi
);
7285 src
= vec_oprnd1
= vec_oprnds1
[j
/ 2];
7286 op
= vec_oprnd0
= vec_oprnds0
[j
];
7288 mask_op
= vec_mask
= vec_masks
[j
/ 2];
7292 op
= vec_oprnd0
= vec_oprnds0
[j
];
7293 src
= vec_oprnd1
= vec_oprnds1
[j
];
7295 mask_op
= vec_mask
= vec_masks
[j
];
7298 if (!useless_type_conversion_p (srctype
, TREE_TYPE (src
)))
7300 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (src
)),
7301 TYPE_VECTOR_SUBPARTS (srctype
)));
7302 var
= vect_get_new_ssa_name (srctype
, vect_simple_var
);
7303 src
= build1 (VIEW_CONVERT_EXPR
, srctype
, src
);
7305 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, src
);
7306 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7310 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
7312 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
)),
7313 TYPE_VECTOR_SUBPARTS (idxtype
)));
7314 var
= vect_get_new_ssa_name (idxtype
, vect_simple_var
);
7315 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
7317 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, op
);
7318 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7326 if (modifier
== NARROW
)
7328 var
= vect_get_new_ssa_name (mask_halfvectype
,
7331 = gimple_build_assign (var
, (j
& 1) ? VEC_UNPACK_HI_EXPR
7332 : VEC_UNPACK_LO_EXPR
,
7334 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7337 tree optype
= TREE_TYPE (mask_arg
);
7338 if (TYPE_MODE (masktype
) == TYPE_MODE (optype
))
7341 utype
= lang_hooks
.types
.type_for_mode (TYPE_MODE (optype
), 1);
7342 var
= vect_get_new_ssa_name (utype
, vect_scalar_var
);
7343 mask_arg
= build1 (VIEW_CONVERT_EXPR
, utype
, mask_arg
);
7345 = gimple_build_assign (var
, VIEW_CONVERT_EXPR
, mask_arg
);
7346 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7348 if (!useless_type_conversion_p (masktype
, utype
))
7350 gcc_assert (TYPE_PRECISION (utype
)
7351 <= TYPE_PRECISION (masktype
));
7352 var
= vect_get_new_ssa_name (masktype
, vect_scalar_var
);
7353 new_stmt
= gimple_build_assign (var
, NOP_EXPR
, mask_arg
);
7354 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7360 = gimple_build_call (gs_info
.decl
, 5, ptr
, mask_arg
, op
, src
, scale
);
7361 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
7363 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
7365 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
7368 else if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) >= 3)
7369 return vectorizable_scan_store (vinfo
, stmt_info
, gsi
, vec_stmt
, ncopies
);
7371 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
7372 DR_GROUP_STORE_COUNT (DR_GROUP_FIRST_ELEMENT (stmt_info
))++;
7377 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt_info
));
7379 /* We vectorize all the stmts of the interleaving group when we
7380 reach the last stmt in the group. */
7381 if (DR_GROUP_STORE_COUNT (first_stmt_info
)
7382 < DR_GROUP_SIZE (first_stmt_info
)
7391 grouped_store
= false;
7392 /* VEC_NUM is the number of vect stmts to be created for this
7394 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7395 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
7396 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_stmt_info
)
7397 == first_stmt_info
);
7398 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
7399 op
= vect_get_store_rhs (first_stmt_info
);
7402 /* VEC_NUM is the number of vect stmts to be created for this
7404 vec_num
= group_size
;
7406 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
7409 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
7411 if (dump_enabled_p ())
7412 dump_printf_loc (MSG_NOTE
, vect_location
,
7413 "transform store. ncopies = %d\n", ncopies
);
7415 if (memory_access_type
== VMAT_ELEMENTWISE
7416 || memory_access_type
== VMAT_STRIDED_SLP
)
7418 gimple_stmt_iterator incr_gsi
;
7424 tree stride_base
, stride_step
, alias_off
;
7428 /* Checked by get_load_store_type. */
7429 unsigned int const_nunits
= nunits
.to_constant ();
7431 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
7432 gcc_assert (!nested_in_vect_loop_p (loop
, stmt_info
));
7434 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
7436 = fold_build_pointer_plus
7437 (DR_BASE_ADDRESS (first_dr_info
->dr
),
7438 size_binop (PLUS_EXPR
,
7439 convert_to_ptrofftype (dr_offset
),
7440 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
7441 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
7443 /* For a store with loop-invariant (but other than power-of-2)
7444 stride (i.e. not a grouped access) like so:
7446 for (i = 0; i < n; i += stride)
7449 we generate a new induction variable and new stores from
7450 the components of the (vectorized) rhs:
7452 for (j = 0; ; j += VF*stride)
7457 array[j + stride] = tmp2;
7461 unsigned nstores
= const_nunits
;
7463 tree ltype
= elem_type
;
7464 tree lvectype
= vectype
;
7467 if (group_size
< const_nunits
7468 && const_nunits
% group_size
== 0)
7470 nstores
= const_nunits
/ group_size
;
7472 ltype
= build_vector_type (elem_type
, group_size
);
7475 /* First check if vec_extract optab doesn't support extraction
7476 of vector elts directly. */
7477 scalar_mode elmode
= SCALAR_TYPE_MODE (elem_type
);
7479 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
7480 || !related_vector_mode (TYPE_MODE (vectype
), elmode
,
7481 group_size
).exists (&vmode
)
7482 || (convert_optab_handler (vec_extract_optab
,
7483 TYPE_MODE (vectype
), vmode
)
7484 == CODE_FOR_nothing
))
7486 /* Try to avoid emitting an extract of vector elements
7487 by performing the extracts using an integer type of the
7488 same size, extracting from a vector of those and then
7489 re-interpreting it as the original vector type if
7492 = group_size
* GET_MODE_BITSIZE (elmode
);
7493 unsigned int lnunits
= const_nunits
/ group_size
;
7494 /* If we can't construct such a vector fall back to
7495 element extracts from the original vector type and
7496 element size stores. */
7497 if (int_mode_for_size (lsize
, 0).exists (&elmode
)
7498 && VECTOR_MODE_P (TYPE_MODE (vectype
))
7499 && related_vector_mode (TYPE_MODE (vectype
), elmode
,
7500 lnunits
).exists (&vmode
)
7501 && (convert_optab_handler (vec_extract_optab
,
7503 != CODE_FOR_nothing
))
7507 ltype
= build_nonstandard_integer_type (lsize
, 1);
7508 lvectype
= build_vector_type (ltype
, nstores
);
7510 /* Else fall back to vector extraction anyway.
7511 Fewer stores are more important than avoiding spilling
7512 of the vector we extract from. Compared to the
7513 construction case in vectorizable_load no store-forwarding
7514 issue exists here for reasonable archs. */
7517 else if (group_size
>= const_nunits
7518 && group_size
% const_nunits
== 0)
7521 lnel
= const_nunits
;
7525 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (elem_type
));
7526 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
7529 ivstep
= stride_step
;
7530 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
7531 build_int_cst (TREE_TYPE (ivstep
), vf
));
7533 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
7535 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
7536 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
7537 create_iv (stride_base
, ivstep
, NULL
,
7538 loop
, &incr_gsi
, insert_after
,
7540 incr
= gsi_stmt (incr_gsi
);
7542 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
7544 alias_off
= build_int_cst (ref_type
, 0);
7545 stmt_vec_info next_stmt_info
= first_stmt_info
;
7546 for (g
= 0; g
< group_size
; g
++)
7548 running_off
= offvar
;
7551 tree size
= TYPE_SIZE_UNIT (ltype
);
7552 tree pos
= fold_build2 (MULT_EXPR
, sizetype
, size_int (g
),
7554 tree newoff
= copy_ssa_name (running_off
, NULL
);
7555 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7557 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7558 running_off
= newoff
;
7561 op
= vect_get_store_rhs (next_stmt_info
);
7562 vect_get_vec_defs (vinfo
, next_stmt_info
, slp_node
, ncopies
,
7564 unsigned int group_el
= 0;
7565 unsigned HOST_WIDE_INT
7566 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
7567 for (j
= 0; j
< ncopies
; j
++)
7569 vec_oprnd
= vec_oprnds
[j
];
7570 /* Pun the vector to extract from if necessary. */
7571 if (lvectype
!= vectype
)
7573 tree tem
= make_ssa_name (lvectype
);
7575 = gimple_build_assign (tem
, build1 (VIEW_CONVERT_EXPR
,
7576 lvectype
, vec_oprnd
));
7577 vect_finish_stmt_generation (vinfo
, stmt_info
, pun
, gsi
);
7580 for (i
= 0; i
< nstores
; i
++)
7582 tree newref
, newoff
;
7583 gimple
*incr
, *assign
;
7584 tree size
= TYPE_SIZE (ltype
);
7585 /* Extract the i'th component. */
7586 tree pos
= fold_build2 (MULT_EXPR
, bitsizetype
,
7587 bitsize_int (i
), size
);
7588 tree elem
= fold_build3 (BIT_FIELD_REF
, ltype
, vec_oprnd
,
7591 elem
= force_gimple_operand_gsi (gsi
, elem
, true,
7595 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
7597 newref
= build2 (MEM_REF
, ltype
,
7598 running_off
, this_off
);
7599 vect_copy_ref_info (newref
, DR_REF (first_dr_info
->dr
));
7601 /* And store it to *running_off. */
7602 assign
= gimple_build_assign (newref
, elem
);
7603 vect_finish_stmt_generation (vinfo
, stmt_info
, assign
, gsi
);
7607 || group_el
== group_size
)
7609 newoff
= copy_ssa_name (running_off
, NULL
);
7610 incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
7611 running_off
, stride_step
);
7612 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
7614 running_off
= newoff
;
7617 if (g
== group_size
- 1
7620 if (j
== 0 && i
== 0)
7622 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (assign
);
7626 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7631 vec_oprnds
.release ();
7635 auto_vec
<tree
> dr_chain (group_size
);
7636 oprnds
.create (group_size
);
7638 /* Gather-scatter accesses perform only component accesses, alignment
7639 is irrelevant for them. */
7640 if (memory_access_type
== VMAT_GATHER_SCATTER
)
7641 alignment_support_scheme
= dr_unaligned_supported
;
7643 alignment_support_scheme
7644 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
7646 gcc_assert (alignment_support_scheme
);
7647 vec_loop_masks
*loop_masks
7648 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
7649 ? &LOOP_VINFO_MASKS (loop_vinfo
)
7651 /* Targets with store-lane instructions must not require explicit
7652 realignment. vect_supportable_dr_alignment always returns either
7653 dr_aligned or dr_unaligned_supported for masked operations. */
7654 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
7657 || alignment_support_scheme
== dr_aligned
7658 || alignment_support_scheme
== dr_unaligned_supported
);
7660 if (memory_access_type
== VMAT_CONTIGUOUS_DOWN
7661 || memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7662 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
7665 tree vec_offset
= NULL_TREE
;
7666 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7668 aggr_type
= NULL_TREE
;
7671 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
7673 aggr_type
= elem_type
;
7674 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
7675 &bump
, &vec_offset
);
7679 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7680 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
7682 aggr_type
= vectype
;
7683 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
7684 memory_access_type
);
7688 LOOP_VINFO_HAS_MASK_STORE (loop_vinfo
) = true;
7690 /* In case the vectorization factor (VF) is bigger than the number
7691 of elements that we can fit in a vectype (nunits), we have to generate
7692 more than one vector stmt - i.e - we need to "unroll" the
7693 vector stmt by a factor VF/nunits. */
7695 /* In case of interleaving (non-unit grouped access):
7702 We create vectorized stores starting from base address (the access of the
7703 first stmt in the chain (S2 in the above example), when the last store stmt
7704 of the chain (S4) is reached:
7707 VS2: &base + vec_size*1 = vx0
7708 VS3: &base + vec_size*2 = vx1
7709 VS4: &base + vec_size*3 = vx3
7711 Then permutation statements are generated:
7713 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
7714 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
7717 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
7718 (the order of the data-refs in the output of vect_permute_store_chain
7719 corresponds to the order of scalar stmts in the interleaving chain - see
7720 the documentation of vect_permute_store_chain()).
7722 In case of both multiple types and interleaving, above vector stores and
7723 permutation stmts are created for every copy. The result vector stmts are
7724 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
7725 STMT_VINFO_RELATED_STMT for the next copies.
7728 auto_vec
<tree
> vec_masks
;
7729 tree vec_mask
= NULL
;
7730 auto_vec
<tree
> vec_offsets
;
7731 auto_vec
<vec
<tree
> > gvec_oprnds
;
7732 gvec_oprnds
.safe_grow_cleared (group_size
);
7733 for (j
= 0; j
< ncopies
; j
++)
7740 /* Get vectorized arguments for SLP_NODE. */
7741 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, 1,
7743 vec_oprnd
= vec_oprnds
[0];
7747 /* For interleaved stores we collect vectorized defs for all the
7748 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
7749 used as an input to vect_permute_store_chain().
7751 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN
7752 and OPRNDS are of size 1. */
7753 stmt_vec_info next_stmt_info
= first_stmt_info
;
7754 for (i
= 0; i
< group_size
; i
++)
7756 /* Since gaps are not supported for interleaved stores,
7757 DR_GROUP_SIZE is the exact number of stmts in the chain.
7758 Therefore, NEXT_STMT_INFO can't be NULL_TREE. In case
7759 that there is no interleaving, DR_GROUP_SIZE is 1,
7760 and only one iteration of the loop will be executed. */
7761 op
= vect_get_store_rhs (next_stmt_info
);
7762 vect_get_vec_defs_for_operand (vinfo
, next_stmt_info
,
7763 ncopies
, op
, &gvec_oprnds
[i
]);
7764 vec_oprnd
= gvec_oprnds
[i
][0];
7765 dr_chain
.quick_push (gvec_oprnds
[i
][0]);
7766 oprnds
.quick_push (gvec_oprnds
[i
][0]);
7767 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
7771 vect_get_vec_defs_for_operand (vinfo
, stmt_info
, ncopies
,
7772 mask
, &vec_masks
, mask_vectype
);
7773 vec_mask
= vec_masks
[0];
7777 /* We should have catched mismatched types earlier. */
7778 gcc_assert (useless_type_conversion_p (vectype
,
7779 TREE_TYPE (vec_oprnd
)));
7780 bool simd_lane_access_p
7781 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
7782 if (simd_lane_access_p
7784 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
7785 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
7786 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
7787 && integer_zerop (DR_INIT (first_dr_info
->dr
))
7788 && alias_sets_conflict_p (get_alias_set (aggr_type
),
7789 get_alias_set (TREE_TYPE (ref_type
))))
7791 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
7792 dataref_offset
= build_int_cst (ref_type
, 0);
7794 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7796 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
7797 &dataref_ptr
, &vec_offsets
, ncopies
);
7798 vec_offset
= vec_offsets
[0];
7802 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
7803 simd_lane_access_p
? loop
: NULL
,
7804 offset
, &dummy
, gsi
, &ptr_incr
,
7805 simd_lane_access_p
, NULL_TREE
, bump
);
7809 /* For interleaved stores we created vectorized defs for all the
7810 defs stored in OPRNDS in the previous iteration (previous copy).
7811 DR_CHAIN is then used as an input to vect_permute_store_chain().
7812 If the store is not grouped, DR_GROUP_SIZE is 1, and DR_CHAIN and
7813 OPRNDS are of size 1. */
7814 for (i
= 0; i
< group_size
; i
++)
7816 vec_oprnd
= gvec_oprnds
[i
][j
];
7817 dr_chain
[i
] = gvec_oprnds
[i
][j
];
7818 oprnds
[i
] = gvec_oprnds
[i
][j
];
7821 vec_mask
= vec_masks
[j
];
7824 = int_const_binop (PLUS_EXPR
, dataref_offset
, bump
);
7825 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
7826 vec_offset
= vec_offsets
[j
];
7828 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
7832 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
7836 /* Get an array into which we can store the individual vectors. */
7837 vec_array
= create_vector_array (vectype
, vec_num
);
7839 /* Invalidate the current contents of VEC_ARRAY. This should
7840 become an RTL clobber too, which prevents the vector registers
7841 from being upward-exposed. */
7842 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
7844 /* Store the individual vectors into the array. */
7845 for (i
= 0; i
< vec_num
; i
++)
7847 vec_oprnd
= dr_chain
[i
];
7848 write_vector_array (vinfo
, stmt_info
,
7849 gsi
, vec_oprnd
, vec_array
, i
);
7852 tree final_mask
= NULL
;
7854 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
7857 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7864 MASK_STORE_LANES (DATAREF_PTR, ALIAS_PTR, VEC_MASK,
7866 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
7867 tree alias_ptr
= build_int_cst (ref_type
, align
);
7868 call
= gimple_build_call_internal (IFN_MASK_STORE_LANES
, 4,
7869 dataref_ptr
, alias_ptr
,
7870 final_mask
, vec_array
);
7875 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
7876 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
7877 call
= gimple_build_call_internal (IFN_STORE_LANES
, 1,
7879 gimple_call_set_lhs (call
, data_ref
);
7881 gimple_call_set_nothrow (call
, true);
7882 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
7885 /* Record that VEC_ARRAY is now dead. */
7886 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
7894 result_chain
.create (group_size
);
7896 vect_permute_store_chain (vinfo
, dr_chain
, group_size
, stmt_info
,
7897 gsi
, &result_chain
);
7900 stmt_vec_info next_stmt_info
= first_stmt_info
;
7901 for (i
= 0; i
< vec_num
; i
++)
7904 unsigned HOST_WIDE_INT align
;
7906 tree final_mask
= NULL_TREE
;
7908 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
7910 vectype
, vec_num
* j
+ i
);
7912 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
7915 if (memory_access_type
== VMAT_GATHER_SCATTER
)
7917 tree scale
= size_int (gs_info
.scale
);
7920 call
= gimple_build_call_internal
7921 (IFN_MASK_SCATTER_STORE
, 5, dataref_ptr
, vec_offset
,
7922 scale
, vec_oprnd
, final_mask
);
7924 call
= gimple_build_call_internal
7925 (IFN_SCATTER_STORE
, 4, dataref_ptr
, vec_offset
,
7927 gimple_call_set_nothrow (call
, true);
7928 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
7934 /* Bump the vector pointer. */
7935 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
7936 gsi
, stmt_info
, bump
);
7939 vec_oprnd
= vec_oprnds
[i
];
7940 else if (grouped_store
)
7941 /* For grouped stores vectorized defs are interleaved in
7942 vect_permute_store_chain(). */
7943 vec_oprnd
= result_chain
[i
];
7945 align
= known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
7946 if (aligned_access_p (first_dr_info
))
7948 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
7950 align
= dr_alignment (vect_dr_behavior (vinfo
, first_dr_info
));
7954 misalign
= DR_MISALIGNMENT (first_dr_info
);
7955 if (dataref_offset
== NULL_TREE
7956 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
7957 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
7960 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
7962 tree perm_mask
= perm_mask_for_reverse (vectype
);
7963 tree perm_dest
= vect_create_destination_var
7964 (vect_get_store_rhs (stmt_info
), vectype
);
7965 tree new_temp
= make_ssa_name (perm_dest
);
7967 /* Generate the permute statement. */
7969 = gimple_build_assign (new_temp
, VEC_PERM_EXPR
, vec_oprnd
,
7970 vec_oprnd
, perm_mask
);
7971 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
7973 perm_stmt
= SSA_NAME_DEF_STMT (new_temp
);
7974 vec_oprnd
= new_temp
;
7977 /* Arguments are ready. Create the new vector stmt. */
7980 align
= least_bit_hwi (misalign
| align
);
7981 tree ptr
= build_int_cst (ref_type
, align
);
7983 = gimple_build_call_internal (IFN_MASK_STORE
, 4,
7985 final_mask
, vec_oprnd
);
7986 gimple_call_set_nothrow (call
, true);
7987 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
7992 data_ref
= fold_build2 (MEM_REF
, vectype
,
7996 : build_int_cst (ref_type
, 0));
7997 if (aligned_access_p (first_dr_info
))
7999 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
8000 TREE_TYPE (data_ref
)
8001 = build_aligned_type (TREE_TYPE (data_ref
),
8002 align
* BITS_PER_UNIT
);
8004 TREE_TYPE (data_ref
)
8005 = build_aligned_type (TREE_TYPE (data_ref
),
8006 TYPE_ALIGN (elem_type
));
8007 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8008 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
8009 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8015 next_stmt_info
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
8016 if (!next_stmt_info
)
8023 *vec_stmt
= new_stmt
;
8024 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8028 for (i
= 0; i
< group_size
; ++i
)
8030 vec
<tree
> oprndsi
= gvec_oprnds
[i
];
8034 result_chain
.release ();
8035 vec_oprnds
.release ();
8040 /* Given a vector type VECTYPE, turns permutation SEL into the equivalent
8041 VECTOR_CST mask. No checks are made that the target platform supports the
8042 mask, so callers may wish to test can_vec_perm_const_p separately, or use
8043 vect_gen_perm_mask_checked. */
8046 vect_gen_perm_mask_any (tree vectype
, const vec_perm_indices
&sel
)
8050 poly_uint64 nunits
= sel
.length ();
8051 gcc_assert (known_eq (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)));
8053 mask_type
= build_vector_type (ssizetype
, nunits
);
8054 return vec_perm_indices_to_tree (mask_type
, sel
);
8057 /* Checked version of vect_gen_perm_mask_any. Asserts can_vec_perm_const_p,
8058 i.e. that the target supports the pattern _for arbitrary input vectors_. */
8061 vect_gen_perm_mask_checked (tree vectype
, const vec_perm_indices
&sel
)
8063 gcc_assert (can_vec_perm_const_p (TYPE_MODE (vectype
), sel
));
8064 return vect_gen_perm_mask_any (vectype
, sel
);
8067 /* Given a vector variable X and Y, that was generated for the scalar
8068 STMT_INFO, generate instructions to permute the vector elements of X and Y
8069 using permutation mask MASK_VEC, insert them at *GSI and return the
8070 permuted vector variable. */
8073 permute_vec_elements (vec_info
*vinfo
,
8074 tree x
, tree y
, tree mask_vec
, stmt_vec_info stmt_info
,
8075 gimple_stmt_iterator
*gsi
)
8077 tree vectype
= TREE_TYPE (x
);
8078 tree perm_dest
, data_ref
;
8081 tree scalar_dest
= gimple_get_lhs (stmt_info
->stmt
);
8082 if (scalar_dest
&& TREE_CODE (scalar_dest
) == SSA_NAME
)
8083 perm_dest
= vect_create_destination_var (scalar_dest
, vectype
);
8085 perm_dest
= vect_get_new_vect_var (vectype
, vect_simple_var
, NULL
);
8086 data_ref
= make_ssa_name (perm_dest
);
8088 /* Generate the permute statement. */
8089 perm_stmt
= gimple_build_assign (data_ref
, VEC_PERM_EXPR
, x
, y
, mask_vec
);
8090 vect_finish_stmt_generation (vinfo
, stmt_info
, perm_stmt
, gsi
);
8095 /* Hoist the definitions of all SSA uses on STMT_INFO out of the loop LOOP,
8096 inserting them on the loops preheader edge. Returns true if we
8097 were successful in doing so (and thus STMT_INFO can be moved then),
8098 otherwise returns false. */
8101 hoist_defs_of_uses (stmt_vec_info stmt_info
, class loop
*loop
)
8107 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8109 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8110 if (!gimple_nop_p (def_stmt
)
8111 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8113 /* Make sure we don't need to recurse. While we could do
8114 so in simple cases when there are more complex use webs
8115 we don't have an easy way to preserve stmt order to fulfil
8116 dependencies within them. */
8119 if (gimple_code (def_stmt
) == GIMPLE_PHI
)
8121 FOR_EACH_SSA_TREE_OPERAND (op2
, def_stmt
, i2
, SSA_OP_USE
)
8123 gimple
*def_stmt2
= SSA_NAME_DEF_STMT (op2
);
8124 if (!gimple_nop_p (def_stmt2
)
8125 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt2
)))
8135 FOR_EACH_SSA_TREE_OPERAND (op
, stmt_info
->stmt
, i
, SSA_OP_USE
)
8137 gimple
*def_stmt
= SSA_NAME_DEF_STMT (op
);
8138 if (!gimple_nop_p (def_stmt
)
8139 && flow_bb_inside_loop_p (loop
, gimple_bb (def_stmt
)))
8141 gimple_stmt_iterator gsi
= gsi_for_stmt (def_stmt
);
8142 gsi_remove (&gsi
, false);
8143 gsi_insert_on_edge_immediate (loop_preheader_edge (loop
), def_stmt
);
8150 /* vectorizable_load.
8152 Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
8153 that can be vectorized.
8154 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
8155 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
8156 Return true if STMT_INFO is vectorizable in this way. */
8159 vectorizable_load (vec_info
*vinfo
,
8160 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
8161 gimple
**vec_stmt
, slp_tree slp_node
,
8162 stmt_vector_for_cost
*cost_vec
)
8165 tree vec_dest
= NULL
;
8166 tree data_ref
= NULL
;
8167 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
8168 class loop
*loop
= NULL
;
8169 class loop
*containing_loop
= gimple_bb (stmt_info
->stmt
)->loop_father
;
8170 bool nested_in_vect_loop
= false;
8175 enum dr_alignment_support alignment_support_scheme
;
8176 tree dataref_ptr
= NULL_TREE
;
8177 tree dataref_offset
= NULL_TREE
;
8178 gimple
*ptr_incr
= NULL
;
8181 unsigned int group_size
;
8182 poly_uint64 group_gap_adj
;
8183 tree msq
= NULL_TREE
, lsq
;
8184 tree offset
= NULL_TREE
;
8185 tree byte_offset
= NULL_TREE
;
8186 tree realignment_token
= NULL_TREE
;
8188 vec
<tree
> dr_chain
= vNULL
;
8189 bool grouped_load
= false;
8190 stmt_vec_info first_stmt_info
;
8191 stmt_vec_info first_stmt_info_for_drptr
= NULL
;
8192 bool compute_in_loop
= false;
8193 class loop
*at_loop
;
8195 bool slp
= (slp_node
!= NULL
);
8196 bool slp_perm
= false;
8197 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
8200 gather_scatter_info gs_info
;
8202 enum vect_def_type mask_dt
= vect_unknown_def_type
;
8204 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
8207 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
8211 if (!STMT_VINFO_DATA_REF (stmt_info
))
8214 /* ??? Alignment analysis for SLP looks at SLP_TREE_SCALAR_STMTS[0]
8215 for unpermuted loads but we get passed SLP_TREE_REPRESENTATIVE
8216 which can be different when reduction chains were re-ordered.
8217 Now that we figured we're a dataref reset stmt_info back to
8218 SLP_TREE_SCALAR_STMTS[0]. When we're SLP only things should be
8219 refactored in a way to maintain the dr_vec_info pointer for the
8220 relevant access explicitely. */
8221 stmt_vec_info orig_stmt_info
= stmt_info
;
8223 stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8225 tree mask
= NULL_TREE
, mask_vectype
= NULL_TREE
;
8226 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
8228 scalar_dest
= gimple_assign_lhs (assign
);
8229 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
8232 tree_code code
= gimple_assign_rhs_code (assign
);
8233 if (code
!= ARRAY_REF
8234 && code
!= BIT_FIELD_REF
8235 && code
!= INDIRECT_REF
8236 && code
!= COMPONENT_REF
8237 && code
!= IMAGPART_EXPR
8238 && code
!= REALPART_EXPR
8240 && TREE_CODE_CLASS (code
) != tcc_declaration
)
8245 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
8246 if (!call
|| !gimple_call_internal_p (call
))
8249 internal_fn ifn
= gimple_call_internal_fn (call
);
8250 if (!internal_load_fn_p (ifn
))
8253 scalar_dest
= gimple_call_lhs (call
);
8257 int mask_index
= internal_fn_mask_index (ifn
);
8258 if (mask_index
>= 0)
8260 mask
= gimple_call_arg (call
, mask_index
);
8261 if (!vect_check_scalar_mask (vinfo
, stmt_info
, mask
, &mask_dt
,
8267 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
8268 poly_uint64 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
8272 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
8273 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt_info
);
8274 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
8279 /* Multiple types in SLP are handled by creating the appropriate number of
8280 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
8285 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
8287 gcc_assert (ncopies
>= 1);
8289 /* FORNOW. This restriction should be relaxed. */
8290 if (nested_in_vect_loop
&& ncopies
> 1)
8292 if (dump_enabled_p ())
8293 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8294 "multiple types in nested loop.\n");
8298 /* Invalidate assumptions made by dependence analysis when vectorization
8299 on the unrolled body effectively re-orders stmts. */
8301 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8302 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8303 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8305 if (dump_enabled_p ())
8306 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8307 "cannot perform implicit CSE when unrolling "
8308 "with negative dependence distance\n");
8312 elem_type
= TREE_TYPE (vectype
);
8313 mode
= TYPE_MODE (vectype
);
8315 /* FORNOW. In some cases can vectorize even if data-type not supported
8316 (e.g. - data copies). */
8317 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
8319 if (dump_enabled_p ())
8320 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8321 "Aligned load, but unsupported type.\n");
8325 /* Check if the load is a part of an interleaving chain. */
8326 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
8328 grouped_load
= true;
8330 gcc_assert (!nested_in_vect_loop
);
8331 gcc_assert (!STMT_VINFO_GATHER_SCATTER_P (stmt_info
));
8333 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8334 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8336 /* Refuse non-SLP vectorization of SLP-only groups. */
8337 if (!slp
&& STMT_VINFO_SLP_VECT_ONLY (first_stmt_info
))
8339 if (dump_enabled_p ())
8340 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8341 "cannot vectorize load in non-SLP mode.\n");
8345 if (slp
&& SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8351 /* In BB vectorization we may not actually use a loaded vector
8352 accessing elements in excess of DR_GROUP_SIZE. */
8353 stmt_vec_info group_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8354 group_info
= DR_GROUP_FIRST_ELEMENT (group_info
);
8355 unsigned HOST_WIDE_INT nunits
;
8356 unsigned j
, k
, maxk
= 0;
8357 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node
), j
, k
)
8360 tree vectype
= STMT_VINFO_VECTYPE (group_info
);
8361 if (!TYPE_VECTOR_SUBPARTS (vectype
).is_constant (&nunits
)
8362 || maxk
>= (DR_GROUP_SIZE (group_info
) & ~(nunits
- 1)))
8364 if (dump_enabled_p ())
8365 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8366 "BB vectorization with gaps at the end of "
8367 "a load is not supported\n");
8374 if (!vect_transform_slp_perm_load (vinfo
, slp_node
, tem
, NULL
, vf
,
8377 if (dump_enabled_p ())
8378 dump_printf_loc (MSG_MISSED_OPTIMIZATION
,
8380 "unsupported load permutation\n");
8385 /* Invalidate assumptions made by dependence analysis when vectorization
8386 on the unrolled body effectively re-orders stmts. */
8387 if (!PURE_SLP_STMT (stmt_info
)
8388 && STMT_VINFO_MIN_NEG_DIST (stmt_info
) != 0
8389 && maybe_gt (LOOP_VINFO_VECT_FACTOR (loop_vinfo
),
8390 STMT_VINFO_MIN_NEG_DIST (stmt_info
)))
8392 if (dump_enabled_p ())
8393 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8394 "cannot perform implicit CSE when performing "
8395 "group loads with negative dependence distance\n");
8402 vect_memory_access_type memory_access_type
;
8403 if (!get_load_store_type (vinfo
, stmt_info
, vectype
, slp
, mask
, VLS_LOAD
,
8404 ncopies
, &memory_access_type
, &gs_info
))
8409 if (memory_access_type
== VMAT_CONTIGUOUS
)
8411 machine_mode vec_mode
= TYPE_MODE (vectype
);
8412 if (!VECTOR_MODE_P (vec_mode
)
8413 || !can_vec_mask_load_store_p (vec_mode
,
8414 TYPE_MODE (mask_vectype
), true))
8417 else if (memory_access_type
!= VMAT_LOAD_STORE_LANES
8418 && memory_access_type
!= VMAT_GATHER_SCATTER
)
8420 if (dump_enabled_p ())
8421 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
8422 "unsupported access type for masked load.\n");
8427 if (!vec_stmt
) /* transformation not required. */
8430 STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
) = memory_access_type
;
8433 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
))
8434 check_load_store_masking (loop_vinfo
, vectype
, VLS_LOAD
, group_size
,
8435 memory_access_type
, &gs_info
, mask
);
8437 STMT_VINFO_TYPE (orig_stmt_info
) = load_vec_info_type
;
8438 vect_model_load_cost (vinfo
, stmt_info
, ncopies
, vf
, memory_access_type
,
8439 slp_node
, cost_vec
);
8444 gcc_assert (memory_access_type
8445 == STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info
));
8447 if (dump_enabled_p ())
8448 dump_printf_loc (MSG_NOTE
, vect_location
,
8449 "transform load. ncopies = %d\n", ncopies
);
8453 dr_vec_info
*dr_info
= STMT_VINFO_DR_INFO (stmt_info
), *first_dr_info
= NULL
;
8454 ensure_base_align (dr_info
);
8456 if (memory_access_type
== VMAT_GATHER_SCATTER
&& gs_info
.decl
)
8458 vect_build_gather_load_calls (vinfo
,
8459 stmt_info
, gsi
, vec_stmt
, &gs_info
, mask
);
8463 if (memory_access_type
== VMAT_INVARIANT
)
8465 gcc_assert (!grouped_load
&& !mask
&& !bb_vinfo
);
8466 /* If we have versioned for aliasing or the loop doesn't
8467 have any data dependencies that would preclude this,
8468 then we are sure this is a loop invariant load and
8469 thus we can insert it on the preheader edge. */
8470 bool hoist_p
= (LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
)
8471 && !nested_in_vect_loop
8472 && hoist_defs_of_uses (stmt_info
, loop
));
8475 gassign
*stmt
= as_a
<gassign
*> (stmt_info
->stmt
);
8476 if (dump_enabled_p ())
8477 dump_printf_loc (MSG_NOTE
, vect_location
,
8478 "hoisting out of the vectorized loop: %G", stmt
);
8479 scalar_dest
= copy_ssa_name (scalar_dest
);
8480 tree rhs
= unshare_expr (gimple_assign_rhs1 (stmt
));
8481 gsi_insert_on_edge_immediate
8482 (loop_preheader_edge (loop
),
8483 gimple_build_assign (scalar_dest
, rhs
));
8485 /* These copies are all equivalent, but currently the representation
8486 requires a separate STMT_VINFO_VEC_STMT for each one. */
8487 gimple_stmt_iterator gsi2
= *gsi
;
8489 for (j
= 0; j
< ncopies
; j
++)
8492 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8495 new_temp
= vect_init_vector (vinfo
, stmt_info
, scalar_dest
,
8497 gimple
*new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8499 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8503 *vec_stmt
= new_stmt
;
8504 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8510 if (memory_access_type
== VMAT_ELEMENTWISE
8511 || memory_access_type
== VMAT_STRIDED_SLP
)
8513 gimple_stmt_iterator incr_gsi
;
8518 vec
<constructor_elt
, va_gc
> *v
= NULL
;
8519 tree stride_base
, stride_step
, alias_off
;
8520 /* Checked by get_load_store_type. */
8521 unsigned int const_nunits
= nunits
.to_constant ();
8522 unsigned HOST_WIDE_INT cst_offset
= 0;
8525 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
));
8526 gcc_assert (!nested_in_vect_loop
);
8530 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8531 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8535 first_stmt_info
= stmt_info
;
8536 first_dr_info
= dr_info
;
8538 if (slp
&& grouped_load
)
8540 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8541 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8547 = (tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)))
8548 * vect_get_place_in_interleaving_chain (stmt_info
,
8551 ref_type
= reference_alias_ptr_type (DR_REF (dr_info
->dr
));
8554 dr_offset
= get_dr_vinfo_offset (vinfo
, first_dr_info
);
8556 = fold_build_pointer_plus
8557 (DR_BASE_ADDRESS (first_dr_info
->dr
),
8558 size_binop (PLUS_EXPR
,
8559 convert_to_ptrofftype (dr_offset
),
8560 convert_to_ptrofftype (DR_INIT (first_dr_info
->dr
))));
8561 stride_step
= fold_convert (sizetype
, DR_STEP (first_dr_info
->dr
));
8563 /* For a load with loop-invariant (but other than power-of-2)
8564 stride (i.e. not a grouped access) like so:
8566 for (i = 0; i < n; i += stride)
8569 we generate a new induction variable and new accesses to
8570 form a new vector (or vectors, depending on ncopies):
8572 for (j = 0; ; j += VF*stride)
8574 tmp2 = array[j + stride];
8576 vectemp = {tmp1, tmp2, ...}
8579 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (stride_step
), stride_step
,
8580 build_int_cst (TREE_TYPE (stride_step
), vf
));
8582 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
8584 stride_base
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_base
);
8585 ivstep
= cse_and_gimplify_to_preheader (loop_vinfo
, ivstep
);
8586 create_iv (stride_base
, ivstep
, NULL
,
8587 loop
, &incr_gsi
, insert_after
,
8590 stride_step
= cse_and_gimplify_to_preheader (loop_vinfo
, stride_step
);
8592 running_off
= offvar
;
8593 alias_off
= build_int_cst (ref_type
, 0);
8594 int nloads
= const_nunits
;
8596 tree ltype
= TREE_TYPE (vectype
);
8597 tree lvectype
= vectype
;
8598 auto_vec
<tree
> dr_chain
;
8599 if (memory_access_type
== VMAT_STRIDED_SLP
)
8601 if (group_size
< const_nunits
)
8603 /* First check if vec_init optab supports construction from vector
8604 elts directly. Otherwise avoid emitting a constructor of
8605 vector elements by performing the loads using an integer type
8606 of the same size, constructing a vector of those and then
8607 re-interpreting it as the original vector type. This avoids a
8608 huge runtime penalty due to the general inability to perform
8609 store forwarding from smaller stores to a larger load. */
8612 = vector_vector_composition_type (vectype
,
8613 const_nunits
/ group_size
,
8615 if (vtype
!= NULL_TREE
)
8617 nloads
= const_nunits
/ group_size
;
8626 lnel
= const_nunits
;
8629 ltype
= build_aligned_type (ltype
, TYPE_ALIGN (TREE_TYPE (vectype
)));
8631 /* Load vector(1) scalar_type if it's 1 element-wise vectype. */
8632 else if (nloads
== 1)
8637 /* For SLP permutation support we need to load the whole group,
8638 not only the number of vector stmts the permutation result
8642 /* We don't yet generate SLP_TREE_LOAD_PERMUTATIONs for
8644 unsigned int const_vf
= vf
.to_constant ();
8645 ncopies
= CEIL (group_size
* const_vf
, const_nunits
);
8646 dr_chain
.create (ncopies
);
8649 ncopies
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8651 unsigned int group_el
= 0;
8652 unsigned HOST_WIDE_INT
8653 elsz
= tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
8654 for (j
= 0; j
< ncopies
; j
++)
8657 vec_alloc (v
, nloads
);
8658 gimple
*new_stmt
= NULL
;
8659 for (i
= 0; i
< nloads
; i
++)
8661 tree this_off
= build_int_cst (TREE_TYPE (alias_off
),
8662 group_el
* elsz
+ cst_offset
);
8663 tree data_ref
= build2 (MEM_REF
, ltype
, running_off
, this_off
);
8664 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
8665 new_stmt
= gimple_build_assign (make_ssa_name (ltype
), data_ref
);
8666 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8668 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
8669 gimple_assign_lhs (new_stmt
));
8673 || group_el
== group_size
)
8675 tree newoff
= copy_ssa_name (running_off
);
8676 gimple
*incr
= gimple_build_assign (newoff
, POINTER_PLUS_EXPR
,
8677 running_off
, stride_step
);
8678 vect_finish_stmt_generation (vinfo
, stmt_info
, incr
, gsi
);
8680 running_off
= newoff
;
8686 tree vec_inv
= build_constructor (lvectype
, v
);
8687 new_temp
= vect_init_vector (vinfo
, stmt_info
,
8688 vec_inv
, lvectype
, gsi
);
8689 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
8690 if (lvectype
!= vectype
)
8692 new_stmt
= gimple_build_assign (make_ssa_name (vectype
),
8694 build1 (VIEW_CONVERT_EXPR
,
8695 vectype
, new_temp
));
8696 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
8703 dr_chain
.quick_push (gimple_assign_lhs (new_stmt
));
8705 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
8710 *vec_stmt
= new_stmt
;
8711 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
8717 vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
, gsi
, vf
,
8723 if (memory_access_type
== VMAT_GATHER_SCATTER
8724 || (!slp
&& memory_access_type
== VMAT_CONTIGUOUS
))
8725 grouped_load
= false;
8729 first_stmt_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
8730 group_size
= DR_GROUP_SIZE (first_stmt_info
);
8731 /* For SLP vectorization we directly vectorize a subchain
8732 without permutation. */
8733 if (slp
&& ! SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
8734 first_stmt_info
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8735 /* For BB vectorization always use the first stmt to base
8736 the data ref pointer on. */
8738 first_stmt_info_for_drptr
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
8740 /* Check if the chain of loads is already vectorized. */
8741 if (STMT_VINFO_VEC_STMTS (first_stmt_info
).exists ()
8742 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
8743 ??? But we can only do so if there is exactly one
8744 as we have no way to get at the rest. Leave the CSE
8746 ??? With the group load eventually participating
8747 in multiple different permutations (having multiple
8748 slp nodes which refer to the same group) the CSE
8749 is even wrong code. See PR56270. */
8752 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
8755 first_dr_info
= STMT_VINFO_DR_INFO (first_stmt_info
);
8758 /* VEC_NUM is the number of vect stmts to be created for this group. */
8761 grouped_load
= false;
8762 /* If an SLP permutation is from N elements to N elements,
8763 and if one vector holds a whole number of N, we can load
8764 the inputs to the permutation in the same way as an
8765 unpermuted sequence. In other cases we need to load the
8766 whole group, not only the number of vector stmts the
8767 permutation result fits in. */
8768 unsigned scalar_lanes
= SLP_TREE_LANES (slp_node
);
8770 && (group_size
!= scalar_lanes
8771 || !multiple_p (nunits
, group_size
)))
8773 /* We don't yet generate such SLP_TREE_LOAD_PERMUTATIONs for
8774 variable VF; see vect_transform_slp_perm_load. */
8775 unsigned int const_vf
= vf
.to_constant ();
8776 unsigned int const_nunits
= nunits
.to_constant ();
8777 vec_num
= CEIL (group_size
* const_vf
, const_nunits
);
8778 group_gap_adj
= vf
* group_size
- nunits
* vec_num
;
8782 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
8784 = group_size
- scalar_lanes
;
8788 vec_num
= group_size
;
8790 ref_type
= get_group_alias_ptr_type (first_stmt_info
);
8794 first_stmt_info
= stmt_info
;
8795 first_dr_info
= dr_info
;
8796 group_size
= vec_num
= 1;
8798 ref_type
= reference_alias_ptr_type (DR_REF (first_dr_info
->dr
));
8801 /* Gather-scatter accesses perform only component accesses, alignment
8802 is irrelevant for them. */
8803 if (memory_access_type
== VMAT_GATHER_SCATTER
)
8804 alignment_support_scheme
= dr_unaligned_supported
;
8806 alignment_support_scheme
8807 = vect_supportable_dr_alignment (vinfo
, first_dr_info
, false);
8809 gcc_assert (alignment_support_scheme
);
8810 vec_loop_masks
*loop_masks
8811 = (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
)
8812 ? &LOOP_VINFO_MASKS (loop_vinfo
)
8814 /* Targets with store-lane instructions must not require explicit
8815 realignment. vect_supportable_dr_alignment always returns either
8816 dr_aligned or dr_unaligned_supported for masked operations. */
8817 gcc_assert ((memory_access_type
!= VMAT_LOAD_STORE_LANES
8820 || alignment_support_scheme
== dr_aligned
8821 || alignment_support_scheme
== dr_unaligned_supported
);
8823 /* In case the vectorization factor (VF) is bigger than the number
8824 of elements that we can fit in a vectype (nunits), we have to generate
8825 more than one vector stmt - i.e - we need to "unroll" the
8826 vector stmt by a factor VF/nunits. In doing so, we record a pointer
8827 from one copy of the vector stmt to the next, in the field
8828 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
8829 stages to find the correct vector defs to be used when vectorizing
8830 stmts that use the defs of the current stmt. The example below
8831 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
8832 need to create 4 vectorized stmts):
8834 before vectorization:
8835 RELATED_STMT VEC_STMT
8839 step 1: vectorize stmt S1:
8840 We first create the vector stmt VS1_0, and, as usual, record a
8841 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
8842 Next, we create the vector stmt VS1_1, and record a pointer to
8843 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
8844 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
8846 RELATED_STMT VEC_STMT
8847 VS1_0: vx0 = memref0 VS1_1 -
8848 VS1_1: vx1 = memref1 VS1_2 -
8849 VS1_2: vx2 = memref2 VS1_3 -
8850 VS1_3: vx3 = memref3 - -
8851 S1: x = load - VS1_0
8855 /* In case of interleaving (non-unit grouped access):
8862 Vectorized loads are created in the order of memory accesses
8863 starting from the access of the first stmt of the chain:
8866 VS2: vx1 = &base + vec_size*1
8867 VS3: vx3 = &base + vec_size*2
8868 VS4: vx4 = &base + vec_size*3
8870 Then permutation statements are generated:
8872 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
8873 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
8876 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
8877 (the order of the data-refs in the output of vect_permute_load_chain
8878 corresponds to the order of scalar stmts in the interleaving chain - see
8879 the documentation of vect_permute_load_chain()).
8880 The generation of permutation stmts and recording them in
8881 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
8883 In case of both multiple types and interleaving, the vector loads and
8884 permutation stmts above are created for every copy. The result vector
8885 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
8886 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
8888 /* If the data reference is aligned (dr_aligned) or potentially unaligned
8889 on a target that supports unaligned accesses (dr_unaligned_supported)
8890 we generate the following code:
8894 p = p + indx * vectype_size;
8899 Otherwise, the data reference is potentially unaligned on a target that
8900 does not support unaligned accesses (dr_explicit_realign_optimized) -
8901 then generate the following code, in which the data in each iteration is
8902 obtained by two vector loads, one from the previous iteration, and one
8903 from the current iteration:
8905 msq_init = *(floor(p1))
8906 p2 = initial_addr + VS - 1;
8907 realignment_token = call target_builtin;
8910 p2 = p2 + indx * vectype_size
8912 vec_dest = realign_load (msq, lsq, realignment_token)
8917 /* If the misalignment remains the same throughout the execution of the
8918 loop, we can create the init_addr and permutation mask at the loop
8919 preheader. Otherwise, it needs to be created inside the loop.
8920 This can only occur when vectorizing memory accesses in the inner-loop
8921 nested within an outer-loop that is being vectorized. */
8923 if (nested_in_vect_loop
8924 && !multiple_p (DR_STEP_ALIGNMENT (dr_info
->dr
),
8925 GET_MODE_SIZE (TYPE_MODE (vectype
))))
8927 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
8928 compute_in_loop
= true;
8931 bool diff_first_stmt_info
8932 = first_stmt_info_for_drptr
&& first_stmt_info
!= first_stmt_info_for_drptr
;
8934 if ((alignment_support_scheme
== dr_explicit_realign_optimized
8935 || alignment_support_scheme
== dr_explicit_realign
)
8936 && !compute_in_loop
)
8938 /* If we have different first_stmt_info, we can't set up realignment
8939 here, since we can't guarantee first_stmt_info DR has been
8940 initialized yet, use first_stmt_info_for_drptr DR by bumping the
8941 distance from first_stmt_info DR instead as below. */
8942 if (!diff_first_stmt_info
)
8943 msq
= vect_setup_realignment (vinfo
,
8944 first_stmt_info
, gsi
, &realignment_token
,
8945 alignment_support_scheme
, NULL_TREE
,
8947 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
8949 phi
= as_a
<gphi
*> (SSA_NAME_DEF_STMT (msq
));
8950 byte_offset
= size_binop (MINUS_EXPR
, TYPE_SIZE_UNIT (vectype
),
8952 gcc_assert (!first_stmt_info_for_drptr
);
8958 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
8959 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
8962 tree vec_offset
= NULL_TREE
;
8963 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
8965 aggr_type
= NULL_TREE
;
8968 else if (memory_access_type
== VMAT_GATHER_SCATTER
)
8970 aggr_type
= elem_type
;
8971 vect_get_strided_load_store_ops (stmt_info
, loop_vinfo
, &gs_info
,
8972 &bump
, &vec_offset
);
8976 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
8977 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
8979 aggr_type
= vectype
;
8980 bump
= vect_get_data_ptr_increment (vinfo
, dr_info
, aggr_type
,
8981 memory_access_type
);
8984 vec
<tree
> vec_offsets
= vNULL
;
8985 auto_vec
<tree
> vec_masks
;
8987 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
8988 mask
, &vec_masks
, mask_vectype
, NULL_TREE
);
8989 tree vec_mask
= NULL_TREE
;
8990 poly_uint64 group_elt
= 0;
8991 for (j
= 0; j
< ncopies
; j
++)
8993 /* 1. Create the vector or array pointer update chain. */
8996 bool simd_lane_access_p
8997 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) != 0;
8998 if (simd_lane_access_p
8999 && TREE_CODE (DR_BASE_ADDRESS (first_dr_info
->dr
)) == ADDR_EXPR
9000 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr_info
->dr
), 0))
9001 && integer_zerop (get_dr_vinfo_offset (vinfo
, first_dr_info
))
9002 && integer_zerop (DR_INIT (first_dr_info
->dr
))
9003 && alias_sets_conflict_p (get_alias_set (aggr_type
),
9004 get_alias_set (TREE_TYPE (ref_type
)))
9005 && (alignment_support_scheme
== dr_aligned
9006 || alignment_support_scheme
== dr_unaligned_supported
))
9008 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr_info
->dr
));
9009 dataref_offset
= build_int_cst (ref_type
, 0);
9011 else if (diff_first_stmt_info
)
9014 = vect_create_data_ref_ptr (vinfo
, first_stmt_info_for_drptr
,
9015 aggr_type
, at_loop
, offset
, &dummy
,
9016 gsi
, &ptr_incr
, simd_lane_access_p
,
9018 /* Adjust the pointer by the difference to first_stmt. */
9019 data_reference_p ptrdr
9020 = STMT_VINFO_DATA_REF (first_stmt_info_for_drptr
);
9022 = fold_convert (sizetype
,
9023 size_binop (MINUS_EXPR
,
9024 DR_INIT (first_dr_info
->dr
),
9026 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9028 if (alignment_support_scheme
== dr_explicit_realign
)
9030 msq
= vect_setup_realignment (vinfo
,
9031 first_stmt_info_for_drptr
, gsi
,
9033 alignment_support_scheme
,
9034 dataref_ptr
, &at_loop
);
9035 gcc_assert (!compute_in_loop
);
9038 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9040 vect_get_gather_scatter_ops (vinfo
, loop
, stmt_info
, &gs_info
,
9041 &dataref_ptr
, &vec_offsets
, ncopies
);
9042 vec_offset
= vec_offsets
[0];
9046 = vect_create_data_ref_ptr (vinfo
, first_stmt_info
, aggr_type
,
9048 offset
, &dummy
, gsi
, &ptr_incr
,
9052 vec_mask
= vec_masks
[0];
9057 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
9059 else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
9060 vec_offset
= vec_offsets
[j
];
9062 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9065 vec_mask
= vec_masks
[j
];
9068 if (grouped_load
|| slp_perm
)
9069 dr_chain
.create (vec_num
);
9071 gimple
*new_stmt
= NULL
;
9072 if (memory_access_type
== VMAT_LOAD_STORE_LANES
)
9076 vec_array
= create_vector_array (vectype
, vec_num
);
9078 tree final_mask
= NULL_TREE
;
9080 final_mask
= vect_get_loop_mask (gsi
, loop_masks
, ncopies
,
9083 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9090 VEC_ARRAY = MASK_LOAD_LANES (DATAREF_PTR, ALIAS_PTR,
9092 unsigned int align
= TYPE_ALIGN_UNIT (TREE_TYPE (vectype
));
9093 tree alias_ptr
= build_int_cst (ref_type
, align
);
9094 call
= gimple_build_call_internal (IFN_MASK_LOAD_LANES
, 3,
9095 dataref_ptr
, alias_ptr
,
9101 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
9102 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, ref_type
);
9103 call
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
9105 gimple_call_set_lhs (call
, vec_array
);
9106 gimple_call_set_nothrow (call
, true);
9107 vect_finish_stmt_generation (vinfo
, stmt_info
, call
, gsi
);
9110 /* Extract each vector into an SSA_NAME. */
9111 for (i
= 0; i
< vec_num
; i
++)
9113 new_temp
= read_vector_array (vinfo
, stmt_info
, gsi
, scalar_dest
,
9115 dr_chain
.quick_push (new_temp
);
9118 /* Record the mapping between SSA_NAMEs and statements. */
9119 vect_record_grouped_load_vectors (vinfo
, stmt_info
, dr_chain
);
9121 /* Record that VEC_ARRAY is now dead. */
9122 vect_clobber_variable (vinfo
, stmt_info
, gsi
, vec_array
);
9126 for (i
= 0; i
< vec_num
; i
++)
9128 tree final_mask
= NULL_TREE
;
9130 && memory_access_type
!= VMAT_INVARIANT
)
9131 final_mask
= vect_get_loop_mask (gsi
, loop_masks
,
9133 vectype
, vec_num
* j
+ i
);
9135 final_mask
= prepare_load_store_mask (mask_vectype
, final_mask
,
9139 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9140 gsi
, stmt_info
, bump
);
9142 /* 2. Create the vector-load in the loop. */
9143 switch (alignment_support_scheme
)
9146 case dr_unaligned_supported
:
9148 unsigned int misalign
;
9149 unsigned HOST_WIDE_INT align
;
9151 if (memory_access_type
== VMAT_GATHER_SCATTER
)
9153 tree zero
= build_zero_cst (vectype
);
9154 tree scale
= size_int (gs_info
.scale
);
9157 call
= gimple_build_call_internal
9158 (IFN_MASK_GATHER_LOAD
, 5, dataref_ptr
,
9159 vec_offset
, scale
, zero
, final_mask
);
9161 call
= gimple_build_call_internal
9162 (IFN_GATHER_LOAD
, 4, dataref_ptr
,
9163 vec_offset
, scale
, zero
);
9164 gimple_call_set_nothrow (call
, true);
9166 data_ref
= NULL_TREE
;
9171 known_alignment (DR_TARGET_ALIGNMENT (first_dr_info
));
9172 if (alignment_support_scheme
== dr_aligned
)
9174 gcc_assert (aligned_access_p (first_dr_info
));
9177 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9179 align
= dr_alignment
9180 (vect_dr_behavior (vinfo
, first_dr_info
));
9184 misalign
= DR_MISALIGNMENT (first_dr_info
);
9185 if (dataref_offset
== NULL_TREE
9186 && TREE_CODE (dataref_ptr
) == SSA_NAME
)
9187 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
9192 align
= least_bit_hwi (misalign
| align
);
9193 tree ptr
= build_int_cst (ref_type
, align
);
9195 = gimple_build_call_internal (IFN_MASK_LOAD
, 3,
9198 gimple_call_set_nothrow (call
, true);
9200 data_ref
= NULL_TREE
;
9204 tree ltype
= vectype
;
9205 tree new_vtype
= NULL_TREE
;
9206 unsigned HOST_WIDE_INT gap
9207 = DR_GROUP_GAP (first_stmt_info
);
9208 unsigned int vect_align
9209 = vect_known_alignment_in_bytes (first_dr_info
);
9210 unsigned int scalar_dr_size
9211 = vect_get_scalar_dr_size (first_dr_info
);
9212 /* If there's no peeling for gaps but we have a gap
9213 with slp loads then load the lower half of the
9214 vector only. See get_group_load_store_type for
9215 when we apply this optimization. */
9218 && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
)
9220 && known_eq (nunits
, (group_size
- gap
) * 2)
9221 && known_eq (nunits
, group_size
)
9222 && gap
>= (vect_align
/ scalar_dr_size
))
9226 = vector_vector_composition_type (vectype
, 2,
9228 if (new_vtype
!= NULL_TREE
)
9232 = (dataref_offset
? dataref_offset
9233 : build_int_cst (ref_type
, 0));
9234 if (ltype
!= vectype
9235 && memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9237 unsigned HOST_WIDE_INT gap_offset
9238 = gap
* tree_to_uhwi (TYPE_SIZE_UNIT (elem_type
));
9239 tree gapcst
= build_int_cst (ref_type
, gap_offset
);
9240 offset
= size_binop (PLUS_EXPR
, offset
, gapcst
);
9243 = fold_build2 (MEM_REF
, ltype
, dataref_ptr
, offset
);
9244 if (alignment_support_scheme
== dr_aligned
)
9246 else if (DR_MISALIGNMENT (first_dr_info
) == -1)
9247 TREE_TYPE (data_ref
)
9248 = build_aligned_type (TREE_TYPE (data_ref
),
9249 align
* BITS_PER_UNIT
);
9251 TREE_TYPE (data_ref
)
9252 = build_aligned_type (TREE_TYPE (data_ref
),
9253 TYPE_ALIGN (elem_type
));
9254 if (ltype
!= vectype
)
9256 vect_copy_ref_info (data_ref
,
9257 DR_REF (first_dr_info
->dr
));
9258 tree tem
= make_ssa_name (ltype
);
9259 new_stmt
= gimple_build_assign (tem
, data_ref
);
9260 vect_finish_stmt_generation (vinfo
, stmt_info
,
9263 vec
<constructor_elt
, va_gc
> *v
;
9265 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9267 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9268 build_zero_cst (ltype
));
9269 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9273 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, tem
);
9274 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
,
9275 build_zero_cst (ltype
));
9277 gcc_assert (new_vtype
!= NULL_TREE
);
9278 if (new_vtype
== vectype
)
9279 new_stmt
= gimple_build_assign (
9280 vec_dest
, build_constructor (vectype
, v
));
9283 tree new_vname
= make_ssa_name (new_vtype
);
9284 new_stmt
= gimple_build_assign (
9285 new_vname
, build_constructor (new_vtype
, v
));
9286 vect_finish_stmt_generation (vinfo
, stmt_info
,
9288 new_stmt
= gimple_build_assign (
9289 vec_dest
, build1 (VIEW_CONVERT_EXPR
, vectype
,
9296 case dr_explicit_realign
:
9300 tree vs
= size_int (TYPE_VECTOR_SUBPARTS (vectype
));
9302 if (compute_in_loop
)
9303 msq
= vect_setup_realignment (vinfo
, first_stmt_info
, gsi
,
9305 dr_explicit_realign
,
9308 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9309 ptr
= copy_ssa_name (dataref_ptr
);
9311 ptr
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9312 // For explicit realign the target alignment should be
9313 // known at compile time.
9314 unsigned HOST_WIDE_INT align
=
9315 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9316 new_stmt
= gimple_build_assign
9317 (ptr
, BIT_AND_EXPR
, dataref_ptr
,
9319 (TREE_TYPE (dataref_ptr
),
9320 -(HOST_WIDE_INT
) align
));
9321 vect_finish_stmt_generation (vinfo
, stmt_info
,
9324 = build2 (MEM_REF
, vectype
, ptr
,
9325 build_int_cst (ref_type
, 0));
9326 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9327 vec_dest
= vect_create_destination_var (scalar_dest
,
9329 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9330 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9331 gimple_assign_set_lhs (new_stmt
, new_temp
);
9332 gimple_move_vops (new_stmt
, stmt_info
->stmt
);
9333 vect_finish_stmt_generation (vinfo
, stmt_info
,
9337 bump
= size_binop (MULT_EXPR
, vs
,
9338 TYPE_SIZE_UNIT (elem_type
));
9339 bump
= size_binop (MINUS_EXPR
, bump
, size_one_node
);
9340 ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, NULL
, gsi
,
9342 new_stmt
= gimple_build_assign
9343 (NULL_TREE
, BIT_AND_EXPR
, ptr
,
9345 (TREE_TYPE (ptr
), -(HOST_WIDE_INT
) align
));
9346 ptr
= copy_ssa_name (ptr
, new_stmt
);
9347 gimple_assign_set_lhs (new_stmt
, ptr
);
9348 vect_finish_stmt_generation (vinfo
, stmt_info
,
9351 = build2 (MEM_REF
, vectype
, ptr
,
9352 build_int_cst (ref_type
, 0));
9355 case dr_explicit_realign_optimized
:
9357 if (TREE_CODE (dataref_ptr
) == SSA_NAME
)
9358 new_temp
= copy_ssa_name (dataref_ptr
);
9360 new_temp
= make_ssa_name (TREE_TYPE (dataref_ptr
));
9361 // We should only be doing this if we know the target
9362 // alignment at compile time.
9363 unsigned HOST_WIDE_INT align
=
9364 DR_TARGET_ALIGNMENT (first_dr_info
).to_constant ();
9365 new_stmt
= gimple_build_assign
9366 (new_temp
, BIT_AND_EXPR
, dataref_ptr
,
9367 build_int_cst (TREE_TYPE (dataref_ptr
),
9368 -(HOST_WIDE_INT
) align
));
9369 vect_finish_stmt_generation (vinfo
, stmt_info
,
9372 = build2 (MEM_REF
, vectype
, new_temp
,
9373 build_int_cst (ref_type
, 0));
9379 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9380 /* DATA_REF is null if we've already built the statement. */
9383 vect_copy_ref_info (data_ref
, DR_REF (first_dr_info
->dr
));
9384 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
9386 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9387 gimple_set_lhs (new_stmt
, new_temp
);
9388 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9390 /* 3. Handle explicit realignment if necessary/supported.
9392 vec_dest = realign_load (msq, lsq, realignment_token) */
9393 if (alignment_support_scheme
== dr_explicit_realign_optimized
9394 || alignment_support_scheme
== dr_explicit_realign
)
9396 lsq
= gimple_assign_lhs (new_stmt
);
9397 if (!realignment_token
)
9398 realignment_token
= dataref_ptr
;
9399 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9400 new_stmt
= gimple_build_assign (vec_dest
, REALIGN_LOAD_EXPR
,
9401 msq
, lsq
, realignment_token
);
9402 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
9403 gimple_assign_set_lhs (new_stmt
, new_temp
);
9404 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9406 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
9409 if (i
== vec_num
- 1 && j
== ncopies
- 1)
9410 add_phi_arg (phi
, lsq
,
9411 loop_latch_edge (containing_loop
),
9417 if (memory_access_type
== VMAT_CONTIGUOUS_REVERSE
)
9419 tree perm_mask
= perm_mask_for_reverse (vectype
);
9420 new_temp
= permute_vec_elements (vinfo
, new_temp
, new_temp
,
9421 perm_mask
, stmt_info
, gsi
);
9422 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
9425 /* Collect vector loads and later create their permutation in
9426 vect_transform_grouped_load (). */
9427 if (grouped_load
|| slp_perm
)
9428 dr_chain
.quick_push (new_temp
);
9430 /* Store vector loads in the corresponding SLP_NODE. */
9431 if (slp
&& !slp_perm
)
9432 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
9434 /* With SLP permutation we load the gaps as well, without
9435 we need to skip the gaps after we manage to fully load
9436 all elements. group_gap_adj is DR_GROUP_SIZE here. */
9437 group_elt
+= nunits
;
9438 if (maybe_ne (group_gap_adj
, 0U)
9440 && known_eq (group_elt
, group_size
- group_gap_adj
))
9442 poly_wide_int bump_val
9443 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9445 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9446 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
,
9447 gsi
, stmt_info
, bump
);
9451 /* Bump the vector pointer to account for a gap or for excess
9452 elements loaded for a permuted SLP load. */
9453 if (maybe_ne (group_gap_adj
, 0U) && slp_perm
)
9455 poly_wide_int bump_val
9456 = (wi::to_wide (TYPE_SIZE_UNIT (elem_type
))
9458 tree bump
= wide_int_to_tree (sizetype
, bump_val
);
9459 dataref_ptr
= bump_vector_ptr (vinfo
, dataref_ptr
, ptr_incr
, gsi
,
9464 if (slp
&& !slp_perm
)
9470 bool ok
= vect_transform_slp_perm_load (vinfo
, slp_node
, dr_chain
,
9471 gsi
, vf
, false, &n_perms
);
9478 if (memory_access_type
!= VMAT_LOAD_STORE_LANES
)
9479 vect_transform_grouped_load (vinfo
, stmt_info
, dr_chain
,
9481 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9485 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
9488 dr_chain
.release ();
9491 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
9496 /* Function vect_is_simple_cond.
9499 LOOP - the loop that is being vectorized.
9500 COND - Condition that is checked for simple use.
9503 *COMP_VECTYPE - the vector type for the comparison.
9504 *DTS - The def types for the arguments of the comparison
9506 Returns whether a COND can be vectorized. Checks whether
9507 condition operands are supportable using vec_is_simple_use. */
9510 vect_is_simple_cond (tree cond
, vec_info
*vinfo
, stmt_vec_info stmt_info
,
9511 slp_tree slp_node
, tree
*comp_vectype
,
9512 enum vect_def_type
*dts
, tree vectype
)
9515 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9519 if (TREE_CODE (cond
) == SSA_NAME
9520 && VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (cond
)))
9522 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0, &cond
,
9523 &slp_op
, &dts
[0], comp_vectype
)
9525 || !VECTOR_BOOLEAN_TYPE_P (*comp_vectype
))
9530 if (!COMPARISON_CLASS_P (cond
))
9533 lhs
= TREE_OPERAND (cond
, 0);
9534 rhs
= TREE_OPERAND (cond
, 1);
9536 if (TREE_CODE (lhs
) == SSA_NAME
)
9538 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 0,
9539 &lhs
, &slp_op
, &dts
[0], &vectype1
))
9542 else if (TREE_CODE (lhs
) == INTEGER_CST
|| TREE_CODE (lhs
) == REAL_CST
9543 || TREE_CODE (lhs
) == FIXED_CST
)
9544 dts
[0] = vect_constant_def
;
9548 if (TREE_CODE (rhs
) == SSA_NAME
)
9550 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1,
9551 &rhs
, &slp_op
, &dts
[1], &vectype2
))
9554 else if (TREE_CODE (rhs
) == INTEGER_CST
|| TREE_CODE (rhs
) == REAL_CST
9555 || TREE_CODE (rhs
) == FIXED_CST
)
9556 dts
[1] = vect_constant_def
;
9560 if (vectype1
&& vectype2
9561 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
9562 TYPE_VECTOR_SUBPARTS (vectype2
)))
9565 *comp_vectype
= vectype1
? vectype1
: vectype2
;
9566 /* Invariant comparison. */
9567 if (! *comp_vectype
)
9569 tree scalar_type
= TREE_TYPE (lhs
);
9570 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
9571 *comp_vectype
= truth_type_for (vectype
);
9574 /* If we can widen the comparison to match vectype do so. */
9575 if (INTEGRAL_TYPE_P (scalar_type
)
9577 && tree_int_cst_lt (TYPE_SIZE (scalar_type
),
9578 TYPE_SIZE (TREE_TYPE (vectype
))))
9579 scalar_type
= build_nonstandard_integer_type
9580 (vector_element_bits (vectype
), TYPE_UNSIGNED (scalar_type
));
9581 *comp_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
9589 /* vectorizable_condition.
9591 Check if STMT_INFO is conditional modify expression that can be vectorized.
9592 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
9593 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
9596 When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
9598 Return true if STMT_INFO is vectorizable in this way. */
9601 vectorizable_condition (vec_info
*vinfo
,
9602 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
9604 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
9606 tree scalar_dest
= NULL_TREE
;
9607 tree vec_dest
= NULL_TREE
;
9608 tree cond_expr
, cond_expr0
= NULL_TREE
, cond_expr1
= NULL_TREE
;
9609 tree then_clause
, else_clause
;
9610 tree comp_vectype
= NULL_TREE
;
9611 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
9612 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
9615 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
9616 enum vect_def_type dts
[4]
9617 = {vect_unknown_def_type
, vect_unknown_def_type
,
9618 vect_unknown_def_type
, vect_unknown_def_type
};
9622 enum tree_code code
, cond_code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
9624 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
9625 vec
<tree
> vec_oprnds0
= vNULL
;
9626 vec
<tree
> vec_oprnds1
= vNULL
;
9627 vec
<tree
> vec_oprnds2
= vNULL
;
9628 vec
<tree
> vec_oprnds3
= vNULL
;
9630 bool masked
= false;
9632 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
9635 /* Is vectorizable conditional operation? */
9636 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
9640 code
= gimple_assign_rhs_code (stmt
);
9641 if (code
!= COND_EXPR
)
9644 stmt_vec_info reduc_info
= NULL
;
9645 int reduc_index
= -1;
9646 vect_reduction_type reduction_type
= TREE_CODE_REDUCTION
;
9648 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info
)) != NULL
;
9651 if (STMT_SLP_TYPE (stmt_info
))
9653 reduc_info
= info_for_reduction (vinfo
, stmt_info
);
9654 reduction_type
= STMT_VINFO_REDUC_TYPE (reduc_info
);
9655 reduc_index
= STMT_VINFO_REDUC_IDX (stmt_info
);
9656 gcc_assert (reduction_type
!= EXTRACT_LAST_REDUCTION
9657 || reduc_index
!= -1);
9661 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
9664 /* FORNOW: only supported as part of a reduction. */
9665 if (STMT_VINFO_LIVE_P (stmt_info
))
9667 if (dump_enabled_p ())
9668 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9669 "value used after loop.\n");
9674 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
9675 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
9680 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
9684 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
9688 gcc_assert (ncopies
>= 1);
9689 if (for_reduction
&& ncopies
> 1)
9690 return false; /* FORNOW */
9692 cond_expr
= gimple_assign_rhs1 (stmt
);
9694 if (!vect_is_simple_cond (cond_expr
, vinfo
, stmt_info
, slp_node
,
9695 &comp_vectype
, &dts
[0], vectype
)
9699 unsigned op_adjust
= COMPARISON_CLASS_P (cond_expr
) ? 1 : 0;
9700 slp_tree then_slp_node
, else_slp_node
;
9701 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 1 + op_adjust
,
9702 &then_clause
, &then_slp_node
, &dts
[2], &vectype1
))
9704 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
, 2 + op_adjust
,
9705 &else_clause
, &else_slp_node
, &dts
[3], &vectype2
))
9708 if (vectype1
&& !useless_type_conversion_p (vectype
, vectype1
))
9711 if (vectype2
&& !useless_type_conversion_p (vectype
, vectype2
))
9714 masked
= !COMPARISON_CLASS_P (cond_expr
);
9715 vec_cmp_type
= truth_type_for (comp_vectype
);
9717 if (vec_cmp_type
== NULL_TREE
)
9720 cond_code
= TREE_CODE (cond_expr
);
9723 cond_expr0
= TREE_OPERAND (cond_expr
, 0);
9724 cond_expr1
= TREE_OPERAND (cond_expr
, 1);
9727 /* For conditional reductions, the "then" value needs to be the candidate
9728 value calculated by this iteration while the "else" value needs to be
9729 the result carried over from previous iterations. If the COND_EXPR
9730 is the other way around, we need to swap it. */
9731 bool must_invert_cmp_result
= false;
9732 if (reduction_type
== EXTRACT_LAST_REDUCTION
&& reduc_index
== 1)
9735 must_invert_cmp_result
= true;
9738 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond_expr0
));
9739 tree_code new_code
= invert_tree_comparison (cond_code
, honor_nans
);
9740 if (new_code
== ERROR_MARK
)
9741 must_invert_cmp_result
= true;
9744 cond_code
= new_code
;
9745 /* Make sure we don't accidentally use the old condition. */
9746 cond_expr
= NULL_TREE
;
9749 std::swap (then_clause
, else_clause
);
9752 if (!masked
&& VECTOR_BOOLEAN_TYPE_P (comp_vectype
))
9754 /* Boolean values may have another representation in vectors
9755 and therefore we prefer bit operations over comparison for
9756 them (which also works for scalar masks). We store opcodes
9757 to use in bitop1 and bitop2. Statement is vectorized as
9758 BITOP2 (rhs1 BITOP1 rhs2) or rhs1 BITOP2 (BITOP1 rhs2)
9759 depending on bitop1 and bitop2 arity. */
9763 bitop1
= BIT_NOT_EXPR
;
9764 bitop2
= BIT_AND_EXPR
;
9767 bitop1
= BIT_NOT_EXPR
;
9768 bitop2
= BIT_IOR_EXPR
;
9771 bitop1
= BIT_NOT_EXPR
;
9772 bitop2
= BIT_AND_EXPR
;
9773 std::swap (cond_expr0
, cond_expr1
);
9776 bitop1
= BIT_NOT_EXPR
;
9777 bitop2
= BIT_IOR_EXPR
;
9778 std::swap (cond_expr0
, cond_expr1
);
9781 bitop1
= BIT_XOR_EXPR
;
9784 bitop1
= BIT_XOR_EXPR
;
9785 bitop2
= BIT_NOT_EXPR
;
9790 cond_code
= SSA_NAME
;
9793 if (TREE_CODE_CLASS (cond_code
) == tcc_comparison
9794 && reduction_type
== EXTRACT_LAST_REDUCTION
9795 && !expand_vec_cmp_expr_p (comp_vectype
, vec_cmp_type
, cond_code
))
9797 if (dump_enabled_p ())
9798 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9799 "reduction comparison operation not supported.\n");
9805 if (bitop1
!= NOP_EXPR
)
9807 machine_mode mode
= TYPE_MODE (comp_vectype
);
9810 optab
= optab_for_tree_code (bitop1
, comp_vectype
, optab_default
);
9811 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9814 if (bitop2
!= NOP_EXPR
)
9816 optab
= optab_for_tree_code (bitop2
, comp_vectype
,
9818 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
9823 vect_cost_for_stmt kind
= vector_stmt
;
9824 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
9825 /* Count one reduction-like operation per vector. */
9826 kind
= vec_to_scalar
;
9827 else if (!expand_vec_cond_expr_p (vectype
, comp_vectype
, cond_code
))
9831 && (!vect_maybe_update_slp_op_vectype
9832 (SLP_TREE_CHILDREN (slp_node
)[0], comp_vectype
)
9834 && !vect_maybe_update_slp_op_vectype
9835 (SLP_TREE_CHILDREN (slp_node
)[1], comp_vectype
))
9836 || !vect_maybe_update_slp_op_vectype (then_slp_node
, vectype
)
9837 || !vect_maybe_update_slp_op_vectype (else_slp_node
, vectype
)))
9839 if (dump_enabled_p ())
9840 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
9841 "incompatible vector types for invariants\n");
9846 && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo
)
9847 && reduction_type
== EXTRACT_LAST_REDUCTION
)
9848 vect_record_loop_mask (loop_vinfo
, &LOOP_VINFO_MASKS (loop_vinfo
),
9849 ncopies
* vec_num
, vectype
, NULL
);
9851 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
9852 vect_model_simple_cost (vinfo
, stmt_info
, ncopies
, dts
, ndts
, slp_node
,
9861 vec_oprnds0
.create (1);
9862 vec_oprnds1
.create (1);
9863 vec_oprnds2
.create (1);
9864 vec_oprnds3
.create (1);
9868 scalar_dest
= gimple_assign_lhs (stmt
);
9869 if (reduction_type
!= EXTRACT_LAST_REDUCTION
)
9870 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
9872 bool swap_cond_operands
= false;
9874 /* See whether another part of the vectorized code applies a loop
9875 mask to the condition, or to its inverse. */
9877 vec_loop_masks
*masks
= NULL
;
9878 if (loop_vinfo
&& LOOP_VINFO_FULLY_MASKED_P (loop_vinfo
))
9880 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
9881 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
9884 scalar_cond_masked_key
cond (cond_expr
, ncopies
);
9885 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
9886 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
9889 bool honor_nans
= HONOR_NANS (TREE_TYPE (cond
.op0
));
9890 cond
.code
= invert_tree_comparison (cond
.code
, honor_nans
);
9891 if (loop_vinfo
->scalar_cond_masked_set
.contains (cond
))
9893 masks
= &LOOP_VINFO_MASKS (loop_vinfo
);
9894 cond_code
= cond
.code
;
9895 swap_cond_operands
= true;
9901 /* Handle cond expr. */
9903 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
9904 cond_expr
, &vec_oprnds0
, comp_vectype
,
9905 then_clause
, &vec_oprnds2
, vectype
,
9906 reduction_type
!= EXTRACT_LAST_REDUCTION
9907 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
9909 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
9910 cond_expr0
, &vec_oprnds0
, comp_vectype
,
9911 cond_expr1
, &vec_oprnds1
, comp_vectype
,
9912 then_clause
, &vec_oprnds2
, vectype
,
9913 reduction_type
!= EXTRACT_LAST_REDUCTION
9914 ? else_clause
: NULL
, &vec_oprnds3
, vectype
);
9916 /* Arguments are ready. Create the new vector stmt. */
9917 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
9919 vec_then_clause
= vec_oprnds2
[i
];
9920 vec_else_clause
= vec_oprnds3
[i
];
9922 if (swap_cond_operands
)
9923 std::swap (vec_then_clause
, vec_else_clause
);
9926 vec_compare
= vec_cond_lhs
;
9929 vec_cond_rhs
= vec_oprnds1
[i
];
9930 if (bitop1
== NOP_EXPR
)
9931 vec_compare
= build2 (cond_code
, vec_cmp_type
,
9932 vec_cond_lhs
, vec_cond_rhs
);
9935 new_temp
= make_ssa_name (vec_cmp_type
);
9937 if (bitop1
== BIT_NOT_EXPR
)
9938 new_stmt
= gimple_build_assign (new_temp
, bitop1
,
9942 = gimple_build_assign (new_temp
, bitop1
, vec_cond_lhs
,
9944 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9945 if (bitop2
== NOP_EXPR
)
9946 vec_compare
= new_temp
;
9947 else if (bitop2
== BIT_NOT_EXPR
)
9949 /* Instead of doing ~x ? y : z do x ? z : y. */
9950 vec_compare
= new_temp
;
9951 std::swap (vec_then_clause
, vec_else_clause
);
9955 vec_compare
= make_ssa_name (vec_cmp_type
);
9957 = gimple_build_assign (vec_compare
, bitop2
,
9958 vec_cond_lhs
, new_temp
);
9959 vect_finish_stmt_generation (vinfo
, stmt_info
,
9965 /* If we decided to apply a loop mask to the result of the vector
9966 comparison, AND the comparison with the mask now. Later passes
9967 should then be able to reuse the AND results between mulitple
9971 for (int i = 0; i < 100; ++i)
9972 x[i] = y[i] ? z[i] : 10;
9974 results in following optimized GIMPLE:
9976 mask__35.8_43 = vect__4.7_41 != { 0, ... };
9977 vec_mask_and_46 = loop_mask_40 & mask__35.8_43;
9978 _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B];
9979 vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46);
9980 vect_iftmp.12_52 = VEC_COND_EXPR <vec_mask_and_46,
9981 vect_iftmp.11_47, { 10, ... }>;
9983 instead of using a masked and unmasked forms of
9984 vec != { 0, ... } (masked in the MASK_LOAD,
9985 unmasked in the VEC_COND_EXPR). */
9987 /* Force vec_compare to be an SSA_NAME rather than a comparison,
9988 in cases where that's necessary. */
9990 if (masks
|| reduction_type
== EXTRACT_LAST_REDUCTION
)
9992 if (!is_gimple_val (vec_compare
))
9994 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
9995 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
9997 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
9998 vec_compare
= vec_compare_name
;
10001 if (must_invert_cmp_result
)
10003 tree vec_compare_name
= make_ssa_name (vec_cmp_type
);
10004 gassign
*new_stmt
= gimple_build_assign (vec_compare_name
,
10007 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10008 vec_compare
= vec_compare_name
;
10013 unsigned vec_num
= vec_oprnds0
.length ();
10015 = vect_get_loop_mask (gsi
, masks
, vec_num
* ncopies
,
10017 tree tmp2
= make_ssa_name (vec_cmp_type
);
10019 = gimple_build_assign (tmp2
, BIT_AND_EXPR
, vec_compare
,
10021 vect_finish_stmt_generation (vinfo
, stmt_info
, g
, gsi
);
10022 vec_compare
= tmp2
;
10027 if (reduction_type
== EXTRACT_LAST_REDUCTION
)
10029 gimple
*old_stmt
= vect_orig_stmt (stmt_info
)->stmt
;
10030 tree lhs
= gimple_get_lhs (old_stmt
);
10031 new_stmt
= gimple_build_call_internal
10032 (IFN_FOLD_EXTRACT_LAST
, 3, else_clause
, vec_compare
,
10034 gimple_call_set_lhs (new_stmt
, lhs
);
10035 SSA_NAME_DEF_STMT (lhs
) = new_stmt
;
10036 if (old_stmt
== gsi_stmt (*gsi
))
10037 vect_finish_replace_stmt (vinfo
, stmt_info
, new_stmt
);
10040 /* In this case we're moving the definition to later in the
10041 block. That doesn't matter because the only uses of the
10042 lhs are in phi statements. */
10043 gimple_stmt_iterator old_gsi
= gsi_for_stmt (old_stmt
);
10044 gsi_remove (&old_gsi
, true);
10045 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10050 new_temp
= make_ssa_name (vec_dest
);
10051 new_stmt
= gimple_build_assign (new_temp
, VEC_COND_EXPR
, vec_compare
,
10052 vec_then_clause
, vec_else_clause
);
10053 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10056 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10058 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10062 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10064 vec_oprnds0
.release ();
10065 vec_oprnds1
.release ();
10066 vec_oprnds2
.release ();
10067 vec_oprnds3
.release ();
/* NOTE(review): this region is a lossy extraction -- the original file's
   line numbers (10072, 10081, ...) are fused into the text, statements are
   split across lines, and some original lines (braces, returns) are
   missing entirely.  Comments below describe the visible logic only;
   confirm against the pristine tree-vect-stmts.c before editing.  */
10072 /* vectorizable_comparison.
10074 Check if STMT_INFO is comparison expression that can be vectorized.
10075 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
10076 comparison, put it in VEC_STMT, and insert it at GSI.
10078 Return true if STMT_INFO is vectorizable in this way. */
10081 vectorizable_comparison (vec_info
*vinfo
,
10082 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10084 slp_tree slp_node
, stmt_vector_for_cost
*cost_vec
)
10086 tree lhs
, rhs1
, rhs2
;
10087 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
10088 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
10089 tree vec_rhs1
= NULL_TREE
, vec_rhs2
= NULL_TREE
;
10091 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
10092 enum vect_def_type dts
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
10094 poly_uint64 nunits
;
10096 enum tree_code code
, bitop1
= NOP_EXPR
, bitop2
= NOP_EXPR
;
10098 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10099 vec
<tree
> vec_oprnds0
= vNULL
;
10100 vec
<tree
> vec_oprnds1
= vNULL
;
/* Early-out checks: the statement must be relevant (or we are doing BB
   vectorization) and its vector type must be a boolean/mask vector.  */
10104 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
10107 if (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
))
10110 mask_type
= vectype
;
10111 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
10116 ncopies
= vect_get_num_copies (loop_vinfo
, vectype
);
10118 gcc_assert (ncopies
>= 1);
10119 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
10122 if (STMT_VINFO_LIVE_P (stmt_info
))
10124 if (dump_enabled_p ())
10125 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10126 "value used after loop.\n");
10130 gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
10134 code
= gimple_assign_rhs_code (stmt
);
10136 if (TREE_CODE_CLASS (code
) != tcc_comparison
)
/* Fetch the two comparison operands, their defining SLP children and
   their vector types; the subpart counts must agree.  */
10139 slp_tree slp_rhs1
, slp_rhs2
;
10140 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10141 0, &rhs1
, &slp_rhs1
, &dts
[0], &vectype1
))
10144 if (!vect_is_simple_use (vinfo
, stmt_info
, slp_node
,
10145 1, &rhs2
, &slp_rhs2
, &dts
[1], &vectype2
))
10148 if (vectype1
&& vectype2
10149 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
10150 TYPE_VECTOR_SUBPARTS (vectype2
)))
10153 vectype
= vectype1
? vectype1
: vectype2
;
10155 /* Invariant comparison. */
10158 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
10159 vectype
= mask_type
;
10161 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
),
10163 if (!vectype
|| maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
), nunits
))
10166 else if (maybe_ne (nunits
, TYPE_VECTOR_SUBPARTS (vectype
)))
10169 /* Can't compare mask and non-mask types. */
10170 if (vectype1
&& vectype2
10171 && (VECTOR_BOOLEAN_TYPE_P (vectype1
) ^ VECTOR_BOOLEAN_TYPE_P (vectype2
)))
10174 /* Boolean values may have another representation in vectors
10175 and therefore we prefer bit operations over comparison for
10176 them (which also works for scalar masks). We store opcodes
10177 to use in bitop1 and bitop2. Statement is vectorized as
10178 BITOP2 (rhs1 BITOP1 rhs2) or
10179 rhs1 BITOP2 (BITOP1 rhs2)
10180 depending on bitop1 and bitop2 arity. */
10181 bool swap_p
= false;
10182 if (VECTOR_BOOLEAN_TYPE_P (vectype
))
10184 if (code
== GT_EXPR
)
10186 bitop1
= BIT_NOT_EXPR
;
10187 bitop2
= BIT_AND_EXPR
;
10189 else if (code
== GE_EXPR
)
10191 bitop1
= BIT_NOT_EXPR
;
10192 bitop2
= BIT_IOR_EXPR
;
10194 else if (code
== LT_EXPR
)
10196 bitop1
= BIT_NOT_EXPR
;
10197 bitop2
= BIT_AND_EXPR
;
10200 else if (code
== LE_EXPR
)
10202 bitop1
= BIT_NOT_EXPR
;
10203 bitop2
= BIT_IOR_EXPR
;
10208 bitop1
= BIT_XOR_EXPR
;
10209 if (code
== EQ_EXPR
)
10210 bitop2
= BIT_NOT_EXPR
;
/* Analysis phase: check target support, either for the vector compare
   itself or for the bit operations we lowered it to.  */
10216 if (bitop1
== NOP_EXPR
)
10218 if (!expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
10223 machine_mode mode
= TYPE_MODE (vectype
);
10226 optab
= optab_for_tree_code (bitop1
, vectype
, optab_default
);
10227 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10230 if (bitop2
!= NOP_EXPR
)
10232 optab
= optab_for_tree_code (bitop2
, vectype
, optab_default
);
10233 if (!optab
|| optab_handler (optab
, mode
) == CODE_FOR_nothing
)
10238 /* Put types on constant and invariant SLP children. */
10240 && (!vect_maybe_update_slp_op_vectype (slp_rhs1
, vectype
)
10241 || !vect_maybe_update_slp_op_vectype (slp_rhs2
, vectype
)))
10243 if (dump_enabled_p ())
10244 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10245 "incompatible vector types for invariants\n");
/* Analysis succeeded: record the statement kind and its cost.  */
10249 STMT_VINFO_TYPE (stmt_info
) = comparison_vec_info_type
;
10250 vect_model_simple_cost (vinfo
, stmt_info
,
10251 ncopies
* (1 + (bitop2
!= NOP_EXPR
)),
10252 dts
, ndts
, slp_node
, cost_vec
);
/* Transformation phase: build the vector compare (or bitop sequence)
   for each copy and record the generated statements.  */
10259 vec_oprnds0
.create (1);
10260 vec_oprnds1
.create (1);
10264 lhs
= gimple_assign_lhs (stmt
);
10265 mask
= vect_create_destination_var (lhs
, mask_type
);
10267 vect_get_vec_defs (vinfo
, stmt_info
, slp_node
, ncopies
,
10268 rhs1
, &vec_oprnds0
, vectype
,
10269 rhs2
, &vec_oprnds1
, vectype
);
10271 std::swap (vec_oprnds0
, vec_oprnds1
);
10273 /* Arguments are ready. Create the new vector stmt. */
10274 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_rhs1
)
10277 vec_rhs2
= vec_oprnds1
[i
];
10279 new_temp
= make_ssa_name (mask
);
10280 if (bitop1
== NOP_EXPR
)
10282 new_stmt
= gimple_build_assign (new_temp
, code
,
10283 vec_rhs1
, vec_rhs2
);
10284 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10288 if (bitop1
== BIT_NOT_EXPR
)
10289 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs2
);
10291 new_stmt
= gimple_build_assign (new_temp
, bitop1
, vec_rhs1
,
10293 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10294 if (bitop2
!= NOP_EXPR
)
10296 tree res
= make_ssa_name (mask
);
10297 if (bitop2
== BIT_NOT_EXPR
)
10298 new_stmt
= gimple_build_assign (res
, bitop2
, new_temp
);
10300 new_stmt
= gimple_build_assign (res
, bitop2
, vec_rhs1
,
10302 vect_finish_stmt_generation (vinfo
, stmt_info
, new_stmt
, gsi
);
10306 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
10308 STMT_VINFO_VEC_STMTS (stmt_info
).safe_push (new_stmt
);
10312 *vec_stmt
= STMT_VINFO_VEC_STMTS (stmt_info
)[0];
10314 vec_oprnds0
.release ();
10315 vec_oprnds1
.release ();
/* NOTE(review): lossy extraction -- original line numbers are fused into
   the text and some lines (braces, returns) are missing; verify against
   the pristine source.  Visible logic: with an SLP node, every live
   scalar stmt in the node must be handled by vectorizable_live_operation;
   otherwise only STMT_INFO itself is checked.  */
10320 /* If SLP_NODE is nonnull, return true if vectorizable_live_operation
10321 can handle all live statements in the node. Otherwise return true
10322 if STMT_INFO is not live or if vectorizable_live_operation can handle it.
10323 GSI and VEC_STMT_P are as for vectorizable_live_operation. */
10326 can_vectorize_live_stmts (loop_vec_info loop_vinfo
,
10327 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10328 slp_tree slp_node
, slp_instance slp_node_instance
,
10330 stmt_vector_for_cost
*cost_vec
)
10334 stmt_vec_info slp_stmt_info
;
10336 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node
), i
, slp_stmt_info
)
10338 if (STMT_VINFO_LIVE_P (slp_stmt_info
)
10339 && !vectorizable_live_operation (loop_vinfo
,
10340 slp_stmt_info
, gsi
, slp_node
,
10341 slp_node_instance
, i
,
10342 vec_stmt_p
, cost_vec
))
10346 else if (STMT_VINFO_LIVE_P (stmt_info
)
10347 && !vectorizable_live_operation (loop_vinfo
, stmt_info
, gsi
,
10348 slp_node
, slp_node_instance
, -1,
10349 vec_stmt_p
, cost_vec
))
/* NOTE(review): lossy extraction -- original line numbers are fused into
   the text and some lines (braces, returns, a few conditions) are
   missing; verify against the pristine source before editing.  */
10355 /* Make sure the statement is vectorizable. */
10358 vect_analyze_stmt (vec_info
*vinfo
,
10359 stmt_vec_info stmt_info
, bool *need_to_vectorize
,
10360 slp_tree node
, slp_instance node_instance
,
10361 stmt_vector_for_cost
*cost_vec
)
10363 bb_vec_info bb_vinfo
= dyn_cast
<bb_vec_info
> (vinfo
);
10364 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
10366 gimple_seq pattern_def_seq
;
10368 if (dump_enabled_p ())
10369 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: %G",
/* Statements with volatile operands are never vectorized.  */
10372 if (gimple_has_volatile_ops (stmt_info
->stmt
))
10373 return opt_result::failure_at (stmt_info
->stmt
,
10375 " stmt has volatile operands: %G\n",
/* Recursively analyze the relevant/live statements of an attached
   pattern definition sequence.  */
10378 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10380 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
10382 gimple_stmt_iterator si
;
10384 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
10386 stmt_vec_info pattern_def_stmt_info
10387 = vinfo
->lookup_stmt (gsi_stmt (si
))
;
10388 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info
)
10389 || STMT_VINFO_LIVE_P (pattern_def_stmt_info
))
10391 /* Analyze def stmt of STMT if it's a pattern stmt. */
10392 if (dump_enabled_p ())
10393 dump_printf_loc (MSG_NOTE
, vect_location
,
10394 "==> examining pattern def statement: %G",
10395 pattern_def_stmt_info
->stmt
);
10398 = vect_analyze_stmt (vinfo
, pattern_def_stmt_info
,
10399 need_to_vectorize
, node
, node_instance
,
10407 /* Skip stmts that do not need to be vectorized. In loops this is expected
10409 - the COND_EXPR which is the loop exit condition
10410 - any LABEL_EXPRs in the loop
10411 - computations that are used only for array indexing or loop control.
10412 In basic blocks we only analyze statements that are a part of some SLP
10413 instance, therefore, all the statements are relevant.
10415 Pattern statement needs to be analyzed instead of the original statement
10416 if the original statement is not relevant. Otherwise, we analyze both
10417 statements. In basic blocks we are called from some SLP instance
10418 traversal, don't analyze pattern stmts instead, the pattern stmts
10419 already will be part of SLP instance. */
10421 stmt_vec_info pattern_stmt_info
= STMT_VINFO_RELATED_STMT (stmt_info
);
10422 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
10423 && !STMT_VINFO_LIVE_P (stmt_info
))
10425 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10426 && pattern_stmt_info
10427 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10428 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10430 /* Analyze PATTERN_STMT instead of the original stmt. */
10431 stmt_info
= pattern_stmt_info
;
10432 if (dump_enabled_p ())
10433 dump_printf_loc (MSG_NOTE
, vect_location
,
10434 "==> examining pattern statement: %G",
10439 if (dump_enabled_p ())
10440 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
10442 return opt_result::success ();
10445 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
10447 && pattern_stmt_info
10448 && (STMT_VINFO_RELEVANT_P (pattern_stmt_info
)
10449 || STMT_VINFO_LIVE_P (pattern_stmt_info
)))
10451 /* Analyze PATTERN_STMT too. */
10452 if (dump_enabled_p ())
10453 dump_printf_loc (MSG_NOTE
, vect_location
,
10454 "==> examining pattern statement: %G",
10455 pattern_stmt_info
->stmt
);
10458 = vect_analyze_stmt (vinfo
, pattern_stmt_info
, need_to_vectorize
, node
,
10459 node_instance
, cost_vec
);
/* Sanity-check the def type against the recorded relevance.  */
10464 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
10466 case vect_internal_def
:
10469 case vect_reduction_def
:
10470 case vect_nested_cycle
:
10471 gcc_assert (!bb_vinfo
10472 && (relevance
== vect_used_in_outer
10473 || relevance
== vect_used_in_outer_by_reduction
10474 || relevance
== vect_used_by_reduction
10475 || relevance
== vect_unused_in_scope
10476 || relevance
== vect_used_only_live
));
10479 case vect_induction_def
:
10480 gcc_assert (!bb_vinfo
);
10483 case vect_constant_def
:
10484 case vect_external_def
:
10485 case vect_unknown_def_type
:
10487 gcc_unreachable ();
10490 if (STMT_VINFO_RELEVANT_P (stmt_info
))
10492 tree type
= gimple_expr_type (stmt_info
->stmt
);
10493 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (type
)));
10494 gcall
*call
= dyn_cast
<gcall
*> (stmt_info
->stmt
);
10495 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
)
10496 || (call
&& gimple_call_lhs (call
) == NULL_TREE
));
10497 *need_to_vectorize
= true;
10500 if (PURE_SLP_STMT (stmt_info
) && !node
)
10502 if (dump_enabled_p ())
10503 dump_printf_loc (MSG_NOTE
, vect_location
,
10504 "handled only by SLP analysis\n");
10505 return opt_result::success ();
/* Dispatch: try every vectorizable_* analysis routine in turn.  The
   first chain is for loop vectorization, the second (below) for BB
   vectorization.  */
10510 && (STMT_VINFO_RELEVANT_P (stmt_info
)
10511 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
10512 /* Prefer vectorizable_call over vectorizable_simd_clone_call so
10513 -mveclibabi= takes preference over library functions with
10514 the simd attribute. */
10515 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10516 || vectorizable_simd_clone_call (vinfo
, stmt_info
, NULL
, NULL
, node
,
10518 || vectorizable_conversion (vinfo
, stmt_info
,
10519 NULL
, NULL
, node
, cost_vec
)
10520 || vectorizable_operation (vinfo
, stmt_info
,
10521 NULL
, NULL
, node
, cost_vec
)
10522 || vectorizable_assignment (vinfo
, stmt_info
,
10523 NULL
, NULL
, node
, cost_vec
)
10524 || vectorizable_load (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10525 || vectorizable_store (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10526 || vectorizable_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10527 node
, node_instance
, cost_vec
)
10528 || vectorizable_induction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10529 NULL
, NULL
, node
, cost_vec
)
10530 || vectorizable_shift (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10531 || vectorizable_condition (vinfo
, stmt_info
,
10532 NULL
, NULL
, node
, cost_vec
)
10533 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
10535 || vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
10536 stmt_info
, NULL
, node
));
10540 ok
= (vectorizable_call (vinfo
, stmt_info
, NULL
, NULL
, node
, cost_vec
)
10541 || vectorizable_simd_clone_call (vinfo
, stmt_info
,
10542 NULL
, NULL
, node
, cost_vec
)
10543 || vectorizable_conversion (vinfo
, stmt_info
, NULL
, NULL
, node
,
10545 || vectorizable_shift (vinfo
, stmt_info
,
10546 NULL
, NULL
, node
, cost_vec
)
10547 || vectorizable_operation (vinfo
, stmt_info
,
10548 NULL
, NULL
, node
, cost_vec
)
10549 || vectorizable_assignment (vinfo
, stmt_info
, NULL
, NULL
, node
,
10551 || vectorizable_load (vinfo
, stmt_info
,
10552 NULL
, NULL
, node
, cost_vec
)
10553 || vectorizable_store (vinfo
, stmt_info
,
10554 NULL
, NULL
, node
, cost_vec
)
10555 || vectorizable_condition (vinfo
, stmt_info
,
10556 NULL
, NULL
, node
, cost_vec
)
10557 || vectorizable_comparison (vinfo
, stmt_info
, NULL
, NULL
, node
,
10562 return opt_result::failure_at (stmt_info
->stmt
,
10564 " relevant stmt not supported: %G",
10567 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
10568 need extra handling, except for vectorizable reductions. */
10570 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
10571 && STMT_VINFO_TYPE (stmt_info
) != lc_phi_info_type
10572 && !can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
10573 stmt_info
, NULL
, node
, node_instance
,
10575 return opt_result::failure_at (stmt_info
->stmt
,
10577 " live stmt not supported: %G",
10580 return opt_result::success ();
/* NOTE(review): lossy extraction -- original line numbers are fused into
   the text and some lines (braces, breaks, a few conditions) are missing;
   verify against the pristine source before editing.  */
10584 /* Function vect_transform_stmt.
10586 Create a vectorized stmt to replace STMT_INFO, and insert it at GSI. */
10589 vect_transform_stmt (vec_info
*vinfo
,
10590 stmt_vec_info stmt_info
, gimple_stmt_iterator
*gsi
,
10591 slp_tree slp_node
, slp_instance slp_node_instance
)
10593 bool is_store
= false;
10594 gimple
*vec_stmt
= NULL
;
10597 gcc_assert (slp_node
|| !PURE_SLP_STMT (stmt_info
));
/* Dispatch on the statement kind recorded during analysis; each case
   calls the matching transform routine.  */
10599 switch (STMT_VINFO_TYPE (stmt_info
))
10601 case type_demotion_vec_info_type
:
10602 case type_promotion_vec_info_type
:
10603 case type_conversion_vec_info_type
:
10604 done
= vectorizable_conversion (vinfo
, stmt_info
,
10605 gsi
, &vec_stmt
, slp_node
, NULL
);
10609 case induc_vec_info_type
:
10610 done
= vectorizable_induction (as_a
<loop_vec_info
> (vinfo
),
10611 stmt_info
, gsi
, &vec_stmt
, slp_node
,
10616 case shift_vec_info_type
:
10617 done
= vectorizable_shift (vinfo
, stmt_info
,
10618 gsi
, &vec_stmt
, slp_node
, NULL
);
10622 case op_vec_info_type
:
10623 done
= vectorizable_operation (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
10628 case assignment_vec_info_type
:
10629 done
= vectorizable_assignment (vinfo
, stmt_info
,
10630 gsi
, &vec_stmt
, slp_node
, NULL
);
10634 case load_vec_info_type
:
10635 done
= vectorizable_load (vinfo
, stmt_info
, gsi
, &vec_stmt
, slp_node
,
10640 case store_vec_info_type
:
10641 done
= vectorizable_store (vinfo
, stmt_info
,
10642 gsi
, &vec_stmt
, slp_node
, NULL
);
10644 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
10646 /* In case of interleaving, the whole chain is vectorized when the
10647 last store in the chain is reached. Store stmts before the last
10648 one are skipped, and there vec_stmt_info shouldn't be freed
10650 stmt_vec_info group_info
= DR_GROUP_FIRST_ELEMENT (stmt_info
);
10651 if (DR_GROUP_STORE_COUNT (group_info
) == DR_GROUP_SIZE (group_info
))
10658 case condition_vec_info_type
:
10659 done
= vectorizable_condition (vinfo
, stmt_info
,
10660 gsi
, &vec_stmt
, slp_node
, NULL
);
10664 case comparison_vec_info_type
:
10665 done
= vectorizable_comparison (vinfo
, stmt_info
, gsi
, &vec_stmt
,
10670 case call_vec_info_type
:
10671 done
= vectorizable_call (vinfo
, stmt_info
,
10672 gsi
, &vec_stmt
, slp_node
, NULL
);
10675 case call_simd_clone_vec_info_type
:
10676 done
= vectorizable_simd_clone_call (vinfo
, stmt_info
, gsi
, &vec_stmt
,
10680 case reduc_vec_info_type
:
10681 done
= vect_transform_reduction (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10682 gsi
, &vec_stmt
, slp_node
);
10686 case cycle_phi_info_type
:
10687 done
= vect_transform_cycle_phi (as_a
<loop_vec_info
> (vinfo
), stmt_info
,
10688 &vec_stmt
, slp_node
, slp_node_instance
);
10692 case lc_phi_info_type
:
10693 done
= vectorizable_lc_phi (as_a
<loop_vec_info
> (vinfo
),
10694 stmt_info
, &vec_stmt
, slp_node
);
10699 if (!STMT_VINFO_LIVE_P (stmt_info
))
10701 if (dump_enabled_p ())
10702 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
10703 "stmt not supported.\n");
10704 gcc_unreachable ();
10709 if (!slp_node
&& vec_stmt
)
10710 gcc_assert (STMT_VINFO_VEC_STMTS (stmt_info
).exists ());
10712 if (STMT_VINFO_TYPE (stmt_info
) == store_vec_info_type
)
10715 /* If this stmt defines a value used on a backedge, update the
10716 vectorized PHIs. */
10717 stmt_vec_info orig_stmt_info
= vect_orig_stmt (stmt_info
);
10718 stmt_vec_info reduc_info
;
10719 if (STMT_VINFO_REDUC_DEF (orig_stmt_info
)
10720 && vect_stmt_to_vectorize (orig_stmt_info
) == stmt_info
10721 && (reduc_info
= info_for_reduction (vinfo
, orig_stmt_info
))
10722 && STMT_VINFO_REDUC_TYPE (reduc_info
) != FOLD_LEFT_REDUCTION
10723 && STMT_VINFO_REDUC_TYPE (reduc_info
) != EXTRACT_LAST_REDUCTION
)
10728 && (phi
= dyn_cast
<gphi
*>
10729 (STMT_VINFO_REDUC_DEF (orig_stmt_info
)->stmt
))
10730 && dominated_by_p (CDI_DOMINATORS
,
10731 gimple_bb (orig_stmt_info
->stmt
), gimple_bb (phi
))
10732 && (e
= loop_latch_edge (gimple_bb (phi
)->loop_father
))
10733 && (PHI_ARG_DEF_FROM_EDGE (phi
, e
)
10734 == gimple_get_lhs (orig_stmt_info
->stmt
)))
10736 vec
<gimple
*> &phi_info
10737 = STMT_VINFO_VEC_STMTS (STMT_VINFO_REDUC_DEF (orig_stmt_info
));
10738 vec
<gimple
*> &vec_stmt
10739 = STMT_VINFO_VEC_STMTS (stmt_info
);
10740 gcc_assert (phi_info
.length () == vec_stmt
.length ());
10741 for (unsigned i
= 0; i
< phi_info
.length (); ++i
)
10742 add_phi_arg (as_a
<gphi
*> (phi_info
[i
]),
10743 gimple_get_lhs (vec_stmt
[i
]), e
,
10744 gimple_phi_arg_location (phi
, e
->dest_idx
));
/* SLP variant of the backedge fixup: wire the vector defs of this node
   into the reduction PHIs of the instance.  */
10747 && slp_node
!= slp_node_instance
->reduc_phis
)
10749 slp_tree phi_node
= slp_node_instance
->reduc_phis
;
10750 gphi
*phi
= as_a
<gphi
*> (SLP_TREE_SCALAR_STMTS (phi_node
)[0]->stmt
);
10751 e
= loop_latch_edge (gimple_bb (phi
)->loop_father
);
10752 gcc_assert (SLP_TREE_VEC_STMTS (phi_node
).length ()
10753 == SLP_TREE_VEC_STMTS (slp_node
).length ());
10754 for (unsigned i
= 0; i
< SLP_TREE_VEC_STMTS (phi_node
).length (); ++i
)
10755 add_phi_arg (as_a
<gphi
*> (SLP_TREE_VEC_STMTS (phi_node
)[i
]),
10756 vect_get_slp_vect_def (slp_node
, i
),
10757 e
, gimple_phi_arg_location (phi
, e
->dest_idx
));
10761 /* Handle stmts whose DEF is used outside the loop-nest that is
10762 being vectorized. */
10763 if (is_a
<loop_vec_info
> (vinfo
))
10764 done
= can_vectorize_live_stmts (as_a
<loop_vec_info
> (vinfo
),
10765 stmt_info
, gsi
, slp_node
,
10766 slp_node_instance
, true, NULL
);
/* NOTE(review): lossy extraction -- original line numbers fused in, some
   lines missing.  Visible logic: walk the DR group chain starting at
   FIRST_STMT_INFO, caching the next element before removing each
   (pattern-original) statement from VINFO.  */
10773 /* Remove a group of stores (for SLP or interleaving), free their
10777 vect_remove_stores (vec_info
*vinfo
, stmt_vec_info first_stmt_info
)
10779 stmt_vec_info next_stmt_info
= first_stmt_info
;
10781 while (next_stmt_info
)
10783 stmt_vec_info tmp
= DR_GROUP_NEXT_ELEMENT (next_stmt_info
);
10784 next_stmt_info
= vect_orig_stmt (next_stmt_info
);
10785 /* Free the attached stmt_vec_info and remove the stmt. */
10786 vinfo
->remove_stmt (next_stmt_info
);
10787 next_stmt_info
= tmp
;
/* NOTE(review): lossy extraction -- original line numbers fused into the
   text, some lines (braces, returns) missing; verify against the
   pristine source before editing.  */
10791 /* If NUNITS is nonzero, return a vector type that contains NUNITS
10792 elements of type SCALAR_TYPE, or null if the target doesn't support
10795 If NUNITS is zero, return a vector type that contains elements of
10796 type SCALAR_TYPE, choosing whichever vector size the target prefers.
10798 If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode
10799 for this vectorization region and want to "autodetect" the best choice.
10800 Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE
10801 and we want the new type to be interoperable with it. PREVAILING_MODE
10802 in this case can be a scalar integer mode or a vector mode; when it
10803 is a vector mode, the function acts like a tree-level version of
10804 related_vector_mode. */
10807 get_related_vectype_for_scalar_type (machine_mode prevailing_mode
,
10808 tree scalar_type
, poly_uint64 nunits
)
10810 tree orig_scalar_type
= scalar_type
;
10811 scalar_mode inner_mode
;
10812 machine_mode simd_mode
;
/* Only integer- and float-mode scalars can become vector elements.  */
10815 if (!is_int_mode (TYPE_MODE (scalar_type
), &inner_mode
)
10816 && !is_float_mode (TYPE_MODE (scalar_type
), &inner_mode
))
10819 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
10821 /* For vector types of elements whose mode precision doesn't
10822 match their types precision we use a element type of mode
10823 precision. The vectorization routines will have to make sure
10824 they support the proper result truncation/extension.
10825 We also make sure to build vector types with INTEGER_TYPE
10826 component type only. */
10827 if (INTEGRAL_TYPE_P (scalar_type
)
10828 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
10829 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
10830 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
10831 TYPE_UNSIGNED (scalar_type
));
10833 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
10834 When the component mode passes the above test simply use a type
10835 corresponding to that mode. The theory is that any use that
10836 would cause problems with this will disable vectorization anyway. */
10837 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
10838 && !INTEGRAL_TYPE_P (scalar_type
))
10839 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
10841 /* We can't build a vector type of elements with alignment bigger than
10843 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
10844 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
10845 TYPE_UNSIGNED (scalar_type
));
10847 /* If we felt back to using the mode fail if there was
10848 no scalar type for it. */
10849 if (scalar_type
== NULL_TREE
)
10852 /* If no prevailing mode was supplied, use the mode the target prefers.
10853 Otherwise lookup a vector mode based on the prevailing mode. */
10854 if (prevailing_mode
== VOIDmode
)
10856 gcc_assert (known_eq (nunits
, 0U));
10857 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
10858 if (SCALAR_INT_MODE_P (simd_mode
))
10860 /* Traditional behavior is not to take the integer mode
10861 literally, but simply to use it as a way of determining
10862 the vector size. It is up to mode_for_vector to decide
10863 what the TYPE_MODE should be.
10865 Note that nunits == 1 is allowed in order to support single
10866 element vector types. */
10867 if (!multiple_p (GET_MODE_SIZE (simd_mode
), nbytes
, &nunits
)
10868 || !mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
10872 else if (SCALAR_INT_MODE_P (prevailing_mode
)
10873 || !related_vector_mode (prevailing_mode
,
10874 inner_mode
, nunits
).exists (&simd_mode
))
10876 /* Fall back to using mode_for_vector, mostly in the hope of being
10877 able to use an integer mode. */
10878 if (known_eq (nunits
, 0U)
10879 && !multiple_p (GET_MODE_SIZE (prevailing_mode
), nbytes
, &nunits
))
10882 if (!mode_for_vector (inner_mode
, nunits
).exists (&simd_mode
))
10886 vectype
= build_vector_type_for_mode (scalar_type
, simd_mode
);
10888 /* In cases where the mode was chosen by mode_for_vector, check that
10889 the target actually supports the chosen mode, or that it at least
10890 allows the vector mode to be replaced by a like-sized integer. */
10891 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
10892 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
10895 /* Re-attach the address-space qualifier if we canonicalized the scalar
10897 if (TYPE_ADDR_SPACE (orig_scalar_type
) != TYPE_ADDR_SPACE (vectype
))
10898 return build_qualified_type
10899 (vectype
, KEEP_QUAL_ADDR_SPACE (TYPE_QUALS (orig_scalar_type
)));
/* NOTE(review): lossy extraction -- original line numbers fused into the
   text, some lines (braces, returns, one loop condition) missing; verify
   against the pristine source before editing.  */
10904 /* Function get_vectype_for_scalar_type.
10906 Returns the vector type corresponding to SCALAR_TYPE as supported
10907 by the target. If GROUP_SIZE is nonzero and we're performing BB
10908 vectorization, make sure that the number of elements in the vector
10909 is no bigger than GROUP_SIZE. */
10912 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
10913 unsigned int group_size
)
10915 /* For BB vectorization, we should always have a group size once we've
10916 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
10917 are tentative requests during things like early data reference
10918 analysis and pattern recognition. */
10919 if (is_a
<bb_vec_info
> (vinfo
))
10920 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
10924 tree vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
10926 if (vectype
&& vinfo
->vector_mode
== VOIDmode
)
10927 vinfo
->vector_mode
= TYPE_MODE (vectype
);
10929 /* Register the natural choice of vector type, before the group size
10930 has been applied. */
10932 vinfo
->used_vector_modes
.add (TYPE_MODE (vectype
));
10934 /* If the natural choice of vector type doesn't satisfy GROUP_SIZE,
10935 try again with an explicit number of elements. */
10938 && maybe_ge (TYPE_VECTOR_SUBPARTS (vectype
), group_size
))
10940 /* Start with the biggest number of units that fits within
10941 GROUP_SIZE and halve it until we find a valid vector type.
10942 Usually either the first attempt will succeed or all will
10943 fail (in the latter case because GROUP_SIZE is too small
10944 for the target), but it's possible that a target could have
10945 a hole between supported vector types.
10947 If GROUP_SIZE is not a power of 2, this has the effect of
10948 trying the largest power of 2 that fits within the group,
10949 even though the group is not a multiple of that vector size.
10950 The BB vectorizer will then try to carve up the group into
10952 unsigned int nunits
= 1 << floor_log2 (group_size
);
10955 vectype
= get_related_vectype_for_scalar_type (vinfo
->vector_mode
,
10956 scalar_type
, nunits
);
10959 while (nunits
> 1 && !vectype
);
10965 /* Return the vector type corresponding to SCALAR_TYPE as supported
10966 by the target. NODE, if nonnull, is the SLP tree node that will
10967 use the returned vector type. */
10970 get_vectype_for_scalar_type (vec_info
*vinfo
, tree scalar_type
, slp_tree node
)
10972 unsigned int group_size
= 0;
10974 group_size
= SLP_TREE_LANES (node
);
10975 return get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
10978 /* Function get_mask_type_for_scalar_type.
10980 Returns the mask type corresponding to a result of comparison
10981 of vectors of specified SCALAR_TYPE as supported by target.
10982 If GROUP_SIZE is nonzero and we're performing BB vectorization,
10983 make sure that the number of elements in the vector is no bigger
10984 than GROUP_SIZE. */
10987 get_mask_type_for_scalar_type (vec_info
*vinfo
, tree scalar_type
,
10988 unsigned int group_size
)
10990 tree vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
10995 return truth_type_for (vectype
);
10998 /* Function get_same_sized_vectype
11000 Returns a vector type corresponding to SCALAR_TYPE of size
11001 VECTOR_TYPE if supported by the target. */
11004 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
11006 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
11007 return truth_type_for (vector_type
);
11009 poly_uint64 nunits
;
11010 if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type
)),
11011 GET_MODE_SIZE (TYPE_MODE (scalar_type
)), &nunits
))
11014 return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type
),
11015 scalar_type
, nunits
);
11018 /* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE
11019 would not change the chosen vector modes. */
11022 vect_chooses_same_modes_p (vec_info
*vinfo
, machine_mode vector_mode
)
11024 for (vec_info::mode_set::iterator i
= vinfo
->used_vector_modes
.begin ();
11025 i
!= vinfo
->used_vector_modes
.end (); ++i
)
11026 if (!VECTOR_MODE_P (*i
)
11027 || related_vector_mode (vector_mode
, GET_MODE_INNER (*i
), 0) != *i
)
11032 /* Function vect_is_simple_use.
11035 VINFO - the vect info of the loop or basic block that is being vectorized.
11036 OPERAND - operand in the loop or bb.
11038 DEF_STMT_INFO_OUT (optional) - information about the defining stmt in
11039 case OPERAND is an SSA_NAME that is defined in the vectorizable region
11040 DEF_STMT_OUT (optional) - the defining stmt in case OPERAND is an SSA_NAME;
11041 the definition could be anywhere in the function
11042 DT - the type of definition
11044 Returns whether a stmt with OPERAND can be vectorized.
11045 For loops, supportable operands are constants, loop invariants, and operands
11046 that are defined by the current iteration of the loop. Unsupportable
11047 operands are those that are defined by a previous iteration of the loop (as
11048 is the case in reduction/induction computations).
11049 For basic blocks, supportable operands are constants and bb invariants.
11050 For now, operands defined outside the basic block are not supported. */
11053 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11054 stmt_vec_info
*def_stmt_info_out
, gimple
**def_stmt_out
)
11056 if (def_stmt_info_out
)
11057 *def_stmt_info_out
= NULL
;
11059 *def_stmt_out
= NULL
;
11060 *dt
= vect_unknown_def_type
;
11062 if (dump_enabled_p ())
11064 dump_printf_loc (MSG_NOTE
, vect_location
,
11065 "vect_is_simple_use: operand ");
11066 if (TREE_CODE (operand
) == SSA_NAME
11067 && !SSA_NAME_IS_DEFAULT_DEF (operand
))
11068 dump_gimple_expr (MSG_NOTE
, TDF_SLIM
, SSA_NAME_DEF_STMT (operand
), 0);
11070 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
11073 if (CONSTANT_CLASS_P (operand
))
11074 *dt
= vect_constant_def
;
11075 else if (is_gimple_min_invariant (operand
))
11076 *dt
= vect_external_def
;
11077 else if (TREE_CODE (operand
) != SSA_NAME
)
11078 *dt
= vect_unknown_def_type
;
11079 else if (SSA_NAME_IS_DEFAULT_DEF (operand
))
11080 *dt
= vect_external_def
;
11083 gimple
*def_stmt
= SSA_NAME_DEF_STMT (operand
);
11084 stmt_vec_info stmt_vinfo
= vinfo
->lookup_def (operand
);
11086 *dt
= vect_external_def
;
11089 stmt_vinfo
= vect_stmt_to_vectorize (stmt_vinfo
);
11090 def_stmt
= stmt_vinfo
->stmt
;
11091 switch (gimple_code (def_stmt
))
11094 case GIMPLE_ASSIGN
:
11096 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
11099 *dt
= vect_unknown_def_type
;
11102 if (def_stmt_info_out
)
11103 *def_stmt_info_out
= stmt_vinfo
;
11106 *def_stmt_out
= def_stmt
;
11109 if (dump_enabled_p ())
11111 dump_printf (MSG_NOTE
, ", type of def: ");
11114 case vect_uninitialized_def
:
11115 dump_printf (MSG_NOTE
, "uninitialized\n");
11117 case vect_constant_def
:
11118 dump_printf (MSG_NOTE
, "constant\n");
11120 case vect_external_def
:
11121 dump_printf (MSG_NOTE
, "external\n");
11123 case vect_internal_def
:
11124 dump_printf (MSG_NOTE
, "internal\n");
11126 case vect_induction_def
:
11127 dump_printf (MSG_NOTE
, "induction\n");
11129 case vect_reduction_def
:
11130 dump_printf (MSG_NOTE
, "reduction\n");
11132 case vect_double_reduction_def
:
11133 dump_printf (MSG_NOTE
, "double reduction\n");
11135 case vect_nested_cycle
:
11136 dump_printf (MSG_NOTE
, "nested cycle\n");
11138 case vect_unknown_def_type
:
11139 dump_printf (MSG_NOTE
, "unknown\n");
11144 if (*dt
== vect_unknown_def_type
)
11146 if (dump_enabled_p ())
11147 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
11148 "Unsupported pattern.\n");
11155 /* Function vect_is_simple_use.
11157 Same as vect_is_simple_use but also determines the vector operand
11158 type of OPERAND and stores it to *VECTYPE. If the definition of
11159 OPERAND is vect_uninitialized_def, vect_constant_def or
11160 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
11161 is responsible to compute the best suited vector type for the
11165 vect_is_simple_use (tree operand
, vec_info
*vinfo
, enum vect_def_type
*dt
,
11166 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
,
11167 gimple
**def_stmt_out
)
11169 stmt_vec_info def_stmt_info
;
11171 if (!vect_is_simple_use (operand
, vinfo
, dt
, &def_stmt_info
, &def_stmt
))
11175 *def_stmt_out
= def_stmt
;
11176 if (def_stmt_info_out
)
11177 *def_stmt_info_out
= def_stmt_info
;
11179 /* Now get a vector type if the def is internal, otherwise supply
11180 NULL_TREE and leave it up to the caller to figure out a proper
11181 type for the use stmt. */
11182 if (*dt
== vect_internal_def
11183 || *dt
== vect_induction_def
11184 || *dt
== vect_reduction_def
11185 || *dt
== vect_double_reduction_def
11186 || *dt
== vect_nested_cycle
)
11188 *vectype
= STMT_VINFO_VECTYPE (def_stmt_info
);
11189 gcc_assert (*vectype
!= NULL_TREE
);
11190 if (dump_enabled_p ())
11191 dump_printf_loc (MSG_NOTE
, vect_location
,
11192 "vect_is_simple_use: vectype %T\n", *vectype
);
11194 else if (*dt
== vect_uninitialized_def
11195 || *dt
== vect_constant_def
11196 || *dt
== vect_external_def
)
11197 *vectype
= NULL_TREE
;
11199 gcc_unreachable ();
11204 /* Function vect_is_simple_use.
11206 Same as vect_is_simple_use but determines the operand by operand
11207 position OPERAND from either STMT or SLP_NODE, filling in *OP
11208 and *SLP_DEF (when SLP_NODE is not NULL). */
11211 vect_is_simple_use (vec_info
*vinfo
, stmt_vec_info stmt
, slp_tree slp_node
,
11212 unsigned operand
, tree
*op
, slp_tree
*slp_def
,
11213 enum vect_def_type
*dt
,
11214 tree
*vectype
, stmt_vec_info
*def_stmt_info_out
)
11218 slp_tree child
= SLP_TREE_CHILDREN (slp_node
)[operand
];
11220 if (SLP_TREE_DEF_TYPE (child
) == vect_internal_def
)
11221 *op
= gimple_get_lhs (SLP_TREE_REPRESENTATIVE (child
)->stmt
);
11224 if (def_stmt_info_out
)
11225 *def_stmt_info_out
= NULL
;
11226 *op
= SLP_TREE_SCALAR_OPS (child
)[0];
11227 *dt
= SLP_TREE_DEF_TYPE (child
);
11228 *vectype
= SLP_TREE_VECTYPE (child
);
11234 if (gassign
*ass
= dyn_cast
<gassign
*> (stmt
->stmt
))
11236 if (gimple_assign_rhs_code (ass
) == COND_EXPR
11237 && COMPARISON_CLASS_P (gimple_assign_rhs1 (ass
)))
11240 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), operand
);
11242 *op
= gimple_op (ass
, operand
);
11244 else if (gimple_assign_rhs_code (ass
) == VIEW_CONVERT_EXPR
)
11245 *op
= TREE_OPERAND (gimple_assign_rhs1 (ass
), 0);
11247 *op
= gimple_op (ass
, operand
+ 1);
11249 else if (gcall
*call
= dyn_cast
<gcall
*> (stmt
->stmt
))
11251 if (gimple_call_internal_p (call
)
11252 && internal_store_fn_p (gimple_call_internal_fn (call
)))
11253 operand
= internal_fn_stored_value_index (gimple_call_internal_fn
11255 *op
= gimple_call_arg (call
, operand
);
11258 gcc_unreachable ();
11261 /* ??? We might want to update *vectype from *slp_def here though
11262 when sharing nodes this would prevent unsharing in the caller. */
11263 return vect_is_simple_use (*op
, vinfo
, dt
, vectype
, def_stmt_info_out
);
11266 /* If OP is not NULL and is external or constant update its vector
11267 type with VECTYPE. Returns true if successful or false if not,
11268 for example when conflicting vector types are present. */
11271 vect_maybe_update_slp_op_vectype (slp_tree op
, tree vectype
)
11273 if (!op
|| SLP_TREE_DEF_TYPE (op
) == vect_internal_def
)
11275 if (SLP_TREE_VECTYPE (op
))
11276 return types_compatible_p (SLP_TREE_VECTYPE (op
), vectype
);
11277 SLP_TREE_VECTYPE (op
) = vectype
;
11281 /* Function supportable_widening_operation
11283 Check whether an operation represented by the code CODE is a
11284 widening operation that is supported by the target platform in
11285 vector form (i.e., when operating on arguments of type VECTYPE_IN
11286 producing a result of type VECTYPE_OUT).
11288 Widening operations we currently support are NOP (CONVERT), FLOAT,
11289 FIX_TRUNC and WIDEN_MULT. This function checks if these operations
11290 are supported by the target platform either directly (via vector
11291 tree-codes), or via target builtins.
11294 - CODE1 and CODE2 are codes of vector operations to be used when
11295 vectorizing the operation, if available.
11296 - MULTI_STEP_CVT determines the number of required intermediate steps in
11297 case of multi-step conversion (like char->short->int - in that case
11298 MULTI_STEP_CVT will be 1).
11299 - INTERM_TYPES contains the intermediate type required to perform the
11300 widening operation (short in the above example). */
11303 supportable_widening_operation (vec_info
*vinfo
,
11304 enum tree_code code
, stmt_vec_info stmt_info
,
11305 tree vectype_out
, tree vectype_in
,
11306 enum tree_code
*code1
, enum tree_code
*code2
,
11307 int *multi_step_cvt
,
11308 vec
<tree
> *interm_types
)
11310 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
11311 class loop
*vect_loop
= NULL
;
11312 machine_mode vec_mode
;
11313 enum insn_code icode1
, icode2
;
11314 optab optab1
, optab2
;
11315 tree vectype
= vectype_in
;
11316 tree wide_vectype
= vectype_out
;
11317 enum tree_code c1
, c2
;
11319 tree prev_type
, intermediate_type
;
11320 machine_mode intermediate_mode
, prev_mode
;
11321 optab optab3
, optab4
;
11323 *multi_step_cvt
= 0;
11325 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
11329 case WIDEN_MULT_EXPR
:
11330 /* The result of a vectorized widening operation usually requires
11331 two vectors (because the widened results do not fit into one vector).
11332 The generated vector results would normally be expected to be
11333 generated in the same order as in the original scalar computation,
11334 i.e. if 8 results are generated in each vector iteration, they are
11335 to be organized as follows:
11336 vect1: [res1,res2,res3,res4],
11337 vect2: [res5,res6,res7,res8].
11339 However, in the special case that the result of the widening
11340 operation is used in a reduction computation only, the order doesn't
11341 matter (because when vectorizing a reduction we change the order of
11342 the computation). Some targets can take advantage of this and
11343 generate more efficient code. For example, targets like Altivec,
11344 that support widen_mult using a sequence of {mult_even,mult_odd}
11345 generate the following vectors:
11346 vect1: [res1,res3,res5,res7],
11347 vect2: [res2,res4,res6,res8].
11349 When vectorizing outer-loops, we execute the inner-loop sequentially
11350 (each vectorized inner-loop iteration contributes to VF outer-loop
11351 iterations in parallel). We therefore don't allow to change the
11352 order of the computation in the inner-loop during outer-loop
11354 /* TODO: Another case in which order doesn't *really* matter is when we
11355 widen and then contract again, e.g. (short)((int)x * y >> 8).
11356 Normally, pack_trunc performs an even/odd permute, whereas the
11357 repack from an even/odd expansion would be an interleave, which
11358 would be significantly simpler for e.g. AVX2. */
11359 /* In any case, in order to avoid duplicating the code below, recurse
11360 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
11361 are properly set up for the caller. If we fail, we'll continue with
11362 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
11364 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
11365 && !nested_in_vect_loop_p (vect_loop
, stmt_info
)
11366 && supportable_widening_operation (vinfo
, VEC_WIDEN_MULT_EVEN_EXPR
,
11367 stmt_info
, vectype_out
,
11368 vectype_in
, code1
, code2
,
11369 multi_step_cvt
, interm_types
))
11371 /* Elements in a vector with vect_used_by_reduction property cannot
11372 be reordered if the use chain with this property does not have the
11373 same operation. One such an example is s += a * b, where elements
11374 in a and b cannot be reordered. Here we check if the vector defined
11375 by STMT is only directly used in the reduction statement. */
11376 tree lhs
= gimple_assign_lhs (stmt_info
->stmt
);
11377 stmt_vec_info use_stmt_info
= loop_info
->lookup_single_use (lhs
);
11379 && STMT_VINFO_DEF_TYPE (use_stmt_info
) == vect_reduction_def
)
11382 c1
= VEC_WIDEN_MULT_LO_EXPR
;
11383 c2
= VEC_WIDEN_MULT_HI_EXPR
;
11386 case DOT_PROD_EXPR
:
11387 c1
= DOT_PROD_EXPR
;
11388 c2
= DOT_PROD_EXPR
;
11396 case VEC_WIDEN_MULT_EVEN_EXPR
:
11397 /* Support the recursion induced just above. */
11398 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
11399 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
11402 case WIDEN_LSHIFT_EXPR
:
11403 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
11404 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
11408 c1
= VEC_UNPACK_LO_EXPR
;
11409 c2
= VEC_UNPACK_HI_EXPR
;
11413 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
11414 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
11417 case FIX_TRUNC_EXPR
:
11418 c1
= VEC_UNPACK_FIX_TRUNC_LO_EXPR
;
11419 c2
= VEC_UNPACK_FIX_TRUNC_HI_EXPR
;
11423 gcc_unreachable ();
11426 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
11427 std::swap (c1
, c2
);
11429 if (code
== FIX_TRUNC_EXPR
)
11431 /* The signedness is determined from output operand. */
11432 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11433 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
11435 else if (CONVERT_EXPR_CODE_P (code
)
11436 && VECTOR_BOOLEAN_TYPE_P (wide_vectype
)
11437 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11438 && TYPE_MODE (wide_vectype
) == TYPE_MODE (vectype
)
11439 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11441 /* If the input and result modes are the same, a different optab
11442 is needed where we pass in the number of units in vectype. */
11443 optab1
= vec_unpacks_sbool_lo_optab
;
11444 optab2
= vec_unpacks_sbool_hi_optab
;
11448 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11449 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
11452 if (!optab1
|| !optab2
)
11455 vec_mode
= TYPE_MODE (vectype
);
11456 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
11457 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
11463 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11464 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11466 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11468 /* For scalar masks we may have different boolean
11469 vector types having the same QImode. Thus we
11470 add additional check for elements number. */
11471 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
),
11472 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
11476 /* Check if it's a multi-step conversion that can be done using intermediate
11479 prev_type
= vectype
;
11480 prev_mode
= vec_mode
;
11482 if (!CONVERT_EXPR_CODE_P (code
))
11485 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11486 intermediate steps in promotion sequence. We try
11487 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
11489 interm_types
->create (MAX_INTERM_CVT_STEPS
);
11490 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
11492 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
11493 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
11495 = vect_halve_mask_nunits (prev_type
, intermediate_mode
);
11498 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
11499 TYPE_UNSIGNED (prev_type
));
11501 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
11502 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
11503 && intermediate_mode
== prev_mode
11504 && SCALAR_INT_MODE_P (prev_mode
))
11506 /* If the input and result modes are the same, a different optab
11507 is needed where we pass in the number of units in vectype. */
11508 optab3
= vec_unpacks_sbool_lo_optab
;
11509 optab4
= vec_unpacks_sbool_hi_optab
;
11513 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
11514 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
11517 if (!optab3
|| !optab4
11518 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
11519 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
11520 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
11521 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
11522 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
11523 == CODE_FOR_nothing
)
11524 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
11525 == CODE_FOR_nothing
))
11528 interm_types
->quick_push (intermediate_type
);
11529 (*multi_step_cvt
)++;
11531 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
11532 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
11534 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11536 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
),
11537 TYPE_VECTOR_SUBPARTS (wide_vectype
) * 2))
11541 prev_type
= intermediate_type
;
11542 prev_mode
= intermediate_mode
;
11545 interm_types
->release ();
11550 /* Function supportable_narrowing_operation
11552 Check whether an operation represented by the code CODE is a
11553 narrowing operation that is supported by the target platform in
11554 vector form (i.e., when operating on arguments of type VECTYPE_IN
11555 and producing a result of type VECTYPE_OUT).
11557 Narrowing operations we currently support are NOP (CONVERT), FIX_TRUNC
11558 and FLOAT. This function checks if these operations are supported by
11559 the target platform directly via vector tree-codes.
11562 - CODE1 is the code of a vector operation to be used when
11563 vectorizing the operation, if available.
11564 - MULTI_STEP_CVT determines the number of required intermediate steps in
11565 case of multi-step conversion (like int->short->char - in that case
11566 MULTI_STEP_CVT will be 1).
11567 - INTERM_TYPES contains the intermediate type required to perform the
11568 narrowing operation (short in the above example). */
11571 supportable_narrowing_operation (enum tree_code code
,
11572 tree vectype_out
, tree vectype_in
,
11573 enum tree_code
*code1
, int *multi_step_cvt
,
11574 vec
<tree
> *interm_types
)
11576 machine_mode vec_mode
;
11577 enum insn_code icode1
;
11578 optab optab1
, interm_optab
;
11579 tree vectype
= vectype_in
;
11580 tree narrow_vectype
= vectype_out
;
11582 tree intermediate_type
, prev_type
;
11583 machine_mode intermediate_mode
, prev_mode
;
11587 *multi_step_cvt
= 0;
11591 c1
= VEC_PACK_TRUNC_EXPR
;
11592 if (VECTOR_BOOLEAN_TYPE_P (narrow_vectype
)
11593 && VECTOR_BOOLEAN_TYPE_P (vectype
)
11594 && TYPE_MODE (narrow_vectype
) == TYPE_MODE (vectype
)
11595 && SCALAR_INT_MODE_P (TYPE_MODE (vectype
)))
11596 optab1
= vec_pack_sbool_trunc_optab
;
11598 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11601 case FIX_TRUNC_EXPR
:
11602 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
11603 /* The signedness is determined from output operand. */
11604 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
11608 c1
= VEC_PACK_FLOAT_EXPR
;
11609 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
11613 gcc_unreachable ();
11619 vec_mode
= TYPE_MODE (vectype
);
11620 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
11625 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
11627 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11629 /* For scalar masks we may have different boolean
11630 vector types having the same QImode. Thus we
11631 add additional check for elements number. */
11632 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype
) * 2,
11633 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
11637 if (code
== FLOAT_EXPR
)
11640 /* Check if it's a multi-step conversion that can be done using intermediate
11642 prev_mode
= vec_mode
;
11643 prev_type
= vectype
;
11644 if (code
== FIX_TRUNC_EXPR
)
11645 uns
= TYPE_UNSIGNED (vectype_out
);
11647 uns
= TYPE_UNSIGNED (vectype
);
11649 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
11650 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
11651 costly than signed. */
11652 if (code
== FIX_TRUNC_EXPR
&& uns
)
11654 enum insn_code icode2
;
11657 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
11659 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
11660 if (interm_optab
!= unknown_optab
11661 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
11662 && insn_data
[icode1
].operand
[0].mode
11663 == insn_data
[icode2
].operand
[0].mode
)
11666 optab1
= interm_optab
;
11671 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
11672 intermediate steps in promotion sequence. We try
11673 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
11674 interm_types
->create (MAX_INTERM_CVT_STEPS
);
11675 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
11677 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
11678 if (VECTOR_BOOLEAN_TYPE_P (prev_type
))
11680 = vect_double_mask_nunits (prev_type
, intermediate_mode
);
11683 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
11684 if (VECTOR_BOOLEAN_TYPE_P (intermediate_type
)
11685 && VECTOR_BOOLEAN_TYPE_P (prev_type
)
11686 && intermediate_mode
== prev_mode
11687 && SCALAR_INT_MODE_P (prev_mode
))
11688 interm_optab
= vec_pack_sbool_trunc_optab
;
11691 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
11694 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
11695 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
11696 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
11697 == CODE_FOR_nothing
))
11700 interm_types
->quick_push (intermediate_type
);
11701 (*multi_step_cvt
)++;
11703 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
11705 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11707 if (known_eq (TYPE_VECTOR_SUBPARTS (intermediate_type
) * 2,
11708 TYPE_VECTOR_SUBPARTS (narrow_vectype
)))
11712 prev_mode
= intermediate_mode
;
11713 prev_type
= intermediate_type
;
11714 optab1
= interm_optab
;
11717 interm_types
->release ();
11721 /* Generate and return a statement that sets vector mask MASK such that
11722 MASK[I] is true iff J + START_INDEX < END_INDEX for all J <= I. */
11725 vect_gen_while (tree mask
, tree start_index
, tree end_index
)
11727 tree cmp_type
= TREE_TYPE (start_index
);
11728 tree mask_type
= TREE_TYPE (mask
);
11729 gcc_checking_assert (direct_internal_fn_supported_p (IFN_WHILE_ULT
,
11730 cmp_type
, mask_type
,
11731 OPTIMIZE_FOR_SPEED
));
11732 gcall
*call
= gimple_build_call_internal (IFN_WHILE_ULT
, 3,
11733 start_index
, end_index
,
11734 build_zero_cst (mask_type
));
11735 gimple_call_set_lhs (call
, mask
);
11739 /* Generate a vector mask of type MASK_TYPE for which index I is false iff
11740 J + START_INDEX < END_INDEX for all J <= I. Add the statements to SEQ. */
11743 vect_gen_while_not (gimple_seq
*seq
, tree mask_type
, tree start_index
,
11746 tree tmp
= make_ssa_name (mask_type
);
11747 gcall
*call
= vect_gen_while (tmp
, start_index
, end_index
);
11748 gimple_seq_add_stmt (seq
, call
);
11749 return gimple_build (seq
, BIT_NOT_EXPR
, mask_type
, tmp
);
11752 /* Try to compute the vector types required to vectorize STMT_INFO,
11753 returning true on success and false if vectorization isn't possible.
11754 If GROUP_SIZE is nonzero and we're performing BB vectorization,
11755 take sure that the number of elements in the vectors is no bigger
11760 - Set *STMT_VECTYPE_OUT to:
11761 - NULL_TREE if the statement doesn't need to be vectorized;
11762 - the equivalent of STMT_VINFO_VECTYPE otherwise.
11764 - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum
11765 number of units needed to vectorize STMT_INFO, or NULL_TREE if the
11766 statement does not help to determine the overall number of units. */
11769 vect_get_vector_types_for_stmt (vec_info
*vinfo
, stmt_vec_info stmt_info
,
11770 tree
*stmt_vectype_out
,
11771 tree
*nunits_vectype_out
,
11772 unsigned int group_size
)
11774 gimple
*stmt
= stmt_info
->stmt
;
11776 /* For BB vectorization, we should always have a group size once we've
11777 constructed the SLP tree; the only valid uses of zero GROUP_SIZEs
11778 are tentative requests during things like early data reference
11779 analysis and pattern recognition. */
11780 if (is_a
<bb_vec_info
> (vinfo
))
11781 gcc_assert (vinfo
->slp_instances
.is_empty () || group_size
!= 0);
11785 *stmt_vectype_out
= NULL_TREE
;
11786 *nunits_vectype_out
= NULL_TREE
;
11788 if (gimple_get_lhs (stmt
) == NULL_TREE
11789 /* MASK_STORE has no lhs, but is ok. */
11790 && !gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
11792 if (is_a
<gcall
*> (stmt
))
11794 /* Ignore calls with no lhs. These must be calls to
11795 #pragma omp simd functions, and what vectorization factor
11796 it really needs can't be determined until
11797 vectorizable_simd_clone_call. */
11798 if (dump_enabled_p ())
11799 dump_printf_loc (MSG_NOTE
, vect_location
,
11800 "defer to SIMD clone analysis.\n");
11801 return opt_result::success ();
11804 return opt_result::failure_at (stmt
,
11805 "not vectorized: irregular stmt.%G", stmt
);
11808 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))))
11809 return opt_result::failure_at (stmt
,
11810 "not vectorized: vector stmt in loop:%G",
11814 tree scalar_type
= NULL_TREE
;
11815 if (group_size
== 0 && STMT_VINFO_VECTYPE (stmt_info
))
11817 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
11818 if (dump_enabled_p ())
11819 dump_printf_loc (MSG_NOTE
, vect_location
,
11820 "precomputed vectype: %T\n", vectype
);
11822 else if (vect_use_mask_type_p (stmt_info
))
11824 unsigned int precision
= stmt_info
->mask_precision
;
11825 scalar_type
= build_nonstandard_integer_type (precision
, 1);
11826 vectype
= get_mask_type_for_scalar_type (vinfo
, scalar_type
, group_size
);
11828 return opt_result::failure_at (stmt
, "not vectorized: unsupported"
11829 " data-type %T\n", scalar_type
);
11830 if (dump_enabled_p ())
11831 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
11835 if (data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
))
11836 scalar_type
= TREE_TYPE (DR_REF (dr
));
11837 else if (gimple_call_internal_p (stmt
, IFN_MASK_STORE
))
11838 scalar_type
= TREE_TYPE (gimple_call_arg (stmt
, 3));
11840 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
11842 if (dump_enabled_p ())
11845 dump_printf_loc (MSG_NOTE
, vect_location
,
11846 "get vectype for scalar type (group size %d):"
11847 " %T\n", group_size
, scalar_type
);
11849 dump_printf_loc (MSG_NOTE
, vect_location
,
11850 "get vectype for scalar type: %T\n", scalar_type
);
11852 vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
, group_size
);
11854 return opt_result::failure_at (stmt
,
11856 " unsupported data-type %T\n",
11859 if (dump_enabled_p ())
11860 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: %T\n", vectype
);
11862 *stmt_vectype_out
= vectype
;
11864 /* Don't try to compute scalar types if the stmt produces a boolean
11865 vector; use the existing vector type instead. */
11866 tree nunits_vectype
= vectype
;
11867 if (!VECTOR_BOOLEAN_TYPE_P (vectype
))
11869 /* The number of units is set according to the smallest scalar
11870 type (or the largest vector size, but we only support one
11871 vector size per vectorization). */
11872 HOST_WIDE_INT dummy
;
11873 scalar_type
= vect_get_smallest_scalar_type (stmt_info
, &dummy
, &dummy
);
11874 if (scalar_type
!= TREE_TYPE (vectype
))
11876 if (dump_enabled_p ())
11877 dump_printf_loc (MSG_NOTE
, vect_location
,
11878 "get vectype for smallest scalar type: %T\n",
11880 nunits_vectype
= get_vectype_for_scalar_type (vinfo
, scalar_type
,
11882 if (!nunits_vectype
)
11883 return opt_result::failure_at
11884 (stmt
, "not vectorized: unsupported data-type %T\n",
11886 if (dump_enabled_p ())
11887 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits vectype: %T\n",
11892 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype
),
11893 TYPE_VECTOR_SUBPARTS (*stmt_vectype_out
)));
11895 if (dump_enabled_p ())
11897 dump_printf_loc (MSG_NOTE
, vect_location
, "nunits = ");
11898 dump_dec (MSG_NOTE
, TYPE_VECTOR_SUBPARTS (nunits_vectype
));
11899 dump_printf (MSG_NOTE
, "\n");
11902 *nunits_vectype_out
= nunits_vectype
;
11903 return opt_result::success ();