/* Statement Analysis and Transformation for Vectorization
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
29 #include "stor-layout.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
35 #include "gimple-iterator.h"
36 #include "gimplify-me.h"
37 #include "gimple-ssa.h"
39 #include "tree-phinodes.h"
40 #include "ssa-iterators.h"
41 #include "stringpool.h"
42 #include "tree-ssanames.h"
43 #include "tree-ssa-loop-manip.h"
46 #include "recog.h" /* FIXME: for insn_data */
48 #include "diagnostic-core.h"
49 #include "tree-vectorizer.h"
52 /* For lang_hooks.types.type_for_mode. */
53 #include "langhooks.h"
55 /* Return the vectorized type for the given statement. */
58 stmt_vectype (struct _stmt_vec_info
*stmt_info
)
60 return STMT_VINFO_VECTYPE (stmt_info
);
63 /* Return TRUE iff the given statement is in an inner loop relative to
64 the loop being vectorized. */
66 stmt_in_inner_loop_p (struct _stmt_vec_info
*stmt_info
)
68 gimple stmt
= STMT_VINFO_STMT (stmt_info
);
69 basic_block bb
= gimple_bb (stmt
);
70 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
76 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
78 return (bb
->loop_father
== loop
->inner
);
81 /* Record the cost of a statement, either by directly informing the
82 target model or by saving it in a vector for later processing.
83 Return a preliminary estimate of the statement's cost. */
86 record_stmt_cost (stmt_vector_for_cost
*body_cost_vec
, int count
,
87 enum vect_cost_for_stmt kind
, stmt_vec_info stmt_info
,
88 int misalign
, enum vect_cost_model_location where
)
92 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
93 add_stmt_info_to_vec (body_cost_vec
, count
, kind
,
94 stmt_info
? STMT_VINFO_STMT (stmt_info
) : NULL
,
97 (builtin_vectorization_cost (kind
, vectype
, misalign
) * count
);
102 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
103 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
104 void *target_cost_data
;
107 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
109 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
111 return add_stmt_cost (target_cost_data
, count
, kind
, stmt_info
,
116 /* Return a variable of type ELEM_TYPE[NELEMS]. */
119 create_vector_array (tree elem_type
, unsigned HOST_WIDE_INT nelems
)
121 return create_tmp_var (build_array_type_nelts (elem_type
, nelems
),
125 /* ARRAY is an array of vectors created by create_vector_array.
126 Return an SSA_NAME for the vector in index N. The reference
127 is part of the vectorization of STMT and the vector is associated
128 with scalar destination SCALAR_DEST. */
131 read_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree scalar_dest
,
132 tree array
, unsigned HOST_WIDE_INT n
)
134 tree vect_type
, vect
, vect_name
, array_ref
;
137 gcc_assert (TREE_CODE (TREE_TYPE (array
)) == ARRAY_TYPE
);
138 vect_type
= TREE_TYPE (TREE_TYPE (array
));
139 vect
= vect_create_destination_var (scalar_dest
, vect_type
);
140 array_ref
= build4 (ARRAY_REF
, vect_type
, array
,
141 build_int_cst (size_type_node
, n
),
142 NULL_TREE
, NULL_TREE
);
144 new_stmt
= gimple_build_assign (vect
, array_ref
);
145 vect_name
= make_ssa_name (vect
, new_stmt
);
146 gimple_assign_set_lhs (new_stmt
, vect_name
);
147 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
152 /* ARRAY is an array of vectors created by create_vector_array.
153 Emit code to store SSA_NAME VECT in index N of the array.
154 The store is part of the vectorization of STMT. */
157 write_vector_array (gimple stmt
, gimple_stmt_iterator
*gsi
, tree vect
,
158 tree array
, unsigned HOST_WIDE_INT n
)
163 array_ref
= build4 (ARRAY_REF
, TREE_TYPE (vect
), array
,
164 build_int_cst (size_type_node
, n
),
165 NULL_TREE
, NULL_TREE
);
167 new_stmt
= gimple_build_assign (array_ref
, vect
);
168 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
171 /* PTR is a pointer to an array of type TYPE. Return a representation
172 of *PTR. The memory reference replaces those in FIRST_DR
176 create_array_ref (tree type
, tree ptr
, struct data_reference
*first_dr
)
178 tree mem_ref
, alias_ptr_type
;
180 alias_ptr_type
= reference_alias_ptr_type (DR_REF (first_dr
));
181 mem_ref
= build2 (MEM_REF
, type
, ptr
, build_int_cst (alias_ptr_type
, 0));
182 /* Arrays have the same alignment as their type. */
183 set_ptr_info_alignment (get_ptr_info (ptr
), TYPE_ALIGN_UNIT (type
), 0);
187 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
189 /* Function vect_mark_relevant.
191 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
194 vect_mark_relevant (vec
<gimple
> *worklist
, gimple stmt
,
195 enum vect_relevant relevant
, bool live_p
,
196 bool used_in_pattern
)
198 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
199 enum vect_relevant save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
200 bool save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
203 if (dump_enabled_p ())
204 dump_printf_loc (MSG_NOTE
, vect_location
,
205 "mark relevant %d, live %d.\n", relevant
, live_p
);
207 /* If this stmt is an original stmt in a pattern, we might need to mark its
208 related pattern stmt instead of the original stmt. However, such stmts
209 may have their own uses that are not in any pattern, in such cases the
210 stmt itself should be marked. */
211 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
214 if (!used_in_pattern
)
216 imm_use_iterator imm_iter
;
220 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
221 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
223 if (is_gimple_assign (stmt
))
224 lhs
= gimple_assign_lhs (stmt
);
226 lhs
= gimple_call_lhs (stmt
);
228 /* This use is out of pattern use, if LHS has other uses that are
229 pattern uses, we should mark the stmt itself, and not the pattern
231 if (TREE_CODE (lhs
) == SSA_NAME
)
232 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, lhs
)
234 if (is_gimple_debug (USE_STMT (use_p
)))
236 use_stmt
= USE_STMT (use_p
);
238 if (!flow_bb_inside_loop_p (loop
, gimple_bb (use_stmt
)))
241 if (vinfo_for_stmt (use_stmt
)
242 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt
)))
252 /* This is the last stmt in a sequence that was detected as a
253 pattern that can potentially be vectorized. Don't mark the stmt
254 as relevant/live because it's not going to be vectorized.
255 Instead mark the pattern-stmt that replaces it. */
257 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
259 if (dump_enabled_p ())
260 dump_printf_loc (MSG_NOTE
, vect_location
,
261 "last stmt in pattern. don't mark"
262 " relevant/live.\n");
263 stmt_info
= vinfo_for_stmt (pattern_stmt
);
264 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info
) == stmt
);
265 save_relevant
= STMT_VINFO_RELEVANT (stmt_info
);
266 save_live_p
= STMT_VINFO_LIVE_P (stmt_info
);
271 STMT_VINFO_LIVE_P (stmt_info
) |= live_p
;
272 if (relevant
> STMT_VINFO_RELEVANT (stmt_info
))
273 STMT_VINFO_RELEVANT (stmt_info
) = relevant
;
275 if (STMT_VINFO_RELEVANT (stmt_info
) == save_relevant
276 && STMT_VINFO_LIVE_P (stmt_info
) == save_live_p
)
278 if (dump_enabled_p ())
279 dump_printf_loc (MSG_NOTE
, vect_location
,
280 "already marked relevant/live.\n");
284 worklist
->safe_push (stmt
);
288 /* Function vect_stmt_relevant_p.
290 Return true if STMT in loop that is represented by LOOP_VINFO is
291 "relevant for vectorization".
293 A stmt is considered "relevant for vectorization" if:
294 - it has uses outside the loop.
295 - it has vdefs (it alters memory).
296 - control stmts in the loop (except for the exit condition).
298 CHECKME: what other side effects would the vectorizer allow? */
301 vect_stmt_relevant_p (gimple stmt
, loop_vec_info loop_vinfo
,
302 enum vect_relevant
*relevant
, bool *live_p
)
304 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
306 imm_use_iterator imm_iter
;
310 *relevant
= vect_unused_in_scope
;
313 /* cond stmt other than loop exit cond. */
314 if (is_ctrl_stmt (stmt
)
315 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt
))
316 != loop_exit_ctrl_vec_info_type
)
317 *relevant
= vect_used_in_scope
;
319 /* changing memory. */
320 if (gimple_code (stmt
) != GIMPLE_PHI
)
321 if (gimple_vdef (stmt
))
323 if (dump_enabled_p ())
324 dump_printf_loc (MSG_NOTE
, vect_location
,
325 "vec_stmt_relevant_p: stmt has vdefs.\n");
326 *relevant
= vect_used_in_scope
;
329 /* uses outside the loop. */
330 FOR_EACH_PHI_OR_STMT_DEF (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
332 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, DEF_FROM_PTR (def_p
))
334 basic_block bb
= gimple_bb (USE_STMT (use_p
));
335 if (!flow_bb_inside_loop_p (loop
, bb
))
337 if (dump_enabled_p ())
338 dump_printf_loc (MSG_NOTE
, vect_location
,
339 "vec_stmt_relevant_p: used out of loop.\n");
341 if (is_gimple_debug (USE_STMT (use_p
)))
344 /* We expect all such uses to be in the loop exit phis
345 (because of loop closed form) */
346 gcc_assert (gimple_code (USE_STMT (use_p
)) == GIMPLE_PHI
);
347 gcc_assert (bb
== single_exit (loop
)->dest
);
354 return (*live_p
|| *relevant
);
358 /* Function exist_non_indexing_operands_for_use_p
360 USE is one of the uses attached to STMT. Check if USE is
361 used in STMT for anything other than indexing an array. */
364 exist_non_indexing_operands_for_use_p (tree use
, gimple stmt
)
367 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
369 /* USE corresponds to some operand in STMT. If there is no data
370 reference in STMT, then any operand that corresponds to USE
371 is not indexing an array. */
372 if (!STMT_VINFO_DATA_REF (stmt_info
))
375 /* STMT has a data_ref. FORNOW this means that its of one of
379 (This should have been verified in analyze_data_refs).
381 'var' in the second case corresponds to a def, not a use,
382 so USE cannot correspond to any operands that are not used
385 Therefore, all we need to check is if STMT falls into the
386 first case, and whether var corresponds to USE. */
388 if (!gimple_assign_copy_p (stmt
))
390 if (TREE_CODE (gimple_assign_lhs (stmt
)) == SSA_NAME
)
392 operand
= gimple_assign_rhs1 (stmt
);
393 if (TREE_CODE (operand
) != SSA_NAME
)
404 Function process_use.
407 - a USE in STMT in a loop represented by LOOP_VINFO
408 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
409 that defined USE. This is done by calling mark_relevant and passing it
410 the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).
411 - FORCE is true if exist_non_indexing_operands_for_use_p check shouldn't
415 Generally, LIVE_P and RELEVANT are used to define the liveness and
416 relevance info of the DEF_STMT of this USE:
417 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
418 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
420 - case 1: If USE is used only for address computations (e.g. array indexing),
421 which does not need to be directly vectorized, then the liveness/relevance
422 of the respective DEF_STMT is left unchanged.
423 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
424 skip DEF_STMT cause it had already been processed.
425 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
426 be modified accordingly.
428 Return true if everything is as expected. Return false otherwise. */
431 process_use (gimple stmt
, tree use
, loop_vec_info loop_vinfo
, bool live_p
,
432 enum vect_relevant relevant
, vec
<gimple
> *worklist
,
435 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
436 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
437 stmt_vec_info dstmt_vinfo
;
438 basic_block bb
, def_bb
;
441 enum vect_def_type dt
;
443 /* case 1: we are only interested in uses that need to be vectorized. Uses
444 that are used for address computation are not considered relevant. */
445 if (!force
&& !exist_non_indexing_operands_for_use_p (use
, stmt
))
448 if (!vect_is_simple_use (use
, stmt
, loop_vinfo
, NULL
, &def_stmt
, &def
, &dt
))
450 if (dump_enabled_p ())
451 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
452 "not vectorized: unsupported use in stmt.\n");
456 if (!def_stmt
|| gimple_nop_p (def_stmt
))
459 def_bb
= gimple_bb (def_stmt
);
460 if (!flow_bb_inside_loop_p (loop
, def_bb
))
462 if (dump_enabled_p ())
463 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt is out of loop.\n");
467 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
468 DEF_STMT must have already been processed, because this should be the
469 only way that STMT, which is a reduction-phi, was put in the worklist,
470 as there should be no other uses for DEF_STMT in the loop. So we just
471 check that everything is as expected, and we are done. */
472 dstmt_vinfo
= vinfo_for_stmt (def_stmt
);
473 bb
= gimple_bb (stmt
);
474 if (gimple_code (stmt
) == GIMPLE_PHI
475 && STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
476 && gimple_code (def_stmt
) != GIMPLE_PHI
477 && STMT_VINFO_DEF_TYPE (dstmt_vinfo
) == vect_reduction_def
478 && bb
->loop_father
== def_bb
->loop_father
)
480 if (dump_enabled_p ())
481 dump_printf_loc (MSG_NOTE
, vect_location
,
482 "reduc-stmt defining reduc-phi in the same nest.\n");
483 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo
))
484 dstmt_vinfo
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo
));
485 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo
) < vect_used_by_reduction
);
486 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo
)
487 || STMT_VINFO_RELEVANT (dstmt_vinfo
) > vect_unused_in_scope
);
491 /* case 3a: outer-loop stmt defining an inner-loop stmt:
492 outer-loop-header-bb:
498 if (flow_loop_nested_p (def_bb
->loop_father
, bb
->loop_father
))
500 if (dump_enabled_p ())
501 dump_printf_loc (MSG_NOTE
, vect_location
,
502 "outer-loop def-stmt defining inner-loop stmt.\n");
506 case vect_unused_in_scope
:
507 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_nested_cycle
) ?
508 vect_used_in_scope
: vect_unused_in_scope
;
511 case vect_used_in_outer_by_reduction
:
512 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
513 relevant
= vect_used_by_reduction
;
516 case vect_used_in_outer
:
517 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_reduction_def
);
518 relevant
= vect_used_in_scope
;
521 case vect_used_in_scope
:
529 /* case 3b: inner-loop stmt defining an outer-loop stmt:
530 outer-loop-header-bb:
534 outer-loop-tail-bb (or outer-loop-exit-bb in double reduction):
536 else if (flow_loop_nested_p (bb
->loop_father
, def_bb
->loop_father
))
538 if (dump_enabled_p ())
539 dump_printf_loc (MSG_NOTE
, vect_location
,
540 "inner-loop def-stmt defining outer-loop stmt.\n");
544 case vect_unused_in_scope
:
545 relevant
= (STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_reduction_def
546 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) == vect_double_reduction_def
) ?
547 vect_used_in_outer_by_reduction
: vect_unused_in_scope
;
550 case vect_used_by_reduction
:
551 relevant
= vect_used_in_outer_by_reduction
;
554 case vect_used_in_scope
:
555 relevant
= vect_used_in_outer
;
563 vect_mark_relevant (worklist
, def_stmt
, relevant
, live_p
,
564 is_pattern_stmt_p (stmt_vinfo
));
569 /* Function vect_mark_stmts_to_be_vectorized.
571 Not all stmts in the loop need to be vectorized. For example:
580 Stmt 1 and 3 do not need to be vectorized, because loop control and
581 addressing of vectorized data-refs are handled differently.
583 This pass detects such stmts. */
586 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo
)
588 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
589 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
590 unsigned int nbbs
= loop
->num_nodes
;
591 gimple_stmt_iterator si
;
594 stmt_vec_info stmt_vinfo
;
598 enum vect_relevant relevant
, tmp_relevant
;
599 enum vect_def_type def_type
;
601 if (dump_enabled_p ())
602 dump_printf_loc (MSG_NOTE
, vect_location
,
603 "=== vect_mark_stmts_to_be_vectorized ===\n");
605 stack_vec
<gimple
, 64> worklist
;
607 /* 1. Init worklist. */
608 for (i
= 0; i
< nbbs
; i
++)
611 for (si
= gsi_start_phis (bb
); !gsi_end_p (si
); gsi_next (&si
))
614 if (dump_enabled_p ())
616 dump_printf_loc (MSG_NOTE
, vect_location
, "init: phi relevant? ");
617 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, phi
, 0);
618 dump_printf (MSG_NOTE
, "\n");
621 if (vect_stmt_relevant_p (phi
, loop_vinfo
, &relevant
, &live_p
))
622 vect_mark_relevant (&worklist
, phi
, relevant
, live_p
, false);
624 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
626 stmt
= gsi_stmt (si
);
627 if (dump_enabled_p ())
629 dump_printf_loc (MSG_NOTE
, vect_location
, "init: stmt relevant? ");
630 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
631 dump_printf (MSG_NOTE
, "\n");
634 if (vect_stmt_relevant_p (stmt
, loop_vinfo
, &relevant
, &live_p
))
635 vect_mark_relevant (&worklist
, stmt
, relevant
, live_p
, false);
639 /* 2. Process_worklist */
640 while (worklist
.length () > 0)
645 stmt
= worklist
.pop ();
646 if (dump_enabled_p ())
648 dump_printf_loc (MSG_NOTE
, vect_location
, "worklist: examine stmt: ");
649 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
650 dump_printf (MSG_NOTE
, "\n");
653 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
654 (DEF_STMT) as relevant/irrelevant and live/dead according to the
655 liveness and relevance properties of STMT. */
656 stmt_vinfo
= vinfo_for_stmt (stmt
);
657 relevant
= STMT_VINFO_RELEVANT (stmt_vinfo
);
658 live_p
= STMT_VINFO_LIVE_P (stmt_vinfo
);
660 /* Generally, the liveness and relevance properties of STMT are
661 propagated as is to the DEF_STMTs of its USEs:
662 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
663 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
665 One exception is when STMT has been identified as defining a reduction
666 variable; in this case we set the liveness/relevance as follows:
668 relevant = vect_used_by_reduction
669 This is because we distinguish between two kinds of relevant stmts -
670 those that are used by a reduction computation, and those that are
671 (also) used by a regular computation. This allows us later on to
672 identify stmts that are used solely by a reduction, and therefore the
673 order of the results that they produce does not have to be kept. */
675 def_type
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
676 tmp_relevant
= relevant
;
679 case vect_reduction_def
:
680 switch (tmp_relevant
)
682 case vect_unused_in_scope
:
683 relevant
= vect_used_by_reduction
;
686 case vect_used_by_reduction
:
687 if (gimple_code (stmt
) == GIMPLE_PHI
)
692 if (dump_enabled_p ())
693 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
694 "unsupported use of reduction.\n");
701 case vect_nested_cycle
:
702 if (tmp_relevant
!= vect_unused_in_scope
703 && tmp_relevant
!= vect_used_in_outer_by_reduction
704 && tmp_relevant
!= vect_used_in_outer
)
706 if (dump_enabled_p ())
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
708 "unsupported use of nested cycle.\n");
716 case vect_double_reduction_def
:
717 if (tmp_relevant
!= vect_unused_in_scope
718 && tmp_relevant
!= vect_used_by_reduction
)
720 if (dump_enabled_p ())
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
722 "unsupported use of double reduction.\n");
734 if (is_pattern_stmt_p (stmt_vinfo
))
736 /* Pattern statements are not inserted into the code, so
737 FOR_EACH_PHI_OR_STMT_USE optimizes their operands out, and we
738 have to scan the RHS or function arguments instead. */
739 if (is_gimple_assign (stmt
))
741 enum tree_code rhs_code
= gimple_assign_rhs_code (stmt
);
742 tree op
= gimple_assign_rhs1 (stmt
);
745 if (rhs_code
== COND_EXPR
&& COMPARISON_CLASS_P (op
))
747 if (!process_use (stmt
, TREE_OPERAND (op
, 0), loop_vinfo
,
748 live_p
, relevant
, &worklist
, false)
749 || !process_use (stmt
, TREE_OPERAND (op
, 1), loop_vinfo
,
750 live_p
, relevant
, &worklist
, false))
754 for (; i
< gimple_num_ops (stmt
); i
++)
756 op
= gimple_op (stmt
, i
);
757 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
762 else if (is_gimple_call (stmt
))
764 for (i
= 0; i
< gimple_call_num_args (stmt
); i
++)
766 tree arg
= gimple_call_arg (stmt
, i
);
767 if (!process_use (stmt
, arg
, loop_vinfo
, live_p
, relevant
,
774 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
776 tree op
= USE_FROM_PTR (use_p
);
777 if (!process_use (stmt
, op
, loop_vinfo
, live_p
, relevant
,
782 if (STMT_VINFO_GATHER_P (stmt_vinfo
))
785 tree decl
= vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
787 if (!process_use (stmt
, off
, loop_vinfo
, live_p
, relevant
,
791 } /* while worklist */
797 /* Function vect_model_simple_cost.
799 Models cost for simple operations, i.e. those that only emit ncopies of a
800 single op. Right now, this does not account for multiple insns that could
801 be generated for the single vector op. We will handle that shortly. */
804 vect_model_simple_cost (stmt_vec_info stmt_info
, int ncopies
,
805 enum vect_def_type
*dt
,
806 stmt_vector_for_cost
*prologue_cost_vec
,
807 stmt_vector_for_cost
*body_cost_vec
)
810 int inside_cost
= 0, prologue_cost
= 0;
812 /* The SLP costs were already calculated during SLP tree build. */
813 if (PURE_SLP_STMT (stmt_info
))
816 /* FORNOW: Assuming maximum 2 args per stmts. */
817 for (i
= 0; i
< 2; i
++)
818 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
819 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, vector_stmt
,
820 stmt_info
, 0, vect_prologue
);
822 /* Pass the inside-of-loop statements to the target-specific cost model. */
823 inside_cost
= record_stmt_cost (body_cost_vec
, ncopies
, vector_stmt
,
824 stmt_info
, 0, vect_body
);
826 if (dump_enabled_p ())
827 dump_printf_loc (MSG_NOTE
, vect_location
,
828 "vect_model_simple_cost: inside_cost = %d, "
829 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
833 /* Model cost for type demotion and promotion operations. PWR is normally
834 zero for single-step promotions and demotions. It will be one if
835 two-step promotion/demotion is required, and so on. Each additional
836 step doubles the number of instructions required. */
839 vect_model_promotion_demotion_cost (stmt_vec_info stmt_info
,
840 enum vect_def_type
*dt
, int pwr
)
843 int inside_cost
= 0, prologue_cost
= 0;
844 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
845 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
846 void *target_cost_data
;
848 /* The SLP costs were already calculated during SLP tree build. */
849 if (PURE_SLP_STMT (stmt_info
))
853 target_cost_data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
855 target_cost_data
= BB_VINFO_TARGET_COST_DATA (bb_vinfo
);
857 for (i
= 0; i
< pwr
+ 1; i
++)
859 tmp
= (STMT_VINFO_TYPE (stmt_info
) == type_promotion_vec_info_type
) ?
861 inside_cost
+= add_stmt_cost (target_cost_data
, vect_pow2 (tmp
),
862 vec_promote_demote
, stmt_info
, 0,
866 /* FORNOW: Assuming maximum 2 args per stmts. */
867 for (i
= 0; i
< 2; i
++)
868 if (dt
[i
] == vect_constant_def
|| dt
[i
] == vect_external_def
)
869 prologue_cost
+= add_stmt_cost (target_cost_data
, 1, vector_stmt
,
870 stmt_info
, 0, vect_prologue
);
872 if (dump_enabled_p ())
873 dump_printf_loc (MSG_NOTE
, vect_location
,
874 "vect_model_promotion_demotion_cost: inside_cost = %d, "
875 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
878 /* Function vect_cost_group_size
880 For grouped load or store, return the group_size only if it is the first
881 load or store of a group, else return 1. This ensures that group size is
882 only returned once per group. */
885 vect_cost_group_size (stmt_vec_info stmt_info
)
887 gimple first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
889 if (first_stmt
== STMT_VINFO_STMT (stmt_info
))
890 return GROUP_SIZE (stmt_info
);
896 /* Function vect_model_store_cost
898 Models cost for stores. In the case of grouped accesses, one access
899 has the overhead of the grouped access attributed to it. */
902 vect_model_store_cost (stmt_vec_info stmt_info
, int ncopies
,
903 bool store_lanes_p
, enum vect_def_type dt
,
905 stmt_vector_for_cost
*prologue_cost_vec
,
906 stmt_vector_for_cost
*body_cost_vec
)
909 unsigned int inside_cost
= 0, prologue_cost
= 0;
910 struct data_reference
*first_dr
;
913 /* The SLP costs were already calculated during SLP tree build. */
914 if (PURE_SLP_STMT (stmt_info
))
917 if (dt
== vect_constant_def
|| dt
== vect_external_def
)
918 prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1, scalar_to_vec
,
919 stmt_info
, 0, vect_prologue
);
921 /* Grouped access? */
922 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
926 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
931 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
932 group_size
= vect_cost_group_size (stmt_info
);
935 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
937 /* Not a grouped access. */
941 first_dr
= STMT_VINFO_DATA_REF (stmt_info
);
944 /* We assume that the cost of a single store-lanes instruction is
945 equivalent to the cost of GROUP_SIZE separate stores. If a grouped
946 access is instead being provided by a permute-and-store operation,
947 include the cost of the permutes. */
948 if (!store_lanes_p
&& group_size
> 1)
950 /* Uses a high and low interleave operation for each needed permute. */
952 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
953 inside_cost
= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
954 stmt_info
, 0, vect_body
);
956 if (dump_enabled_p ())
957 dump_printf_loc (MSG_NOTE
, vect_location
,
958 "vect_model_store_cost: strided group_size = %d .\n",
962 /* Costs of the stores. */
963 vect_get_store_cost (first_dr
, ncopies
, &inside_cost
, body_cost_vec
);
965 if (dump_enabled_p ())
966 dump_printf_loc (MSG_NOTE
, vect_location
,
967 "vect_model_store_cost: inside_cost = %d, "
968 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
972 /* Calculate cost of DR's memory access. */
974 vect_get_store_cost (struct data_reference
*dr
, int ncopies
,
975 unsigned int *inside_cost
,
976 stmt_vector_for_cost
*body_cost_vec
)
978 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
979 gimple stmt
= DR_STMT (dr
);
980 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
982 switch (alignment_support_scheme
)
986 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
987 vector_store
, stmt_info
, 0,
990 if (dump_enabled_p ())
991 dump_printf_loc (MSG_NOTE
, vect_location
,
992 "vect_model_store_cost: aligned.\n");
996 case dr_unaligned_supported
:
998 /* Here, we assign an additional cost for the unaligned store. */
999 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1000 unaligned_store
, stmt_info
,
1001 DR_MISALIGNMENT (dr
), vect_body
);
1002 if (dump_enabled_p ())
1003 dump_printf_loc (MSG_NOTE
, vect_location
,
1004 "vect_model_store_cost: unaligned supported by "
1009 case dr_unaligned_unsupported
:
1011 *inside_cost
= VECT_MAX_COST
;
1013 if (dump_enabled_p ())
1014 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1015 "vect_model_store_cost: unsupported access.\n");
1025 /* Function vect_model_load_cost
1027 Models cost for loads. In the case of grouped accesses, the last access
1028 has the overhead of the grouped access attributed to it. Since unaligned
1029 accesses are supported for loads, we also account for the costs of the
1030 access scheme chosen. */
1033 vect_model_load_cost (stmt_vec_info stmt_info
, int ncopies
,
1034 bool load_lanes_p
, slp_tree slp_node
,
1035 stmt_vector_for_cost
*prologue_cost_vec
,
1036 stmt_vector_for_cost
*body_cost_vec
)
1040 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
;
1041 unsigned int inside_cost
= 0, prologue_cost
= 0;
1043 /* The SLP costs were already calculated during SLP tree build. */
1044 if (PURE_SLP_STMT (stmt_info
))
1047 /* Grouped accesses? */
1048 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
1049 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && first_stmt
&& !slp_node
)
1051 group_size
= vect_cost_group_size (stmt_info
);
1052 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
1054 /* Not a grouped access. */
1061 /* We assume that the cost of a single load-lanes instruction is
1062 equivalent to the cost of GROUP_SIZE separate loads. If a grouped
1063 access is instead being provided by a load-and-permute operation,
1064 include the cost of the permutes. */
1065 if (!load_lanes_p
&& group_size
> 1)
1067 /* Uses an even and odd extract operations for each needed permute. */
1068 int nstmts
= ncopies
* exact_log2 (group_size
) * group_size
;
1069 inside_cost
+= record_stmt_cost (body_cost_vec
, nstmts
, vec_perm
,
1070 stmt_info
, 0, vect_body
);
1072 if (dump_enabled_p ())
1073 dump_printf_loc (MSG_NOTE
, vect_location
,
1074 "vect_model_load_cost: strided group_size = %d .\n",
1078 /* The loads themselves. */
1079 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1081 /* N scalar loads plus gathering them into a vector. */
1082 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1083 inside_cost
+= record_stmt_cost (body_cost_vec
,
1084 ncopies
* TYPE_VECTOR_SUBPARTS (vectype
),
1085 scalar_load
, stmt_info
, 0, vect_body
);
1086 inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_construct
,
1087 stmt_info
, 0, vect_body
);
1090 vect_get_load_cost (first_dr
, ncopies
,
1091 ((!STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1092 || group_size
> 1 || slp_node
),
1093 &inside_cost
, &prologue_cost
,
1094 prologue_cost_vec
, body_cost_vec
, true);
1096 if (dump_enabled_p ())
1097 dump_printf_loc (MSG_NOTE
, vect_location
,
1098 "vect_model_load_cost: inside_cost = %d, "
1099 "prologue_cost = %d .\n", inside_cost
, prologue_cost
);
1103 /* Calculate cost of DR's memory access. */
1105 vect_get_load_cost (struct data_reference
*dr
, int ncopies
,
1106 bool add_realign_cost
, unsigned int *inside_cost
,
1107 unsigned int *prologue_cost
,
1108 stmt_vector_for_cost
*prologue_cost_vec
,
1109 stmt_vector_for_cost
*body_cost_vec
,
1110 bool record_prologue_costs
)
1112 int alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
1113 gimple stmt
= DR_STMT (dr
);
1114 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1116 switch (alignment_support_scheme
)
1120 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1121 stmt_info
, 0, vect_body
);
1123 if (dump_enabled_p ())
1124 dump_printf_loc (MSG_NOTE
, vect_location
,
1125 "vect_model_load_cost: aligned.\n");
1129 case dr_unaligned_supported
:
1131 /* Here, we assign an additional cost for the unaligned load. */
1132 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1133 unaligned_load
, stmt_info
,
1134 DR_MISALIGNMENT (dr
), vect_body
);
1136 if (dump_enabled_p ())
1137 dump_printf_loc (MSG_NOTE
, vect_location
,
1138 "vect_model_load_cost: unaligned supported by "
1143 case dr_explicit_realign
:
1145 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
* 2,
1146 vector_load
, stmt_info
, 0, vect_body
);
1147 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
,
1148 vec_perm
, stmt_info
, 0, vect_body
);
1150 /* FIXME: If the misalignment remains fixed across the iterations of
1151 the containing loop, the following cost should be added to the
1153 if (targetm
.vectorize
.builtin_mask_for_load
)
1154 *inside_cost
+= record_stmt_cost (body_cost_vec
, 1, vector_stmt
,
1155 stmt_info
, 0, vect_body
);
1157 if (dump_enabled_p ())
1158 dump_printf_loc (MSG_NOTE
, vect_location
,
1159 "vect_model_load_cost: explicit realign\n");
1163 case dr_explicit_realign_optimized
:
1165 if (dump_enabled_p ())
1166 dump_printf_loc (MSG_NOTE
, vect_location
,
1167 "vect_model_load_cost: unaligned software "
1170 /* Unaligned software pipeline has a load of an address, an initial
1171 load, and possibly a mask operation to "prime" the loop. However,
1172 if this is an access in a group of loads, which provide grouped
1173 access, then the above cost should only be considered for one
1174 access in the group. Inside the loop, there is a load op
1175 and a realignment op. */
1177 if (add_realign_cost
&& record_prologue_costs
)
1179 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 2,
1180 vector_stmt
, stmt_info
,
1182 if (targetm
.vectorize
.builtin_mask_for_load
)
1183 *prologue_cost
+= record_stmt_cost (prologue_cost_vec
, 1,
1184 vector_stmt
, stmt_info
,
1188 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vector_load
,
1189 stmt_info
, 0, vect_body
);
1190 *inside_cost
+= record_stmt_cost (body_cost_vec
, ncopies
, vec_perm
,
1191 stmt_info
, 0, vect_body
);
1193 if (dump_enabled_p ())
1194 dump_printf_loc (MSG_NOTE
, vect_location
,
1195 "vect_model_load_cost: explicit realign optimized"
1201 case dr_unaligned_unsupported
:
1203 *inside_cost
= VECT_MAX_COST
;
1205 if (dump_enabled_p ())
1206 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1207 "vect_model_load_cost: unsupported access.\n");
1216 /* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
1217 the loop preheader for the vectorized stmt STMT. */
1220 vect_init_vector_1 (gimple stmt
, gimple new_stmt
, gimple_stmt_iterator
*gsi
)
1223 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1226 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1227 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1231 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1235 if (nested_in_vect_loop_p (loop
, stmt
))
1238 pe
= loop_preheader_edge (loop
);
1239 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
1240 gcc_assert (!new_bb
);
1244 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_vinfo
);
1246 gimple_stmt_iterator gsi_bb_start
;
1248 gcc_assert (bb_vinfo
);
1249 bb
= BB_VINFO_BB (bb_vinfo
);
1250 gsi_bb_start
= gsi_after_labels (bb
);
1251 gsi_insert_before (&gsi_bb_start
, new_stmt
, GSI_SAME_STMT
);
1255 if (dump_enabled_p ())
1257 dump_printf_loc (MSG_NOTE
, vect_location
,
1258 "created new init_stmt: ");
1259 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, new_stmt
, 0);
1260 dump_printf (MSG_NOTE
, "\n");
1264 /* Function vect_init_vector.
1266 Insert a new stmt (INIT_STMT) that initializes a new variable of type
1267 TYPE with the value VAL. If TYPE is a vector type and VAL does not have
1268 vector type a vector with all elements equal to VAL is created first.
1269 Place the initialization at BSI if it is not NULL. Otherwise, place the
1270 initialization at the loop preheader.
1271 Return the DEF of INIT_STMT.
1272 It will be used in the vectorization of STMT. */
1275 vect_init_vector (gimple stmt
, tree val
, tree type
, gimple_stmt_iterator
*gsi
)
1282 if (TREE_CODE (type
) == VECTOR_TYPE
1283 && TREE_CODE (TREE_TYPE (val
)) != VECTOR_TYPE
)
1285 if (!types_compatible_p (TREE_TYPE (type
), TREE_TYPE (val
)))
1287 if (CONSTANT_CLASS_P (val
))
1288 val
= fold_unary (VIEW_CONVERT_EXPR
, TREE_TYPE (type
), val
);
1291 new_temp
= make_ssa_name (TREE_TYPE (type
), NULL
);
1292 init_stmt
= gimple_build_assign_with_ops (NOP_EXPR
,
1295 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1299 val
= build_vector_from_val (type
, val
);
1302 new_var
= vect_get_new_vect_var (type
, vect_simple_var
, "cst_");
1303 init_stmt
= gimple_build_assign (new_var
, val
);
1304 new_temp
= make_ssa_name (new_var
, init_stmt
);
1305 gimple_assign_set_lhs (init_stmt
, new_temp
);
1306 vect_init_vector_1 (stmt
, init_stmt
, gsi
);
1307 vec_oprnd
= gimple_assign_lhs (init_stmt
);
1312 /* Function vect_get_vec_def_for_operand.
1314 OP is an operand in STMT. This function returns a (vector) def that will be
1315 used in the vectorized stmt for STMT.
1317 In the case that OP is an SSA_NAME which is defined in the loop, then
1318 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1320 In case OP is an invariant or constant, a new stmt that creates a vector def
1321 needs to be introduced. */
1324 vect_get_vec_def_for_operand (tree op
, gimple stmt
, tree
*scalar_def
)
1329 stmt_vec_info def_stmt_info
= NULL
;
1330 stmt_vec_info stmt_vinfo
= vinfo_for_stmt (stmt
);
1331 unsigned int nunits
;
1332 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_vinfo
);
1334 enum vect_def_type dt
;
1338 if (dump_enabled_p ())
1340 dump_printf_loc (MSG_NOTE
, vect_location
,
1341 "vect_get_vec_def_for_operand: ");
1342 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, op
);
1343 dump_printf (MSG_NOTE
, "\n");
1346 is_simple_use
= vect_is_simple_use (op
, stmt
, loop_vinfo
, NULL
,
1347 &def_stmt
, &def
, &dt
);
1348 gcc_assert (is_simple_use
);
1349 if (dump_enabled_p ())
1351 int loc_printed
= 0;
1354 dump_printf_loc (MSG_NOTE
, vect_location
, "def = ");
1356 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, def
);
1357 dump_printf (MSG_NOTE
, "\n");
1362 dump_printf (MSG_NOTE
, " def_stmt = ");
1364 dump_printf_loc (MSG_NOTE
, vect_location
, " def_stmt = ");
1365 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, def_stmt
, 0);
1366 dump_printf (MSG_NOTE
, "\n");
1372 /* Case 1: operand is a constant. */
1373 case vect_constant_def
:
1375 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (op
));
1376 gcc_assert (vector_type
);
1377 nunits
= TYPE_VECTOR_SUBPARTS (vector_type
);
1382 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1383 if (dump_enabled_p ())
1384 dump_printf_loc (MSG_NOTE
, vect_location
,
1385 "Create vector_cst. nunits = %d\n", nunits
);
1387 return vect_init_vector (stmt
, op
, vector_type
, NULL
);
1390 /* Case 2: operand is defined outside the loop - loop invariant. */
1391 case vect_external_def
:
1393 vector_type
= get_vectype_for_scalar_type (TREE_TYPE (def
));
1394 gcc_assert (vector_type
);
1399 /* Create 'vec_inv = {inv,inv,..,inv}' */
1400 if (dump_enabled_p ())
1401 dump_printf_loc (MSG_NOTE
, vect_location
, "Create vector_inv.\n");
1403 return vect_init_vector (stmt
, def
, vector_type
, NULL
);
1406 /* Case 3: operand is defined inside the loop. */
1407 case vect_internal_def
:
1410 *scalar_def
= NULL
/* FIXME tuples: def_stmt*/;
1412 /* Get the def from the vectorized stmt. */
1413 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1415 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1416 /* Get vectorized pattern statement. */
1418 && STMT_VINFO_IN_PATTERN_P (def_stmt_info
)
1419 && !STMT_VINFO_RELEVANT (def_stmt_info
))
1420 vec_stmt
= STMT_VINFO_VEC_STMT (vinfo_for_stmt (
1421 STMT_VINFO_RELATED_STMT (def_stmt_info
)));
1422 gcc_assert (vec_stmt
);
1423 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1424 vec_oprnd
= PHI_RESULT (vec_stmt
);
1425 else if (is_gimple_call (vec_stmt
))
1426 vec_oprnd
= gimple_call_lhs (vec_stmt
);
1428 vec_oprnd
= gimple_assign_lhs (vec_stmt
);
1432 /* Case 4: operand is defined by a loop header phi - reduction */
1433 case vect_reduction_def
:
1434 case vect_double_reduction_def
:
1435 case vect_nested_cycle
:
1439 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1440 loop
= (gimple_bb (def_stmt
))->loop_father
;
1442 /* Get the def before the loop */
1443 op
= PHI_ARG_DEF_FROM_EDGE (def_stmt
, loop_preheader_edge (loop
));
1444 return get_initial_def_for_reduction (stmt
, op
, scalar_def
);
1447 /* Case 5: operand is defined by loop-header phi - induction. */
1448 case vect_induction_def
:
1450 gcc_assert (gimple_code (def_stmt
) == GIMPLE_PHI
);
1452 /* Get the def from the vectorized stmt. */
1453 def_stmt_info
= vinfo_for_stmt (def_stmt
);
1454 vec_stmt
= STMT_VINFO_VEC_STMT (def_stmt_info
);
1455 if (gimple_code (vec_stmt
) == GIMPLE_PHI
)
1456 vec_oprnd
= PHI_RESULT (vec_stmt
);
1458 vec_oprnd
= gimple_get_lhs (vec_stmt
);
1468 /* Function vect_get_vec_def_for_stmt_copy
1470 Return a vector-def for an operand. This function is used when the
1471 vectorized stmt to be created (by the caller to this function) is a "copy"
1472 created in case the vectorized result cannot fit in one vector, and several
1473 copies of the vector-stmt are required. In this case the vector-def is
1474 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1475 of the stmt that defines VEC_OPRND.
1476 DT is the type of the vector def VEC_OPRND.
1479 In case the vectorization factor (VF) is bigger than the number
1480 of elements that can fit in a vectype (nunits), we have to generate
1481 more than one vector stmt to vectorize the scalar stmt. This situation
1482 arises when there are multiple data-types operated upon in the loop; the
1483 smallest data-type determines the VF, and as a result, when vectorizing
1484 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1485 vector stmt (each computing a vector of 'nunits' results, and together
1486 computing 'VF' results in each iteration). This function is called when
1487 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1488 which VF=16 and nunits=4, so the number of copies required is 4):
1490 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1492 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1493 VS1.1: vx.1 = memref1 VS1.2
1494 VS1.2: vx.2 = memref2 VS1.3
1495 VS1.3: vx.3 = memref3
1497 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1498 VSnew.1: vz1 = vx.1 + ... VSnew.2
1499 VSnew.2: vz2 = vx.2 + ... VSnew.3
1500 VSnew.3: vz3 = vx.3 + ...
1502 The vectorization of S1 is explained in vectorizable_load.
1503 The vectorization of S2:
1504 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1505 the function 'vect_get_vec_def_for_operand' is called to
1506 get the relevant vector-def for each operand of S2. For operand x it
1507 returns the vector-def 'vx.0'.
1509 To create the remaining copies of the vector-stmt (VSnew.j), this
1510 function is called to get the relevant vector-def for each operand. It is
1511 obtained from the respective VS1.j stmt, which is recorded in the
1512 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1514 For example, to obtain the vector-def 'vx.1' in order to create the
1515 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1516 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1517 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1518 and return its def ('vx.1').
1519 Overall, to create the above sequence this function will be called 3 times:
1520 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1521 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1522 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1525 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt
, tree vec_oprnd
)
1527 gimple vec_stmt_for_operand
;
1528 stmt_vec_info def_stmt_info
;
1530 /* Do nothing; can reuse same def. */
1531 if (dt
== vect_external_def
|| dt
== vect_constant_def
)
1534 vec_stmt_for_operand
= SSA_NAME_DEF_STMT (vec_oprnd
);
1535 def_stmt_info
= vinfo_for_stmt (vec_stmt_for_operand
);
1536 gcc_assert (def_stmt_info
);
1537 vec_stmt_for_operand
= STMT_VINFO_RELATED_STMT (def_stmt_info
);
1538 gcc_assert (vec_stmt_for_operand
);
1539 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1540 if (gimple_code (vec_stmt_for_operand
) == GIMPLE_PHI
)
1541 vec_oprnd
= PHI_RESULT (vec_stmt_for_operand
);
1543 vec_oprnd
= gimple_get_lhs (vec_stmt_for_operand
);
1548 /* Get vectorized definitions for the operands to create a copy of an original
1549 stmt. See vect_get_vec_def_for_stmt_copy () for details. */
1552 vect_get_vec_defs_for_stmt_copy (enum vect_def_type
*dt
,
1553 vec
<tree
> *vec_oprnds0
,
1554 vec
<tree
> *vec_oprnds1
)
1556 tree vec_oprnd
= vec_oprnds0
->pop ();
1558 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd
);
1559 vec_oprnds0
->quick_push (vec_oprnd
);
1561 if (vec_oprnds1
&& vec_oprnds1
->length ())
1563 vec_oprnd
= vec_oprnds1
->pop ();
1564 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
[1], vec_oprnd
);
1565 vec_oprnds1
->quick_push (vec_oprnd
);
1570 /* Get vectorized definitions for OP0 and OP1.
1571 REDUC_INDEX is the index of reduction operand in case of reduction,
1572 and -1 otherwise. */
1575 vect_get_vec_defs (tree op0
, tree op1
, gimple stmt
,
1576 vec
<tree
> *vec_oprnds0
,
1577 vec
<tree
> *vec_oprnds1
,
1578 slp_tree slp_node
, int reduc_index
)
1582 int nops
= (op1
== NULL_TREE
) ? 1 : 2;
1583 auto_vec
<tree
> ops (nops
);
1584 auto_vec
<vec
<tree
> > vec_defs (nops
);
1586 ops
.quick_push (op0
);
1588 ops
.quick_push (op1
);
1590 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, reduc_index
);
1592 *vec_oprnds0
= vec_defs
[0];
1594 *vec_oprnds1
= vec_defs
[1];
1600 vec_oprnds0
->create (1);
1601 vec_oprnd
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
1602 vec_oprnds0
->quick_push (vec_oprnd
);
1606 vec_oprnds1
->create (1);
1607 vec_oprnd
= vect_get_vec_def_for_operand (op1
, stmt
, NULL
);
1608 vec_oprnds1
->quick_push (vec_oprnd
);
1614 /* Function vect_finish_stmt_generation.
1616 Insert a new stmt. */
1619 vect_finish_stmt_generation (gimple stmt
, gimple vec_stmt
,
1620 gimple_stmt_iterator
*gsi
)
1622 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1623 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1624 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1626 gcc_assert (gimple_code (stmt
) != GIMPLE_LABEL
);
1628 if (!gsi_end_p (*gsi
)
1629 && gimple_has_mem_ops (vec_stmt
))
1631 gimple at_stmt
= gsi_stmt (*gsi
);
1632 tree vuse
= gimple_vuse (at_stmt
);
1633 if (vuse
&& TREE_CODE (vuse
) == SSA_NAME
)
1635 tree vdef
= gimple_vdef (at_stmt
);
1636 gimple_set_vuse (vec_stmt
, gimple_vuse (at_stmt
));
1637 /* If we have an SSA vuse and insert a store, update virtual
1638 SSA form to avoid triggering the renamer. Do so only
1639 if we can easily see all uses - which is what almost always
1640 happens with the way vectorized stmts are inserted. */
1641 if ((vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1642 && ((is_gimple_assign (vec_stmt
)
1643 && !is_gimple_reg (gimple_assign_lhs (vec_stmt
)))
1644 || (is_gimple_call (vec_stmt
)
1645 && !(gimple_call_flags (vec_stmt
)
1646 & (ECF_CONST
|ECF_PURE
|ECF_NOVOPS
)))))
1648 tree new_vdef
= copy_ssa_name (vuse
, vec_stmt
);
1649 gimple_set_vdef (vec_stmt
, new_vdef
);
1650 SET_USE (gimple_vuse_op (at_stmt
), new_vdef
);
1654 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
1656 set_vinfo_for_stmt (vec_stmt
, new_stmt_vec_info (vec_stmt
, loop_vinfo
,
1659 if (dump_enabled_p ())
1661 dump_printf_loc (MSG_NOTE
, vect_location
, "add new stmt: ");
1662 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, vec_stmt
, 0);
1663 dump_printf (MSG_NOTE
, "\n");
1666 gimple_set_location (vec_stmt
, gimple_location (stmt
));
1669 /* Checks if CALL can be vectorized in type VECTYPE. Returns
1670 a function declaration if the target has a vectorized version
1671 of the function, or NULL_TREE if the function cannot be vectorized. */
1674 vectorizable_function (gimple call
, tree vectype_out
, tree vectype_in
)
1676 tree fndecl
= gimple_call_fndecl (call
);
1678 /* We only handle functions that do not read or clobber memory -- i.e.
1679 const or novops ones. */
1680 if (!(gimple_call_flags (call
) & (ECF_CONST
| ECF_NOVOPS
)))
1684 || TREE_CODE (fndecl
) != FUNCTION_DECL
1685 || !DECL_BUILT_IN (fndecl
))
1688 return targetm
.vectorize
.builtin_vectorized_function (fndecl
, vectype_out
,
1692 /* Function vectorizable_call.
1694 Check if STMT performs a function call that can be vectorized.
1695 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1696 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1697 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1700 vectorizable_call (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
1706 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
1707 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
), prev_stmt_info
;
1708 tree vectype_out
, vectype_in
;
1711 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1712 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
1713 tree fndecl
, new_temp
, def
, rhs_type
;
1715 enum vect_def_type dt
[3]
1716 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
1717 gimple new_stmt
= NULL
;
1719 vec
<tree
> vargs
= vNULL
;
1720 enum { NARROW
, NONE
, WIDEN
} modifier
;
1724 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
1727 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
1730 /* Is STMT a vectorizable call? */
1731 if (!is_gimple_call (stmt
))
1734 if (TREE_CODE (gimple_call_lhs (stmt
)) != SSA_NAME
)
1737 if (stmt_can_throw_internal (stmt
))
1740 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
1742 /* Process function arguments. */
1743 rhs_type
= NULL_TREE
;
1744 vectype_in
= NULL_TREE
;
1745 nargs
= gimple_call_num_args (stmt
);
1747 /* Bail out if the function has more than three arguments, we do not have
1748 interesting builtin functions to vectorize with more than two arguments
1749 except for fma. No arguments is also not good. */
1750 if (nargs
== 0 || nargs
> 3)
1753 /* Ignore the argument of IFN_GOMP_SIMD_LANE, it is magic. */
1754 if (gimple_call_internal_p (stmt
)
1755 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
1758 rhs_type
= unsigned_type_node
;
1761 for (i
= 0; i
< nargs
; i
++)
1765 op
= gimple_call_arg (stmt
, i
);
1767 /* We can only handle calls with arguments of the same type. */
1769 && !types_compatible_p (rhs_type
, TREE_TYPE (op
)))
1771 if (dump_enabled_p ())
1772 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1773 "argument types differ.\n");
1777 rhs_type
= TREE_TYPE (op
);
1779 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
1780 &def_stmt
, &def
, &dt
[i
], &opvectype
))
1782 if (dump_enabled_p ())
1783 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1784 "use not simple.\n");
1789 vectype_in
= opvectype
;
1791 && opvectype
!= vectype_in
)
1793 if (dump_enabled_p ())
1794 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1795 "argument vector types differ.\n");
1799 /* If all arguments are external or constant defs use a vector type with
1800 the same size as the output vector type. */
1802 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
1804 gcc_assert (vectype_in
);
1807 if (dump_enabled_p ())
1809 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1810 "no vectype for scalar type ");
1811 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
1812 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
1819 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
1820 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
1821 if (nunits_in
== nunits_out
/ 2)
1823 else if (nunits_out
== nunits_in
)
1825 else if (nunits_out
== nunits_in
/ 2)
1830 /* For now, we only vectorize functions if a target specific builtin
1831 is available. TODO -- in some cases, it might be profitable to
1832 insert the calls for pieces of the vector, in order to be able
1833 to vectorize other operations in the loop. */
1834 fndecl
= vectorizable_function (stmt
, vectype_out
, vectype_in
);
1835 if (fndecl
== NULL_TREE
)
1837 if (gimple_call_internal_p (stmt
)
1838 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
1841 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1842 && TREE_CODE (gimple_call_arg (stmt
, 0)) == SSA_NAME
1843 && LOOP_VINFO_LOOP (loop_vinfo
)->simduid
1844 == SSA_NAME_VAR (gimple_call_arg (stmt
, 0)))
1846 /* We can handle IFN_GOMP_SIMD_LANE by returning a
1847 { 0, 1, 2, ... vf - 1 } vector. */
1848 gcc_assert (nargs
== 0);
1852 if (dump_enabled_p ())
1853 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1854 "function is not vectorizable.\n");
1859 gcc_assert (!gimple_vuse (stmt
));
1861 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
1863 else if (modifier
== NARROW
)
1864 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
1866 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
1868 /* Sanity check: make sure that at least one copy of the vectorized stmt
1869 needs to be generated. */
1870 gcc_assert (ncopies
>= 1);
1872 if (!vec_stmt
) /* transformation not required. */
1874 STMT_VINFO_TYPE (stmt_info
) = call_vec_info_type
;
1875 if (dump_enabled_p ())
1876 dump_printf_loc (MSG_NOTE
, vect_location
, "=== vectorizable_call ==="
1878 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
1884 if (dump_enabled_p ())
1885 dump_printf_loc (MSG_NOTE
, vect_location
, "transform call.\n");
1888 scalar_dest
= gimple_call_lhs (stmt
);
1889 vec_dest
= vect_create_destination_var (scalar_dest
, vectype_out
);
1891 prev_stmt_info
= NULL
;
1895 for (j
= 0; j
< ncopies
; ++j
)
1897 /* Build argument list for the vectorized call. */
1899 vargs
.create (nargs
);
1905 auto_vec
<vec
<tree
> > vec_defs (nargs
);
1906 vec
<tree
> vec_oprnds0
;
1908 for (i
= 0; i
< nargs
; i
++)
1909 vargs
.quick_push (gimple_call_arg (stmt
, i
));
1910 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
1911 vec_oprnds0
= vec_defs
[0];
1913 /* Arguments are ready. Create the new vector stmt. */
1914 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_oprnd0
)
1917 for (k
= 0; k
< nargs
; k
++)
1919 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
1920 vargs
[k
] = vec_oprndsk
[i
];
1922 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1923 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1924 gimple_call_set_lhs (new_stmt
, new_temp
);
1925 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1926 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
1929 for (i
= 0; i
< nargs
; i
++)
1931 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
1932 vec_oprndsi
.release ();
1937 for (i
= 0; i
< nargs
; i
++)
1939 op
= gimple_call_arg (stmt
, i
);
1942 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
1945 vec_oprnd0
= gimple_call_arg (new_stmt
, i
);
1947 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
1950 vargs
.quick_push (vec_oprnd0
);
1953 if (gimple_call_internal_p (stmt
)
1954 && gimple_call_internal_fn (stmt
) == IFN_GOMP_SIMD_LANE
)
1956 tree
*v
= XALLOCAVEC (tree
, nunits_out
);
1958 for (k
= 0; k
< nunits_out
; ++k
)
1959 v
[k
] = build_int_cst (unsigned_type_node
, j
* nunits_out
+ k
);
1960 tree cst
= build_vector (vectype_out
, v
);
1962 = vect_get_new_vect_var (vectype_out
, vect_simple_var
, "cst_");
1963 gimple init_stmt
= gimple_build_assign (new_var
, cst
);
1964 new_temp
= make_ssa_name (new_var
, init_stmt
);
1965 gimple_assign_set_lhs (init_stmt
, new_temp
);
1966 vect_init_vector_1 (stmt
, init_stmt
, NULL
);
1967 new_temp
= make_ssa_name (vec_dest
, NULL
);
1968 new_stmt
= gimple_build_assign (new_temp
,
1969 gimple_assign_lhs (init_stmt
));
1973 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
1974 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
1975 gimple_call_set_lhs (new_stmt
, new_temp
);
1977 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
1980 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
1982 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
1984 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
1990 for (j
= 0; j
< ncopies
; ++j
)
1992 /* Build argument list for the vectorized call. */
1994 vargs
.create (nargs
* 2);
2000 auto_vec
<vec
<tree
> > vec_defs (nargs
);
2001 vec
<tree
> vec_oprnds0
;
2003 for (i
= 0; i
< nargs
; i
++)
2004 vargs
.quick_push (gimple_call_arg (stmt
, i
));
2005 vect_get_slp_defs (vargs
, slp_node
, &vec_defs
, -1);
2006 vec_oprnds0
= vec_defs
[0];
2008 /* Arguments are ready. Create the new vector stmt. */
2009 for (i
= 0; vec_oprnds0
.iterate (i
, &vec_oprnd0
); i
+= 2)
2013 for (k
= 0; k
< nargs
; k
++)
2015 vec
<tree
> vec_oprndsk
= vec_defs
[k
];
2016 vargs
.quick_push (vec_oprndsk
[i
]);
2017 vargs
.quick_push (vec_oprndsk
[i
+ 1]);
2019 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2020 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2021 gimple_call_set_lhs (new_stmt
, new_temp
);
2022 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2023 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2026 for (i
= 0; i
< nargs
; i
++)
2028 vec
<tree
> vec_oprndsi
= vec_defs
[i
];
2029 vec_oprndsi
.release ();
2034 for (i
= 0; i
< nargs
; i
++)
2036 op
= gimple_call_arg (stmt
, i
);
2040 = vect_get_vec_def_for_operand (op
, stmt
, NULL
);
2042 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2046 vec_oprnd1
= gimple_call_arg (new_stmt
, 2*i
+ 1);
2048 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd1
);
2050 = vect_get_vec_def_for_stmt_copy (dt
[i
], vec_oprnd0
);
2053 vargs
.quick_push (vec_oprnd0
);
2054 vargs
.quick_push (vec_oprnd1
);
2057 new_stmt
= gimple_build_call_vec (fndecl
, vargs
);
2058 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2059 gimple_call_set_lhs (new_stmt
, new_temp
);
2060 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2063 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2065 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2067 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2070 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2075 /* No current target implements this case. */
2081 /* Update the exception handling table with the vector stmt if necessary. */
2082 if (maybe_clean_or_replace_eh_stmt (stmt
, *vec_stmt
))
2083 gimple_purge_dead_eh_edges (gimple_bb (stmt
));
2085 /* The call in STMT might prevent it from being removed in dce.
2086 We however cannot remove it here, due to the way the ssa name
2087 it defines is mapped to the new definition. So just replace
2088 rhs of the statement with something harmless. */
2093 type
= TREE_TYPE (scalar_dest
);
2094 if (is_pattern_stmt_p (stmt_info
))
2095 lhs
= gimple_call_lhs (STMT_VINFO_RELATED_STMT (stmt_info
));
2097 lhs
= gimple_call_lhs (stmt
);
2098 new_stmt
= gimple_build_assign (lhs
, build_zero_cst (type
));
2099 set_vinfo_for_stmt (new_stmt
, stmt_info
);
2100 set_vinfo_for_stmt (stmt
, NULL
);
2101 STMT_VINFO_STMT (stmt_info
) = new_stmt
;
2102 gsi_replace (gsi
, new_stmt
, false);
2108 /* Function vect_gen_widened_results_half
2110 Create a vector stmt whose code, type, number of arguments, and result
2111 variable are CODE, OP_TYPE, and VEC_DEST, and its arguments are
2112 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2113 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2114 needs to be created (DECL is a function-decl of a target-builtin).
2115 STMT is the original scalar stmt that we are vectorizing. */
2118 vect_gen_widened_results_half (enum tree_code code
,
2120 tree vec_oprnd0
, tree vec_oprnd1
, int op_type
,
2121 tree vec_dest
, gimple_stmt_iterator
*gsi
,
2127 /* Generate half of the widened result: */
2128 if (code
== CALL_EXPR
)
2130 /* Target specific support */
2131 if (op_type
== binary_op
)
2132 new_stmt
= gimple_build_call (decl
, 2, vec_oprnd0
, vec_oprnd1
);
2134 new_stmt
= gimple_build_call (decl
, 1, vec_oprnd0
);
2135 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2136 gimple_call_set_lhs (new_stmt
, new_temp
);
2140 /* Generic support */
2141 gcc_assert (op_type
== TREE_CODE_LENGTH (code
));
2142 if (op_type
!= binary_op
)
2144 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vec_oprnd0
,
2146 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2147 gimple_assign_set_lhs (new_stmt
, new_temp
);
2149 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2155 /* Get vectorized definitions for loop-based vectorization. For the first
2156 operand we call vect_get_vec_def_for_operand() (with OPRND containing
2157 scalar operand), and for the rest we get a copy with
2158 vect_get_vec_def_for_stmt_copy() using the previous vector definition
2159 (stored in OPRND). See vect_get_vec_def_for_stmt_copy() for details.
2160 The vectors are collected into VEC_OPRNDS. */
2163 vect_get_loop_based_defs (tree
*oprnd
, gimple stmt
, enum vect_def_type dt
,
2164 vec
<tree
> *vec_oprnds
, int multi_step_cvt
)
2168 /* Get first vector operand. */
2169 /* All the vector operands except the very first one (that is scalar oprnd)
2171 if (TREE_CODE (TREE_TYPE (*oprnd
)) != VECTOR_TYPE
)
2172 vec_oprnd
= vect_get_vec_def_for_operand (*oprnd
, stmt
, NULL
);
2174 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, *oprnd
);
2176 vec_oprnds
->quick_push (vec_oprnd
);
2178 /* Get second vector operand. */
2179 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, vec_oprnd
);
2180 vec_oprnds
->quick_push (vec_oprnd
);
2184 /* For conversion in multiple steps, continue to get operands
2187 vect_get_loop_based_defs (oprnd
, stmt
, dt
, vec_oprnds
, multi_step_cvt
- 1);
2191 /* Create vectorized demotion statements for vector operands from VEC_OPRNDS.
2192 For multi-step conversions store the resulting vectors and call the function
2196 vect_create_vectorized_demotion_stmts (vec
<tree
> *vec_oprnds
,
2197 int multi_step_cvt
, gimple stmt
,
2199 gimple_stmt_iterator
*gsi
,
2200 slp_tree slp_node
, enum tree_code code
,
2201 stmt_vec_info
*prev_stmt_info
)
2204 tree vop0
, vop1
, new_tmp
, vec_dest
;
2206 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2208 vec_dest
= vec_dsts
.pop ();
2210 for (i
= 0; i
< vec_oprnds
->length (); i
+= 2)
2212 /* Create demotion operation. */
2213 vop0
= (*vec_oprnds
)[i
];
2214 vop1
= (*vec_oprnds
)[i
+ 1];
2215 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
2216 new_tmp
= make_ssa_name (vec_dest
, new_stmt
);
2217 gimple_assign_set_lhs (new_stmt
, new_tmp
);
2218 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2221 /* Store the resulting vector for next recursive call. */
2222 (*vec_oprnds
)[i
/2] = new_tmp
;
2225 /* This is the last step of the conversion sequence. Store the
2226 vectors in SLP_NODE or in vector info of the scalar statement
2227 (or in STMT_VINFO_RELATED_STMT chain). */
2229 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2232 if (!*prev_stmt_info
)
2233 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2235 STMT_VINFO_RELATED_STMT (*prev_stmt_info
) = new_stmt
;
2237 *prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2242 /* For multi-step demotion operations we first generate demotion operations
2243 from the source type to the intermediate types, and then combine the
2244 results (stored in VEC_OPRNDS) in demotion operation to the destination
2248 /* At each level of recursion we have half of the operands we had at the
2250 vec_oprnds
->truncate ((i
+1)/2);
2251 vect_create_vectorized_demotion_stmts (vec_oprnds
, multi_step_cvt
- 1,
2252 stmt
, vec_dsts
, gsi
, slp_node
,
2253 VEC_PACK_TRUNC_EXPR
,
2257 vec_dsts
.quick_push (vec_dest
);
2261 /* Create vectorized promotion statements for vector operands from VEC_OPRNDS0
2262 and VEC_OPRNDS1 (for binary operations). For multi-step conversions store
2263 the resulting vectors and call the function recursively. */
2266 vect_create_vectorized_promotion_stmts (vec
<tree
> *vec_oprnds0
,
2267 vec
<tree
> *vec_oprnds1
,
2268 gimple stmt
, tree vec_dest
,
2269 gimple_stmt_iterator
*gsi
,
2270 enum tree_code code1
,
2271 enum tree_code code2
, tree decl1
,
2272 tree decl2
, int op_type
)
2275 tree vop0
, vop1
, new_tmp1
, new_tmp2
;
2276 gimple new_stmt1
, new_stmt2
;
2277 vec
<tree
> vec_tmp
= vNULL
;
2279 vec_tmp
.create (vec_oprnds0
->length () * 2);
2280 FOR_EACH_VEC_ELT (*vec_oprnds0
, i
, vop0
)
2282 if (op_type
== binary_op
)
2283 vop1
= (*vec_oprnds1
)[i
];
2287 /* Generate the two halves of promotion operation. */
2288 new_stmt1
= vect_gen_widened_results_half (code1
, decl1
, vop0
, vop1
,
2289 op_type
, vec_dest
, gsi
, stmt
);
2290 new_stmt2
= vect_gen_widened_results_half (code2
, decl2
, vop0
, vop1
,
2291 op_type
, vec_dest
, gsi
, stmt
);
2292 if (is_gimple_call (new_stmt1
))
2294 new_tmp1
= gimple_call_lhs (new_stmt1
);
2295 new_tmp2
= gimple_call_lhs (new_stmt2
);
2299 new_tmp1
= gimple_assign_lhs (new_stmt1
);
2300 new_tmp2
= gimple_assign_lhs (new_stmt2
);
2303 /* Store the results for the next step. */
2304 vec_tmp
.quick_push (new_tmp1
);
2305 vec_tmp
.quick_push (new_tmp2
);
2308 vec_oprnds0
->release ();
2309 *vec_oprnds0
= vec_tmp
;
2313 /* Check if STMT performs a conversion operation, that can be vectorized.
2314 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2315 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
2316 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2319 vectorizable_conversion (gimple stmt
, gimple_stmt_iterator
*gsi
,
2320 gimple
*vec_stmt
, slp_tree slp_node
)
2324 tree op0
, op1
= NULL_TREE
;
2325 tree vec_oprnd0
= NULL_TREE
, vec_oprnd1
= NULL_TREE
;
2326 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2327 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2328 enum tree_code code
, code1
= ERROR_MARK
, code2
= ERROR_MARK
;
2329 enum tree_code codecvt1
= ERROR_MARK
, codecvt2
= ERROR_MARK
;
2330 tree decl1
= NULL_TREE
, decl2
= NULL_TREE
;
2334 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2335 gimple new_stmt
= NULL
;
2336 stmt_vec_info prev_stmt_info
;
2339 tree vectype_out
, vectype_in
;
2341 tree lhs_type
, rhs_type
;
2342 enum { NARROW
, NONE
, WIDEN
} modifier
;
2343 vec
<tree
> vec_oprnds0
= vNULL
;
2344 vec
<tree
> vec_oprnds1
= vNULL
;
2346 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2347 int multi_step_cvt
= 0;
2348 vec
<tree
> vec_dsts
= vNULL
;
2349 vec
<tree
> interm_types
= vNULL
;
2350 tree last_oprnd
, intermediate_type
, cvt_type
= NULL_TREE
;
2352 enum machine_mode rhs_mode
;
2353 unsigned short fltsz
;
2355 /* Is STMT a vectorizable conversion? */
2357 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2360 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2363 if (!is_gimple_assign (stmt
))
2366 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
2369 code
= gimple_assign_rhs_code (stmt
);
2370 if (!CONVERT_EXPR_CODE_P (code
)
2371 && code
!= FIX_TRUNC_EXPR
2372 && code
!= FLOAT_EXPR
2373 && code
!= WIDEN_MULT_EXPR
2374 && code
!= WIDEN_LSHIFT_EXPR
)
2377 op_type
= TREE_CODE_LENGTH (code
);
2379 /* Check types of lhs and rhs. */
2380 scalar_dest
= gimple_assign_lhs (stmt
);
2381 lhs_type
= TREE_TYPE (scalar_dest
);
2382 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
2384 op0
= gimple_assign_rhs1 (stmt
);
2385 rhs_type
= TREE_TYPE (op0
);
2387 if ((code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2388 && !((INTEGRAL_TYPE_P (lhs_type
)
2389 && INTEGRAL_TYPE_P (rhs_type
))
2390 || (SCALAR_FLOAT_TYPE_P (lhs_type
)
2391 && SCALAR_FLOAT_TYPE_P (rhs_type
))))
2394 if ((INTEGRAL_TYPE_P (lhs_type
)
2395 && (TYPE_PRECISION (lhs_type
)
2396 != GET_MODE_PRECISION (TYPE_MODE (lhs_type
))))
2397 || (INTEGRAL_TYPE_P (rhs_type
)
2398 && (TYPE_PRECISION (rhs_type
)
2399 != GET_MODE_PRECISION (TYPE_MODE (rhs_type
)))))
2401 if (dump_enabled_p ())
2402 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2403 "type conversion to/from bit-precision unsupported."
2408 /* Check the operands of the operation. */
2409 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
2410 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2412 if (dump_enabled_p ())
2413 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2414 "use not simple.\n");
2417 if (op_type
== binary_op
)
2421 op1
= gimple_assign_rhs2 (stmt
);
2422 gcc_assert (code
== WIDEN_MULT_EXPR
|| code
== WIDEN_LSHIFT_EXPR
);
2423 /* For WIDEN_MULT_EXPR, if OP0 is a constant, use the type of
2425 if (CONSTANT_CLASS_P (op0
))
2426 ok
= vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
,
2427 &def_stmt
, &def
, &dt
[1], &vectype_in
);
2429 ok
= vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
2434 if (dump_enabled_p ())
2435 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2436 "use not simple.\n");
2441 /* If op0 is an external or constant defs use a vector type of
2442 the same size as the output vector type. */
2444 vectype_in
= get_same_sized_vectype (rhs_type
, vectype_out
);
2446 gcc_assert (vectype_in
);
2449 if (dump_enabled_p ())
2451 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2452 "no vectype for scalar type ");
2453 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, rhs_type
);
2454 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2460 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype_in
);
2461 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
2462 if (nunits_in
< nunits_out
)
2464 else if (nunits_out
== nunits_in
)
2469 /* Multiple types in SLP are handled by creating the appropriate number of
2470 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2472 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2474 else if (modifier
== NARROW
)
2475 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_out
;
2477 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
2479 /* Sanity check: make sure that at least one copy of the vectorized stmt
2480 needs to be generated. */
2481 gcc_assert (ncopies
>= 1);
2483 /* Supportable by target? */
2487 if (code
!= FIX_TRUNC_EXPR
&& code
!= FLOAT_EXPR
)
2489 if (supportable_convert_operation (code
, vectype_out
, vectype_in
,
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2496 "conversion not supported by target.\n");
2500 if (supportable_widening_operation (code
, stmt
, vectype_out
, vectype_in
,
2501 &code1
, &code2
, &multi_step_cvt
,
2504 /* Binary widening operation can only be supported directly by the
2506 gcc_assert (!(multi_step_cvt
&& op_type
== binary_op
));
2510 if (code
!= FLOAT_EXPR
2511 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2512 <= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2515 rhs_mode
= TYPE_MODE (rhs_type
);
2516 fltsz
= GET_MODE_SIZE (TYPE_MODE (lhs_type
));
2517 for (rhs_mode
= GET_MODE_2XWIDER_MODE (TYPE_MODE (rhs_type
));
2518 rhs_mode
!= VOIDmode
&& GET_MODE_SIZE (rhs_mode
) <= fltsz
;
2519 rhs_mode
= GET_MODE_2XWIDER_MODE (rhs_mode
))
2522 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2523 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2524 if (cvt_type
== NULL_TREE
)
2527 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2529 if (!supportable_convert_operation (code
, vectype_out
,
2530 cvt_type
, &decl1
, &codecvt1
))
2533 else if (!supportable_widening_operation (code
, stmt
, vectype_out
,
2534 cvt_type
, &codecvt1
,
2535 &codecvt2
, &multi_step_cvt
,
2539 gcc_assert (multi_step_cvt
== 0);
2541 if (supportable_widening_operation (NOP_EXPR
, stmt
, cvt_type
,
2542 vectype_in
, &code1
, &code2
,
2543 &multi_step_cvt
, &interm_types
))
2547 if (rhs_mode
== VOIDmode
|| GET_MODE_SIZE (rhs_mode
) > fltsz
)
2550 if (GET_MODE_SIZE (rhs_mode
) == fltsz
)
2551 codecvt2
= ERROR_MARK
;
2555 interm_types
.safe_push (cvt_type
);
2556 cvt_type
= NULL_TREE
;
2561 gcc_assert (op_type
== unary_op
);
2562 if (supportable_narrowing_operation (code
, vectype_out
, vectype_in
,
2563 &code1
, &multi_step_cvt
,
2567 if (code
!= FIX_TRUNC_EXPR
2568 || (GET_MODE_SIZE (TYPE_MODE (lhs_type
))
2569 >= GET_MODE_SIZE (TYPE_MODE (rhs_type
))))
2572 rhs_mode
= TYPE_MODE (rhs_type
);
2574 = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode
), 0);
2575 cvt_type
= get_same_sized_vectype (cvt_type
, vectype_in
);
2576 if (cvt_type
== NULL_TREE
)
2578 if (!supportable_convert_operation (code
, cvt_type
, vectype_in
,
2581 if (supportable_narrowing_operation (NOP_EXPR
, vectype_out
, cvt_type
,
2582 &code1
, &multi_step_cvt
,
2591 if (!vec_stmt
) /* transformation not required. */
2593 if (dump_enabled_p ())
2594 dump_printf_loc (MSG_NOTE
, vect_location
,
2595 "=== vectorizable_conversion ===\n");
2596 if (code
== FIX_TRUNC_EXPR
|| code
== FLOAT_EXPR
)
2598 STMT_VINFO_TYPE (stmt_info
) = type_conversion_vec_info_type
;
2599 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
2601 else if (modifier
== NARROW
)
2603 STMT_VINFO_TYPE (stmt_info
) = type_demotion_vec_info_type
;
2604 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2608 STMT_VINFO_TYPE (stmt_info
) = type_promotion_vec_info_type
;
2609 vect_model_promotion_demotion_cost (stmt_info
, dt
, multi_step_cvt
);
2611 interm_types
.release ();
2616 if (dump_enabled_p ())
2617 dump_printf_loc (MSG_NOTE
, vect_location
,
2618 "transform conversion. ncopies = %d.\n", ncopies
);
2620 if (op_type
== binary_op
)
2622 if (CONSTANT_CLASS_P (op0
))
2623 op0
= fold_convert (TREE_TYPE (op1
), op0
);
2624 else if (CONSTANT_CLASS_P (op1
))
2625 op1
= fold_convert (TREE_TYPE (op0
), op1
);
2628 /* In case of multi-step conversion, we first generate conversion operations
2629 to the intermediate types, and then from that types to the final one.
2630 We create vector destinations for the intermediate type (TYPES) received
2631 from supportable_*_operation, and store them in the correct order
2632 for future use in vect_create_vectorized_*_stmts (). */
2633 vec_dsts
.create (multi_step_cvt
+ 1);
2634 vec_dest
= vect_create_destination_var (scalar_dest
,
2635 (cvt_type
&& modifier
== WIDEN
)
2636 ? cvt_type
: vectype_out
);
2637 vec_dsts
.quick_push (vec_dest
);
2641 for (i
= interm_types
.length () - 1;
2642 interm_types
.iterate (i
, &intermediate_type
); i
--)
2644 vec_dest
= vect_create_destination_var (scalar_dest
,
2646 vec_dsts
.quick_push (vec_dest
);
2651 vec_dest
= vect_create_destination_var (scalar_dest
,
2653 ? vectype_out
: cvt_type
);
2657 if (modifier
== WIDEN
)
2659 vec_oprnds0
.create (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1);
2660 if (op_type
== binary_op
)
2661 vec_oprnds1
.create (1);
2663 else if (modifier
== NARROW
)
2664 vec_oprnds0
.create (
2665 2 * (multi_step_cvt
? vect_pow2 (multi_step_cvt
) : 1));
2667 else if (code
== WIDEN_LSHIFT_EXPR
)
2668 vec_oprnds1
.create (slp_node
->vec_stmts_size
);
2671 prev_stmt_info
= NULL
;
2675 for (j
= 0; j
< ncopies
; j
++)
2678 vect_get_vec_defs (op0
, NULL
, stmt
, &vec_oprnds0
, NULL
, slp_node
,
2681 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, NULL
);
2683 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
2685 /* Arguments are ready, create the new vector stmt. */
2686 if (code1
== CALL_EXPR
)
2688 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2689 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2690 gimple_call_set_lhs (new_stmt
, new_temp
);
2694 gcc_assert (TREE_CODE_LENGTH (code1
) == unary_op
);
2695 new_stmt
= gimple_build_assign_with_ops (code1
, vec_dest
,
2697 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2698 gimple_assign_set_lhs (new_stmt
, new_temp
);
2701 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2703 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2707 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
2709 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2710 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2715 /* In case the vectorization factor (VF) is bigger than the number
2716 of elements that we can fit in a vectype (nunits), we have to
2717 generate more than one vector stmt - i.e - we need to "unroll"
2718 the vector stmt by a factor VF/nunits. */
2719 for (j
= 0; j
< ncopies
; j
++)
2726 if (code
== WIDEN_LSHIFT_EXPR
)
2731 /* Store vec_oprnd1 for every vector stmt to be created
2732 for SLP_NODE. We check during the analysis that all
2733 the shift arguments are the same. */
2734 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
2735 vec_oprnds1
.quick_push (vec_oprnd1
);
2737 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2741 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
,
2742 &vec_oprnds1
, slp_node
, -1);
2746 vec_oprnd0
= vect_get_vec_def_for_operand (op0
, stmt
, NULL
);
2747 vec_oprnds0
.quick_push (vec_oprnd0
);
2748 if (op_type
== binary_op
)
2750 if (code
== WIDEN_LSHIFT_EXPR
)
2753 vec_oprnd1
= vect_get_vec_def_for_operand (op1
, stmt
,
2755 vec_oprnds1
.quick_push (vec_oprnd1
);
2761 vec_oprnd0
= vect_get_vec_def_for_stmt_copy (dt
[0], vec_oprnd0
);
2762 vec_oprnds0
.truncate (0);
2763 vec_oprnds0
.quick_push (vec_oprnd0
);
2764 if (op_type
== binary_op
)
2766 if (code
== WIDEN_LSHIFT_EXPR
)
2769 vec_oprnd1
= vect_get_vec_def_for_stmt_copy (dt
[1],
2771 vec_oprnds1
.truncate (0);
2772 vec_oprnds1
.quick_push (vec_oprnd1
);
2776 /* Arguments are ready. Create the new vector stmts. */
2777 for (i
= multi_step_cvt
; i
>= 0; i
--)
2779 tree this_dest
= vec_dsts
[i
];
2780 enum tree_code c1
= code1
, c2
= code2
;
2781 if (i
== 0 && codecvt2
!= ERROR_MARK
)
2786 vect_create_vectorized_promotion_stmts (&vec_oprnds0
,
2788 stmt
, this_dest
, gsi
,
2789 c1
, c2
, decl1
, decl2
,
2793 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
2797 if (codecvt1
== CALL_EXPR
)
2799 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2800 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2801 gimple_call_set_lhs (new_stmt
, new_temp
);
2805 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2806 new_temp
= make_ssa_name (vec_dest
, NULL
);
2807 new_stmt
= gimple_build_assign_with_ops (codecvt1
,
2812 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2815 new_stmt
= SSA_NAME_DEF_STMT (vop0
);
2818 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
2821 if (!prev_stmt_info
)
2822 STMT_VINFO_VEC_STMT (stmt_info
) = new_stmt
;
2824 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
2825 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
2830 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2834 /* In case the vectorization factor (VF) is bigger than the number
2835 of elements that we can fit in a vectype (nunits), we have to
2836 generate more than one vector stmt - i.e - we need to "unroll"
2837 the vector stmt by a factor VF/nunits. */
2838 for (j
= 0; j
< ncopies
; j
++)
2842 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
2846 vec_oprnds0
.truncate (0);
2847 vect_get_loop_based_defs (&last_oprnd
, stmt
, dt
[0], &vec_oprnds0
,
2848 vect_pow2 (multi_step_cvt
) - 1);
2851 /* Arguments are ready. Create the new vector stmts. */
2853 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
2855 if (codecvt1
== CALL_EXPR
)
2857 new_stmt
= gimple_build_call (decl1
, 1, vop0
);
2858 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
2859 gimple_call_set_lhs (new_stmt
, new_temp
);
2863 gcc_assert (TREE_CODE_LENGTH (codecvt1
) == unary_op
);
2864 new_temp
= make_ssa_name (vec_dest
, NULL
);
2865 new_stmt
= gimple_build_assign_with_ops (codecvt1
, new_temp
,
2869 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
2870 vec_oprnds0
[i
] = new_temp
;
2873 vect_create_vectorized_demotion_stmts (&vec_oprnds0
, multi_step_cvt
,
2874 stmt
, vec_dsts
, gsi
,
2879 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
2883 vec_oprnds0
.release ();
2884 vec_oprnds1
.release ();
2885 vec_dsts
.release ();
2886 interm_types
.release ();
2892 /* Function vectorizable_assignment.
2894 Check if STMT performs an assignment (copy) that can be vectorized.
2895 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2896 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2897 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2900 vectorizable_assignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
2901 gimple
*vec_stmt
, slp_tree slp_node
)
2906 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2907 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
2908 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2912 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
2913 unsigned int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
2916 vec
<tree
> vec_oprnds
= vNULL
;
2918 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2919 gimple new_stmt
= NULL
;
2920 stmt_vec_info prev_stmt_info
= NULL
;
2921 enum tree_code code
;
2924 /* Multiple types in SLP are handled by creating the appropriate number of
2925 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
2927 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
2930 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
2932 gcc_assert (ncopies
>= 1);
2934 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
2937 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
2940 /* Is vectorizable assignment? */
2941 if (!is_gimple_assign (stmt
))
2944 scalar_dest
= gimple_assign_lhs (stmt
);
2945 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
2948 code
= gimple_assign_rhs_code (stmt
);
2949 if (gimple_assign_single_p (stmt
)
2950 || code
== PAREN_EXPR
2951 || CONVERT_EXPR_CODE_P (code
))
2952 op
= gimple_assign_rhs1 (stmt
);
2956 if (code
== VIEW_CONVERT_EXPR
)
2957 op
= TREE_OPERAND (op
, 0);
2959 if (!vect_is_simple_use_1 (op
, stmt
, loop_vinfo
, bb_vinfo
,
2960 &def_stmt
, &def
, &dt
[0], &vectype_in
))
2962 if (dump_enabled_p ())
2963 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2964 "use not simple.\n");
2968 /* We can handle NOP_EXPR conversions that do not change the number
2969 of elements or the vector size. */
2970 if ((CONVERT_EXPR_CODE_P (code
)
2971 || code
== VIEW_CONVERT_EXPR
)
2973 || TYPE_VECTOR_SUBPARTS (vectype_in
) != nunits
2974 || (GET_MODE_SIZE (TYPE_MODE (vectype
))
2975 != GET_MODE_SIZE (TYPE_MODE (vectype_in
)))))
2978 /* We do not handle bit-precision changes. */
2979 if ((CONVERT_EXPR_CODE_P (code
)
2980 || code
== VIEW_CONVERT_EXPR
)
2981 && INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest
))
2982 && ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2983 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
2984 || ((TYPE_PRECISION (TREE_TYPE (op
))
2985 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (op
))))))
2986 /* But a conversion that does not change the bit-pattern is ok. */
2987 && !((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
2988 > TYPE_PRECISION (TREE_TYPE (op
)))
2989 && TYPE_UNSIGNED (TREE_TYPE (op
))))
2991 if (dump_enabled_p ())
2992 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2993 "type conversion to/from bit-precision "
2998 if (!vec_stmt
) /* transformation not required. */
3000 STMT_VINFO_TYPE (stmt_info
) = assignment_vec_info_type
;
3001 if (dump_enabled_p ())
3002 dump_printf_loc (MSG_NOTE
, vect_location
,
3003 "=== vectorizable_assignment ===\n");
3004 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3009 if (dump_enabled_p ())
3010 dump_printf_loc (MSG_NOTE
, vect_location
, "transform assignment.\n");
3013 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3016 for (j
= 0; j
< ncopies
; j
++)
3020 vect_get_vec_defs (op
, NULL
, stmt
, &vec_oprnds
, NULL
, slp_node
, -1);
3022 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds
, NULL
);
3024 /* Arguments are ready. create the new vector stmt. */
3025 FOR_EACH_VEC_ELT (vec_oprnds
, i
, vop
)
3027 if (CONVERT_EXPR_CODE_P (code
)
3028 || code
== VIEW_CONVERT_EXPR
)
3029 vop
= build1 (VIEW_CONVERT_EXPR
, vectype
, vop
);
3030 new_stmt
= gimple_build_assign (vec_dest
, vop
);
3031 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3032 gimple_assign_set_lhs (new_stmt
, new_temp
);
3033 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3035 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3042 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3044 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3046 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3049 vec_oprnds
.release ();
3054 /* Return TRUE if CODE (a shift operation) is supported for SCALAR_TYPE
3055 either as shift by a scalar or by a vector. */
3058 vect_supportable_shift (enum tree_code code
, tree scalar_type
)
3061 enum machine_mode vec_mode
;
3066 vectype
= get_vectype_for_scalar_type (scalar_type
);
3070 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3072 || optab_handler (optab
, TYPE_MODE (vectype
)) == CODE_FOR_nothing
)
3074 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3076 || (optab_handler (optab
, TYPE_MODE (vectype
))
3077 == CODE_FOR_nothing
))
3081 vec_mode
= TYPE_MODE (vectype
);
3082 icode
= (int) optab_handler (optab
, vec_mode
);
3083 if (icode
== CODE_FOR_nothing
)
3090 /* Function vectorizable_shift.
3092 Check if STMT performs a shift operation that can be vectorized.
3093 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3094 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3095 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3098 vectorizable_shift (gimple stmt
, gimple_stmt_iterator
*gsi
,
3099 gimple
*vec_stmt
, slp_tree slp_node
)
3103 tree op0
, op1
= NULL
;
3104 tree vec_oprnd1
= NULL_TREE
;
3105 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3107 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3108 enum tree_code code
;
3109 enum machine_mode vec_mode
;
3113 enum machine_mode optab_op2_mode
;
3116 enum vect_def_type dt
[2] = {vect_unknown_def_type
, vect_unknown_def_type
};
3117 gimple new_stmt
= NULL
;
3118 stmt_vec_info prev_stmt_info
;
3125 vec
<tree
> vec_oprnds0
= vNULL
;
3126 vec
<tree
> vec_oprnds1
= vNULL
;
3129 bool scalar_shift_arg
= true;
3130 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3133 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3136 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3139 /* Is STMT a vectorizable binary/unary operation? */
3140 if (!is_gimple_assign (stmt
))
3143 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3146 code
= gimple_assign_rhs_code (stmt
);
3148 if (!(code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3149 || code
== RROTATE_EXPR
))
3152 scalar_dest
= gimple_assign_lhs (stmt
);
3153 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3154 if (TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3155 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3157 if (dump_enabled_p ())
3158 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3159 "bit-precision shifts not supported.\n");
3163 op0
= gimple_assign_rhs1 (stmt
);
3164 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3165 &def_stmt
, &def
, &dt
[0], &vectype
))
3167 if (dump_enabled_p ())
3168 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3169 "use not simple.\n");
3172 /* If op0 is an external or constant def use a vector type with
3173 the same size as the output vector type. */
3175 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3177 gcc_assert (vectype
);
3180 if (dump_enabled_p ())
3181 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3182 "no vectype for scalar type\n");
3186 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3187 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3188 if (nunits_out
!= nunits_in
)
3191 op1
= gimple_assign_rhs2 (stmt
);
3192 if (!vect_is_simple_use_1 (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3193 &def
, &dt
[1], &op1_vectype
))
3195 if (dump_enabled_p ())
3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3197 "use not simple.\n");
3202 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3206 /* Multiple types in SLP are handled by creating the appropriate number of
3207 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3209 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3212 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3214 gcc_assert (ncopies
>= 1);
3216 /* Determine whether the shift amount is a vector, or scalar. If the
3217 shift/rotate amount is a vector, use the vector/vector shift optabs. */
3219 if (dt
[1] == vect_internal_def
&& !slp_node
)
3220 scalar_shift_arg
= false;
3221 else if (dt
[1] == vect_constant_def
3222 || dt
[1] == vect_external_def
3223 || dt
[1] == vect_internal_def
)
3225 /* In SLP, need to check whether the shift count is the same,
3226 in loops if it is a constant or invariant, it is always
3230 vec
<gimple
> stmts
= SLP_TREE_SCALAR_STMTS (slp_node
);
3233 FOR_EACH_VEC_ELT (stmts
, k
, slpstmt
)
3234 if (!operand_equal_p (gimple_assign_rhs2 (slpstmt
), op1
, 0))
3235 scalar_shift_arg
= false;
3240 if (dump_enabled_p ())
3241 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3242 "operand mode requires invariant argument.\n");
3246 /* Vector shifted by vector. */
3247 if (!scalar_shift_arg
)
3249 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3250 if (dump_enabled_p ())
3251 dump_printf_loc (MSG_NOTE
, vect_location
,
3252 "vector/vector shift/rotate found.\n");
3255 op1_vectype
= get_same_sized_vectype (TREE_TYPE (op1
), vectype_out
);
3256 if (op1_vectype
== NULL_TREE
3257 || TYPE_MODE (op1_vectype
) != TYPE_MODE (vectype
))
3259 if (dump_enabled_p ())
3260 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3261 "unusable type for last operand in"
3262 " vector/vector shift/rotate.\n");
3266 /* See if the machine has a vector shifted by scalar insn and if not
3267 then see if it has a vector shifted by vector insn. */
3270 optab
= optab_for_tree_code (code
, vectype
, optab_scalar
);
3272 && optab_handler (optab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3274 if (dump_enabled_p ())
3275 dump_printf_loc (MSG_NOTE
, vect_location
,
3276 "vector/scalar shift/rotate found.\n");
3280 optab
= optab_for_tree_code (code
, vectype
, optab_vector
);
3282 && (optab_handler (optab
, TYPE_MODE (vectype
))
3283 != CODE_FOR_nothing
))
3285 scalar_shift_arg
= false;
3287 if (dump_enabled_p ())
3288 dump_printf_loc (MSG_NOTE
, vect_location
,
3289 "vector/vector shift/rotate found.\n");
3291 /* Unlike the other binary operators, shifts/rotates have
3292 the rhs being int, instead of the same type as the lhs,
3293 so make sure the scalar is the right type if we are
3294 dealing with vectors of long long/long/short/char. */
3295 if (dt
[1] == vect_constant_def
)
3296 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3297 else if (!useless_type_conversion_p (TREE_TYPE (vectype
),
3301 && TYPE_MODE (TREE_TYPE (vectype
))
3302 != TYPE_MODE (TREE_TYPE (op1
)))
3304 if (dump_enabled_p ())
3305 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3306 "unusable type for last operand in"
3307 " vector/vector shift/rotate.\n");
3310 if (vec_stmt
&& !slp_node
)
3312 op1
= fold_convert (TREE_TYPE (vectype
), op1
);
3313 op1
= vect_init_vector (stmt
, op1
,
3314 TREE_TYPE (vectype
), NULL
);
3321 /* Supportable by target? */
3324 if (dump_enabled_p ())
3325 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3329 vec_mode
= TYPE_MODE (vectype
);
3330 icode
= (int) optab_handler (optab
, vec_mode
);
3331 if (icode
== CODE_FOR_nothing
)
3333 if (dump_enabled_p ())
3334 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3335 "op not supported by target.\n");
3336 /* Check only during analysis. */
3337 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3338 || (vf
< vect_min_worthwhile_factor (code
)
3341 if (dump_enabled_p ())
3342 dump_printf_loc (MSG_NOTE
, vect_location
,
3343 "proceeding using word mode.\n");
3346 /* Worthwhile without SIMD support? Check only during analysis. */
3347 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
3348 && vf
< vect_min_worthwhile_factor (code
)
3351 if (dump_enabled_p ())
3352 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3353 "not worthwhile without SIMD support.\n");
3357 if (!vec_stmt
) /* transformation not required. */
3359 STMT_VINFO_TYPE (stmt_info
) = shift_vec_info_type
;
3360 if (dump_enabled_p ())
3361 dump_printf_loc (MSG_NOTE
, vect_location
,
3362 "=== vectorizable_shift ===\n");
3363 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3369 if (dump_enabled_p ())
3370 dump_printf_loc (MSG_NOTE
, vect_location
,
3371 "transform binary/unary operation.\n");
3374 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3376 prev_stmt_info
= NULL
;
3377 for (j
= 0; j
< ncopies
; j
++)
3382 if (scalar_shift_arg
)
3384 /* Vector shl and shr insn patterns can be defined with scalar
3385 operand 2 (shift operand). In this case, use constant or loop
3386 invariant op1 directly, without extending it to vector mode
3388 optab_op2_mode
= insn_data
[icode
].operand
[2].mode
;
3389 if (!VECTOR_MODE_P (optab_op2_mode
))
3391 if (dump_enabled_p ())
3392 dump_printf_loc (MSG_NOTE
, vect_location
,
3393 "operand 1 using scalar mode.\n");
3395 vec_oprnds1
.create (slp_node
? slp_node
->vec_stmts_size
: 1);
3396 vec_oprnds1
.quick_push (vec_oprnd1
);
3399 /* Store vec_oprnd1 for every vector stmt to be created
3400 for SLP_NODE. We check during the analysis that all
3401 the shift arguments are the same.
3402 TODO: Allow different constants for different vector
3403 stmts generated for an SLP instance. */
3404 for (k
= 0; k
< slp_node
->vec_stmts_size
- 1; k
++)
3405 vec_oprnds1
.quick_push (vec_oprnd1
);
3410 /* vec_oprnd1 is available if operand 1 should be of a scalar-type
3411 (a special case for certain kind of vector shifts); otherwise,
3412 operand 1 should be of a vector type (the usual case). */
3414 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3417 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3421 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3423 /* Arguments are ready. Create the new vector stmt. */
3424 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3426 vop1
= vec_oprnds1
[i
];
3427 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
, vop0
, vop1
);
3428 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3429 gimple_assign_set_lhs (new_stmt
, new_temp
);
3430 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3432 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3439 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3441 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3442 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
3445 vec_oprnds0
.release ();
3446 vec_oprnds1
.release ();
3452 static tree
permute_vec_elements (tree
, tree
, tree
, gimple
,
3453 gimple_stmt_iterator
*);
3456 /* Function vectorizable_operation.
3458 Check if STMT performs a binary, unary or ternary operation that can
3460 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3461 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3462 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3465 vectorizable_operation (gimple stmt
, gimple_stmt_iterator
*gsi
,
3466 gimple
*vec_stmt
, slp_tree slp_node
)
3470 tree op0
, op1
= NULL_TREE
, op2
= NULL_TREE
;
3471 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3473 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3474 enum tree_code code
;
3475 enum machine_mode vec_mode
;
/* Def-types for up to three operands, and per-operand vectors of
   vectorized defs that are reused across the NCOPIES copies created
   in the transform loop below.  */
3482 enum vect_def_type dt
[3]
3483 = {vect_unknown_def_type
, vect_unknown_def_type
, vect_unknown_def_type
};
3484 gimple new_stmt
= NULL
;
3485 stmt_vec_info prev_stmt_info
;
3491 vec
<tree
> vec_oprnds0
= vNULL
;
3492 vec
<tree
> vec_oprnds1
= vNULL
;
3493 vec
<tree
> vec_oprnds2
= vNULL
;
3494 tree vop0
, vop1
, vop2
;
3495 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
/* Early exit when the stmt is not relevant to the loop vectorization in
   progress and we are not doing basic-block (SLP) vectorization.  */
3498 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3501 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3504 /* Is STMT a vectorizable binary/unary operation? */
3505 if (!is_gimple_assign (stmt
))
3508 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
3511 code
= gimple_assign_rhs_code (stmt
);
3513 /* For pointer addition, we should use the normal plus for
3514 the vector addition. */
3515 if (code
== POINTER_PLUS_EXPR
)
3518 /* Support only unary or binary operations. */
3519 op_type
= TREE_CODE_LENGTH (code
);
3520 if (op_type
!= unary_op
&& op_type
!= binary_op
&& op_type
!= ternary_op
)
3522 if (dump_enabled_p ())
3523 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3524 "num. args = %d (not unary/binary/ternary op).\n",
3529 scalar_dest
= gimple_assign_lhs (stmt
);
3530 vectype_out
= STMT_VINFO_VECTYPE (stmt_info
);
3532 /* Most operations cannot handle bit-precision types without extra
3534 if ((TYPE_PRECISION (TREE_TYPE (scalar_dest
))
3535 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (scalar_dest
))))
3536 /* Exception are bitwise binary operations. */
3537 && code
!= BIT_IOR_EXPR
3538 && code
!= BIT_XOR_EXPR
3539 && code
!= BIT_AND_EXPR
)
3541 if (dump_enabled_p ())
3542 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3543 "bit-precision arithmetic not supported.\n");
/* Validate each operand with vect_is_simple_use*; a failure means the
   operand's def cannot be handled by the vectorizer.  */
3547 op0
= gimple_assign_rhs1 (stmt
);
3548 if (!vect_is_simple_use_1 (op0
, stmt
, loop_vinfo
, bb_vinfo
,
3549 &def_stmt
, &def
, &dt
[0], &vectype
))
3551 if (dump_enabled_p ())
3552 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3553 "use not simple.\n");
3556 /* If op0 is an external or constant def use a vector type with
3557 the same size as the output vector type. */
3559 vectype
= get_same_sized_vectype (TREE_TYPE (op0
), vectype_out
);
3561 gcc_assert (vectype
);
3564 if (dump_enabled_p ())
3566 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3567 "no vectype for scalar type ");
3568 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
3570 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
/* Input and output vector types must agree in number of units.  */
3576 nunits_out
= TYPE_VECTOR_SUBPARTS (vectype_out
);
3577 nunits_in
= TYPE_VECTOR_SUBPARTS (vectype
);
3578 if (nunits_out
!= nunits_in
)
3581 if (op_type
== binary_op
|| op_type
== ternary_op
)
3583 op1
= gimple_assign_rhs2 (stmt
);
3584 if (!vect_is_simple_use (op1
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3587 if (dump_enabled_p ())
3588 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3589 "use not simple.\n");
3593 if (op_type
== ternary_op
)
3595 op2
= gimple_assign_rhs3 (stmt
);
3596 if (!vect_is_simple_use (op2
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3599 if (dump_enabled_p ())
3600 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3601 "use not simple.\n");
/* Vectorization factor; used below to decide whether vectorizing in
   word mode (without real SIMD support) is worthwhile.  */
3607 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
3611 /* Multiple types in SLP are handled by creating the appropriate number of
3612 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3614 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
3617 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits_in
;
3619 gcc_assert (ncopies
>= 1);
3621 /* Shifts are handled in vectorizable_shift (). */
3622 if (code
== LSHIFT_EXPR
|| code
== RSHIFT_EXPR
|| code
== LROTATE_EXPR
3623 || code
== RROTATE_EXPR
)
3626 /* Supportable by target? */
3628 vec_mode
= TYPE_MODE (vectype
);
3629 if (code
== MULT_HIGHPART_EXPR
)
3631 if (can_mult_highpart_p (vec_mode
, TYPE_UNSIGNED (vectype
)))
3632 icode
= LAST_INSN_CODE
;
3634 icode
= CODE_FOR_nothing
;
3638 optab
= optab_for_tree_code (code
, vectype
, optab_default
);
3641 if (dump_enabled_p ())
3642 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3646 icode
= (int) optab_handler (optab
, vec_mode
);
3649 if (icode
== CODE_FOR_nothing
)
3651 if (dump_enabled_p ())
3652 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3653 "op not supported by target.\n");
3654 /* Check only during analysis. */
3655 if (GET_MODE_SIZE (vec_mode
) != UNITS_PER_WORD
3656 || (!vec_stmt
&& vf
< vect_min_worthwhile_factor (code
)))
3658 if (dump_enabled_p ())
3659 dump_printf_loc (MSG_NOTE
, vect_location
,
3660 "proceeding using word mode.\n");
3663 /* Worthwhile without SIMD support? Check only during analysis. */
3664 if (!VECTOR_MODE_P (vec_mode
)
3666 && vf
< vect_min_worthwhile_factor (code
))
3668 if (dump_enabled_p ())
3669 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3670 "not worthwhile without SIMD support.\n");
/* Analysis phase only: record the stmt kind and model its cost, then
   return without emitting code.  */
3674 if (!vec_stmt
) /* transformation not required. */
3676 STMT_VINFO_TYPE (stmt_info
) = op_vec_info_type
;
3677 if (dump_enabled_p ())
3678 dump_printf_loc (MSG_NOTE
, vect_location
,
3679 "=== vectorizable_operation ===\n");
3680 vect_model_simple_cost (stmt_info
, ncopies
, dt
, NULL
, NULL
);
3686 if (dump_enabled_p ())
3687 dump_printf_loc (MSG_NOTE
, vect_location
,
3688 "transform binary/unary operation.\n");
3691 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
3693 /* In case the vectorization factor (VF) is bigger than the number
3694 of elements that we can fit in a vectype (nunits), we have to generate
3695 more than one vector stmt - i.e - we need to "unroll" the
3696 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3697 from one copy of the vector stmt to the next, in the field
3698 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3699 stages to find the correct vector defs to be used when vectorizing
3700 stmts that use the defs of the current stmt. The example below
3701 illustrates the vectorization process when VF=16 and nunits=4 (i.e.,
3702 we need to create 4 vectorized stmts):
3704 before vectorization:
3705 RELATED_STMT VEC_STMT
3709 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3711 RELATED_STMT VEC_STMT
3712 VS1_0: vx0 = memref0 VS1_1 -
3713 VS1_1: vx1 = memref1 VS1_2 -
3714 VS1_2: vx2 = memref2 VS1_3 -
3715 VS1_3: vx3 = memref3 - -
3716 S1: x = load - VS1_0
3719 step2: vectorize stmt S2 (done here):
3720 To vectorize stmt S2 we first need to find the relevant vector
3721 def for the first operand 'x'. This is, as usual, obtained from
3722 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3723 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3724 relevant vector def 'vx0'. Having found 'vx0' we can generate
3725 the vector stmt VS2_0, and as usual, record it in the
3726 STMT_VINFO_VEC_STMT of stmt S2.
3727 When creating the second copy (VS2_1), we obtain the relevant vector
3728 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3729 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3730 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3731 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3732 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3733 chain of stmts and pointers:
3734 RELATED_STMT VEC_STMT
3735 VS1_0: vx0 = memref0 VS1_1 -
3736 VS1_1: vx1 = memref1 VS1_2 -
3737 VS1_2: vx2 = memref2 VS1_3 -
3738 VS1_3: vx3 = memref3 - -
3739 S1: x = load - VS1_0
3740 VS2_0: vz0 = vx0 + v1 VS2_1 -
3741 VS2_1: vz1 = vx1 + v1 VS2_2 -
3742 VS2_2: vz2 = vx2 + v1 VS2_3 -
3743 VS2_3: vz3 = vx3 + v1 - -
3744 S2: z = x + 1 - VS2_0 */
/* Transform phase: create NCOPIES vector stmts, chaining the copies
   through STMT_VINFO_RELATED_STMT as described above.  */
3746 prev_stmt_info
= NULL
;
3747 for (j
= 0; j
< ncopies
; j
++)
3752 if (op_type
== binary_op
|| op_type
== ternary_op
)
3753 vect_get_vec_defs (op0
, op1
, stmt
, &vec_oprnds0
, &vec_oprnds1
,
3756 vect_get_vec_defs (op0
, NULL_TREE
, stmt
, &vec_oprnds0
, NULL
,
3758 if (op_type
== ternary_op
)
3760 vec_oprnds2
.create (1);
3761 vec_oprnds2
.quick_push (vect_get_vec_def_for_operand (op2
,
3768 vect_get_vec_defs_for_stmt_copy (dt
, &vec_oprnds0
, &vec_oprnds1
);
3769 if (op_type
== ternary_op
)
3771 tree vec_oprnd
= vec_oprnds2
.pop ();
3772 vec_oprnds2
.quick_push (vect_get_vec_def_for_stmt_copy (dt
[2],
3777 /* Arguments are ready. Create the new vector stmt. */
3778 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vop0
)
3780 vop1
= ((op_type
== binary_op
|| op_type
== ternary_op
)
3781 ? vec_oprnds1
[i
] : NULL_TREE
);
3782 vop2
= ((op_type
== ternary_op
)
3783 ? vec_oprnds2
[i
] : NULL_TREE
);
3784 new_stmt
= gimple_build_assign_with_ops (code
, vec_dest
,
3786 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
3787 gimple_assign_set_lhs (new_stmt
, new_temp
);
3788 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
3790 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
3797 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
3799 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
3800 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Free the temporary operand vectors allocated above.  */
3803 vec_oprnds0
.release ();
3804 vec_oprnds1
.release ();
3805 vec_oprnds2
.release ();
3810 /* A helper function to ensure data reference DR's base alignment
3814 ensure_base_align (stmt_vec_info stmt_info
, struct data_reference
*dr
)
3819 if (((dataref_aux
*)dr
->aux
)->base_misaligned
)
3821 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3822 tree base_decl
= ((dataref_aux
*)dr
->aux
)->base_decl
;
3824 DECL_ALIGN (base_decl
) = TYPE_ALIGN (vectype
);
3825 DECL_USER_ALIGN (base_decl
) = 1;
3826 ((dataref_aux
*)dr
->aux
)->base_misaligned
= false;
3831 /* Function vectorizable_store.
3833 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3835 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3836 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3837 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3840 vectorizable_store (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
3846 tree vec_oprnd
= NULL_TREE
;
3847 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3848 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
3849 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
3851 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3852 struct loop
*loop
= NULL
;
3853 enum machine_mode vec_mode
;
3855 enum dr_alignment_support alignment_support_scheme
;
3858 enum vect_def_type dt
;
3859 stmt_vec_info prev_stmt_info
= NULL
;
3860 tree dataref_ptr
= NULL_TREE
;
3861 tree dataref_offset
= NULL_TREE
;
3862 gimple ptr_incr
= NULL
;
3863 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
3866 gimple next_stmt
, first_stmt
= NULL
;
3867 bool grouped_store
= false;
3868 bool store_lanes_p
= false;
3869 unsigned int group_size
, i
;
/* Working vectors: vectorized defs per group member (DR_CHAIN/OPRNDS),
   permuted results (RESULT_CHAIN) and SLP operands (VEC_OPRNDS).  */
3870 vec
<tree
> dr_chain
= vNULL
;
3871 vec
<tree
> oprnds
= vNULL
;
3872 vec
<tree
> result_chain
= vNULL
;
3874 vec
<tree
> vec_oprnds
= vNULL
;
3875 bool slp
= (slp_node
!= NULL
);
3876 unsigned int vec_num
;
3877 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
3881 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3883 /* Multiple types in SLP are handled by creating the appropriate number of
3884 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
3886 if (slp
|| PURE_SLP_STMT (stmt_info
))
3889 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
3891 gcc_assert (ncopies
>= 1);
3893 /* FORNOW. This restriction should be relaxed. */
3894 if (loop
&& nested_in_vect_loop_p (loop
, stmt
) && ncopies
> 1)
3896 if (dump_enabled_p ())
3897 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3898 "multiple types in nested loop.\n");
/* Early exit when the stmt is not relevant to the vectorization in
   progress (loop or basic-block).  */
3902 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
3905 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
3908 /* Is vectorizable store? */
3910 if (!is_gimple_assign (stmt
))
3913 scalar_dest
= gimple_assign_lhs (stmt
);
3914 if (TREE_CODE (scalar_dest
) == VIEW_CONVERT_EXPR
3915 && is_pattern_stmt_p (stmt_info
))
3916 scalar_dest
= TREE_OPERAND (scalar_dest
, 0);
/* The destination must be a memory reference of a supported kind.  */
3917 if (TREE_CODE (scalar_dest
) != ARRAY_REF
3918 && TREE_CODE (scalar_dest
) != BIT_FIELD_REF
3919 && TREE_CODE (scalar_dest
) != INDIRECT_REF
3920 && TREE_CODE (scalar_dest
) != COMPONENT_REF
3921 && TREE_CODE (scalar_dest
) != IMAGPART_EXPR
3922 && TREE_CODE (scalar_dest
) != REALPART_EXPR
3923 && TREE_CODE (scalar_dest
) != MEM_REF
)
3926 gcc_assert (gimple_assign_single_p (stmt
));
3927 op
= gimple_assign_rhs1 (stmt
);
3928 if (!vect_is_simple_use (op
, stmt
, loop_vinfo
, bb_vinfo
, &def_stmt
,
3931 if (dump_enabled_p ())
3932 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3933 "use not simple.\n");
3937 elem_type
= TREE_TYPE (vectype
);
3938 vec_mode
= TYPE_MODE (vectype
);
3940 /* FORNOW. In some cases can vectorize even if data-type not supported
3941 (e.g. - array initialization with 0). */
3942 if (optab_handler (mov_optab
, vec_mode
) == CODE_FOR_nothing
)
3944 if (!STMT_VINFO_DATA_REF (stmt_info
))
3948 if (tree_int_cst_compare (loop
&& nested_in_vect_loop_p (loop
, stmt
)
3949 ? STMT_VINFO_DR_STEP (stmt_info
) : DR_STEP (dr
),
3950 size_zero_node
) < 0)
3952 if (dump_enabled_p ())
3953 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3954 "negative step for store.\n");
/* Grouped (interleaved) store: find the group leader and check that
   the target can handle the whole group, either with store-lanes or
   with an explicit permutation.  */
3958 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
3960 grouped_store
= true;
3961 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
3962 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
3964 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
3965 if (vect_store_lanes_supported (vectype
, group_size
))
3966 store_lanes_p
= true;
3967 else if (!vect_grouped_store_supported (vectype
, group_size
))
3971 if (first_stmt
== stmt
)
3973 /* STMT is the leader of the group. Check the operands of all the
3974 stmts of the group. */
3975 next_stmt
= GROUP_NEXT_ELEMENT (stmt_info
);
3978 gcc_assert (gimple_assign_single_p (next_stmt
));
3979 op
= gimple_assign_rhs1 (next_stmt
);
3980 if (!vect_is_simple_use (op
, next_stmt
, loop_vinfo
, bb_vinfo
,
3981 &def_stmt
, &def
, &dt
))
3983 if (dump_enabled_p ())
3984 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3985 "use not simple.\n");
3988 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
/* Analysis phase only: record the stmt kind and model the store cost,
   then return without emitting code.  */
3993 if (!vec_stmt
) /* transformation not required. */
3995 STMT_VINFO_TYPE (stmt_info
) = store_vec_info_type
;
3996 vect_model_store_cost (stmt_info
, ncopies
, store_lanes_p
, dt
,
/* Transform phase starts here.  */
4003 ensure_base_align (stmt_info
, dr
);
4007 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4008 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4010 GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))++;
4013 gcc_assert (!loop
|| !nested_in_vect_loop_p (loop
, stmt
));
4015 /* We vectorize all the stmts of the interleaving group when we
4016 reach the last stmt in the group. */
4017 if (GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt
))
4018 < GROUP_SIZE (vinfo_for_stmt (first_stmt
))
4027 grouped_store
= false;
4028 /* VEC_NUM is the number of vect stmts to be created for this
4030 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4031 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
4032 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4033 op
= gimple_assign_rhs1 (first_stmt
);
4036 /* VEC_NUM is the number of vect stmts to be created for this
4038 vec_num
= group_size
;
4044 group_size
= vec_num
= 1;
4047 if (dump_enabled_p ())
4048 dump_printf_loc (MSG_NOTE
, vect_location
,
4049 "transform store. ncopies = %d\n", ncopies
);
4051 dr_chain
.create (group_size
);
4052 oprnds
.create (group_size
);
4054 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4055 gcc_assert (alignment_support_scheme
);
4056 /* Targets with store-lane instructions must not require explicit
4058 gcc_assert (!store_lanes_p
4059 || alignment_support_scheme
== dr_aligned
4060 || alignment_support_scheme
== dr_unaligned_supported
);
4063 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
4065 aggr_type
= vectype
;
4067 /* In case the vectorization factor (VF) is bigger than the number
4068 of elements that we can fit in a vectype (nunits), we have to generate
4069 more than one vector stmt - i.e - we need to "unroll" the
4070 vector stmt by a factor VF/nunits. For more details see documentation in
4071 vect_get_vec_def_for_copy_stmt. */
4073 /* In case of interleaving (non-unit grouped access):
4080 We create vectorized stores starting from base address (the access of the
4081 first stmt in the chain (S2 in the above example), when the last store stmt
4082 of the chain (S4) is reached:
4085 VS2: &base + vec_size*1 = vx0
4086 VS3: &base + vec_size*2 = vx1
4087 VS4: &base + vec_size*3 = vx3
4089 Then permutation statements are generated:
4091 VS5: vx5 = VEC_PERM_EXPR < vx0, vx3, {0, 8, 1, 9, 2, 10, 3, 11} >
4092 VS6: vx6 = VEC_PERM_EXPR < vx0, vx3, {4, 12, 5, 13, 6, 14, 7, 15} >
4095 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4096 (the order of the data-refs in the output of vect_permute_store_chain
4097 corresponds to the order of scalar stmts in the interleaving chain - see
4098 the documentation of vect_permute_store_chain()).
4100 In case of both multiple types and interleaving, above vector stores and
4101 permutation stmts are created for every copy. The result vector stmts are
4102 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4103 STMT_VINFO_RELATED_STMT for the next copies.
4106 prev_stmt_info
= NULL
;
4107 for (j
= 0; j
< ncopies
; j
++)
4115 /* Get vectorized arguments for SLP_NODE. */
4116 vect_get_vec_defs (op
, NULL_TREE
, stmt
, &vec_oprnds
,
4117 NULL
, slp_node
, -1);
4119 vec_oprnd
= vec_oprnds
[0];
4123 /* For interleaved stores we collect vectorized defs for all the
4124 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4125 used as an input to vect_permute_store_chain(), and OPRNDS as
4126 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4128 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4129 OPRNDS are of size 1. */
4130 next_stmt
= first_stmt
;
4131 for (i
= 0; i
< group_size
; i
++)
4133 /* Since gaps are not supported for interleaved stores,
4134 GROUP_SIZE is the exact number of stmts in the chain.
4135 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4136 there is no interleaving, GROUP_SIZE is 1, and only one
4137 iteration of the loop will be executed. */
4138 gcc_assert (next_stmt
4139 && gimple_assign_single_p (next_stmt
));
4140 op
= gimple_assign_rhs1 (next_stmt
);
4142 vec_oprnd
= vect_get_vec_def_for_operand (op
, next_stmt
,
4144 dr_chain
.quick_push (vec_oprnd
);
4145 oprnds
.quick_push (vec_oprnd
);
4146 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4150 /* We should have catched mismatched types earlier. */
4151 gcc_assert (useless_type_conversion_p (vectype
,
4152 TREE_TYPE (vec_oprnd
)));
4153 bool simd_lane_access_p
4154 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
4155 if (simd_lane_access_p
4156 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
4157 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
4158 && integer_zerop (DR_OFFSET (first_dr
))
4159 && integer_zerop (DR_INIT (first_dr
))
4160 && alias_sets_conflict_p (get_alias_set (aggr_type
),
4161 get_alias_set (DR_REF (first_dr
))))
4163 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
4164 dataref_offset
= build_int_cst (reference_alias_ptr_type
4165 (DR_REF (first_dr
)), 0);
4170 = vect_create_data_ref_ptr (first_stmt
, aggr_type
,
4171 simd_lane_access_p
? loop
: NULL
,
4172 NULL_TREE
, &dummy
, gsi
, &ptr_incr
,
4173 simd_lane_access_p
, &inv_p
);
4174 gcc_assert (bb_vinfo
|| !inv_p
);
4178 /* For interleaved stores we created vectorized defs for all the
4179 defs stored in OPRNDS in the previous iteration (previous copy).
4180 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4181 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4183 If the store is not grouped, GROUP_SIZE is 1, and DR_CHAIN and
4184 OPRNDS are of size 1. */
4185 for (i
= 0; i
< group_size
; i
++)
4188 vect_is_simple_use (op
, NULL
, loop_vinfo
, bb_vinfo
, &def_stmt
,
4190 vec_oprnd
= vect_get_vec_def_for_stmt_copy (dt
, op
);
4191 dr_chain
[i
] = vec_oprnd
;
4192 oprnds
[i
] = vec_oprnd
;
4196 = int_const_binop (PLUS_EXPR
, dataref_offset
,
4197 TYPE_SIZE_UNIT (aggr_type
));
4199 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
4200 TYPE_SIZE_UNIT (aggr_type
));
4207 /* Combine all the vectors into an array. */
4208 vec_array
= create_vector_array (vectype
, vec_num
);
4209 for (i
= 0; i
< vec_num
; i
++)
4211 vec_oprnd
= dr_chain
[i
];
4212 write_vector_array (stmt
, gsi
, vec_oprnd
, vec_array
, i
);
4216 MEM_REF[...all elements...] = STORE_LANES (VEC_ARRAY). */
4217 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
4218 new_stmt
= gimple_build_call_internal (IFN_STORE_LANES
, 1, vec_array
);
4219 gimple_call_set_lhs (new_stmt
, data_ref
);
4220 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4228 result_chain
.create (group_size
);
4230 vect_permute_store_chain (dr_chain
, group_size
, stmt
, gsi
,
4234 next_stmt
= first_stmt
;
4235 for (i
= 0; i
< vec_num
; i
++)
4237 unsigned align
, misalign
;
4240 /* Bump the vector pointer. */
4241 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
4245 vec_oprnd
= vec_oprnds
[i
];
4246 else if (grouped_store
)
4247 /* For grouped stores vectorized defs are interleaved in
4248 vect_permute_store_chain(). */
4249 vec_oprnd
= result_chain
[i
];
4251 data_ref
= build2 (MEM_REF
, TREE_TYPE (vec_oprnd
), dataref_ptr
,
4254 : build_int_cst (reference_alias_ptr_type
4255 (DR_REF (first_dr
)), 0));
4256 align
= TYPE_ALIGN_UNIT (vectype
);
4257 if (aligned_access_p (first_dr
))
4259 else if (DR_MISALIGNMENT (first_dr
) == -1)
4261 TREE_TYPE (data_ref
)
4262 = build_aligned_type (TREE_TYPE (data_ref
),
4263 TYPE_ALIGN (elem_type
));
4264 align
= TYPE_ALIGN_UNIT (elem_type
);
4269 TREE_TYPE (data_ref
)
4270 = build_aligned_type (TREE_TYPE (data_ref
),
4271 TYPE_ALIGN (elem_type
));
4272 misalign
= DR_MISALIGNMENT (first_dr
);
4274 if (dataref_offset
== NULL_TREE
)
4275 set_ptr_info_alignment (get_ptr_info (dataref_ptr
), align
,
4278 /* Arguments are ready. Create the new vector stmt. */
4279 new_stmt
= gimple_build_assign (data_ref
, vec_oprnd
);
4280 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4285 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
4293 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4295 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4296 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Release the temporary vectors allocated above.  */
4300 dr_chain
.release ();
4302 result_chain
.release ();
4303 vec_oprnds
.release ();
4308 /* Given a vector type VECTYPE and permutation SEL returns
4309 the VECTOR_CST mask that implements the permutation of the
4310 vector elements. If that is impossible to do, returns NULL. */
4313 vect_gen_perm_mask (tree vectype
, unsigned char *sel
)
4315 tree mask_elt_type
, mask_type
, mask_vec
, *mask_elts
;
4318 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4320 if (!can_vec_perm_p (TYPE_MODE (vectype
), false, sel
))
4323 mask_elt_type
= lang_hooks
.types
.type_for_mode
4324 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype
))), 1);
4325 mask_type
= get_vectype_for_scalar_type (mask_elt_type
);
4327 mask_elts
= XALLOCAVEC (tree
, nunits
);
4328 for (i
= nunits
- 1; i
>= 0; i
--)
4329 mask_elts
[i
] = build_int_cst (mask_elt_type
, sel
[i
]);
4330 mask_vec
= build_vector (mask_type
, mask_elts
);
4335 /* Given a vector type VECTYPE returns the VECTOR_CST mask that implements
4336 reversal of the vector elements. If that is impossible to do,
4340 perm_mask_for_reverse (tree vectype
)
4345 nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4346 sel
= XALLOCAVEC (unsigned char, nunits
);
4348 for (i
= 0; i
< nunits
; ++i
)
4349 sel
[i
] = nunits
- 1 - i
;
4351 return vect_gen_perm_mask (vectype
, sel
);
4354 /* Given a vector variable X and Y, that was generated for the scalar
4355 STMT, generate instructions to permute the vector elements of X and Y
4356 using permutation mask MASK_VEC, insert them at *GSI and return the
4357 permuted vector variable. */
4360 permute_vec_elements (tree x
, tree y
, tree mask_vec
, gimple stmt
,
4361 gimple_stmt_iterator
*gsi
)
4363 tree vectype
= TREE_TYPE (x
);
4364 tree perm_dest
, data_ref
;
4367 perm_dest
= vect_create_destination_var (gimple_assign_lhs (stmt
), vectype
);
4368 data_ref
= make_ssa_name (perm_dest
, NULL
);
4370 /* Generate the permute statement. */
4371 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
4373 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4378 /* vectorizable_load.
4380 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4382 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4383 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4384 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4387 vectorizable_load (gimple stmt
, gimple_stmt_iterator
*gsi
, gimple
*vec_stmt
,
4388 slp_tree slp_node
, slp_instance slp_node_instance
)
4391 tree vec_dest
= NULL
;
4392 tree data_ref
= NULL
;
4393 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4394 stmt_vec_info prev_stmt_info
;
4395 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4396 struct loop
*loop
= NULL
;
4397 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4398 bool nested_in_vect_loop
= false;
4399 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
), *first_dr
= NULL
;
4400 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4403 enum machine_mode mode
;
4404 gimple new_stmt
= NULL
;
4406 enum dr_alignment_support alignment_support_scheme
;
4407 tree dataref_ptr
= NULL_TREE
;
4408 tree dataref_offset
= NULL_TREE
;
4409 gimple ptr_incr
= NULL
;
4410 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
4412 int i
, j
, group_size
, group_gap
;
4413 tree msq
= NULL_TREE
, lsq
;
4414 tree offset
= NULL_TREE
;
4415 tree realignment_token
= NULL_TREE
;
4417 vec
<tree
> dr_chain
= vNULL
;
4418 bool grouped_load
= false;
4419 bool load_lanes_p
= false;
4422 bool negative
= false;
4423 bool compute_in_loop
= false;
4424 struct loop
*at_loop
;
4426 bool slp
= (slp_node
!= NULL
);
4427 bool slp_perm
= false;
4428 enum tree_code code
;
4429 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4432 tree gather_base
= NULL_TREE
, gather_off
= NULL_TREE
;
4433 tree gather_off_vectype
= NULL_TREE
, gather_decl
= NULL_TREE
;
4434 int gather_scale
= 1;
4435 enum vect_def_type gather_dt
= vect_unknown_def_type
;
4439 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4440 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4441 vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
4446 /* Multiple types in SLP are handled by creating the appropriate number of
4447 vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in
4449 if (slp
|| PURE_SLP_STMT (stmt_info
))
4452 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
4454 gcc_assert (ncopies
>= 1);
4456 /* FORNOW. This restriction should be relaxed. */
4457 if (nested_in_vect_loop
&& ncopies
> 1)
4459 if (dump_enabled_p ())
4460 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4461 "multiple types in nested loop.\n");
4465 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
4468 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
)
4471 /* Is vectorizable load? */
4472 if (!is_gimple_assign (stmt
))
4475 scalar_dest
= gimple_assign_lhs (stmt
);
4476 if (TREE_CODE (scalar_dest
) != SSA_NAME
)
4479 code
= gimple_assign_rhs_code (stmt
);
4480 if (code
!= ARRAY_REF
4481 && code
!= BIT_FIELD_REF
4482 && code
!= INDIRECT_REF
4483 && code
!= COMPONENT_REF
4484 && code
!= IMAGPART_EXPR
4485 && code
!= REALPART_EXPR
4487 && TREE_CODE_CLASS (code
) != tcc_declaration
)
4490 if (!STMT_VINFO_DATA_REF (stmt_info
))
4493 elem_type
= TREE_TYPE (vectype
);
4494 mode
= TYPE_MODE (vectype
);
4496 /* FORNOW. In some cases can vectorize even if data-type not supported
4497 (e.g. - data copies). */
4498 if (optab_handler (mov_optab
, mode
) == CODE_FOR_nothing
)
4500 if (dump_enabled_p ())
4501 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4502 "Aligned load, but unsupported type.\n");
4506 /* Check if the load is a part of an interleaving chain. */
4507 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
4509 grouped_load
= true;
4511 gcc_assert (! nested_in_vect_loop
&& !STMT_VINFO_GATHER_P (stmt_info
));
4513 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4514 if (!slp
&& !PURE_SLP_STMT (stmt_info
))
4516 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4517 if (vect_load_lanes_supported (vectype
, group_size
))
4518 load_lanes_p
= true;
4519 else if (!vect_grouped_load_supported (vectype
, group_size
))
4525 if (STMT_VINFO_GATHER_P (stmt_info
))
4529 gather_decl
= vect_check_gather (stmt
, loop_vinfo
, &gather_base
,
4530 &gather_off
, &gather_scale
);
4531 gcc_assert (gather_decl
);
4532 if (!vect_is_simple_use_1 (gather_off
, NULL
, loop_vinfo
, bb_vinfo
,
4533 &def_stmt
, &def
, &gather_dt
,
4534 &gather_off_vectype
))
4536 if (dump_enabled_p ())
4537 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4538 "gather index use not simple.\n");
4542 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4546 negative
= tree_int_cst_compare (nested_in_vect_loop
4547 ? STMT_VINFO_DR_STEP (stmt_info
)
4549 size_zero_node
) < 0;
4550 if (negative
&& ncopies
> 1)
4552 if (dump_enabled_p ())
4553 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4554 "multiple types with negative step.\n");
4562 if (dump_enabled_p ())
4563 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4564 "negative step for group load not supported"
4568 alignment_support_scheme
= vect_supportable_dr_alignment (dr
, false);
4569 if (alignment_support_scheme
!= dr_aligned
4570 && alignment_support_scheme
!= dr_unaligned_supported
)
4572 if (dump_enabled_p ())
4573 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4574 "negative step but alignment required.\n");
4577 if (!perm_mask_for_reverse (vectype
))
4579 if (dump_enabled_p ())
4580 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4581 "negative step and reversing not supported."
4588 if (!vec_stmt
) /* transformation not required. */
4590 STMT_VINFO_TYPE (stmt_info
) = load_vec_info_type
;
4591 vect_model_load_cost (stmt_info
, ncopies
, load_lanes_p
, NULL
, NULL
, NULL
);
4595 if (dump_enabled_p ())
4596 dump_printf_loc (MSG_NOTE
, vect_location
,
4597 "transform load. ncopies = %d\n", ncopies
);
4601 ensure_base_align (stmt_info
, dr
);
4603 if (STMT_VINFO_GATHER_P (stmt_info
))
4605 tree vec_oprnd0
= NULL_TREE
, op
;
4606 tree arglist
= TYPE_ARG_TYPES (TREE_TYPE (gather_decl
));
4607 tree rettype
, srctype
, ptrtype
, idxtype
, masktype
, scaletype
;
4608 tree ptr
, mask
, var
, scale
, perm_mask
= NULL_TREE
, prev_res
= NULL_TREE
;
4609 edge pe
= loop_preheader_edge (loop
);
4612 enum { NARROW
, NONE
, WIDEN
} modifier
;
4613 int gather_off_nunits
= TYPE_VECTOR_SUBPARTS (gather_off_vectype
);
4615 if (nunits
== gather_off_nunits
)
4617 else if (nunits
== gather_off_nunits
/ 2)
4619 unsigned char *sel
= XALLOCAVEC (unsigned char, gather_off_nunits
);
4622 for (i
= 0; i
< gather_off_nunits
; ++i
)
4623 sel
[i
] = i
| nunits
;
4625 perm_mask
= vect_gen_perm_mask (gather_off_vectype
, sel
);
4626 gcc_assert (perm_mask
!= NULL_TREE
);
4628 else if (nunits
== gather_off_nunits
* 2)
4630 unsigned char *sel
= XALLOCAVEC (unsigned char, nunits
);
4633 for (i
= 0; i
< nunits
; ++i
)
4634 sel
[i
] = i
< gather_off_nunits
4635 ? i
: i
+ nunits
- gather_off_nunits
;
4637 perm_mask
= vect_gen_perm_mask (vectype
, sel
);
4638 gcc_assert (perm_mask
!= NULL_TREE
);
4644 rettype
= TREE_TYPE (TREE_TYPE (gather_decl
));
4645 srctype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4646 ptrtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4647 idxtype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4648 masktype
= TREE_VALUE (arglist
); arglist
= TREE_CHAIN (arglist
);
4649 scaletype
= TREE_VALUE (arglist
);
4650 gcc_checking_assert (types_compatible_p (srctype
, rettype
)
4651 && types_compatible_p (srctype
, masktype
));
4653 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4655 ptr
= fold_convert (ptrtype
, gather_base
);
4656 if (!is_gimple_min_invariant (ptr
))
4658 ptr
= force_gimple_operand (ptr
, &seq
, true, NULL_TREE
);
4659 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, seq
);
4660 gcc_assert (!new_bb
);
4663 /* Currently we support only unconditional gather loads,
4664 so mask should be all ones. */
4665 if (TREE_CODE (TREE_TYPE (masktype
)) == INTEGER_TYPE
)
4666 mask
= build_int_cst (TREE_TYPE (masktype
), -1);
4667 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (masktype
)))
4671 for (j
= 0; j
< 6; ++j
)
4673 real_from_target (&r
, tmp
, TYPE_MODE (TREE_TYPE (masktype
)));
4674 mask
= build_real (TREE_TYPE (masktype
), r
);
4678 mask
= build_vector_from_val (masktype
, mask
);
4679 mask
= vect_init_vector (stmt
, mask
, masktype
, NULL
);
4681 scale
= build_int_cst (scaletype
, gather_scale
);
4683 prev_stmt_info
= NULL
;
4684 for (j
= 0; j
< ncopies
; ++j
)
4686 if (modifier
== WIDEN
&& (j
& 1))
4687 op
= permute_vec_elements (vec_oprnd0
, vec_oprnd0
,
4688 perm_mask
, stmt
, gsi
);
4691 = vect_get_vec_def_for_operand (gather_off
, stmt
, NULL
);
4694 = vect_get_vec_def_for_stmt_copy (gather_dt
, vec_oprnd0
);
4696 if (!useless_type_conversion_p (idxtype
, TREE_TYPE (op
)))
4698 gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (op
))
4699 == TYPE_VECTOR_SUBPARTS (idxtype
));
4700 var
= vect_get_new_vect_var (idxtype
, vect_simple_var
, NULL
);
4701 var
= make_ssa_name (var
, NULL
);
4702 op
= build1 (VIEW_CONVERT_EXPR
, idxtype
, op
);
4704 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
,
4706 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4711 = gimple_build_call (gather_decl
, 5, mask
, ptr
, op
, mask
, scale
);
4713 if (!useless_type_conversion_p (vectype
, rettype
))
4715 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype
)
4716 == TYPE_VECTOR_SUBPARTS (rettype
));
4717 var
= vect_get_new_vect_var (rettype
, vect_simple_var
, NULL
);
4718 op
= make_ssa_name (var
, new_stmt
);
4719 gimple_call_set_lhs (new_stmt
, op
);
4720 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4721 var
= make_ssa_name (vec_dest
, NULL
);
4722 op
= build1 (VIEW_CONVERT_EXPR
, vectype
, op
);
4724 = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR
, var
, op
,
4729 var
= make_ssa_name (vec_dest
, new_stmt
);
4730 gimple_call_set_lhs (new_stmt
, var
);
4733 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
4735 if (modifier
== NARROW
)
4742 var
= permute_vec_elements (prev_res
, var
,
4743 perm_mask
, stmt
, gsi
);
4744 new_stmt
= SSA_NAME_DEF_STMT (var
);
4747 if (prev_stmt_info
== NULL
)
4748 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4750 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4751 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4755 else if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
4757 gimple_stmt_iterator incr_gsi
;
4763 vec
<constructor_elt
, va_gc
> *v
= NULL
;
4764 gimple_seq stmts
= NULL
;
4765 tree stride_base
, stride_step
, alias_off
;
4767 gcc_assert (!nested_in_vect_loop
);
4770 = fold_build_pointer_plus
4771 (unshare_expr (DR_BASE_ADDRESS (dr
)),
4772 size_binop (PLUS_EXPR
,
4773 convert_to_ptrofftype (unshare_expr (DR_OFFSET (dr
))),
4774 convert_to_ptrofftype (DR_INIT (dr
))));
4775 stride_step
= fold_convert (sizetype
, unshare_expr (DR_STEP (dr
)));
4777 /* For a load with loop-invariant (but other than power-of-2)
4778 stride (i.e. not a grouped access) like so:
4780 for (i = 0; i < n; i += stride)
4783 we generate a new induction variable and new accesses to
4784 form a new vector (or vectors, depending on ncopies):
4786 for (j = 0; ; j += VF*stride)
4788 tmp2 = array[j + stride];
4790 vectemp = {tmp1, tmp2, ...}
4793 ivstep
= stride_step
;
4794 ivstep
= fold_build2 (MULT_EXPR
, TREE_TYPE (ivstep
), ivstep
,
4795 build_int_cst (TREE_TYPE (ivstep
), vf
));
4797 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
4799 create_iv (stride_base
, ivstep
, NULL
,
4800 loop
, &incr_gsi
, insert_after
,
4802 incr
= gsi_stmt (incr_gsi
);
4803 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4805 stride_step
= force_gimple_operand (stride_step
, &stmts
, true, NULL_TREE
);
4807 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop
), stmts
);
4809 prev_stmt_info
= NULL
;
4810 running_off
= offvar
;
4811 alias_off
= build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0);
4812 for (j
= 0; j
< ncopies
; j
++)
4816 vec_alloc (v
, nunits
);
4817 for (i
= 0; i
< nunits
; i
++)
4819 tree newref
, newoff
;
4821 newref
= build2 (MEM_REF
, TREE_TYPE (vectype
),
4822 running_off
, alias_off
);
4824 newref
= force_gimple_operand_gsi (gsi
, newref
, true,
4827 CONSTRUCTOR_APPEND_ELT (v
, NULL_TREE
, newref
);
4828 newoff
= copy_ssa_name (running_off
, NULL
);
4829 incr
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, newoff
,
4830 running_off
, stride_step
);
4831 vect_finish_stmt_generation (stmt
, incr
, gsi
);
4833 running_off
= newoff
;
4836 vec_inv
= build_constructor (vectype
, v
);
4837 new_temp
= vect_init_vector (stmt
, vec_inv
, vectype
, gsi
);
4838 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
4841 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
4843 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
4844 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
4851 first_stmt
= GROUP_FIRST_ELEMENT (stmt_info
);
4853 && !SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ()
4854 && first_stmt
!= SLP_TREE_SCALAR_STMTS (slp_node
)[0])
4855 first_stmt
= SLP_TREE_SCALAR_STMTS (slp_node
)[0];
4857 /* Check if the chain of loads is already vectorized. */
4858 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt
))
4859 /* For SLP we would need to copy over SLP_TREE_VEC_STMTS.
4860 ??? But we can only do so if there is exactly one
4861 as we have no way to get at the rest. Leave the CSE
4863 ??? With the group load eventually participating
4864 in multiple different permutations (having multiple
4865 slp nodes which refer to the same group) the CSE
4866 is even wrong code. See PR56270. */
4869 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
4872 first_dr
= STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt
));
4873 group_size
= GROUP_SIZE (vinfo_for_stmt (first_stmt
));
4875 /* VEC_NUM is the number of vect stmts to be created for this group. */
4878 grouped_load
= false;
4879 vec_num
= SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node
);
4880 if (SLP_TREE_LOAD_PERMUTATION (slp_node
).exists ())
4882 group_gap
= GROUP_GAP (vinfo_for_stmt (first_stmt
));
4886 vec_num
= group_size
;
4894 group_size
= vec_num
= 1;
4898 alignment_support_scheme
= vect_supportable_dr_alignment (first_dr
, false);
4899 gcc_assert (alignment_support_scheme
);
4900 /* Targets with load-lane instructions must not require explicit
4902 gcc_assert (!load_lanes_p
4903 || alignment_support_scheme
== dr_aligned
4904 || alignment_support_scheme
== dr_unaligned_supported
);
4906 /* In case the vectorization factor (VF) is bigger than the number
4907 of elements that we can fit in a vectype (nunits), we have to generate
4908 more than one vector stmt - i.e - we need to "unroll" the
4909 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4910 from one copy of the vector stmt to the next, in the field
4911 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4912 stages to find the correct vector defs to be used when vectorizing
4913 stmts that use the defs of the current stmt. The example below
4914 illustrates the vectorization process when VF=16 and nunits=4 (i.e., we
4915 need to create 4 vectorized stmts):
4917 before vectorization:
4918 RELATED_STMT VEC_STMT
4922 step 1: vectorize stmt S1:
4923 We first create the vector stmt VS1_0, and, as usual, record a
4924 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4925 Next, we create the vector stmt VS1_1, and record a pointer to
4926 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4927 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4929 RELATED_STMT VEC_STMT
4930 VS1_0: vx0 = memref0 VS1_1 -
4931 VS1_1: vx1 = memref1 VS1_2 -
4932 VS1_2: vx2 = memref2 VS1_3 -
4933 VS1_3: vx3 = memref3 - -
4934 S1: x = load - VS1_0
4937 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4938 information we recorded in RELATED_STMT field is used to vectorize
4941 /* In case of interleaving (non-unit grouped access):
4948 Vectorized loads are created in the order of memory accesses
4949 starting from the access of the first stmt of the chain:
4952 VS2: vx1 = &base + vec_size*1
4953 VS3: vx3 = &base + vec_size*2
4954 VS4: vx4 = &base + vec_size*3
4956 Then permutation statements are generated:
4958 VS5: vx5 = VEC_PERM_EXPR < vx0, vx1, { 0, 2, ..., i*2 } >
4959 VS6: vx6 = VEC_PERM_EXPR < vx0, vx1, { 1, 3, ..., i*2+1 } >
4962 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4963 (the order of the data-refs in the output of vect_permute_load_chain
4964 corresponds to the order of scalar stmts in the interleaving chain - see
4965 the documentation of vect_permute_load_chain()).
4966 The generation of permutation stmts and recording them in
4967 STMT_VINFO_VEC_STMT is done in vect_transform_grouped_load().
4969 In case of both multiple types and interleaving, the vector loads and
4970 permutation stmts above are created for every copy. The result vector
4971 stmts are put in STMT_VINFO_VEC_STMT for the first copy and in the
4972 corresponding STMT_VINFO_RELATED_STMT for the next copies. */
4974 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4975 on a target that supports unaligned accesses (dr_unaligned_supported)
4976 we generate the following code:
4980 p = p + indx * vectype_size;
4985 Otherwise, the data reference is potentially unaligned on a target that
4986 does not support unaligned accesses (dr_explicit_realign_optimized) -
4987 then generate the following code, in which the data in each iteration is
4988 obtained by two vector loads, one from the previous iteration, and one
4989 from the current iteration:
4991 msq_init = *(floor(p1))
4992 p2 = initial_addr + VS - 1;
4993 realignment_token = call target_builtin;
4996 p2 = p2 + indx * vectype_size
4998 vec_dest = realign_load (msq, lsq, realignment_token)
5003 /* If the misalignment remains the same throughout the execution of the
5004 loop, we can create the init_addr and permutation mask at the loop
5005 preheader. Otherwise, it needs to be created inside the loop.
5006 This can only occur when vectorizing memory accesses in the inner-loop
5007 nested within an outer-loop that is being vectorized. */
5009 if (nested_in_vect_loop
5010 && (TREE_INT_CST_LOW (DR_STEP (dr
))
5011 % GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0))
5013 gcc_assert (alignment_support_scheme
!= dr_explicit_realign_optimized
);
5014 compute_in_loop
= true;
5017 if ((alignment_support_scheme
== dr_explicit_realign_optimized
5018 || alignment_support_scheme
== dr_explicit_realign
)
5019 && !compute_in_loop
)
5021 msq
= vect_setup_realignment (first_stmt
, gsi
, &realignment_token
,
5022 alignment_support_scheme
, NULL_TREE
,
5024 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5026 phi
= SSA_NAME_DEF_STMT (msq
);
5027 offset
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
5034 offset
= size_int (-TYPE_VECTOR_SUBPARTS (vectype
) + 1);
5037 aggr_type
= build_array_type_nelts (elem_type
, vec_num
* nunits
);
5039 aggr_type
= vectype
;
5041 prev_stmt_info
= NULL
;
5042 for (j
= 0; j
< ncopies
; j
++)
5044 /* 1. Create the vector or array pointer update chain. */
5047 bool simd_lane_access_p
5048 = STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
);
5049 if (simd_lane_access_p
5050 && TREE_CODE (DR_BASE_ADDRESS (first_dr
)) == ADDR_EXPR
5051 && VAR_P (TREE_OPERAND (DR_BASE_ADDRESS (first_dr
), 0))
5052 && integer_zerop (DR_OFFSET (first_dr
))
5053 && integer_zerop (DR_INIT (first_dr
))
5054 && alias_sets_conflict_p (get_alias_set (aggr_type
),
5055 get_alias_set (DR_REF (first_dr
)))
5056 && (alignment_support_scheme
== dr_aligned
5057 || alignment_support_scheme
== dr_unaligned_supported
))
5059 dataref_ptr
= unshare_expr (DR_BASE_ADDRESS (first_dr
));
5060 dataref_offset
= build_int_cst (reference_alias_ptr_type
5061 (DR_REF (first_dr
)), 0);
5066 = vect_create_data_ref_ptr (first_stmt
, aggr_type
, at_loop
,
5067 offset
, &dummy
, gsi
, &ptr_incr
,
5068 simd_lane_access_p
, &inv_p
);
5070 else if (dataref_offset
)
5071 dataref_offset
= int_const_binop (PLUS_EXPR
, dataref_offset
,
5072 TYPE_SIZE_UNIT (aggr_type
));
5074 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
, stmt
,
5075 TYPE_SIZE_UNIT (aggr_type
));
5077 if (grouped_load
|| slp_perm
)
5078 dr_chain
.create (vec_num
);
5084 vec_array
= create_vector_array (vectype
, vec_num
);
5087 VEC_ARRAY = LOAD_LANES (MEM_REF[...all elements...]). */
5088 data_ref
= create_array_ref (aggr_type
, dataref_ptr
, first_dr
);
5089 new_stmt
= gimple_build_call_internal (IFN_LOAD_LANES
, 1, data_ref
);
5090 gimple_call_set_lhs (new_stmt
, vec_array
);
5091 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5093 /* Extract each vector into an SSA_NAME. */
5094 for (i
= 0; i
< vec_num
; i
++)
5096 new_temp
= read_vector_array (stmt
, gsi
, scalar_dest
,
5098 dr_chain
.quick_push (new_temp
);
5101 /* Record the mapping between SSA_NAMEs and statements. */
5102 vect_record_grouped_load_vectors (stmt
, dr_chain
);
5106 for (i
= 0; i
< vec_num
; i
++)
5109 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5112 /* 2. Create the vector-load in the loop. */
5113 switch (alignment_support_scheme
)
5116 case dr_unaligned_supported
:
5118 unsigned int align
, misalign
;
5121 = build2 (MEM_REF
, vectype
, dataref_ptr
,
5124 : build_int_cst (reference_alias_ptr_type
5125 (DR_REF (first_dr
)), 0));
5126 align
= TYPE_ALIGN_UNIT (vectype
);
5127 if (alignment_support_scheme
== dr_aligned
)
5129 gcc_assert (aligned_access_p (first_dr
));
5132 else if (DR_MISALIGNMENT (first_dr
) == -1)
5134 TREE_TYPE (data_ref
)
5135 = build_aligned_type (TREE_TYPE (data_ref
),
5136 TYPE_ALIGN (elem_type
));
5137 align
= TYPE_ALIGN_UNIT (elem_type
);
5142 TREE_TYPE (data_ref
)
5143 = build_aligned_type (TREE_TYPE (data_ref
),
5144 TYPE_ALIGN (elem_type
));
5145 misalign
= DR_MISALIGNMENT (first_dr
);
5147 if (dataref_offset
== NULL_TREE
)
5148 set_ptr_info_alignment (get_ptr_info (dataref_ptr
),
5152 case dr_explicit_realign
:
5157 vs_minus_1
= size_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
5159 if (compute_in_loop
)
5160 msq
= vect_setup_realignment (first_stmt
, gsi
,
5162 dr_explicit_realign
,
5165 ptr
= copy_ssa_name (dataref_ptr
, NULL
);
5166 new_stmt
= gimple_build_assign_with_ops
5167 (BIT_AND_EXPR
, ptr
, dataref_ptr
,
5169 (TREE_TYPE (dataref_ptr
),
5170 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5171 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5173 = build2 (MEM_REF
, vectype
, ptr
,
5174 build_int_cst (reference_alias_ptr_type
5175 (DR_REF (first_dr
)), 0));
5176 vec_dest
= vect_create_destination_var (scalar_dest
,
5178 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5179 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5180 gimple_assign_set_lhs (new_stmt
, new_temp
);
5181 gimple_set_vdef (new_stmt
, gimple_vdef (stmt
));
5182 gimple_set_vuse (new_stmt
, gimple_vuse (stmt
));
5183 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5186 bump
= size_binop (MULT_EXPR
, vs_minus_1
,
5187 TYPE_SIZE_UNIT (elem_type
));
5188 ptr
= bump_vector_ptr (dataref_ptr
, NULL
, gsi
, stmt
, bump
);
5189 new_stmt
= gimple_build_assign_with_ops
5190 (BIT_AND_EXPR
, NULL_TREE
, ptr
,
5193 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5194 ptr
= copy_ssa_name (dataref_ptr
, new_stmt
);
5195 gimple_assign_set_lhs (new_stmt
, ptr
);
5196 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5198 = build2 (MEM_REF
, vectype
, ptr
,
5199 build_int_cst (reference_alias_ptr_type
5200 (DR_REF (first_dr
)), 0));
5203 case dr_explicit_realign_optimized
:
5204 new_temp
= copy_ssa_name (dataref_ptr
, NULL
);
5205 new_stmt
= gimple_build_assign_with_ops
5206 (BIT_AND_EXPR
, new_temp
, dataref_ptr
,
5208 (TREE_TYPE (dataref_ptr
),
5209 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
5210 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5212 = build2 (MEM_REF
, vectype
, new_temp
,
5213 build_int_cst (reference_alias_ptr_type
5214 (DR_REF (first_dr
)), 0));
5219 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5220 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
5221 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5222 gimple_assign_set_lhs (new_stmt
, new_temp
);
5223 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5225 /* 3. Handle explicit realignment if necessary/supported.
5227 vec_dest = realign_load (msq, lsq, realignment_token) */
5228 if (alignment_support_scheme
== dr_explicit_realign_optimized
5229 || alignment_support_scheme
== dr_explicit_realign
)
5231 lsq
= gimple_assign_lhs (new_stmt
);
5232 if (!realignment_token
)
5233 realignment_token
= dataref_ptr
;
5234 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5236 = gimple_build_assign_with_ops (REALIGN_LOAD_EXPR
,
5239 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5240 gimple_assign_set_lhs (new_stmt
, new_temp
);
5241 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5243 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
5246 if (i
== vec_num
- 1 && j
== ncopies
- 1)
5247 add_phi_arg (phi
, lsq
,
5248 loop_latch_edge (containing_loop
),
5254 /* 4. Handle invariant-load. */
5255 if (inv_p
&& !bb_vinfo
)
5257 gimple_stmt_iterator gsi2
= *gsi
;
5258 gcc_assert (!grouped_load
);
5260 new_temp
= vect_init_vector (stmt
, scalar_dest
,
5262 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5267 tree perm_mask
= perm_mask_for_reverse (vectype
);
5268 new_temp
= permute_vec_elements (new_temp
, new_temp
,
5269 perm_mask
, stmt
, gsi
);
5270 new_stmt
= SSA_NAME_DEF_STMT (new_temp
);
5273 /* Collect vector loads and later create their permutation in
5274 vect_transform_grouped_load (). */
5275 if (grouped_load
|| slp_perm
)
5276 dr_chain
.quick_push (new_temp
);
5278 /* Store vector loads in the corresponding SLP_NODE. */
5279 if (slp
&& !slp_perm
)
5280 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
5282 /* Bump the vector pointer to account for a gap. */
5283 if (slp
&& group_gap
!= 0)
5285 tree bump
= size_binop (MULT_EXPR
,
5286 TYPE_SIZE_UNIT (elem_type
),
5287 size_int (group_gap
));
5288 dataref_ptr
= bump_vector_ptr (dataref_ptr
, ptr_incr
, gsi
,
5293 if (slp
&& !slp_perm
)
5298 if (!vect_transform_slp_perm_load (slp_node
, dr_chain
, gsi
, vf
,
5299 slp_node_instance
, false))
5301 dr_chain
.release ();
5310 vect_transform_grouped_load (stmt
, dr_chain
, group_size
, gsi
);
5311 *vec_stmt
= STMT_VINFO_VEC_STMT (stmt_info
);
5316 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5318 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5319 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
5322 dr_chain
.release ();
5328 /* Function vect_is_simple_cond.
5331 LOOP - the loop that is being vectorized.
5332 COND - Condition that is checked for simple use.
5335 *COMP_VECTYPE - the vector type for the comparison.
5337 Returns whether a COND can be vectorized. Checks whether
5338 condition operands are supportable using vec_is_simple_use. */
/* NOTE(review): this region is a lossy extraction -- the embedded original
   line numbers jump (5342 -> 5346, 5349 -> 5352, ...), so braces,
   "return false;"/"return true;" statements and some declarations (e.g.
   the tree lhs/rhs/def locals) are missing from this view.  Only comments
   are added here; every code token is left untouched.  */
5341 vect_is_simple_cond (tree cond
, gimple stmt
, loop_vec_info loop_vinfo
,
5342 bb_vec_info bb_vinfo
, tree
*comp_vectype
)
5346 enum vect_def_type dt
;
/* vectype1/vectype2 collect the vector types (if any) that
   vect_is_simple_use_1 reports for the two comparison operands.  */
5347 tree vectype1
= NULL_TREE
, vectype2
= NULL_TREE
;
/* COND must be a comparison expression (tcc_comparison class); the
   failure path -- presumably "return false;" -- was dropped by the
   extraction.  */
5349 if (!COMPARISON_CLASS_P (cond
))
5352 lhs
= TREE_OPERAND (cond
, 0);
5353 rhs
= TREE_OPERAND (cond
, 1);
/* Each comparison operand must either be an SSA name that is a "simple
   use" with respect to the vectorized region, or an integer/real/
   fixed-point constant.  Anything else rejects the condition.  */
5355 if (TREE_CODE (lhs
) == SSA_NAME
)
5357 gimple lhs_def_stmt
= SSA_NAME_DEF_STMT (lhs
);
5358 if (!vect_is_simple_use_1 (lhs
, stmt
, loop_vinfo
, bb_vinfo
,
5359 &lhs_def_stmt
, &def
, &dt
, &vectype1
))
5362 else if (TREE_CODE (lhs
) != INTEGER_CST
&& TREE_CODE (lhs
) != REAL_CST
5363 && TREE_CODE (lhs
) != FIXED_CST
)
/* Same check for the right-hand operand, recording its vector type in
   vectype2.  */
5366 if (TREE_CODE (rhs
) == SSA_NAME
)
5368 gimple rhs_def_stmt
= SSA_NAME_DEF_STMT (rhs
);
5369 if (!vect_is_simple_use_1 (rhs
, stmt
, loop_vinfo
, bb_vinfo
,
5370 &rhs_def_stmt
, &def
, &dt
, &vectype2
))
5373 else if (TREE_CODE (rhs
) != INTEGER_CST
&& TREE_CODE (rhs
) != REAL_CST
5374 && TREE_CODE (rhs
) != FIXED_CST
)
/* Report the comparison's vector type to the caller, preferring the
   LHS operand's type when both operands supplied one (either may be
   NULL_TREE when the operand was a constant).  */
5377 *comp_vectype
= vectype1
? vectype1
: vectype2
;
5381 /* vectorizable_condition.
5383 Check if STMT is conditional modify expression that can be vectorized.
5384 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5385 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5388 When STMT is vectorized as nested cycle, REDUC_DEF is the vector variable
5389 to be used at REDUC_INDEX (in then clause if REDUC_INDEX is 1, and in
5390 else caluse if it is 2).
5392 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): lossy extraction -- embedded line numbers skip (e.g.
   5421 -> 5424, 5499 -> 5504), so several declarations, braces and
   return statements are not visible here.  Comments only; all code
   tokens left byte-identical.  */
5395 vectorizable_condition (gimple stmt
, gimple_stmt_iterator
*gsi
,
5396 gimple
*vec_stmt
, tree reduc_def
, int reduc_index
,
5399 tree scalar_dest
= NULL_TREE
;
5400 tree vec_dest
= NULL_TREE
;
5401 tree cond_expr
, then_clause
, else_clause
;
5402 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5403 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5404 tree comp_vectype
= NULL_TREE
;
5405 tree vec_cond_lhs
= NULL_TREE
, vec_cond_rhs
= NULL_TREE
;
5406 tree vec_then_clause
= NULL_TREE
, vec_else_clause
= NULL_TREE
;
5407 tree vec_compare
, vec_cond_expr
;
5409 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
/* dts[4] records the def-type of each of the four COND_EXPR operands
   (cond lhs, cond rhs, then, else) for later copy generation.  */
5411 enum vect_def_type dt
, dts
[4];
5412 int nunits
= TYPE_VECTOR_SUBPARTS (vectype
);
5414 enum tree_code code
;
5415 stmt_vec_info prev_stmt_info
= NULL
;
5417 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
/* One operand vector per COND_EXPR operand; grown per copy below.  */
5418 vec
<tree
> vec_oprnds0
= vNULL
;
5419 vec
<tree
> vec_oprnds1
= vNULL
;
5420 vec
<tree
> vec_oprnds2
= vNULL
;
5421 vec
<tree
> vec_oprnds3
= vNULL
;
/* Compute NCOPIES: for the non-SLP loop case it is VF / nunits (the
   SLP branch body was dropped by the extraction).  */
5424 if (slp_node
|| PURE_SLP_STMT (stmt_info
))
5427 ncopies
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
) / nunits
;
5429 gcc_assert (ncopies
>= 1);
5430 if (reduc_index
&& ncopies
> 1)
5431 return false; /* FORNOW */
5433 if (reduc_index
&& STMT_SLP_TYPE (stmt_info
))
5436 if (!STMT_VINFO_RELEVANT_P (stmt_info
) && !bb_vinfo
)
5439 if (STMT_VINFO_DEF_TYPE (stmt_info
) != vect_internal_def
5440 && !(STMT_VINFO_DEF_TYPE (stmt_info
) == vect_nested_cycle
5444 /* FORNOW: not yet supported. */
5445 if (STMT_VINFO_LIVE_P (stmt_info
))
5447 if (dump_enabled_p ())
5448 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5449 "value used after loop.\n");
5453 /* Is vectorizable conditional operation? */
5454 if (!is_gimple_assign (stmt
))
5457 code
= gimple_assign_rhs_code (stmt
);
5459 if (code
!= COND_EXPR
)
5462 cond_expr
= gimple_assign_rhs1 (stmt
);
5463 then_clause
= gimple_assign_rhs2 (stmt
);
5464 else_clause
= gimple_assign_rhs3 (stmt
);
/* The condition itself must be vectorizable (see vect_is_simple_cond);
   this also fills comp_vectype.  */
5466 if (!vect_is_simple_cond (cond_expr
, stmt
, loop_vinfo
, bb_vinfo
,
/* THEN/ELSE clauses must be simple uses or int/real/fixed constants,
   mirroring the checks made on the condition operands.  */
5471 if (TREE_CODE (then_clause
) == SSA_NAME
)
5473 gimple then_def_stmt
= SSA_NAME_DEF_STMT (then_clause
);
5474 if (!vect_is_simple_use (then_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5475 &then_def_stmt
, &def
, &dt
))
5478 else if (TREE_CODE (then_clause
) != INTEGER_CST
5479 && TREE_CODE (then_clause
) != REAL_CST
5480 && TREE_CODE (then_clause
) != FIXED_CST
)
5483 if (TREE_CODE (else_clause
) == SSA_NAME
)
5485 gimple else_def_stmt
= SSA_NAME_DEF_STMT (else_clause
);
5486 if (!vect_is_simple_use (else_clause
, stmt
, loop_vinfo
, bb_vinfo
,
5487 &else_def_stmt
, &def
, &dt
))
5490 else if (TREE_CODE (else_clause
) != INTEGER_CST
5491 && TREE_CODE (else_clause
) != REAL_CST
5492 && TREE_CODE (else_clause
) != FIXED_CST
)
/* Build a signed integer vector type of the same width as the element
   type to hold the boolean result of the vector comparison.  */
5495 unsigned int prec
= GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype
)));
5496 /* The result of a vector comparison should be signed type. */
5497 tree cmp_type
= build_nonstandard_integer_type (prec
, 0);
5498 vec_cmp_type
= get_same_sized_vectype (cmp_type
, vectype
);
5499 if (vec_cmp_type
== NULL_TREE
)
/* Analysis phase succeeded: record the stmt type and let the target
   decide whether it can expand VEC_COND_EXPR for these types.  */
5504 STMT_VINFO_TYPE (stmt_info
) = condition_vec_info_type
;
5505 return expand_vec_cond_expr_p (vectype
, comp_vectype
);
/* Transformation phase begins here (the "Transform" banner comment and
   some declarations were dropped by the extraction).  */
5512 vec_oprnds0
.create (1);
5513 vec_oprnds1
.create (1);
5514 vec_oprnds2
.create (1);
5515 vec_oprnds3
.create (1);
5519 scalar_dest
= gimple_assign_lhs (stmt
);
5520 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
5522 /* Handle cond expr. */
5523 for (j
= 0; j
< ncopies
; j
++)
5525 gimple new_stmt
= NULL
;
/* SLP path: gather all vector defs for the four operands in one call;
   vec_defs is popped in reverse push order (else, then, rhs, lhs).  */
5530 stack_vec
<tree
, 4> ops
;
5531 stack_vec
<vec
<tree
>, 4> vec_defs
;
5533 ops
.safe_push (TREE_OPERAND (cond_expr
, 0));
5534 ops
.safe_push (TREE_OPERAND (cond_expr
, 1));
5535 ops
.safe_push (then_clause
);
5536 ops
.safe_push (else_clause
);
5537 vect_get_slp_defs (ops
, slp_node
, &vec_defs
, -1);
5538 vec_oprnds3
= vec_defs
.pop ();
5539 vec_oprnds2
= vec_defs
.pop ();
5540 vec_oprnds1
= vec_defs
.pop ();
5541 vec_oprnds0
= vec_defs
.pop ();
5544 vec_defs
.release ();
/* Non-SLP, first copy (j == 0): create the vector defs for each operand
   and remember its def-type in dts[].  When this COND_EXPR feeds a
   reduction, REDUC_DEF replaces the then- (reduc_index == 1) or
   else-clause (reduc_index == 2) operand.  */
5550 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 0),
5552 vect_is_simple_use (TREE_OPERAND (cond_expr
, 0), stmt
,
5553 loop_vinfo
, NULL
, >emp
, &def
, &dts
[0]);
5556 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr
, 1),
5558 vect_is_simple_use (TREE_OPERAND (cond_expr
, 1), stmt
,
5559 loop_vinfo
, NULL
, >emp
, &def
, &dts
[1]);
5560 if (reduc_index
== 1)
5561 vec_then_clause
= reduc_def
;
5564 vec_then_clause
= vect_get_vec_def_for_operand (then_clause
,
5566 vect_is_simple_use (then_clause
, stmt
, loop_vinfo
,
5567 NULL
, >emp
, &def
, &dts
[2]);
5569 if (reduc_index
== 2)
5570 vec_else_clause
= reduc_def
;
5573 vec_else_clause
= vect_get_vec_def_for_operand (else_clause
,
5575 vect_is_simple_use (else_clause
, stmt
, loop_vinfo
,
5576 NULL
, >emp
, &def
, &dts
[3]);
/* Later copies (j > 0): derive each operand's def from the previous
   copy's def using the recorded def-types.  */
5582 vec_cond_lhs
= vect_get_vec_def_for_stmt_copy (dts
[0],
5583 vec_oprnds0
.pop ());
5584 vec_cond_rhs
= vect_get_vec_def_for_stmt_copy (dts
[1],
5585 vec_oprnds1
.pop ());
5586 vec_then_clause
= vect_get_vec_def_for_stmt_copy (dts
[2],
5587 vec_oprnds2
.pop ());
5588 vec_else_clause
= vect_get_vec_def_for_stmt_copy (dts
[3],
5589 vec_oprnds3
.pop ());
5594 vec_oprnds0
.quick_push (vec_cond_lhs
);
5595 vec_oprnds1
.quick_push (vec_cond_rhs
);
5596 vec_oprnds2
.quick_push (vec_then_clause
);
5597 vec_oprnds3
.quick_push (vec_else_clause
);
5600 /* Arguments are ready. Create the new vector stmt. */
5601 FOR_EACH_VEC_ELT (vec_oprnds0
, i
, vec_cond_lhs
)
5603 vec_cond_rhs
= vec_oprnds1
[i
];
5604 vec_then_clause
= vec_oprnds2
[i
];
5605 vec_else_clause
= vec_oprnds3
[i
];
/* Emit: vec_dest = VEC_COND_EXPR <lhs CODE rhs, then, else>.  */
5607 vec_compare
= build2 (TREE_CODE (cond_expr
), vec_cmp_type
,
5608 vec_cond_lhs
, vec_cond_rhs
);
5609 vec_cond_expr
= build3 (VEC_COND_EXPR
, vectype
,
5610 vec_compare
, vec_then_clause
, vec_else_clause
);
5612 new_stmt
= gimple_build_assign (vec_dest
, vec_cond_expr
);
5613 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
5614 gimple_assign_set_lhs (new_stmt
, new_temp
);
5615 vect_finish_stmt_generation (stmt
, new_stmt
, gsi
);
5617 SLP_TREE_VEC_STMTS (slp_node
).quick_push (new_stmt
);
/* Chain the copies: the first copy goes into VEC_STMT / the stmt_info's
   VEC_STMT field, later copies hang off RELATED_STMT.  */
5624 STMT_VINFO_VEC_STMT (stmt_info
) = *vec_stmt
= new_stmt
;
5626 STMT_VINFO_RELATED_STMT (prev_stmt_info
) = new_stmt
;
5628 prev_stmt_info
= vinfo_for_stmt (new_stmt
);
/* Free the per-operand scratch vectors.  */
5631 vec_oprnds0
.release ();
5632 vec_oprnds1
.release ();
5633 vec_oprnds2
.release ();
5634 vec_oprnds3
.release ();
5640 /* Make sure the statement is vectorizable. */
/* NOTE(review): lossy extraction -- embedded line numbers skip (e.g.
   5657 -> 5660, 5819 -> 5824), so braces, early returns and parts of
   some conditions are not visible here.  Comments only; all code tokens
   left byte-identical.  */
5643 vect_analyze_stmt (gimple stmt
, bool *need_to_vectorize
, slp_tree node
)
5645 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5646 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
5647 enum vect_relevant relevance
= STMT_VINFO_RELEVANT (stmt_info
);
5649 tree scalar_type
, vectype
;
5650 gimple pattern_stmt
;
5651 gimple_seq pattern_def_seq
;
5653 if (dump_enabled_p ())
5655 dump_printf_loc (MSG_NOTE
, vect_location
, "==> examining statement: ");
5656 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5657 dump_printf (MSG_NOTE
, "\n");
/* Statements with volatile operands are never vectorized.  */
5660 if (gimple_has_volatile_ops (stmt
))
5662 if (dump_enabled_p ())
5663 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5664 "not vectorized: stmt has volatile operands\n");
5669 /* Skip stmts that do not need to be vectorized. In loops this is expected
5671 - the COND_EXPR which is the loop exit condition
5672 - any LABEL_EXPRs in the loop
5673 - computations that are used only for array indexing or loop control.
5674 In basic blocks we only analyze statements that are a part of some SLP
5675 instance, therefore, all the statements are relevant.
5677 Pattern statement needs to be analyzed instead of the original statement
5678 if the original statement is not relevant. Otherwise, we analyze both
5679 statements. In basic blocks we are called from some SLP instance
5680 traversal, don't analyze pattern stmts instead, the pattern stmts
5681 already will be part of SLP instance. */
5683 pattern_stmt
= STMT_VINFO_RELATED_STMT (stmt_info
);
5684 if (!STMT_VINFO_RELEVANT_P (stmt_info
)
5685 && !STMT_VINFO_LIVE_P (stmt_info
))
5687 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5689 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5690 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5692 /* Analyze PATTERN_STMT instead of the original stmt. */
5693 stmt
= pattern_stmt
;
5694 stmt_info
= vinfo_for_stmt (pattern_stmt
);
5695 if (dump_enabled_p ())
5697 dump_printf_loc (MSG_NOTE
, vect_location
,
5698 "==> examining pattern statement: ");
5699 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5700 dump_printf (MSG_NOTE
, "\n");
5705 if (dump_enabled_p ())
5706 dump_printf_loc (MSG_NOTE
, vect_location
, "irrelevant.\n");
/* The original stmt IS relevant/live: if it has an associated relevant
   pattern stmt, analyze that one recursively as well.  */
5711 else if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
5714 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt
))
5715 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt
))))
5717 /* Analyze PATTERN_STMT too. */
5718 if (dump_enabled_p ())
5720 dump_printf_loc (MSG_NOTE
, vect_location
,
5721 "==> examining pattern statement: ");
5722 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
5723 dump_printf (MSG_NOTE
, "\n");
5726 if (!vect_analyze_stmt (pattern_stmt
, need_to_vectorize
, node
))
/* A pattern stmt may carry a def-sequence of helper stmts; each
   relevant/live one must itself pass vect_analyze_stmt.  */
5730 if (is_pattern_stmt_p (stmt_info
)
5732 && (pattern_def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
)))
5734 gimple_stmt_iterator si
;
5736 for (si
= gsi_start (pattern_def_seq
); !gsi_end_p (si
); gsi_next (&si
))
5738 gimple pattern_def_stmt
= gsi_stmt (si
);
5739 if (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_def_stmt
))
5740 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_def_stmt
)))
5742 /* Analyze def stmt of STMT if it's a pattern stmt. */
5743 if (dump_enabled_p ())
5745 dump_printf_loc (MSG_NOTE
, vect_location
,
5746 "==> examining pattern def statement: ");
5747 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, pattern_def_stmt
, 0);
5748 dump_printf (MSG_NOTE
, "\n");
5751 if (!vect_analyze_stmt (pattern_def_stmt
,
5752 need_to_vectorize
, node
))
/* Sanity-check the stmt's def-type against its recorded relevance
   (the per-case bodies/fallthroughs are partly dropped here).  */
5758 switch (STMT_VINFO_DEF_TYPE (stmt_info
))
5760 case vect_internal_def
:
5763 case vect_reduction_def
:
5764 case vect_nested_cycle
:
5765 gcc_assert (!bb_vinfo
&& (relevance
== vect_used_in_outer
5766 || relevance
== vect_used_in_outer_by_reduction
5767 || relevance
== vect_unused_in_scope
));
5770 case vect_induction_def
:
5771 case vect_constant_def
:
5772 case vect_external_def
:
5773 case vect_unknown_def_type
:
/* Basic-block (SLP) analysis path: compute the stmt's vectype from its
   scalar lhs type, since it was not set during loop analysis.  */
5780 gcc_assert (PURE_SLP_STMT (stmt_info
));
5782 scalar_type
= TREE_TYPE (gimple_get_lhs (stmt
));
5783 if (dump_enabled_p ())
5785 dump_printf_loc (MSG_NOTE
, vect_location
,
5786 "get vectype for scalar type: ");
5787 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, scalar_type
);
5788 dump_printf (MSG_NOTE
, "\n");
5791 vectype
= get_vectype_for_scalar_type (scalar_type
);
5794 if (dump_enabled_p ())
5796 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5797 "not SLPed: unsupported data-type ");
5798 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
5800 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5805 if (dump_enabled_p ())
5807 dump_printf_loc (MSG_NOTE
, vect_location
, "vectype: ");
5808 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, vectype
);
5809 dump_printf (MSG_NOTE
, "\n");
5812 STMT_VINFO_VECTYPE (stmt_info
) = vectype
;
5815 if (STMT_VINFO_RELEVANT_P (stmt_info
))
5817 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt
))));
5818 gcc_assert (STMT_VINFO_VECTYPE (stmt_info
));
5819 *need_to_vectorize
= true;
/* Loop analysis (the "if (!bb_vinfo" head of this condition was
   dropped): try every vectorizable_* analyzer with a NULL slp node.  */
5824 && (STMT_VINFO_RELEVANT_P (stmt_info
)
5825 || STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
))
5826 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, NULL
)
5827 || vectorizable_shift (stmt
, NULL
, NULL
, NULL
)
5828 || vectorizable_operation (stmt
, NULL
, NULL
, NULL
)
5829 || vectorizable_assignment (stmt
, NULL
, NULL
, NULL
)
5830 || vectorizable_load (stmt
, NULL
, NULL
, NULL
, NULL
)
5831 || vectorizable_call (stmt
, NULL
, NULL
, NULL
)
5832 || vectorizable_store (stmt
, NULL
, NULL
, NULL
)
5833 || vectorizable_reduction (stmt
, NULL
, NULL
, NULL
)
5834 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, NULL
));
/* SLP analysis: same analyzers, but passing the SLP node.  */
5838 ok
= (vectorizable_conversion (stmt
, NULL
, NULL
, node
)
5839 || vectorizable_shift (stmt
, NULL
, NULL
, node
)
5840 || vectorizable_operation (stmt
, NULL
, NULL
, node
)
5841 || vectorizable_assignment (stmt
, NULL
, NULL
, node
)
5842 || vectorizable_load (stmt
, NULL
, NULL
, node
, NULL
)
5843 || vectorizable_call (stmt
, NULL
, NULL
, node
)
5844 || vectorizable_store (stmt
, NULL
, NULL
, node
)
5845 || vectorizable_condition (stmt
, NULL
, NULL
, NULL
, 0, node
));
5850 if (dump_enabled_p ())
5852 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5853 "not vectorized: relevant stmt not ");
5854 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
5855 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
5856 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5865 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
5866 need extra handling, except for vectorizable reductions. */
5867 if (STMT_VINFO_LIVE_P (stmt_info
)
5868 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
5869 ok
= vectorizable_live_operation (stmt
, NULL
, NULL
);
5873 if (dump_enabled_p ())
5875 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5876 "not vectorized: live stmt not ");
5877 dump_printf (MSG_MISSED_OPTIMIZATION
, "supported: ");
5878 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
5879 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
5889 /* Function vect_transform_stmt.
5891 Create a vectorized stmt to replace STMT, and insert it at BSI. */
5894 vect_transform_stmt (gimple stmt
, gimple_stmt_iterator
*gsi
,
5895 bool *grouped_store
, slp_tree slp_node
,
5896 slp_instance slp_node_instance
)
5898 bool is_store
= false;
5899 gimple vec_stmt
= NULL
;
5900 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5903 switch (STMT_VINFO_TYPE (stmt_info
))
5905 case type_demotion_vec_info_type
:
5906 case type_promotion_vec_info_type
:
5907 case type_conversion_vec_info_type
:
5908 done
= vectorizable_conversion (stmt
, gsi
, &vec_stmt
, slp_node
);
5912 case induc_vec_info_type
:
5913 gcc_assert (!slp_node
);
5914 done
= vectorizable_induction (stmt
, gsi
, &vec_stmt
);
5918 case shift_vec_info_type
:
5919 done
= vectorizable_shift (stmt
, gsi
, &vec_stmt
, slp_node
);
5923 case op_vec_info_type
:
5924 done
= vectorizable_operation (stmt
, gsi
, &vec_stmt
, slp_node
);
5928 case assignment_vec_info_type
:
5929 done
= vectorizable_assignment (stmt
, gsi
, &vec_stmt
, slp_node
);
5933 case load_vec_info_type
:
5934 done
= vectorizable_load (stmt
, gsi
, &vec_stmt
, slp_node
,
5939 case store_vec_info_type
:
5940 done
= vectorizable_store (stmt
, gsi
, &vec_stmt
, slp_node
);
5942 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
) && !slp_node
)
5944 /* In case of interleaving, the whole chain is vectorized when the
5945 last store in the chain is reached. Store stmts before the last
5946 one are skipped, and there vec_stmt_info shouldn't be freed
5948 *grouped_store
= true;
5949 if (STMT_VINFO_VEC_STMT (stmt_info
))
5956 case condition_vec_info_type
:
5957 done
= vectorizable_condition (stmt
, gsi
, &vec_stmt
, NULL
, 0, slp_node
);
5961 case call_vec_info_type
:
5962 done
= vectorizable_call (stmt
, gsi
, &vec_stmt
, slp_node
);
5963 stmt
= gsi_stmt (*gsi
);
5966 case reduc_vec_info_type
:
5967 done
= vectorizable_reduction (stmt
, gsi
, &vec_stmt
, slp_node
);
5972 if (!STMT_VINFO_LIVE_P (stmt_info
))
5974 if (dump_enabled_p ())
5975 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
5976 "stmt not supported.\n");
5981 /* Handle inner-loop stmts whose DEF is used in the loop-nest that
5982 is being vectorized, but outside the immediately enclosing loop. */
5984 && STMT_VINFO_LOOP_VINFO (stmt_info
)
5985 && nested_in_vect_loop_p (LOOP_VINFO_LOOP (
5986 STMT_VINFO_LOOP_VINFO (stmt_info
)), stmt
)
5987 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
5988 && (STMT_VINFO_RELEVANT (stmt_info
) == vect_used_in_outer
5989 || STMT_VINFO_RELEVANT (stmt_info
) ==
5990 vect_used_in_outer_by_reduction
))
5992 struct loop
*innerloop
= LOOP_VINFO_LOOP (
5993 STMT_VINFO_LOOP_VINFO (stmt_info
))->inner
;
5994 imm_use_iterator imm_iter
;
5995 use_operand_p use_p
;
5999 if (dump_enabled_p ())
6000 dump_printf_loc (MSG_NOTE
, vect_location
,
6001 "Record the vdef for outer-loop vectorization.\n");
6003 /* Find the relevant loop-exit phi-node, and reord the vec_stmt there
6004 (to be used when vectorizing outer-loop stmts that use the DEF of
6006 if (gimple_code (stmt
) == GIMPLE_PHI
)
6007 scalar_dest
= PHI_RESULT (stmt
);
6009 scalar_dest
= gimple_assign_lhs (stmt
);
6011 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, scalar_dest
)
6013 if (!flow_bb_inside_loop_p (innerloop
, gimple_bb (USE_STMT (use_p
))))
6015 exit_phi
= USE_STMT (use_p
);
6016 STMT_VINFO_VEC_STMT (vinfo_for_stmt (exit_phi
)) = vec_stmt
;
6021 /* Handle stmts whose DEF is used outside the loop-nest that is
6022 being vectorized. */
6023 if (STMT_VINFO_LIVE_P (stmt_info
)
6024 && STMT_VINFO_TYPE (stmt_info
) != reduc_vec_info_type
)
6026 done
= vectorizable_live_operation (stmt
, gsi
, &vec_stmt
);
6031 STMT_VINFO_VEC_STMT (stmt_info
) = vec_stmt
;
6037 /* Remove a group of stores (for SLP or interleaving), free their
6041 vect_remove_stores (gimple first_stmt
)
6043 gimple next
= first_stmt
;
6045 gimple_stmt_iterator next_si
;
6049 stmt_vec_info stmt_info
= vinfo_for_stmt (next
);
6051 tmp
= GROUP_NEXT_ELEMENT (stmt_info
);
6052 if (is_pattern_stmt_p (stmt_info
))
6053 next
= STMT_VINFO_RELATED_STMT (stmt_info
);
6054 /* Free the attached stmt_vec_info and remove the stmt. */
6055 next_si
= gsi_for_stmt (next
);
6056 unlink_stmt_vdef (next
);
6057 gsi_remove (&next_si
, true);
6058 release_defs (next
);
6059 free_stmt_vec_info (next
);
6065 /* Function new_stmt_vec_info.
6067 Create and initialize a new stmt_vec_info struct for STMT. */
6070 new_stmt_vec_info (gimple stmt
, loop_vec_info loop_vinfo
,
6071 bb_vec_info bb_vinfo
)
6074 res
= (stmt_vec_info
) xcalloc (1, sizeof (struct _stmt_vec_info
));
6076 STMT_VINFO_TYPE (res
) = undef_vec_info_type
;
6077 STMT_VINFO_STMT (res
) = stmt
;
6078 STMT_VINFO_LOOP_VINFO (res
) = loop_vinfo
;
6079 STMT_VINFO_BB_VINFO (res
) = bb_vinfo
;
6080 STMT_VINFO_RELEVANT (res
) = vect_unused_in_scope
;
6081 STMT_VINFO_LIVE_P (res
) = false;
6082 STMT_VINFO_VECTYPE (res
) = NULL
;
6083 STMT_VINFO_VEC_STMT (res
) = NULL
;
6084 STMT_VINFO_VECTORIZABLE (res
) = true;
6085 STMT_VINFO_IN_PATTERN_P (res
) = false;
6086 STMT_VINFO_RELATED_STMT (res
) = NULL
;
6087 STMT_VINFO_PATTERN_DEF_SEQ (res
) = NULL
;
6088 STMT_VINFO_DATA_REF (res
) = NULL
;
6090 STMT_VINFO_DR_BASE_ADDRESS (res
) = NULL
;
6091 STMT_VINFO_DR_OFFSET (res
) = NULL
;
6092 STMT_VINFO_DR_INIT (res
) = NULL
;
6093 STMT_VINFO_DR_STEP (res
) = NULL
;
6094 STMT_VINFO_DR_ALIGNED_TO (res
) = NULL
;
6096 if (gimple_code (stmt
) == GIMPLE_PHI
6097 && is_loop_header_bb_p (gimple_bb (stmt
)))
6098 STMT_VINFO_DEF_TYPE (res
) = vect_unknown_def_type
;
6100 STMT_VINFO_DEF_TYPE (res
) = vect_internal_def
;
6102 STMT_VINFO_SAME_ALIGN_REFS (res
).create (0);
6103 STMT_SLP_TYPE (res
) = loop_vect
;
6104 GROUP_FIRST_ELEMENT (res
) = NULL
;
6105 GROUP_NEXT_ELEMENT (res
) = NULL
;
6106 GROUP_SIZE (res
) = 0;
6107 GROUP_STORE_COUNT (res
) = 0;
6108 GROUP_GAP (res
) = 0;
6109 GROUP_SAME_DR_STMT (res
) = NULL
;
6115 /* Create a hash table for stmt_vec_info. */
6118 init_stmt_vec_info_vec (void)
6120 gcc_assert (!stmt_vec_info_vec
.exists ());
6121 stmt_vec_info_vec
.create (50);
6125 /* Free hash table for stmt_vec_info. */
6128 free_stmt_vec_info_vec (void)
6132 FOR_EACH_VEC_ELT (stmt_vec_info_vec
, i
, info
)
6134 free_stmt_vec_info (STMT_VINFO_STMT ((stmt_vec_info
) info
));
6135 gcc_assert (stmt_vec_info_vec
.exists ());
6136 stmt_vec_info_vec
.release ();
6140 /* Free stmt vectorization related info. */
6143 free_stmt_vec_info (gimple stmt
)
6145 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6150 /* Check if this statement has a related "pattern stmt"
6151 (introduced by the vectorizer during the pattern recognition
6152 pass). Free pattern's stmt_vec_info and def stmt's stmt_vec_info
6154 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
6156 stmt_vec_info patt_info
6157 = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6160 gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (patt_info
);
6163 gimple_stmt_iterator si
;
6164 for (si
= gsi_start (seq
); !gsi_end_p (si
); gsi_next (&si
))
6165 free_stmt_vec_info (gsi_stmt (si
));
6167 free_stmt_vec_info (STMT_VINFO_RELATED_STMT (stmt_info
));
6171 STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).release ();
6172 set_vinfo_for_stmt (stmt
, NULL
);
6177 /* Function get_vectype_for_scalar_type_and_size.
6179 Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
6183 get_vectype_for_scalar_type_and_size (tree scalar_type
, unsigned size
)
6185 enum machine_mode inner_mode
= TYPE_MODE (scalar_type
);
6186 enum machine_mode simd_mode
;
6187 unsigned int nbytes
= GET_MODE_SIZE (inner_mode
);
6194 if (GET_MODE_CLASS (inner_mode
) != MODE_INT
6195 && GET_MODE_CLASS (inner_mode
) != MODE_FLOAT
)
6198 /* For vector types of elements whose mode precision doesn't
6199 match their types precision we use a element type of mode
6200 precision. The vectorization routines will have to make sure
6201 they support the proper result truncation/extension.
6202 We also make sure to build vector types with INTEGER_TYPE
6203 component type only. */
6204 if (INTEGRAL_TYPE_P (scalar_type
)
6205 && (GET_MODE_BITSIZE (inner_mode
) != TYPE_PRECISION (scalar_type
)
6206 || TREE_CODE (scalar_type
) != INTEGER_TYPE
))
6207 scalar_type
= build_nonstandard_integer_type (GET_MODE_BITSIZE (inner_mode
),
6208 TYPE_UNSIGNED (scalar_type
));
6210 /* We shouldn't end up building VECTOR_TYPEs of non-scalar components.
6211 When the component mode passes the above test simply use a type
6212 corresponding to that mode. The theory is that any use that
6213 would cause problems with this will disable vectorization anyway. */
6214 else if (!SCALAR_FLOAT_TYPE_P (scalar_type
)
6215 && !INTEGRAL_TYPE_P (scalar_type
))
6216 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
, 1);
6218 /* We can't build a vector type of elements with alignment bigger than
6220 else if (nbytes
< TYPE_ALIGN_UNIT (scalar_type
))
6221 scalar_type
= lang_hooks
.types
.type_for_mode (inner_mode
,
6222 TYPE_UNSIGNED (scalar_type
));
6224 /* If we felt back to using the mode fail if there was
6225 no scalar type for it. */
6226 if (scalar_type
== NULL_TREE
)
6229 /* If no size was supplied use the mode the target prefers. Otherwise
6230 lookup a vector mode of the specified size. */
6232 simd_mode
= targetm
.vectorize
.preferred_simd_mode (inner_mode
);
6234 simd_mode
= mode_for_vector (inner_mode
, size
/ nbytes
);
6235 nunits
= GET_MODE_SIZE (simd_mode
) / nbytes
;
6239 vectype
= build_vector_type (scalar_type
, nunits
);
6241 if (!VECTOR_MODE_P (TYPE_MODE (vectype
))
6242 && !INTEGRAL_MODE_P (TYPE_MODE (vectype
)))
/* Vector size (in bytes) in use for the current compilation; zero until
   the first successful call to get_vectype_for_scalar_type sets it.  */
unsigned int current_vector_size;
6250 /* Function get_vectype_for_scalar_type.
6252 Returns the vector type corresponding to SCALAR_TYPE as supported
6256 get_vectype_for_scalar_type (tree scalar_type
)
6259 vectype
= get_vectype_for_scalar_type_and_size (scalar_type
,
6260 current_vector_size
);
6262 && current_vector_size
== 0)
6263 current_vector_size
= GET_MODE_SIZE (TYPE_MODE (vectype
));
6267 /* Function get_same_sized_vectype
6269 Returns a vector type corresponding to SCALAR_TYPE of size
6270 VECTOR_TYPE if supported by the target. */
6273 get_same_sized_vectype (tree scalar_type
, tree vector_type
)
6275 return get_vectype_for_scalar_type_and_size
6276 (scalar_type
, GET_MODE_SIZE (TYPE_MODE (vector_type
)));
6279 /* Function vect_is_simple_use.
6282 LOOP_VINFO - the vect info of the loop that is being vectorized.
6283 BB_VINFO - the vect info of the basic block that is being vectorized.
6284 OPERAND - operand of STMT in the loop or bb.
6285 DEF - the defining stmt in case OPERAND is an SSA_NAME.
6287 Returns whether a stmt with OPERAND can be vectorized.
6288 For loops, supportable operands are constants, loop invariants, and operands
6289 that are defined by the current iteration of the loop. Unsupportable
6290 operands are those that are defined by a previous iteration of the loop (as
6291 is the case in reduction/induction computations).
6292 For basic blocks, supportable operands are constants and bb invariants.
6293 For now, operands defined outside the basic block are not supported. */
6296 vect_is_simple_use (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6297 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6298 tree
*def
, enum vect_def_type
*dt
)
6301 stmt_vec_info stmt_vinfo
;
6302 struct loop
*loop
= NULL
;
6305 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6310 if (dump_enabled_p ())
6312 dump_printf_loc (MSG_NOTE
, vect_location
,
6313 "vect_is_simple_use: operand ");
6314 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, operand
);
6315 dump_printf (MSG_NOTE
, "\n");
6318 if (CONSTANT_CLASS_P (operand
))
6320 *dt
= vect_constant_def
;
6324 if (is_gimple_min_invariant (operand
))
6327 *dt
= vect_external_def
;
6331 if (TREE_CODE (operand
) == PAREN_EXPR
)
6333 if (dump_enabled_p ())
6334 dump_printf_loc (MSG_NOTE
, vect_location
, "non-associatable copy.\n");
6335 operand
= TREE_OPERAND (operand
, 0);
6338 if (TREE_CODE (operand
) != SSA_NAME
)
6340 if (dump_enabled_p ())
6341 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6346 *def_stmt
= SSA_NAME_DEF_STMT (operand
);
6347 if (*def_stmt
== NULL
)
6349 if (dump_enabled_p ())
6350 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6355 if (dump_enabled_p ())
6357 dump_printf_loc (MSG_NOTE
, vect_location
, "def_stmt: ");
6358 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, *def_stmt
, 0);
6359 dump_printf (MSG_NOTE
, "\n");
6362 /* Empty stmt is expected only in case of a function argument.
6363 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
6364 if (gimple_nop_p (*def_stmt
))
6367 *dt
= vect_external_def
;
6371 bb
= gimple_bb (*def_stmt
);
6373 if ((loop
&& !flow_bb_inside_loop_p (loop
, bb
))
6374 || (!loop
&& bb
!= BB_VINFO_BB (bb_vinfo
))
6375 || (!loop
&& gimple_code (*def_stmt
) == GIMPLE_PHI
))
6376 *dt
= vect_external_def
;
6379 stmt_vinfo
= vinfo_for_stmt (*def_stmt
);
6380 *dt
= STMT_VINFO_DEF_TYPE (stmt_vinfo
);
6383 if (*dt
== vect_unknown_def_type
6385 && *dt
== vect_double_reduction_def
6386 && gimple_code (stmt
) != GIMPLE_PHI
))
6388 if (dump_enabled_p ())
6389 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6390 "Unsupported pattern.\n");
6394 if (dump_enabled_p ())
6395 dump_printf_loc (MSG_NOTE
, vect_location
, "type of def: %d.\n", *dt
);
6397 switch (gimple_code (*def_stmt
))
6400 *def
= gimple_phi_result (*def_stmt
);
6404 *def
= gimple_assign_lhs (*def_stmt
);
6408 *def
= gimple_call_lhs (*def_stmt
);
6413 if (dump_enabled_p ())
6414 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
6415 "unsupported defining stmt:\n");
6422 /* Function vect_is_simple_use_1.
6424 Same as vect_is_simple_use_1 but also determines the vector operand
6425 type of OPERAND and stores it to *VECTYPE. If the definition of
6426 OPERAND is vect_uninitialized_def, vect_constant_def or
6427 vect_external_def *VECTYPE will be set to NULL_TREE and the caller
6428 is responsible to compute the best suited vector type for the
6432 vect_is_simple_use_1 (tree operand
, gimple stmt
, loop_vec_info loop_vinfo
,
6433 bb_vec_info bb_vinfo
, gimple
*def_stmt
,
6434 tree
*def
, enum vect_def_type
*dt
, tree
*vectype
)
6436 if (!vect_is_simple_use (operand
, stmt
, loop_vinfo
, bb_vinfo
, def_stmt
,
6440 /* Now get a vector type if the def is internal, otherwise supply
6441 NULL_TREE and leave it up to the caller to figure out a proper
6442 type for the use stmt. */
6443 if (*dt
== vect_internal_def
6444 || *dt
== vect_induction_def
6445 || *dt
== vect_reduction_def
6446 || *dt
== vect_double_reduction_def
6447 || *dt
== vect_nested_cycle
)
6449 stmt_vec_info stmt_info
= vinfo_for_stmt (*def_stmt
);
6451 if (STMT_VINFO_IN_PATTERN_P (stmt_info
)
6452 && !STMT_VINFO_RELEVANT (stmt_info
)
6453 && !STMT_VINFO_LIVE_P (stmt_info
))
6454 stmt_info
= vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info
));
6456 *vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6457 gcc_assert (*vectype
!= NULL_TREE
);
6459 else if (*dt
== vect_uninitialized_def
6460 || *dt
== vect_constant_def
6461 || *dt
== vect_external_def
)
6462 *vectype
= NULL_TREE
;
6470 /* Function supportable_widening_operation
6472 Check whether an operation represented by the code CODE is a
6473 widening operation that is supported by the target platform in
6474 vector form (i.e., when operating on arguments of type VECTYPE_IN
6475 producing a result of type VECTYPE_OUT).
6477 Widening operations we currently support are NOP (CONVERT), FLOAT
6478 and WIDEN_MULT. This function checks if these operations are supported
6479 by the target platform either directly (via vector tree-codes), or via
6483 - CODE1 and CODE2 are codes of vector operations to be used when
6484 vectorizing the operation, if available.
6485 - MULTI_STEP_CVT determines the number of required intermediate steps in
6486 case of multi-step conversion (like char->short->int - in that case
6487 MULTI_STEP_CVT will be 1).
6488 - INTERM_TYPES contains the intermediate type required to perform the
6489 widening operation (short in the above example). */
6492 supportable_widening_operation (enum tree_code code
, gimple stmt
,
6493 tree vectype_out
, tree vectype_in
,
6494 enum tree_code
*code1
, enum tree_code
*code2
,
6495 int *multi_step_cvt
,
6496 vec
<tree
> *interm_types
)
6498 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
6499 loop_vec_info loop_info
= STMT_VINFO_LOOP_VINFO (stmt_info
);
6500 struct loop
*vect_loop
= NULL
;
6501 enum machine_mode vec_mode
;
6502 enum insn_code icode1
, icode2
;
6503 optab optab1
, optab2
;
6504 tree vectype
= vectype_in
;
6505 tree wide_vectype
= vectype_out
;
6506 enum tree_code c1
, c2
;
6508 tree prev_type
, intermediate_type
;
6509 enum machine_mode intermediate_mode
, prev_mode
;
6510 optab optab3
, optab4
;
6512 *multi_step_cvt
= 0;
6514 vect_loop
= LOOP_VINFO_LOOP (loop_info
);
6518 case WIDEN_MULT_EXPR
:
6519 /* The result of a vectorized widening operation usually requires
6520 two vectors (because the widened results do not fit into one vector).
6521 The generated vector results would normally be expected to be
6522 generated in the same order as in the original scalar computation,
6523 i.e. if 8 results are generated in each vector iteration, they are
6524 to be organized as follows:
6525 vect1: [res1,res2,res3,res4],
6526 vect2: [res5,res6,res7,res8].
6528 However, in the special case that the result of the widening
6529 operation is used in a reduction computation only, the order doesn't
6530 matter (because when vectorizing a reduction we change the order of
6531 the computation). Some targets can take advantage of this and
6532 generate more efficient code. For example, targets like Altivec,
6533 that support widen_mult using a sequence of {mult_even,mult_odd}
6534 generate the following vectors:
6535 vect1: [res1,res3,res5,res7],
6536 vect2: [res2,res4,res6,res8].
6538 When vectorizing outer-loops, we execute the inner-loop sequentially
6539 (each vectorized inner-loop iteration contributes to VF outer-loop
6540 iterations in parallel). We therefore don't allow to change the
6541 order of the computation in the inner-loop during outer-loop
6543 /* TODO: Another case in which order doesn't *really* matter is when we
6544 widen and then contract again, e.g. (short)((int)x * y >> 8).
6545 Normally, pack_trunc performs an even/odd permute, whereas the
6546 repack from an even/odd expansion would be an interleave, which
6547 would be significantly simpler for e.g. AVX2. */
6548 /* In any case, in order to avoid duplicating the code below, recurse
6549 on VEC_WIDEN_MULT_EVEN_EXPR. If it succeeds, all the return values
6550 are properly set up for the caller. If we fail, we'll continue with
6551 a VEC_WIDEN_MULT_LO/HI_EXPR check. */
6553 && STMT_VINFO_RELEVANT (stmt_info
) == vect_used_by_reduction
6554 && !nested_in_vect_loop_p (vect_loop
, stmt
)
6555 && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR
,
6556 stmt
, vectype_out
, vectype_in
,
6557 code1
, code2
, multi_step_cvt
,
6560 c1
= VEC_WIDEN_MULT_LO_EXPR
;
6561 c2
= VEC_WIDEN_MULT_HI_EXPR
;
6564 case VEC_WIDEN_MULT_EVEN_EXPR
:
6565 /* Support the recursion induced just above. */
6566 c1
= VEC_WIDEN_MULT_EVEN_EXPR
;
6567 c2
= VEC_WIDEN_MULT_ODD_EXPR
;
6570 case WIDEN_LSHIFT_EXPR
:
6571 c1
= VEC_WIDEN_LSHIFT_LO_EXPR
;
6572 c2
= VEC_WIDEN_LSHIFT_HI_EXPR
;
6576 c1
= VEC_UNPACK_LO_EXPR
;
6577 c2
= VEC_UNPACK_HI_EXPR
;
6581 c1
= VEC_UNPACK_FLOAT_LO_EXPR
;
6582 c2
= VEC_UNPACK_FLOAT_HI_EXPR
;
6585 case FIX_TRUNC_EXPR
:
6586 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
6587 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
6588 computing the operation. */
6595 if (BYTES_BIG_ENDIAN
&& c1
!= VEC_WIDEN_MULT_EVEN_EXPR
)
6597 enum tree_code ctmp
= c1
;
6602 if (code
== FIX_TRUNC_EXPR
)
6604 /* The signedness is determined from output operand. */
6605 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6606 optab2
= optab_for_tree_code (c2
, vectype_out
, optab_default
);
6610 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6611 optab2
= optab_for_tree_code (c2
, vectype
, optab_default
);
6614 if (!optab1
|| !optab2
)
6617 vec_mode
= TYPE_MODE (vectype
);
6618 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
6619 || (icode2
= optab_handler (optab2
, vec_mode
)) == CODE_FOR_nothing
)
6625 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6626 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6629 /* Check if it's a multi-step conversion that can be done using intermediate
6632 prev_type
= vectype
;
6633 prev_mode
= vec_mode
;
6635 if (!CONVERT_EXPR_CODE_P (code
))
6638 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6639 intermediate steps in promotion sequence. We try
6640 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
6642 interm_types
->create (MAX_INTERM_CVT_STEPS
);
6643 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6645 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6647 = lang_hooks
.types
.type_for_mode (intermediate_mode
,
6648 TYPE_UNSIGNED (prev_type
));
6649 optab3
= optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6650 optab4
= optab_for_tree_code (c2
, intermediate_type
, optab_default
);
6652 if (!optab3
|| !optab4
6653 || (icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
6654 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6655 || (icode2
= optab_handler (optab2
, prev_mode
)) == CODE_FOR_nothing
6656 || insn_data
[icode2
].operand
[0].mode
!= intermediate_mode
6657 || ((icode1
= optab_handler (optab3
, intermediate_mode
))
6658 == CODE_FOR_nothing
)
6659 || ((icode2
= optab_handler (optab4
, intermediate_mode
))
6660 == CODE_FOR_nothing
))
6663 interm_types
->quick_push (intermediate_type
);
6664 (*multi_step_cvt
)++;
6666 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (wide_vectype
)
6667 && insn_data
[icode2
].operand
[0].mode
== TYPE_MODE (wide_vectype
))
6670 prev_type
= intermediate_type
;
6671 prev_mode
= intermediate_mode
;
6674 interm_types
->release ();
6679 /* Function supportable_narrowing_operation
6681 Check whether an operation represented by the code CODE is a
6682 narrowing operation that is supported by the target platform in
6683 vector form (i.e., when operating on arguments of type VECTYPE_IN
6684 and producing a result of type VECTYPE_OUT).
6686 Narrowing operations we currently support are NOP (CONVERT) and
6687 FIX_TRUNC. This function checks if these operations are supported by
6688 the target platform directly via vector tree-codes.
6691 - CODE1 is the code of a vector operation to be used when
6692 vectorizing the operation, if available.
6693 - MULTI_STEP_CVT determines the number of required intermediate steps in
6694 case of multi-step conversion (like int->short->char - in that case
6695 MULTI_STEP_CVT will be 1).
6696 - INTERM_TYPES contains the intermediate type required to perform the
6697 narrowing operation (short in the above example). */
6700 supportable_narrowing_operation (enum tree_code code
,
6701 tree vectype_out
, tree vectype_in
,
6702 enum tree_code
*code1
, int *multi_step_cvt
,
6703 vec
<tree
> *interm_types
)
6705 enum machine_mode vec_mode
;
6706 enum insn_code icode1
;
6707 optab optab1
, interm_optab
;
6708 tree vectype
= vectype_in
;
6709 tree narrow_vectype
= vectype_out
;
6711 tree intermediate_type
;
6712 enum machine_mode intermediate_mode
, prev_mode
;
6716 *multi_step_cvt
= 0;
6720 c1
= VEC_PACK_TRUNC_EXPR
;
6723 case FIX_TRUNC_EXPR
:
6724 c1
= VEC_PACK_FIX_TRUNC_EXPR
;
6728 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
6729 tree code and optabs used for computing the operation. */
6736 if (code
== FIX_TRUNC_EXPR
)
6737 /* The signedness is determined from output operand. */
6738 optab1
= optab_for_tree_code (c1
, vectype_out
, optab_default
);
6740 optab1
= optab_for_tree_code (c1
, vectype
, optab_default
);
6745 vec_mode
= TYPE_MODE (vectype
);
6746 if ((icode1
= optab_handler (optab1
, vec_mode
)) == CODE_FOR_nothing
)
6751 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6754 /* Check if it's a multi-step conversion that can be done using intermediate
6756 prev_mode
= vec_mode
;
6757 if (code
== FIX_TRUNC_EXPR
)
6758 uns
= TYPE_UNSIGNED (vectype_out
);
6760 uns
= TYPE_UNSIGNED (vectype
);
6762 /* For multi-step FIX_TRUNC_EXPR prefer signed floating to integer
6763 conversion over unsigned, as unsigned FIX_TRUNC_EXPR is often more
6764 costly than signed. */
6765 if (code
== FIX_TRUNC_EXPR
&& uns
)
6767 enum insn_code icode2
;
6770 = lang_hooks
.types
.type_for_mode (TYPE_MODE (vectype_out
), 0);
6772 = optab_for_tree_code (c1
, intermediate_type
, optab_default
);
6773 if (interm_optab
!= unknown_optab
6774 && (icode2
= optab_handler (optab1
, vec_mode
)) != CODE_FOR_nothing
6775 && insn_data
[icode1
].operand
[0].mode
6776 == insn_data
[icode2
].operand
[0].mode
)
6779 optab1
= interm_optab
;
6784 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
6785 intermediate steps in promotion sequence. We try
6786 MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do not. */
6787 interm_types
->create (MAX_INTERM_CVT_STEPS
);
6788 for (i
= 0; i
< MAX_INTERM_CVT_STEPS
; i
++)
6790 intermediate_mode
= insn_data
[icode1
].operand
[0].mode
;
6792 = lang_hooks
.types
.type_for_mode (intermediate_mode
, uns
);
6794 = optab_for_tree_code (VEC_PACK_TRUNC_EXPR
, intermediate_type
,
6797 || ((icode1
= optab_handler (optab1
, prev_mode
)) == CODE_FOR_nothing
)
6798 || insn_data
[icode1
].operand
[0].mode
!= intermediate_mode
6799 || ((icode1
= optab_handler (interm_optab
, intermediate_mode
))
6800 == CODE_FOR_nothing
))
6803 interm_types
->quick_push (intermediate_type
);
6804 (*multi_step_cvt
)++;
6806 if (insn_data
[icode1
].operand
[0].mode
== TYPE_MODE (narrow_vectype
))
6809 prev_mode
= intermediate_mode
;
6810 optab1
= interm_optab
;
6813 interm_types
->release ();